/NRPeditor/clusterFinderBackup.cpp
C++ | 3112 lines | 2732 code | 187 blank | 193 comment | 914 complexity | 213bc788573265f9dac075f3eb22d682 MD5 | raw file
Large files files are truncated, but you can click here to view the full file
- #include <iostream>
- #include <string>
- #include <fstream>
- #include <stdlib.h>
- #include <sstream>
- using namespace std;
- #include "constants.h"
- #include "UnitClass2.h"
- #include "QueueClass.h"
- #include "ReactionClass.h"
- #include "spawnl.h"
- // function declarations
- // overloading operator for outputing nodes
- ostream& operator <<(ostream& outputStream, const UnitClass2& domain );
- ostream& operator <<(ostream& outputStream, const UnitClass2& domain )
- {
- if( domain.getStartSite() == 0 )
- {
- outputStream << domain.getUnit() << " "
- << domain.getSmile() << endl;
- }
- else
- {
- outputStream << " Formula: "
- << domain.getFormula()
- << " start site: "
- << domain.getStartSite() << endl
- << " " << domain.getSmile() << endl;
- }
- return outputStream;
- }
- void clearPositions( string &pos235, string &pos236, string &pos239,
- string &pos278, string &pos299, string &pos301, string &pos322,
- string &pos330, string &pos331, string &pos58, string &pos59,
- string &pos60, string &pos61, string &pos62, string &pos70,
- string &pos72, string &pos74, string &pos75, string &pos77,
- string &pos93, string &pos189, string &pos191, string &pos198,
- string &pos199, string &pos200 );
- void outputSpecificitiesNRPS( int Aad, int Ala, int Asn, int Asp, int Cys,
- int Dab, int Dhb, int Gln, int Glu, int Ile,
- int Leu, int Orn, int Phe, int Phg, int Pip,
- int Pro, int Ser, int Thr, int Tyr, int His,
- int Lys, int Gly, int Val);
- void outputSpecificitiesPKS( int mal, int mmal, int emal, int omal);
- string maxSpecificityNRPS( int Aad, int Ala, int Asn, int Asp, int Cys,
- int Dab, int Dhb, int Gln, int Glu, int Ile,
- int Leu, int Orn, int Phe, int Phg, int Pip,
- int Pro, int Ser, int Thr, int Tyr, int His,
- int Lys, int Gly, int Val);
- string maxSpecificityPKS( int mal, int mmal, int emal, int omal);
- int getIntVal(string strConvert);
- float getFloatVal(string strConvert);
- void translateSeq( string &inSeq, string &outputSeq, int &frame);
- void predictDomains( string aminoAcidSeq,
- string &predictedSubstrates,
- QueueClass< UnitClass2 > &aaNameList,
- const bool &allFramesRead,
- const int &frame, const string &nucSeq,
- string &allClusters, int&NRPScount, int &PKScount,
- int &mixedNRPSPKScount, int &transATcount, int &mepCount,
- int &mvaCount);
- // function bodies
- void outputSpecificitiesPKS( int mal, int mmal, int emal, int omal)
- {
- cout << "PKS specificity scores: " << endl;
- cout << "mal: " << mal << " ";
- cout << "mmal: " << mmal << " ";
- cout << "emal: " << emal << " ";
- cout << "omal: " << omal << " ";
- cout << endl;
- }
- void outputSpecificitiesNRPS( int Aad, int Ala, int Asn, int Asp, int Cys,
- int Dab, int Dhb, int Gln, int Glu, int Ile,
- int Leu, int Orn, int Phe, int Phg, int Pip,
- int Pro, int Ser, int Thr, int Tyr, int His,
- int Lys, int Gly, int Val)
- {
- cout << "NRPS specificity scores: " << endl;
- cout << "Aad: " << Aad << " ";
- cout << "Ala: " << Ala << " ";
- cout << "Asn: " << Asn << " ";
- cout << "Asp: " << Asp << " ";
- cout << "Cys: " << Cys << " ";
- cout << "Dab: " << Dab << " ";
- cout << "Dhb: " << Dhb << " ";
- cout << "Gln: " << Gln << " ";
- cout << "Glu: " << Glu << " ";
- cout << "Gly: " << Gly << " ";
- cout << "His: " << His << endl;
- cout << "Ile: " << Ile << " ";
- cout << "Leu: " << Leu << " ";
- cout << "Lys: " << Lys << " ";
- cout << "Orn: " << Orn << " ";
- cout << "Phe: " << Phe << " ";
- cout << "Phg: " << Phg << " ";
- cout << "Pip: " << Pip << " ";
- cout << "Pro: " << Pro << " ";
- cout << "Ser: " << Ser << " ";
- cout << "Thr: " << Thr << " ";
- cout << "Tyr: " << Tyr << endl;
- cout << "Val: " << Val << " ";
- cout << endl;
- }
- string maxSpecificityPKS( int mal, int mmal, int emal, int omal)
- {
- int max;
- int distinguishingFactor = 5;
- string CA;
- max = mal;
- CA = "mal";
- if(mmal > mal)
- {
- max = mmal;
- CA = "mmal";
- }
- if(emal > max)
- {
- max = emal;
- CA = "emal";
- }
- if(omal > max)
- {
- max = omal;
- CA = "omal";
- }
- if(max < distinguishingFactor)
- {
- CA = "pk";
- }
- return CA;
- }
- string maxSpecificityNRPS( int Aad, int Ala, int Asn, int Asp, int Cys,
- int Dab, int Dhb, int Gln, int Glu, int Ile,
- int Leu, int Orn, int Phe, int Phg, int Pip,
- int Pro, int Ser, int Thr, int Tyr, int His,
- int Lys, int Gly, int Val)
- {
- int max;
- int distinguishingFactor = 3;
- string AA;
- max = Aad;
- AA = "Aad";
- if(Ala >= Aad)
- {
- max = Ala;
- AA = "ala";
- }
- if(Asn >= max)
- {
- max = Asn;
- AA = "asn";
- }
- if(Asp >= max)
- {
- max = Asp;
- AA = "asp";
- }
- if(Cys >= max)
- {
- max = Cys;
- AA = "cys";
- }
- if(Dab >= max)
- {
- max = Dab;
- AA = "dab";
- }
- if(Dhb >= max)
- {
- max = Dhb;
- AA = "dhb";
- }
- if(Gln >= max)
- {
- max = Gln;
- AA = "gln";
- }
- if(Glu >= max)
- {
- max = Glu;
- AA = "glu";
- }
- if(Ile >= max)
- {
- max = Ile;
- AA = "ile";
- }
- if(Leu >= max)
- {
- max = Leu;
- AA = "leu";
- }
- if(Orn >= max)
- {
- max = Orn;
- AA = "orn";
- }
- if(Phe >= max)
- {
- max = Phe;
- AA = "phe";
- }
- if(Phg >= max)
- {
- max = Phg;
- AA = "phg";
- }
- if(Pip >= max)
- {
- max = Pip;
- AA = "pip";
- }
- if(Pro >= max)
- {
- max = Pro;
- AA = "pro";
- }
- if(Ser >= max)
- {
- max = Ser;
- AA = "ser";
- }
- if(Thr >= max)
- {
- max = Thr;
- AA = "thr";
- }
- if(Tyr >= max)
- {
- max = Tyr;
- AA = "tyr";
- }
- if(His >= max)
- {
- max = His;
- AA = "his";
- }
- if(Lys >= max)
- {
- max = Lys;
- AA = "lys";
- }
- if(Gly >= max)
- {
- max = Gly;
- AA = "gly";
- }
- if(Val >= max)
- {
- max = Val;
- AA = "val";
- }
- if( max < distinguishingFactor)
- {
- AA = "nrp";
- }
- return AA;
- }
- int getIntVal(string strConvert)
- {
- int intReturn;
- // NOTE: You should probably do some checks to ensure that
- // this string contains only numbers. If the string is not
- // a valid integer, zero will be returned.
- intReturn = atoi(strConvert.c_str());
- return(intReturn);
- }
- float getFloatVal(string strConvert)
- {
- float floatReturn;
- // NOTE: You should probably do some checks to ensure that
- // this string contains only numbers. If the string is not
- // a valid integer, zero will be returned.
- floatReturn = atof(strConvert.c_str());
- return(floatReturn);
- }
- void clearPositions( string &pos235, string &pos236, string &pos239,
- string &pos278, string &pos299, string &pos301, string &pos322,
- string &pos330, string &pos331, string &pos58, string &pos59,
- string &pos60, string &pos61, string &pos62, string &pos70,
- string &pos72, string &pos74, string &pos75, string &pos77,
- string &pos93, string &pos189, string &pos191, string &pos198,
- string &pos199, string &pos200 )
- {
- pos235 = "";
- pos236 = "";
- pos239 = "";
- pos278 = "";
- pos299 = "";
- pos301 = "";
- pos322 = "";
- pos330 = "";
- pos331 = "";
- pos58 = "";
- pos59 = "";
- pos60 = "";
- pos61 = "";
- pos62 = "";
- pos70 = "";
- pos72 = "";
- pos74 = "";
- pos75 = "";
- pos77 = "";
- pos93 = "";
- pos189 = "";
- pos191 = "";
- pos198 = "";
- pos199 = "";
- pos200 = "";
- }
- // finds and returns the reverse complement sequence
- string revComp( const string &inSeq, const string &nucleotideSeq);
- string revComp( const string &inSeq, const string &nucleotideSeq)
- {
- string inputSeq;
- string outputSeq;
- string segment;
- string outputFilename;
- //string outFile = "rev-" + inSeq;
- int seqMin = 50; // minimum length of nucleotide line
- ifstream In;
- ofstream Out;
- int lineCharsCount = 0;
- int lineCharsMax = 70;
- In.open(inSeq.c_str());
- //outputFilename = "genomes/rev-"
- outputFilename = "genomes/rev-"
- + nucleotideSeq;
- In >> segment;
-
- // bypass initial FASTA details
- while(segment.length() < seqMin && !In.fail())
- {
- In >> segment;
- }
-
- // stores nucleotide
- inputSeq.append(segment);
- while(!In.fail())
- {
- In >> segment;
- inputSeq.append(segment);
- }
- In.close();
- for(int i = inputSeq.length(); i > 0; i--)
- {
- if(inputSeq[i] == 'A')
- {
- outputSeq.append("T");
- }
- if(inputSeq[i] == 'T')
- {
- outputSeq.append("A");
- }
- if(inputSeq[i] == 'C')
- {
- outputSeq.append("G");
- }
- if(inputSeq[i] == 'G')
- {
- outputSeq.append("C");
- }
-
- lineCharsCount++;
- if(lineCharsCount == lineCharsMax)
- {
- outputSeq.append("\n");
- lineCharsCount = 0;
- }
- }
- Out.open(outputFilename.c_str());
- Out << ">revComp-" << nucleotideSeq << "\n";
- Out << outputSeq;
- Out.clear();
- Out.close();
- return outputSeq;
- }
- // translates DNA to amino acid through common code
- void translateSeq( string &inSeq, string &outputSeq, int &frame)
- {
- int stopPrev = 0;
- int stopPres = 0;
- int stopLimit = 50;
- int start = 0;
- int choice;
- int stopCount = 0;
- int seqMin = 50;
- ifstream In;
- ofstream Out;
- string segment;
- outputSeq.clear();
- In.open(inSeq.c_str());
- string inputSeq;
- int charCount = 0;
- int charCountMax = 70;
-
- In >> segment;
- //cout << "segment: " << segment << endl;
- while(segment.length() < seqMin && !In.fail())
- {
- In >> segment;
- }
- inputSeq.append(segment);
- while(!In.fail())
- {
- In >> segment;
- inputSeq.append(segment);
- }
- //cout << "FASTA of DNA stored" << endl;
- In.close();
- //cout << "Frame: " << frame << endl;
- for(int i = frame; i < inputSeq.length()-3; i=i+3)
- {
- charCount++;
- if(charCount == charCountMax)
- {
- outputSeq.append("\n");
- charCount = 0;
- }
- if(inputSeq[i] == 't' ||
- inputSeq[i] == 'T')
- {
- if(inputSeq[i+1] == 't' ||
- inputSeq[i+1] == 'T')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("F");
- }
- else
- {
- outputSeq.append("L");
- }
- }
- else if(inputSeq[i+1] == 'c' ||
- inputSeq[i+1] == 'C')
- {
- outputSeq.append("S");
- }
- else if(inputSeq[i+1] == 'a' ||
- inputSeq[i+1] == 'A')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("Y");
- }
- else
- {
- outputSeq.append("-");
- stopCount++; // stop codon
- }
- }
- else if(inputSeq[i+1] == 'g' ||
- inputSeq[i+1] == 'G')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("C");
- }
- else if(inputSeq[i+2] == 'g' ||
- inputSeq[i+2] == 'G')
- {
- outputSeq.append("W");
- }
- else
- {
- outputSeq.append("-");
- stopCount++; // stop codon
- }
- }
- } // sequence begins with 't' ends
- else if(inputSeq[i] == 'c' ||
- inputSeq[i] == 'C')
- {
- if(inputSeq[i+1] == 't' ||
- inputSeq[i+1] == 'T')
- {
- outputSeq.append("L");
- }
- else if(inputSeq[i+1] == 'c' ||
- inputSeq[i+1] == 'C')
- {
- outputSeq.append("P");
- }
- else if(inputSeq[i+1] == 'a' ||
- inputSeq[i+1] == 'A')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("H");
- }
- else
- {
- outputSeq.append("Q");
- }
- }
- else if(inputSeq[i+1] == 'g' ||
- inputSeq[i+1] == 'G')
- {
- outputSeq.append("R");
- }
- } // sequence begins with 'c' ends
- else if(inputSeq[i] == 'a' ||
- inputSeq[i] == 'A')
- {
- if(inputSeq[i+1] == 't' ||
- inputSeq[i+1] == 'T')
- {
- if(inputSeq[i+2] != 'g' &&
- inputSeq[i+2] != 'G')
- {
- outputSeq.append("I");
- }
- else
- {
- outputSeq.append("M");
- }
- }
- else if(inputSeq[i+1] == 'c' ||
- inputSeq[i+1] == 'C')
- {
- outputSeq.append("T");
- }
- else if(inputSeq[i+1] == 'a' ||
- inputSeq[i+1] == 'A')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("N");
- }
- else
- {
- outputSeq.append("K");
- }
- }
- else if(inputSeq[i+1] == 'g' ||
- inputSeq[i+1] == 'G')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("S");
- }
- else
- {
- outputSeq.append("R");
- }
- }
- } // sequence begins with 'a' ends
- else if(inputSeq[i] == 'g' ||
- inputSeq[i] == 'G')
- {
- if(inputSeq[i+1] == 't' ||
- inputSeq[i+1] == 'T')
- {
- outputSeq.append("V");
- }
- else if(inputSeq[i+1] == 'c' ||
- inputSeq[i+1] == 'C')
- {
- outputSeq.append("A");
- }
- else if(inputSeq[i+1] == 'a' ||
- inputSeq[i+1] == 'A')
- {
- if(inputSeq[i+2] == 't' ||
- inputSeq[i+2] == 'T' ||
- inputSeq[i+2] == 'c' ||
- inputSeq[i+2] == 'C')
- {
- outputSeq.append("D");
- }
- else
- {
- outputSeq.append("E");
- }
- }
- else if(inputSeq[i+1] == 'g' ||
- inputSeq[i+1] == 'G')
- {
- outputSeq.append("G");
- }
- } // codon begins with 'g' ends
- else
- {
- outputSeq.append("-");
- stopCount++;
- }
- } // for loop ends
- Out.open("aminoAcidSeq.txt");
- Out.clear();
- Out << "Protein from one DNA frame" << "\n";
- Out << outputSeq;
- Out.close();
-
- } // function ends
- void predictDomains(string aminoAcidSeq,
- string &predictedSubstrates,
- QueueClass< UnitClass2 > &aaNameList,
- const bool &allFramesRead,
- const int &frame, const string &nucSeq,
- string &allClusters, int &NRPScount, int &PKScount,
- int &mixedNRPSPKScount, int &transATcount, int &mepCount,
- int &mvaCount )
- {
- QueueClass <UnitClass2> sortedListOfDomains;
- QueueClass <UnitClass2> sortedListOfMods;
- QueueClass <UnitClass2> sortedListOfATKS;
- UnitClass2 *domain;
- ifstream In;
- ifstream InMods;
- ifstream tempIn;
- ifstream aaSeqIn;
- ofstream Out;
- ofstream tempOut;
- string segment;
- float score;
- string scoreString;
- float modsScore;
- float modsScoreMax = 0.1;
- float scoreMax = 0.1; // minimum for domain synthetase matching
- float idenScore;
- float idenScoreMin = 120; // minimum for mods alignment
- float length;
- string SMILE;
- string formula;
- string mod;
- string substrate;
- float siteToAnalyze;
- int siteCount;
- bool siteFound;
- int whileCount = 0;
- int queryOrSbjctLength = 6;
- string aaSeq;
- char *arg[21];
- arg[0] = "./blastall"; // program to execute
- arg[1] = "blastp"; // protein blast
- arg[2] = "synthetases.fasta"; // control A-domain
- arg[3] = "mods.fasta"; // output file
- arg[4] = "100.0";
- arg[6] = "aaSeq.txt";
- arg[7] = "blast2.out";
- int AdomainStart = 5;
- int AdomainEnd = 5;
- // A-domains to analyze
- bool substrateKnown;
- string query;
- string sbjct;
- string predictedSubstrate;
- string proposedSubstrate;
- bool foundSubstrate;
- bool stringsCompare;
- int hits = 0;
- int misses = 0;
- int maxChars = 10000;
- // NRPS key positions
- int p235 = 11;
- int p236 = 12;
- int p239 = 15;
- int p278 = 10;
- int p299 = 1;
- int p301 = 3;
- int p322 = 1;
- int p330 = 9;
- int p331 = 10;
- string pos235;
- string pos236;
- string pos239;
- string pos278;
- string pos299;
- string pos301;
- string pos322;
- string pos330;
- string pos331;
- int Aad = 0;
- int Ala = 0;
- int Asn = 0;
- int Asp = 0;
- int Cys = 0;
- int Dab = 0;
- int Dhb = 0;
- int Gln = 0;
- int Glu = 0;
- int Ile = 0;
- int Leu = 0;
- int Orn = 0;
- int Phe = 0;
- int Phg = 0;
- int Pip = 0;
- int Pro = 0;
- int Ser = 0;
- int Thr = 0;
- int Tyr = 0;
- int Val = 0;
- int His = 0;
- int Lys = 0;
- int Gly = 0;
- int Gly1 = 0;
- int Gly2 = 0;
- int Gly3 = 0;
- int Leu1 = 0;
- int Leu2 = 0;
- int Leu3 = 0;
- int Leu4 = 0;
- int Tyr1 = 0;
- int Tyr2 = 0;
- int Tyr3 = 0;
- int Val1 = 0;
- int Val2 = 0;
- int Val3 = 0;
- int mal = 0;
- int mmal = 0;
- int emal = 0;
- int omal = 0;
- char FDxAA;
- char FD4xAA;
- char IT3xAA;
- char x1xG299;
- char x1xG301;
- char x7xxY322;
- char x7xxY330;
- char x7xxY331;
- // PKS key positions
- int p58 = 0;
- int p59 = 1;
- int p60 = 2;
- int p61 = 3;
- int p62 = 4;
- int p70 = 0;
- int p72 = 2;
- int p74 = 4;
- int p75 = 5;
- int p77 = 7;
- int p93 = 3;
- int p189 = 1;
- int p191 = 3;
- int p198 = 0;
- int p199 = 1;
- int p200 = 2;
- string pos58;
- string pos59;
- string pos60;
- string pos61;
- string pos62;
- string pos70;
- string pos72;
- string pos74;
- string pos75;
- string pos77;
- string pos93;
- string pos189;
- string pos191;
- string pos198;
- string pos199;
- string pos200;
- string sigSeq;
- float position;
- bool NRPS = 0;
- bool PKS = 0;
- string residue;
- string fname;
- bool success;
- string smileName;
- string inString;
- UnitClass2 *aaName;
- float currentDomainPos;
- float nextDomainPos;
- float thirdDomainPos;
- int PKSmodRange = 7000;
- bool KR = 0;
- bool DH = 0;
- bool ER = 0;
- string endSiteString;
- string prevSegment;
- int endSite;
- int startSite;
- int extraRange = 1000;
- int lastPKSkey = 3;
- string scoredSubstrate;
- string proteinID;
- string proteinFrame;
- string aminoAcids;
- int nucStartSite;
- int nucEndSite;
- cout << "Analyzing data..." << endl;
- //get every NRPS/PKS domain until no further domains are detected;
-
- // does blast alignment of cluster
- spawnl( 1, "./blastall", "./blastall",
- "-g", "T", "-p", arg[1], "-d", arg[2], "-i", aminoAcidSeq.c_str(),
- "-F", "F", "-C", "0", "-o", "blast.out", NULL);
- //cout << "called blast" << endl;
- spawnl( 1, "./blastall", "./blastall",
- "-g", "T", "-p", arg[1], "-d", arg[3], "-i", aminoAcidSeq.c_str(),
- "-F", "F", "-C", "0", "-o", "modsBlast.out", NULL);
- //cout << "called modblast" << endl;
- //cout << "frame: " << frame << endl;
- if(frame == 0)
- {
- system("cp blast.out frame0.out");
- //cout << "copied to frame0.out" << endl;
- system("cp modsBlast.out modsFrame0.out");
- //cout << "copied to modsFrame0.out" << endl;
- system("cp aminoAcidSeq.txt aminoAcidSeq0.txt");
- //cout << "aminoAcidSeq0 copied" << endl;
- }
- if(frame == 1)
- {
- system("cp blast.out frame1.out");
- //cout << "copied to frame1.out" << endl;
- system("cp modsBlast.out modsFrame1.out");
- //cout << "copied to modsFrame1.out" << endl;
- system("cp aminoAcidSeq.txt aminoAcidSeq1.txt");
- //cout << "aminoAcidSeq1 copied" << endl;
- }
- if(frame == 2)
- {
- system("cp blast.out frame2.out");
- //cout << "copied to frame2.out" << endl;
- system("cp modsBlast.out modsFrame2.out");
- //cout << "copied to modsFrame2.out" << endl;
- system("cat frame0.out frame1.out frame2.out > blast.out");
- //cout << "concatenated all domain frames" << endl;
- system("cat modsFrame0.out modsFrame1.out modsFrame2.out > modsBlast.out");
- //cout << "concatenated all mod frames" << endl;
- system("rm frame0.out frame1.out frame2.out modsFrame1.out modsFrame2.out");
- //cout << "last aminoAcidSeq" << endl;
- }
- //cout << "after frames copies" << endl;
- if(allFramesRead)
- {
- //cout << "ALL FRAMES READ" << endl;
- score = 0;
- // open NRPS/PKS alignment file
- In.open("blast.out");
- In >> segment;
- while( !In.fail())
- {
- //cout << "read blast.out" << endl;
- if(segment[0] == '*')
- {
- In >> segment;
- substrate = segment;
- }
- if(segment[0] == 'E' && segment[1] == 'x'
- && segment[2] == 'p')
- {
- In >> segment; // "="
- In >> segment; // expectation
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == 'e')
- {
- i = segment.length();
- }
- else
- {
- scoreString.append(1, segment[i]);
- }
- }
-
- score = getFloatVal(scoreString);
- //cout << "scoreString: " << scoreString << endl;
- scoreString.clear();
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == 'e')
- {
- for(int j = i+1; j < segment.length(); j++)
- {
- scoreString.append(1, segment[j]);
- }
- }
- }
- score = score * pow(10.0,(static_cast<double>(getFloatVal(scoreString))));
- //cout << "segment: " << segment << endl;
- //cout << "score: " << score << endl;
- //cout << "scoreString float: " << getFloatVal(scoreString) << endl;
- scoreString.clear();
- }
-
-
- if(segment[0] == 'I' && segment[1] == 'd'
- && segment[2] == 'e')
- {
- In >> segment; // "="
- In >> segment; // identities
-
- // collect denominator of identities value
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == '/')
- {
- for(int j = i+1; j < segment.length(); j++)
- {
- scoreString.append(1, segment[j]);
- i = j;
- }
- }
- }
- idenScore = getIntVal(scoreString);
- scoreString.clear();
- //cout << "score: " << " idenScore: " << idenScore << endl;
- if(score < scoreMax && idenScore > idenScoreMin)
- {
- //cout << "score: " << score << " idenScore: " << idenScore << endl;
- while(segment[0] != 'Q' && segment[1] != 'u')
- {
- In >> segment;
- }
- In >> segment; // start site of domain on protein
- startSite = getIntVal(segment);
- //cout << "start site: " << startSite << endl;
-
- In >> segment;
- // get protein frame and find where adenylation domain begins
- proteinID = segment;
- for(int i = 0; i < proteinID.length(); i++)
- {
- while(proteinID[i] == '-')
- {
- proteinID.erase(i, 1);
- }
- }
- //cout << "PROTEINID: " << proteinID << endl;
-
- proteinFrame.clear();
-
- aaSeqIn.open("aminoAcidSeq.txt");
-
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- //cout << "proteinFrame.length(): " << proteinFrame.length() << endl;
- nucStartSite = proteinFrame.find(proteinID, startSite - 100);
- if(nucStartSite < 0)
- {
- proteinFrame.clear();
- aaSeqIn.open("aminoAcidSeq0.txt");
-
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- nucStartSite = proteinFrame.find(proteinID, startSite - 100);
- //cout << "NOT IN LAST AA FRAME" << endl;
- }
- if(nucStartSite < 0)
- {
- proteinFrame.clear();
- aaSeqIn.open("aminoAcidSeq1.txt");
-
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- nucStartSite = proteinFrame.find(proteinID, startSite - 100);
- // cout << "NOT IN FIRST AA FRAME" << endl;
- }
-
- if(nucStartSite < 0)
- {
- //cout << "NOT IN SECOND AA FRAME" << endl;
- }
- nucStartSite = nucStartSite * 3; // translate amino acid position
- // into nucleotide position
- //cout << "NUCSTARTSITE: " << nucStartSite << endl;
- while((segment[0] != 'S' || segment[4] != 'e') && segment[0] != '>'
- && !In.fail())
- {
- prevSegment = segment;
- In >> segment;
- }
-
- endSiteString = prevSegment;
- endSite = getIntVal(prevSegment) + startSite + extraRange;
- domain = new UnitClass2;
- domain->setStartSite(startSite);
- domain->setFormula(substrate);
- domain->setEndSite(endSite);
- domain->setNucStartSite(nucStartSite);
- sortedListOfDomains.insertValue(*domain);
- delete domain;
- }
- score = 1;
- idenScore = 0;
- }
- In >> segment;
- }
- In.close();
- if(sortedListOfDomains.getNumElems() > 0)
- {
- //sortedListOfDomains.printForward();
- }
- else
- {
- cout << "No NRPS-A or PKS-AT domains detected" << endl;
- }
- // detect modifications in cluster and their locations
- InMods.open("modsBlast.out");
- InMods >> segment;
- //cout << "InMods: " << InMods << " segment: " << segment << endl;
- while( !InMods.fail())
- {
- //cout << "in InMods loop" << endl;
- if(segment[0] == '*')
- {
- InMods >> segment;
- mod = segment;
- }
- if(segment[0] == 'E' && segment[1] == 'x'
- && segment[2] == 'p')
- {
- InMods >> segment; // "="
- InMods >> segment; // expectation
- //cout << "segment(expectation): " << segment << endl;
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == 'e')
- {
- i = segment.length();
- }
- else
- {
- scoreString.append(1, segment[i]);
- }
- }
- modsScore = getFloatVal(scoreString);
- scoreString.clear();
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == 'e')
- {
- for(int j = i+1; j < segment.length(); j++)
- {
- scoreString.append(1, segment[j]);
- i = j;
- }
- }
- }
- }
- modsScore = modsScore * pow(10.0,(static_cast<double>(getFloatVal(scoreString))));
- scoreString.clear();
- if(segment[0] == 'I' && segment[1] == 'd'
- && segment[2] == 'e')
- {
- InMods >> segment; // "="
- InMods >> segment; // identities
-
- // collect denominator of identities value
- for(int i = 0; i < segment.length(); i++)
- {
- if(segment[i] == '/')
- {
- for(int j = i+1; j < segment.length(); j++)
- {
- scoreString.append(1, segment[j]);
- i = j;
- }
- }
- }
- idenScore = getIntVal(scoreString);
- scoreString.clear();
- //cout << "/identities: " << score << endl;
- if(modsScore < modsScoreMax and (idenScore > idenScoreMin
- || mod[0] == 'T' && mod[1] == 'E'))
- {
- //cout << "modsScore: " << modsScore << " idenScore: " << idenScore << endl;
- while( segment[0] != 'Q')
- {
- InMods >> segment;
- }
- InMods >> segment; // start site of domain on protein
- startSite = getIntVal(segment);
- InMods >> segment; // start site of domain on protein
- // get protein frame and find where adenylation domains begins
- proteinID = segment;
-
- for(int i = 0; i < proteinID.length(); i++)
- {
- while(proteinID[i] == '-')
- {
- proteinID.erase(i, 1);
- }
- }
- // cout << "PROTEINID: " << proteinID << endl;
- proteinFrame.clear();
- aaSeqIn.open("aminoAcidSeq.txt");
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- //cout << "proteinFrame.length(): " << proteinFrame.length() << endl;
- nucStartSite = proteinFrame.find(proteinID, startSite);
- if(nucStartSite < 0)
- {
- proteinFrame.clear();
- aaSeqIn.open("aminoAcidSeq0.txt");
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- nucStartSite = proteinFrame.find(proteinID, startSite);
- // cout << "NOT IN LAST AA FRAME" << endl;
- }
- if(nucStartSite < 0)
- {
- proteinFrame.clear();
- aaSeqIn.open("aminoAcidSeq1.txt");
- aaSeqIn >> aminoAcids;
- while(!aaSeqIn.fail())
- {
- if(aminoAcids.length() > 1)
- {
- proteinFrame.append(aminoAcids);
- }
- aaSeqIn >> aminoAcids;
- }
- aaSeqIn.clear();
- aaSeqIn.close();
- nucStartSite = proteinFrame.find(proteinID, startSite);
- // cout << "NOT IN FIRST AA FRAME" << endl;
- }
-
- if(nucStartSite < 0)
- {
- // cout << "NOT IN SECOND AA FRAME" << endl;
- }
- nucStartSite = nucStartSite * 3; // translate amino acid position
- // into nucleotide position
- domain = new UnitClass2;
- domain->setStartSite(startSite);
- domain->setNucStartSite(nucStartSite);
- domain->setFormula(mod);
- sortedListOfMods.insertValue(*domain);
- delete domain;
- }
- score = 1;
- idenScore = 0;
- }
- InMods >> segment;
- }
- InMods.clear();
- InMods.close();
- if(sortedListOfMods.getNumElems() > 0)
- {
- //sortedListOfMods.printForward();
- }
- else
- {
- cout << "No auxiliary domains detected" << endl;
- }
- siteCount = 0;
- // go through each domain and collect and align each
- while(siteCount < sortedListOfDomains.getNumElems() && whileCount < 1000000)
- {
- In.close();
- In.open("blast.out");
- In.clear();
- In >> segment;
- siteFound = 0;
- while( !In.fail() && siteFound == 0 && whileCount < 1000000)
- {
- //cout << "inner while" << endl;
- siteToAnalyze = sortedListOfDomains.getElemAtIndex(siteCount).getStartSite();
- //cout << "siteToAnalyze: " << siteToAnalyze << endl;
- if(segment[0] == 'Q' && segment[1] == 'u'
- && segment[queryOrSbjctLength - 1] == ':')
- {
- In >> segment;
-
- if(getIntVal(segment) == siteToAnalyze)
- {
- // analyze this segment only and add to aaNameList
- // store any modifications between this domain and the next
- // cout << "segment " << segment << " found " << endl;
- //cout << segment << " ";
- cout << sortedListOfDomains.getElemAtIndex(siteCount).getNucStartSite() << " ";
- cout << sortedListOfDomains.getElemAtIndex(siteCount).getNucStartSite() + 10000
- << " ";
- position = getIntVal(segment);
- siteFound = 1;
- siteCount++;
- In >> segment;
- aaSeq.append(segment);
- In >> segment;
- while((segment[0] != 'S' || segment[1] != 'c') && !In.fail())
- {
-
- if(segment[0] == 'Q' && segment[1] == 'u'
- && segment[queryOrSbjctLength - 1] == ':')
- {
- In >> segment;
- In >> segment;
-
- for(int i = 0; i < segment.length(); i++)
- {
- if((static_cast< unsigned int >(segment[i]) < 65
- || static_cast< unsigned int >(segment[i]) > 90)
- && (static_cast< unsigned int >(segment[i]) < 97
- || static_cast< unsigned int >(segment[i]) > 122))
- {
- segment.erase(i,1);
- i--;
- }
- }
- aaSeq.append(segment);
- }
- In >> segment;
- }
-
- // recognize amino acid sequence here
- //cout << "aaSeq: " << aaSeq << endl;
- Out.open("aaSeq.txt");
- Out.clear();
- Out << "> " << "aaSeq" << '\n' << aaSeq;
- Out.close();
- aaSeq.clear();
- // does blast alignment of individual domain to recognize
- // conserved motifs and substrate specificity
- spawnl( 1, "./blastall", "blastall",
- "-g", "T", "-p", arg[1], "-d", arg[2], "-i", "aaSeq.txt",
- "-F", "F", "-C", "0", "-o", "blast2.out", NULL);
- substrateKnown = 0;
- In.close();
- In.open("blast2.out");
- In.clear();
- segment.clear();
- In >> segment;
- while(!In.fail())
- {
- //cout << !In.fail() << " ";
- //cout << segment << endl;
- if(segment == "NRPS" && PKS == 0 && NRPS == 0)
- {
- NRPS = 1;
- //cout << "NRPS detected" << endl;
- }
- if(segment == "PKS" && NRPS == 0 && PKS == 0)
- {
- PKS = 1;
- //cout << "PKS detected" << endl;
- }
- if(segment.length() == queryOrSbjctLength)
- {
- if(segment[0] == 'Q' && segment[1] == 'u'
- && segment[queryOrSbjctLength - 1] == ':')
- {
- In >> segment;
- In >> segment;
- query.append(segment);
- //cout << "Query: " << segment << endl;
- }
- if(segment[0] == 'S' && segment[1] == 'b')
- {
- In >> segment;
- In >> segment;
- sbjct.append(segment);
- //cout << "Sbjct: " << segment << endl;
- }
- }
- In >> segment;
- }
- In.clear();
- In.close();
- segment.clear();
- // search for first FDX motif containing residues 235,236,239
-
- if(NRPS)
- {
- if(sbjct.find("SRVLQFASFSFD") > 0
- && sbjct.find("SRVLQFASFSFD") < maxChars )
- {
- pos235 = query[sbjct.find("SRVLQFASFSFD")+p235];
- pos236 = query[sbjct.find("SRVLQFASFSFD")+p236];
- pos239 = query[sbjct.find("SRVLQFASFSFD")+p239];
- sigSeq.append(pos235);
- sigSeq.append(pos236);
- sigSeq.append(pos239);
- }
- // search for IT motif containing the residue 278
- if(sbjct.find("RLKNDGITHVTLPP") > 0
- && sbjct.find("RLKNDGITHVTLPP") < maxChars )
- {
- pos278 = query[sbjct.find("RLKNDGITHVTLPP")+p278];
- sigSeq.append(pos278);
- }
- // search for GE motif containing the residues 299,301
- if(sbjct.find("IVVQGEACP") > 0
- && sbjct.find("IVVAGEACP") < maxChars )
- {
- pos299 = query[sbjct.find("IVVAGEACP")+p299];
- pos301 = query[sbjct.find("IVVAGEACP")+p301];
- sigSeq.append(pos299);
- sigSeq.append(pos301);
- }
- // search for YGPTE motif containing the residues 322,330,331
- if(sbjct.find("NAYGPTE") > 0
- && sbjct.find("NAYGPTE") < maxChars )
- {
- pos322 = query[sbjct.find("NAYGPTE")+p322];
- pos330 = query[sbjct.find("NAYGPTE")+p330];
- pos331 = query[sbjct.find("NAYGPTE")+p331];
- sigSeq.append(pos322);
- sigSeq.append(pos330);
- sigSeq.append(pos331);
- }
- FDxAA = pos236[0];
- FD4xAA = pos239[0];
- IT3xAA = pos278[0];
- x1xG299 = pos299[0];
- x1xG301 = pos301[0];
- x7xxY322 = pos322[0];
- x7xxY330 = pos330[0];
- x7xxY331 = pos331[0];
- }
- if(PKS)
- {
- if(sbjct.find("DVPLV") > 0
- && sbjct.find("DVPLV") < maxChars )
- {
- pos58 = query[sbjct.find("DVPLV")+p58];
- pos59 = query[sbjct.find("DVPLV")+p59];
- pos60 = query[sbjct.find("DVPLV")+p60];
- pos61 = query[sbjct.find("DVPLV")+p61];
- pos62 = query[sbjct.find("DVPLV")+p62];
- sigSeq.append(pos58);
- sigSeq.append(pos59);
- sigSeq.append(pos60);
- sigSeq.append(pos61);
- sigSeq.append(pos62);
- }
- if(sbjct.find("QVAL") > 0
- && sbjct.find("QVAL") < maxChars )
- {
- pos70 = query[sbjct.find("QVAL")+p70];
- pos72 = query[sbjct.find("QVAL")+p72];
- pos74 = query[sbjct.find("QVAL")+p74];
- pos75 = query[sbjct.find("QVAL")+p75];
- pos77 = query[sbjct.find("QVAL")+p77];
- sigSeq.append(pos70);
- sigSeq.append(pos72);
- sigSeq.append(pos74);
- sigSeq.append(pos75);
- sigSeq.append(pos77);
- }
- if(sbjct.find("GQSMGE") > 0
- && sbjct.find("GQSMGE") < maxChars )
- {
- pos93 = query[sbjct.find("GQSMGE")+p93];
- sigSeq.append(pos93);
- }
- if(sbjct.find("ITVRRI") > 0
- && sbjct.find("ITVRRI") < maxChars )
- {
- pos189 = query[sbjct.find("ITVRRI")+p189];
- sigSeq.append(pos189);
- pos191 = query[sbjct.find("ITVRRI")+p191];
- sigSeq.append(pos191);
- }
- if(sbjct.find("YASH") > 0
- && sbjct.find("YASH") < maxChars )
- {
- pos198 = query[sbjct.find("YASH")+p198];
- pos199 = query[sbjct.find("YASH")+p199];
- pos200 = query[sbjct.find("YASH")+p200];
- sigSeq.append(pos198);
- sigSeq.append(pos199);
- sigSeq.append(pos200);
- }
-
- if(pos58[0] == 'E' || pos58[0] == 'D' || pos58[0] == 'Q'
- || pos58[0] == 'S' || pos58[0] == 'K')
- {
- mal++;
- }
- if(pos58[0] == 'R' || pos58[0] == 'G')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos59[0] == 'T' || pos59[0] == 'S')
- {
- mal++;
- }
- if(pos59[0] == 'V' || pos59[0] == 'A')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos60[0] == 'G' || pos60[0] == 'P' || pos60[0] == 'A'
- || pos60[0] == 'L' || pos60[0] == 'R')
- {
- mal++;
- }
- if(pos60[0] == 'D' || pos60[0] == 'E')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos61[0] == 'Y' || pos61[0] == 'F')
- {
- mal++;
- }
- if(pos61[0] == 'V')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos62[0] == 'A' || pos62[0] == 'T')
- {
- mal++;
- }
- if(pos62[0] == 'V')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos70[0] == 'Q')
- {
- mal++;
- }
- if(pos70[0] == 'M')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos72[0] == 'A')
- {
- mal++;
- }
- if(pos72[0] == 'S')
- {
- mmal++;
- }
- if(pos74[0] == 'F')
- {
- mal++;
- }
- if(pos74[0] == 'A')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos75[0] == 'G')
- {
- mal++;
- }
- if(pos75[0] == 'A')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos77[0] == 'L')
- {
- mal++;
- }
- if(pos77[0] == 'W')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos93[0] == 'L' || pos93[0] == 'V' || pos93[0] == 'I')
- {
- mal++;
- }
- if(pos93[0] == 'Q')
- {
- mmal++;
- emal++;
- omal++;
- }
- if(pos189[0] == 'M' || pos189[0] == 'L' || pos189[0] == 'Y')
- {
- omal++;
- }
- else
- {
- mal++;
- mmal++;
- emal++;
- }
- if(pos191[0] == 'W')
- {
- omal++;
- }
- else
- {
- mal++;
- mmal++;
- emal++;
- }
- if(pos198[0] == 'H')
- {
- mal = mal + lastPKSkey;
- }
- if(pos198[0] == 'Y')
- {
- mmal = mmal + lastPKSkey;
- }
- if(pos198[0] == 'T' || pos198[0] == 'C')
- {
- emal = emal + lastPKSkey;
- }
- if(pos199[0] == 'P')
- {
- emal++;
- }
- if(pos200[0] == 'F')
- {
- mal = mal + lastPKSkey;
- }
- if(pos200[0] == 'S')
- {
- mmal = mmal + lastPKSkey;
- }
- if(pos200[0] == 'G')
- {
- emal = emal + lastPKSkey;
- }
- if(pos200[0] == 'T')
- {
- emal++;
- }
-
- //outputSpecificitiesPKS( mal, mmal, emal, omal);
- scoredSubstrate = maxSpecificityPKS(mal, mmal, emal, omal);
- //cout << "scoredSubstrate: " << scoredSubstrate << endl;
- mal = 0;
- mmal = 0;
- emal = 0;
- omal = 0;
- clearPositions( pos235, pos236, pos239,
- pos278, pos299, pos301, pos322, pos330,
- pos331, pos58, pos59, pos60, pos61, pos62,
- pos70, pos72, pos74, pos75, pos77, pos93,
- pos189, pos191, pos198, pos199, pos200 );
- }
- //cout << "signature sequence: " << sigSeq << " ";
- cout << sigSeq << " ";
- Out.open("sigSeq.txt");
- Out.clear();
- Out << "> " << "SigSeq" << '\n'
- << sigSeq;
- Out.close();
- // align with NRPS/PKS signatures
- if(PKS)
- {
- spawnl(1, "./blastall", "blastall", "-p", "blastp", "-d",
- "PKSsignatures.fasta", "-i", "sigSeq.txt",
- "-e", arg[4], "-F", "F", "-C", "0", "-e", "10.0",
- "-o", "blast3.out", NULL);
- }
- if(NRPS)
- {
- spawnl(1, "./blastall", "blastall", "-p", "blastp", "-d",
- "NRPSsignatures.fasta", "-i", "sigSeq.txt",
- "-e", arg[4], "-F", "F", "-C", "0", "-e", "10.0",
- "-o", "blast3.out", NULL);
- if(FDxAA == 'P' || FDxAA == 'p')
- {
- Aad++;
- }
- if(FDxAA == 'L' || FDxAA == 'l')
- {
- Ala++;
- Asn++;
- Asp++;
- Cys++;
- Dab++;
- Dhb++;
- Gly3++;
- }
- if(FDxAA == 'H' || FDxAA == 'h')
- {
- Cys++;
- }
- if(FDxAA == 'A' || FDxAA == 'a')
- {
- Gln++;
- Glu++;
- Ile++;
- Leu1++;
- Leu2++;
- Leu4++;
- Phe++;
- Tyr2++;
- Tyr3++;
- Val1++;
- Val3++;
- Lys++;
- }
- if(FDxAA == 'G' || FDxAA == 'g')
- {
- Ile++;
- Leu3++;
- Tyr1++;
- }
- if(FDxAA == 'M' || FDxAA == 'm')
- {
- Orn++;
- }
- if(FDxAA == 'V' || FDxAA == 'v')
- {
- Orn++;
- Pro++;
- Ser++;
- }
- if(FDxAA == 'I' || FDxAA == 'i')
- {
- Phg++;
- Gly1++;
- Gly2++;
- }
- if(FDxAA == 'F' || FDxAA == 'f')
- {
- Phe++;
- Thr++;
- Val2++;
- }
- if(FDxAA == 'S' || FDxAA == 's')
- {
- His++;
- }
-
- if(FD4xAA == 'R' || FD4xAA == 'r')
- {
- Aad++;
- }
- if(FD4xAA == 'L' || FD4xAA == 'l')
- {
- Ala++;
- Tyr2++;
- Gly1++;
- }
- if(FD4xAA == 'T' || FD4xAA == 't')
- {
- Asn++;
- Asp++;
- Tyr1++;
- }
- if(FD4xAA == 'E' || FD4xAA == 'e')
- {
- Cys++;
- Dab++;
- Orn++;
- Val2++;
- His++;
- Lys++;
- }
- if(FD4xAA == 'Y' || FD4xAA == 'y')
- {
- Cys++;
- }
- if(FD4xAA == 'P' || FD4xAA == 'p')
- {
- Dhb++;
- }
- if(FD4xAA == 'Q' || FD4xAA == 'q')
- {
- Gln++;
- Pip++;
- Pro++;
- Gly2++;
- }
- if(FD4xAA == 'W' || FD4xAA == 'w')
- {
- Glu++;
- Leu1++;
- Leu2++;
- Phe++;
- Ser++;
- Thr++;
- Val3++;
- Gln++;
- }
- if(FD4xAA == 'K' || FD4xAA == 'k')
- {
- Glu++;
- }
- if(FD4xAA == 'F' || FD4xAA == 'f')
- {
- Ile++;
- Leu4++;
- Gly3++;
- Val1++;
- }
- if(FD4xAA == 'A' || FD4xAA == 'a')
- {
- Leu3++;
- }
- if(FD4xAA == 'G' || FD4xAA == 'g')
- {
- Orn++;
- }
- if(FD4xAA == 'S' || FD4xAA == 's')
- {
- Tyr3++;
- }
- // score IT3xAA
- if(IT3xAA == 'N' || IT3xAA == 'n')
- {
- Aad++;
- Cys++;
- Orn++;
- Thr++;
- Gly3++;
- }
- if(IT3xAA == 'F' || IT3xAA == 'f')
- {
- Ala++;
- Ile++;
- Leu++;
- }
- if(IT3xAA == 'K' || IT3xAA == 'k')
- {
- Asn++;
- Asp++;
- }
- if(IT3xAA == 'S' || IT3xAA == 's')
- {
- Cys++;
- Val++;
- Lys++;
- }
- if(IT3xAA == 'H' || IT3xAA == 'h')
- {
- Dab++;
- Glu++;
- Ser++;
- }
- if(IT3xAA == 'A' || IT3xAA == 'a')
- {
- Dhb++;
- His++;
- }
- if(IT3xAA == 'D' || IT3xAA == 'd')
- {
- Gln++;
- Glu++;
- }
- if(IT3xAA == 'L' || IT3xAA == 'l')
- {
- Leu++;
- Phg++;
- Pip++;
- Pro++;
- }
- if(IT3xAA == 'Y' || IT3xAA == 'y')
- {
- Leu++;
- }
- if(IT3xAA == 'M' || IT3xAA == 'm')
- {
- Leu++;
- Val++;
- Gly2++;
- }
- if(IT3xAA == 'E' || IT3xAA == 'e')
- {
- Orn++;
- }
- if(IT3xAA == 'T' || IT3xAA == 't')
- {
- Phe++;
- Tyr++;
- }
- if(IT3xAA == 'I' || IT3xAA == 'i')
- {
- Tyr++;
- }
- if(IT3xAA == 'V' || IT3xAA == 'v')
- {
- Tyr++;
- }
- if(IT3xAA == 'W' || IT3xAA == 'w')
- {
- Val++;
- }
- if(IT3xAA == 'Q' || IT3xAA == 'q')
- {
- Gly1++;
- Gln++;
- }
- if(x1xG299 == 'I' || x1xG299 == 'i')
- {
- Aad++;
- Orn++;
- Phe++;
- Pro++;
- Thr++;
- Val1++;
- }
- if(x1xG299 == 'G' || x1xG299 == 'g')
- {
- Ala++;
- }
- if(x1xG299 == 'L' || x1xG299 == 'l')
- {
- Asn++;
- Cys++;
- Glu++;
- Ile++;
- Leu1++;
- Leu4++;
- Orn++;
- Phg++;
- Pip++;
- Ser++;
- Gly1++;
- }
- if(x1xG299 == 'V' || x1xG299 == 'v')
- {
- Asp++;
- Phe++; // added from Challis
- Tyr3++;
- }
- if(x1xG299 == 'D' || x1xG299 == 'd')
- {
- Cys++;
- }
- if(x1xG299 == 'N' || x1xG299 == 'n')
- {
- Dab++;
- Gly3++;
- }
- if(x1xG299 == 'Q' || x1xG299 == 'q')
- {
- Dhb++;
- }
- if(x1xG299 == 'F' || x1xG299 == 'f')
- {
- Glu++;
- Val3++;
- Gly2++;
- Gln++;
- }
- if(x1xG299 == 'Y' || x1xG299 == 'y')
- {
- Ile++;
- Leu2++;
- }
- if(x1xG299 == 'T' || x1xG299 == 't')
- {
- Leu3++;
- Tyr1++;
- Tyr2++;
- Val2++;
- His++;
- }
- // detect 301 signature
- if(x1xG301 == 'V' || x1xG301 == 'v')
- {
- Aad++;
- Cys++;
- Gly2++;
- }
- if(x1xG301 == 'I' || x1xG301 == 'i')
- {
- Ala++;
- Cys++;
- Lys++;
- }
- if(x1xG301 == 'G' || x1xG301 == 'g')
- {
- Asn++;
- Asp++;
- Dhb++;
- Gln++;
- Glu++;
- Ile++;
- Leu1++;
- Leu2++;
- Leu3++;
- Leu4++;
- Orn++;
- Phg++;
- Phe++; // extra
- Pip++;
- Thr++;
- Tyr2++;
- Val1++;
- Lys++;
- Gly1++;
- }
- if(x1xG301 == 'S' || x1xG301 == 's')
- {
- Cys++;
- Ser++;
- }
- if(x1xG301 == 'T' || x1xG301 == 't')
- {
- Dab++;
- }
- if(x1xG301 == 'A' || x1xG301 == 'a')
- {
- Phe++;
- Pro++;
- Tyr1++;
- Tyr3++;
- Val2++;
- Val3++;
- His++;
- Gly3++;
- }
-
- // analyze 322 residue
- if(x7xxY322 == 'E' || x7xxY322 == 'e')
- {
- Aad++;
- Asn++;
- Leu3++;
- Tyr1++;
- His++;
- }
- if(x7xxY322 == 'A' || x7xxY322 == 'a')
- {
- Ala++;
- Leu2++;
- Phe++;
- Tyr2++;
- Tyr3++;
- Val2++;
- Val3++;
- Gly2++;
- }
- if(x7xxY322 == 'H' || x7xxY322 == 'h')
- {
- Asp++;
- Pro++;
- }
- if(x7xxY322 == 'G' || x7xxY322 == 'g')
- {
- Cys++;
- Glu++;
- Val1++;
- Phe++; // extra
- }
- if(x7xxY322 == 'L' || x7xxY322 == 'l')
- {
- Cys++;
- Orn++;
- Phg++;
- Ser++;
- Gly1++;
- Gly3++;
- Gln++;
- }
- if(x7xxY322 == 'T' || x7xxY322 == 't')
- {
- Dab++;
- }
- if(x7xxY322 == '…
Large files files are truncated, but you can click here to view the full file