PageRenderTime 100ms CodeModel.GetById 21ms app.highlight 66ms RepoModel.GetById 1ms app.codeStats 1ms

/lib/galaxy/tools/util/hyphy_util.py

https://bitbucket.org/ialbert/galaxy-genetrack
Python | 1163 lines | 1159 code | 2 blank | 2 comment | 0 complexity | 34f2c6b348f53d5e0c7314ec2fa2d535 MD5 | raw file
   1#Dan Blankenberg
   2#Contains file contents and helper methods for HYPHY configurations
   3import tempfile, os
   4
   5def get_filled_temp_filename(contents):
   6    fh = tempfile.NamedTemporaryFile('w')
   7    filename = fh.name
   8    fh.close()
   9    fh = open(filename, 'w')
  10    fh.write(contents)
  11    fh.close()
  12    return filename
  13
  14NJ_tree_shared_ibf = """
  15COUNT_GAPS_IN_FREQUENCIES = 0;
  16methodIndex                  = 1;
  17
  18/*-----------------------------------------------------------------------------------------------------------------------------------------*/
  19
  20function InferTreeTopology(verbFlag)
  21{
  22    distanceMatrix = {ds.species,ds.species};
  23
  24    MESSAGE_LOGGING              = 0;
  25    ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"chooseDistanceFormula.def");
  26    InitializeDistances (0);
  27        
  28    for (i = 0; i<ds.species; i=i+1)
  29    {
  30        for (j = i+1; j<ds.species; j = j+1)
  31        {
  32            distanceMatrix[i][j] = ComputeDistanceFormula (i,j);
  33        }
  34    }
  35
  36    MESSAGE_LOGGING              = 1;
  37    cladesMade                     = 1;
  38    
  39
  40    if (ds.species == 2)
  41    {
  42        d1 = distanceMatrix[0][1]/2;
  43        treeNodes = {{0,1,d1__},
  44                     {1,1,d1__},
  45                     {2,0,0}};
  46                     
  47        cladesInfo = {{2,0}};
  48    }
  49    else
  50    {
  51        if (ds.species == 3)
  52        {
  53            /* generate least squares estimates here */
  54            
  55            d1 = (distanceMatrix[0][1]+distanceMatrix[0][2]-distanceMatrix[1][2])/2;
  56            d2 = (distanceMatrix[0][1]-distanceMatrix[0][2]+distanceMatrix[1][2])/2;
  57            d3 = (distanceMatrix[1][2]+distanceMatrix[0][2]-distanceMatrix[0][1])/2;
  58            
  59            treeNodes = {{0,1,d1__},
  60                         {1,1,d2__},
  61                         {2,1,d3__}
  62                         {3,0,0}};
  63                         
  64            cladesInfo = {{3,0}};        
  65        }
  66        else
  67        {    
  68            njm = (distanceMatrix > methodIndex)>=ds.species;
  69                
  70            treeNodes         = {2*(ds.species+1),3};
  71            cladesInfo        = {ds.species-1,2};
  72            
  73            for (i=Rows(treeNodes)-1; i>=0; i=i-1)
  74            {
  75                treeNodes[i][0] = njm[i][0];
  76                treeNodes[i][1] = njm[i][1];
  77                treeNodes[i][2] = njm[i][2];
  78            }
  79
  80            for (i=Rows(cladesInfo)-1; i>=0; i=i-1)
  81            {
  82                cladesInfo[i][0] = njm[i][3];
  83                cladesInfo[i][1] = njm[i][4];
  84            }
  85            
  86            njm = 0;
  87        }
  88    }
  89    return 1.0;
  90}
  91
  92/*-----------------------------------------------------------------------------------------------------------------------------------------*/
  93
  94function TreeMatrix2TreeString (doLengths)
  95{
  96    treeString = "";
  97    p = 0;
  98    k = 0;
  99    m = treeNodes[0][1];
 100    n = treeNodes[0][0];
 101    treeString*(Rows(treeNodes)*25);
 102
 103    while (m)
 104    {    
 105        if (m>p)
 106        {
 107            if (p)
 108            {
 109                treeString*",";
 110            }
 111            for (j=p;j<m;j=j+1)
 112            {
 113                treeString*"(";
 114            }
 115        }
 116        else
 117        {
 118            if (m<p)
 119            {
 120                for (j=m;j<p;j=j+1)
 121                {
 122                    treeString*")";
 123                }
 124            }    
 125            else
 126            {
 127                treeString*",";
 128            }    
 129        }
 130        if (n<ds.species)
 131        {
 132            GetString (nodeName, ds, n);
 133            if (doLengths != 1)
 134            {
 135                treeString*nodeName;            
 136            }
 137            else
 138            {    
 139                treeString*taxonNameMap[nodeName];
 140            }
 141        }
 142        if (doLengths>.5)
 143        {
 144            nodeName = ":"+treeNodes[k][2];
 145            treeString*nodeName;
 146        }
 147        k=k+1;
 148        p=m;
 149        n=treeNodes[k][0];
 150        m=treeNodes[k][1];
 151    }
 152
 153    for (j=m;j<p;j=j+1)
 154    {
 155        treeString*")";
 156    }
 157    
 158    treeString*0;
 159    return treeString;
 160}
 161"""
 162
 163def get_NJ_tree (filename):
 164    return """
 165DISTANCE_PROMPTS            = 1;
 166ExecuteAFile ("%s");
 167
 168DataSet                 ds              = ReadDataFile (PROMPT_FOR_FILE);
 169DataSetFilter             filteredData = CreateFilter (ds,1);
 170
 171/* do sequence to branch map */
 172
 173taxonNameMap = {};
 174
 175for (k=0; k<ds.species; k=k+1)
 176{
 177    GetString         (thisName, ds,k);
 178    shortName         = (thisName^{{"\\\\..+",""}})&&1;
 179    taxonNameMap[shortName] = thisName;
 180    SetParameter (ds,k,shortName);
 181}
 182
 183DataSetFilter             filteredData = CreateFilter (ds,1);
 184InferTreeTopology         (0);
 185treeString                 = TreeMatrix2TreeString (1);
 186
 187fprintf                    (PROMPT_FOR_FILE, CLEAR_FILE, treeString);
 188fscanf                    (stdin, "String", ps_file);
 189
 190if (Abs(ps_file))
 191{
 192    treeString = TreeMatrix2TreeString (2);
 193    UseModel (USE_NO_MODEL);
 194    Tree givenTree = treeString;
 195    baseHeight         = TipCount (givenTree)*28;
 196    TREE_OUTPUT_OPTIONS = {};
 197    TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
 198    baseWidth = 0;
 199    treeAVL                = givenTree^0;
 200    drawLetter            = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
 201    for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
 202    {
 203        nodeName = (treeAVL[k3])["Name"];
 204        if(Abs((treeAVL[k3])["Children"]) == 0)
 205        {
 206            mySpecs = {};
 207            mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
 208            baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
 209        }
 210    }
 211    baseWidth = 40*baseWidth;
 212    
 213    fprintf (ps_file,  CLEAR_FILE, drawLetter, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
 214}
 215""" % (filename)
 216
 217def get_NJ_treeMF (filename):
 218    return  """
 219ExecuteAFile ("%s");
 220
 221VERBOSITY_LEVEL            = -1;
 222fscanf                  (PROMPT_FOR_FILE, "Lines", inLines);
 223
 224_linesIn                        = Columns (inLines);
 225isomorphicTreesBySequenceCount  = {};
 226
 227/*---------------------------------------------------------*/
 228
 229_currentGene   = 1; 
 230_currentState = 0;
 231geneSeqs      = "";
 232geneSeqs      * 128;
 233
 234fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN);
 235treeOutFile = LAST_FILE_PATH;
 236
 237fscanf  (stdin,"String", ps_file);
 238if (Abs(ps_file))
 239{
 240    fprintf (ps_file, CLEAR_FILE, KEEP_OPEN);
 241}
 242
 243for (l=0; l<_linesIn; l=l+1)
 244{
 245    if (Abs(inLines[l]) == 0)
 246    {
 247        if (_currentState == 1)
 248        {
 249            geneSeqs      * 0;
 250            DataSet       ds            = ReadFromString (geneSeqs);
 251            _processAGene (_currentGene,treeOutFile,ps_file);
 252            geneSeqs * 128;
 253            _currentGene = _currentGene + 1;
 254        }
 255    }
 256    else
 257    {
 258        if (_currentState == 0)
 259        {
 260            _currentState = 1;
 261        }
 262        geneSeqs * inLines[l];
 263        geneSeqs * "\\n";
 264    }
 265}
 266
 267
 268if (_currentState == 1)
 269{
 270    geneSeqs      * 0;
 271    if (Abs(geneSeqs))
 272    {
 273        DataSet       ds            = ReadFromString (geneSeqs);
 274        _processAGene (_currentGene,treeOutFile,ps_file);
 275    }
 276}
 277
 278fprintf (treeOutFile,CLOSE_FILE);
 279if (Abs(ps_file))
 280{
 281    fprintf (ps_file,CLOSE_FILE);
 282}
 283/*---------------------------------------------------------*/
 284
 285function _processAGene (_geneID, nwk_file, ps_file)
 286{
 287    if (ds.species == 1)
 288    {
 289        fprintf                    (nwk_file, _geneID-1, "\\tNone \\tNone\\n");
 290        return 0;
 291        
 292    }
 293    
 294    DataSetFilter             filteredData = CreateFilter (ds,1);
 295
 296    /* do sequence to branch map */
 297
 298    taxonNameMap = {};
 299
 300    for (k=0; k<ds.species; k=k+1)
 301    {
 302        GetString         (thisName, ds,k);
 303        shortName         = (thisName^{{"\\\\..+",""}});
 304        taxonNameMap[shortName] = thisName;
 305        SetParameter (ds,k,shortName);
 306    }
 307
 308    DataSetFilter             filteredData = CreateFilter (ds,1);
 309    DISTANCE_PROMPTS        = (_geneID==1);
 310
 311    InferTreeTopology         (0);
 312    baseTree                = TreeMatrix2TreeString (0);
 313    UseModel                 (USE_NO_MODEL);
 314    
 315    Tree             baseTop    = baseTree;
 316    
 317    /* standardize this top */
 318    
 319    for (k=0; k<Abs(isomorphicTreesBySequenceCount[filteredData.species]); k=k+1)
 320    {
 321        testString      = (isomorphicTreesBySequenceCount[filteredData.species])[k];
 322        Tree testTree = testString;
 323        if (testTree == baseTop)
 324        {
 325            baseTree = testString;
 326            break;
 327        }
 328    }
 329    if (k==Abs(isomorphicTreesBySequenceCount[filteredData.species]))
 330    {
 331        if (k==0)
 332        {
 333            isomorphicTreesBySequenceCount[filteredData.species] = {};
 334        }
 335        (isomorphicTreesBySequenceCount[filteredData.species])[k] = baseTree;
 336    }
 337    
 338    fprintf                    (nwk_file, _geneID-1, "\\t", baseTree, "\\t", TreeMatrix2TreeString (1), "\\n");
 339    if (Abs(ps_file))
 340    {
 341        treeString = TreeMatrix2TreeString (2);
 342        UseModel (USE_NO_MODEL);
 343        Tree givenTree = treeString;
 344        baseHeight         = TipCount (givenTree)*28;
 345        TREE_OUTPUT_OPTIONS = {};
 346        TREE_OUTPUT_OPTIONS["__FONT_SIZE__"] = 14;
 347        baseWidth = 0;
 348        treeAVL                = givenTree^0;
 349        drawLetter            = "/drawletter {"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$4+" -"+TREE_OUTPUT_OPTIONS["__FONT_SIZE__"]$2+ " show} def\\n";
 350        for (k3 = 1; k3 < Abs(treeAVL); k3=k3+1)
 351        {
 352            nodeName = (treeAVL[k3])["Name"];
 353            if(Abs((treeAVL[k3])["Children"]) == 0)
 354            {
 355                mySpecs = {};
 356                mySpecs ["TREE_OUTPUT_BRANCH_LABEL"] = "(" + taxonNameMap[nodeName] + ") drawLetter";
 357                baseWidth = Max (baseWidth, (treeAVL[k3])["Depth"]);
 358            }
 359        }
 360        baseWidth = 40*baseWidth;
 361        
 362        fprintf (stdout, _geneID, ":", givenTree,"\\n");
 363        fprintf (ps_file, PSTreeString (givenTree, "STRING_SUPPLIED_LENGTHS",{{baseWidth,baseHeight}}));
 364    }
 365    return 0;
 366}
 367""" % (filename)
 368
 369BranchLengthsMF = """
 370VERBOSITY_LEVEL            = -1;
 371
 372fscanf                  (PROMPT_FOR_FILE, "Lines", inLines);
 373
 374
 375
 376_linesIn = Columns (inLines);
 377
 378
 379
 380/*---------------------------------------------------------*/
 381
 382
 383
 384_currentGene   = 1; 
 385
 386_currentState = 0;
 387
 388geneSeqs      = "";
 389
 390geneSeqs      * 128;
 391
 392
 393
 394for (l=0; l<_linesIn; l=l+1)
 395
 396{
 397
 398    if (Abs(inLines[l]) == 0)
 399
 400    {
 401
 402        if (_currentState == 1)
 403
 404        {
 405
 406            geneSeqs      * 0;
 407
 408            DataSet       ds            = ReadFromString (geneSeqs);
 409
 410            _processAGene (_currentGene);
 411
 412             geneSeqs * 128;
 413
 414            _currentGene = _currentGene + 1;
 415
 416        }
 417
 418    }
 419
 420    else
 421
 422    {
 423
 424        if (_currentState == 0)
 425
 426        {
 427
 428            _currentState = 1;
 429
 430        }
 431
 432        geneSeqs * inLines[l];
 433
 434        geneSeqs * "\\n";
 435
 436    }
 437
 438}
 439
 440
 441
 442if (_currentState == 1)
 443
 444{
 445
 446    geneSeqs      * 0;
 447
 448    if (Abs(geneSeqs))
 449
 450    {
 451
 452        DataSet       ds            = ReadFromString (geneSeqs);
 453
 454        _processAGene (_currentGene);
 455
 456    }
 457
 458}
 459
 460
 461
 462fprintf (resultFile,CLOSE_FILE);
 463
 464
 465
 466/*---------------------------------------------------------*/
 467
 468
 469
 470function _processAGene (_geneID)
 471
 472{
 473
 474    DataSetFilter             filteredData = CreateFilter (ds,1);
 475
 476    if (_currentGene == 1)
 477
 478    {
 479
 480        SelectTemplateModel        (filteredData);
 481
 482        
 483
 484        SetDialogPrompt           ("Tree file");
 485
 486        fscanf                    (PROMPT_FOR_FILE, "Tree",  givenTree);
 487
 488        fscanf                    (stdin, "String", resultFile);
 489
 490        
 491
 492        /* do sequence to branch map */
 493
 494        
 495
 496        validNames = {};
 497
 498        taxonNameMap = {};
 499
 500        
 501
 502        for (k=0; k<TipCount(givenTree); k=k+1)
 503
 504        {
 505
 506            validNames[TipName(givenTree,k)&&1] = 1;
 507
 508        }
 509
 510        
 511
 512        for (k=0; k<BranchCount(givenTree); k=k+1)
 513
 514        {
 515
 516            thisName = BranchName(givenTree,k);
 517
 518            taxonNameMap[thisName&&1] = thisName;
 519
 520        }
 521
 522        
 523
 524        storeValidNames = validNames;
 525
 526        fprintf         (resultFile,CLEAR_FILE,KEEP_OPEN,"Block\\tBranch\\tLength\\tLowerBound\\tUpperBound\\n"); 
 527
 528    }
 529
 530    else
 531
 532    {
 533
 534        HarvestFrequencies (vectorOfFrequencies, filteredData, 1,1,1);
 535
 536        validNames = storeValidNames;
 537
 538    }
 539
 540    
 541
 542    for (k=0; k<ds.species; k=k+1)
 543
 544    {
 545
 546        GetString (thisName, ds,k);
 547
 548        shortName = (thisName^{{"\\\\..+",""}})&&1;
 549
 550        if (validNames[shortName])
 551
 552        {
 553
 554            taxonNameMap[shortName] = thisName;
 555
 556            validNames - (shortName);
 557
 558            SetParameter (ds,k,shortName);
 559
 560        }
 561
 562        else
 563
 564        {
 565
 566            fprintf         (resultFile,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree,"\\n"); 
 567
 568            return 0;
 569
 570        }
 571
 572    }
 573
 574    
 575
 576    /* */
 577
 578    
 579
 580    LikelihoodFunction lf = (filteredData,givenTree);
 581
 582    Optimize                 (res,lf);
 583
 584    
 585
 586    timer = Time(0)-timer;
 587
 588    
 589
 590    branchNames   = BranchName   (givenTree,-1);
 591
 592    branchLengths = BranchLength (givenTree,-1);
 593
 594    
 595
 596    
 597
 598    for (k=0; k<Columns(branchNames)-1; k=k+1)
 599
 600    {
 601
 602        COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
 603
 604        COVARIANCE_PRECISION = 0.95;
 605
 606        CovarianceMatrix (cmx,lf);
 607
 608        if (k==0)
 609
 610        {
 611
 612            /* compute a scaling factor */
 613
 614            ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
 615
 616            scaleFactor = BranchLength (givenTree,0);
 617
 618            ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
 619
 620        }
 621
 622        fprintf (resultFile,_geneID,"\\t",taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
 623
 624    }
 625
 626    
 627
 628    ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
 629
 630    global treeScaler = 1;
 631
 632    ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
 633
 634    COVARIANCE_PARAMETER = "treeScaler";
 635
 636    COVARIANCE_PRECISION = 0.95;
 637
 638    CovarianceMatrix (cmx,lf);
 639
 640    fprintf (resultFile,_geneID,"\\tTotal Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
 641
 642    ClearConstraints (givenTree);
 643
 644    return 0;
 645
 646}
 647"""
 648
 649BranchLengths = """
 650DataSet                 ds              = ReadDataFile (PROMPT_FOR_FILE);
 651DataSetFilter             filteredData = CreateFilter (ds,1);
 652
 653SelectTemplateModel        (filteredData);
 654
 655SetDialogPrompt         ("Tree file");
 656fscanf                    (PROMPT_FOR_FILE, "Tree",  givenTree);
 657fscanf                    (stdin, "String", resultFile);
 658
 659/* do sequence to branch map */
 660
 661validNames = {};
 662taxonNameMap = {};
 663
 664for (k=0; k<TipCount(givenTree); k=k+1)
 665{
 666    validNames[TipName(givenTree,k)&&1] = 1;
 667}
 668
 669for (k=0; k<BranchCount(givenTree); k=k+1)
 670{
 671    thisName = BranchName(givenTree,k);
 672    taxonNameMap[thisName&&1] = thisName;
 673}
 674
 675for (k=0; k<ds.species; k=k+1)
 676{
 677    GetString (thisName, ds,k);
 678    shortName = (thisName^{{"\\\\..+",""}})&&1;
 679    if (validNames[shortName])
 680    {
 681        taxonNameMap[shortName] = thisName;
 682        validNames - (shortName);
 683        SetParameter (ds,k,shortName);
 684    }
 685    else
 686    {
 687        fprintf         (resultFile,CLEAR_FILE,"ERROR:", thisName, " could not be matched to any of the leaves in tree ", givenTree); 
 688        return 0;
 689    }
 690}
 691
 692/* */
 693
 694LikelihoodFunction lf = (filteredData,givenTree);
 695
 696Optimize                 (res,lf);
 697
 698timer = Time(0)-timer;
 699
 700branchNames   = BranchName   (givenTree,-1);
 701branchLengths = BranchLength (givenTree,-1);
 702
 703fprintf         (resultFile,CLEAR_FILE,KEEP_OPEN,"Branch\\tLength\\tLowerBound\\tUpperBound\\n"); 
 704
 705for (k=0; k<Columns(branchNames)-1; k=k+1)
 706{
 707    COVARIANCE_PARAMETER = "givenTree."+branchNames[k]+".t";
 708    COVARIANCE_PRECISION = 0.95;
 709    CovarianceMatrix (cmx,lf);
 710    if (k==0)
 711    {
 712        /* compute a scaling factor */
 713        ExecuteCommands ("givenTree."+branchNames[0]+".t=1");
 714        scaleFactor = BranchLength (givenTree,0);
 715        ExecuteCommands ("givenTree."+branchNames[0]+".t="+cmx[0][1]);
 716    }
 717    fprintf (resultFile,taxonNameMap[branchNames[k]&&1],"\\t",branchLengths[k],"\\t",scaleFactor*cmx[0][0],"\\t",scaleFactor*cmx[0][2],"\\n");
 718}
 719
 720ttl = (branchLengths*(Transpose(branchLengths["1"])))[0];
 721global treeScaler = 1;
 722ReplicateConstraint ("this1.?.t:=treeScaler*this2.?.t__",givenTree,givenTree);
 723COVARIANCE_PARAMETER = "treeScaler";
 724COVARIANCE_PRECISION = 0.95;
 725CovarianceMatrix (cmx,lf);
 726ClearConstraints (givenTree);
 727fprintf (resultFile,"Total Tree\\t",ttl,"\\t",ttl*cmx[0][0],"\\t",ttl*cmx[0][2],"\\n");
 728fprintf (resultFile,CLOSE_FILE);
 729"""
 730
 731SimpleLocalFitter = """
 732VERBOSITY_LEVEL               = -1;
 733COUNT_GAPS_IN_FREQUENCIES  = 0;
 734
 735/*---------------------------------------------------------*/
 736
 737function returnResultHeaders (dummy)
 738{
 739    _analysisHeaders = {};
 740    _analysisHeaders[0] = "BLOCK";
 741    _analysisHeaders[1] = "BP";
 742    _analysisHeaders[2] = "S_sites";
 743    _analysisHeaders[3] = "NS_sites";
 744    _analysisHeaders[4] = "Stop_codons";
 745    _analysisHeaders[5] = "LogL";
 746    _analysisHeaders[6] = "AC";
 747    _analysisHeaders[7] = "AT";
 748    _analysisHeaders[8] = "CG";
 749    _analysisHeaders[9] = "CT";
 750    _analysisHeaders[10] = "GT";
 751    _analysisHeaders[11] = "Tree";
 752
 753    for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
 754    {
 755        branchName = treeBranchNames[_biterator];
 756        
 757        _analysisHeaders [Abs(_analysisHeaders)] = "length("+branchName+")";
 758        _analysisHeaders [Abs(_analysisHeaders)] = "dS("+branchName+")";
 759        _analysisHeaders [Abs(_analysisHeaders)] = "dN("+branchName+")";
 760        _analysisHeaders [Abs(_analysisHeaders)] = "omega("+branchName+")";
 761    }
 762
 763    return _analysisHeaders;
 764}
 765
 766/*---------------------------------------------------------*/
 767
 768function runAGeneFit (myID)
 769{
 770    DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
 771
 772    if (_currentGene==1)
 773    {
 774        _MG94stdinOverload = {};
 775        _MG94stdinOverload ["0"] = "Local";
 776        _MG94stdinOverload ["1"] = modelSpecString;
 777
 778        ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
 779                                 _MG94stdinOverload);
 780        
 781        Tree    codonTree           = treeString;
 782    }
 783    else
 784    {
 785        HarvestFrequencies               (observedFreq,filteredData,3,1,1);
 786        MULTIPLY_BY_FREQS             = PopulateModelMatrix ("MG94custom", observedFreq);
 787        vectorOfFrequencies         = BuildCodonFrequencies (observedFreq);
 788        Model MG94customModel         = (MG94custom,vectorOfFrequencies,0);
 789
 790        Tree    codonTree            = treeString;
 791    }
 792
 793    LikelihoodFunction lf     = (filteredData,codonTree);
 794
 795    Optimize                     (res,lf);
 796
 797    _snsAVL       =                 _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
 798    _cL           =                  ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
 799
 800
 801    _returnMe = {};
 802    _returnMe ["BLOCK"]              = myID;
 803    _returnMe ["LogL"]              = res[1][0];
 804    _returnMe ["BP"]                 = _snsAVL ["Sites"];
 805    _returnMe ["S_sites"]             = _snsAVL ["SSites"];
 806    _returnMe ["NS_sites"]             = _snsAVL ["NSSites"];
 807    _returnMe ["AC"]                 = AC;
 808    _returnMe ["AT"]                 = AT;
 809    _returnMe ["CG"]                 = CG;
 810    _returnMe ["CT"]                 = CT;
 811    _returnMe ["GT"]                 = GT;
 812    _returnMe ["Tree"]                 = Format(codonTree,0,1);
 813
 814    for (_biterator = 0; _biterator < treeBranchCount; _biterator = _biterator + 1)
 815    {
 816        branchName = treeBranchNames[_biterator];
 817
 818        _returnMe ["length("+branchName+")"]         = (_cL["Total"])[_biterator];
 819        _returnMe ["dS("+branchName+")"]                 = (_cL["Syn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["S_sites"]);
 820        _returnMe ["dN("+branchName+")"]                 = (_cL["NonSyn"])[_biterator]*(_returnMe ["BP"]/_returnMe ["NS_sites"]);
 821        
 822        ExecuteCommands ("_lom = _standardizeRatio(codonTree."+treeBranchNames[_biterator]+".nonSynRate,codonTree."+treeBranchNames[_biterator]+".synRate);");
 823        _returnMe ["omega("+branchName+")"]                 = _lom;
 824    }
 825    
 826    return _returnMe;
 827}
 828
 829"""
 830
 831SimpleGlobalFitter = """
 832VERBOSITY_LEVEL               = -1;
 833COUNT_GAPS_IN_FREQUENCIES  = 0;
 834
 835/*---------------------------------------------------------*/
 836
 837function returnResultHeaders (dummy)
 838{
 839    _analysisHeaders = {};
 840    _analysisHeaders[0]  = "BLOCK";
 841    _analysisHeaders[1]  = "BP";
 842    _analysisHeaders[2]  = "S_sites";
 843    _analysisHeaders[3]  = "NS_sites";
 844    _analysisHeaders[4]  = "Stop_codons";
 845    _analysisHeaders[5]  = "LogL";
 846    _analysisHeaders[6]  = "omega";
 847    _analysisHeaders[7]  = "omega_range";
 848    _analysisHeaders[8]  = "AC";
 849    _analysisHeaders[9]  = "AT";
 850    _analysisHeaders[10] = "CG";
 851    _analysisHeaders[11] = "CT";
 852    _analysisHeaders[12] = "GT";
 853    _analysisHeaders[13] = "Tree";
 854
 855    return _analysisHeaders;
 856}
 857
 858/*---------------------------------------------------------*/
 859
 860function runAGeneFit (myID)
 861{
 862    fprintf (stdout, "[SimpleGlobalFitter.bf on GENE ", myID, "]\\n");
 863    taxonNameMap = {};
 864    
 865    for (k=0; k<ds.species; k=k+1)
 866    {
 867        GetString         (thisName, ds,k);
 868        shortName         = (thisName^{{"\\\\..+",""}})&&1;
 869        taxonNameMap[shortName] = thisName;
 870        SetParameter (ds,k,shortName);
 871    }
 872
 873    DataSetFilter filteredData = CreateFilter (ds,1);
 874    _nucSites                    = filteredData.sites;
 875    
 876    if (Abs(treeString))
 877    {
 878        givenTreeString = treeString;
 879    }
 880    else
 881    {
 882        if (_currentGene==1)
 883        {
 884            ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"NJ.bf");
 885        }
 886        givenTreeString = InferTreeTopology (0);
 887        treeString        = "";
 888    }
 889
 890    DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
 891
 892    if (_currentGene==1)
 893    {
 894        _MG94stdinOverload = {};
 895        _MG94stdinOverload ["0"] = "Global";
 896        _MG94stdinOverload ["1"] = modelSpecString;
 897
 898        ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"MG94custom.mdl",
 899                                 _MG94stdinOverload);
 900        
 901        Tree    codonTree           = givenTreeString;
 902    }
 903    else
 904    {
 905        HarvestFrequencies               (observedFreq,filteredData,3,1,1);
 906        MULTIPLY_BY_FREQS             = PopulateModelMatrix ("MG94custom", observedFreq);
 907        vectorOfFrequencies         = BuildCodonFrequencies (observedFreq);
 908        Model MG94customModel         = (MG94custom,vectorOfFrequencies,0);
 909
 910        Tree    codonTree            = givenTreeString;
 911    }
 912
 913    LikelihoodFunction lf     = (filteredData,codonTree);
 914
 915    Optimize                     (res,lf);
 916
 917    _snsAVL       =                 _computeSNSSites ("filteredData", _Genetic_Code, vectorOfFrequencies, 0);
 918    _cL           =                  ReturnVectorsOfCodonLengths (ComputeScalingStencils (0), "codonTree");
 919
 920
 921    _returnMe = {};
 922    _returnMe ["BLOCK"]              = myID;
 923    _returnMe ["LogL"]              = res[1][0];
 924    _returnMe ["BP"]                 = _snsAVL ["Sites"];
 925    _returnMe ["S_sites"]             = _snsAVL ["SSites"];
 926    _returnMe ["NS_sites"]             = _snsAVL ["NSSites"];
 927    _returnMe ["Stop_codons"]         = (_nucSites-filteredData.sites*3)$3;
 928    _returnMe ["AC"]                 = AC;
 929    _returnMe ["AT"]                 = AT;
 930    _returnMe ["CG"]                 = CG;
 931    _returnMe ["CT"]                 = CT;
 932    _returnMe ["GT"]                 = GT;
 933    _returnMe ["omega"]             = R;
 934    COVARIANCE_PARAMETER             = "R";
 935    COVARIANCE_PRECISION             = 0.95;
 936    CovarianceMatrix                 (cmx,lf);
 937    _returnMe ["omega_range"]         = ""+cmx[0]+"-"+cmx[2];
 938    _returnMe ["Tree"]                 = Format(codonTree,0,1);
 939
 940
 941    return _returnMe;
 942}
 943"""
 944
 945FastaReader = """
 946fscanf            (stdin, "String", _coreAnalysis);
 947fscanf            (stdin, "String", _outputDriver);
 948
 949ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def");
 950ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"dSdNTreeTools.ibf");
 951ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"CodonTools.bf");
 952ExecuteAFile             (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"GrabBag.bf");
 953
 954SetDialogPrompt ("Tree file");
 955fscanf            (PROMPT_FOR_FILE, "Tree",  givenTree);
 956
 957treeBranchNames               = BranchName (givenTree,-1);
 958treeBranchCount               = Columns    (treeBranchNames)-1;
 959treeString                    = Format (givenTree,1,1);
 960
 961SetDialogPrompt ("Multiple gene FASTA file");
 962fscanf          (PROMPT_FOR_FILE, "Lines", inLines);
 963fscanf            (stdin, "String",  modelSpecString);
 964fscanf            (stdin, "String", _outPath);
 965 
 966ExecuteAFile    (_outputDriver);
 967ExecuteAFile    (_coreAnalysis);
 968 
 969/*---------------------------------------------------------*/
 970
 971_linesIn     = Columns (inLines);
 972_currentGene   = 1; 
 973 _currentState = 0;
 974/* 0 - waiting for a non-empty line */
 975/* 1 - reading files */
 976
 977geneSeqs       = "";
 978geneSeqs        * 0;
 979
 980_prepareFileOutput (_outPath);
 981
 982for (l=0; l<_linesIn; l=l+1)
 983{
 984    if (Abs(inLines[l]) == 0)
 985    {
 986        if (_currentState == 1)
 987        {
 988            geneSeqs      * 0;
 989            DataSet       ds            = ReadFromString (geneSeqs);
 990            _processAGene (ds.species == treeBranchCount,_currentGene);
 991            geneSeqs * 128;
 992            _currentGene = _currentGene + 1;
 993        }
 994    }
 995    else
 996    {
 997        if (_currentState == 0)
 998        {
 999            _currentState = 1;
1000        }
1001        geneSeqs * inLines[l];
1002        geneSeqs * "\\n";
1003    }
1004}
1005
1006if (_currentState == 1)
1007{
1008    geneSeqs      * 0;
1009    DataSet       ds            = ReadFromString (geneSeqs);
1010    _processAGene (ds.species == treeBranchCount,_currentGene);
1011}
1012
1013_finishFileOutput (0);
1014"""
1015
1016TabWriter = """
1017/*---------------------------------------------------------*/
1018function _prepareFileOutput (_outPath)
1019{
1020    _outputFilePath = _outPath;
1021    
1022    _returnHeaders     = returnResultHeaders(0);
1023    
1024    fprintf (_outputFilePath, CLEAR_FILE, KEEP_OPEN, _returnHeaders[0]);
1025    for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
1026    {
1027        fprintf (_outputFilePath,"\\t",_returnHeaders[_biterator]);
1028    }
1029
1030
1031    
1032    fprintf (_outputFilePath,"\\n");
1033    return 0;
1034}
1035
1036/*---------------------------------------------------------*/
1037
1038function _processAGene (valid, _geneID)
1039{
1040    if (valid)
1041    {
1042        returnValue = runAGeneFit (_geneID);
1043        fprintf (_outputFilePath, returnValue[_returnHeaders[0]]);
1044        for (_biterator = 1; _biterator < Abs(_returnHeaders); _biterator = _biterator + 1)
1045        {
1046            fprintf (_outputFilePath,"\\t",returnValue[_returnHeaders[_biterator]]);
1047        }
1048        fprintf (_outputFilePath, "\\n");
1049    }
1050    /*
1051    else
1052    {
1053        fprintf (_outputFilePath, 
1054                _geneID, ", Incorrect number of sequences\\n");
1055    }
1056    */
1057    _currentState = 0;
1058    return 0;
1059}
1060
1061/*---------------------------------------------------------*/
1062function _finishFileOutput (dummy)
1063{
1064    return 0;
1065}
1066"""
1067
1068def get_dnds_config_filename(Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename ):
1069    contents = """
1070_genomeScreenOptions = {};
1071
1072/* all paths are either absolute or relative 
1073to the DATA READER */
1074
1075_genomeScreenOptions ["0"] = "%s"; 
1076    /* which analysis to run on each gene; */    
1077_genomeScreenOptions ["1"] = "%s"; 
1078    /* what output to produce; */    
1079_genomeScreenOptions ["2"] = "%s"; 
1080    /* genetic code */    
1081_genomeScreenOptions ["3"] = "%s"; 
1082    /* tree file */
1083_genomeScreenOptions ["4"] = "%s"; 
1084    /* alignment file */
1085_genomeScreenOptions ["5"] = "%s"; 
1086    /* nucleotide bias string; can define any of the 203 models */
1087_genomeScreenOptions ["6"] = "%s"; 
1088    /* output csv file */
1089    
1090ExecuteAFile ("%s", _genomeScreenOptions);    
1091""" % (Fitter_filename, TabWriter_filename, genetic_code, tree_filename, input_filename, nuc_model, output_filename, FastaReader_filename )
1092    return get_filled_temp_filename(contents)
1093    
1094    
1095def get_branch_lengths_config_filename(input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename):
1096    contents = """
1097_genomeScreenOptions = {};
1098
1099/* all paths are either absolute or relative 
1100to the NucDataBranchLengths.bf */
1101
1102_genomeScreenOptions ["0"] = "%s"; 
1103    /* the file to analyze; */    
1104_genomeScreenOptions ["1"] = "CUSTOM"; 
1105    /* use an arbitrary nucleotide model */    
1106_genomeScreenOptions ["2"] = "%s";
1107    /* which model to use */ 
1108_genomeScreenOptions ["3"] = "%s"; 
1109    /* model options */
1110_genomeScreenOptions ["4"] = "Estimated"; 
1111    /* rate parameters */
1112_genomeScreenOptions ["5"] = "%s"; 
1113    /* base frequencies */
1114_genomeScreenOptions ["6"] = "%s"; 
1115    /* the tree to use; */    
1116_genomeScreenOptions ["7"] = "%s"; 
1117    /* write .csv output to; */    
1118    
1119ExecuteAFile ("%s", _genomeScreenOptions);    
1120""" % (input_filename, nuc_model, model_options, base_freq, tree_filename, output_filename, BranchLengths_filename)
1121    return get_filled_temp_filename(contents)
1122    
1123    
1124def get_nj_tree_config_filename(input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename):
1125    contents = """
1126_genomeScreenOptions = {};
1127
1128/* all paths are either absolute or relative 
1129to the BuildNJTree.bf */
1130
1131_genomeScreenOptions ["0"] = "%s"; 
1132    /* the file to analyze; */    
1133_genomeScreenOptions ["1"] = "%s"; 
1134    /* pick which distance metric to use; TN93 is a good default */    
1135_genomeScreenOptions ["2"] = "%s"; 
1136    /* write Newick tree output to; */    
1137_genomeScreenOptions ["3"] = "%s"; 
1138    /* write a postscript tree file to this file; leave blank to not write a tree */    
1139    
1140ExecuteAFile ("%s", _genomeScreenOptions);    
1141""" % (input_filename, distance_metric, output_filename1, output_filename2, NJ_tree_filename)
1142    return get_filled_temp_filename(contents)
1143    
1144    
1145def get_nj_treeMF_config_filename(input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename):
1146    contents = """
1147_genomeScreenOptions = {};
1148
1149/* all paths are either absolute or relative 
1150to the BuildNJTreeMF.bf */
1151
1152_genomeScreenOptions ["0"] = "%s"; 
1153    /* the multiple alignment file to analyze; */    
1154_genomeScreenOptions ["1"] = "%s"; 
1155    /* write Newick tree output to; */    
1156_genomeScreenOptions ["2"] = "%s"; 
1157    /* write a postscript tree file to this file; leave blank to not write a tree */    
1158_genomeScreenOptions ["3"] = "%s"; 
1159    /* pick which distance metric to use; TN93 is a good default */    
1160    
1161ExecuteAFile ("%s", _genomeScreenOptions);        
1162""" % (input_filename, output_filename1, output_filename2, distance_metric, NJ_tree_filename)
1163    return get_filled_temp_filename(contents)