trait.rjmcmc.09022010.R - The R code implements a Markov Ch…

/archive/source.archive/bm.and.ou/trait.rjmcmc.09022010.R

http://github.com/eastman/auteur · R · 278 lines · 228 code · 25 blank · 25 comment · 70 complexity · eaa693bc376165c9155e0da4e32c9cc5 MD5 · raw file


## Written by LJ HARMON, AH: 2008



brownMcmc<-function (phy, data, ngen = 1000, probMergeSplit = 0.01, rootp = 0.1, sampleFreq = 100, ylim = c(-200, 0), fileBase = "result") 

{	  

	td <- treedata(phy, data, sort = T)

    nn <- length(td$phy$edge.length)	# max number of rates (one per branch)

    currRateCat <- numeric(nn)			# initialize array for recording which branch belongs to which rate category

    currRateCat[] <- 1					# assign one rate to all branches initially

    currRate <- numeric(nn)				# initialize array for rate estimates branchwise

    currRate[] <- init <- 0.001			# initialize BMMrate to be small

    currRootMean <- mean(td$data[, 1])	# determine an ancestral state (by averaging known data)

    l <- numeric(ngen)					# number of MCMC generations

    l[] <- NA							# initialize array for storing lnLs

    nMergeProp = 0						## proposal count: merge two rate parameters

    nSplitProp = 0						## proposal count: draw new rate parameter

    nRateProp = 0						## proposal count: update rate parameter(s)

    nMergeOK = 0						## proposal count: successful merges

    nSplitOK = 0						## proposal count: successful splits

    nRateOK = 0							## proposal count: successful rate changes



    res <- list(rate = currRate, rateCat = currRateCat, lnlprof = l)

    

# error and output file handling

	errorNum <- 0 

	errorLog <- paste(fileBase, 'errors.log', sep = '')

	cat(c('Error log for run beginning', date(), '\n'), file = errorLog, append = F, sep = ' ')

	cat(c('--------------------------------------------------------', '\n\n'), file = errorLog, append = T)

	cat(c('generation', '\t', 'numCat', '\t', 'rateMin', '\t', 'rateMax', '\t', 'cumulativePercentage', '\n'), file = paste(errorLog, 'stats.txt', sep = ''), append = F, sep = '')

	

	if (fileBase != "") {

        f1 <- paste(fileBase, "RateCat.log", sep = "")

        f2 <- paste(fileBase, "Rates.log", sep = "")

    } else {

        f1 <- ""

        f2 <- ""

    }

    cat("", file = f1)

    cat("", file = f2)

# end output file handling

	

# Begin rjMCMC

    for (i in 1:ngen) {

        lnLikelihoodRatio = 0

        lnProposalRatio = 0

        lnPriorRatio = 0

        merge = FALSE

        split = FALSE

		

# determine whether to split or merge rate categories

        if (runif(1) < (2 * probMergeSplit)) {

            if (max(currRateCat) == 1) {

                split = TRUE

            }

            else if (max(currRateCat) == nn) {

                merge = TRUE

            }

            else if (runif(1) < 0.5) {

                split = TRUE

            }

            else merge = TRUE

        }

		

# update either number of rate categories or rates themselves

        if (merge) 

            nMergeProp = nMergeProp + 1

        if (split) 

            nSplitProp = nSplitProp + 1

        if (!merge & !split) 

            nRateProp = nRateProp + 1

        newRateCat <- currRateCat

        newRate <- currRate

        newRootMean <- currRootMean

		

# decide whether to update root mean or update a rate estimate

        if (!merge & !split) {

            if (runif(1) < rootp) {

                rch <- runif(1, min = -0.5, max = 0.5)

                newRootMean <- newRootMean + rch

            }

            else {

                ncat <- max(currRateCat)

                rcat <- round(runif(1) * (ncat) + 0.5)

                r <- log(currRate[currRateCat == rcat][1])

                rch <- runif(1, min = -2, max = 2)

                nr <- r + rch

                newRate[currRateCat == rcat] <- exp(nr)

            }

        }

		

        if (merge) {

            ncat <- max(currRateCat)

            mergeCats <- sample(1:ncat)[1:2]

            newRateCat[newRateCat == mergeCats[1]] <- mergeCats[2]

            newRateCat <- fixRateCat(newRateCat)

            or1 <- currRate[currRateCat == mergeCats[1]][1]

            or2 <- currRate[currRateCat == mergeCats[2]][1]

            n1 <- sum(currRateCat == mergeCats[1])

            n2 <- sum(currRateCat == mergeCats[2])

            nr <- (n1 * or1 + n2 * or2)/(n1 + n2)

            newRate[currRateCat == mergeCats[1] | currRateCat == 

                mergeCats[2]] <- nr

            numSplittableBeforeMerge <- sum(table(currRateCat) > 

                1)

            numSplittableAfterMerge <- sum(table(newRateCat) > 

                1)

            if (max(currRateCat) == nn) {

                probMerge = 1

            }

            else {

                probMerge = 0.5

            }

            if (max(newRateCat) == 1) {

                probSplit = 1

            }

            else {

                probSplit = 0.5

            }

            factor = probSplit/probMerge

            lnProposalRatio = log(factor) + log(nn) + log(ncat) - log(numSplittableAfterMerge) - log(2^(n1 + n2) - 2) - log(nr/mean(currRate)) - log(n1 + n2)

            lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat - 1))

        }

		

        if (split) {

            ncat <- max(currRateCat)

            while (1) {

                rcat <- round(runif(1) * ncat + 0.5)

                nc <- sum(currRateCat == rcat)

                if (nc > 1) 

                  break

            }

            while (1) {

                new <- round(runif(nc) * 2 + 0.5)

                if (length(table(new)) == 2) 

                  break

            }

            newRateCat[currRateCat == rcat][new == 2] <- ncat + 

                1

            or <- currRate[currRateCat == rcat][1]

            n1 <- sum(new == 1)

            n2 <- sum(new == 2)

            u <- runif(1, min = -0.5 * n1 * or, max = 0.5 * n2 * 

                or)

            nr1 <- or + u/n1

            nr2 <- or - u/n2

            newRate[newRateCat == rcat] <- nr1

            newRate[newRateCat == ncat + 1] <- nr2

            newRateCat <- fixRateCat(newRateCat)

            numSplittableBeforeSplit <- sum(table(currRateCat) > 1)

            numSplittableAfterSplit <- sum(table(newRateCat) > 1)

            if (max(currRateCat) == 1) {

                probSplit = 1

            }

            else {

                probSplit = 0.5

            }

            if (max(newRateCat) == nn) {

                probMerge = 1

            }

            else {

                probMerge = 0.5

            }

            factor = probMerge/probSplit

            lnProposalRatio = log(factor) + log(numSplittableBeforeSplit) + log(2^(n1 + n2) - 2) + log(or/mean(currRate)) + log(n1 + n2) - log(nn) - log(ncat + 1)

            lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat + 1))

        }

		

        lnlCurr <- fitModel(td$phy, td$data[, 1], currRate, currRootMean)

        lnlNew <-  fitModel(td$phy, td$data[, 1], newRate, newRootMean)



		lnLikelihoodRatio = lnlNew - lnlCurr

		

		heat = 1

		lnR = heat * lnLikelihoodRatio + heat * lnPriorRatio + lnProposalRatio

    

		if (lnR > 0) {

			r = 1

		}

		else if (lnR < -100) {

			r = 0

		}

		else {

			r = exp(lnR)

		}

#message(paste('lnlNew - lnlCurr =', lnlNew, '-', lnlCurr, '=', lnLikelihoodRatio, "lnR", lnR, "r", r))

         

# adopt proposed update; message("YES")

		if (runif(1) <= r) {

			currRate <- newRate

			currRateCat <- newRateCat

			currRootMean <- newRootMean

			res$lnlprof[i] <- lnlNew

			if (merge) 

				nMergeOK = nMergeOK + 1

			if (split) 

				nSplitOK = nSplitOK + 1

			if (!merge & !split) 

				nRateOK = nRateOK + 1  

		}

		

# reject proposed update; message("NO")

		else {					

			res$lnlprof[i] <- lnlCurr  

		}

		

# log parameters

        if (i%%sampleFreq == 0) {

            res$rate <- cbind(res$rate, currRate)

            res$rateCat <- cbind(res$rateCat, currRateCat)

            cat(file = f1, i, mean(currRate), max(currRateCat), lnlCurr, "\n", append = T, sep = "\t")

            cat(file = f2, mean(currRate), "\n", append = T, sep = "\t")

            cat("Gen ", i, "completed\n")

		}

    }

# End rjMCMC

	

	diagnostics=c(nMergeOK, nMergeProp, nSplitOK, nSplitProp, nRateOK, nRateProp, init)

	names(diagnostics)=c("nMergeOK", "nMergeProp", "nSplitOK", "nSplitProp", "nRateOK", "nRateProp", "initialRate")

	res$diagnostics=diagnostics

	res$rate=as.data.frame(res$rate[,-1])

	names(res$rate)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")

	row.names(res$rate)=td$phy$edge[,2]

	res$rateCat=as.data.frame(res$rateCat[,-1])

	row.names(res$rateCat)=td$phy$edge[,2]

	names(res$rateCat)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")

	res$phy=td$phy

	res$dat=td$data[,1]

    res

}





fixRateCat<-function(x)

# assigns indicator variables to shared rate classes amongst branches

{

	f<-factor(x)

	n<-nlevels(f)

	o<-numeric(n)

	for(i in 1:n) o[i]<-which(f==levels(f)[i])[1]

	nf<-ordered(f, levels(f)[order(o)])	

	nx<-as.numeric(nf)

	nx

}



fitModel<-function(phy, data, model, rootMean)

# derive likelihood for given tree, data, and model

{

	phy$edge.length=phy$edge.length*model

	vcv<-vcv.phylo(phy)

	mm<-rep(rootMean, nrow(vcv))  ## analogous to w in glssoln

	return(lnldmv(data, mm, vcv))

	

	}

	

Stirling2 <- function(n,m)

{

    ## Purpose:  Stirling Numbers of the 2-nd kind

    ## 		S^{(m)}_n = number of ways of partitioning a set of

    ##                      $n$ elements into $m$ non-empty subsets

    ## Author: Martin Maechler, Date:  May 28 1992, 23:42

    ## ----------------------------------------------------------------

    ## Abramowitz/Stegun: 24,1,4 (p. 824-5 ; Table 24.4, p.835)

    ## Closed Form : p.824 "C."

    ## ----------------------------------------------------------------



    if (0 > m || m > n) stop("'m' must be in 0..n !")

    k <- 0:m

    sig <- rep(c(1,-1)*(-1)^m, length= m+1)# 1 for m=0; -1 1 (m=1)

    ## The following gives rounding errors for (25,5) :

    ## r <- sum( sig * k^n /(gamma(k+1)*gamma(m+1-k)) )

    ga <- gamma(k+1)

    round(sum( sig * k^n /(ga * rev(ga))))

}



lnldmv<-function(y, mean, sigma) {

# vcv = sigma

	num<- -t(y-mean)%*%solve(sigma)%*%(y-mean)/2

	den<-sqrt((2*pi)^length(y)*det(sigma))

	return((num-log(den))[1,1])

	}

Summary ✨

The R code implements a Markov Chain Monte Carlo (MCMC) algorithm to estimate parameters of a phylogenetic model. It iterates through a tree, assigning rates to branches and updating the model’s parameters based on likelihood calculations. The output is a set of estimated parameter values, diagnostics, and rate assignments for each generation of the MCMC chain.

Alerts (4)

Complexity hotspot; lines 95 to 98 (total complexity: 4)
95 96 97 98