rjmcmc.R - The R code implements a Bayesian Markov Chain Mo…

/in.progress/auteurExtended/R/rjmcmc.R

http://github.com/eastman/auteur · R · 455 lines · 345 code · 70 blank · 40 comment · 88 complexity · d2983c1ca6f0c7707a70533fed0b5897 MD5 · raw file

#function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
#author: JM EASTMAN 2011

rjmcmc.ou <- function(	phy, dat, SE=0, ngen=1000, sample.freq=100, 
						prob.mergesplit=0.1, prob.theta=0.4, prob.alpha=0.2, upper.alpha=100, 
						lambdaK=log(2), constrainK=FALSE, prop.width=NULL, simplestart=FALSE, 
						internal.only=FALSE, summary=TRUE, fileBase="result") 
{ 
	model="OU"
	primary.parameter="optima"
	
# calibration of proposal width	
	if(is.null(prop.width)) {
		cat("CALIBRATING proposal width...\n")
		prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
		adjustable.prop.width=TRUE		
	} else {
		if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
		adjustable.prop.width=FALSE
	}
	
# parameter bounds 
	abs.low.alpha=1e-13
	if(length(upper.alpha)==2 & upper.alpha[1]>abs.low.alpha) {
		alpha.bounds=upper.alpha
	} else if(!is.null(upper.alpha) & is.numeric(upper.alpha)){
		alpha.bounds=c(abs.low.alpha, upper.alpha)
	} else {
		alpha.bounds=c(abs.low.alpha, 100)
	}
	
### prepare data for rjMCMC
	dataList		<- prepare.data(phy, dat, SE)			
	ape.tre			<- dataList$ape.tre			
	orig.dat		<- dataList$orig.dat
	SE				<- dataList$SE
	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
	ancestors		<- lapply(1:Ntip(ape.tre), function(x) c(x,rev(sort(get.ancestors.of.node(x,ape.tre)))))
	epochs			<- get.epochs(ape.tre)
	phyobject		<- get.times(ape.tre)
	vcvSE			<- vmat(ape.tre); if(any(SE!=0)) diag(vcvSE)<-diag(vcvSE)+SE^2
	
# initialize parameters
	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))
	
	if(simplestart | is.numeric(constrainK) | internal.only) {
		if(is.numeric(constrainK) & !internal.only) {
			init.theta	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
		} else {
			init.theta	<- list(values=rep(mean(orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
		}
	} else {
		init.theta		<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
	}
	
	init.hansen		<- fit.hansen(ape.tre,orig.dat,alpha.bounds)

	cur.theta		<- init.theta$values 
	cur.delta.theta	<- init.theta$delta
	cur.alpha		<- init.hansen$alpha
	cur.rate		<- init.hansen$sigmasq
	
# attempt to ensure valid starting point
	while(1) {
		mod.cur = try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, cur.rate, cur.alpha, c(-999, cur.theta)),silent=TRUE)
		if(inherits(mod.cur, "try-error")){
			if(!summary)stop()
			cur.alpha=adjustrate(cur.alpha, prop.width)
			cur.rate=adjustrate(cur.rate, prop.width)
		} else {
			cur.lnL=mod.cur$lnL
			break()
		}
	}	
	
# proposal frequencies	
	prob.rate=1-sum(c(2*prob.mergesplit, prob.theta, prob.alpha))
	if(prob.rate<0) stop("proposal frequencies must not exceed 1; adjust proposal frequencies")
	proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.theta, prob.alpha, prob.rate)
	prop.cs<-orig.prop.cs<-cumsum(proposal.rates)
	
# proposal counts
	n.props<-n.accept<-rep(0,length(prop.cs))
	names.props<-c("mergesplit","optima","alpha","rate")
	cur.acceptancerate=0
	
# file handling
	if(summary) {
		parmBase=paste(model, fileBase, "parameters/",sep=".")
		if(!file.exists(parmBase)) dir.create(parmBase)
		errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
		runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
		parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL, 
			   optima=NULL, delta=NULL, alpha=NULL, rate=NULL)
		parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
	}
	
# prop.width calibration points
	if(adjustable.prop.width){
		tf=c()
		tenths=function(v)floor(0.1*v)
		nn=ngen
		while(1) {
			vv=tenths(nn)
			nn=vv
			if(nn<=1) break() else tf=c(nn,tf)		
		}
		tuneFreq=tf
	} else {
		tuneFreq=0
	}
	
	tickerFreq=ceiling((ngen+max(tuneFreq))/30)
	
### Begin rjMCMC
    for (i in 1:(ngen+max(tuneFreq))) {
		
        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
		
		if(internal.only) {
			tips.tmp=which(as.numeric(names(cur.delta.theta))<=Ntip(ape.tre))
			if(any(cur.delta.theta[tips.tmp]==1)) stop("Broken internal only sampling")
		}
				
## OU IMPLEMENTATION ##
		while(1) {
			cur.proposal=min(which(runif(1)<prop.cs))
			if (cur.proposal==1 & !constrainK) {									# adjust theta categories
				nr=splitormerge(cur.delta.theta, cur.theta, ape.tre, node.des, lambdaK, logspace=FALSE, internal.only)
				new.theta=nr$new.values ##
				new.delta.theta=nr$new.delta ##
				new.alpha=cur.alpha
				new.rate=cur.rate 
				lnHastingsRatio=nr$lnHastingsRatio
				lnPriorRatio=nr$lnPriorRatio
				break()
			} else if(cur.proposal==2){													
				if(runif(1)>0.05 & sum(cur.delta.theta)>1) {						# tune local theta
					new.theta=tune.value(cur.theta, prop.width) ##
					new.delta.theta=cur.delta.theta 
					new.alpha=cur.alpha									
					new.rate=cur.rate 					
					break()
				} else {															# adjust theta
					new.theta=adjustvalue(cur.theta, prop.width) ##
					new.delta.theta=cur.delta.theta
					new.alpha=cur.alpha									
					new.rate=cur.rate 
					break()
				} 
			}  else if(cur.proposal==3) {											# adjust alpha
				while(1) {
					new.alpha=adjustrate(cur.alpha, prop.width) ##	
					if(withinrange(new.alpha, min(alpha.bounds), max(alpha.bounds))) break()
				}
				new.theta=cur.theta
				new.delta.theta=cur.delta.theta
				new.rate=cur.rate 
				break()
			} else if(cur.proposal==4) {											# adjust rate
				new.theta=cur.theta
				new.delta.theta=cur.delta.theta
				new.alpha=cur.alpha									
				new.rate=adjustrate(cur.rate,prop.width) ##
				break()
			}
		}
				
		n.props[cur.proposal]=n.props[cur.proposal]+1				
		
# compute fit of proposed model
		mod.new=NULL
		mod.new=try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, new.rate, new.alpha, c(-999, new.theta)),silent=TRUE)
		
		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
		if(!is.infinite(mod.new$lnL)) {
			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
		} else {
			mod.new$lnL=-Inf
			lnLikelihoodRatio = -Inf
		}
		
# compare likelihoods
		heat=1
		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
		
# potential errors
		if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
		if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.theta, new.delta.theta, errorLog)
		
		if (runif(1) <= r$r) {			## adopt proposal ##
			cur.theta <- new.theta
			cur.delta.theta <- new.delta.theta
			cur.alpha <- new.alpha
			cur.rate <- new.rate
			
			n.accept[cur.proposal] = n.accept[cur.proposal]+1
			
			mod.cur <- mod.new
			cur.lnL <- mod.new$lnL
		} 
		
# iteration-specific functions
		if(i%%tickerFreq==0 & summary) {
			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
			cat(". ")
		}
		
		if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {

			## DIAGNOSTICS ##
#			cc=cur.theta
#			names(cc)=phy$edge[,2]
#			if(sum(cc)>=1){branchcol.plot(ape.tre, cc, color.length=sum(cur.delta.theta)+1, log=FALSE)}

			parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio, 
					   optima=cur.theta, delta=cur.delta.theta, alpha=cur.alpha, rate=cur.rate)
			parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
		}
		
		if(any(tuneFreq%in%i) & adjustable.prop.width) {
			new.acceptancerate=sum(n.accept)/sum(n.props)
			if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
				prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
			}
			cur.acceptancerate=new.acceptancerate
		}
	}

# End rjMCMC

# clear out prop.width calibration directories if calibration run
	if(summary) {
		close(runLog)
		parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
		summarize.run(n.accept, n.props, names.props)	
		cleanup.files(parmBase, model, fileBase, ape.tre)
	} 
	return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
}



#function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
#author: LJ HARMON 2009, A HIPP 2009, and JM EASTMAN 2010-2011

rjmcmc.bm <- function (	phy, dat, SE=0, ngen=1000, sample.freq=100, 
						prob.mergesplit=0.20, prob.root=0.05, lambdaK=log(2), tuner=0.05, 
						prior.rate=NULL, constrainK=FALSE, prop.width=NULL, simplestart=FALSE, 
						internal.only=FALSE, summary=TRUE, fileBase="result") 
{ 
	model="BM"
	primary.parameter="rates"

# calibration of proposal width	
	if(is.null(prop.width)) {
		cat("CALIBRATING proposal width...\n")
		adjustable.prop.width=TRUE
		prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
	} else {
		if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
		adjustable.prop.width=FALSE
	}
		
### prepare data for rjMCMC
	dataList		<- prepare.data(phy, dat, SE)			
	ape.tre			<- dataList$ape.tre			
	orig.dat		<- dataList$orig.dat
	SE				<- dataList$SE
	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))

# initialize parameters
	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))

	if(simplestart | is.numeric(constrainK) | internal.only) {
		if(is.numeric(constrainK) & !internal.only) {
			init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width)
		} else {
			init.rate	<- list(values=rep(fit.continuous(ape.tre,orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
		}
	} else {
		init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width )
	}
	
	if(is.null(prior.rate)) prior.rate	<- fit.continuous(ape.tre,orig.dat)
		
    cur.rates		<- init.rate$values			
	cur.delta.rates	<- init.rate$delta
	cur.root		<- adjustvalue(mean(orig.dat), prop.width)
	cur.vcv			<- updatevcv(ape.tre, cur.rates)
	
	mod.cur = bm.lik.fn(cur.root, orig.dat, cur.vcv, SE)
	cur.lnL=mod.cur$lnL

			
# proposal frequencies
	prob.rates=1-sum(c(2*prob.mergesplit, prob.root))
	if(prob.rates<0) stop("proposal frequencies must not exceed 1; adjust 'prob.mergesplit' and (or) 'prob.root'")
	proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.root, prob.rates)
	prop.cs<-orig.prop.cs<-cumsum(proposal.rates)

# proposal counts
	n.props<-n.accept<-rep(0,length(prop.cs))
	names.props<-c("mergesplit","rootstate","rates")
	cur.acceptancerate=0
		
# file handling
	if(summary) {
		parmBase=paste(model, fileBase, "parameters/",sep=".")
		if(!file.exists(parmBase)) dir.create(parmBase)
		errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
		runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
		parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL, 
			   rates=NULL, delta=NULL, root=NULL)
		parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
	}
	
# prop.width calibration points
	if(adjustable.prop.width){
		tf=c()
		tenths=function(v)floor(0.1*v)
		nn=ngen
		while(1) {
			vv=tenths(nn)
			nn=vv
			if(nn<=1) break() else tf=c(nn,tf)		
		}
		tuneFreq=tf
	} else {
		tuneFreq=0
	}
	
	tickerFreq=ceiling((ngen+max(tuneFreq))/30)
	
### Begin rjMCMC
    for (i in 1:(ngen+max(tuneFreq))) {
		
        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
		
		if(internal.only) {
				tips.tmp=which(as.numeric(names(cur.delta.rates))<=Ntip(ape.tre))
				if(any(cur.delta.rates[tips.tmp]==1)) stop("Broken internal only sampling")
		}

		
## BM IMPLEMENTATION ##
		while(1) {
			cur.proposal=min(which(runif(1)<prop.cs))
			if (cur.proposal==1 & !constrainK) {									# adjust rate categories
				nr=splitormerge(cur.delta.rates, cur.rates, ape.tre, node.des, lambdaK, logspace=TRUE, internal.only)
				new.rates=nr$new.values
				new.delta.rates=nr$new.delta
				new.root=cur.root
				lnHastingsRatio=nr$lnHastingsRatio
				lnPriorRatio=nr$lnPriorRatio + priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)	
				break()
			} else if(cur.proposal==2) {											# adjust root
				new.root=proposal.slidingwindow(cur.root, 4*prop.width, min=NULL, max=NULL)$v								
				new.rates=cur.rates
				new.delta.rates=cur.delta.rates
				lnHastingsRatio=0
				lnPriorRatio=0
				break()
			} else if(cur.proposal==3){													
				if(runif(1)>0.05 & sum(cur.delta.rates)>1) {						# tune local rate
					nr=tune.rate(rates=cur.rates, prop.width=prop.width, min=0, max=Inf, tuner=tuner, prior.mean=prior.rate)
					new.rates=nr$values
					new.delta.rates=cur.delta.rates 
					new.root=cur.root
					lnHastingsRatio=nr$lnHastingsRatio
					lnPriorRatio=nr$lnPriorRatio
					break()
				} else {															# scale rates
					nr=proposal.multiplier(cur.rates, prop.width)
					new.rates=nr$v
					new.delta.rates=cur.delta.rates
					new.root=cur.root
					lnHastingsRatio=nr$lnHastingsRatio
					lnPriorRatio=priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)
					break()
				} 
			}
		}
		
		n.props[cur.proposal]=n.props[cur.proposal]+1				

	# compute fit of proposed model
		if(any(new.rates!=cur.rates)) new.vcv=updatevcv(ape.tre, new.rates) else new.vcv=cur.vcv
		mod.new=NULL
		mod.new=try(bm.lik.fn(new.root, orig.dat, new.vcv, SE), silent=TRUE)
		
		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
		if(!is.infinite(mod.new$lnL)) {
			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
		} else {
			mod.new$lnL=-Inf
			lnLikelihoodRatio = -Inf
		}
		
	# compare likelihoods
		heat=1
		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)

		# potential errors
		if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
		if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.rates, new.delta.rates, errorLog)
		
		if (runif(1) <= r$r) {			## adopt proposal ##
			cur.root <- new.root
			cur.rates <- new.rates
			cur.delta.rates <- new.delta.rates
			cur.vcv <- new.vcv

			n.accept[cur.proposal] = n.accept[cur.proposal]+1

			mod.cur <- mod.new
			cur.lnL <- mod.new$lnL
		} 
		
		# iteration-specific functions
		if(i%%tickerFreq==0 & summary) {
			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
			cat(". ")
		}
		
		if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {			
			parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio, 
					   rates=cur.rates, delta=cur.delta.rates, root=cur.root)
			parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
		}
									   
		if(any(tuneFreq%in%i) & adjustable.prop.width) {
			new.acceptancerate=sum(n.accept)/sum(n.props)
			if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
				prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
			}
			cur.acceptancerate=new.acceptancerate
		}
	}
	
# End rjMCMC
	
  # clear out prop.width calibration directories if calibration run
	if(summary) {
		close(runLog)
		parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
		summarize.run(n.accept, n.props, names.props)	
		cleanup.files(parmBase, model, fileBase, ape.tre)
	} 
	
	return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
}
Summary ✨

The R code implements a Bayesian Markov Chain Monte Carlo (MCMC) algorithm to estimate model parameters and their uncertainty. It iteratively proposes new parameter values, computes the likelihood of each proposal, and accepts or rejects proposals based on a Metropolis-Hastings acceptance criterion. The algorithm outputs the estimated model parameters, their standard errors, and acceptance rates for each iteration.
Alerts (4)

Complexity hotspot; lines 138 to 139 (total complexity: 3)
138 139
Complexity hotspot; lines 366 to 367 (total complexity: 3)
366 367