trait.rjmcmc.09162010.R - This R code appears to be part of…

/archive/source.archive/bm.and.ou/trait.rjmcmc.09162010.R

http://github.com/eastman/auteur · R · 496 lines · 421 code · 58 blank · 17 comment · 68 complexity · 4327115b691c65e3dd0041bc4e0d9444 MD5 · raw file

## Written by LJ HARMON & AH: 2008

## Modified by JM EASTMAN: 09.2010



rjMCMC.trait<-function (phy, data, ngen=1000, sampleFreq=100, model=c("OU","BM"), probAlpha=0.05, probMergeSplit=0.05, probRoot=0.1, heat=1, fileBase="result") 

{

	if(length(model)>1)stop(paste("Please specify either ", sQuote("OU"), " or ", sQuote("BM")))

## MORE ERROR CHECKING NEEDED HERE ## 

	

	require(ouch)

	require(geiger)

	

### prepare objects for rjMCMC

	cur.model		<- model								# initialize family of models explored



	dataList		<- prepare.ouchdata(phy, data)			# check tree and data; convert into S4



	ape.tre			<- cur.tre <- dataList$ape.tre			# S3 phylogeny

	orig.dat		<- dataList$orig.dat					# S3 data	

	ouch.tre		<- dataList$ouch.tre					# S4 OUCH phylogeny

	ouch.dat		<- dataList$ouch.dat					# S4 OUCH data

	

	nn				<- length(ape.tre$edge.length)			# max number of rates (one per branch)

	init.rate		<- fitContinuous(ape.tre,orig.dat)

	init.rate		<- adjust.rate(init.rate[[1]]$beta)

    cur.rates		<- rep(init.rate,nn)					# initialize array for rate estimates branchwise

	cur.delta.rates	<- rep(0,nn)

	cur.root		<- mean(orig.dat)

	cur.alpha		<- 0.001								# initialize alpha

	cur.theta		<- rep(0,nn)							# initialize trait optima under OU

	cur.delta.theta	<- rep(0,nn)							# initialize local 'rates'

	cur.regimes		<- as.factor(c(sample(cur.theta,1),cur.theta))				# initial set of selective regimes

	cur.root.theta	<- sample(cur.theta,1)



	nRateProp = 0											# proposal count: update rate parameter(s)

	nRcatProp = 0											# proposal count: update rate categories

	nRootProp = 0

	nAlphaProp = 0											# proposal count: update alpha parameter

	nRegimesProp = 0										# proposal count: update selective regimes

	nThetaProp = 0

	nRateOK = 0												# proposal count: successful rate changes

	nRcatOK = 0												# proposal count: successful rate categories updates

	nRootOK = 0

	nAlphaOK = 0											# proposal count: successful alpha updates

	nRegimesOK = 0											# proposal count: successful regime updates

	nThetaOK = 0

	l <- array(dim=c(ngen,4))								# array for storing output: size is number of MCMC generations

    l[] <- NA												# initialize array for storing lnLs

	

	cOK=c(													# list of proposal tallies

		  nRateOK, 

		  nRcatOK, 

		  nRootOK, 

		  nAlphaOK, 

		  nRegimesOK, 

		  nThetaOK

		  )		

	

	errorLog=file(paste(getwd(),paste(cur.model, "rjTraitErrors.log",sep="."),sep="/"),open='w+')

	generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)

	runLog=file(paste(getwd(),paste(cur.model, "rjTrait.log",sep="."),sep="/"),open='w+')

	generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)

	

### Begin rjMCMC

    for (i in 1:ngen) {

        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0

		startparms=list(rates=cur.rates, cats=sum(cur.delta.rates)+1, root=cur.root, alph=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta))

	

## OU IMPLEMENTATION ## 

		if(cur.model=="OU") {

			if (runif(1) < (2 * probMergeSplit)) {

				if(runif(1)<0.5) {				# adjust theta categories

					nr=split.or.merge(cur.delta.theta, cur.theta, ape.tre, theta=TRUE)

					new.alpha=cur.alpha

					new.root.theta=cur.root.theta

					new.theta=nr$new.values ##

					new.delta.theta=nr$new.delta ##

					nrt=assign.root.theta(new.theta, ape.tre) ## 

					new.regimes=as.factor(c(nrt,new.theta)) ##

					new.rates=cur.rates 

					new.delta.rates=cur.delta.rates 

					nRegimesProp=nRegimesProp+1

				} else {						# adjust rate categories

					nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, theta=FALSE)

					new.alpha=cur.alpha

					new.root.theta=cur.root.theta

					new.theta=cur.theta

					new.delta.theta=cur.delta.theta

					new.regimes=cur.regimes

					new.rates=nr$new.values ##

					new.delta.rates=nr$new.delta ##

					nRcatProp=nRcatProp+1

				}

				lnHastingsRatio=nr$lnHastingsRatio

				lnPriorRatio=nr$lnPriorRatio

			} else { # neither split nor merge

				if(runif(1)<probAlpha) {		# adjust alpha

					new.alpha=adjust.rate(cur.alpha) ##

					new.root.theta=cur.root.theta

					new.theta=cur.theta

					new.delta.theta=cur.delta.theta

					new.regimes=cur.regimes 

					new.rates=cur.rates

					new.delta.rates=cur.delta.rates

					nAlphaProp=nAlphaProp+1

				} else { 

					if(runif(1)<0.5) {			# adjust rates

						new.alpha=cur.alpha

						new.root.theta=cur.root.theta

						new.theta=cur.theta

						new.delta.theta=cur.delta.theta

						new.regimes=cur.regimes 

						new.rates=adjust.rate(cur.rates) ##

						new.delta.rates=cur.delta.rates

						nRateProp=nRateProp+1

					} else {					# adjust theta

						new.alpha=cur.alpha 

						new.root.theta=cur.root.theta

						new.theta=adjust.value(cur.theta) ##

						new.delta.theta=cur.delta.theta

						nrt=assign.root.theta(new.theta, ape.tre) ## 

						new.regimes=as.factor(c(nrt,new.theta)) ##

						new.rates=cur.rates

						new.delta.rates=cur.delta.rates

						nThetaProp=nThetaProp+1				

					}

				}

			}

			

			cur.tre=apply.BMM.to.apetree(ape.tre, cur.rates)

			new.tre=apply.BMM.to.apetree(ape.tre, new.rates)

			mod.cur=new.ou.lik.fn(cur.rates, cur.alpha, cur.regimes, cur.theta, cur.tre, ouch.dat)

			mod.new=new.ou.lik.fn(new.rates, new.alpha, new.regimes, new.theta, new.tre, ouch.dat)

#			mod.new=try(new.ou.lik.fn(new.rates, new.alpha, new.regimes, new.theta, new.tre, ouch.dat),silent=TRUE)

		} else if(cur.model=="BM"){

## BM IMPLEMENTATION ##

			if (runif(1) < (2 * probMergeSplit)) {

				nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, theta=FALSE)

				new.rates=nr$new.values

				new.delta.rates=nr$new.delta

				new.root=cur.root

				nRcatProp=nRcatProp+1

				lnHastingsRatio=nr$lnHastingsRatio

				lnPriorRatio=nr$lnPriorRatio

			} else { 

				if(runif(1)<probRoot) { # adjust root

					new.root=adjust.value(cur.root)

					new.rates=cur.rates

					new.delta.rates=cur.delta.rates

					nRootProp=nRootProp+1

				} else { # adjust rates

					new.rates=adjust.rate(cur.rates)

					new.delta.rates=cur.delta.rates

					new.root=cur.root

					nRateProp=nRateProp+1

				}

			}

			

			cur.tre=apply.BMM.to.apetree(ape.tre, cur.rates)

			new.tre=apply.BMM.to.apetree(ape.tre, new.rates)

			mod.cur=new.bm.lik.fn(cur.rates, cur.root, cur.tre, orig.dat)

			mod.new=new.bm.lik.fn(new.rates, new.root, new.tre, orig.dat)

#			mod.new=try(new.bm.lik.fn(new.rates, new.root, new.tre, ouch.dat),silent=TRUE)

		}

	

		if(!inherits(mod.new, "try-error")) {

			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL

		} else {

			lnLikelihoodRatio = -Inf

			mod.new$lnL=NaN

		}

		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)



		if (runif(1) <= r$r) {			# adopt proposal

			if(cur.model=="OU") {

				decision="adopt"

				cur.alpha <- new.alpha

				cur.root.theta <- new.root.theta

				cur.regimes <- new.regimes

				cur.theta <- new.theta

				cur.delta.theta <- new.delta.theta

			} else {

				decision="adopt"

				cur.root <- new.root

			}

			cur.rates <- new.rates

			cur.delta.rates <- new.delta.rates

			curr.lnL <- mod.new$lnL

			cur.tre <- new.tre

		} else {						# deny proposal	

			decision="reject"

			curr.lnL <- mod.cur$lnL 

		}

		

		if(i==1) cat(paste("gen", "cur.lnL", "new.lnL", "lnR", sQuote(decision), sep="\t\t"),"\n")

		cat(paste(i, sprintf("%.2f", mod.cur$lnL), sprintf("%.2f", mod.new$lnL), sprintf("%.2f", lnR), sQuote(decision), sep="\t\t"),"\n")



		if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)

		

		l[i,]<-c(curr.lnL, sum(cur.delta.rates)+1, cur.alpha, length(table(cur.regimes)))

		endparms=list(rates=cur.rates, cats=sum(cur.delta.rates)+1, root=cur.root, alph=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta))

		cOK=tally.mcmc.parms(startparms, endparms, cOK)

		

		bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta), lnL=curr.lnL)

		if(i%%sampleFreq==0) generate.log(bundled.parms, cur.model, file=runLog)

	}

	

# End rjMCMC

	close(errorLog)

	close(runLog)

	summarize.run(cOK, (cProp<-c(nRateProp, nRcatProp, nRootProp, nAlphaProp, nRegimesProp, nThetaProp)), cur.model)	

	if(cur.model=="BM") {

		return(list(r.cat=l[,2], lnL=l[,1]))

	} else {

		return(list(r.cat=l[,2], r.alpha=l[,3], r.regimes=l[,4], lnL=l[,1]))

	}

}





## AUXILIARY FUNCTIONS





new.ou.lik.fn <- function(rates, alpha, regimes, theta, ape.tre, ouch.dat) { # from OUCH 2.7-1:::hansen()

# mod 09.13.2010 JM Eastman

	ouch.tre=ape2ouch(ape.tre, scale=FALSE)

	ouch.dat$regimes=as.factor(regimes)

	theta.tmp=as.numeric(levels(as.factor(theta)))

	theta=theta.tmp[order(match(theta.tmp,theta))]

	alpha.ouch=as.matrix(sqrt(alpha))

	beta=ouch:::regime.spec(ouch.tre, ouch.dat['regimes'])

	dat=do.call(c,lapply(ouch.dat['trait'],function(y)y[ouch.tre@term]))

	names(dat)=ouch.dat$labels[Ntip(ape.tre):nrow(ouch.dat)]

	n <- length(dat)

	otree.df=as(ouch.tre,"data.frame")

	ordering=otree.df[n:nrow(otree.df),'labels']

	ev <- eigen(alpha.ouch,symmetric=TRUE)

	w <- .Call(ouch:::ouch_weights,object=ouch.tre,lambda=ev$values,S=ev$vectors,beta=beta)

	v <- geiger:::ouMatrix(vcv.phylo(ape.tre), alpha)

	v.match <- match(row.names(v),ordering)

	v <- v[v.match,v.match]

		## GEIGER call does not assume ultrametricity (and thus allows different 'rates' under BM)

		# -old-	v <- .Call(ouch:::ouch_covar,object=ouch.tre,lambda=ev$values,S=ev$vectors,sigma.sq=1)

	

	y <- matrix(theta,ncol=1)

	e <- w%*%y-dat

	dim(y) <- dim(y)[1]

	dim(e) <- n

	q <- e%*%solve(v,e)

	det.v <- determinant(v,logarithm=TRUE)

	if (det.v$sign!=1)stop("ou.lik.fn error: non-positive determinant",call.=FALSE)

	dev <- n*log(2*pi)+as.numeric(det.v$modulus)+q[1,1]

	list(

		 theta=theta,

		 weight=w,

		 vcov=v,

		 resids=e,

		 lnL=-0.5*dev

		 )

}



new.bm.lik.fn <- function(rates, root, ape.tre, orig.dat) { # from OUCH 2.7-1:::brown()	

# mod 09.16.2010 JM Eastman



	tree=ape.tre

	data=orig.dat



	nterm=Ntip(tree)

	nchar=length(data)

	dat=data[match(tree$tip.label,names(data))]



	nchar <- 1

	w <- matrix(data=1,nrow=nterm,ncol=1)

	n <- length(dat)



	b <- vcv.phylo(ape.tre)



	y <- matrix(root)

	e <- w%*%y-dat

	q <- t(e)%*%solve(b,e)

	v <- q/nterm

	dev <- nchar*nterm*(1+log(2*pi))+nchar*log(det(b))+nterm*log(det(v))

	list(

		 root=root,

		 lnL = -0.5*dev

		 )	

}



split.or.merge <- function(cur.delta, cur.values, phy, theta=FALSE) { 

# mod 09.14.2010 JM Eastman

	bb=cur.delta

	vv=cur.values

	names(vv)<-names(bb)<-phy$edge[,2]

	new.bb=choose.one(bb)

	new.vv=vv

	

	s=names(new.bb[bb!=new.bb])

	all.shifts=as.numeric(names(bb[bb>0]))

	all.D=get.descendants.of.node(s, phy)

	untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)get.descendants.of.node(x,phy)))

	remain.unchanged=union(all.shifts, untouchable)

	

	marker=match(s, names(new.vv))

	nn=length(vv)

	K=length(table(vv))

	N=Ntip(phy)

	

	if(sum(new.bb)>sum(bb)) {			# add transition: SPLIT

		if(theta) {							# assign new theta to current node

			new.vv[marker] <- nr <- adjust.value(new.vv[marker])

		} else {							# assign new rate to current node

			new.vv[marker] <- nr <- adjust.rate(new.vv[marker])

		}

		if(length(remain.unchanged)==0) {	# assign new value to all descendants

			new.vv[match(all.D,names(new.vv))] = nr

		} else {							# assign new value to all 'open' descendants 

			new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr

		}

		lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree

		lnPriorRatio = log(ppois(K+1,log(2),lower.tail=FALSE)/ppois(K,log(2),lower.tail=FALSE))

	} else {							# drop transition: MERGE

		anc = get.ancestor.of.node(s, phy)

		if(!is.root(anc, phy)) {			# substitute ancestral value for current

			new.vv[match(s, names(new.vv))] <- nr <- new.vv[match(anc, names(new.vv))] 

		} else {							

			if(theta) {						# assign new theta to first descendant of root

				new.vv[marker] <- nr <- adjust.value(new.vv[marker])

			} else {						# assign new rate to first descendant of root

				new.vv[marker] <- nr <- adjust.rate(new.vv[marker])

			}		

		}

		if(length(remain.unchanged)==0) {	# assign new value to all descendants

			new.vv[match(all.D, names(new.vv))] = nr

		} else {							# assign new value to all 'open' descendants

			new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr

		}

		lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree

		lnPriorRatio = log(ppois(K-1,log(2),lower.tail=FALSE)/ppois(K,log(2),lower.tail=FALSE))

		

	}

	return(list(new.values=new.vv, new.delta=new.bb, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio))

}



assign.root.theta <- function(cur.theta, phy) {

	root=phy$edge[1,1]

	desc=phy$edge[which(phy$edge[,1]==root),2]

	root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]

	root.theta

}



get.descendants.of.node <- function(node, phy) {

	storage=list()

	if(node%in%phy$edge[,1]) {

		desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))

		while(any(desc%in%phy$edge[,1])) {

			desc.true=desc[which(desc%in%phy$edge[,1])]

			desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))

			

			storage=list(storage,union(desc,desc.desc))

			

			desc=unlist(desc.desc)

		}

	}

	return(unique(unlist(storage)))

}







get.desc.of.node <- function(node, phy) {

	return(phy$edge[which(phy$edge[,1]==node),2])

}



		

get.ancestor.of.node<-function(node, phy) {

	anc=phy$edge[which(phy$edge[,2]==node),1]

	anc

}



choose.one <- function(cur.delta)

{

	bb=cur.delta

	s=sample(1:length(bb),1)

	bb[s]=1-bb[s]

	bb

}



is.root<-function(node,phy) {

	if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)

}



assess.lnR <- function(lnR) {

	if(is.na(lnR) || abs(lnR)==Inf) {

		error=TRUE

		r=0

	} else {

		if(lnR < -100) {

			r=0 

		} else if(lnR > 0) {

			r=1 

		} else {

			r=exp(lnR)

		}

		error=FALSE

	}

	return(list(r=r, error=error))

}



apply.BMM.to.apetree<-function(apetree, rates) {

	apetree$edge.length=apetree$edge.length*rates

	return(apetree)

}



adjust.value <- function(value) {

	vv=value

	rch <- runif(1, min = -2, max = 2)

	vv <- vv + rch

	vv

}



adjust.rate <- function(rate) {

	rr=rate

	r=log(rr)

	rch=runif(1, min=-2, max=2)

	nr=r+rch

	rr=exp(nr)

	rr

}

	

prepare.ouchdata <- function(phy, data) {

	td <- treedata(phy, data, sort = T)	

	ouch.tre <- ape2ouch(td$phy->ape.tre, scale=FALSE)		

	ouch.dat=data.frame(as.numeric(ouch.tre@nodes), as.character(ouch.tre@nodelabels))

	names(ouch.dat)=c("nodes","labels")

	ouch.dat[ouch.dat[,2]=="",2]=NA

	ouch.dat$trait=NA

	mM=match(ouch.dat$labels,names(data))

	for(i in 1:nrow(ouch.dat)){

		if(!is.na(mM[i])){

			ouch.dat$trait[i]=data[mM[i]]	

		}

	}

	return(list(ouch.tre=ouch.tre, ouch.dat=ouch.dat, ape.tre=td$phy, orig.dat=td$data[,1]))

}



summarize.run<-function(cOK, cProp, cur.model) {

	names(cProp)<-names(cOK)<-c("rates", "rcats", "root", "alpha", "sRegimes", "theta")

	names(cProp)=paste("prop.",names(cProp),sep="")

	names(cOK)=paste("okay.",names(cOK),sep="")

	cat("\n")

	if(cur.model=="BM") {

		print(cProp[1:3])

		print(cOK[1:3])

	} else {

		print(cProp[c(1,2,4,5,6)])

		print(cOK[c(1,2,4,5,6)])

	}

}



generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {

#	bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=max(cur.delta.rates), root=cur.root, alpha=cur.alpha, reg=max(cur.regimes), theta=mean(cur.theta), lnL=curr.lnL)

	res=bundled.parms



	if(init) {

		if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "mean.rate", "rates", "alpha", "regimes", "mean.theta", "lnL", sep="\t")

	} else {

		if(cur.model=="BM") {

			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")

		} else {

			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.4f", res$alpha), res$reg, sprintf("%.3f", res$theta), sprintf("%.3f", res$lnL),sep="\t")

		}

	}

	write(msg, file=file, append=TRUE)

}



generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {

	if(init) {

		initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)

	} else {

		write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)

	}

}



tally.mcmc.parms<-function(startparms, endparms, cOK) {

	index=array(dim=(length(startparms)-1))

	for(i in 1:(length(startparms)-1)) {

		index[i]=all(startparms[[i]]==endparms[[i]])

	}

	if(!all(index)) {

		cOK[max(which(index==FALSE))]=cOK[max(which(index==FALSE))]+1

	}

	if(length(startparms$theta) == length(endparms$theta)) {

		if(!all(sort(startparms$theta)==sort(endparms$theta))) {

			cOK[6]=cOK[6]+1

		}

	}

	cOK

}
Summary ✨

This R code appears to be part of a Bayesian model comparison and MCMC simulation framework. It generates log files, summarizes run results, and tracks convergence metrics such as likelihood ratio (lnR) values, parameter estimates, and convergence statistics for multiple models. The output provides insight into the performance of different models and helps identify potential issues with the simulations or model selection process.
Alerts (4)

Complexity hotspot; lines 461 to 464 (total complexity: 5)
461 462 463 464