PageRenderTime 124ms CodeModel.GetById 18ms app.highlight 92ms RepoModel.GetById 2ms app.codeStats 0ms

/archive/source.archive/bm.and.ou/rjmcmc.12042010.R

http://github.com/eastman/auteur
R | 1310 lines | 1117 code | 140 blank | 53 comment | 222 complexity | 535a2e3626d1e9d9ed5c52c6c27f4d54 MD5 | raw file
   1## Written by LJ HARMON (2008), AL HIPP (2008), and JM EASTMAN (2010)
   2# MODELING TRAIT EVOLUTION by BROWNIAN MOTION
   3
   4rjMCMC.bm<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, lambdaK=log(2), constrainK=FALSE, jumpsize=NULL, fileBase="result", simplestart=FALSE, switcheroo=TRUE) 
   5{ 
   6# ngen=1000; sampleFreq=100; probMergeSplit=0.05; probRoot=0.01; lambdaK=log(2); constrainK=FALSE; jumpsize=NULL; fileBase="result"; simplestart=FALSE; switcheroo=TRUE 
   7	model="BM"
   8	heat=1
   9	if(is.null(jumpsize)) jumpsize=log(2)
  10	require(geiger)
  11		
  12### prepare data for rjMCMC
  13	cur.model		<- model
  14
  15	dataList		<- prepare.data(phy, data)			
  16	ape.tre			<- dataList$ape.tre			
  17	orig.dat		<- dataList$orig.dat						
  18	nn				<- length(ape.tre$edge.length)	
  19	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  20	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  21	subtrees		<- subtrees(phy)
  22	subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
  23
  24# initialize parameters
  25	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
  26
  27	if(simplestart | is.numeric(constrainK) ) {
  28		if(is.numeric(constrainK)) {
  29			init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize)
  30		} else {
  31			init.rate	<- list(values=rep(fitContinuous(ape.tre,orig.dat)$Trait1$beta,length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
  32		}
  33	} else {
  34		init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize )
  35	}
  36		
  37    cur.rates		<- init.rate$values			
  38	cur.delta.rates	<- init.rate$delta
  39	cur.root		<- adjust.value(mean(orig.dat), jumpsize)
  40#	design.vcv		<- design.vcv.phylo(ape.tre)
  41	cur.vcv			<- update.vcv(ape.tre, cur.rates)
  42	
  43	mod.cur = new.bm.lik.fn(cur.rates, cur.root, orig.dat, cur.vcv)
  44	
  45# proposal counts
  46	nRateProp =		0	
  47	nRateSwapProp = 0									
  48	nRcatProp =		0										
  49	nRootProp =		0
  50	nRateOK =		0											
  51	nRateSwapOK =	0
  52	nRcatOK =		0											
  53	nRootOK =		0
  54
  55	cOK=c(												
  56		nRateOK,
  57		nRateSwapOK,
  58		nRcatOK, 
  59		nRootOK 
  60	)
  61		
  62	tickerFreq=ceiling(ngen/30)
  63	
  64# file handling
  65	parmBase=paste(model, fileBase, "parameters/",sep=".")
  66	if(!file.exists(parmBase)) dir.create(parmBase)
  67	parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
  68	errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
  69	generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
  70	runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
  71	generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
  72
  73
  74### Begin rjMCMC
  75    for (i in 1:ngen) {
  76        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  77		startparms = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp)
  78
  79## BM IMPLEMENTATION ##
  80		while(1) {
  81			if (runif(1) < (2 * probMergeSplit) & !constrainK) {			# adjust rate categories
  82				nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, node.des, lambdaK, theta=FALSE)
  83				new.rates=nr$new.values
  84				new.delta.rates=nr$new.delta
  85				new.root=cur.root
  86				nRcatProp=nRcatProp+1
  87				lnHastingsRatio=nr$lnHastingsRatio
  88				lnPriorRatio=nr$lnPriorRatio
  89				break()
  90			} else { 
  91				if(runif(1)<probRoot) {										# adjust root
  92					new.root=adjust.value(cur.root, jumpsize)
  93					new.rates=cur.rates
  94					new.delta.rates=cur.delta.rates
  95					nRootProp=nRootProp+1
  96					break()
  97				} else {													
  98					if(runif(1)>0.1 & length(unique(cur.rates))>1 & switcheroo) {		# swap rates classes
  99						nr=switcheroo(cur.rates)
 100						new.rates=nr$new.values ##
 101						new.delta.rates=cur.delta.rates 
 102						new.root=cur.root
 103						nRateSwapProp=nRateSwapProp+1
 104						break()
 105					} else {												# adjust rates
 106						new.rates=adjust.rate(cur.rates, jumpsize)
 107						new.delta.rates=cur.delta.rates
 108						new.root=cur.root
 109						nRateProp=nRateProp+1
 110						break()
 111					} 
 112				}
 113			}
 114		}
 115		
 116		if(any(new.rates!=cur.rates)) new.vcv=update.vcv(ape.tre, new.rates) else new.vcv=cur.vcv
 117		mod.new=try(new.bm.lik.fn(new.rates, new.root, orig.dat, new.vcv), silent=TRUE)
 118		
 119		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=Inf}
 120		if(!is.infinite(mod.new$lnL)) {
 121			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
 122		} else {
 123			lnLikelihoodRatio = -Inf
 124			failures=paste("./failed", model, "parameters/",sep=".")
 125			if(!file.exists(failures)) dir.create(failures)
 126			write.table(matrix(c(i, new.rates),nrow=1),file=paste(failures,"failed.rates.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
 127			write.table(matrix(c(i, new.root),nrow=1),file=paste(failures,"failed.root.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
 128		}
 129		
 130	# compare likelihoods
 131		endparms = c(nRateProp=nRateProp, nRateSwapProp=nRateSwapProp, nRcatProp=nRcatProp, nRootProp=nRootProp)
 132
 133		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
 134
 135		if (runif(1) <= r$r) {			# adopt proposal
 136			decision="adopt"
 137			cur.root <- new.root
 138			cur.rates <- new.rates
 139			cur.delta.rates <- new.delta.rates
 140			mod.cur <- mod.new
 141			cur.vcv <- new.vcv
 142			curr.lnL <- mod.new$lnL
 143			cOK <- determine.accepted.proposal(startparms, endparms, cOK)
 144		} else {						# deny proposal	
 145			decision="reject"
 146			curr.lnL <- mod.cur$lnL 
 147		}
 148		
 149		if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
 150
 151		if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
 152		
 153		if(i%%tickerFreq==0) {
 154			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
 155			cat(". ")
 156		}
 157		
 158		if(i%%sampleFreq==0) {
 159			bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, lnL=curr.lnL)
 160			generate.log(bundled.parms, cur.model, file=runLog)			
 161			parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root, cur.rates=cur.rates, cur.delta.rates=cur.delta.rates, parmBase=parmBase)
 162		}
 163	}
 164	
 165# End rjMCMC
 166	close(errorLog)
 167	close(runLog)
 168	summarize.run(cOK, endparms, cur.model)	
 169}
 170
 171rjMCMC.ou<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, probRate=0.1, probAlpha=0.01, upperAlpha=100, lambdaK=log(2), constrainK=FALSE, jumpsize=2, fileBase="result", simplestart=FALSE, switcheroo=TRUE) 
 172{
 173#	load("rjMCMC/versions/fakedata.rda"); phy=sims$phy; data=sims[[2]]; ngen=1000; sampleFreq=100; probMergeSplit=0.05; probRoot=0.01; probRate=0.1; probAlpha=0.01; lambdaK=log(2); constrainK=FALSE; jumpsize=2; upperAlpha=100; fileBase="result"; simplestart=FALSE 
 174	
 175	model="OU"
 176	heat=1
 177	require(ouch)
 178	require(geiger)
 179	abs.low.alpha=1e-13
 180	if(length(upperAlpha)==2 & upperAlpha[1]>abs.low.alpha) {
 181		alpha.bounds=upperAlpha
 182	} else if(!is.null(upperAlpha) & is.numeric(upperAlpha)){
 183		alpha.bounds=c(abs.low.alpha, upperAlpha)
 184	} else {
 185		alpha.bounds=c(abs.low.alpha, 100)
 186	}
 187	
 188### prepare data for rjMCMC
 189	cur.model		<- model								
 190	
 191	dataList		<- prepare.ouchdata(phy, data)			
 192	ape.tre			<- dataList$ape.tre			
 193	orig.dat		<- dataList$orig.dat	
 194	ouch.tre		<- dataList$ouch.tre
 195	ouch.dat		<- dataList$ouch.dat
 196	ape.to.ouch.order <- match(as.numeric(names(dataList$ape.ordered[-1])), as.numeric(ape.tre$edge[,2]))
 197	nn				<- length(ape.tre$edge.length)	
 198	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
 199	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
 200	subtrees		<- subtrees(phy)
 201	subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
 202	
 203# initialize parameters
 204	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
 205	
 206	if(simplestart | is.numeric(constrainK) ) {
 207		if(is.numeric(constrainK)) {
 208			init.theta	<- generate.starting.point(orig.dat, ape.tre, node.des, theta=TRUE, K=constrainK, jumpsize=jumpsize)
 209		} else {
 210			init.theta	<- list(values=rep(mean(orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
 211		}
 212	} else {
 213		init.theta	<- generate.starting.point(orig.dat, ape.tre, node.des, theta=TRUE, K=constrainK, jumpsize=jumpsize )
 214	}
 215	
 216	cur.rate		<- fitContinuous(ape.tre,orig.dat)$Trait1$beta
 217	cur.alpha		<- min(runif(1000, alpha.bounds[1], alpha.bounds[2]))
 218    cur.theta		<- init.theta$values			
 219	cur.delta.theta	<- init.theta$delta
 220	cur.root.theta	<- assign.root.theta(cur.theta,ape.tre)
 221	cur.regimes		<- c(cur.root.theta,cur.theta) 
 222	
 223	mod.cur = new.ou.lik.fn(cur.rate, cur.alpha, cur.regimes, cur.theta, ouch.dat, ouch.tre, ape.to.ouch.order)
 224	
 225# proposal counts
 226	nRateProp =		0	
 227	nRootProp =		0
 228	nAlphaProp =	0										
 229	nTcatProp =		0										
 230	nThetaProp =	0
 231	nRateOK =		0											
 232	nRootOK =		0
 233	nAlphaOK =		0										
 234	nTcatOK =		0
 235	nThetaOK =		0
 236	
 237	cOK=c(												
 238		  nRateOK,
 239		  nRootOK,
 240		  nAlphaOK,
 241		  nTcatOK,
 242		  nThetaOK
 243		  )
 244	
 245	tickerFreq=ceiling(ngen/30)
 246	
 247# file handling
 248	parmBase=paste(model, fileBase, "parameters/",sep=".")
 249	if(!file.exists(parmBase)) dir.create(parmBase)
 250	parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
 251	errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
 252	generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
 253	runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
 254	generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
 255	
 256	probSwap=max(c(probRoot, probRate, probAlpha))
 257	minorprobs=sum(c(probRoot, probRate, probAlpha, probSwap))
 258### Begin rjMCMC
 259    for (i in 1:ngen) {
 260		
 261		# loop to ensure that each state produces a sensible proposal 
 262		while(1) {
 263			lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
 264			startparms = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
 265			
 266			## OU IMPLEMENTATION ##
 267			if (runif(1) < (2 * probMergeSplit) & !constrainK) {									# adjust theta categories
 268				nr=split.or.merge(cur.delta.theta, cur.theta, ape.tre, node.des, lambdaK, theta=TRUE)
 269				new.alpha=cur.alpha
 270				new.theta=nr$new.values ##
 271				new.delta.theta=nr$new.delta ##
 272				new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ## 
 273				new.regimes=c(nrt,new.theta) ##
 274				new.rate=cur.rate 
 275				nTcatProp=nTcatProp+1
 276				lnHastingsRatio=nr$lnHastingsRatio
 277				lnPriorRatio=nr$lnPriorRatio
 278				
 279			} else { # neither split nor merge
 280				while(1) {
 281					if(runif(1)<(probAlpha/minorprobs)) {											# adjust alpha
 282						new.alpha=adjust.rate(cur.alpha, jumpsize) ##
 283						new.root.theta=cur.root.theta
 284						new.theta=cur.theta
 285						new.delta.theta=cur.delta.theta
 286						new.regimes=cur.regimes 
 287						new.rate=cur.rate
 288						nAlphaProp=nAlphaProp+1
 289						break()
 290					} else if(runif(1)<(probRoot/minorprobs)) {										# adjust root
 291						new.alpha=cur.alpha									
 292						new.root.theta=assign.root.theta(cur.theta, ape.tre) ##
 293						new.theta=cur.theta
 294						new.delta.theta=cur.delta.theta
 295						new.regimes=cur.regimes 
 296						new.rate=cur.rate 
 297						nRootProp=nRootProp+1					
 298						break()
 299					} else if(runif(1)<(probRate/minorprobs)) {										# adjust rate
 300						new.alpha=cur.alpha									
 301						new.root.theta=cur.root.theta
 302						new.theta=cur.theta
 303						new.delta.theta=cur.delta.theta
 304						new.regimes=cur.regimes 
 305						new.rate=adjust.rate(cur.rate, jumpsize) ##
 306						nRateProp=nRateProp+1
 307						break()
 308					} else if(runif(1)<0.5*(probSwap/minorprobs) & length(unique(cur.theta))>1 & switcheroo) {	# swap thetas
 309						nr=switcheroo(cur.theta)
 310						new.alpha=cur.alpha 
 311						new.theta=nr$new.values ##
 312						new.delta.theta=cur.delta.theta 
 313						new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ## 
 314						new.regimes=c(nrt,new.theta) ##
 315						new.rate=cur.rate
 316						nThetaProp=nThetaProp+1					
 317						break()
 318					} else {																		# adjust thetas
 319						new.alpha=cur.alpha 
 320						new.theta=adjust.value(cur.theta, jumpsize, theta=TRUE) ##
 321						new.delta.theta=cur.delta.theta 
 322						new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ## 
 323						new.regimes=c(nrt,new.theta) ##
 324						new.rate=cur.rate
 325						nThetaProp=nThetaProp+1					
 326						break()
 327					}
 328					
 329				}
 330			}
 331			if(within.range(new.alpha, alpha.bounds[1], alpha.bounds[2])) {
 332				mod.new=try(new.ou.lik.fn(new.rate, new.alpha, new.regimes, new.theta, ouch.dat, ouch.tre, ape.to.ouch.order),silent=TRUE)
 333				if(!inherits(mod.new, "try-error")) break()
 334			}
 335
 336		}
 337		lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
 338	
 339		
 340# compare likelihoods
 341		endparms = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
 342
 343		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
 344		
 345		if (runif(1) <= r$r) {			# adopt proposal
 346			decision="adopt"
 347			cur.alpha <- new.alpha
 348			cur.theta <- new.theta
 349			cur.delta.theta <- new.delta.theta
 350			cur.root.theta <- new.root.theta
 351			cur.regimes <- new.regimes
 352			cur.rate <- new.rate
 353			cOK <- determine.accepted.proposal(startparms, endparms, cOK)
 354			mod.cur <- mod.new
 355			curr.lnL <- mod.new$lnL
 356		} else {						# deny proposal	
 357			decision="reject"
 358			curr.lnL <- mod.cur$lnL 
 359		}
 360		
 361		if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
 362		
 363		if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
 364		
 365		if(i%%tickerFreq==0) {
 366			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
 367			cat(". ")
 368		}
 369		
 370		if(i%%sampleFreq==0) {
 371			bundled.parms=list(gen=i, rate=cur.rate, regimes=sum(cur.delta.theta)+1, root=cur.root.theta, mean.theta=mean(cur.theta), alpha=cur.alpha, lnL=curr.lnL)
 372			generate.log(bundled.parms, cur.model, file=runLog)			
 373			parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root.theta, cur.rates=cur.rate, cur.alpha=cur.alpha, cur.theta=cur.theta, cur.delta.theta=cur.delta.theta, parmBase=parmBase)
 374		}
 375	}
 376	
 377# End rjMCMC
 378	close(errorLog)
 379	close(runLog)
 380	summarize.run(cOK, endparms, cur.model)	
 381}
 382
 383
 384## AUXILIARY FUNCTIONS
 385
 386new.ou.lik.fn <- function(rate, alpha, regimes, theta, ouch.dat, ouch.tre, ape.to.ouch.order) { # from OUCH 2.7-1:::hansen()
 387# mod 11.17.2010 JM Eastman; checked -- returns very similar if tested against fitted values from hansen()
 388#	rate=new.rate; alpha=new.alpha; regimes=new.regimes; theta=new.theta
 389	ouch.dat$regimes=as.factor(regimes[c(1,ape.to.ouch.order+1)])
 390#	theta=sort(unique(theta))
 391	alpha.ouch=as.matrix(alpha)
 392	n <- length(which(!is.na(ouch.dat['trait'])))	
 393	beta=ouch:::regime.spec(ouch.tre, ouch.dat['regimes']) 
 394	theta=as.numeric(levels(ouch:::sets.of.regimes(ouch.tre,ouch.dat['regimes'])$regimes))
 395	dat=ouch.dat$trait[n:nrow(ouch.dat)]
 396	names(dat)=ouch.dat$labels[n:nrow(ouch.dat)]
 397	ev <- eigen(alpha.ouch,symmetric=TRUE)
 398	w <- .Call(ouch:::ouch_weights,object=ouch.tre,lambda=ev$values,S=ev$vectors,beta=beta) 
 399	v <- .Call(ouch:::ouch_covar,object=ouch.tre,lambda=ev$values,S=ev$vectors,sigma.sq=rate)
 400	y <- matrix(theta,ncol=1)
 401	e <- w%*%y-dat
 402	dim(y) <- dim(y)[1]
 403	dim(e) <- n
 404	q <- e%*%solve(v,e)
 405	det.v <- determinant(v,logarithm=TRUE)
 406	dev <- n*log(2*pi)+as.numeric(det.v$modulus)+q[1,1]
 407	list(
 408		 theta=theta,
 409		 weight=w,
 410		 vcov=v,
 411		 resids=e,
 412		 lnL=-0.5*dev
 413		 )
 414}
 415
 416new.bm.lik.fn <- function(rates, root, orig.dat, vcv) { # from LJ HARMON	
 417# mod 12.02.2010 JM Eastman: using determinant()$modulus rather than det() to stay in log-space
 418	y=orig.dat
 419	
 420	b <- vcv
 421	w <- rep(root, nrow(b))
 422	num <- -t(y-w)%*%solve(b)%*%(y-w)/2
 423	den <- 0.5*(length(y)*log(2*pi) + as.numeric(determinant(b)$modulus))
 424	list(
 425		 root=root,
 426		 lnL = (num-den)[1,1]
 427		 )	
 428}
 429
 430split.or.merge <- function(cur.delta, cur.values, phy, node.des, lambda=lambda, theta=FALSE) { 
 431# mod 12.05.2010 JM Eastman
 432#	cur.delta=cur.delta.rates; cur.values=cur.rates; theta=FALSE; lambda=log(2); jumpsize=2
 433	bb=cur.delta
 434	vv=cur.values
 435	names(vv)<-names(bb)<-phy$edge[,2]
 436	new.bb=choose.one(bb)
 437	new.vv=vv
 438	
 439	s=names(new.bb[bb!=new.bb])
 440	all.shifts=as.numeric(names(bb[bb>0]))
 441	all.D=node.des[[which(names(node.des)==s)]]
 442	if(length(all.D)!=0) {
 443		untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)node.des[[which(names(node.des)==x)]]))
 444		remain.unchanged=union(all.shifts, untouchable)
 445	} else {
 446		untouchable=NULL
 447		remain.unchanged=list()
 448	}
 449	
 450	marker=match(s, names(new.vv))
 451	nn=length(vv)
 452	K=sum(bb)+1
 453	N=Ntip(phy)
 454	
 455	ncat=sum(bb)
 456	cur.vv=as.numeric(vv[marker])
 457	ca.vv=length(which(vv==cur.vv))
 458	
 459	if(sum(new.bb)>sum(bb)) {			# add transition: SPLIT
 460#		print("s")
 461		decision="split"
 462		n.desc=sum(!all.D%in%remain.unchanged)+1
 463		n.split=sum(vv==cur.vv)-n.desc
 464		if(theta) u=split.value(cur.vv=cur.vv, n.desc=n.desc, n.split=n.split, factor=lambda) else u=split.rate(cur.vv=cur.vv, n.desc=n.desc, n.split=n.split, factor=lambda)
 465		nr.split=u$nr.split
 466		nr.desc=u$nr.desc
 467		new.vv[vv==cur.vv]=nr.split
 468		if(length(remain.unchanged)==0) {	# assign new value to all descendants
 469			new.vv[match(all.D,names(new.vv))] = nr.desc
 470		} else {							# assign new value to all 'open' descendants 
 471			new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr.desc
 472		}
 473		new.vv[match(s, names(new.vv))]=nr.desc
 474		lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
 475		lnPriorRatio = log(ptpois(K+1,lambda,nn)/ptpois(K,lambda,nn))
 476		
 477	} else {							# drop transition: MERGE
 478#		print("m")
 479		decision="merge"
 480		anc = get.ancestor.of.node(s, phy)
 481		if(!is.root(anc, phy)) {			# base new rate on ancestral rate of selected branch
 482			anc.vv=as.numeric(vv[match(anc,names(vv))])
 483			na.vv=length(which(vv==anc.vv))
 484			nr=(anc.vv*na.vv+cur.vv*ca.vv)/(ca.vv+na.vv)
 485			new.vv[vv==cur.vv | vv==anc.vv]=nr
 486		} else {							# if ancestor of selected node is root, base new rate on sister node
 487			sister.tmp=get.desc.of.node(anc,phy)
 488			sister=sister.tmp[sister.tmp!=s]
 489			sis.vv=as.numeric(vv[match(sister,names(vv))])
 490			ns.vv=length(which(vv==sis.vv))
 491			nr=(sis.vv*ns.vv+cur.vv*ca.vv)/(ca.vv+ns.vv)
 492			new.vv[vv==cur.vv | vv==sis.vv]=nr			
 493		}
 494		lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
 495		lnPriorRatio = log(ptpois(K-1,lambda,nn)/ptpois(K,lambda,nn))
 496		
 497	}
 498	
 499	return(list(new.delta=new.bb, new.values=new.vv, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio, decision=decision))
 500}
 501
 502switcheroo <- function(cur.values) { 
 503# mod 11.17.2010 JM Eastman
 504#	cur.delta=cur.delta.rates; cur.values=cur.rates; theta=FALSE; lambda=log(2); jumpsize=2
 505
 506	vv=cur.values
 507	rates.sample=unique(vv)[sample(1:length(unique(vv)), 2)]
 508	new.vv=vv
 509	new.vv[which(vv==rates.sample[1])]=rates.sample[2]
 510	new.vv[which(vv==rates.sample[2])]=rates.sample[1]
 511	
 512	return(list(new.values=new.vv))
 513}
 514
 515generate.starting.point <- function(data, phy, node.des, theta=FALSE, K=FALSE, jumpsize) { 
 516# updated JME 11.14.2010
 517	nn=length(phy$edge.length)
 518	ntip=Ntip(phy)
 519	if(!K) nshifts=rtpois(1,log(ntip),nn) else nshifts=K-1
 520	if(nshifts!=0) bb=sample(c(rep(1,nshifts),rep(0,nn-nshifts)),replace=FALSE) else bb=rep(0,nn)
 521	names(bb)=phy$edge[,2]
 522	shifts=as.numeric(names(bb)[bb==1])
 523	if(theta) {
 524		min.max=c(adjust.value(min(data), jumpsize), adjust.value(max(data), jumpsize))
 525		values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
 526	} else {
 527		init.rate=fitContinuous(phy,data)[[1]]$beta
 528		min.max=c(adjust.rate(init.rate, jumpsize), adjust.rate(init.rate, jumpsize))
 529		values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
 530	}
 531	internal.shifts<-tip.shifts<-numeric(0)
 532	internal.shifts=sort(shifts[shifts>ntip])
 533	tip.shifts=shifts[shifts<=ntip]
 534	
 535	if(length(internal.shifts)==0 & length(tip.shifts)==0) {
 536		vv=rep(values, nn)
 537	} else {
 538		vv=bb
 539		vv[]=values[length(values)]
 540		i=0
 541		if(length(internal.shifts)!=0) {
 542			for(i in 1:length(internal.shifts)) {
 543				d=node.des[which(names(node.des)==internal.shifts[i])]
 544				vv[match(c(internal.shifts[i], unlist(d)), names(vv))]=values[i]
 545			}
 546		}
 547		if(length(tip.shifts)!=0) {
 548			for(j in 1:length(tip.shifts)) {
 549				vv[match(tip.shifts[j], names(vv))]=values[j+i]
 550			}
 551		}
 552	}
 553	return(list(delta=unname(bb), values=unname(vv)))
 554}
 555
 556assign.root.theta <- function(cur.theta, phy) {
 557	root=phy$edge[1,1]
 558	desc=phy$edge[which(phy$edge[,1]==root),2]
 559	root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]
 560	root.theta
 561}
 562
 563design.vcv.phylo=function(phy, array=TRUE) {
 564	n=Ntip(phy)
 565	tips=lapply(phy$edge[,2], function(x) get.descendants.of.node(x,phy,tips=TRUE))
 566	if(!array) {
 567		dd=matrix(0,n,n)
 568		design=lapply(as.numeric(phy$edge[,2]), function(x) {
 569				  new=dd
 570				  node=x
 571				  marker=as.numeric(which(phy$edge[,2]==node))
 572				  if(node<=n) tt=node else tt=tips[[marker]]
 573				  for(i in length(tt):1) {
 574					j=tt[1:i]
 575					new[tt[i],j]<-new[j,tt[i]]<-1
 576				  }	  
 577				  return(new)
 578				  
 579		}
 580		)
 581	} else {
 582		design=array(0,dim=c(n,n,length(phy$edge[,2])))
 583		for(k in 1:length(phy$edge[,2])){
 584			node=as.numeric(phy$edge[k,2])
 585			marker=k
 586			if(node<=n) tt=node else tt=tips[[marker]]
 587			for(i in length(tt):1) {
 588				j=tt[1:i]
 589				design[tt[i],j,k]<-design[j,tt[i],k]<-1
 590			}	  
 591		}		 
 592	} 
 593	return(design) 
 594}
 595
 596update.vcv=function(ape.tre, new.rates) {
 597	n=ape.tre
 598	n$edge.length=n$edge.length*new.rates
 599	vv=vcv.phylo(n)
 600	return(vv)
 601}
 602
 603update.vcv.older=function(ape.tre, design.vcv, new.rates) {
 604	n=Ntip(ape.tre)
 605	new.vcv=matrix(0,n,n)
 606	new.edges=ape.tre$edge.length*new.rates
 607	for(i in 1:length(new.rates)) {
 608		new.vcv=new.vcv+new.edges[i]*design.vcv[[i]]
 609	}
 610	return(new.vcv)
 611}
 612
 613update.vcv.new=function(ape.tre, new.rates, scalar.list) {
 614#	phy=birthdeath.tree(1,0,taxa=200); d=design.vcv.phylo(phy); s=build.scalar.sets(d); Rprof(); system.time(update.vcv.new(phy, rep(1,length(phy$edge.length)), s)); print(summaryRprof()); system.time(vcv.phylo(phy))
 615	n=Ntip(ape.tre)
 616	e=new.rates*ape.tre$edge.length
 617	ss=scalar.list*e
 618	entries=apply(ss,3,sum)
 619	return(matrix(entries,n,n))
 620}
 621
 622update.vcv.newest=function(ape.tre, new.rates, scalar.list) {
 623#	phy=birthdeath.tree(1,0,taxa=200); d=design.vcv.phylo(phy); s=build.scalar.sets(d); Rprof(); system.time(update.vcv.new(phy, rep(1,length(phy$edge.length)), s)); print(summaryRprof()); system.time(vcv.phylo(phy))
 624	n=Ntip(ape.tre)
 625	e=new.rates*ape.tre$edge.length
 626	ss=scalar.list$scalars*e
 627	new.vcv=matrix(0,n,n)
 628	dg=which(scalar.list$diags)
 629	cv=which(scalar.list$attr)
 630	new.vcv[dg]=apply(scalar.list$scalars[,,dg],2,sum)
 631	new.vcv[cv]=apply(scalar.list$scalars[,,cv],2,sum)
 632	return(new.vcv)
 633}
 634
 635
 636build.scalar.sets=function(design.vcv) {
 637	if(!is.array(design.vcv)) stop("Cannot process design matrix: must be array")
 638	d=design.vcv
 639	t=dim(design.vcv)[1]
 640	ss=array(dim=c(1,(2*t-2),(t^2)))
 641	index=0
 642	scalar.attributes=c()
 643	diags=c()
 644	for(i in 1:t) {
 645		for(j in 1:t) {
 646			index=index+1
 647			ss[,,index]=(as.logical(d[i,j,]))
 648			if(any(ss[,,index])) scalar.attributes[index]=TRUE else scalar.attributes[index]=FALSE
 649			if(i==j) diags[index]=TRUE else diags[index]=FALSE
 650		}
 651	}
 652	for(i in 1:index) {
 653		
 654	}
 655	return(list(scalars=ss, attr=scalar.attributes, diags=diags))
 656}
 657
 658get.descendants.of.node <- function(node, phy, tips=FALSE) {
 659	storage=list()
 660	if(is.null(node)) node=phy$edge[1,1]
 661	if(node%in%phy$edge[,1]) {
 662		desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))
 663		while(any(desc%in%phy$edge[,1])) {
 664			desc.true=desc[which(desc%in%phy$edge[,1])]
 665			desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))
 666			
 667			storage=list(storage,union(desc,desc.desc))
 668			
 669			desc=unlist(desc.desc)
 670		}
 671		storage=as.numeric(sort(unique(unlist(union(desc,storage)))))
 672	}
 673	if(tips) return(storage[storage<=Ntip(phy)]) else return(storage)
 674}
 675
 676get.desc.of.node <- function(node, phy) {
 677	return(phy$edge[which(phy$edge[,1]==node),2])
 678}
 679
 680get.ancestor.of.node<-function(node, phy) {
 681	anc=phy$edge[which(phy$edge[,2]==node),1]
 682	anc
 683}
 684
 685get.ancestors.of.node<-function(node, phy) {
 686	ancestors=c()
 687	a=0
 688	while(1) {
 689		a=a+1
 690		node<-as.numeric(get.ancestor.of.node(node, phy))
 691		if(!length(node)) break() else ancestors[a]=node
 692	}
 693	return(ancestors)
 694}
 695
 696choose.one <- function(cur.delta)
 697{
 698	bb=cur.delta
 699	s=sample(1:length(bb),1)
 700	bb[s]=1-bb[s]
 701	bb
 702}
 703
 704is.root<-function(node,phy) {
 705	if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)
 706}
 707			   
 708within.range=function(x, min, max) {			 
 709	a=sign(x-min)
 710	b=sign(x-max)
 711	if(abs(a+b)==2) return(FALSE) else return(TRUE)
 712}
 713			   
 714
 715assess.lnR <- function(lnR) {
 716	if(is.na(lnR) || abs(lnR)==Inf) {
 717		error=TRUE
 718		r=0
 719	} else {
 720		if(lnR < -20) {
 721			r=0 
 722		} else if(lnR > 0) {
 723			r=1 
 724		} else {
 725			r=exp(lnR)
 726		}
 727		error=FALSE
 728	}
 729	return(list(r=r, error=error))
 730}
 731
 732apply.BMM.to.apetree<-function(apetree, rates) {
 733	apetree$edge.length=apetree$edge.length*rates
 734	return(apetree)
 735}
 736
 737ape.to.ouch.tre<-function (tree, scale = FALSE, branch.lengths = tree$edge.length) 
 738{
 739    if (!inherits(tree, "phylo")) 
 740	stop(sQuote("tree"), " must be of class ", sQuote("phylo"))
 741    nnodes <- nrow(tree$edge) + 1
 742    n.term <- length(tree$tip.label)
 743    n.int <- nnodes - n.term
 744    tmp <- matrix(NA, nnodes, 2)
 745    tmp[-1, 1:2] <- tree$edge
 746    tmp[1, 2] <- nnodes + 1
 747    bl <- c(0, branch.lengths)
 748	bl.ape=bl
 749	names(bl.ape)=c("root", tree$edge[,2])
 750    reord <- order(-tmp[, 2])
 751    bl <- bl[reord]
 752	bl.ouch <- bl.ape[reord]
 753    tmp <- tmp[reord, ]
 754    node <- seq(nnodes)
 755    ancestor <- rep(NA, nnodes)
 756    for (n in 2:nnodes) {
 757        anc <- which(tmp[, 2] == tmp[n, 1])
 758        if (length(anc) > 1) 
 759		stop("invalid tree")
 760        if (length(anc) > 0) {
 761            ancestor[n] <- node[anc]
 762        }
 763        else {
 764            ancestor[n] <- node[1]
 765        }
 766    }
 767    if (is.null(tree$node.label)) 
 768	tree$node.label <- rep("", n.int)
 769    species <- rev(c(tree$tip.label, tree$node.label[-1], tree$node.label[1]))
 770    times <- rep(NA, nnodes)
 771    for (n in 1:nnodes) times[n] <- ouch:::branch.height(node, ancestor, bl, n)
 772    if (is.logical(scale)) {
 773        if (is.na(scale)) 
 774		stop("if ", sQuote("scale"), " is logical, it must be either true or false")
 775        if (scale) 
 776		times <- times/max(times)
 777    }
 778    else if (is.numeric(scale)) {
 779        times <- times/abs(scale)
 780    }
 781    else {
 782        stop(sQuote("scale"), " must be either logical or numeric")
 783    }
 784    return(list(ouch.tre=ouchtree(nodes = node, ancestors = ancestor, times = times, labels = species), ape.ordered=bl.ouch))
 785}
 786
 787adjust.value <- function(value, jumpsize, theta=FALSE) {
 788# mod 10.20.2010 JM Eastman
 789	vv=value
 790	if(runif(1)<0.5 | length(unique(vv))==1) {
 791		rch <- runif(1, min = -jumpsize, max = jumpsize)
 792		return(vv[]+rch)
 793	} else {
 794		return((vv-mean(vv))*runif(1,min=-jumpsize, max=jumpsize)+mean(vv))
 795	}
 796}
 797
 798adjust.rate <- function(rate, jumpsize) {
 799# mod 10.20.2010 JM Eastman
 800	vv=rate
 801	v=log(vv)
 802	rch <- runif(1, min = -jumpsize, max = jumpsize)
 803	v=exp(v[]+rch)
 804	v
 805}
 806
 807prepare.data <- function(phy, data) {
 808	td <- treedata(phy, data, sort = T)	
 809	return(list(ape.tre=td$phy, orig.dat=td$data[,1]))
 810}
 811
 812prepare.ouchdata <- function(phy, data) {
 813	td <- treedata(phy, data, sort = T)	
 814	
 815	ot <- ape.to.ouch.tre(td$phy->ape.tre, scale=FALSE)		
 816	ouch.tre=ot$ouch.tre
 817	ouch.dat=data.frame(as.numeric(ouch.tre@nodes), as.character(ouch.tre@nodelabels))
 818	names(ouch.dat)=c("nodes","labels")
 819	ouch.dat[ouch.dat[,2]=="",2]=NA
 820	ouch.dat$trait=NA
 821	mM=match(ouch.dat$labels,names(data))
 822	for(i in 1:nrow(ouch.dat)){
 823		if(!is.na(mM[i])){
 824			ouch.dat$trait[i]=data[mM[i]]	
 825		}
 826	}
 827	return(list(ouch.tre=ouch.tre, ouch.dat=ouch.dat, ape.tre=td$phy, orig.dat=td$data[,1], ape.ordered=ot$ape.ordered))
 828}
 829
 830summarize.run<-function(cOK, endparms, cur.model) {
 831#	endparmsOU = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
 832
 833	if(cur.model=="BM") {
 834		names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root")
 835	} else {
 836		names(endparms)<-names(cOK)<-c("rate", "root", "alpha", "regimes", "theta")
 837	}
 838	names(endparms)=paste("pr.",names(endparms),sep="")
 839	names(cOK)=paste("ok.",names(cOK),sep="")
 840	cat("\n")
 841	if(cur.model=="BM") {
 842		print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
 843		print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
 844	} else {
 845		print(endparms[paste("pr.", c("rate", "root", "alpha", "regimes", "theta"),sep="")])
 846		print(cOK[paste("ok.", c("rate", "root", "alpha", "regimes", "theta"),sep="")])
 847	}
 848}
 849
 850generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {
 851#	bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=sum(cur.delta.theta)+1, theta=mean(cur.theta), lnL=curr.lnL)
 852	res=bundled.parms
 853
 854	if(init) {
 855		if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "rate", "root", "alpha", "regimes", "mean.theta", "lnL", sep="\t")
 856	} else {
 857		if(cur.model=="BM") {
 858			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")
 859		} else {
 860			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$rate), sprintf("%.2f", res$root), sprintf("%.4f", res$alpha), res$regimes, sprintf("%.3f", res$mean.theta), sprintf("%.3f", res$lnL),sep="\t")
 861		}
 862	}
 863	write(msg, file=file, append=TRUE)
 864}
 865
 866parlogger<-function(model, init=FALSE, node.des, i, curr.lnL, cur.root=NULL, cur.rates=NULL, cur.delta.rates=NULL, cur.alpha=NULL, cur.theta=NULL, cur.delta.theta=NULL, parmBase) {	
 867	if(init) {
 868		msg.long=names(node.des[-1])
 869		if(model=="BM") {
 870			msg.short=paste("gen", "root", "lnL", sep="\t") 
 871			parlogs=paste(parmBase, paste(c("summary", "rates", "rate.shifts"), "txt", sep="."), sep="")
 872			sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
 873			sapply(parlogs[2:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
 874		} else {
 875			msg.short=paste("gen", "root", "rate", "alpha", "lnL", sep="\t")
 876			parlogs=paste(parmBase, paste(c("summary", "optima", "optima.shifts"), "txt", sep="."), sep="")
 877			sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
 878			sapply(parlogs[2:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
 879		}
 880	} else {
 881		if(model=="BM") {
 882			parlogs=paste(parmBase, paste(c("summary", "rates", "rate.shifts"), "txt", sep="."), sep="")
 883			outlist=list(paste(i, cur.root, curr.lnL, sep="\t"), cur.rates, cur.delta.rates) 
 884			sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE)) 
 885		} else {
 886			parlogs=paste(parmBase, paste(c("summary", "optima", "optima.shifts"), "txt", sep="."), sep="")
 887			outlist=list(paste(i, cur.root, cur.rates, cur.alpha, curr.lnL, sep="\t"), cur.theta, cur.delta.theta) 
 888			sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE)) 
 889		}
 890	}
 891}
 892
 893generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {
 894	if(init) {
 895		initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)
 896	} else {
 897		write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)
 898	}
 899}
 900
 901determine.accepted.proposal<-function(startparms, endparms, cOK) {
 902	which(startparms!=endparms)->marker
 903	cOK[marker]=cOK[marker]+1
 904	cOK
 905}
 906
 907rtpois<-function(N, lambda, k) {
 908	p=ppois(k, lambda, lower.tail=FALSE)
 909	out=qpois(runif(N, min=0, max=1-p), lambda)
 910	out
 911}
 912				 
 913ptpois<-function(x, lambda, k) {
 914	p.k=ppois(k, lambda, lower.tail=FALSE)
 915	p.x=ppois(x, lambda, lower.tail=FALSE)
 916	ptp=p.x/(1-p.k)
 917	return(ptp)
 918}
 919				 
 920## SUMMARIZING MCMC -- PLOTTING FUNCTIONS ##
 921shifts.plot=function(phy, base.dir, burnin=0, level=0.03, internal.only=FALSE, paint.branches=TRUE, pdf=TRUE, verbose=TRUE, ...) {
 922#	phy; base.dir; burnin=0; level=0.03; internal.only=FALSE; paint.branches=TRUE; pdf=TRUE; verbose=TRUE
 923	nullphy=phy
 924	nullphy$tip.label[]=""
 925	color.length=21
 926	require(ape)
 927	oldwd=getwd()
 928	setwd(base.dir)
 929	files=dir()
 930	if(any(grep("optima", files))) optima=TRUE else optima=FALSE
 931	if(optima) {
 932		shifts.file=files[grep("optima.shifts.txt", files)]
 933		estimates.file=files[grep("optima.txt", files)]
 934	} else {
 935		shifts.file=files[grep("rate.shifts.txt", files)]
 936		estimates.file=files[grep("rates.txt", files)]
 937	}
 938	if(pdf | verbose) {
 939		lab=paste(strsplit(base.dir, ".", fixed=TRUE)[[1]][1:2],collapse=".")
 940		out.base=paste(lab, paste("bi", burnin, sep=""), paste("fq", level, sep=""), sep=".")
 941		outdir=paste("./results")
 942		if(!file.exists(outdir)) dir.create(outdir)
 943	}
 944	cat("READING shifts...\n")
 945	results=read.table(shifts.file)
 946	names(results)=as.numeric(results[1,])
 947	burnin=ceiling(burnin*nrow(results))
 948	results=results[-c(1:(burnin+1)),]
 949	if(!all(names(results)%in%phy$edge[,2])) stop("Cannot process results: check that tree matches posterior summaries")
 950	hits.tmp<-shifts<-apply(results,1,sum)
 951	hits=length(hits.tmp[hits.tmp>0])
 952	aa.tmp=apply(results, 2, function(x) sum(x))
 953	aa.tmp=aa.tmp/hits
 954	aa=aa.tmp[aa.tmp>=level]
 955	aa=aa[order(aa, decreasing=TRUE)]
 956	aa.nodes=aa[which(as.numeric(names(aa))>Ntip(phy))]
 957	aa.tips=aa[which(as.numeric(names(aa))<=Ntip(phy))]
 958	aa=aa.nodes
 959	nodes=as.numeric(names(aa))
 960	nodes=nodes[nodes>Ntip(phy)]
 961	desc=lapply(nodes, function(x) {
 962				foo=get.descendants.of.node(x,phy)
 963				if(length(foo)) return(phy$tip.label[foo[foo<=Ntip(phy)]]) else return(NULL)
 964				}
 965				)
 966	cat("READING estimates...\n")
 967	ests=read.table(estimates.file)
 968	names(ests)=ests[1,]
 969	ests=ests[-c(1:(burnin+1)),]
 970	average.ests.tmp=apply(ests,2,mean)
 971	average.ests=average.ests.tmp-median(average.ests.tmp)
 972	if(paint.branches) {
 973		require(colorspace)
 974		cce=diverge_hcl(2*color.length, power = 0.5)
 975		e.seq=seq(min(average.ests),max(average.ests),length=color.length)
 976		color.gamut=seq(-max(abs(e.seq)), max(abs(e.seq)), length=length(cce))
 977		colors.branches=sapply(average.ests, function(x) {y=cce[which(min(abs(color.gamut-x))==abs(color.gamut-x))]; return(y[sample(1:length(y),1)])})
 978		colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]
 979	} else {
 980		colors.branches=1
 981	}
 982	ccx=diverge_hcl(color.length, c = c(100, 0), l = c(50, 90), power = 1.1)
 983	c.seq=round(seq(-1,1,by=0.1),digits=1)
 984	
 985	if(length(nodes)) {
 986		require(colorspace)
 987		cols=sapply(nodes, function(x) {
 988					a=get.ancestor.of.node(x, phy)
 989					comp=ests[,which(names(ests)==a)]
 990					this=ests[,which(names(ests)==x)]
 991					if(!length(comp)) { # dealing with first descendant of root
 992					d=get.desc.of.node(a, phy)
 993					d=d[which(d!=x)]
 994					
 995# find if sister descendant also experienced rate shift 
 996					d.shifts=results[, which(names(results)==d)]
 997					comp=ests[,which(names(ests)==d)]
 998					x.res=sapply(1:length(d.shifts), function(y) {
 999								 if(d.shifts[y]==0) {
1000								 zz=this[y]-comp[y]
1001								 if(zz>0) {
1002								 return(1) 
1003								 } else {
1004								 if(zz<0) return(-1) else return(0)
1005								 }
1006								 } else {
1007								 return(0)
1008								 }
1009								 }
1010								 )
1011					x.res=mean(x.res[x.res!=0])
1012					if(is.na(x.res)) x.res=0
1013					} else {
1014					yy=this-comp
1015					zz=yy
1016					zz[yy>0]=1
1017					zz[yy<0]=-1
1018					zz[yy==0]=0
1019					x.res=mean(zz[zz!=0])
1020					if(is.na(x.res)) x.res=0
1021					}
1022					return(x.res)
1023					}
1024					)
1025		colors.nodes=ccx[match(round(cols,1),c.seq)]
1026		names(colors.nodes)=nodes
1027		colors.nodes=colors.nodes[which(as.numeric(names(colors.nodes))>Ntip(phy))]
1028	} else {
1029		colors.nodes=NULL
1030		cols=NULL
1031	}
1032	
1033	if(length(aa.tips) & !internal.only) {
1034		tt<-tt.cex<-rep(0,Ntip(phy))
1035		tt[as.numeric(names(aa.tips))]=1
1036#		tt.cex[as.numeric(names(aa.tips))]=aa.tips/max(aa.tmp)
1037		tt.cex[as.numeric(names(aa.tips))]=aa.tips
1038		
1039		tt.col=sapply(names(aa.tips), function(x) {
1040					  a=get.ancestor.of.node(x, phy)
1041					  comp=ests[,which(names(ests)==a)]
1042					  this=ests[,which(names(ests)==x)]
1043					  if(!length(comp)) { # dealing with first descendant of root
1044					  d=get.desc.of.node(a, phy)
1045					  d=d[which(d!=x)]
1046					  
1047# find if sister descendant also experienced rate shift 
1048					  d.shifts=results[, which(names(results)==d)]
1049					  comp=ests[,which(names(ests)==d)]
1050					  x.res=sapply(1:length(d.shifts), function(y) {
1051								   if(d.shifts[y]==0) {
1052								   zz=this[y]-comp[y]
1053								   if(zz>0) {
1054								   return(1) 
1055								   } else {
1056								   if(zz<0) return(-1) else return(0)
1057								   }
1058								   } else {
1059								   return(0)
1060								   }
1061								   }
1062								   )
1063					  x.res=mean(x.res[x.res!=0])
1064					  } else {
1065					  yy=this-comp
1066					  zz=yy
1067					  zz[yy>0]=1
1068					  zz[yy<0]=-1
1069					  zz[yy==0]=0
1070					  x.res=mean(zz[zz!=0])
1071					  }
1072					  return(x.res)
1073					  }
1074					  )
1075		tt.ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
1076		tt.c.seq=round(seq(-1,1,by=0.1),digits=1)
1077		tt.colors.nodes=tt.ccx[match(round(tt.col,1),tt.c.seq)]
1078		names(tt.colors.nodes)=as.numeric(names(aa.tips))
1079		colors.tips.tmp=tt.colors.nodes[order(as.numeric(names(tt.colors.nodes)))]
1080		colors.tips=rep(NA, Ntip(phy))
1081		colors.tips[as.numeric(names(colors.tips.tmp))]=colors.tips.tmp
1082		tip.names=as.list(phy$tip.label[as.numeric(names(aa.tips))])
1083		names(tip.names)=paste("node",names(aa.tips),sep=".")
1084	}	else {
1085		tt.cex=NULL
1086		tt.col=NULL
1087		tip.names=NULL
1088		colors.tips=NULL
1089		tt=rep(0,Ntip(phy))
1090	}
1091	
1092## PLOTTING OF TREE
1093	CEX=max(c(0.05, (2+-0.36*log(Ntip(phy))) ))	
1094	if(pdf) pdf(file=paste(outdir, paste(out.base,"pdf",sep="."),sep="/"))
1095	plot(phy, cex=CEX, lwd=0.4, edge.width=0.5, edge.color=colors.branches, show.tip.label=TRUE, label.offset=strwidth(par("pch"),cex=1.25), ...)
1096	nn=(Ntip(phy)+1):max(phy$edge[,2])
1097	ll<-cc<-rr<-rep(0,length(nn))
1098	ll[nn%in%nodes]=1
1099#	if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]/max(c(aa,tt.cex)) 
1100	if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]
1101	rr[nn%in%nodes]=colors.nodes[order(names(colors.nodes))]
1102	nodelabels(pch=ifelse(ll==1, 21, NA), cex=2*cc, col=rr, bg=rr, lwd=0.5)
1103	
1104	tiplabels(pch=ifelse(tt==1, 21, NA), cex=2*tt.cex, col=colors.tips, bg=colors.tips, lwd=0.5)
1105	legend("topright", title=toupper("direction"), cex=0.5, pt.cex=1, text.col="darkgray", legend = sprintf("%6.1f", rev(c.seq[seq(1,color.length,by=2)])), pch=21, ncol=1, col = "darkgray", pt.bg = rev(ccx[seq(1,color.length,by=2)]), box.lty="blank", border="white")
1106	if(length(nodes) | (length(aa.tips) & !internal.only)) {
1107		legend("bottomright", title=toupper("frequency"), cex=0.5, pt.cex=2*(seq(0, 1, length=9)), text.col="darkgray", legend = sprintf("%10.3f", seq(0, 1, length=9)), pch=21, ncol=1, col = "darkgray", pt.bg = "white", box.lty="blank", border="white")
1108	}
1109	if(pdf) dev.off()
1110	
1111	if(length(nodes)) {
1112		names(cols)=names(aa)
1113		names(desc)=paste("node",nodes,sep=".")
1114	}
1115##
1116	
1117	if(verbose) {
1118		summary.table=c(hits/nrow(results), mean(shifts), median(shifts))
1119		names(summary.table)=c("shift.prob", "mean.shift","median.shifts")
1120		write.table(summary.table, file=paste(outdir, paste(out.base, "run.summary.txt", sep="."), sep="/"), quote=FALSE, col.names=FALSE)
1121		if(length(nodes)) {
1122			for(nn in 1:length(nodes)) {
1123				write.table(c(nodes[nn], desc[[nn]]), file=paste(outdir, paste(out.base, "shift.descendants.txt", sep="."), sep="/"), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
1124			}
1125		}
1126		if(length(aa.tips)) {
1127			for(nn in 1:length(aa.tips)) {
1128				write.table(c(as.numeric(names(aa.tips)[nn]), tip.names[[nn]]), file=paste(outdir, paste(out.base, "shift.descendants.txt", sep="."), sep="/"), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
1129			}
1130		}
1131		allres=data.frame(matrix(NA, nrow=nrow(phy$edge), ncol=3))
1132		if(length(aa)) {
1133			x=as.numeric(names(aa))
1134			nodres=merge(as.data.frame(aa),as.data.frame(cols),by=0)
1135			allres[nodres[,1],]=nodres
1136		}
1137		if(length(aa.tips)) {
1138			x=as.numeric(names(aa.tips))
1139			tipres=merge(as.data.frame(aa.tips), as.data.frame(tt.col),by=0)
1140			allres[tipres[,1],]=tipres
1141		}
1142		allres=allres[!apply(allres, 1, function(x)any(is.na(x))),]
1143		allres=allres[order(allres[,2], decreasing=TRUE),]
1144		if(nrow(allres)>0) {
1145			allres=data.frame(allres)
1146			names(allres)=c("node", "conditional.shift.probability", "relative.shift.direction")
1147			write.table(allres, file=paste(outdir, paste(out.base, "estimates.summary.txt", sep="."), sep="/"), quote=FALSE, row.names=FALSE)
1148		}
1149	}
1150	setwd(oldwd)
1151	
1152	if(!optima) {
1153		return(list(desc=desc, tips=tip.names, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), nodeshift.prop=aa, nodeshift.direction=cols, tipshift.prop=aa.tips, tipshift.direction=tt.col, mean.rates=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
1154	} else {
1155		return(list(desc=desc, tips=tip.names, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), nodeshift.prop=aa, nodeshift.direction=cols, tipshift.prop=aa.tips, tipshift.direction=tt.col, mean.optima=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
1156	}
1157}
1158
1159
1160
1161plot.quant<-function(phy, data, data.names, shrinker=1, relative=TRUE, cex=0.5, adj=c(15,5), lwd=0.5, font=1, ...){
1162	std=function(x,max,min){return((x-min)/(max-min))}
1163	if(relative) {
1164		data=sapply(data, function(x) std(x, max(data), min(data)))
1165	}
1166	
1167	f=match(phy$tip.label, data.names)
1168	if(is.na(sum(f))) {
1169		stop("tips cannot be matched to tree")
1170	}
1171	else {
1172		data=data[f]
1173		phy$trait=data
1174		plot.phylo(phy, show.tip.label=T, label.offset=adj[1], cex=cex, font=font, ...)
1175		tiplabels(text=NULL, pch=21, adj=adj[2], cex=c(phy$trait/shrinker), bg="white",lwd=lwd)
1176		} 
1177}
1178
1179branchcol.plot=function(phy, cur.rates, color.length=4, ...) {
1180	color.length=color.length
1181	require(ape)
1182	require(colorspace)
1183	ests=cur.rates
1184	names(ests)=phy$edge[,2]
1185	cce=diverge_hcl(color.length, power = 0.5)
1186	e.seq=seq(min(ests),max(ests),length=color.length)
1187	colors.branches=sapply(ests, function(x) cce[which(min(abs(e.seq-x))==abs(e.seq-x))])
1188	names(colors.branches)=names(ests)
1189	colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]	
1190	plot(phy, cex=0.1, lwd=0.4, edge.width=0.5, edge.color=colors.branches, ...)
1191}
1192
1193
1194prior.posterior.plot=function(data, rpois.lambda) {
1195# data should be a vector of number of shifts, for instance, for all interations across the MCMC sampling (post-burnin)
1196	require(ggplot2)
1197	dataset <- data.frame(variable = gl(2, length(data), labels = c("posterior", "prior")), value = c(data, rpois(length(data),rpois.lambda))) 
1198	ggplot(data = dataset, aes(x = value, fill = variable)) + geom_histogram(position = "dodge") + scale_fill_manual(values = c("gray", "black"))	
1199}
1200
1201prior.posterior.plot.alt=function(outlogs, lambda, ...){
1202	foo=outlogs
1203	d=lapply(dir(foo),function(x) read.table(paste(foo,x,sep="/"),header=T))
1204	names=dir(foo)
1205	lapply(names,function(x)paste(strsplit(x,".",fixed=T)[[1]][1:2],collapse="."))->nn
1206	
1207	
1208	n=length(nn)
1209	ss=(1:10)^2
1210	sn=ss-n
1211	s=ss[which(sn==min(sn[sn>=0]))]
1212	x=sqrt(s)
1213	layout(matrix(1:s, x, x))
1214		
1215	for(i in 1:length(d)){
1216		if(any(names(d[[i]])=="regimes")) {
1217			plot(density(d[[i]]$regimes), bty="n",xlim=c(0,10), main=nn[[i]],xlab="regimes") 
1218		} else {
1219			plot(density(d[[i]]$rates),bty="n",xlim=c(0,10), main=nn[[i]],xlab="rates")
1220		}
1221		lines(density(rpois(nrow(d[[i]]),lambda)), ...)
1222	}	
1223}
1224
1225split.rate=function(cur.vv, n.desc, n.split, factor=log(2)){
1226	dev=cur.vv-exp(log(cur.vv)+runif(1, -factor, factor))
1227	nr.desc=cur.vv+dev/n.desc
1228	nr.split=cur.vv-dev/n.split
1229#	print((nr.split*n.split+nr.desc*n.desc)/(n.desc+n.split)-cur.vv)
1230	return(list(nr.desc=nr.desc, nr.split=nr.split))
1231}
1232
1233split.value=function(cur.vv, n.desc, n.split, factor=log(2)){
1234	dev=cur.vv-adjust.value(cur.vv, factor)
1235	nr.desc=cur.vv + dev/n.desc
1236	nr.split=cur.vv - dev/n.split
1237#	print((nr.split*n.split+nr.desc*n.desc)/(n.desc+n.split)-cur.vv)
1238	return(list(nr.desc=nr.desc, nr.split=nr.split))
1239}
1240
1241compare.rates=function(nodes=list(A=c(NULL,NULL),B=c(NULL,NULL)), phy, posterior.rates, ...){
1242	if(is.null(names(nodes))) names(nodes)=c("A","B")
1243	rates.a=rates[,match(nodes[[1]],names(rates))]
1244	rates.b=rates[,match(nodes[[2]],names(rates))]
1245	edges.a=phy$edge.length[match(nodes[[1]],phy$edge[,2])]
1246	edges.b=phy$edge.length[match(nodes[[2]],phy$edge[,2])]
1247	weights.a=edges.a/sum(edges.a)
1248	weights.b=edges.b/sum(edges.b)
1249	r.scl.a=apply(rates.a, 1,function(x) sum(x*weights.a))
1250	r.scl.b=apply(rates.b, 1,function(x) sum(x*weights.b))
1251	return(list(scl.A=r.scl.a, scl.B=r.scl.b, r.test=randomization.test(r.scl.a, r.scl.b, ...)))	
1252}
1253
1254randomization.test=function(obs=obs, exp=exp, iter=10000, verbose=F, two.tailed=F){
1255	if(verbose && length(obs)<iter) warning("iterations exceeds observed data")
1256	if(verbose) warning(paste("p-value roughly approximated for ",length(exp), " values\n",sep=""))
1257	
1258	O=sample(obs, iter, replace=TRUE)
1259	E=sample(exp, iter, replace=TRUE)
1260	obj=data.frame(obs, exp)
1261	names(obj)=c("obs","exp")
1262	
1263	result=list(c(O-E))
1264	p=array(NA, iter)
1265	
1266	for(i in 1:length(result[[1]])){
1267		if(result[[1]][i]>0) {
1268			p[i]=1
1269		} else {
1270			if(result[[1]][i]==0) {
1271				p[i]=sample(c(0,1),1)
1272			} else {
1273				p[i]=0
1274			}
1275		}
1276	}
1277	
1278	p.g=round(1-(sum(p)/iter), digits=7)
1279	p.l=1-p.g
1280	
1281#	names(p.g)="greater"
1282#	names(p.l)="lesser"
1283	
1284	if(verbose) res=list(c(O-E),greater=p.g,lesser=p.l) else res=list(greater=p.g,lesser=p.l)
1285	if(two.tailed) res=min(c(p.g,p.l))*2 else res=res
1286	return(res)
1287}
1288
1289collect.nodes=function(tips,phy,strict=FALSE){
1290	if(length(tips)==1) stop("Must supply at least two tips.")
1291	tt=phy$tip.label
1292	tt=lapply(tips, function(x) which(phy$tip.label==x))
1293	nodes=lapply(tt, function(x) get.ancestors.of.node(x, phy))
1294	all.nodes=nodes[[1]]
1295	for(i in 2:length(nodes)) {
1296		all.nodes=intersect(all.nodes,nodes[[i]])
1297	}
1298	base.node=max(all.nodes)
1299	if(strict) {
1300		u=unlist(nodes)
1301		return(c(sort(unique(u[u>=base.node])), unlist(tt)))
1302	} else {
1303		return(c(base.node,get.descendants.of.node(base.node,phy)))
1304	}
1305}
1306
1307find.relatives=function(tips,phy){
1308	nn=collect.nodes(tips, phy, strict=FALSE)
1309	return(phy$tip.label[nn[nn<=Ntip(phy)]])
1310}