PageRenderTime 55ms CodeModel.GetById 12ms app.highlight 34ms RepoModel.GetById 1ms app.codeStats 0ms

/archive/source.archive/bm.only/trait.rjmcmc.11152010.R

http://github.com/eastman/auteur
R | 738 lines | 627 code | 79 blank | 32 comment | 126 complexity | 711f513ed6475d73e7e22fa77771b7a0 MD5 | raw file
  1## Written by LJ HARMON (2008), AL HIPP (2008), and JM EASTMAN (2010)
  2
  3rjMCMC.trait<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, lambdaK=log(2), constrainK=FALSE, jumpsize=2, fileBase="result", simplestart=FALSE) 
  4{
  5	model="BM"
  6	heat=1
  7#	require(ouch)
  8	require(geiger)
  9		
 10### prepare data for rjMCMC
 11	cur.model		<- model								
 12
 13	dataList		<- prepare.data(phy, data)			
 14	ape.tre			<- cur.tre <- dataList$ape.tre			
 15	orig.dat		<- dataList$orig.dat						
 16	nn				<- length(ape.tre$edge.length)	
 17	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
 18	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
 19	subtrees		<- subtrees(phy)
 20	subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
 21
 22# initialize parameters
 23	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
 24
 25	if(simplestart | is.numeric(constrainK) ) {
 26		if(is.numeric(constrainK)) {
 27			init.rate	<- generate.starting.point(data, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize)
 28		} else {
 29			init.rate	<- list(values=rep(fitContinuous(phy,data)$Trait1$beta,length(phy$edge.length)),delta=rep(0,length(phy$edge.length)))
 30		}
 31	} else {
 32		init.rate	<- generate.starting.point(data, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize )
 33	}
 34	
 35    cur.rates		<- init.rate$values			
 36	cur.delta.rates	<- init.rate$delta
 37	cur.root		<- adjust.value(mean(orig.dat), jumpsize)
 38	cur.tre			<- apply.BMM.to.apetree(ape.tre, cur.rates)
 39	
 40	mod.cur = new.bm.lik.fn(cur.rates, cur.root, cur.tre, orig.dat)
 41	
 42# proposal counts
 43	nRateProp =		0	
 44	nRateSwapProp = 0									
 45	nRcatProp =		0										
 46	nRootProp =		0
 47	nRateOK =		0											
 48	nRateSwapOK =	0
 49	nRcatOK =		0											
 50	nRootOK =		0
 51
 52	cOK=c(												
 53		nRateOK,
 54		nRateSwapOK,
 55		nRcatOK, 
 56		nRootOK 
 57	)
 58		
 59	tickerFreq=ceiling(ngen/30)
 60	
 61# file handling
 62	parmBase=paste(model, fileBase, "parameters/",sep=".")
 63	if(!file.exists(parmBase)) dir.create(parmBase)
 64	parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
 65	errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
 66	generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
 67	runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
 68	generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
 69
 70
 71### Begin rjMCMC
 72    for (i in 1:ngen) {
 73        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
 74		startparms = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp)
 75
 76## BM IMPLEMENTATION ##
 77		while(1) {
 78			if (runif(1) < (2 * probMergeSplit) & !constrainK) {			# adjust rate categories
 79				nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, node.des, theta=FALSE, lambdaK, jumpsize)
 80				new.rates=nr$new.values
 81				new.delta.rates=nr$new.delta
 82				new.root=cur.root
 83				nRcatProp=nRcatProp+1
 84				lnHastingsRatio=nr$lnHastingsRatio
 85				lnPriorRatio=nr$lnPriorRatio
 86				break()
 87			} else { 
 88				if(runif(1)<probRoot) {										# adjust root
 89					new.root=adjust.value(cur.root, jumpsize)
 90					new.rates=cur.rates
 91					new.delta.rates=cur.delta.rates
 92					nRootProp=nRootProp+1
 93					break()
 94				} else {													
 95					if(runif(1)>0.1 & sum(cur.delta.rates)!=0) {			# swap rate classes
 96						nr=switcheroo(cur.delta.rates, cur.rates, ape.tre, node.des, theta=FALSE, jumpsize)
 97						new.rates=nr$new.values ##
 98						new.delta.rates=nr$new.delta ##
 99						new.root=cur.root
100						nRateSwapProp=nRateSwapProp+1
101						break()
102					} else {												# adjust rates
103						new.rates=adjust.rate(cur.rates, jumpsize)
104						new.delta.rates=cur.delta.rates
105						new.root=cur.root
106						nRateProp=nRateProp+1
107						break()
108					} 
109				}
110			}
111		}
112		
113		new.tre=apply.BMM.to.apetree(ape.tre, new.rates)
114		mod.new=try(new.bm.lik.fn(new.rates, new.root, new.tre, orig.dat),silent=TRUE)
115		
116		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=Inf}
117		if(!is.infinite(mod.new$lnL)) {
118			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
119		} else {
120			lnLikelihoodRatio = -Inf
121			failures=paste("./failed", model, "parameters/",sep=".")
122			failed.trees=paste(failures, "trees", sep="/")
123			failed.plots=paste(failures, "pdf", sep="/")
124			lapply(list(failures, failed.trees, failed.plots), function(x) if(!file.exists(x)) dir.create(x))
125				
126			pdf(file=paste(failed.plots, paste(i,fileBase,"failed.model.pdf",sep="."),sep="/"))
127				plot(new.tre, tip=FALSE)
128			dev.off()
129			
130			write.tree(new.tre, file=paste(failed.trees, paste(i,fileBase,"tre",sep="."),sep="/"))
131			write.table(matrix(c(i, new.rates),nrow=1),file=paste(failures,"failed.rates.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
132			write.table(matrix(c(i, new.root),nrow=1),file=paste(failures,"failed.root.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
133		}
134		
135	# compare likelihoods
136		endparms = c(nRateProp=nRateProp, nRateSwapProp=nRateSwapProp, nRcatProp=nRcatProp, nRootProp=nRootProp)
137
138		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
139
140		if (runif(1) <= r$r) {			# adopt proposal
141			decision="adopt"
142			cur.root <- new.root
143			cur.rates <- new.rates
144			cur.delta.rates <- new.delta.rates
145			curr.lnL <- mod.new$lnL
146			cur.tre <- new.tre
147			mod.cur <- mod.new
148			cOK <- determine.accepted.proposal(startparms, endparms, cOK)
149		} else {						# deny proposal	
150			decision="reject"
151			curr.lnL <- mod.cur$lnL 
152		}
153		
154		if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
155
156		if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
157		
158		if(i%%tickerFreq==0) {
159			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
160			cat(". ")
161		}
162		
163		if(i%%sampleFreq==0) {
164			bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, lnL=curr.lnL)
165			generate.log(bundled.parms, cur.model, file=runLog)			
166			parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root, cur.rates=cur.rates, cur.delta.rates=cur.delta.rates, parmBase=parmBase)
167		}
168	}
169	
170# End rjMCMC
171	close(errorLog)
172	close(runLog)
173	summarize.run(cOK, endparms, cur.model)	
174}
175
176
177## AUXILIARY FUNCTIONS
178
179new.bm.lik.fn <- function(rates, root, ape.tre, orig.dat) { # from LJ HARMON	
180# mod 10.18.2010 JM Eastman: using determinant()$modulus rather than det() to stay in log-space
181	tree=ape.tre
182	y=orig.dat
183	
184	b <- vcv.phylo(ape.tre)
185	w <- rep(root, nrow(b))
186	num <- -t(y-w)%*%solve(b)%*%(y-w)/2
187	den <- 0.5*(length(y)*log(2*pi) + as.numeric(determinant(b)$modulus))
188	list(
189		 root=root,
190		 lnL = (num-den)[1,1]
191		 )	
192}
193
194split.or.merge <- function(cur.delta, cur.values, phy, node.des, theta=FALSE, lambda=lambda, jumpsize=jumpsize) { 
195# mod 09.17.2010 JM Eastman
196	bb=cur.delta
197	vv=cur.values
198	names(vv)<-names(bb)<-phy$edge[,2]
199	new.bb=choose.one(bb)
200	new.vv=vv
201	
202	s=names(new.bb[bb!=new.bb])
203	all.shifts=as.numeric(names(bb[bb>0]))
204	all.D=node.des[[which(names(node.des)==s)]]
205	if(length(all.D)!=0) {
206		untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)node.des[[which(names(node.des)==x)]]))
207		remain.unchanged=union(all.shifts, untouchable)
208	} else {
209		untouchable=NULL
210		remain.unchanged=list()
211	}
212	
213	marker=match(s, names(new.vv))
214	nn=length(vv)
215	K=sum(bb)+1
216	N=Ntip(phy)
217	
218	if(sum(new.bb)>sum(bb)) {			# add transition: SPLIT
219#		print("s")
220		if(theta) {							# assign new theta to current node
221			new.vv[marker] <- nr <- adjust.value(new.vv[marker], jumpsize)
222		} else {							# assign new rate to current node
223			new.vv[marker] <- nr <- adjust.rate(new.vv[marker], jumpsize)
224		}
225		if(length(remain.unchanged)==0) {	# assign new value to all descendants
226			new.vv[match(all.D,names(new.vv))] = nr
227		} else {							# assign new value to all 'open' descendants 
228			new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
229		}
230		lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
231		lnPriorRatio = log(ptpois(K+1,lambda,nn)/ptpois(K,lambda,nn))
232	} else {							# drop transition: MERGE
233#		print("m")
234		anc = get.ancestor.of.node(s, phy)
235		if(!is.root(anc, phy)) {			# substitute ancestral value for current
236			new.vv[match(s, names(new.vv))] <- nr <- new.vv[match(anc, names(new.vv))] 
237		} else {
238			sister.tmp=get.desc.of.node(anc,phy)
239			sister=sister.tmp[sister.tmp!=s]
240			if(bb[match(sister, names(bb))]==0) {
241				nr=as.numeric(vv[match(sister, names(vv))])
242			} else {
243				if(theta) {						# assign new theta to first descendant of root
244					new.vv[marker] <- nr <- adjust.value(new.vv[marker], jumpsize)
245				} else {						# assign new rate to first descendant of root
246					new.vv[marker] <- nr <- adjust.rate(new.vv[marker], jumpsize)
247				}
248			}
249		}
250		if(length(remain.unchanged)==0) {	# assign new value to all descendants
251			new.vv[match(all.D, names(new.vv))] = nr
252		} else {							# assign new value to all 'open' descendants
253			new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
254		}
255		lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
256		lnPriorRatio = log(ptpois(K-1,lambda,nn)/ptpois(K,lambda,nn))
257		
258	}
259	if(((length(table(new.vv))-1)-sum(new.bb))!=0) fail=TRUE else fail=FALSE
260	return(list(new.delta=new.bb, new.values=new.vv, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio, fail=fail))
261}
262
263switcheroo <- function(cur.delta, cur.values, phy, node.des, theta=FALSE, jumpsize) { 
264	bb=cur.delta
265	vv=cur.values
266	new.bb=sample(bb)
267	names(vv)<-names(bb)<-names(new.bb)<-phy$edge[,2]
268	new.vv=vv
269	
270	old.shifts=as.numeric(names(bb[bb!=0]))
271	old.values=vv[match(old.shifts, names(vv))]
272	
273	anc=lapply(old.shifts, function(x)get.ancestor.of.node(x,phy))
274	if(!is.root(min(unlist(anc)),phy)) {
275	# find most ancestral value in tree
276		anc.value=unname(vv[match(min(unlist(anc)),names(vv))]) 
277	} else {
278		if(theta) anc.value=mean(sapply(old.values, function(x) adjust.value(x, jumpsize))) else anc.value=mean(sapply(old.values, function(x) adjust.rate(x, jumpsize)))
279	}
280	all.desc.anc=node.des[[which(names(node.des)==min(unlist(anc)))]]
281	new.vv[match(all.desc.anc,names(new.vv))] = anc.value
282
283	new.shifts=sort(as.numeric(names(new.bb[new.bb!=0])))
284	new.tips=new.shifts[new.shifts<=Ntip(phy)]
285	new.ints=new.shifts[new.shifts>Ntip(phy)]
286	new.shifts=c(new.ints, new.tips)
287	
288	new.values=as.numeric(old.values)[sample(1:length(old.values))]
289	for(z in 1:length(new.shifts)) {
290		node=new.shifts[z]
291		new.vv[match(node, names(new.vv))]=new.values[z]
292		new.vv[match(node.des[[which(names(node.des)==node)]],names(new.vv))]=new.values[z]
293	} 
294	return(list(new.delta=new.bb, new.values=new.vv))
295}
296
297generate.starting.point <- function(data, phy, node.des, theta=FALSE, K=FALSE, jumpsize) { 
298# updated JME 11.14.2010
299	nn=length(phy$edge.length)
300	ntip=Ntip(phy)
301	if(!K) nshifts=rtpois(1,log(ntip),nn) else nshifts=K-1
302	if(nshifts!=0) bb=sample(c(rep(1,nshifts),rep(0,nn-nshifts)),replace=FALSE) else bb=rep(0,nn)
303	names(bb)=phy$edge[,2]
304	shifts=as.numeric(names(bb)[bb==1])
305	if(theta) {
306		min.max=c(adjust.value(min(data), jumpsize), adjust.value(max(data), jumpsize))
307		values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
308	} else {
309		init.rate=fitContinuous(phy,data)[[1]]$beta
310		min.max=c(adjust.rate(init.rate, jumpsize), adjust.rate(init.rate, jumpsize))
311		values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
312	}
313	internal.shifts<-tip.shifts<-numeric(0)
314	internal.shifts=sort(shifts[shifts>ntip])
315	tip.shifts=shifts[shifts<=ntip]
316	
317	if(length(internal.shifts)==0 & length(tip.shifts)==0) {
318		vv=rep(values, nn)
319	} else {
320		vv=bb
321		vv[]=values[length(values)]
322		i=0
323		if(length(internal.shifts)!=0) {
324			for(i in 1:length(internal.shifts)) {
325				d=node.des[which(names(node.des)==internal.shifts[i])]
326				vv[match(c(internal.shifts[i], unlist(d)), names(vv))]=values[i]
327			}
328		}
329		if(length(tip.shifts)!=0) {
330			for(j in 1:length(tip.shifts)) {
331				vv[match(tip.shifts[j], names(vv))]=values[j+i]
332			}
333		}
334	}
335	return(list(delta=unname(bb), values=unname(vv)))
336}
337
338assign.root.theta <- function(cur.theta, phy) {
339	root=phy$edge[1,1]
340	desc=phy$edge[which(phy$edge[,1]==root),2]
341	root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]
342	root.theta
343}
344
345get.descendants.of.node <- function(node, phy) {
346	storage=list()
347	if(is.null(node)) node=phy$edge[1,1]
348	if(node%in%phy$edge[,1]) {
349		desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))
350		while(any(desc%in%phy$edge[,1])) {
351			desc.true=desc[which(desc%in%phy$edge[,1])]
352			desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))
353			
354			storage=list(storage,union(desc,desc.desc))
355			
356			desc=unlist(desc.desc)
357		}
358		storage=sort(unique(unlist(union(desc,storage))))
359	}
360	return(storage)
361}
362
363get.desc.of.node <- function(node, phy) {
364	return(phy$edge[which(phy$edge[,1]==node),2])
365}
366
367get.ancestor.of.node<-function(node, phy) {
368	anc=phy$edge[which(phy$edge[,2]==node),1]
369	anc
370}
371
372choose.one <- function(cur.delta)
373{
374	bb=cur.delta
375	s=sample(1:length(bb),1)
376	bb[s]=1-bb[s]
377	bb
378}
379
380is.root<-function(node,phy) {
381	if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)
382}
383
384assess.lnR <- function(lnR) {
385	if(is.na(lnR) || abs(lnR)==Inf) {
386		error=TRUE
387		r=0
388	} else {
389		if(lnR < -20) {
390			r=0 
391		} else if(lnR > 0) {
392			r=1 
393		} else {
394			r=exp(lnR)
395		}
396		error=FALSE
397	}
398	return(list(r=r, error=error))
399}
400
401apply.BMM.to.apetree<-function(apetree, rates) {
402	apetree$edge.length=apetree$edge.length*rates
403	return(apetree)
404}
405
406adjust.value <- function(value, jumpsize) {
407# mod 10.20.2010 JM Eastman
408	vv=value
409	rch <- runif(1, min = -jumpsize, max = jumpsize)
410	return(vv[]+rch)
411}
412
413adjust.rate <- function(rate, jumpsize) {
414# mod 10.20.2010 JM Eastman
415	vv=rate
416	v=log(vv)
417	rch <- runif(1, min = -jumpsize, max = jumpsize)
418	v=exp(v[]+rch)
419	v
420}
421
422prepare.data <- function(phy, data) {
423	td <- treedata(phy, data, sort = T)	
424	return(list(ape.tre=td$phy, orig.dat=td$data[,1]))
425}
426
427summarize.run<-function(cOK, endparms, cur.model) {
428#	end = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp, nAlphaProp, nThetaProp, nThetaSwapProp, nTcatProp)
429
430	if(cur.model=="BM") {
431		names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root")
432	} else {
433		names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root", "alpha", "theta", "theta.swap", "theta.catg")
434	}
435	names(endparms)=paste("pr.",names(endparms),sep="")
436	names(cOK)=paste("ok.",names(cOK),sep="")
437	cat("\n")
438	if(cur.model=="BM") {
439		print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
440		print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
441	} else {
442		print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "alpha", "theta", "theta.swap", "theta.catg"),sep="")])
443		print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "alpha", "theta", "theta.swap", "theta.catg"),sep="")])
444	}
445}
446
447generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {
448#	bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=sum(cur.delta.theta)+1, theta=mean(cur.theta), lnL=curr.lnL)
449	res=bundled.parms
450
451	if(init) {
452		if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "mean.rate", "rates", "alpha", "regimes", "mean.theta", "lnL", sep="\t")
453	} else {
454		if(cur.model=="BM") {
455			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")
456		} else {
457			msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.4f", res$alpha), res$reg, sprintf("%.3f", res$theta), sprintf("%.3f", res$lnL),sep="\t")
458		}
459	}
460	write(msg, file=file, append=TRUE)
461}
462
463parlogger<-function(model, init=FALSE, node.des, i, curr.lnL, cur.root=NULL, cur.rates=NULL, cur.delta.rates=NULL, cur.alpha=NULL, cur.theta=NULL, cur.delta.theta=NULL, parmBase) {	
464	if(init) {
465		msg.long=names(node.des[-1])
466		if(model=="BM") {
467			msg.short=paste("gen", "model", "lnL", sep="\t") 
468			parlogs=paste(parmBase, paste(c("summary", "root", "rates", "rate.shifts"), "txt", sep="."), sep="")
469			sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
470			sapply(parlogs[2], function(x) write.table(NULL, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
471			sapply(parlogs[3:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
472		} else {
473			msg.short=paste("gen", "model", "lnL", sep="\t")
474			parlogs=paste(parmBase, paste(c("summary", "root", "alpha", "rates", "rate.shifts", "optima", "optima.shifts"), "txt", sep="."), sep="")
475			sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
476			sapply(parlogs[2:3], function(x) write.table(NULL, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
477			sapply(parlogs[4:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
478		}
479	} else {
480		if(model=="BM") {
481			parlogs=paste(parmBase, paste(c("summary", "root", "rates", "rate.shifts"), "txt", sep="."), sep="")
482			outlist=list(paste(i, model, curr.lnL, sep="\t"), cur.root, cur.rates, cur.delta.rates) 
483			sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE)) 
484		} else {
485			parlogs=paste(parmBase, paste(c("summary", "root", "alpha", "rates", "rate.shifts", "optima", "optima.shifts"), "txt", sep="."), sep="")
486			outlist=list(paste(i, model, curr.lnL, sep="\t"), cur.root, cur.alpha, cur.rates, cur.delta.rates, cur.theta, cur.delta.theta) 
487			sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE)) 
488		}
489	}
490}
491
492generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {
493	if(init) {
494		initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)
495	} else {
496		write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)
497	}
498}
499
500determine.accepted.proposal<-function(startparms, endparms, cOK) {
501	which(startparms!=endparms)->marker
502	cOK[marker]=cOK[marker]+1
503	cOK
504}
505
506rtpois<-function(N, lambda, k) {
507	p=ppois(k, lambda, lower.tail=FALSE)
508	out=qpois(runif(N, min=0, max=1-p), lambda)
509	out
510}
511				 
512ptpois<-function(x, lambda, k) {
513	p.k=ppois(k, lambda, lower.tail=FALSE)
514	p.x=ppois(x, lambda, lower.tail=FALSE)
515	ptp=p.x/(1-p.k)
516	return(ptp)
517}
518				 
519## SUMMARIZING MCMC -- PLOTTING FUNCTIONS ##
520shifts.plot=function(phy, base.dir, burnin=0, level=0.03, internal.only=FALSE, paint.branches=TRUE, pdf.base=NULL, verbosity.base=NULL, ...) {
521	color.length=21
522	require(ape)
523	oldwd=getwd()
524	setwd(base.dir)
525	files=dir()
526#	if(optima) {
527#		shifts.file=files[grep("optima.shifts.txt", files)]
528#		estimates.file=files[grep("optima.txt", files)]
529#	} else {
530		shifts.file=files[grep("rate.shifts.txt", files)]
531		estimates.file=files[grep("rates.txt", files)]
532#	}
533	if(!is.null(pdf.base)) lab=paste(pdf.base, paste("bi", burnin, sep=""), paste("fq", level, sep=""), sep=".")
534
535	cat("READING shifts...\n")
536	results=read.table(shifts.file)
537	names(results)=as.numeric(results[1,])
538	results=results[-c(1:(burnin+1)),]
539	if(!all(names(results)%in%phy$edge[,2])) stop("Cannot process results: check that tree matches posterior summaries")
540	hits.tmp=apply(results,1,sum)
541	hits=length(hits.tmp[hits.tmp>0])
542	aa.tmp=apply(results, 2, function(x) sum(x))
543	aa.tmp=aa.tmp/hits
544	aa=aa.tmp[aa.tmp>=level]
545	aa=aa[order(aa, decreasing=TRUE)]
546	aa.nodes=aa[which(as.numeric(names(aa))>Ntip(phy))]
547	aa.tips=aa[which(as.numeric(names(aa))<=Ntip(phy))]
548	aa=aa.nodes
549	print(aa.tips)
550	nodes=as.numeric(names(aa))
551	nodes=nodes[nodes>Ntip(phy)]
552	desc=lapply(nodes, function(x) {
553				foo=get.descendants.of.node(x,phy)
554				if(length(foo)) return(phy$tip.label[foo[foo<=Ntip(phy)]]) else return(NULL)
555				}
556				)
557	shifts=apply(results, 1, sum)
558	cat("READING estimates...\n")
559	ests=read.table(estimates.file)
560	names(ests)=ests[1,]
561	ests=ests[-c(1:(burnin+1)),]
562	average.ests.tmp=apply(ests,2,mean)
563	average.ests=average.ests.tmp-median(average.ests.tmp)
564	if(paint.branches) {
565		require(colorspace)
566#		cce=diverge_hcl(color.length, c = c(100, 0), l = c(50, 90), power = 1.1)
567		cce=diverge_hcl(2*color.length, power = 0.5)
568		e.seq=seq(min(average.ests),max(average.ests),length=color.length)
569		color.gamut=seq(-max(abs(e.seq)), max(abs(e.seq)), length=length(cce))
570		colors.branches=sapply(average.ests, function(x) cce[which(min(abs(color.gamut-x))==abs(color.gamut-x))])
571		colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]
572	} else {
573		colors.branches=1
574	}
575	if(length(nodes)) {
576		require(colorspace)
577		cols=sapply(nodes, function(x) {
578						 a=get.ancestor.of.node(x, phy)
579						 comp=ests[,which(names(ests)==a)]
580						 this=ests[,which(names(ests)==x)]
581						 if(!length(comp)) { # dealing with first descendant of root
582						 d=get.desc.of.node(a, phy)
583						 d=d[which(d!=x)]
584						 
585						# find if sister descendant also experienced rate shift 
586						 d.shifts=results[, which(names(results)==d)]
587						 comp=ests[,which(names(ests)==d)]
588						 x.res=sapply(1:length(d.shifts), function(y) {
589									  if(d.shifts[y]==0) {
590									  zz=this[y]-comp[y]
591									  if(zz>0) {
592									  return(1) 
593									  } else {
594									  if(zz<0) return(-1) else return(0)
595									  }
596									  } else {
597									  return(0)
598									  }
599									  }
600									  )
601						 x.res=mean(x.res[x.res!=0])
602						 } else {
603						 yy=this-comp
604						 zz=yy
605						 zz[yy>0]=1
606						 zz[yy<0]=-1
607						 zz[yy==0]=0
608						 x.res=mean(zz[zz!=0])
609						 }
610						 return(x.res)
611						 }
612						 )
613		ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
614		c.seq=round(seq(-1,1,by=0.1),digits=1)
615		colors.nodes=ccx[match(round(cols,1),c.seq)]
616		names(colors.nodes)=nodes
617		colors.nodes=colors.nodes[which(as.numeric(names(colors.nodes))>Ntip(phy))]
618	} else {
619		colors.nodes=NULL
620		cols=NULL
621	}
622		
623# send to pdf 
624	if(!is.null(pdf.base)) pdf(file=paste(oldwd, paste(lab,"pdf",sep="."),sep="/"))
625	plot(phy, cex=0.1, lwd=0.4, edge.width=0.5, edge.color=colors.branches, ...)
626	nn=(Ntip(phy)+1):max(phy$edge[,2])
627	ll<-cc<-rr<-rep(0,length(nn))
628	ll[nn%in%nodes]=1
629	if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]/max(aa) 
630	if(is.null(colors.nodes)) {
631		nodelabels(pch=ifelse(ll==1, 21, NA), bg=ifelse(ll==1, "black", "white"), cex=2*cc)
632	} else {
633		rr[nn%in%nodes]=colors.nodes[order(names(colors.nodes))]
634		nodelabels(pch=ifelse(ll==1, 21, NA), cex=2*cc, col=rr, bg=rr, lwd=0.5)
635		if(length(aa.tips) & !internal.only) {
636			tt=rep(0,Ntip(phy))
637			tt[(1:Ntip(phy))[as.numeric(names(aa.tips))]]=1
638			tt.cex=ifelse(tt==1, aa.tips/max(aa.tmp), 0)
639			tt.col=sapply(names(aa.tips), function(x) {
640						a=get.ancestor.of.node(x, phy)
641						comp=ests[,which(names(ests)==a)]
642						this=ests[,which(names(ests)==x)]
643						if(!length(comp)) { # dealing with first descendant of root
644						d=get.desc.of.node(a, phy)
645						d=d[which(d!=x)]
646						
647						# find if sister descendant also experienced rate shift 
648						d.shifts=results[, which(names(results)==d)]
649						comp=ests[,which(names(ests)==d)]
650						x.res=sapply(1:length(d.shifts), function(y) {
651									 if(d.shifts[y]==0) {
652									 zz=this[y]-comp[y]
653									 if(zz>0) {
654									 return(1) 
655									 } else {
656									 if(zz<0) return(-1) else return(0)
657									 }
658									 } else {
659									 return(0)
660									 }
661									 }
662									 )
663						x.res=mean(x.res[x.res!=0])
664						} else {
665						yy=this-comp
666						zz=yy
667						zz[yy>0]=1
668						zz[yy<0]=-1
669						zz[yy==0]=0
670						x.res=mean(zz[zz!=0])
671						}
672						return(x.res)
673						}
674						)
675			tt.ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
676			tt.c.seq=round(seq(-1,1,by=0.1),digits=1)
677			tt.colors.nodes=tt.ccx[match(round(tt.col,1),tt.c.seq)]
678			names(tt.colors.nodes)=as.numeric(names(aa.tips))
679			colors.tips.tmp=tt.colors.nodes[order(as.numeric(names(tt.colors.nodes)))]
680			colors.tips=rep(NA, Ntip(phy))
681			colors.tips[(1:Ntip(phy))[as.numeric(names(aa.tips))]]=colors.tips.tmp
682
683			tiplabels(pch=ifelse(tt==1, 21, NA), cex=2*tt.cex, col=colors.tips, bg=colors.tips, lwd=0.5)
684		}
685		legend("bottomleft", title=toupper("direction"), cex=0.5, pt.cex=1, text.col="darkgray", legend = sprintf("%6.1f", rev(c.seq[seq(1,length(c.seq),by=2)])), pch=21, ncol=1, col = "darkgray", pt.bg = rev(ccx[seq(1,length(c.seq),by=2)]), box.lty="blank", border="white")
686		if(length(nodes)) legend("bottomright", title=toupper("frequency"), cex=0.5, pt.cex=2*(seq(min(cc), max(cc), length=6)), text.col="darkgray", legend = sprintf("%10.3f", seq(round(min(aa),2), round(max(aa),2), length=6)), pch=21, ncol=1, col = "darkgray", pt.bg = "white", box.lty="blank", border="white")
687	}
688	if(!is.null(pdf.base)) dev.off()
689		
690	if(length(nodes)) names(cols)=names(aa)
691	setwd(oldwd)
692	names(desc)=paste("node",nodes,sep=".")
693	if(!is.null(verbosity.base)) {
694		summary.table=c(hits/nrow(results), mean(shifts), median(shifts))
695		names(summary.table)=c("shift.prob", "mean.shift","median.shifts")
696		write.table(summary.table, file=paste(verbosity.base, "run.summary.txt", sep="."), quote=FALSE, col.names=FALSE)
697		if(length(nodes)) {
698			for(nn in 1:length(nodes)) {
699				write.table(c(nodes[nn], desc[[nn]]), file=paste(verbosity.base, "shift.descendants.txt", sep="."), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
700			}
701		}
702		allres=merge(as.data.frame(aa),as.data.frame(cols),by=0)
703		allres=allres[order(allres$aa, decreasing=TRUE),]
704		names(allres)=c("node", "conditional.shift.probability", "relative.shift.direction")
705		write.table(allres, file=paste(verbosity.base, "estimates.summary.txt", sep="."), quote=FALSE, row.names=FALSE)
706	}
707	return(list(desc=desc, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), shift.prop=aa, shift.direction=cols, mean.rates=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
708}
709
710
711plot.quant<-function(phy, data, data.names, shrinker=1, relative=TRUE, cex=0.5, adj=c(15,5), lwd=0.5, font=1, ...){
712	std=function(x,max,min){return((x-min)/(max-min))}
713	if(relative) {
714		data=sapply(data, function(x) std(x, max(data), min(data)))
715	}
716	
717	f=match(phy$tip.label, data.names)
718	if(is.na(sum(f))) {
719		stop("tips cannot be matched to tree")
720	}
721	else {
722		data=data[f]
723		phy$trait=data
724		plot.phylo(phy, show.tip.label=T, label.offset=adj[1], cex=cex, font=font, ...)
725		tiplabels(text=NULL, pch=21, adj=adj[2], cex=c(phy$trait/shrinker), bg="white",lwd=lwd)
726		} 
727}
728
729
730prior.posterior.plot=function(data, rpois.lambda) {
731# data should be a vector of number of shifts, for instance, for all interations across the MCMC sampling (post-burnin)
732	require(ggplot2)
733	dataset <- data.frame(variable = gl(2, length(data), labels = c("posterior", "prior")), value = c(data, rpois(length(data),rpois.lambda))) 
734	ggplot(data = dataset, aes(x = value, fill = variable)) + geom_histogram(position = "dodge") + scale_fill_manual(values = c("gray", "black"))	
735}
736
737
738