PageRenderTime 33ms CodeModel.GetById 2ms app.highlight 25ms RepoModel.GetById 1ms app.codeStats 1ms

/in.progress/auteurExtended/R/rjmcmc.R

http://github.com/eastman/auteur
R | 455 lines | 345 code | 70 blank | 40 comment | 88 complexity | d2983c1ca6f0c7707a70533fed0b5897 MD5 | raw file
  1#function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
  2#author: JM EASTMAN 2011
  3
  4rjmcmc.ou <- function(	phy, dat, SE=0, ngen=1000, sample.freq=100, 
  5						prob.mergesplit=0.1, prob.theta=0.4, prob.alpha=0.2, upper.alpha=100, 
  6						lambdaK=log(2), constrainK=FALSE, prop.width=NULL, simplestart=FALSE, 
  7						internal.only=FALSE, summary=TRUE, fileBase="result") 
  8{ 
  9	model="OU"
 10	primary.parameter="optima"
 11	
 12# calibration of proposal width	
 13	if(is.null(prop.width)) {
 14		cat("CALIBRATING proposal width...\n")
 15		prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
 16		adjustable.prop.width=TRUE		
 17	} else {
 18		if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
 19		adjustable.prop.width=FALSE
 20	}
 21	
 22# parameter bounds 
 23	abs.low.alpha=1e-13
 24	if(length(upper.alpha)==2 & upper.alpha[1]>abs.low.alpha) {
 25		alpha.bounds=upper.alpha
 26	} else if(!is.null(upper.alpha) & is.numeric(upper.alpha)){
 27		alpha.bounds=c(abs.low.alpha, upper.alpha)
 28	} else {
 29		alpha.bounds=c(abs.low.alpha, 100)
 30	}
 31	
 32### prepare data for rjMCMC
 33	dataList		<- prepare.data(phy, dat, SE)			
 34	ape.tre			<- dataList$ape.tre			
 35	orig.dat		<- dataList$orig.dat
 36	SE				<- dataList$SE
 37	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
 38	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
 39	ancestors		<- lapply(1:Ntip(ape.tre), function(x) c(x,rev(sort(get.ancestors.of.node(x,ape.tre)))))
 40	epochs			<- get.epochs(ape.tre)
 41	phyobject		<- get.times(ape.tre)
 42	vcvSE			<- vmat(ape.tre); if(any(SE!=0)) diag(vcvSE)<-diag(vcvSE)+SE^2
 43	
 44# initialize parameters
 45	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))
 46	
 47	if(simplestart | is.numeric(constrainK) | internal.only) {
 48		if(is.numeric(constrainK) & !internal.only) {
 49			init.theta	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
 50		} else {
 51			init.theta	<- list(values=rep(mean(orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
 52		}
 53	} else {
 54		init.theta		<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
 55	}
 56	
 57	init.hansen		<- fit.hansen(ape.tre,orig.dat,alpha.bounds)
 58
 59	cur.theta		<- init.theta$values 
 60	cur.delta.theta	<- init.theta$delta
 61	cur.alpha		<- init.hansen$alpha
 62	cur.rate		<- init.hansen$sigmasq
 63	
 64# attempt to ensure valid starting point
 65	while(1) {
 66		mod.cur = try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, cur.rate, cur.alpha, c(-999, cur.theta)),silent=TRUE)
 67		if(inherits(mod.cur, "try-error")){
 68			if(!summary)stop()
 69			cur.alpha=adjustrate(cur.alpha, prop.width)
 70			cur.rate=adjustrate(cur.rate, prop.width)
 71		} else {
 72			cur.lnL=mod.cur$lnL
 73			break()
 74		}
 75	}	
 76	
 77# proposal frequencies	
 78	prob.rate=1-sum(c(2*prob.mergesplit, prob.theta, prob.alpha))
 79	if(prob.rate<0) stop("proposal frequencies must not exceed 1; adjust proposal frequencies")
 80	proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.theta, prob.alpha, prob.rate)
 81	prop.cs<-orig.prop.cs<-cumsum(proposal.rates)
 82	
 83# proposal counts
 84	n.props<-n.accept<-rep(0,length(prop.cs))
 85	names.props<-c("mergesplit","optima","alpha","rate")
 86	cur.acceptancerate=0
 87	
 88# file handling
 89	if(summary) {
 90		parmBase=paste(model, fileBase, "parameters/",sep=".")
 91		if(!file.exists(parmBase)) dir.create(parmBase)
 92		errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
 93		runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
 94		parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL, 
 95			   optima=NULL, delta=NULL, alpha=NULL, rate=NULL)
 96		parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
 97	}
 98	
 99# prop.width calibration points
100	if(adjustable.prop.width){
101		tf=c()
102		tenths=function(v)floor(0.1*v)
103		nn=ngen
104		while(1) {
105			vv=tenths(nn)
106			nn=vv
107			if(nn<=1) break() else tf=c(nn,tf)		
108		}
109		tuneFreq=tf
110	} else {
111		tuneFreq=0
112	}
113	
114	tickerFreq=ceiling((ngen+max(tuneFreq))/30)
115	
116### Begin rjMCMC
117    for (i in 1:(ngen+max(tuneFreq))) {
118		
119        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
120		
121		if(internal.only) {
122			tips.tmp=which(as.numeric(names(cur.delta.theta))<=Ntip(ape.tre))
123			if(any(cur.delta.theta[tips.tmp]==1)) stop("Broken internal only sampling")
124		}
125				
126## OU IMPLEMENTATION ##
127		while(1) {
128			cur.proposal=min(which(runif(1)<prop.cs))
129			if (cur.proposal==1 & !constrainK) {									# adjust theta categories
130				nr=splitormerge(cur.delta.theta, cur.theta, ape.tre, node.des, lambdaK, logspace=FALSE, internal.only)
131				new.theta=nr$new.values ##
132				new.delta.theta=nr$new.delta ##
133				new.alpha=cur.alpha
134				new.rate=cur.rate 
135				lnHastingsRatio=nr$lnHastingsRatio
136				lnPriorRatio=nr$lnPriorRatio
137				break()
138			} else if(cur.proposal==2){													
139				if(runif(1)>0.05 & sum(cur.delta.theta)>1) {						# tune local theta
140					new.theta=tune.value(cur.theta, prop.width) ##
141					new.delta.theta=cur.delta.theta 
142					new.alpha=cur.alpha									
143					new.rate=cur.rate 					
144					break()
145				} else {															# adjust theta
146					new.theta=adjustvalue(cur.theta, prop.width) ##
147					new.delta.theta=cur.delta.theta
148					new.alpha=cur.alpha									
149					new.rate=cur.rate 
150					break()
151				} 
152			}  else if(cur.proposal==3) {											# adjust alpha
153				while(1) {
154					new.alpha=adjustrate(cur.alpha, prop.width) ##	
155					if(withinrange(new.alpha, min(alpha.bounds), max(alpha.bounds))) break()
156				}
157				new.theta=cur.theta
158				new.delta.theta=cur.delta.theta
159				new.rate=cur.rate 
160				break()
161			} else if(cur.proposal==4) {											# adjust rate
162				new.theta=cur.theta
163				new.delta.theta=cur.delta.theta
164				new.alpha=cur.alpha									
165				new.rate=adjustrate(cur.rate,prop.width) ##
166				break()
167			}
168		}
169				
170		n.props[cur.proposal]=n.props[cur.proposal]+1				
171		
172# compute fit of proposed model
173		mod.new=NULL
174		mod.new=try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, new.rate, new.alpha, c(-999, new.theta)),silent=TRUE)
175		
176		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
177		if(!is.infinite(mod.new$lnL)) {
178			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
179		} else {
180			mod.new$lnL=-Inf
181			lnLikelihoodRatio = -Inf
182		}
183		
184# compare likelihoods
185		heat=1
186		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
187		
188# potential errors
189		if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
190		if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.theta, new.delta.theta, errorLog)
191		
192		if (runif(1) <= r$r) {			## adopt proposal ##
193			cur.theta <- new.theta
194			cur.delta.theta <- new.delta.theta
195			cur.alpha <- new.alpha
196			cur.rate <- new.rate
197			
198			n.accept[cur.proposal] = n.accept[cur.proposal]+1
199			
200			mod.cur <- mod.new
201			cur.lnL <- mod.new$lnL
202		} 
203		
204# iteration-specific functions
205		if(i%%tickerFreq==0 & summary) {
206			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
207			cat(". ")
208		}
209		
210		if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {
211
212			## DIAGNOSTICS ##
213#			cc=cur.theta
214#			names(cc)=phy$edge[,2]
215#			if(sum(cc)>=1){branchcol.plot(ape.tre, cc, color.length=sum(cur.delta.theta)+1, log=FALSE)}
216
217			parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio, 
218					   optima=cur.theta, delta=cur.delta.theta, alpha=cur.alpha, rate=cur.rate)
219			parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
220		}
221		
222		if(any(tuneFreq%in%i) & adjustable.prop.width) {
223			new.acceptancerate=sum(n.accept)/sum(n.props)
224			if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
225				prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
226			}
227			cur.acceptancerate=new.acceptancerate
228		}
229	}
230
231# End rjMCMC
232
233# clear out prop.width calibration directories if calibration run
234	if(summary) {
235		close(runLog)
236		parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
237		summarize.run(n.accept, n.props, names.props)	
238		cleanup.files(parmBase, model, fileBase, ape.tre)
239	} 
240	return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
241}
242
243
244
245#function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
246#author: LJ HARMON 2009, A HIPP 2009, and JM EASTMAN 2010-2011
247
248rjmcmc.bm <- function (	phy, dat, SE=0, ngen=1000, sample.freq=100, 
249						prob.mergesplit=0.20, prob.root=0.05, lambdaK=log(2), tuner=0.05, 
250						prior.rate=NULL, constrainK=FALSE, prop.width=NULL, simplestart=FALSE, 
251						internal.only=FALSE, summary=TRUE, fileBase="result") 
252{ 
253	model="BM"
254	primary.parameter="rates"
255
256# calibration of proposal width	
257	if(is.null(prop.width)) {
258		cat("CALIBRATING proposal width...\n")
259		adjustable.prop.width=TRUE
260		prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
261	} else {
262		if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
263		adjustable.prop.width=FALSE
264	}
265		
266### prepare data for rjMCMC
267	dataList		<- prepare.data(phy, dat, SE)			
268	ape.tre			<- dataList$ape.tre			
269	orig.dat		<- dataList$orig.dat
270	SE				<- dataList$SE
271	node.des		<- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
272	names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
273
274# initialize parameters
275	if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))
276
277	if(simplestart | is.numeric(constrainK) | internal.only) {
278		if(is.numeric(constrainK) & !internal.only) {
279			init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width)
280		} else {
281			init.rate	<- list(values=rep(fit.continuous(ape.tre,orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
282		}
283	} else {
284		init.rate	<- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width )
285	}
286	
287	if(is.null(prior.rate)) prior.rate	<- fit.continuous(ape.tre,orig.dat)
288		
289    cur.rates		<- init.rate$values			
290	cur.delta.rates	<- init.rate$delta
291	cur.root		<- adjustvalue(mean(orig.dat), prop.width)
292	cur.vcv			<- updatevcv(ape.tre, cur.rates)
293	
294	mod.cur = bm.lik.fn(cur.root, orig.dat, cur.vcv, SE)
295	cur.lnL=mod.cur$lnL
296
297			
298# proposal frequencies
299	prob.rates=1-sum(c(2*prob.mergesplit, prob.root))
300	if(prob.rates<0) stop("proposal frequencies must not exceed 1; adjust 'prob.mergesplit' and (or) 'prob.root'")
301	proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.root, prob.rates)
302	prop.cs<-orig.prop.cs<-cumsum(proposal.rates)
303
304# proposal counts
305	n.props<-n.accept<-rep(0,length(prop.cs))
306	names.props<-c("mergesplit","rootstate","rates")
307	cur.acceptancerate=0
308		
309# file handling
310	if(summary) {
311		parmBase=paste(model, fileBase, "parameters/",sep=".")
312		if(!file.exists(parmBase)) dir.create(parmBase)
313		errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
314		runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
315		parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL, 
316			   rates=NULL, delta=NULL, root=NULL)
317		parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
318	}
319	
320# prop.width calibration points
321	if(adjustable.prop.width){
322		tf=c()
323		tenths=function(v)floor(0.1*v)
324		nn=ngen
325		while(1) {
326			vv=tenths(nn)
327			nn=vv
328			if(nn<=1) break() else tf=c(nn,tf)		
329		}
330		tuneFreq=tf
331	} else {
332		tuneFreq=0
333	}
334	
335	tickerFreq=ceiling((ngen+max(tuneFreq))/30)
336	
337### Begin rjMCMC
338    for (i in 1:(ngen+max(tuneFreq))) {
339		
340        lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
341		
342		if(internal.only) {
343				tips.tmp=which(as.numeric(names(cur.delta.rates))<=Ntip(ape.tre))
344				if(any(cur.delta.rates[tips.tmp]==1)) stop("Broken internal only sampling")
345		}
346
347		
348## BM IMPLEMENTATION ##
349		while(1) {
350			cur.proposal=min(which(runif(1)<prop.cs))
351			if (cur.proposal==1 & !constrainK) {									# adjust rate categories
352				nr=splitormerge(cur.delta.rates, cur.rates, ape.tre, node.des, lambdaK, logspace=TRUE, internal.only)
353				new.rates=nr$new.values
354				new.delta.rates=nr$new.delta
355				new.root=cur.root
356				lnHastingsRatio=nr$lnHastingsRatio
357				lnPriorRatio=nr$lnPriorRatio + priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)	
358				break()
359			} else if(cur.proposal==2) {											# adjust root
360				new.root=proposal.slidingwindow(cur.root, 4*prop.width, min=NULL, max=NULL)$v								
361				new.rates=cur.rates
362				new.delta.rates=cur.delta.rates
363				lnHastingsRatio=0
364				lnPriorRatio=0
365				break()
366			} else if(cur.proposal==3){													
367				if(runif(1)>0.05 & sum(cur.delta.rates)>1) {						# tune local rate
368					nr=tune.rate(rates=cur.rates, prop.width=prop.width, min=0, max=Inf, tuner=tuner, prior.mean=prior.rate)
369					new.rates=nr$values
370					new.delta.rates=cur.delta.rates 
371					new.root=cur.root
372					lnHastingsRatio=nr$lnHastingsRatio
373					lnPriorRatio=nr$lnPriorRatio
374					break()
375				} else {															# scale rates
376					nr=proposal.multiplier(cur.rates, prop.width)
377					new.rates=nr$v
378					new.delta.rates=cur.delta.rates
379					new.root=cur.root
380					lnHastingsRatio=nr$lnHastingsRatio
381					lnPriorRatio=priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)
382					break()
383				} 
384			}
385		}
386		
387		n.props[cur.proposal]=n.props[cur.proposal]+1				
388
389	# compute fit of proposed model
390		if(any(new.rates!=cur.rates)) new.vcv=updatevcv(ape.tre, new.rates) else new.vcv=cur.vcv
391		mod.new=NULL
392		mod.new=try(bm.lik.fn(new.root, orig.dat, new.vcv, SE), silent=TRUE)
393		
394		if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
395		if(!is.infinite(mod.new$lnL)) {
396			lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
397		} else {
398			mod.new$lnL=-Inf
399			lnLikelihoodRatio = -Inf
400		}
401		
402	# compare likelihoods
403		heat=1
404		r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
405
406		# potential errors
407		if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
408		if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.rates, new.delta.rates, errorLog)
409		
410		if (runif(1) <= r$r) {			## adopt proposal ##
411			cur.root <- new.root
412			cur.rates <- new.rates
413			cur.delta.rates <- new.delta.rates
414			cur.vcv <- new.vcv
415
416			n.accept[cur.proposal] = n.accept[cur.proposal]+1
417
418			mod.cur <- mod.new
419			cur.lnL <- mod.new$lnL
420		} 
421		
422		# iteration-specific functions
423		if(i%%tickerFreq==0 & summary) {
424			if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
425			cat(". ")
426		}
427		
428		if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {			
429			parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio, 
430					   rates=cur.rates, delta=cur.delta.rates, root=cur.root)
431			parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
432		}
433									   
434		if(any(tuneFreq%in%i) & adjustable.prop.width) {
435			new.acceptancerate=sum(n.accept)/sum(n.props)
436			if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
437				prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
438			}
439			cur.acceptancerate=new.acceptancerate
440		}
441	}
442	
443# End rjMCMC
444	
445  # clear out prop.width calibration directories if calibration run
446	if(summary) {
447		close(runLog)
448		parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
449		summarize.run(n.accept, n.props, names.props)	
450		cleanup.files(parmBase, model, fileBase, ape.tre)
451	} 
452	
453	return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
454}
455