/in.progress/auteurExtended/R/rjmcmc.R

http://github.com/eastman/auteur · R · 455 lines · 345 code · 70 blank · 40 comment · 88 complexity · d2983c1ca6f0c7707a70533fed0b5897 MD5 · raw file

  1. #function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
  2. #author: JM EASTMAN 2011
  3. rjmcmc.ou <- function( phy, dat, SE=0, ngen=1000, sample.freq=100,
  4. prob.mergesplit=0.1, prob.theta=0.4, prob.alpha=0.2, upper.alpha=100,
  5. lambdaK=log(2), constrainK=FALSE, prop.width=NULL, simplestart=FALSE,
  6. internal.only=FALSE, summary=TRUE, fileBase="result")
  7. {
  8. model="OU"
  9. primary.parameter="optima"
  10. # calibration of proposal width
  11. if(is.null(prop.width)) {
  12. cat("CALIBRATING proposal width...\n")
  13. prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
  14. adjustable.prop.width=TRUE
  15. } else {
  16. if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
  17. adjustable.prop.width=FALSE
  18. }
  19. # parameter bounds
  20. abs.low.alpha=1e-13
  21. if(length(upper.alpha)==2 & upper.alpha[1]>abs.low.alpha) {
  22. alpha.bounds=upper.alpha
  23. } else if(!is.null(upper.alpha) & is.numeric(upper.alpha)){
  24. alpha.bounds=c(abs.low.alpha, upper.alpha)
  25. } else {
  26. alpha.bounds=c(abs.low.alpha, 100)
  27. }
  28. ### prepare data for rjMCMC
  29. dataList <- prepare.data(phy, dat, SE)
  30. ape.tre <- dataList$ape.tre
  31. orig.dat <- dataList$orig.dat
  32. SE <- dataList$SE
  33. node.des <- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  34. names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  35. ancestors <- lapply(1:Ntip(ape.tre), function(x) c(x,rev(sort(get.ancestors.of.node(x,ape.tre)))))
  36. epochs <- get.epochs(ape.tre)
  37. phyobject <- get.times(ape.tre)
  38. vcvSE <- vmat(ape.tre); if(any(SE!=0)) diag(vcvSE)<-diag(vcvSE)+SE^2
  39. # initialize parameters
  40. if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))
  41. if(simplestart | is.numeric(constrainK) | internal.only) {
  42. if(is.numeric(constrainK) & !internal.only) {
  43. init.theta <- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
  44. } else {
  45. init.theta <- list(values=rep(mean(orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
  46. }
  47. } else {
  48. init.theta <- generate.starting.point(orig.dat, ape.tre, node.des, logspace=FALSE, K=constrainK, prop.width=prop.width)
  49. }
  50. init.hansen <- fit.hansen(ape.tre,orig.dat,alpha.bounds)
  51. cur.theta <- init.theta$values
  52. cur.delta.theta <- init.theta$delta
  53. cur.alpha <- init.hansen$alpha
  54. cur.rate <- init.hansen$sigmasq
  55. # attempt to ensure valid starting point
  56. while(1) {
  57. mod.cur = try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, cur.rate, cur.alpha, c(-999, cur.theta)),silent=TRUE)
  58. if(inherits(mod.cur, "try-error")){
  59. if(!summary)stop()
  60. cur.alpha=adjustrate(cur.alpha, prop.width)
  61. cur.rate=adjustrate(cur.rate, prop.width)
  62. } else {
  63. cur.lnL=mod.cur$lnL
  64. break()
  65. }
  66. }
  67. # proposal frequencies
  68. prob.rate=1-sum(c(2*prob.mergesplit, prob.theta, prob.alpha))
  69. if(prob.rate<0) stop("proposal frequencies must not exceed 1; adjust proposal frequencies")
  70. proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.theta, prob.alpha, prob.rate)
  71. prop.cs<-orig.prop.cs<-cumsum(proposal.rates)
  72. # proposal counts
  73. n.props<-n.accept<-rep(0,length(prop.cs))
  74. names.props<-c("mergesplit","optima","alpha","rate")
  75. cur.acceptancerate=0
  76. # file handling
  77. if(summary) {
  78. parmBase=paste(model, fileBase, "parameters/",sep=".")
  79. if(!file.exists(parmBase)) dir.create(parmBase)
  80. errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
  81. runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
  82. parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL,
  83. optima=NULL, delta=NULL, alpha=NULL, rate=NULL)
  84. parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
  85. }
  86. # prop.width calibration points
  87. if(adjustable.prop.width){
  88. tf=c()
  89. tenths=function(v)floor(0.1*v)
  90. nn=ngen
  91. while(1) {
  92. vv=tenths(nn)
  93. nn=vv
  94. if(nn<=1) break() else tf=c(nn,tf)
  95. }
  96. tuneFreq=tf
  97. } else {
  98. tuneFreq=0
  99. }
  100. tickerFreq=ceiling((ngen+max(tuneFreq))/30)
  101. ### Begin rjMCMC
  102. for (i in 1:(ngen+max(tuneFreq))) {
  103. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  104. if(internal.only) {
  105. tips.tmp=which(as.numeric(names(cur.delta.theta))<=Ntip(ape.tre))
  106. if(any(cur.delta.theta[tips.tmp]==1)) stop("Broken internal only sampling")
  107. }
  108. ## OU IMPLEMENTATION ##
  109. while(1) {
  110. cur.proposal=min(which(runif(1)<prop.cs))
  111. if (cur.proposal==1 & !constrainK) { # adjust theta categories
  112. nr=splitormerge(cur.delta.theta, cur.theta, ape.tre, node.des, lambdaK, logspace=FALSE, internal.only)
  113. new.theta=nr$new.values ##
  114. new.delta.theta=nr$new.delta ##
  115. new.alpha=cur.alpha
  116. new.rate=cur.rate
  117. lnHastingsRatio=nr$lnHastingsRatio
  118. lnPriorRatio=nr$lnPriorRatio
  119. break()
  120. } else if(cur.proposal==2){
  121. if(runif(1)>0.05 & sum(cur.delta.theta)>1) { # tune local theta
  122. new.theta=tune.value(cur.theta, prop.width) ##
  123. new.delta.theta=cur.delta.theta
  124. new.alpha=cur.alpha
  125. new.rate=cur.rate
  126. break()
  127. } else { # adjust theta
  128. new.theta=adjustvalue(cur.theta, prop.width) ##
  129. new.delta.theta=cur.delta.theta
  130. new.alpha=cur.alpha
  131. new.rate=cur.rate
  132. break()
  133. }
  134. } else if(cur.proposal==3) { # adjust alpha
  135. while(1) {
  136. new.alpha=adjustrate(cur.alpha, prop.width) ##
  137. if(withinrange(new.alpha, min(alpha.bounds), max(alpha.bounds))) break()
  138. }
  139. new.theta=cur.theta
  140. new.delta.theta=cur.delta.theta
  141. new.rate=cur.rate
  142. break()
  143. } else if(cur.proposal==4) { # adjust rate
  144. new.theta=cur.theta
  145. new.delta.theta=cur.delta.theta
  146. new.alpha=cur.alpha
  147. new.rate=adjustrate(cur.rate,prop.width) ##
  148. break()
  149. }
  150. }
  151. n.props[cur.proposal]=n.props[cur.proposal]+1
  152. # compute fit of proposed model
  153. mod.new=NULL
  154. mod.new=try(ou.lik.fn(orig.dat, ape.tre, epochs, phyobject, ancestors, vcvSE, new.rate, new.alpha, c(-999, new.theta)),silent=TRUE)
  155. if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
  156. if(!is.infinite(mod.new$lnL)) {
  157. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  158. } else {
  159. mod.new$lnL=-Inf
  160. lnLikelihoodRatio = -Inf
  161. }
  162. # compare likelihoods
  163. heat=1
  164. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  165. # potential errors
  166. if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
  167. if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.theta, new.delta.theta, errorLog)
  168. if (runif(1) <= r$r) { ## adopt proposal ##
  169. cur.theta <- new.theta
  170. cur.delta.theta <- new.delta.theta
  171. cur.alpha <- new.alpha
  172. cur.rate <- new.rate
  173. n.accept[cur.proposal] = n.accept[cur.proposal]+1
  174. mod.cur <- mod.new
  175. cur.lnL <- mod.new$lnL
  176. }
  177. # iteration-specific functions
  178. if(i%%tickerFreq==0 & summary) {
  179. if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
  180. cat(". ")
  181. }
  182. if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {
  183. ## DIAGNOSTICS ##
  184. # cc=cur.theta
  185. # names(cc)=phy$edge[,2]
  186. # if(sum(cc)>=1){branchcol.plot(ape.tre, cc, color.length=sum(cur.delta.theta)+1, log=FALSE)}
  187. parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio,
  188. optima=cur.theta, delta=cur.delta.theta, alpha=cur.alpha, rate=cur.rate)
  189. parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
  190. }
  191. if(any(tuneFreq%in%i) & adjustable.prop.width) {
  192. new.acceptancerate=sum(n.accept)/sum(n.props)
  193. if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
  194. prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
  195. }
  196. cur.acceptancerate=new.acceptancerate
  197. }
  198. }
  199. # End rjMCMC
  200. # clear out prop.width calibration directories if calibration run
  201. if(summary) {
  202. close(runLog)
  203. parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
  204. summarize.run(n.accept, n.props, names.props)
  205. cleanup.files(parmBase, model, fileBase, ape.tre)
  206. }
  207. return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
  208. }
  209. #function for Markov sampling from a range of model complexities under the general process of Brownian motion evolution of continuous traits
  210. #author: LJ HARMON 2009, A HIPP 2009, and JM EASTMAN 2010-2011
  211. rjmcmc.bm <- function ( phy, dat, SE=0, ngen=1000, sample.freq=100,
  212. prob.mergesplit=0.20, prob.root=0.05, lambdaK=log(2), tuner=0.05,
  213. prior.rate=NULL, constrainK=FALSE, prop.width=NULL, simplestart=FALSE,
  214. internal.only=FALSE, summary=TRUE, fileBase="result")
  215. {
  216. model="BM"
  217. primary.parameter="rates"
  218. # calibration of proposal width
  219. if(is.null(prop.width)) {
  220. cat("CALIBRATING proposal width...\n")
  221. adjustable.prop.width=TRUE
  222. prop.width=calibrate.proposalwidth(phy, dat, nsteps=ngen/1000, model)
  223. } else {
  224. if(prop.width<=0) stop("please supply a 'prop.width' larger than 0")
  225. adjustable.prop.width=FALSE
  226. }
  227. ### prepare data for rjMCMC
  228. dataList <- prepare.data(phy, dat, SE)
  229. ape.tre <- dataList$ape.tre
  230. orig.dat <- dataList$orig.dat
  231. SE <- dataList$SE
  232. node.des <- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  233. names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  234. # initialize parameters
  235. if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop(paste("Constraint on ",primary.parameter, " is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.",sep=""))
  236. if(simplestart | is.numeric(constrainK) | internal.only) {
  237. if(is.numeric(constrainK) & !internal.only) {
  238. init.rate <- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width)
  239. } else {
  240. init.rate <- list(values=rep(fit.continuous(ape.tre,orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
  241. }
  242. } else {
  243. init.rate <- generate.starting.point(orig.dat, ape.tre, node.des, logspace=TRUE, K=constrainK, prop.width=prop.width )
  244. }
  245. if(is.null(prior.rate)) prior.rate <- fit.continuous(ape.tre,orig.dat)
  246. cur.rates <- init.rate$values
  247. cur.delta.rates <- init.rate$delta
  248. cur.root <- adjustvalue(mean(orig.dat), prop.width)
  249. cur.vcv <- updatevcv(ape.tre, cur.rates)
  250. mod.cur = bm.lik.fn(cur.root, orig.dat, cur.vcv, SE)
  251. cur.lnL=mod.cur$lnL
  252. # proposal frequencies
  253. prob.rates=1-sum(c(2*prob.mergesplit, prob.root))
  254. if(prob.rates<0) stop("proposal frequencies must not exceed 1; adjust 'prob.mergesplit' and (or) 'prob.root'")
  255. proposal.rates<-orig.proposal.rates<-c(2*prob.mergesplit, prob.root, prob.rates)
  256. prop.cs<-orig.prop.cs<-cumsum(proposal.rates)
  257. # proposal counts
  258. n.props<-n.accept<-rep(0,length(prop.cs))
  259. names.props<-c("mergesplit","rootstate","rates")
  260. cur.acceptancerate=0
  261. # file handling
  262. if(summary) {
  263. parmBase=paste(model, fileBase, "parameters/",sep=".")
  264. if(!file.exists(parmBase)) dir.create(parmBase)
  265. errorLog=paste(parmBase,paste(model, fileBase, "rjmcmc.errors.log",sep="."),sep="/")
  266. runLog=file(paste(parmBase,paste(model, fileBase, "rjmcmc.log",sep="."),sep="/"),open='w+')
  267. parms=list(gen=NULL, lnL=NULL, lnL.p=NULL, lnL.h=NULL,
  268. rates=NULL, delta=NULL, root=NULL)
  269. parlogger(ape.tre, init=TRUE, primary.parameter, parameters=parms, parmBase=parmBase, runLog)
  270. }
  271. # prop.width calibration points
  272. if(adjustable.prop.width){
  273. tf=c()
  274. tenths=function(v)floor(0.1*v)
  275. nn=ngen
  276. while(1) {
  277. vv=tenths(nn)
  278. nn=vv
  279. if(nn<=1) break() else tf=c(nn,tf)
  280. }
  281. tuneFreq=tf
  282. } else {
  283. tuneFreq=0
  284. }
  285. tickerFreq=ceiling((ngen+max(tuneFreq))/30)
  286. ### Begin rjMCMC
  287. for (i in 1:(ngen+max(tuneFreq))) {
  288. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  289. if(internal.only) {
  290. tips.tmp=which(as.numeric(names(cur.delta.rates))<=Ntip(ape.tre))
  291. if(any(cur.delta.rates[tips.tmp]==1)) stop("Broken internal only sampling")
  292. }
  293. ## BM IMPLEMENTATION ##
  294. while(1) {
  295. cur.proposal=min(which(runif(1)<prop.cs))
  296. if (cur.proposal==1 & !constrainK) { # adjust rate categories
  297. nr=splitormerge(cur.delta.rates, cur.rates, ape.tre, node.des, lambdaK, logspace=TRUE, internal.only)
  298. new.rates=nr$new.values
  299. new.delta.rates=nr$new.delta
  300. new.root=cur.root
  301. lnHastingsRatio=nr$lnHastingsRatio
  302. lnPriorRatio=nr$lnPriorRatio + priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)
  303. break()
  304. } else if(cur.proposal==2) { # adjust root
  305. new.root=proposal.slidingwindow(cur.root, 4*prop.width, min=NULL, max=NULL)$v
  306. new.rates=cur.rates
  307. new.delta.rates=cur.delta.rates
  308. lnHastingsRatio=0
  309. lnPriorRatio=0
  310. break()
  311. } else if(cur.proposal==3){
  312. if(runif(1)>0.05 & sum(cur.delta.rates)>1) { # tune local rate
  313. nr=tune.rate(rates=cur.rates, prop.width=prop.width, min=0, max=Inf, tuner=tuner, prior.mean=prior.rate)
  314. new.rates=nr$values
  315. new.delta.rates=cur.delta.rates
  316. new.root=cur.root
  317. lnHastingsRatio=nr$lnHastingsRatio
  318. lnPriorRatio=nr$lnPriorRatio
  319. break()
  320. } else { # scale rates
  321. nr=proposal.multiplier(cur.rates, prop.width)
  322. new.rates=nr$v
  323. new.delta.rates=cur.delta.rates
  324. new.root=cur.root
  325. lnHastingsRatio=nr$lnHastingsRatio
  326. lnPriorRatio=priorratio.exp(cur.rates, cur.delta.rates, new.rates, new.delta.rates, prior.rate)
  327. break()
  328. }
  329. }
  330. }
  331. n.props[cur.proposal]=n.props[cur.proposal]+1
  332. # compute fit of proposed model
  333. if(any(new.rates!=cur.rates)) new.vcv=updatevcv(ape.tre, new.rates) else new.vcv=cur.vcv
  334. mod.new=NULL
  335. mod.new=try(bm.lik.fn(new.root, orig.dat, new.vcv, SE), silent=TRUE)
  336. if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=-Inf}
  337. if(!is.infinite(mod.new$lnL)) {
  338. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  339. } else {
  340. mod.new$lnL=-Inf
  341. lnLikelihoodRatio = -Inf
  342. }
  343. # compare likelihoods
  344. heat=1
  345. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  346. # potential errors
  347. if(is.infinite(mod.cur$lnL)) stop("starting point has exceptionally poor likelihood")
  348. if(r$error & summary) generate.error.message(Ntip(ape.tre), i, mod.cur, mod.new, lnLikelihoodRatio, lnPriorRatio, lnHastingsRatio, cur.delta.rates, new.delta.rates, errorLog)
  349. if (runif(1) <= r$r) { ## adopt proposal ##
  350. cur.root <- new.root
  351. cur.rates <- new.rates
  352. cur.delta.rates <- new.delta.rates
  353. cur.vcv <- new.vcv
  354. n.accept[cur.proposal] = n.accept[cur.proposal]+1
  355. mod.cur <- mod.new
  356. cur.lnL <- mod.new$lnL
  357. }
  358. # iteration-specific functions
  359. if(i%%tickerFreq==0 & summary) {
  360. if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
  361. cat(". ")
  362. }
  363. if(i%%sample.freq==0 & i>max(tuneFreq) & summary) {
  364. parms=list(gen=i-max(tuneFreq), lnL=cur.lnL, lnL.p=lnPriorRatio, lnL.h=lnHastingsRatio,
  365. rates=cur.rates, delta=cur.delta.rates, root=cur.root)
  366. parlogger(ape.tre, init=FALSE, primary.parameter, parameters=parms, parmBase=parmBase, runLog=runLog)
  367. }
  368. if(any(tuneFreq%in%i) & adjustable.prop.width) {
  369. new.acceptancerate=sum(n.accept)/sum(n.props)
  370. if((new.acceptancerate<cur.acceptancerate) & adjustable.prop.width) {
  371. prop.width=calibrate.proposalwidth(ape.tre, orig.dat, nsteps=1000, model, widths=c(exp(log(prop.width)-log(2)), exp(log(prop.width)+log(2))))
  372. }
  373. cur.acceptancerate=new.acceptancerate
  374. }
  375. }
  376. # End rjMCMC
  377. # clear out prop.width calibration directories if calibration run
  378. if(summary) {
  379. close(runLog)
  380. parlogger(primary.parameter=primary.parameter, parmBase=parmBase, end=TRUE)
  381. summarize.run(n.accept, n.props, names.props)
  382. cleanup.files(parmBase, model, fileBase, ape.tre)
  383. }
  384. return(list(acceptance.rate=sum(n.accept)/sum(n.props), prop.width=prop.width))
  385. }