/archive/source.archive/bm.and.ou/trait.rjmcmc.09162010.R

http://github.com/eastman/auteur · R · 496 lines · 421 code · 58 blank · 17 comment · 68 complexity · 4327115b691c65e3dd0041bc4e0d9444 MD5 · raw file

  1. ## Written by LJ HARMON & AH: 2008
  2. ## Modified by JM EASTMAN: 09.2010
  3. rjMCMC.trait<-function (phy, data, ngen=1000, sampleFreq=100, model=c("OU","BM"), probAlpha=0.05, probMergeSplit=0.05, probRoot=0.1, heat=1, fileBase="result")
  4. {
  5. if(length(model)>1)stop(paste("Please specify either ", sQuote("OU"), " or ", sQuote("BM")))
  6. ## MORE ERROR CHECKING NEEDED HERE ##
  7. require(ouch)
  8. require(geiger)
  9. ### prepare objects for rjMCMC
  10. cur.model <- model # initialize family of models explored
  11. dataList <- prepare.ouchdata(phy, data) # check tree and data; convert into S4
  12. ape.tre <- cur.tre <- dataList$ape.tre # S3 phylogeny
  13. orig.dat <- dataList$orig.dat # S3 data
  14. ouch.tre <- dataList$ouch.tre # S4 OUCH phylogeny
  15. ouch.dat <- dataList$ouch.dat # S4 OUCH data
  16. nn <- length(ape.tre$edge.length) # max number of rates (one per branch)
  17. init.rate <- fitContinuous(ape.tre,orig.dat)
  18. init.rate <- adjust.rate(init.rate[[1]]$beta)
  19. cur.rates <- rep(init.rate,nn) # initialize array for rate estimates branchwise
  20. cur.delta.rates <- rep(0,nn)
  21. cur.root <- mean(orig.dat)
  22. cur.alpha <- 0.001 # initialize alpha
  23. cur.theta <- rep(0,nn) # initialize trait optima under OU
  24. cur.delta.theta <- rep(0,nn) # initialize local 'rates'
  25. cur.regimes <- as.factor(c(sample(cur.theta,1),cur.theta)) # initial set of selective regimes
  26. cur.root.theta <- sample(cur.theta,1)
  27. nRateProp = 0 # proposal count: update rate parameter(s)
  28. nRcatProp = 0 # proposal count: update rate categories
  29. nRootProp = 0
  30. nAlphaProp = 0 # proposal count: update alpha parameter
  31. nRegimesProp = 0 # proposal count: update selective regimes
  32. nThetaProp = 0
  33. nRateOK = 0 # proposal count: successful rate changes
  34. nRcatOK = 0 # proposal count: successful rate categories updates
  35. nRootOK = 0
  36. nAlphaOK = 0 # proposal count: successful alpha updates
  37. nRegimesOK = 0 # proposal count: successful regime updates
  38. nThetaOK = 0
  39. l <- array(dim=c(ngen,4)) # array for storing output: size is number of MCMC generations
  40. l[] <- NA # initialize array for storing lnLs
  41. cOK=c( # list of proposal tallies
  42. nRateOK,
  43. nRcatOK,
  44. nRootOK,
  45. nAlphaOK,
  46. nRegimesOK,
  47. nThetaOK
  48. )
  49. errorLog=file(paste(getwd(),paste(cur.model, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
  50. generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
  51. runLog=file(paste(getwd(),paste(cur.model, "rjTrait.log",sep="."),sep="/"),open='w+')
  52. generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
  53. ### Begin rjMCMC
  54. for (i in 1:ngen) {
  55. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  56. startparms=list(rates=cur.rates, cats=sum(cur.delta.rates)+1, root=cur.root, alph=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta))
  57. ## OU IMPLEMENTATION ##
  58. if(cur.model=="OU") {
  59. if (runif(1) < (2 * probMergeSplit)) {
  60. if(runif(1)<0.5) { # adjust theta categories
  61. nr=split.or.merge(cur.delta.theta, cur.theta, ape.tre, theta=TRUE)
  62. new.alpha=cur.alpha
  63. new.root.theta=cur.root.theta
  64. new.theta=nr$new.values ##
  65. new.delta.theta=nr$new.delta ##
  66. nrt=assign.root.theta(new.theta, ape.tre) ##
  67. new.regimes=as.factor(c(nrt,new.theta)) ##
  68. new.rates=cur.rates
  69. new.delta.rates=cur.delta.rates
  70. nRegimesProp=nRegimesProp+1
  71. } else { # adjust rate categories
  72. nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, theta=FALSE)
  73. new.alpha=cur.alpha
  74. new.root.theta=cur.root.theta
  75. new.theta=cur.theta
  76. new.delta.theta=cur.delta.theta
  77. new.regimes=cur.regimes
  78. new.rates=nr$new.values ##
  79. new.delta.rates=nr$new.delta ##
  80. nRcatProp=nRcatProp+1
  81. }
  82. lnHastingsRatio=nr$lnHastingsRatio
  83. lnPriorRatio=nr$lnPriorRatio
  84. } else { # neither split nor merge
  85. if(runif(1)<probAlpha) { # adjust alpha
  86. new.alpha=adjust.rate(cur.alpha) ##
  87. new.root.theta=cur.root.theta
  88. new.theta=cur.theta
  89. new.delta.theta=cur.delta.theta
  90. new.regimes=cur.regimes
  91. new.rates=cur.rates
  92. new.delta.rates=cur.delta.rates
  93. nAlphaProp=nAlphaProp+1
  94. } else {
  95. if(runif(1)<0.5) { # adjust rates
  96. new.alpha=cur.alpha
  97. new.root.theta=cur.root.theta
  98. new.theta=cur.theta
  99. new.delta.theta=cur.delta.theta
  100. new.regimes=cur.regimes
  101. new.rates=adjust.rate(cur.rates) ##
  102. new.delta.rates=cur.delta.rates
  103. nRateProp=nRateProp+1
  104. } else { # adjust theta
  105. new.alpha=cur.alpha
  106. new.root.theta=cur.root.theta
  107. new.theta=adjust.value(cur.theta) ##
  108. new.delta.theta=cur.delta.theta
  109. nrt=assign.root.theta(new.theta, ape.tre) ##
  110. new.regimes=as.factor(c(nrt,new.theta)) ##
  111. new.rates=cur.rates
  112. new.delta.rates=cur.delta.rates
  113. nThetaProp=nThetaProp+1
  114. }
  115. }
  116. }
  117. cur.tre=apply.BMM.to.apetree(ape.tre, cur.rates)
  118. new.tre=apply.BMM.to.apetree(ape.tre, new.rates)
  119. mod.cur=new.ou.lik.fn(cur.rates, cur.alpha, cur.regimes, cur.theta, cur.tre, ouch.dat)
  120. mod.new=new.ou.lik.fn(new.rates, new.alpha, new.regimes, new.theta, new.tre, ouch.dat)
  121. # mod.new=try(new.ou.lik.fn(new.rates, new.alpha, new.regimes, new.theta, new.tre, ouch.dat),silent=TRUE)
  122. } else if(cur.model=="BM"){
  123. ## BM IMPLEMENTATION ##
  124. if (runif(1) < (2 * probMergeSplit)) {
  125. nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, theta=FALSE)
  126. new.rates=nr$new.values
  127. new.delta.rates=nr$new.delta
  128. new.root=cur.root
  129. nRcatProp=nRcatProp+1
  130. lnHastingsRatio=nr$lnHastingsRatio
  131. lnPriorRatio=nr$lnPriorRatio
  132. } else {
  133. if(runif(1)<probRoot) { # adjust root
  134. new.root=adjust.value(cur.root)
  135. new.rates=cur.rates
  136. new.delta.rates=cur.delta.rates
  137. nRootProp=nRootProp+1
  138. } else { # adjust rates
  139. new.rates=adjust.rate(cur.rates)
  140. new.delta.rates=cur.delta.rates
  141. new.root=cur.root
  142. nRateProp=nRateProp+1
  143. }
  144. }
  145. cur.tre=apply.BMM.to.apetree(ape.tre, cur.rates)
  146. new.tre=apply.BMM.to.apetree(ape.tre, new.rates)
  147. mod.cur=new.bm.lik.fn(cur.rates, cur.root, cur.tre, orig.dat)
  148. mod.new=new.bm.lik.fn(new.rates, new.root, new.tre, orig.dat)
  149. # mod.new=try(new.bm.lik.fn(new.rates, new.root, new.tre, ouch.dat),silent=TRUE)
  150. }
  151. if(!inherits(mod.new, "try-error")) {
  152. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  153. } else {
  154. lnLikelihoodRatio = -Inf
  155. mod.new$lnL=NaN
  156. }
  157. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  158. if (runif(1) <= r$r) { # adopt proposal
  159. if(cur.model=="OU") {
  160. decision="adopt"
  161. cur.alpha <- new.alpha
  162. cur.root.theta <- new.root.theta
  163. cur.regimes <- new.regimes
  164. cur.theta <- new.theta
  165. cur.delta.theta <- new.delta.theta
  166. } else {
  167. decision="adopt"
  168. cur.root <- new.root
  169. }
  170. cur.rates <- new.rates
  171. cur.delta.rates <- new.delta.rates
  172. curr.lnL <- mod.new$lnL
  173. cur.tre <- new.tre
  174. } else { # deny proposal
  175. decision="reject"
  176. curr.lnL <- mod.cur$lnL
  177. }
  178. if(i==1) cat(paste("gen", "cur.lnL", "new.lnL", "lnR", sQuote(decision), sep="\t\t"),"\n")
  179. cat(paste(i, sprintf("%.2f", mod.cur$lnL), sprintf("%.2f", mod.new$lnL), sprintf("%.2f", lnR), sQuote(decision), sep="\t\t"),"\n")
  180. if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
  181. l[i,]<-c(curr.lnL, sum(cur.delta.rates)+1, cur.alpha, length(table(cur.regimes)))
  182. endparms=list(rates=cur.rates, cats=sum(cur.delta.rates)+1, root=cur.root, alph=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta))
  183. cOK=tally.mcmc.parms(startparms, endparms, cOK)
  184. bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=length(table(cur.regimes)), theta=mean(cur.theta), lnL=curr.lnL)
  185. if(i%%sampleFreq==0) generate.log(bundled.parms, cur.model, file=runLog)
  186. }
  187. # End rjMCMC
  188. close(errorLog)
  189. close(runLog)
  190. summarize.run(cOK, (cProp<-c(nRateProp, nRcatProp, nRootProp, nAlphaProp, nRegimesProp, nThetaProp)), cur.model)
  191. if(cur.model=="BM") {
  192. return(list(r.cat=l[,2], lnL=l[,1]))
  193. } else {
  194. return(list(r.cat=l[,2], r.alpha=l[,3], r.regimes=l[,4], lnL=l[,1]))
  195. }
  196. }
  197. ## AUXILIARY FUNCTIONS
  198. new.ou.lik.fn <- function(rates, alpha, regimes, theta, ape.tre, ouch.dat) { # from OUCH 2.7-1:::hansen()
  199. # mod 09.13.2010 JM Eastman
  200. ouch.tre=ape2ouch(ape.tre, scale=FALSE)
  201. ouch.dat$regimes=as.factor(regimes)
  202. theta.tmp=as.numeric(levels(as.factor(theta)))
  203. theta=theta.tmp[order(match(theta.tmp,theta))]
  204. alpha.ouch=as.matrix(sqrt(alpha))
  205. beta=ouch:::regime.spec(ouch.tre, ouch.dat['regimes'])
  206. dat=do.call(c,lapply(ouch.dat['trait'],function(y)y[ouch.tre@term]))
  207. names(dat)=ouch.dat$labels[Ntip(ape.tre):nrow(ouch.dat)]
  208. n <- length(dat)
  209. otree.df=as(ouch.tre,"data.frame")
  210. ordering=otree.df[n:nrow(otree.df),'labels']
  211. ev <- eigen(alpha.ouch,symmetric=TRUE)
  212. w <- .Call(ouch:::ouch_weights,object=ouch.tre,lambda=ev$values,S=ev$vectors,beta=beta)
  213. v <- geiger:::ouMatrix(vcv.phylo(ape.tre), alpha)
  214. v.match <- match(row.names(v),ordering)
  215. v <- v[v.match,v.match]
  216. ## GEIGER call does not assume ultrametricity (and thus allows different 'rates' under BM)
  217. # -old- v <- .Call(ouch:::ouch_covar,object=ouch.tre,lambda=ev$values,S=ev$vectors,sigma.sq=1)
  218. y <- matrix(theta,ncol=1)
  219. e <- w%*%y-dat
  220. dim(y) <- dim(y)[1]
  221. dim(e) <- n
  222. q <- e%*%solve(v,e)
  223. det.v <- determinant(v,logarithm=TRUE)
  224. if (det.v$sign!=1)stop("ou.lik.fn error: non-positive determinant",call.=FALSE)
  225. dev <- n*log(2*pi)+as.numeric(det.v$modulus)+q[1,1]
  226. list(
  227. theta=theta,
  228. weight=w,
  229. vcov=v,
  230. resids=e,
  231. lnL=-0.5*dev
  232. )
  233. }
  234. new.bm.lik.fn <- function(rates, root, ape.tre, orig.dat) { # from OUCH 2.7-1:::brown()
  235. # mod 09.16.2010 JM Eastman
  236. tree=ape.tre
  237. data=orig.dat
  238. nterm=Ntip(tree)
  239. nchar=length(data)
  240. dat=data[match(tree$tip.label,names(data))]
  241. nchar <- 1
  242. w <- matrix(data=1,nrow=nterm,ncol=1)
  243. n <- length(dat)
  244. b <- vcv.phylo(ape.tre)
  245. y <- matrix(root)
  246. e <- w%*%y-dat
  247. q <- t(e)%*%solve(b,e)
  248. v <- q/nterm
  249. dev <- nchar*nterm*(1+log(2*pi))+nchar*log(det(b))+nterm*log(det(v))
  250. list(
  251. root=root,
  252. lnL = -0.5*dev
  253. )
  254. }
  255. split.or.merge <- function(cur.delta, cur.values, phy, theta=FALSE) {
  256. # mod 09.14.2010 JM Eastman
  257. bb=cur.delta
  258. vv=cur.values
  259. names(vv)<-names(bb)<-phy$edge[,2]
  260. new.bb=choose.one(bb)
  261. new.vv=vv
  262. s=names(new.bb[bb!=new.bb])
  263. all.shifts=as.numeric(names(bb[bb>0]))
  264. all.D=get.descendants.of.node(s, phy)
  265. untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)get.descendants.of.node(x,phy)))
  266. remain.unchanged=union(all.shifts, untouchable)
  267. marker=match(s, names(new.vv))
  268. nn=length(vv)
  269. K=length(table(vv))
  270. N=Ntip(phy)
  271. if(sum(new.bb)>sum(bb)) { # add transition: SPLIT
  272. if(theta) { # assign new theta to current node
  273. new.vv[marker] <- nr <- adjust.value(new.vv[marker])
  274. } else { # assign new rate to current node
  275. new.vv[marker] <- nr <- adjust.rate(new.vv[marker])
  276. }
  277. if(length(remain.unchanged)==0) { # assign new value to all descendants
  278. new.vv[match(all.D,names(new.vv))] = nr
  279. } else { # assign new value to all 'open' descendants
  280. new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
  281. }
  282. lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  283. lnPriorRatio = log(ppois(K+1,log(2),lower.tail=FALSE)/ppois(K,log(2),lower.tail=FALSE))
  284. } else { # drop transition: MERGE
  285. anc = get.ancestor.of.node(s, phy)
  286. if(!is.root(anc, phy)) { # substitute ancestral value for current
  287. new.vv[match(s, names(new.vv))] <- nr <- new.vv[match(anc, names(new.vv))]
  288. } else {
  289. if(theta) { # assign new theta to first descendant of root
  290. new.vv[marker] <- nr <- adjust.value(new.vv[marker])
  291. } else { # assign new rate to first descendant of root
  292. new.vv[marker] <- nr <- adjust.rate(new.vv[marker])
  293. }
  294. }
  295. if(length(remain.unchanged)==0) { # assign new value to all descendants
  296. new.vv[match(all.D, names(new.vv))] = nr
  297. } else { # assign new value to all 'open' descendants
  298. new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
  299. }
  300. lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  301. lnPriorRatio = log(ppois(K-1,log(2),lower.tail=FALSE)/ppois(K,log(2),lower.tail=FALSE))
  302. }
  303. return(list(new.values=new.vv, new.delta=new.bb, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio))
  304. }
  305. assign.root.theta <- function(cur.theta, phy) {
  306. root=phy$edge[1,1]
  307. desc=phy$edge[which(phy$edge[,1]==root),2]
  308. root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]
  309. root.theta
  310. }
  311. get.descendants.of.node <- function(node, phy) {
  312. storage=list()
  313. if(node%in%phy$edge[,1]) {
  314. desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))
  315. while(any(desc%in%phy$edge[,1])) {
  316. desc.true=desc[which(desc%in%phy$edge[,1])]
  317. desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))
  318. storage=list(storage,union(desc,desc.desc))
  319. desc=unlist(desc.desc)
  320. }
  321. }
  322. return(unique(unlist(storage)))
  323. }
  324. get.desc.of.node <- function(node, phy) {
  325. return(phy$edge[which(phy$edge[,1]==node),2])
  326. }
  327. get.ancestor.of.node<-function(node, phy) {
  328. anc=phy$edge[which(phy$edge[,2]==node),1]
  329. anc
  330. }
  331. choose.one <- function(cur.delta)
  332. {
  333. bb=cur.delta
  334. s=sample(1:length(bb),1)
  335. bb[s]=1-bb[s]
  336. bb
  337. }
  338. is.root<-function(node,phy) {
  339. if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)
  340. }
  341. assess.lnR <- function(lnR) {
  342. if(is.na(lnR) || abs(lnR)==Inf) {
  343. error=TRUE
  344. r=0
  345. } else {
  346. if(lnR < -100) {
  347. r=0
  348. } else if(lnR > 0) {
  349. r=1
  350. } else {
  351. r=exp(lnR)
  352. }
  353. error=FALSE
  354. }
  355. return(list(r=r, error=error))
  356. }
  357. apply.BMM.to.apetree<-function(apetree, rates) {
  358. apetree$edge.length=apetree$edge.length*rates
  359. return(apetree)
  360. }
  361. adjust.value <- function(value) {
  362. vv=value
  363. rch <- runif(1, min = -2, max = 2)
  364. vv <- vv + rch
  365. vv
  366. }
  367. adjust.rate <- function(rate) {
  368. rr=rate
  369. r=log(rr)
  370. rch=runif(1, min=-2, max=2)
  371. nr=r+rch
  372. rr=exp(nr)
  373. rr
  374. }
  375. prepare.ouchdata <- function(phy, data) {
  376. td <- treedata(phy, data, sort = T)
  377. ouch.tre <- ape2ouch(td$phy->ape.tre, scale=FALSE)
  378. ouch.dat=data.frame(as.numeric(ouch.tre@nodes), as.character(ouch.tre@nodelabels))
  379. names(ouch.dat)=c("nodes","labels")
  380. ouch.dat[ouch.dat[,2]=="",2]=NA
  381. ouch.dat$trait=NA
  382. mM=match(ouch.dat$labels,names(data))
  383. for(i in 1:nrow(ouch.dat)){
  384. if(!is.na(mM[i])){
  385. ouch.dat$trait[i]=data[mM[i]]
  386. }
  387. }
  388. return(list(ouch.tre=ouch.tre, ouch.dat=ouch.dat, ape.tre=td$phy, orig.dat=td$data[,1]))
  389. }
  390. summarize.run<-function(cOK, cProp, cur.model) {
  391. names(cProp)<-names(cOK)<-c("rates", "rcats", "root", "alpha", "sRegimes", "theta")
  392. names(cProp)=paste("prop.",names(cProp),sep="")
  393. names(cOK)=paste("okay.",names(cOK),sep="")
  394. cat("\n")
  395. if(cur.model=="BM") {
  396. print(cProp[1:3])
  397. print(cOK[1:3])
  398. } else {
  399. print(cProp[c(1,2,4,5,6)])
  400. print(cOK[c(1,2,4,5,6)])
  401. }
  402. }
  403. generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {
  404. # bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=max(cur.delta.rates), root=cur.root, alpha=cur.alpha, reg=max(cur.regimes), theta=mean(cur.theta), lnL=curr.lnL)
  405. res=bundled.parms
  406. if(init) {
  407. if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "mean.rate", "rates", "alpha", "regimes", "mean.theta", "lnL", sep="\t")
  408. } else {
  409. if(cur.model=="BM") {
  410. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")
  411. } else {
  412. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.4f", res$alpha), res$reg, sprintf("%.3f", res$theta), sprintf("%.3f", res$lnL),sep="\t")
  413. }
  414. }
  415. write(msg, file=file, append=TRUE)
  416. }
  417. generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {
  418. if(init) {
  419. initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)
  420. } else {
  421. write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)
  422. }
  423. }
  424. tally.mcmc.parms<-function(startparms, endparms, cOK) {
  425. index=array(dim=(length(startparms)-1))
  426. for(i in 1:(length(startparms)-1)) {
  427. index[i]=all(startparms[[i]]==endparms[[i]])
  428. }
  429. if(!all(index)) {
  430. cOK[max(which(index==FALSE))]=cOK[max(which(index==FALSE))]+1
  431. }
  432. if(length(startparms$theta) == length(endparms$theta)) {
  433. if(!all(sort(startparms$theta)==sort(endparms$theta))) {
  434. cOK[6]=cOK[6]+1
  435. }
  436. }
  437. cOK
  438. }