/archive/source.archive/bm.only/trait.rjmcmc.11152010.R

http://github.com/eastman/auteur · R · 738 lines · 627 code · 79 blank · 32 comment · 126 complexity · 711f513ed6475d73e7e22fa77771b7a0 MD5 · raw file

  1. ## Written by LJ HARMON (2008), AL HIPP (2008), and JM EASTMAN (2010)
  2. rjMCMC.trait<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, lambdaK=log(2), constrainK=FALSE, jumpsize=2, fileBase="result", simplestart=FALSE)
  3. {
  4. model="BM"
  5. heat=1
  6. # require(ouch)
  7. require(geiger)
  8. ### prepare data for rjMCMC
  9. cur.model <- model
  10. dataList <- prepare.data(phy, data)
  11. ape.tre <- cur.tre <- dataList$ape.tre
  12. orig.dat <- dataList$orig.dat
  13. nn <- length(ape.tre$edge.length)
  14. node.des <- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  15. names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  16. subtrees <- subtrees(phy)
  17. subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
  18. # initialize parameters
  19. if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
  20. if(simplestart | is.numeric(constrainK) ) {
  21. if(is.numeric(constrainK)) {
  22. init.rate <- generate.starting.point(data, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize)
  23. } else {
  24. init.rate <- list(values=rep(fitContinuous(phy,data)$Trait1$beta,length(phy$edge.length)),delta=rep(0,length(phy$edge.length)))
  25. }
  26. } else {
  27. init.rate <- generate.starting.point(data, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize )
  28. }
  29. cur.rates <- init.rate$values
  30. cur.delta.rates <- init.rate$delta
  31. cur.root <- adjust.value(mean(orig.dat), jumpsize)
  32. cur.tre <- apply.BMM.to.apetree(ape.tre, cur.rates)
  33. mod.cur = new.bm.lik.fn(cur.rates, cur.root, cur.tre, orig.dat)
  34. # proposal counts
  35. nRateProp = 0
  36. nRateSwapProp = 0
  37. nRcatProp = 0
  38. nRootProp = 0
  39. nRateOK = 0
  40. nRateSwapOK = 0
  41. nRcatOK = 0
  42. nRootOK = 0
  43. cOK=c(
  44. nRateOK,
  45. nRateSwapOK,
  46. nRcatOK,
  47. nRootOK
  48. )
  49. tickerFreq=ceiling(ngen/30)
  50. # file handling
  51. parmBase=paste(model, fileBase, "parameters/",sep=".")
  52. if(!file.exists(parmBase)) dir.create(parmBase)
  53. parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
  54. errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
  55. generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
  56. runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
  57. generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
  58. ### Begin rjMCMC
  59. for (i in 1:ngen) {
  60. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  61. startparms = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp)
  62. ## BM IMPLEMENTATION ##
  63. while(1) {
  64. if (runif(1) < (2 * probMergeSplit) & !constrainK) { # adjust rate categories
  65. nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, node.des, theta=FALSE, lambdaK, jumpsize)
  66. new.rates=nr$new.values
  67. new.delta.rates=nr$new.delta
  68. new.root=cur.root
  69. nRcatProp=nRcatProp+1
  70. lnHastingsRatio=nr$lnHastingsRatio
  71. lnPriorRatio=nr$lnPriorRatio
  72. break()
  73. } else {
  74. if(runif(1)<probRoot) { # adjust root
  75. new.root=adjust.value(cur.root, jumpsize)
  76. new.rates=cur.rates
  77. new.delta.rates=cur.delta.rates
  78. nRootProp=nRootProp+1
  79. break()
  80. } else {
  81. if(runif(1)>0.1 & sum(cur.delta.rates)!=0) { # swap rate classes
  82. nr=switcheroo(cur.delta.rates, cur.rates, ape.tre, node.des, theta=FALSE, jumpsize)
  83. new.rates=nr$new.values ##
  84. new.delta.rates=nr$new.delta ##
  85. new.root=cur.root
  86. nRateSwapProp=nRateSwapProp+1
  87. break()
  88. } else { # adjust rates
  89. new.rates=adjust.rate(cur.rates, jumpsize)
  90. new.delta.rates=cur.delta.rates
  91. new.root=cur.root
  92. nRateProp=nRateProp+1
  93. break()
  94. }
  95. }
  96. }
  97. }
  98. new.tre=apply.BMM.to.apetree(ape.tre, new.rates)
  99. mod.new=try(new.bm.lik.fn(new.rates, new.root, new.tre, orig.dat),silent=TRUE)
  100. if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=Inf}
  101. if(!is.infinite(mod.new$lnL)) {
  102. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  103. } else {
  104. lnLikelihoodRatio = -Inf
  105. failures=paste("./failed", model, "parameters/",sep=".")
  106. failed.trees=paste(failures, "trees", sep="/")
  107. failed.plots=paste(failures, "pdf", sep="/")
  108. lapply(list(failures, failed.trees, failed.plots), function(x) if(!file.exists(x)) dir.create(x))
  109. pdf(file=paste(failed.plots, paste(i,fileBase,"failed.model.pdf",sep="."),sep="/"))
  110. plot(new.tre, tip=FALSE)
  111. dev.off()
  112. write.tree(new.tre, file=paste(failed.trees, paste(i,fileBase,"tre",sep="."),sep="/"))
  113. write.table(matrix(c(i, new.rates),nrow=1),file=paste(failures,"failed.rates.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
  114. write.table(matrix(c(i, new.root),nrow=1),file=paste(failures,"failed.root.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
  115. }
  116. # compare likelihoods
  117. endparms = c(nRateProp=nRateProp, nRateSwapProp=nRateSwapProp, nRcatProp=nRcatProp, nRootProp=nRootProp)
  118. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  119. if (runif(1) <= r$r) { # adopt proposal
  120. decision="adopt"
  121. cur.root <- new.root
  122. cur.rates <- new.rates
  123. cur.delta.rates <- new.delta.rates
  124. curr.lnL <- mod.new$lnL
  125. cur.tre <- new.tre
  126. mod.cur <- mod.new
  127. cOK <- determine.accepted.proposal(startparms, endparms, cOK)
  128. } else { # deny proposal
  129. decision="reject"
  130. curr.lnL <- mod.cur$lnL
  131. }
  132. if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
  133. if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
  134. if(i%%tickerFreq==0) {
  135. if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
  136. cat(". ")
  137. }
  138. if(i%%sampleFreq==0) {
  139. bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, lnL=curr.lnL)
  140. generate.log(bundled.parms, cur.model, file=runLog)
  141. parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root, cur.rates=cur.rates, cur.delta.rates=cur.delta.rates, parmBase=parmBase)
  142. }
  143. }
  144. # End rjMCMC
  145. close(errorLog)
  146. close(runLog)
  147. summarize.run(cOK, endparms, cur.model)
  148. }
  149. ## AUXILIARY FUNCTIONS
  150. new.bm.lik.fn <- function(rates, root, ape.tre, orig.dat) { # from LJ HARMON
  151. # mod 10.18.2010 JM Eastman: using determinant()$modulus rather than det() to stay in log-space
  152. tree=ape.tre
  153. y=orig.dat
  154. b <- vcv.phylo(ape.tre)
  155. w <- rep(root, nrow(b))
  156. num <- -t(y-w)%*%solve(b)%*%(y-w)/2
  157. den <- 0.5*(length(y)*log(2*pi) + as.numeric(determinant(b)$modulus))
  158. list(
  159. root=root,
  160. lnL = (num-den)[1,1]
  161. )
  162. }
  163. split.or.merge <- function(cur.delta, cur.values, phy, node.des, theta=FALSE, lambda=lambda, jumpsize=jumpsize) {
  164. # mod 09.17.2010 JM Eastman
  165. bb=cur.delta
  166. vv=cur.values
  167. names(vv)<-names(bb)<-phy$edge[,2]
  168. new.bb=choose.one(bb)
  169. new.vv=vv
  170. s=names(new.bb[bb!=new.bb])
  171. all.shifts=as.numeric(names(bb[bb>0]))
  172. all.D=node.des[[which(names(node.des)==s)]]
  173. if(length(all.D)!=0) {
  174. untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)node.des[[which(names(node.des)==x)]]))
  175. remain.unchanged=union(all.shifts, untouchable)
  176. } else {
  177. untouchable=NULL
  178. remain.unchanged=list()
  179. }
  180. marker=match(s, names(new.vv))
  181. nn=length(vv)
  182. K=sum(bb)+1
  183. N=Ntip(phy)
  184. if(sum(new.bb)>sum(bb)) { # add transition: SPLIT
  185. # print("s")
  186. if(theta) { # assign new theta to current node
  187. new.vv[marker] <- nr <- adjust.value(new.vv[marker], jumpsize)
  188. } else { # assign new rate to current node
  189. new.vv[marker] <- nr <- adjust.rate(new.vv[marker], jumpsize)
  190. }
  191. if(length(remain.unchanged)==0) { # assign new value to all descendants
  192. new.vv[match(all.D,names(new.vv))] = nr
  193. } else { # assign new value to all 'open' descendants
  194. new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
  195. }
  196. lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  197. lnPriorRatio = log(ptpois(K+1,lambda,nn)/ptpois(K,lambda,nn))
  198. } else { # drop transition: MERGE
  199. # print("m")
  200. anc = get.ancestor.of.node(s, phy)
  201. if(!is.root(anc, phy)) { # substitute ancestral value for current
  202. new.vv[match(s, names(new.vv))] <- nr <- new.vv[match(anc, names(new.vv))]
  203. } else {
  204. sister.tmp=get.desc.of.node(anc,phy)
  205. sister=sister.tmp[sister.tmp!=s]
  206. if(bb[match(sister, names(bb))]==0) {
  207. nr=as.numeric(vv[match(sister, names(vv))])
  208. } else {
  209. if(theta) { # assign new theta to first descendant of root
  210. new.vv[marker] <- nr <- adjust.value(new.vv[marker], jumpsize)
  211. } else { # assign new rate to first descendant of root
  212. new.vv[marker] <- nr <- adjust.rate(new.vv[marker], jumpsize)
  213. }
  214. }
  215. }
  216. if(length(remain.unchanged)==0) { # assign new value to all descendants
  217. new.vv[match(all.D, names(new.vv))] = nr
  218. } else { # assign new value to all 'open' descendants
  219. new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr
  220. }
  221. lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  222. lnPriorRatio = log(ptpois(K-1,lambda,nn)/ptpois(K,lambda,nn))
  223. }
  224. if(((length(table(new.vv))-1)-sum(new.bb))!=0) fail=TRUE else fail=FALSE
  225. return(list(new.delta=new.bb, new.values=new.vv, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio, fail=fail))
  226. }
  227. switcheroo <- function(cur.delta, cur.values, phy, node.des, theta=FALSE, jumpsize) {
  228. bb=cur.delta
  229. vv=cur.values
  230. new.bb=sample(bb)
  231. names(vv)<-names(bb)<-names(new.bb)<-phy$edge[,2]
  232. new.vv=vv
  233. old.shifts=as.numeric(names(bb[bb!=0]))
  234. old.values=vv[match(old.shifts, names(vv))]
  235. anc=lapply(old.shifts, function(x)get.ancestor.of.node(x,phy))
  236. if(!is.root(min(unlist(anc)),phy)) {
  237. # find most ancestral value in tree
  238. anc.value=unname(vv[match(min(unlist(anc)),names(vv))])
  239. } else {
  240. if(theta) anc.value=mean(sapply(old.values, function(x) adjust.value(x, jumpsize))) else anc.value=mean(sapply(old.values, function(x) adjust.rate(x, jumpsize)))
  241. }
  242. all.desc.anc=node.des[[which(names(node.des)==min(unlist(anc)))]]
  243. new.vv[match(all.desc.anc,names(new.vv))] = anc.value
  244. new.shifts=sort(as.numeric(names(new.bb[new.bb!=0])))
  245. new.tips=new.shifts[new.shifts<=Ntip(phy)]
  246. new.ints=new.shifts[new.shifts>Ntip(phy)]
  247. new.shifts=c(new.ints, new.tips)
  248. new.values=as.numeric(old.values)[sample(1:length(old.values))]
  249. for(z in 1:length(new.shifts)) {
  250. node=new.shifts[z]
  251. new.vv[match(node, names(new.vv))]=new.values[z]
  252. new.vv[match(node.des[[which(names(node.des)==node)]],names(new.vv))]=new.values[z]
  253. }
  254. return(list(new.delta=new.bb, new.values=new.vv))
  255. }
  256. generate.starting.point <- function(data, phy, node.des, theta=FALSE, K=FALSE, jumpsize) {
  257. # updated JME 11.14.2010
  258. nn=length(phy$edge.length)
  259. ntip=Ntip(phy)
  260. if(!K) nshifts=rtpois(1,log(ntip),nn) else nshifts=K-1
  261. if(nshifts!=0) bb=sample(c(rep(1,nshifts),rep(0,nn-nshifts)),replace=FALSE) else bb=rep(0,nn)
  262. names(bb)=phy$edge[,2]
  263. shifts=as.numeric(names(bb)[bb==1])
  264. if(theta) {
  265. min.max=c(adjust.value(min(data), jumpsize), adjust.value(max(data), jumpsize))
  266. values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
  267. } else {
  268. init.rate=fitContinuous(phy,data)[[1]]$beta
  269. min.max=c(adjust.rate(init.rate, jumpsize), adjust.rate(init.rate, jumpsize))
  270. values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
  271. }
  272. internal.shifts<-tip.shifts<-numeric(0)
  273. internal.shifts=sort(shifts[shifts>ntip])
  274. tip.shifts=shifts[shifts<=ntip]
  275. if(length(internal.shifts)==0 & length(tip.shifts)==0) {
  276. vv=rep(values, nn)
  277. } else {
  278. vv=bb
  279. vv[]=values[length(values)]
  280. i=0
  281. if(length(internal.shifts)!=0) {
  282. for(i in 1:length(internal.shifts)) {
  283. d=node.des[which(names(node.des)==internal.shifts[i])]
  284. vv[match(c(internal.shifts[i], unlist(d)), names(vv))]=values[i]
  285. }
  286. }
  287. if(length(tip.shifts)!=0) {
  288. for(j in 1:length(tip.shifts)) {
  289. vv[match(tip.shifts[j], names(vv))]=values[j+i]
  290. }
  291. }
  292. }
  293. return(list(delta=unname(bb), values=unname(vv)))
  294. }
  295. assign.root.theta <- function(cur.theta, phy) {
  296. root=phy$edge[1,1]
  297. desc=phy$edge[which(phy$edge[,1]==root),2]
  298. root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]
  299. root.theta
  300. }
  301. get.descendants.of.node <- function(node, phy) {
  302. storage=list()
  303. if(is.null(node)) node=phy$edge[1,1]
  304. if(node%in%phy$edge[,1]) {
  305. desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))
  306. while(any(desc%in%phy$edge[,1])) {
  307. desc.true=desc[which(desc%in%phy$edge[,1])]
  308. desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))
  309. storage=list(storage,union(desc,desc.desc))
  310. desc=unlist(desc.desc)
  311. }
  312. storage=sort(unique(unlist(union(desc,storage))))
  313. }
  314. return(storage)
  315. }
  316. get.desc.of.node <- function(node, phy) {
  317. return(phy$edge[which(phy$edge[,1]==node),2])
  318. }
  319. get.ancestor.of.node<-function(node, phy) {
  320. anc=phy$edge[which(phy$edge[,2]==node),1]
  321. anc
  322. }
  323. choose.one <- function(cur.delta)
  324. {
  325. bb=cur.delta
  326. s=sample(1:length(bb),1)
  327. bb[s]=1-bb[s]
  328. bb
  329. }
  330. is.root<-function(node,phy) {
  331. if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)
  332. }
  333. assess.lnR <- function(lnR) {
  334. if(is.na(lnR) || abs(lnR)==Inf) {
  335. error=TRUE
  336. r=0
  337. } else {
  338. if(lnR < -20) {
  339. r=0
  340. } else if(lnR > 0) {
  341. r=1
  342. } else {
  343. r=exp(lnR)
  344. }
  345. error=FALSE
  346. }
  347. return(list(r=r, error=error))
  348. }
  349. apply.BMM.to.apetree<-function(apetree, rates) {
  350. apetree$edge.length=apetree$edge.length*rates
  351. return(apetree)
  352. }
  353. adjust.value <- function(value, jumpsize) {
  354. # mod 10.20.2010 JM Eastman
  355. vv=value
  356. rch <- runif(1, min = -jumpsize, max = jumpsize)
  357. return(vv[]+rch)
  358. }
  359. adjust.rate <- function(rate, jumpsize) {
  360. # mod 10.20.2010 JM Eastman
  361. vv=rate
  362. v=log(vv)
  363. rch <- runif(1, min = -jumpsize, max = jumpsize)
  364. v=exp(v[]+rch)
  365. v
  366. }
  367. prepare.data <- function(phy, data) {
  368. td <- treedata(phy, data, sort = T)
  369. return(list(ape.tre=td$phy, orig.dat=td$data[,1]))
  370. }
  371. summarize.run<-function(cOK, endparms, cur.model) {
  372. # end = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp, nAlphaProp, nThetaProp, nThetaSwapProp, nTcatProp)
  373. if(cur.model=="BM") {
  374. names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root")
  375. } else {
  376. names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root", "alpha", "theta", "theta.swap", "theta.catg")
  377. }
  378. names(endparms)=paste("pr.",names(endparms),sep="")
  379. names(cOK)=paste("ok.",names(cOK),sep="")
  380. cat("\n")
  381. if(cur.model=="BM") {
  382. print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
  383. print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
  384. } else {
  385. print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "alpha", "theta", "theta.swap", "theta.catg"),sep="")])
  386. print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "alpha", "theta", "theta.swap", "theta.catg"),sep="")])
  387. }
  388. }
  389. generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {
  390. # bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=sum(cur.delta.theta)+1, theta=mean(cur.theta), lnL=curr.lnL)
  391. res=bundled.parms
  392. if(init) {
  393. if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "mean.rate", "rates", "alpha", "regimes", "mean.theta", "lnL", sep="\t")
  394. } else {
  395. if(cur.model=="BM") {
  396. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")
  397. } else {
  398. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.4f", res$alpha), res$reg, sprintf("%.3f", res$theta), sprintf("%.3f", res$lnL),sep="\t")
  399. }
  400. }
  401. write(msg, file=file, append=TRUE)
  402. }
  403. parlogger<-function(model, init=FALSE, node.des, i, curr.lnL, cur.root=NULL, cur.rates=NULL, cur.delta.rates=NULL, cur.alpha=NULL, cur.theta=NULL, cur.delta.theta=NULL, parmBase) {
  404. if(init) {
  405. msg.long=names(node.des[-1])
  406. if(model=="BM") {
  407. msg.short=paste("gen", "model", "lnL", sep="\t")
  408. parlogs=paste(parmBase, paste(c("summary", "root", "rates", "rate.shifts"), "txt", sep="."), sep="")
  409. sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  410. sapply(parlogs[2], function(x) write.table(NULL, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  411. sapply(parlogs[3:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  412. } else {
  413. msg.short=paste("gen", "model", "lnL", sep="\t")
  414. parlogs=paste(parmBase, paste(c("summary", "root", "alpha", "rates", "rate.shifts", "optima", "optima.shifts"), "txt", sep="."), sep="")
  415. sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  416. sapply(parlogs[2:3], function(x) write.table(NULL, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  417. sapply(parlogs[4:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  418. }
  419. } else {
  420. if(model=="BM") {
  421. parlogs=paste(parmBase, paste(c("summary", "root", "rates", "rate.shifts"), "txt", sep="."), sep="")
  422. outlist=list(paste(i, model, curr.lnL, sep="\t"), cur.root, cur.rates, cur.delta.rates)
  423. sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE))
  424. } else {
  425. parlogs=paste(parmBase, paste(c("summary", "root", "alpha", "rates", "rate.shifts", "optima", "optima.shifts"), "txt", sep="."), sep="")
  426. outlist=list(paste(i, model, curr.lnL, sep="\t"), cur.root, cur.alpha, cur.rates, cur.delta.rates, cur.theta, cur.delta.theta)
  427. sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE))
  428. }
  429. }
  430. }
  431. generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {
  432. if(init) {
  433. initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)
  434. } else {
  435. write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)
  436. }
  437. }
  438. determine.accepted.proposal<-function(startparms, endparms, cOK) {
  439. which(startparms!=endparms)->marker
  440. cOK[marker]=cOK[marker]+1
  441. cOK
  442. }
  443. rtpois<-function(N, lambda, k) {
  444. p=ppois(k, lambda, lower.tail=FALSE)
  445. out=qpois(runif(N, min=0, max=1-p), lambda)
  446. out
  447. }
  448. ptpois<-function(x, lambda, k) {
  449. p.k=ppois(k, lambda, lower.tail=FALSE)
  450. p.x=ppois(x, lambda, lower.tail=FALSE)
  451. ptp=p.x/(1-p.k)
  452. return(ptp)
  453. }
  454. ## SUMMARIZING MCMC -- PLOTTING FUNCTIONS ##
  455. shifts.plot=function(phy, base.dir, burnin=0, level=0.03, internal.only=FALSE, paint.branches=TRUE, pdf.base=NULL, verbosity.base=NULL, ...) {
  456. color.length=21
  457. require(ape)
  458. oldwd=getwd()
  459. setwd(base.dir)
  460. files=dir()
  461. # if(optima) {
  462. # shifts.file=files[grep("optima.shifts.txt", files)]
  463. # estimates.file=files[grep("optima.txt", files)]
  464. # } else {
  465. shifts.file=files[grep("rate.shifts.txt", files)]
  466. estimates.file=files[grep("rates.txt", files)]
  467. # }
  468. if(!is.null(pdf.base)) lab=paste(pdf.base, paste("bi", burnin, sep=""), paste("fq", level, sep=""), sep=".")
  469. cat("READING shifts...\n")
  470. results=read.table(shifts.file)
  471. names(results)=as.numeric(results[1,])
  472. results=results[-c(1:(burnin+1)),]
  473. if(!all(names(results)%in%phy$edge[,2])) stop("Cannot process results: check that tree matches posterior summaries")
  474. hits.tmp=apply(results,1,sum)
  475. hits=length(hits.tmp[hits.tmp>0])
  476. aa.tmp=apply(results, 2, function(x) sum(x))
  477. aa.tmp=aa.tmp/hits
  478. aa=aa.tmp[aa.tmp>=level]
  479. aa=aa[order(aa, decreasing=TRUE)]
  480. aa.nodes=aa[which(as.numeric(names(aa))>Ntip(phy))]
  481. aa.tips=aa[which(as.numeric(names(aa))<=Ntip(phy))]
  482. aa=aa.nodes
  483. print(aa.tips)
  484. nodes=as.numeric(names(aa))
  485. nodes=nodes[nodes>Ntip(phy)]
  486. desc=lapply(nodes, function(x) {
  487. foo=get.descendants.of.node(x,phy)
  488. if(length(foo)) return(phy$tip.label[foo[foo<=Ntip(phy)]]) else return(NULL)
  489. }
  490. )
  491. shifts=apply(results, 1, sum)
  492. cat("READING estimates...\n")
  493. ests=read.table(estimates.file)
  494. names(ests)=ests[1,]
  495. ests=ests[-c(1:(burnin+1)),]
  496. average.ests.tmp=apply(ests,2,mean)
  497. average.ests=average.ests.tmp-median(average.ests.tmp)
  498. if(paint.branches) {
  499. require(colorspace)
  500. # cce=diverge_hcl(color.length, c = c(100, 0), l = c(50, 90), power = 1.1)
  501. cce=diverge_hcl(2*color.length, power = 0.5)
  502. e.seq=seq(min(average.ests),max(average.ests),length=color.length)
  503. color.gamut=seq(-max(abs(e.seq)), max(abs(e.seq)), length=length(cce))
  504. colors.branches=sapply(average.ests, function(x) cce[which(min(abs(color.gamut-x))==abs(color.gamut-x))])
  505. colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]
  506. } else {
  507. colors.branches=1
  508. }
  509. if(length(nodes)) {
  510. require(colorspace)
  511. cols=sapply(nodes, function(x) {
  512. a=get.ancestor.of.node(x, phy)
  513. comp=ests[,which(names(ests)==a)]
  514. this=ests[,which(names(ests)==x)]
  515. if(!length(comp)) { # dealing with first descendant of root
  516. d=get.desc.of.node(a, phy)
  517. d=d[which(d!=x)]
  518. # find if sister descendant also experienced rate shift
  519. d.shifts=results[, which(names(results)==d)]
  520. comp=ests[,which(names(ests)==d)]
  521. x.res=sapply(1:length(d.shifts), function(y) {
  522. if(d.shifts[y]==0) {
  523. zz=this[y]-comp[y]
  524. if(zz>0) {
  525. return(1)
  526. } else {
  527. if(zz<0) return(-1) else return(0)
  528. }
  529. } else {
  530. return(0)
  531. }
  532. }
  533. )
  534. x.res=mean(x.res[x.res!=0])
  535. } else {
  536. yy=this-comp
  537. zz=yy
  538. zz[yy>0]=1
  539. zz[yy<0]=-1
  540. zz[yy==0]=0
  541. x.res=mean(zz[zz!=0])
  542. }
  543. return(x.res)
  544. }
  545. )
  546. ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
  547. c.seq=round(seq(-1,1,by=0.1),digits=1)
  548. colors.nodes=ccx[match(round(cols,1),c.seq)]
  549. names(colors.nodes)=nodes
  550. colors.nodes=colors.nodes[which(as.numeric(names(colors.nodes))>Ntip(phy))]
  551. } else {
  552. colors.nodes=NULL
  553. cols=NULL
  554. }
  555. # send to pdf
  556. if(!is.null(pdf.base)) pdf(file=paste(oldwd, paste(lab,"pdf",sep="."),sep="/"))
  557. plot(phy, cex=0.1, lwd=0.4, edge.width=0.5, edge.color=colors.branches, ...)
  558. nn=(Ntip(phy)+1):max(phy$edge[,2])
  559. ll<-cc<-rr<-rep(0,length(nn))
  560. ll[nn%in%nodes]=1
  561. if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]/max(aa)
  562. if(is.null(colors.nodes)) {
  563. nodelabels(pch=ifelse(ll==1, 21, NA), bg=ifelse(ll==1, "black", "white"), cex=2*cc)
  564. } else {
  565. rr[nn%in%nodes]=colors.nodes[order(names(colors.nodes))]
  566. nodelabels(pch=ifelse(ll==1, 21, NA), cex=2*cc, col=rr, bg=rr, lwd=0.5)
  567. if(length(aa.tips) & !internal.only) {
  568. tt=rep(0,Ntip(phy))
  569. tt[(1:Ntip(phy))[as.numeric(names(aa.tips))]]=1
  570. tt.cex=ifelse(tt==1, aa.tips/max(aa.tmp), 0)
  571. tt.col=sapply(names(aa.tips), function(x) {
  572. a=get.ancestor.of.node(x, phy)
  573. comp=ests[,which(names(ests)==a)]
  574. this=ests[,which(names(ests)==x)]
  575. if(!length(comp)) { # dealing with first descendant of root
  576. d=get.desc.of.node(a, phy)
  577. d=d[which(d!=x)]
  578. # find if sister descendant also experienced rate shift
  579. d.shifts=results[, which(names(results)==d)]
  580. comp=ests[,which(names(ests)==d)]
  581. x.res=sapply(1:length(d.shifts), function(y) {
  582. if(d.shifts[y]==0) {
  583. zz=this[y]-comp[y]
  584. if(zz>0) {
  585. return(1)
  586. } else {
  587. if(zz<0) return(-1) else return(0)
  588. }
  589. } else {
  590. return(0)
  591. }
  592. }
  593. )
  594. x.res=mean(x.res[x.res!=0])
  595. } else {
  596. yy=this-comp
  597. zz=yy
  598. zz[yy>0]=1
  599. zz[yy<0]=-1
  600. zz[yy==0]=0
  601. x.res=mean(zz[zz!=0])
  602. }
  603. return(x.res)
  604. }
  605. )
  606. tt.ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
  607. tt.c.seq=round(seq(-1,1,by=0.1),digits=1)
  608. tt.colors.nodes=tt.ccx[match(round(tt.col,1),tt.c.seq)]
  609. names(tt.colors.nodes)=as.numeric(names(aa.tips))
  610. colors.tips.tmp=tt.colors.nodes[order(as.numeric(names(tt.colors.nodes)))]
  611. colors.tips=rep(NA, Ntip(phy))
  612. colors.tips[(1:Ntip(phy))[as.numeric(names(aa.tips))]]=colors.tips.tmp
  613. tiplabels(pch=ifelse(tt==1, 21, NA), cex=2*tt.cex, col=colors.tips, bg=colors.tips, lwd=0.5)
  614. }
  615. legend("bottomleft", title=toupper("direction"), cex=0.5, pt.cex=1, text.col="darkgray", legend = sprintf("%6.1f", rev(c.seq[seq(1,length(c.seq),by=2)])), pch=21, ncol=1, col = "darkgray", pt.bg = rev(ccx[seq(1,length(c.seq),by=2)]), box.lty="blank", border="white")
  616. if(length(nodes)) legend("bottomright", title=toupper("frequency"), cex=0.5, pt.cex=2*(seq(min(cc), max(cc), length=6)), text.col="darkgray", legend = sprintf("%10.3f", seq(round(min(aa),2), round(max(aa),2), length=6)), pch=21, ncol=1, col = "darkgray", pt.bg = "white", box.lty="blank", border="white")
  617. }
  618. if(!is.null(pdf.base)) dev.off()
  619. if(length(nodes)) names(cols)=names(aa)
  620. setwd(oldwd)
  621. names(desc)=paste("node",nodes,sep=".")
  622. if(!is.null(verbosity.base)) {
  623. summary.table=c(hits/nrow(results), mean(shifts), median(shifts))
  624. names(summary.table)=c("shift.prob", "mean.shift","median.shifts")
  625. write.table(summary.table, file=paste(verbosity.base, "run.summary.txt", sep="."), quote=FALSE, col.names=FALSE)
  626. if(length(nodes)) {
  627. for(nn in 1:length(nodes)) {
  628. write.table(c(nodes[nn], desc[[nn]]), file=paste(verbosity.base, "shift.descendants.txt", sep="."), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
  629. }
  630. }
  631. allres=merge(as.data.frame(aa),as.data.frame(cols),by=0)
  632. allres=allres[order(allres$aa, decreasing=TRUE),]
  633. names(allres)=c("node", "conditional.shift.probability", "relative.shift.direction")
  634. write.table(allres, file=paste(verbosity.base, "estimates.summary.txt", sep="."), quote=FALSE, row.names=FALSE)
  635. }
  636. return(list(desc=desc, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), shift.prop=aa, shift.direction=cols, mean.rates=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
  637. }
  638. plot.quant<-function(phy, data, data.names, shrinker=1, relative=TRUE, cex=0.5, adj=c(15,5), lwd=0.5, font=1, ...){
  639. std=function(x,max,min){return((x-min)/(max-min))}
  640. if(relative) {
  641. data=sapply(data, function(x) std(x, max(data), min(data)))
  642. }
  643. f=match(phy$tip.label, data.names)
  644. if(is.na(sum(f))) {
  645. stop("tips cannot be matched to tree")
  646. }
  647. else {
  648. data=data[f]
  649. phy$trait=data
  650. plot.phylo(phy, show.tip.label=T, label.offset=adj[1], cex=cex, font=font, ...)
  651. tiplabels(text=NULL, pch=21, adj=adj[2], cex=c(phy$trait/shrinker), bg="white",lwd=lwd)
  652. }
  653. }
  654. prior.posterior.plot=function(data, rpois.lambda) {
  655. # data should be a vector of number of shifts, for instance, for all interations across the MCMC sampling (post-burnin)
  656. require(ggplot2)
  657. dataset <- data.frame(variable = gl(2, length(data), labels = c("posterior", "prior")), value = c(data, rpois(length(data),rpois.lambda)))
  658. ggplot(data = dataset, aes(x = value, fill = variable)) + geom_histogram(position = "dodge") + scale_fill_manual(values = c("gray", "black"))
  659. }