/archive/source.archive/bm.and.ou/rjmcmc.12042010.R

http://github.com/eastman/auteur · R · 1310 lines · 1117 code · 140 blank · 53 comment · 222 complexity · 535a2e3626d1e9d9ed5c52c6c27f4d54 MD5 · raw file

  1. ## Written by LJ HARMON (2008), AL HIPP (2008), and JM EASTMAN (2010)
  2. # MODELING TRAIT EVOLUTION by BROWNIAN MOTION
  3. rjMCMC.bm<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, lambdaK=log(2), constrainK=FALSE, jumpsize=NULL, fileBase="result", simplestart=FALSE, switcheroo=TRUE)
  4. {
  5. # ngen=1000; sampleFreq=100; probMergeSplit=0.05; probRoot=0.01; lambdaK=log(2); constrainK=FALSE; jumpsize=NULL; fileBase="result"; simplestart=FALSE; switcheroo=TRUE
  6. model="BM"
  7. heat=1
  8. if(is.null(jumpsize)) jumpsize=log(2)
  9. require(geiger)
  10. ### prepare data for rjMCMC
  11. cur.model <- model
  12. dataList <- prepare.data(phy, data)
  13. ape.tre <- dataList$ape.tre
  14. orig.dat <- dataList$orig.dat
  15. nn <- length(ape.tre$edge.length)
  16. node.des <- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  17. names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  18. subtrees <- subtrees(phy)
  19. subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
  20. # initialize parameters
  21. if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
  22. if(simplestart | is.numeric(constrainK) ) {
  23. if(is.numeric(constrainK)) {
  24. init.rate <- generate.starting.point(orig.dat, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize)
  25. } else {
  26. init.rate <- list(values=rep(fitContinuous(ape.tre,orig.dat)$Trait1$beta,length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
  27. }
  28. } else {
  29. init.rate <- generate.starting.point(orig.dat, ape.tre, node.des, theta=FALSE, K=constrainK, jumpsize=jumpsize )
  30. }
  31. cur.rates <- init.rate$values
  32. cur.delta.rates <- init.rate$delta
  33. cur.root <- adjust.value(mean(orig.dat), jumpsize)
  34. # design.vcv <- design.vcv.phylo(ape.tre)
  35. cur.vcv <- update.vcv(ape.tre, cur.rates)
  36. mod.cur = new.bm.lik.fn(cur.rates, cur.root, orig.dat, cur.vcv)
  37. # proposal counts
  38. nRateProp = 0
  39. nRateSwapProp = 0
  40. nRcatProp = 0
  41. nRootProp = 0
  42. nRateOK = 0
  43. nRateSwapOK = 0
  44. nRcatOK = 0
  45. nRootOK = 0
  46. cOK=c(
  47. nRateOK,
  48. nRateSwapOK,
  49. nRcatOK,
  50. nRootOK
  51. )
  52. tickerFreq=ceiling(ngen/30)
  53. # file handling
  54. parmBase=paste(model, fileBase, "parameters/",sep=".")
  55. if(!file.exists(parmBase)) dir.create(parmBase)
  56. parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
  57. errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
  58. generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
  59. runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
  60. generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
  61. ### Begin rjMCMC
  62. for (i in 1:ngen) {
  63. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  64. startparms = c(nRateProp, nRateSwapProp, nRcatProp, nRootProp)
  65. ## BM IMPLEMENTATION ##
  66. while(1) {
  67. if (runif(1) < (2 * probMergeSplit) & !constrainK) { # adjust rate categories
  68. nr=split.or.merge(cur.delta.rates, cur.rates, ape.tre, node.des, lambdaK, theta=FALSE)
  69. new.rates=nr$new.values
  70. new.delta.rates=nr$new.delta
  71. new.root=cur.root
  72. nRcatProp=nRcatProp+1
  73. lnHastingsRatio=nr$lnHastingsRatio
  74. lnPriorRatio=nr$lnPriorRatio
  75. break()
  76. } else {
  77. if(runif(1)<probRoot) { # adjust root
  78. new.root=adjust.value(cur.root, jumpsize)
  79. new.rates=cur.rates
  80. new.delta.rates=cur.delta.rates
  81. nRootProp=nRootProp+1
  82. break()
  83. } else {
  84. if(runif(1)>0.1 & length(unique(cur.rates))>1 & switcheroo) { # swap rates classes
  85. nr=switcheroo(cur.rates)
  86. new.rates=nr$new.values ##
  87. new.delta.rates=cur.delta.rates
  88. new.root=cur.root
  89. nRateSwapProp=nRateSwapProp+1
  90. break()
  91. } else { # adjust rates
  92. new.rates=adjust.rate(cur.rates, jumpsize)
  93. new.delta.rates=cur.delta.rates
  94. new.root=cur.root
  95. nRateProp=nRateProp+1
  96. break()
  97. }
  98. }
  99. }
  100. }
  101. if(any(new.rates!=cur.rates)) new.vcv=update.vcv(ape.tre, new.rates) else new.vcv=cur.vcv
  102. mod.new=try(new.bm.lik.fn(new.rates, new.root, orig.dat, new.vcv), silent=TRUE)
  103. if(inherits(mod.new, "try-error")) {mod.new=as.list(mod.new); mod.new$lnL=Inf}
  104. if(!is.infinite(mod.new$lnL)) {
  105. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  106. } else {
  107. lnLikelihoodRatio = -Inf
  108. failures=paste("./failed", model, "parameters/",sep=".")
  109. if(!file.exists(failures)) dir.create(failures)
  110. write.table(matrix(c(i, new.rates),nrow=1),file=paste(failures,"failed.rates.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
  111. write.table(matrix(c(i, new.root),nrow=1),file=paste(failures,"failed.root.txt",sep="/"),append=TRUE,col=FALSE,row=FALSE,quote=FALSE)
  112. }
  113. # compare likelihoods
  114. endparms = c(nRateProp=nRateProp, nRateSwapProp=nRateSwapProp, nRcatProp=nRcatProp, nRootProp=nRootProp)
  115. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  116. if (runif(1) <= r$r) { # adopt proposal
  117. decision="adopt"
  118. cur.root <- new.root
  119. cur.rates <- new.rates
  120. cur.delta.rates <- new.delta.rates
  121. mod.cur <- mod.new
  122. cur.vcv <- new.vcv
  123. curr.lnL <- mod.new$lnL
  124. cOK <- determine.accepted.proposal(startparms, endparms, cOK)
  125. } else { # deny proposal
  126. decision="reject"
  127. curr.lnL <- mod.cur$lnL
  128. }
  129. if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
  130. if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
  131. if(i%%tickerFreq==0) {
  132. if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
  133. cat(". ")
  134. }
  135. if(i%%sampleFreq==0) {
  136. bundled.parms=list(gen=i, mrate=exp(mean(log(cur.rates))), cats=sum(cur.delta.rates)+1, root=cur.root, lnL=curr.lnL)
  137. generate.log(bundled.parms, cur.model, file=runLog)
  138. parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root, cur.rates=cur.rates, cur.delta.rates=cur.delta.rates, parmBase=parmBase)
  139. }
  140. }
  141. # End rjMCMC
  142. close(errorLog)
  143. close(runLog)
  144. summarize.run(cOK, endparms, cur.model)
  145. }
  146. rjMCMC.ou<-function (phy, data, ngen=1000, sampleFreq=100, probMergeSplit=0.05, probRoot=0.01, probRate=0.1, probAlpha=0.01, upperAlpha=100, lambdaK=log(2), constrainK=FALSE, jumpsize=2, fileBase="result", simplestart=FALSE, switcheroo=TRUE)
  147. {
  148. # load("rjMCMC/versions/fakedata.rda"); phy=sims$phy; data=sims[[2]]; ngen=1000; sampleFreq=100; probMergeSplit=0.05; probRoot=0.01; probRate=0.1; probAlpha=0.01; lambdaK=log(2); constrainK=FALSE; jumpsize=2; upperAlpha=100; fileBase="result"; simplestart=FALSE
  149. model="OU"
  150. heat=1
  151. require(ouch)
  152. require(geiger)
  153. abs.low.alpha=1e-13
  154. if(length(upperAlpha)==2 & upperAlpha[1]>abs.low.alpha) {
  155. alpha.bounds=upperAlpha
  156. } else if(!is.null(upperAlpha) & is.numeric(upperAlpha)){
  157. alpha.bounds=c(abs.low.alpha, upperAlpha)
  158. } else {
  159. alpha.bounds=c(abs.low.alpha, 100)
  160. }
  161. ### prepare data for rjMCMC
  162. cur.model <- model
  163. dataList <- prepare.ouchdata(phy, data)
  164. ape.tre <- dataList$ape.tre
  165. orig.dat <- dataList$orig.dat
  166. ouch.tre <- dataList$ouch.tre
  167. ouch.dat <- dataList$ouch.dat
  168. ape.to.ouch.order <- match(as.numeric(names(dataList$ape.ordered[-1])), as.numeric(ape.tre$edge[,2]))
  169. nn <- length(ape.tre$edge.length)
  170. node.des <- sapply(unique(c(ape.tre$edge[1,1],ape.tre$edge[,2])), function(x) get.descendants.of.node(x, ape.tre))
  171. names(node.des) <- c(ape.tre$edge[1,1], unique(ape.tre$edge[,2]))
  172. subtrees <- subtrees(phy)
  173. subtrees.marker <- sapply(subtrees, function(x) return(min(x$node.label)))
  174. # initialize parameters
  175. if(is.numeric(constrainK) & (constrainK > length(ape.tre$edge) | constrainK < 1)) stop("Constraint on rate shifts is nonsensical. Ensure that constrainK is at least 1 and less than the number of available nodes in the tree.")
  176. if(simplestart | is.numeric(constrainK) ) {
  177. if(is.numeric(constrainK)) {
  178. init.theta <- generate.starting.point(orig.dat, ape.tre, node.des, theta=TRUE, K=constrainK, jumpsize=jumpsize)
  179. } else {
  180. init.theta <- list(values=rep(mean(orig.dat),length(ape.tre$edge.length)),delta=rep(0,length(ape.tre$edge.length)))
  181. }
  182. } else {
  183. init.theta <- generate.starting.point(orig.dat, ape.tre, node.des, theta=TRUE, K=constrainK, jumpsize=jumpsize )
  184. }
  185. cur.rate <- fitContinuous(ape.tre,orig.dat)$Trait1$beta
  186. cur.alpha <- min(runif(1000, alpha.bounds[1], alpha.bounds[2]))
  187. cur.theta <- init.theta$values
  188. cur.delta.theta <- init.theta$delta
  189. cur.root.theta <- assign.root.theta(cur.theta,ape.tre)
  190. cur.regimes <- c(cur.root.theta,cur.theta)
  191. mod.cur = new.ou.lik.fn(cur.rate, cur.alpha, cur.regimes, cur.theta, ouch.dat, ouch.tre, ape.to.ouch.order)
  192. # proposal counts
  193. nRateProp = 0
  194. nRootProp = 0
  195. nAlphaProp = 0
  196. nTcatProp = 0
  197. nThetaProp = 0
  198. nRateOK = 0
  199. nRootOK = 0
  200. nAlphaOK = 0
  201. nTcatOK = 0
  202. nThetaOK = 0
  203. cOK=c(
  204. nRateOK,
  205. nRootOK,
  206. nAlphaOK,
  207. nTcatOK,
  208. nThetaOK
  209. )
  210. tickerFreq=ceiling(ngen/30)
  211. # file handling
  212. parmBase=paste(model, fileBase, "parameters/",sep=".")
  213. if(!file.exists(parmBase)) dir.create(parmBase)
  214. parlogger(model=model, init=TRUE, node.des, parmBase=parmBase)
  215. errorLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTraitErrors.log",sep="."),sep="/"),open='w+')
  216. generate.error.message(i=NULL, mod.cur=NULL, mod.new=NULL, lnR=NULL, errorLog=errorLog, init=TRUE)
  217. runLog=file(paste(parmBase,paste(cur.model, fileBase, "rjTrait.log",sep="."),sep="/"),open='w+')
  218. generate.log(bundled.parms=NULL, cur.model, file=runLog, init=TRUE)
  219. probSwap=max(c(probRoot, probRate, probAlpha))
  220. minorprobs=sum(c(probRoot, probRate, probAlpha, probSwap))
  221. ### Begin rjMCMC
  222. for (i in 1:ngen) {
  223. # loop to ensure that each state produces a sensible proposal
  224. while(1) {
  225. lnLikelihoodRatio <- lnHastingsRatio <- lnPriorRatio <- 0
  226. startparms = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
  227. ## OU IMPLEMENTATION ##
  228. if (runif(1) < (2 * probMergeSplit) & !constrainK) { # adjust theta categories
  229. nr=split.or.merge(cur.delta.theta, cur.theta, ape.tre, node.des, lambdaK, theta=TRUE)
  230. new.alpha=cur.alpha
  231. new.theta=nr$new.values ##
  232. new.delta.theta=nr$new.delta ##
  233. new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ##
  234. new.regimes=c(nrt,new.theta) ##
  235. new.rate=cur.rate
  236. nTcatProp=nTcatProp+1
  237. lnHastingsRatio=nr$lnHastingsRatio
  238. lnPriorRatio=nr$lnPriorRatio
  239. } else { # neither split nor merge
  240. while(1) {
  241. if(runif(1)<(probAlpha/minorprobs)) { # adjust alpha
  242. new.alpha=adjust.rate(cur.alpha, jumpsize) ##
  243. new.root.theta=cur.root.theta
  244. new.theta=cur.theta
  245. new.delta.theta=cur.delta.theta
  246. new.regimes=cur.regimes
  247. new.rate=cur.rate
  248. nAlphaProp=nAlphaProp+1
  249. break()
  250. } else if(runif(1)<(probRoot/minorprobs)) { # adjust root
  251. new.alpha=cur.alpha
  252. new.root.theta=assign.root.theta(cur.theta, ape.tre) ##
  253. new.theta=cur.theta
  254. new.delta.theta=cur.delta.theta
  255. new.regimes=cur.regimes
  256. new.rate=cur.rate
  257. nRootProp=nRootProp+1
  258. break()
  259. } else if(runif(1)<(probRate/minorprobs)) { # adjust rate
  260. new.alpha=cur.alpha
  261. new.root.theta=cur.root.theta
  262. new.theta=cur.theta
  263. new.delta.theta=cur.delta.theta
  264. new.regimes=cur.regimes
  265. new.rate=adjust.rate(cur.rate, jumpsize) ##
  266. nRateProp=nRateProp+1
  267. break()
  268. } else if(runif(1)<0.5*(probSwap/minorprobs) & length(unique(cur.theta))>1 & switcheroo) { # swap thetas
  269. nr=switcheroo(cur.theta)
  270. new.alpha=cur.alpha
  271. new.theta=nr$new.values ##
  272. new.delta.theta=cur.delta.theta
  273. new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ##
  274. new.regimes=c(nrt,new.theta) ##
  275. new.rate=cur.rate
  276. nThetaProp=nThetaProp+1
  277. break()
  278. } else { # adjust thetas
  279. new.alpha=cur.alpha
  280. new.theta=adjust.value(cur.theta, jumpsize, theta=TRUE) ##
  281. new.delta.theta=cur.delta.theta
  282. new.root.theta<-nrt<-assign.root.theta(new.theta, ape.tre) ##
  283. new.regimes=c(nrt,new.theta) ##
  284. new.rate=cur.rate
  285. nThetaProp=nThetaProp+1
  286. break()
  287. }
  288. }
  289. }
  290. if(within.range(new.alpha, alpha.bounds[1], alpha.bounds[2])) {
  291. mod.new=try(new.ou.lik.fn(new.rate, new.alpha, new.regimes, new.theta, ouch.dat, ouch.tre, ape.to.ouch.order),silent=TRUE)
  292. if(!inherits(mod.new, "try-error")) break()
  293. }
  294. }
  295. lnLikelihoodRatio = mod.new$lnL - mod.cur$lnL
  296. # compare likelihoods
  297. endparms = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
  298. r=assess.lnR((heat * lnLikelihoodRatio + heat * lnPriorRatio + lnHastingsRatio)->lnR)
  299. if (runif(1) <= r$r) { # adopt proposal
  300. decision="adopt"
  301. cur.alpha <- new.alpha
  302. cur.theta <- new.theta
  303. cur.delta.theta <- new.delta.theta
  304. cur.root.theta <- new.root.theta
  305. cur.regimes <- new.regimes
  306. cur.rate <- new.rate
  307. cOK <- determine.accepted.proposal(startparms, endparms, cOK)
  308. mod.cur <- mod.new
  309. curr.lnL <- mod.new$lnL
  310. } else { # deny proposal
  311. decision="reject"
  312. curr.lnL <- mod.cur$lnL
  313. }
  314. if(is.infinite(curr.lnL)) stop("starting point has exceptionally poor likelihood")
  315. if(r$error) generate.error.message(i, mod.cur, mod.new, lnR, errorLog)
  316. if(i%%tickerFreq==0) {
  317. if(i==tickerFreq) cat("|",rep(" ",9),toupper("generations complete"),rep(" ",9),"|","\n")
  318. cat(". ")
  319. }
  320. if(i%%sampleFreq==0) {
  321. bundled.parms=list(gen=i, rate=cur.rate, regimes=sum(cur.delta.theta)+1, root=cur.root.theta, mean.theta=mean(cur.theta), alpha=cur.alpha, lnL=curr.lnL)
  322. generate.log(bundled.parms, cur.model, file=runLog)
  323. parlogger(model=model, init=FALSE, node.des=node.des, i=i, curr.lnL=curr.lnL, cur.root=cur.root.theta, cur.rates=cur.rate, cur.alpha=cur.alpha, cur.theta=cur.theta, cur.delta.theta=cur.delta.theta, parmBase=parmBase)
  324. }
  325. }
  326. # End rjMCMC
  327. close(errorLog)
  328. close(runLog)
  329. summarize.run(cOK, endparms, cur.model)
  330. }
  331. ## AUXILIARY FUNCTIONS
  332. new.ou.lik.fn <- function(rate, alpha, regimes, theta, ouch.dat, ouch.tre, ape.to.ouch.order) { # from OUCH 2.7-1:::hansen()
  333. # mod 11.17.2010 JM Eastman; checked -- returns very similar if tested against fitted values from hansen()
  334. # rate=new.rate; alpha=new.alpha; regimes=new.regimes; theta=new.theta
  335. ouch.dat$regimes=as.factor(regimes[c(1,ape.to.ouch.order+1)])
  336. # theta=sort(unique(theta))
  337. alpha.ouch=as.matrix(alpha)
  338. n <- length(which(!is.na(ouch.dat['trait'])))
  339. beta=ouch:::regime.spec(ouch.tre, ouch.dat['regimes'])
  340. theta=as.numeric(levels(ouch:::sets.of.regimes(ouch.tre,ouch.dat['regimes'])$regimes))
  341. dat=ouch.dat$trait[n:nrow(ouch.dat)]
  342. names(dat)=ouch.dat$labels[n:nrow(ouch.dat)]
  343. ev <- eigen(alpha.ouch,symmetric=TRUE)
  344. w <- .Call(ouch:::ouch_weights,object=ouch.tre,lambda=ev$values,S=ev$vectors,beta=beta)
  345. v <- .Call(ouch:::ouch_covar,object=ouch.tre,lambda=ev$values,S=ev$vectors,sigma.sq=rate)
  346. y <- matrix(theta,ncol=1)
  347. e <- w%*%y-dat
  348. dim(y) <- dim(y)[1]
  349. dim(e) <- n
  350. q <- e%*%solve(v,e)
  351. det.v <- determinant(v,logarithm=TRUE)
  352. dev <- n*log(2*pi)+as.numeric(det.v$modulus)+q[1,1]
  353. list(
  354. theta=theta,
  355. weight=w,
  356. vcov=v,
  357. resids=e,
  358. lnL=-0.5*dev
  359. )
  360. }
  361. new.bm.lik.fn <- function(rates, root, orig.dat, vcv) { # from LJ HARMON
  362. # mod 12.02.2010 JM Eastman: using determinant()$modulus rather than det() to stay in log-space
  363. y=orig.dat
  364. b <- vcv
  365. w <- rep(root, nrow(b))
  366. num <- -t(y-w)%*%solve(b)%*%(y-w)/2
  367. den <- 0.5*(length(y)*log(2*pi) + as.numeric(determinant(b)$modulus))
  368. list(
  369. root=root,
  370. lnL = (num-den)[1,1]
  371. )
  372. }
  373. split.or.merge <- function(cur.delta, cur.values, phy, node.des, lambda=lambda, theta=FALSE) {
  374. # mod 12.05.2010 JM Eastman
  375. # cur.delta=cur.delta.rates; cur.values=cur.rates; theta=FALSE; lambda=log(2); jumpsize=2
  376. bb=cur.delta
  377. vv=cur.values
  378. names(vv)<-names(bb)<-phy$edge[,2]
  379. new.bb=choose.one(bb)
  380. new.vv=vv
  381. s=names(new.bb[bb!=new.bb])
  382. all.shifts=as.numeric(names(bb[bb>0]))
  383. all.D=node.des[[which(names(node.des)==s)]]
  384. if(length(all.D)!=0) {
  385. untouchable=unlist(lapply(all.shifts[all.shifts>s], function(x)node.des[[which(names(node.des)==x)]]))
  386. remain.unchanged=union(all.shifts, untouchable)
  387. } else {
  388. untouchable=NULL
  389. remain.unchanged=list()
  390. }
  391. marker=match(s, names(new.vv))
  392. nn=length(vv)
  393. K=sum(bb)+1
  394. N=Ntip(phy)
  395. ncat=sum(bb)
  396. cur.vv=as.numeric(vv[marker])
  397. ca.vv=length(which(vv==cur.vv))
  398. if(sum(new.bb)>sum(bb)) { # add transition: SPLIT
  399. # print("s")
  400. decision="split"
  401. n.desc=sum(!all.D%in%remain.unchanged)+1
  402. n.split=sum(vv==cur.vv)-n.desc
  403. if(theta) u=split.value(cur.vv=cur.vv, n.desc=n.desc, n.split=n.split, factor=lambda) else u=split.rate(cur.vv=cur.vv, n.desc=n.desc, n.split=n.split, factor=lambda)
  404. nr.split=u$nr.split
  405. nr.desc=u$nr.desc
  406. new.vv[vv==cur.vv]=nr.split
  407. if(length(remain.unchanged)==0) { # assign new value to all descendants
  408. new.vv[match(all.D,names(new.vv))] = nr.desc
  409. } else { # assign new value to all 'open' descendants
  410. new.vv[match(all.D[!(all.D%in%remain.unchanged)],names(new.vv))] = nr.desc
  411. }
  412. new.vv[match(s, names(new.vv))]=nr.desc
  413. lnHastingsRatio = log((K+1)/(2*N-2-K)) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  414. lnPriorRatio = log(ptpois(K+1,lambda,nn)/ptpois(K,lambda,nn))
  415. } else { # drop transition: MERGE
  416. # print("m")
  417. decision="merge"
  418. anc = get.ancestor.of.node(s, phy)
  419. if(!is.root(anc, phy)) { # base new rate on ancestral rate of selected branch
  420. anc.vv=as.numeric(vv[match(anc,names(vv))])
  421. na.vv=length(which(vv==anc.vv))
  422. nr=(anc.vv*na.vv+cur.vv*ca.vv)/(ca.vv+na.vv)
  423. new.vv[vv==cur.vv | vv==anc.vv]=nr
  424. } else { # if ancestor of selected node is root, base new rate on sister node
  425. sister.tmp=get.desc.of.node(anc,phy)
  426. sister=sister.tmp[sister.tmp!=s]
  427. sis.vv=as.numeric(vv[match(sister,names(vv))])
  428. ns.vv=length(which(vv==sis.vv))
  429. nr=(sis.vv*ns.vv+cur.vv*ca.vv)/(ca.vv+ns.vv)
  430. new.vv[vv==cur.vv | vv==sis.vv]=nr
  431. }
  432. lnHastingsRatio = log((2*N-2-K+1)/K) ### from Drummond and Suchard 2010: where N is tips, K is number of local parms in tree
  433. lnPriorRatio = log(ptpois(K-1,lambda,nn)/ptpois(K,lambda,nn))
  434. }
  435. return(list(new.delta=new.bb, new.values=new.vv, lnHastingsRatio=lnHastingsRatio, lnPriorRatio=lnPriorRatio, decision=decision))
  436. }
  437. switcheroo <- function(cur.values) {
  438. # mod 11.17.2010 JM Eastman
  439. # cur.delta=cur.delta.rates; cur.values=cur.rates; theta=FALSE; lambda=log(2); jumpsize=2
  440. vv=cur.values
  441. rates.sample=unique(vv)[sample(1:length(unique(vv)), 2)]
  442. new.vv=vv
  443. new.vv[which(vv==rates.sample[1])]=rates.sample[2]
  444. new.vv[which(vv==rates.sample[2])]=rates.sample[1]
  445. return(list(new.values=new.vv))
  446. }
  447. generate.starting.point <- function(data, phy, node.des, theta=FALSE, K=FALSE, jumpsize) {
  448. # updated JME 11.14.2010
  449. nn=length(phy$edge.length)
  450. ntip=Ntip(phy)
  451. if(!K) nshifts=rtpois(1,log(ntip),nn) else nshifts=K-1
  452. if(nshifts!=0) bb=sample(c(rep(1,nshifts),rep(0,nn-nshifts)),replace=FALSE) else bb=rep(0,nn)
  453. names(bb)=phy$edge[,2]
  454. shifts=as.numeric(names(bb)[bb==1])
  455. if(theta) {
  456. min.max=c(adjust.value(min(data), jumpsize), adjust.value(max(data), jumpsize))
  457. values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
  458. } else {
  459. init.rate=fitContinuous(phy,data)[[1]]$beta
  460. min.max=c(adjust.rate(init.rate, jumpsize), adjust.rate(init.rate, jumpsize))
  461. values=runif(sum(bb)+1, min=min.max[min.max==min(min.max)], max=min.max[min.max==max(min.max)])
  462. }
  463. internal.shifts<-tip.shifts<-numeric(0)
  464. internal.shifts=sort(shifts[shifts>ntip])
  465. tip.shifts=shifts[shifts<=ntip]
  466. if(length(internal.shifts)==0 & length(tip.shifts)==0) {
  467. vv=rep(values, nn)
  468. } else {
  469. vv=bb
  470. vv[]=values[length(values)]
  471. i=0
  472. if(length(internal.shifts)!=0) {
  473. for(i in 1:length(internal.shifts)) {
  474. d=node.des[which(names(node.des)==internal.shifts[i])]
  475. vv[match(c(internal.shifts[i], unlist(d)), names(vv))]=values[i]
  476. }
  477. }
  478. if(length(tip.shifts)!=0) {
  479. for(j in 1:length(tip.shifts)) {
  480. vv[match(tip.shifts[j], names(vv))]=values[j+i]
  481. }
  482. }
  483. }
  484. return(list(delta=unname(bb), values=unname(vv)))
  485. }
  486. assign.root.theta <- function(cur.theta, phy) {
  487. root=phy$edge[1,1]
  488. desc=phy$edge[which(phy$edge[,1]==root),2]
  489. root.theta=cur.theta[sample(which(phy$edge[,2]==desc),1)]
  490. root.theta
  491. }
  492. design.vcv.phylo=function(phy, array=TRUE) {
  493. n=Ntip(phy)
  494. tips=lapply(phy$edge[,2], function(x) get.descendants.of.node(x,phy,tips=TRUE))
  495. if(!array) {
  496. dd=matrix(0,n,n)
  497. design=lapply(as.numeric(phy$edge[,2]), function(x) {
  498. new=dd
  499. node=x
  500. marker=as.numeric(which(phy$edge[,2]==node))
  501. if(node<=n) tt=node else tt=tips[[marker]]
  502. for(i in length(tt):1) {
  503. j=tt[1:i]
  504. new[tt[i],j]<-new[j,tt[i]]<-1
  505. }
  506. return(new)
  507. }
  508. )
  509. } else {
  510. design=array(0,dim=c(n,n,length(phy$edge[,2])))
  511. for(k in 1:length(phy$edge[,2])){
  512. node=as.numeric(phy$edge[k,2])
  513. marker=k
  514. if(node<=n) tt=node else tt=tips[[marker]]
  515. for(i in length(tt):1) {
  516. j=tt[1:i]
  517. design[tt[i],j,k]<-design[j,tt[i],k]<-1
  518. }
  519. }
  520. }
  521. return(design)
  522. }
  523. update.vcv=function(ape.tre, new.rates) {
  524. n=ape.tre
  525. n$edge.length=n$edge.length*new.rates
  526. vv=vcv.phylo(n)
  527. return(vv)
  528. }
  529. update.vcv.older=function(ape.tre, design.vcv, new.rates) {
  530. n=Ntip(ape.tre)
  531. new.vcv=matrix(0,n,n)
  532. new.edges=ape.tre$edge.length*new.rates
  533. for(i in 1:length(new.rates)) {
  534. new.vcv=new.vcv+new.edges[i]*design.vcv[[i]]
  535. }
  536. return(new.vcv)
  537. }
  538. update.vcv.new=function(ape.tre, new.rates, scalar.list) {
  539. # phy=birthdeath.tree(1,0,taxa=200); d=design.vcv.phylo(phy); s=build.scalar.sets(d); Rprof(); system.time(update.vcv.new(phy, rep(1,length(phy$edge.length)), s)); print(summaryRprof()); system.time(vcv.phylo(phy))
  540. n=Ntip(ape.tre)
  541. e=new.rates*ape.tre$edge.length
  542. ss=scalar.list*e
  543. entries=apply(ss,3,sum)
  544. return(matrix(entries,n,n))
  545. }
  546. update.vcv.newest=function(ape.tre, new.rates, scalar.list) {
  547. # phy=birthdeath.tree(1,0,taxa=200); d=design.vcv.phylo(phy); s=build.scalar.sets(d); Rprof(); system.time(update.vcv.new(phy, rep(1,length(phy$edge.length)), s)); print(summaryRprof()); system.time(vcv.phylo(phy))
  548. n=Ntip(ape.tre)
  549. e=new.rates*ape.tre$edge.length
  550. ss=scalar.list$scalars*e
  551. new.vcv=matrix(0,n,n)
  552. dg=which(scalar.list$diags)
  553. cv=which(scalar.list$attr)
  554. new.vcv[dg]=apply(scalar.list$scalars[,,dg],2,sum)
  555. new.vcv[cv]=apply(scalar.list$scalars[,,cv],2,sum)
  556. return(new.vcv)
  557. }
  558. build.scalar.sets=function(design.vcv) {
  559. if(!is.array(design.vcv)) stop("Cannot process design matrix: must be array")
  560. d=design.vcv
  561. t=dim(design.vcv)[1]
  562. ss=array(dim=c(1,(2*t-2),(t^2)))
  563. index=0
  564. scalar.attributes=c()
  565. diags=c()
  566. for(i in 1:t) {
  567. for(j in 1:t) {
  568. index=index+1
  569. ss[,,index]=(as.logical(d[i,j,]))
  570. if(any(ss[,,index])) scalar.attributes[index]=TRUE else scalar.attributes[index]=FALSE
  571. if(i==j) diags[index]=TRUE else diags[index]=FALSE
  572. }
  573. }
  574. for(i in 1:index) {
  575. }
  576. return(list(scalars=ss, attr=scalar.attributes, diags=diags))
  577. }
  578. get.descendants.of.node <- function(node, phy, tips=FALSE) {
  579. storage=list()
  580. if(is.null(node)) node=phy$edge[1,1]
  581. if(node%in%phy$edge[,1]) {
  582. desc=c(sapply(node, function(x) {return(phy$edge[which(phy$edge[,1]==x),2])}))
  583. while(any(desc%in%phy$edge[,1])) {
  584. desc.true=desc[which(desc%in%phy$edge[,1])]
  585. desc.desc=lapply(desc.true, function(x) return(get.desc.of.node(x, phy)))
  586. storage=list(storage,union(desc,desc.desc))
  587. desc=unlist(desc.desc)
  588. }
  589. storage=as.numeric(sort(unique(unlist(union(desc,storage)))))
  590. }
  591. if(tips) return(storage[storage<=Ntip(phy)]) else return(storage)
  592. }
  593. get.desc.of.node <- function(node, phy) {
  594. return(phy$edge[which(phy$edge[,1]==node),2])
  595. }
  596. get.ancestor.of.node<-function(node, phy) {
  597. anc=phy$edge[which(phy$edge[,2]==node),1]
  598. anc
  599. }
  600. get.ancestors.of.node<-function(node, phy) {
  601. ancestors=c()
  602. a=0
  603. while(1) {
  604. a=a+1
  605. node<-as.numeric(get.ancestor.of.node(node, phy))
  606. if(!length(node)) break() else ancestors[a]=node
  607. }
  608. return(ancestors)
  609. }
  610. choose.one <- function(cur.delta)
  611. {
  612. bb=cur.delta
  613. s=sample(1:length(bb),1)
  614. bb[s]=1-bb[s]
  615. bb
  616. }
  617. is.root<-function(node,phy) {
  618. if(node==phy$edge[1,1]) return(TRUE) else return(FALSE)
  619. }
  620. within.range=function(x, min, max) {
  621. a=sign(x-min)
  622. b=sign(x-max)
  623. if(abs(a+b)==2) return(FALSE) else return(TRUE)
  624. }
  625. assess.lnR <- function(lnR) {
  626. if(is.na(lnR) || abs(lnR)==Inf) {
  627. error=TRUE
  628. r=0
  629. } else {
  630. if(lnR < -20) {
  631. r=0
  632. } else if(lnR > 0) {
  633. r=1
  634. } else {
  635. r=exp(lnR)
  636. }
  637. error=FALSE
  638. }
  639. return(list(r=r, error=error))
  640. }
  641. apply.BMM.to.apetree<-function(apetree, rates) {
  642. apetree$edge.length=apetree$edge.length*rates
  643. return(apetree)
  644. }
  645. ape.to.ouch.tre<-function (tree, scale = FALSE, branch.lengths = tree$edge.length)
  646. {
  647. if (!inherits(tree, "phylo"))
  648. stop(sQuote("tree"), " must be of class ", sQuote("phylo"))
  649. nnodes <- nrow(tree$edge) + 1
  650. n.term <- length(tree$tip.label)
  651. n.int <- nnodes - n.term
  652. tmp <- matrix(NA, nnodes, 2)
  653. tmp[-1, 1:2] <- tree$edge
  654. tmp[1, 2] <- nnodes + 1
  655. bl <- c(0, branch.lengths)
  656. bl.ape=bl
  657. names(bl.ape)=c("root", tree$edge[,2])
  658. reord <- order(-tmp[, 2])
  659. bl <- bl[reord]
  660. bl.ouch <- bl.ape[reord]
  661. tmp <- tmp[reord, ]
  662. node <- seq(nnodes)
  663. ancestor <- rep(NA, nnodes)
  664. for (n in 2:nnodes) {
  665. anc <- which(tmp[, 2] == tmp[n, 1])
  666. if (length(anc) > 1)
  667. stop("invalid tree")
  668. if (length(anc) > 0) {
  669. ancestor[n] <- node[anc]
  670. }
  671. else {
  672. ancestor[n] <- node[1]
  673. }
  674. }
  675. if (is.null(tree$node.label))
  676. tree$node.label <- rep("", n.int)
  677. species <- rev(c(tree$tip.label, tree$node.label[-1], tree$node.label[1]))
  678. times <- rep(NA, nnodes)
  679. for (n in 1:nnodes) times[n] <- ouch:::branch.height(node, ancestor, bl, n)
  680. if (is.logical(scale)) {
  681. if (is.na(scale))
  682. stop("if ", sQuote("scale"), " is logical, it must be either true or false")
  683. if (scale)
  684. times <- times/max(times)
  685. }
  686. else if (is.numeric(scale)) {
  687. times <- times/abs(scale)
  688. }
  689. else {
  690. stop(sQuote("scale"), " must be either logical or numeric")
  691. }
  692. return(list(ouch.tre=ouchtree(nodes = node, ancestors = ancestor, times = times, labels = species), ape.ordered=bl.ouch))
  693. }
  694. adjust.value <- function(value, jumpsize, theta=FALSE) {
  695. # mod 10.20.2010 JM Eastman
  696. vv=value
  697. if(runif(1)<0.5 | length(unique(vv))==1) {
  698. rch <- runif(1, min = -jumpsize, max = jumpsize)
  699. return(vv[]+rch)
  700. } else {
  701. return((vv-mean(vv))*runif(1,min=-jumpsize, max=jumpsize)+mean(vv))
  702. }
  703. }
  704. adjust.rate <- function(rate, jumpsize) {
  705. # mod 10.20.2010 JM Eastman
  706. vv=rate
  707. v=log(vv)
  708. rch <- runif(1, min = -jumpsize, max = jumpsize)
  709. v=exp(v[]+rch)
  710. v
  711. }
  712. prepare.data <- function(phy, data) {
  713. td <- treedata(phy, data, sort = T)
  714. return(list(ape.tre=td$phy, orig.dat=td$data[,1]))
  715. }
  716. prepare.ouchdata <- function(phy, data) {
  717. td <- treedata(phy, data, sort = T)
  718. ot <- ape.to.ouch.tre(td$phy->ape.tre, scale=FALSE)
  719. ouch.tre=ot$ouch.tre
  720. ouch.dat=data.frame(as.numeric(ouch.tre@nodes), as.character(ouch.tre@nodelabels))
  721. names(ouch.dat)=c("nodes","labels")
  722. ouch.dat[ouch.dat[,2]=="",2]=NA
  723. ouch.dat$trait=NA
  724. mM=match(ouch.dat$labels,names(data))
  725. for(i in 1:nrow(ouch.dat)){
  726. if(!is.na(mM[i])){
  727. ouch.dat$trait[i]=data[mM[i]]
  728. }
  729. }
  730. return(list(ouch.tre=ouch.tre, ouch.dat=ouch.dat, ape.tre=td$phy, orig.dat=td$data[,1], ape.ordered=ot$ape.ordered))
  731. }
  732. summarize.run<-function(cOK, endparms, cur.model) {
  733. # endparmsOU = c(nRateProp, nRootProp, nAlphaProp, nTcatProp, nThetaProp)
  734. if(cur.model=="BM") {
  735. names(endparms)<-names(cOK)<-c("rates", "rate.swap", "rate.catg", "root")
  736. } else {
  737. names(endparms)<-names(cOK)<-c("rate", "root", "alpha", "regimes", "theta")
  738. }
  739. names(endparms)=paste("pr.",names(endparms),sep="")
  740. names(cOK)=paste("ok.",names(cOK),sep="")
  741. cat("\n")
  742. if(cur.model=="BM") {
  743. print(endparms[paste("pr.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
  744. print(cOK[paste("ok.", c("rates", "rate.swap", "rate.catg", "root"),sep="")])
  745. } else {
  746. print(endparms[paste("pr.", c("rate", "root", "alpha", "regimes", "theta"),sep="")])
  747. print(cOK[paste("ok.", c("rate", "root", "alpha", "regimes", "theta"),sep="")])
  748. }
  749. }
  750. generate.log<-function(bundled.parms, cur.model, file, init=FALSE) {
  751. # bundled.parms=list(gen=i, mrate=mean(cur.rates), cats=sum(cur.delta.rates)+1, root=cur.root, alpha=cur.alpha, reg=sum(cur.delta.theta)+1, theta=mean(cur.theta), lnL=curr.lnL)
  752. res=bundled.parms
  753. if(init) {
  754. if(cur.model=="BM") msg=paste("gen", "model", "mean.rate", "rates", "root", "lnL", sep="\t") else msg=paste("gen", "model", "rate", "root", "alpha", "regimes", "mean.theta", "lnL", sep="\t")
  755. } else {
  756. if(cur.model=="BM") {
  757. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$mrate), res$cats, sprintf("%.3f", res$root), sprintf("%.3f", res$lnL),sep="\t")
  758. } else {
  759. msg<-paste(res$gen, sQuote(cur.model), sprintf("%.3f", res$rate), sprintf("%.2f", res$root), sprintf("%.4f", res$alpha), res$regimes, sprintf("%.3f", res$mean.theta), sprintf("%.3f", res$lnL),sep="\t")
  760. }
  761. }
  762. write(msg, file=file, append=TRUE)
  763. }
  764. parlogger<-function(model, init=FALSE, node.des, i, curr.lnL, cur.root=NULL, cur.rates=NULL, cur.delta.rates=NULL, cur.alpha=NULL, cur.theta=NULL, cur.delta.theta=NULL, parmBase) {
  765. if(init) {
  766. msg.long=names(node.des[-1])
  767. if(model=="BM") {
  768. msg.short=paste("gen", "root", "lnL", sep="\t")
  769. parlogs=paste(parmBase, paste(c("summary", "rates", "rate.shifts"), "txt", sep="."), sep="")
  770. sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  771. sapply(parlogs[2:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  772. } else {
  773. msg.short=paste("gen", "root", "rate", "alpha", "lnL", sep="\t")
  774. parlogs=paste(parmBase, paste(c("summary", "optima", "optima.shifts"), "txt", sep="."), sep="")
  775. sapply(parlogs[1], function(x) write.table(msg.short, x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  776. sapply(parlogs[2:length(parlogs)], function(x) write.table(paste(msg.long,collapse=" "), x, quote=FALSE, col.names=FALSE, row.names=FALSE))
  777. }
  778. } else {
  779. if(model=="BM") {
  780. parlogs=paste(parmBase, paste(c("summary", "rates", "rate.shifts"), "txt", sep="."), sep="")
  781. outlist=list(paste(i, cur.root, curr.lnL, sep="\t"), cur.rates, cur.delta.rates)
  782. sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE))
  783. } else {
  784. parlogs=paste(parmBase, paste(c("summary", "optima", "optima.shifts"), "txt", sep="."), sep="")
  785. outlist=list(paste(i, cur.root, cur.rates, cur.alpha, curr.lnL, sep="\t"), cur.theta, cur.delta.theta)
  786. sapply(1:length(outlist), function(x) write.table(paste(outlist[[x]],collapse=" "), parlogs[[x]], quote=FALSE, col.names=FALSE, row.names=FALSE, append=TRUE))
  787. }
  788. }
  789. }
  790. generate.error.message<-function(i, mod.cur, mod.new, lnR, errorLog, init=FALSE) {
  791. if(init) {
  792. initmsg = write(paste("gen", "curr.lnL", "new.lnL", "lnR", sep="\t"), file=errorLog)
  793. } else {
  794. write(paste(i, sprintf("%.3f", mod.cur$lnL), sprintf("%.3f", mod.new$lnL), sprintf("%.3f", lnR), sep="\t"), file=errorLog)
  795. }
  796. }
  797. determine.accepted.proposal<-function(startparms, endparms, cOK) {
  798. which(startparms!=endparms)->marker
  799. cOK[marker]=cOK[marker]+1
  800. cOK
  801. }
  802. rtpois<-function(N, lambda, k) {
  803. p=ppois(k, lambda, lower.tail=FALSE)
  804. out=qpois(runif(N, min=0, max=1-p), lambda)
  805. out
  806. }
  807. ptpois<-function(x, lambda, k) {
  808. p.k=ppois(k, lambda, lower.tail=FALSE)
  809. p.x=ppois(x, lambda, lower.tail=FALSE)
  810. ptp=p.x/(1-p.k)
  811. return(ptp)
  812. }
  813. ## SUMMARIZING MCMC -- PLOTTING FUNCTIONS ##
  814. shifts.plot=function(phy, base.dir, burnin=0, level=0.03, internal.only=FALSE, paint.branches=TRUE, pdf=TRUE, verbose=TRUE, ...) {
  815. # phy; base.dir; burnin=0; level=0.03; internal.only=FALSE; paint.branches=TRUE; pdf=TRUE; verbose=TRUE
  816. nullphy=phy
  817. nullphy$tip.label[]=""
  818. color.length=21
  819. require(ape)
  820. oldwd=getwd()
  821. setwd(base.dir)
  822. files=dir()
  823. if(any(grep("optima", files))) optima=TRUE else optima=FALSE
  824. if(optima) {
  825. shifts.file=files[grep("optima.shifts.txt", files)]
  826. estimates.file=files[grep("optima.txt", files)]
  827. } else {
  828. shifts.file=files[grep("rate.shifts.txt", files)]
  829. estimates.file=files[grep("rates.txt", files)]
  830. }
  831. if(pdf | verbose) {
  832. lab=paste(strsplit(base.dir, ".", fixed=TRUE)[[1]][1:2],collapse=".")
  833. out.base=paste(lab, paste("bi", burnin, sep=""), paste("fq", level, sep=""), sep=".")
  834. outdir=paste("./results")
  835. if(!file.exists(outdir)) dir.create(outdir)
  836. }
  837. cat("READING shifts...\n")
  838. results=read.table(shifts.file)
  839. names(results)=as.numeric(results[1,])
  840. burnin=ceiling(burnin*nrow(results))
  841. results=results[-c(1:(burnin+1)),]
  842. if(!all(names(results)%in%phy$edge[,2])) stop("Cannot process results: check that tree matches posterior summaries")
  843. hits.tmp<-shifts<-apply(results,1,sum)
  844. hits=length(hits.tmp[hits.tmp>0])
  845. aa.tmp=apply(results, 2, function(x) sum(x))
  846. aa.tmp=aa.tmp/hits
  847. aa=aa.tmp[aa.tmp>=level]
  848. aa=aa[order(aa, decreasing=TRUE)]
  849. aa.nodes=aa[which(as.numeric(names(aa))>Ntip(phy))]
  850. aa.tips=aa[which(as.numeric(names(aa))<=Ntip(phy))]
  851. aa=aa.nodes
  852. nodes=as.numeric(names(aa))
  853. nodes=nodes[nodes>Ntip(phy)]
  854. desc=lapply(nodes, function(x) {
  855. foo=get.descendants.of.node(x,phy)
  856. if(length(foo)) return(phy$tip.label[foo[foo<=Ntip(phy)]]) else return(NULL)
  857. }
  858. )
  859. cat("READING estimates...\n")
  860. ests=read.table(estimates.file)
  861. names(ests)=ests[1,]
  862. ests=ests[-c(1:(burnin+1)),]
  863. average.ests.tmp=apply(ests,2,mean)
  864. average.ests=average.ests.tmp-median(average.ests.tmp)
  865. if(paint.branches) {
  866. require(colorspace)
  867. cce=diverge_hcl(2*color.length, power = 0.5)
  868. e.seq=seq(min(average.ests),max(average.ests),length=color.length)
  869. color.gamut=seq(-max(abs(e.seq)), max(abs(e.seq)), length=length(cce))
  870. colors.branches=sapply(average.ests, function(x) {y=cce[which(min(abs(color.gamut-x))==abs(color.gamut-x))]; return(y[sample(1:length(y),1)])})
  871. colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]
  872. } else {
  873. colors.branches=1
  874. }
  875. ccx=diverge_hcl(color.length, c = c(100, 0), l = c(50, 90), power = 1.1)
  876. c.seq=round(seq(-1,1,by=0.1),digits=1)
  877. if(length(nodes)) {
  878. require(colorspace)
  879. cols=sapply(nodes, function(x) {
  880. a=get.ancestor.of.node(x, phy)
  881. comp=ests[,which(names(ests)==a)]
  882. this=ests[,which(names(ests)==x)]
  883. if(!length(comp)) { # dealing with first descendant of root
  884. d=get.desc.of.node(a, phy)
  885. d=d[which(d!=x)]
  886. # find if sister descendant also experienced rate shift
  887. d.shifts=results[, which(names(results)==d)]
  888. comp=ests[,which(names(ests)==d)]
  889. x.res=sapply(1:length(d.shifts), function(y) {
  890. if(d.shifts[y]==0) {
  891. zz=this[y]-comp[y]
  892. if(zz>0) {
  893. return(1)
  894. } else {
  895. if(zz<0) return(-1) else return(0)
  896. }
  897. } else {
  898. return(0)
  899. }
  900. }
  901. )
  902. x.res=mean(x.res[x.res!=0])
  903. if(is.na(x.res)) x.res=0
  904. } else {
  905. yy=this-comp
  906. zz=yy
  907. zz[yy>0]=1
  908. zz[yy<0]=-1
  909. zz[yy==0]=0
  910. x.res=mean(zz[zz!=0])
  911. if(is.na(x.res)) x.res=0
  912. }
  913. return(x.res)
  914. }
  915. )
  916. colors.nodes=ccx[match(round(cols,1),c.seq)]
  917. names(colors.nodes)=nodes
  918. colors.nodes=colors.nodes[which(as.numeric(names(colors.nodes))>Ntip(phy))]
  919. } else {
  920. colors.nodes=NULL
  921. cols=NULL
  922. }
  923. if(length(aa.tips) & !internal.only) {
  924. tt<-tt.cex<-rep(0,Ntip(phy))
  925. tt[as.numeric(names(aa.tips))]=1
  926. # tt.cex[as.numeric(names(aa.tips))]=aa.tips/max(aa.tmp)
  927. tt.cex[as.numeric(names(aa.tips))]=aa.tips
  928. tt.col=sapply(names(aa.tips), function(x) {
  929. a=get.ancestor.of.node(x, phy)
  930. comp=ests[,which(names(ests)==a)]
  931. this=ests[,which(names(ests)==x)]
  932. if(!length(comp)) { # dealing with first descendant of root
  933. d=get.desc.of.node(a, phy)
  934. d=d[which(d!=x)]
  935. # find if sister descendant also experienced rate shift
  936. d.shifts=results[, which(names(results)==d)]
  937. comp=ests[,which(names(ests)==d)]
  938. x.res=sapply(1:length(d.shifts), function(y) {
  939. if(d.shifts[y]==0) {
  940. zz=this[y]-comp[y]
  941. if(zz>0) {
  942. return(1)
  943. } else {
  944. if(zz<0) return(-1) else return(0)
  945. }
  946. } else {
  947. return(0)
  948. }
  949. }
  950. )
  951. x.res=mean(x.res[x.res!=0])
  952. } else {
  953. yy=this-comp
  954. zz=yy
  955. zz[yy>0]=1
  956. zz[yy<0]=-1
  957. zz[yy==0]=0
  958. x.res=mean(zz[zz!=0])
  959. }
  960. return(x.res)
  961. }
  962. )
  963. tt.ccx=diverge_hcl(21, c = c(100, 0), l = c(50, 90), power = 1.1)
  964. tt.c.seq=round(seq(-1,1,by=0.1),digits=1)
  965. tt.colors.nodes=tt.ccx[match(round(tt.col,1),tt.c.seq)]
  966. names(tt.colors.nodes)=as.numeric(names(aa.tips))
  967. colors.tips.tmp=tt.colors.nodes[order(as.numeric(names(tt.colors.nodes)))]
  968. colors.tips=rep(NA, Ntip(phy))
  969. colors.tips[as.numeric(names(colors.tips.tmp))]=colors.tips.tmp
  970. tip.names=as.list(phy$tip.label[as.numeric(names(aa.tips))])
  971. names(tip.names)=paste("node",names(aa.tips),sep=".")
  972. } else {
  973. tt.cex=NULL
  974. tt.col=NULL
  975. tip.names=NULL
  976. colors.tips=NULL
  977. tt=rep(0,Ntip(phy))
  978. }
  979. ## PLOTTING OF TREE
  980. CEX=max(c(0.05, (2+-0.36*log(Ntip(phy))) ))
  981. if(pdf) pdf(file=paste(outdir, paste(out.base,"pdf",sep="."),sep="/"))
  982. plot(phy, cex=CEX, lwd=0.4, edge.width=0.5, edge.color=colors.branches, show.tip.label=TRUE, label.offset=strwidth(par("pch"),cex=1.25), ...)
  983. nn=(Ntip(phy)+1):max(phy$edge[,2])
  984. ll<-cc<-rr<-rep(0,length(nn))
  985. ll[nn%in%nodes]=1
  986. # if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]/max(c(aa,tt.cex))
  987. if(length(nodes)) cc[nn%in%nodes]=aa[match(nn[nn%in%nodes],nodes)]
  988. rr[nn%in%nodes]=colors.nodes[order(names(colors.nodes))]
  989. nodelabels(pch=ifelse(ll==1, 21, NA), cex=2*cc, col=rr, bg=rr, lwd=0.5)
  990. tiplabels(pch=ifelse(tt==1, 21, NA), cex=2*tt.cex, col=colors.tips, bg=colors.tips, lwd=0.5)
  991. legend("topright", title=toupper("direction"), cex=0.5, pt.cex=1, text.col="darkgray", legend = sprintf("%6.1f", rev(c.seq[seq(1,color.length,by=2)])), pch=21, ncol=1, col = "darkgray", pt.bg = rev(ccx[seq(1,color.length,by=2)]), box.lty="blank", border="white")
  992. if(length(nodes) | (length(aa.tips) & !internal.only)) {
  993. legend("bottomright", title=toupper("frequency"), cex=0.5, pt.cex=2*(seq(0, 1, length=9)), text.col="darkgray", legend = sprintf("%10.3f", seq(0, 1, length=9)), pch=21, ncol=1, col = "darkgray", pt.bg = "white", box.lty="blank", border="white")
  994. }
  995. if(pdf) dev.off()
  996. if(length(nodes)) {
  997. names(cols)=names(aa)
  998. names(desc)=paste("node",nodes,sep=".")
  999. }
  1000. ##
  1001. if(verbose) {
  1002. summary.table=c(hits/nrow(results), mean(shifts), median(shifts))
  1003. names(summary.table)=c("shift.prob", "mean.shift","median.shifts")
  1004. write.table(summary.table, file=paste(outdir, paste(out.base, "run.summary.txt", sep="."), sep="/"), quote=FALSE, col.names=FALSE)
  1005. if(length(nodes)) {
  1006. for(nn in 1:length(nodes)) {
  1007. write.table(c(nodes[nn], desc[[nn]]), file=paste(outdir, paste(out.base, "shift.descendants.txt", sep="."), sep="/"), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
  1008. }
  1009. }
  1010. if(length(aa.tips)) {
  1011. for(nn in 1:length(aa.tips)) {
  1012. write.table(c(as.numeric(names(aa.tips)[nn]), tip.names[[nn]]), file=paste(outdir, paste(out.base, "shift.descendants.txt", sep="."), sep="/"), append=TRUE, quote=FALSE, row.names=FALSE, col.names=FALSE)
  1013. }
  1014. }
  1015. allres=data.frame(matrix(NA, nrow=nrow(phy$edge), ncol=3))
  1016. if(length(aa)) {
  1017. x=as.numeric(names(aa))
  1018. nodres=merge(as.data.frame(aa),as.data.frame(cols),by=0)
  1019. allres[nodres[,1],]=nodres
  1020. }
  1021. if(length(aa.tips)) {
  1022. x=as.numeric(names(aa.tips))
  1023. tipres=merge(as.data.frame(aa.tips), as.data.frame(tt.col),by=0)
  1024. allres[tipres[,1],]=tipres
  1025. }
  1026. allres=allres[!apply(allres, 1, function(x)any(is.na(x))),]
  1027. allres=allres[order(allres[,2], decreasing=TRUE),]
  1028. if(nrow(allres)>0) {
  1029. allres=data.frame(allres)
  1030. names(allres)=c("node", "conditional.shift.probability", "relative.shift.direction")
  1031. write.table(allres, file=paste(outdir, paste(out.base, "estimates.summary.txt", sep="."), sep="/"), quote=FALSE, row.names=FALSE)
  1032. }
  1033. }
  1034. setwd(oldwd)
  1035. if(!optima) {
  1036. return(list(desc=desc, tips=tip.names, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), nodeshift.prop=aa, nodeshift.direction=cols, tipshift.prop=aa.tips, tipshift.direction=tt.col, mean.rates=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
  1037. } else {
  1038. return(list(desc=desc, tips=tip.names, shift.prob=hits/nrow(results), mean.shifts=mean(shifts), median.shifts=median(shifts), nodeshift.prop=aa, nodeshift.direction=cols, tipshift.prop=aa.tips, tipshift.direction=tt.col, mean.optima=average.ests.tmp, mean.shifts=aa.tmp/nrow(results)))
  1039. }
  1040. }
  1041. plot.quant<-function(phy, data, data.names, shrinker=1, relative=TRUE, cex=0.5, adj=c(15,5), lwd=0.5, font=1, ...){
  1042. std=function(x,max,min){return((x-min)/(max-min))}
  1043. if(relative) {
  1044. data=sapply(data, function(x) std(x, max(data), min(data)))
  1045. }
  1046. f=match(phy$tip.label, data.names)
  1047. if(is.na(sum(f))) {
  1048. stop("tips cannot be matched to tree")
  1049. }
  1050. else {
  1051. data=data[f]
  1052. phy$trait=data
  1053. plot.phylo(phy, show.tip.label=T, label.offset=adj[1], cex=cex, font=font, ...)
  1054. tiplabels(text=NULL, pch=21, adj=adj[2], cex=c(phy$trait/shrinker), bg="white",lwd=lwd)
  1055. }
  1056. }
  1057. branchcol.plot=function(phy, cur.rates, color.length=4, ...) {
  1058. color.length=color.length
  1059. require(ape)
  1060. require(colorspace)
  1061. ests=cur.rates
  1062. names(ests)=phy$edge[,2]
  1063. cce=diverge_hcl(color.length, power = 0.5)
  1064. e.seq=seq(min(ests),max(ests),length=color.length)
  1065. colors.branches=sapply(ests, function(x) cce[which(min(abs(e.seq-x))==abs(e.seq-x))])
  1066. names(colors.branches)=names(ests)
  1067. colors.branches=colors.branches[match(as.numeric(names(colors.branches)), phy$edge[,2])]
  1068. plot(phy, cex=0.1, lwd=0.4, edge.width=0.5, edge.color=colors.branches, ...)
  1069. }
  1070. prior.posterior.plot=function(data, rpois.lambda) {
  1071. # data should be a vector of number of shifts, for instance, for all interations across the MCMC sampling (post-burnin)
  1072. require(ggplot2)
  1073. dataset <- data.frame(variable = gl(2, length(data), labels = c("posterior", "prior")), value = c(data, rpois(length(data),rpois.lambda)))
  1074. ggplot(data = dataset, aes(x = value, fill = variable)) + geom_histogram(position = "dodge") + scale_fill_manual(values = c("gray", "black"))
  1075. }
  1076. prior.posterior.plot.alt=function(outlogs, lambda, ...){
  1077. foo=outlogs
  1078. d=lapply(dir(foo),function(x) read.table(paste(foo,x,sep="/"),header=T))
  1079. names=dir(foo)
  1080. lapply(names,function(x)paste(strsplit(x,".",fixed=T)[[1]][1:2],collapse="."))->nn
  1081. n=length(nn)
  1082. ss=(1:10)^2
  1083. sn=ss-n
  1084. s=ss[which(sn==min(sn[sn>=0]))]
  1085. x=sqrt(s)
  1086. layout(matrix(1:s, x, x))
  1087. for(i in 1:length(d)){
  1088. if(any(names(d[[i]])=="regimes")) {
  1089. plot(density(d[[i]]$regimes), bty="n",xlim=c(0,10), main=nn[[i]],xlab="regimes")
  1090. } else {
  1091. plot(density(d[[i]]$rates),bty="n",xlim=c(0,10), main=nn[[i]],xlab="rates")
  1092. }
  1093. lines(density(rpois(nrow(d[[i]]),lambda)), ...)
  1094. }
  1095. }
  1096. split.rate=function(cur.vv, n.desc, n.split, factor=log(2)){
  1097. dev=cur.vv-exp(log(cur.vv)+runif(1, -factor, factor))
  1098. nr.desc=cur.vv+dev/n.desc
  1099. nr.split=cur.vv-dev/n.split
  1100. # print((nr.split*n.split+nr.desc*n.desc)/(n.desc+n.split)-cur.vv)
  1101. return(list(nr.desc=nr.desc, nr.split=nr.split))
  1102. }
  1103. split.value=function(cur.vv, n.desc, n.split, factor=log(2)){
  1104. dev=cur.vv-adjust.value(cur.vv, factor)
  1105. nr.desc=cur.vv + dev/n.desc
  1106. nr.split=cur.vv - dev/n.split
  1107. # print((nr.split*n.split+nr.desc*n.desc)/(n.desc+n.split)-cur.vv)
  1108. return(list(nr.desc=nr.desc, nr.split=nr.split))
  1109. }
  1110. compare.rates=function(nodes=list(A=c(NULL,NULL),B=c(NULL,NULL)), phy, posterior.rates, ...){
  1111. if(is.null(names(nodes))) names(nodes)=c("A","B")
  1112. rates.a=rates[,match(nodes[[1]],names(rates))]
  1113. rates.b=rates[,match(nodes[[2]],names(rates))]
  1114. edges.a=phy$edge.length[match(nodes[[1]],phy$edge[,2])]
  1115. edges.b=phy$edge.length[match(nodes[[2]],phy$edge[,2])]
  1116. weights.a=edges.a/sum(edges.a)
  1117. weights.b=edges.b/sum(edges.b)
  1118. r.scl.a=apply(rates.a, 1,function(x) sum(x*weights.a))
  1119. r.scl.b=apply(rates.b, 1,function(x) sum(x*weights.b))
  1120. return(list(scl.A=r.scl.a, scl.B=r.scl.b, r.test=randomization.test(r.scl.a, r.scl.b, ...)))
  1121. }
  1122. randomization.test=function(obs=obs, exp=exp, iter=10000, verbose=F, two.tailed=F){
  1123. if(verbose && length(obs)<iter) warning("iterations exceeds observed data")
  1124. if(verbose) warning(paste("p-value roughly approximated for ",length(exp), " values\n",sep=""))
  1125. O=sample(obs, iter, replace=TRUE)
  1126. E=sample(exp, iter, replace=TRUE)
  1127. obj=data.frame(obs, exp)
  1128. names(obj)=c("obs","exp")
  1129. result=list(c(O-E))
  1130. p=array(NA, iter)
  1131. for(i in 1:length(result[[1]])){
  1132. if(result[[1]][i]>0) {
  1133. p[i]=1
  1134. } else {
  1135. if(result[[1]][i]==0) {
  1136. p[i]=sample(c(0,1),1)
  1137. } else {
  1138. p[i]=0
  1139. }
  1140. }
  1141. }
  1142. p.g=round(1-(sum(p)/iter), digits=7)
  1143. p.l=1-p.g
  1144. # names(p.g)="greater"
  1145. # names(p.l)="lesser"
  1146. if(verbose) res=list(c(O-E),greater=p.g,lesser=p.l) else res=list(greater=p.g,lesser=p.l)
  1147. if(two.tailed) res=min(c(p.g,p.l))*2 else res=res
  1148. return(res)
  1149. }
  1150. collect.nodes=function(tips,phy,strict=FALSE){
  1151. if(length(tips)==1) stop("Must supply at least two tips.")
  1152. tt=phy$tip.label
  1153. tt=lapply(tips, function(x) which(phy$tip.label==x))
  1154. nodes=lapply(tt, function(x) get.ancestors.of.node(x, phy))
  1155. all.nodes=nodes[[1]]
  1156. for(i in 2:length(nodes)) {
  1157. all.nodes=intersect(all.nodes,nodes[[i]])
  1158. }
  1159. base.node=max(all.nodes)
  1160. if(strict) {
  1161. u=unlist(nodes)
  1162. return(c(sort(unique(u[u>=base.node])), unlist(tt)))
  1163. } else {
  1164. return(c(base.node,get.descendants.of.node(base.node,phy)))
  1165. }
  1166. }
  1167. find.relatives=function(tips,phy){
  1168. nn=collect.nodes(tips, phy, strict=FALSE)
  1169. return(phy$tip.label[nn[nn<=Ntip(phy)]])
  1170. }