/archive/source.archive/bm.and.ou/trait.rjmcmc.09022010.R

http://github.com/eastman/auteur · R · 278 lines · 228 code · 25 blank · 25 comment · 70 complexity · eaa693bc376165c9155e0da4e32c9cc5 MD5 · raw file

  1. ## Written by LJ HARMON, AH: 2008
  2. brownMcmc<-function (phy, data, ngen = 1000, probMergeSplit = 0.01, rootp = 0.1, sampleFreq = 100, ylim = c(-200, 0), fileBase = "result")
  3. {
  4. td <- treedata(phy, data, sort = T)
  5. nn <- length(td$phy$edge.length) # max number of rates (one per branch)
  6. currRateCat <- numeric(nn) # initialize array for recording which branch belongs to which rate category
  7. currRateCat[] <- 1 # assign one rate to all branches initially
  8. currRate <- numeric(nn) # initialize array for rate estimates branchwise
  9. currRate[] <- init <- 0.001 # initialize BMMrate to be small
  10. currRootMean <- mean(td$data[, 1]) # determine an ancestral state (by averaging known data)
  11. l <- numeric(ngen) # number of MCMC generations
  12. l[] <- NA # initialize array for storing lnLs
  13. nMergeProp = 0 ## proposal count: merge two rate parameters
  14. nSplitProp = 0 ## proposal count: draw new rate parameter
  15. nRateProp = 0 ## proposal count: update rate parameter(s)
  16. nMergeOK = 0 ## proposal count: successful merges
  17. nSplitOK = 0 ## proposal count: successful splits
  18. nRateOK = 0 ## proposal count: successful rate changes
  19. res <- list(rate = currRate, rateCat = currRateCat, lnlprof = l)
  20. # error and output file handling
  21. errorNum <- 0
  22. errorLog <- paste(fileBase, 'errors.log', sep = '')
  23. cat(c('Error log for run beginning', date(), '\n'), file = errorLog, append = F, sep = ' ')
  24. cat(c('--------------------------------------------------------', '\n\n'), file = errorLog, append = T)
  25. cat(c('generation', '\t', 'numCat', '\t', 'rateMin', '\t', 'rateMax', '\t', 'cumulativePercentage', '\n'), file = paste(errorLog, 'stats.txt', sep = ''), append = F, sep = '')
  26. if (fileBase != "") {
  27. f1 <- paste(fileBase, "RateCat.log", sep = "")
  28. f2 <- paste(fileBase, "Rates.log", sep = "")
  29. } else {
  30. f1 <- ""
  31. f2 <- ""
  32. }
  33. cat("", file = f1)
  34. cat("", file = f2)
  35. # end output file handling
  36. # Begin rjMCMC
  37. for (i in 1:ngen) {
  38. lnLikelihoodRatio = 0
  39. lnProposalRatio = 0
  40. lnPriorRatio = 0
  41. merge = FALSE
  42. split = FALSE
  43. # determine whether to split or merge rate categories
  44. if (runif(1) < (2 * probMergeSplit)) {
  45. if (max(currRateCat) == 1) {
  46. split = TRUE
  47. }
  48. else if (max(currRateCat) == nn) {
  49. merge = TRUE
  50. }
  51. else if (runif(1) < 0.5) {
  52. split = TRUE
  53. }
  54. else merge = TRUE
  55. }
  56. # update either number of rate categories or rates themselves
  57. if (merge)
  58. nMergeProp = nMergeProp + 1
  59. if (split)
  60. nSplitProp = nSplitProp + 1
  61. if (!merge & !split)
  62. nRateProp = nRateProp + 1
  63. newRateCat <- currRateCat
  64. newRate <- currRate
  65. newRootMean <- currRootMean
  66. # decide whether to update root mean or update a rate estimate
  67. if (!merge & !split) {
  68. if (runif(1) < rootp) {
  69. rch <- runif(1, min = -0.5, max = 0.5)
  70. newRootMean <- newRootMean + rch
  71. }
  72. else {
  73. ncat <- max(currRateCat)
  74. rcat <- round(runif(1) * (ncat) + 0.5)
  75. r <- log(currRate[currRateCat == rcat][1])
  76. rch <- runif(1, min = -2, max = 2)
  77. nr <- r + rch
  78. newRate[currRateCat == rcat] <- exp(nr)
  79. }
  80. }
  81. if (merge) {
  82. ncat <- max(currRateCat)
  83. mergeCats <- sample(1:ncat)[1:2]
  84. newRateCat[newRateCat == mergeCats[1]] <- mergeCats[2]
  85. newRateCat <- fixRateCat(newRateCat)
  86. or1 <- currRate[currRateCat == mergeCats[1]][1]
  87. or2 <- currRate[currRateCat == mergeCats[2]][1]
  88. n1 <- sum(currRateCat == mergeCats[1])
  89. n2 <- sum(currRateCat == mergeCats[2])
  90. nr <- (n1 * or1 + n2 * or2)/(n1 + n2)
  91. newRate[currRateCat == mergeCats[1] | currRateCat ==
  92. mergeCats[2]] <- nr
  93. numSplittableBeforeMerge <- sum(table(currRateCat) >
  94. 1)
  95. numSplittableAfterMerge <- sum(table(newRateCat) >
  96. 1)
  97. if (max(currRateCat) == nn) {
  98. probMerge = 1
  99. }
  100. else {
  101. probMerge = 0.5
  102. }
  103. if (max(newRateCat) == 1) {
  104. probSplit = 1
  105. }
  106. else {
  107. probSplit = 0.5
  108. }
  109. factor = probSplit/probMerge
  110. lnProposalRatio = log(factor) + log(nn) + log(ncat) - log(numSplittableAfterMerge) - log(2^(n1 + n2) - 2) - log(nr/mean(currRate)) - log(n1 + n2)
  111. lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat - 1))
  112. }
  113. if (split) {
  114. ncat <- max(currRateCat)
  115. while (1) {
  116. rcat <- round(runif(1) * ncat + 0.5)
  117. nc <- sum(currRateCat == rcat)
  118. if (nc > 1)
  119. break
  120. }
  121. while (1) {
  122. new <- round(runif(nc) * 2 + 0.5)
  123. if (length(table(new)) == 2)
  124. break
  125. }
  126. newRateCat[currRateCat == rcat][new == 2] <- ncat +
  127. 1
  128. or <- currRate[currRateCat == rcat][1]
  129. n1 <- sum(new == 1)
  130. n2 <- sum(new == 2)
  131. u <- runif(1, min = -0.5 * n1 * or, max = 0.5 * n2 *
  132. or)
  133. nr1 <- or + u/n1
  134. nr2 <- or - u/n2
  135. newRate[newRateCat == rcat] <- nr1
  136. newRate[newRateCat == ncat + 1] <- nr2
  137. newRateCat <- fixRateCat(newRateCat)
  138. numSplittableBeforeSplit <- sum(table(currRateCat) > 1)
  139. numSplittableAfterSplit <- sum(table(newRateCat) > 1)
  140. if (max(currRateCat) == 1) {
  141. probSplit = 1
  142. }
  143. else {
  144. probSplit = 0.5
  145. }
  146. if (max(newRateCat) == nn) {
  147. probMerge = 1
  148. }
  149. else {
  150. probMerge = 0.5
  151. }
  152. factor = probMerge/probSplit
  153. lnProposalRatio = log(factor) + log(numSplittableBeforeSplit) + log(2^(n1 + n2) - 2) + log(or/mean(currRate)) + log(n1 + n2) - log(nn) - log(ncat + 1)
  154. lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat + 1))
  155. }
  156. lnlCurr <- fitModel(td$phy, td$data[, 1], currRate, currRootMean)
  157. lnlNew <- fitModel(td$phy, td$data[, 1], newRate, newRootMean)
  158. lnLikelihoodRatio = lnlNew - lnlCurr
  159. heat = 1
  160. lnR = heat * lnLikelihoodRatio + heat * lnPriorRatio + lnProposalRatio
  161. if (lnR > 0) {
  162. r = 1
  163. }
  164. else if (lnR < -100) {
  165. r = 0
  166. }
  167. else {
  168. r = exp(lnR)
  169. }
  170. #message(paste('lnlNew - lnlCurr =', lnlNew, '-', lnlCurr, '=', lnLikelihoodRatio, "lnR", lnR, "r", r))
  171. # adopt proposed update; message("YES")
  172. if (runif(1) <= r) {
  173. currRate <- newRate
  174. currRateCat <- newRateCat
  175. currRootMean <- newRootMean
  176. res$lnlprof[i] <- lnlNew
  177. if (merge)
  178. nMergeOK = nMergeOK + 1
  179. if (split)
  180. nSplitOK = nSplitOK + 1
  181. if (!merge & !split)
  182. nRateOK = nRateOK + 1
  183. }
  184. # reject proposed update; message("NO")
  185. else {
  186. res$lnlprof[i] <- lnlCurr
  187. }
  188. # log parameters
  189. if (i%%sampleFreq == 0) {
  190. res$rate <- cbind(res$rate, currRate)
  191. res$rateCat <- cbind(res$rateCat, currRateCat)
  192. cat(file = f1, i, mean(currRate), max(currRateCat), lnlCurr, "\n", append = T, sep = "\t")
  193. cat(file = f2, mean(currRate), "\n", append = T, sep = "\t")
  194. cat("Gen ", i, "completed\n")
  195. }
  196. }
  197. # End rjMCMC
  198. diagnostics=c(nMergeOK, nMergeProp, nSplitOK, nSplitProp, nRateOK, nRateProp, init)
  199. names(diagnostics)=c("nMergeOK", "nMergeProp", "nSplitOK", "nSplitProp", "nRateOK", "nRateProp", "initialRate")
  200. res$diagnostics=diagnostics
  201. res$rate=as.data.frame(res$rate[,-1])
  202. names(res$rate)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")
  203. row.names(res$rate)=td$phy$edge[,2]
  204. res$rateCat=as.data.frame(res$rateCat[,-1])
  205. row.names(res$rateCat)=td$phy$edge[,2]
  206. names(res$rateCat)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")
  207. res$phy=td$phy
  208. res$dat=td$data[,1]
  209. res
  210. }
  211. fixRateCat<-function(x)
  212. # assigns indicator variables to shared rate classes amongst branches
  213. {
  214. f<-factor(x)
  215. n<-nlevels(f)
  216. o<-numeric(n)
  217. for(i in 1:n) o[i]<-which(f==levels(f)[i])[1]
  218. nf<-ordered(f, levels(f)[order(o)])
  219. nx<-as.numeric(nf)
  220. nx
  221. }
  222. fitModel<-function(phy, data, model, rootMean)
  223. # derive likelihood for given tree, data, and model
  224. {
  225. phy$edge.length=phy$edge.length*model
  226. vcv<-vcv.phylo(phy)
  227. mm<-rep(rootMean, nrow(vcv)) ## analogous to w in glssoln
  228. return(lnldmv(data, mm, vcv))
  229. }
  230. Stirling2 <- function(n,m)
  231. {
  232. ## Purpose: Stirling Numbers of the 2-nd kind
  233. ## S^{(m)}_n = number of ways of partitioning a set of
  234. ## $n$ elements into $m$ non-empty subsets
  235. ## Author: Martin Maechler, Date: May 28 1992, 23:42
  236. ## ----------------------------------------------------------------
  237. ## Abramowitz/Stegun: 24,1,4 (p. 824-5 ; Table 24.4, p.835)
  238. ## Closed Form : p.824 "C."
  239. ## ----------------------------------------------------------------
  240. if (0 > m || m > n) stop("'m' must be in 0..n !")
  241. k <- 0:m
  242. sig <- rep(c(1,-1)*(-1)^m, length= m+1)# 1 for m=0; -1 1 (m=1)
  243. ## The following gives rounding errors for (25,5) :
  244. ## r <- sum( sig * k^n /(gamma(k+1)*gamma(m+1-k)) )
  245. ga <- gamma(k+1)
  246. round(sum( sig * k^n /(ga * rev(ga))))
  247. }
  248. lnldmv<-function(y, mean, sigma) {
  249. # vcv = sigma
  250. num<- -t(y-mean)%*%solve(sigma)%*%(y-mean)/2
  251. den<-sqrt((2*pi)^length(y)*det(sigma))
  252. return((num-log(den))[1,1])
  253. }