PageRenderTime 59ms CodeModel.GetById 17ms app.highlight 35ms RepoModel.GetById 2ms app.codeStats 0ms

/archive/source.archive/bm.and.ou/trait.rjmcmc.09022010.R

http://github.com/eastman/auteur
R | 278 lines | 228 code | 25 blank | 25 comment | 70 complexity | eaa693bc376165c9155e0da4e32c9cc5 MD5 | raw file
  1## Written by LJ HARMON, AH: 2008
  2
  3brownMcmc<-function (phy, data, ngen = 1000, probMergeSplit = 0.01, rootp = 0.1, sampleFreq = 100, ylim = c(-200, 0), fileBase = "result") 
  4{	  
  5	td <- treedata(phy, data, sort = T)
  6    nn <- length(td$phy$edge.length)	# max number of rates (one per branch)
  7    currRateCat <- numeric(nn)			# initialize array for recording which branch belongs to which rate category
  8    currRateCat[] <- 1					# assign one rate to all branches initially
  9    currRate <- numeric(nn)				# initialize array for rate estimates branchwise
 10    currRate[] <- init <- 0.001			# initialize BMMrate to be small
 11    currRootMean <- mean(td$data[, 1])	# determine an ancestral state (by averaging known data)
 12    l <- numeric(ngen)					# number of MCMC generations
 13    l[] <- NA							# initialize array for storing lnLs
 14    nMergeProp = 0						## proposal count: merge two rate parameters
 15    nSplitProp = 0						## proposal count: draw new rate parameter
 16    nRateProp = 0						## proposal count: update rate parameter(s)
 17    nMergeOK = 0						## proposal count: successful merges
 18    nSplitOK = 0						## proposal count: successful splits
 19    nRateOK = 0							## proposal count: successful rate changes
 20
 21    res <- list(rate = currRate, rateCat = currRateCat, lnlprof = l)
 22    
 23# error and output file handling
 24	errorNum <- 0 
 25	errorLog <- paste(fileBase, 'errors.log', sep = '')
 26	cat(c('Error log for run beginning', date(), '\n'), file = errorLog, append = F, sep = ' ')
 27	cat(c('--------------------------------------------------------', '\n\n'), file = errorLog, append = T)
 28	cat(c('generation', '\t', 'numCat', '\t', 'rateMin', '\t', 'rateMax', '\t', 'cumulativePercentage', '\n'), file = paste(errorLog, 'stats.txt', sep = ''), append = F, sep = '')
 29	
 30	if (fileBase != "") {
 31        f1 <- paste(fileBase, "RateCat.log", sep = "")
 32        f2 <- paste(fileBase, "Rates.log", sep = "")
 33    } else {
 34        f1 <- ""
 35        f2 <- ""
 36    }
 37    cat("", file = f1)
 38    cat("", file = f2)
 39# end output file handling
 40	
 41# Begin rjMCMC
 42    for (i in 1:ngen) {
 43        lnLikelihoodRatio = 0
 44        lnProposalRatio = 0
 45        lnPriorRatio = 0
 46        merge = FALSE
 47        split = FALSE
 48		
 49# determine whether to split or merge rate categories
 50        if (runif(1) < (2 * probMergeSplit)) {
 51            if (max(currRateCat) == 1) {
 52                split = TRUE
 53            }
 54            else if (max(currRateCat) == nn) {
 55                merge = TRUE
 56            }
 57            else if (runif(1) < 0.5) {
 58                split = TRUE
 59            }
 60            else merge = TRUE
 61        }
 62		
 63# update either number of rate categories or rates themselves
 64        if (merge) 
 65            nMergeProp = nMergeProp + 1
 66        if (split) 
 67            nSplitProp = nSplitProp + 1
 68        if (!merge & !split) 
 69            nRateProp = nRateProp + 1
 70        newRateCat <- currRateCat
 71        newRate <- currRate
 72        newRootMean <- currRootMean
 73		
 74# decide whether to update root mean or update a rate estimate
 75        if (!merge & !split) {
 76            if (runif(1) < rootp) {
 77                rch <- runif(1, min = -0.5, max = 0.5)
 78                newRootMean <- newRootMean + rch
 79            }
 80            else {
 81                ncat <- max(currRateCat)
 82                rcat <- round(runif(1) * (ncat) + 0.5)
 83                r <- log(currRate[currRateCat == rcat][1])
 84                rch <- runif(1, min = -2, max = 2)
 85                nr <- r + rch
 86                newRate[currRateCat == rcat] <- exp(nr)
 87            }
 88        }
 89		
 90        if (merge) {
 91            ncat <- max(currRateCat)
 92            mergeCats <- sample(1:ncat)[1:2]
 93            newRateCat[newRateCat == mergeCats[1]] <- mergeCats[2]
 94            newRateCat <- fixRateCat(newRateCat)
 95            or1 <- currRate[currRateCat == mergeCats[1]][1]
 96            or2 <- currRate[currRateCat == mergeCats[2]][1]
 97            n1 <- sum(currRateCat == mergeCats[1])
 98            n2 <- sum(currRateCat == mergeCats[2])
 99            nr <- (n1 * or1 + n2 * or2)/(n1 + n2)
100            newRate[currRateCat == mergeCats[1] | currRateCat == 
101                mergeCats[2]] <- nr
102            numSplittableBeforeMerge <- sum(table(currRateCat) > 
103                1)
104            numSplittableAfterMerge <- sum(table(newRateCat) > 
105                1)
106            if (max(currRateCat) == nn) {
107                probMerge = 1
108            }
109            else {
110                probMerge = 0.5
111            }
112            if (max(newRateCat) == 1) {
113                probSplit = 1
114            }
115            else {
116                probSplit = 0.5
117            }
118            factor = probSplit/probMerge
119            lnProposalRatio = log(factor) + log(nn) + log(ncat) - log(numSplittableAfterMerge) - log(2^(n1 + n2) - 2) - log(nr/mean(currRate)) - log(n1 + n2)
120            lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat - 1))
121        }
122		
123        if (split) {
124            ncat <- max(currRateCat)
125            while (1) {
126                rcat <- round(runif(1) * ncat + 0.5)
127                nc <- sum(currRateCat == rcat)
128                if (nc > 1) 
129                  break
130            }
131            while (1) {
132                new <- round(runif(nc) * 2 + 0.5)
133                if (length(table(new)) == 2) 
134                  break
135            }
136            newRateCat[currRateCat == rcat][new == 2] <- ncat + 
137                1
138            or <- currRate[currRateCat == rcat][1]
139            n1 <- sum(new == 1)
140            n2 <- sum(new == 2)
141            u <- runif(1, min = -0.5 * n1 * or, max = 0.5 * n2 * 
142                or)
143            nr1 <- or + u/n1
144            nr2 <- or - u/n2
145            newRate[newRateCat == rcat] <- nr1
146            newRate[newRateCat == ncat + 1] <- nr2
147            newRateCat <- fixRateCat(newRateCat)
148            numSplittableBeforeSplit <- sum(table(currRateCat) > 1)
149            numSplittableAfterSplit <- sum(table(newRateCat) > 1)
150            if (max(currRateCat) == 1) {
151                probSplit = 1
152            }
153            else {
154                probSplit = 0.5
155            }
156            if (max(newRateCat) == nn) {
157                probMerge = 1
158            }
159            else {
160                probMerge = 0.5
161            }
162            factor = probMerge/probSplit
163            lnProposalRatio = log(factor) + log(numSplittableBeforeSplit) + log(2^(n1 + n2) - 2) + log(or/mean(currRate)) + log(n1 + n2) - log(nn) - log(ncat + 1)
164            lnPriorRatio = log(Stirling2(nn, ncat)) - log(Stirling2(nn, ncat + 1))
165        }
166		
167        lnlCurr <- fitModel(td$phy, td$data[, 1], currRate, currRootMean)
168        lnlNew <-  fitModel(td$phy, td$data[, 1], newRate, newRootMean)
169
170		lnLikelihoodRatio = lnlNew - lnlCurr
171		
172		heat = 1
173		lnR = heat * lnLikelihoodRatio + heat * lnPriorRatio + lnProposalRatio
174    
175		if (lnR > 0) {
176			r = 1
177		}
178		else if (lnR < -100) {
179			r = 0
180		}
181		else {
182			r = exp(lnR)
183		}
184#message(paste('lnlNew - lnlCurr =', lnlNew, '-', lnlCurr, '=', lnLikelihoodRatio, "lnR", lnR, "r", r))
185         
186# adopt proposed update; message("YES")
187		if (runif(1) <= r) {
188			currRate <- newRate
189			currRateCat <- newRateCat
190			currRootMean <- newRootMean
191			res$lnlprof[i] <- lnlNew
192			if (merge) 
193				nMergeOK = nMergeOK + 1
194			if (split) 
195				nSplitOK = nSplitOK + 1
196			if (!merge & !split) 
197				nRateOK = nRateOK + 1  
198		}
199		
200# reject proposed update; message("NO")
201		else {					
202			res$lnlprof[i] <- lnlCurr  
203		}
204		
205# log parameters
206        if (i%%sampleFreq == 0) {
207            res$rate <- cbind(res$rate, currRate)
208            res$rateCat <- cbind(res$rateCat, currRateCat)
209            cat(file = f1, i, mean(currRate), max(currRateCat), lnlCurr, "\n", append = T, sep = "\t")
210            cat(file = f2, mean(currRate), "\n", append = T, sep = "\t")
211            cat("Gen ", i, "completed\n")
212		}
213    }
214# End rjMCMC
215	
216	diagnostics=c(nMergeOK, nMergeProp, nSplitOK, nSplitProp, nRateOK, nRateProp, init)
217	names(diagnostics)=c("nMergeOK", "nMergeProp", "nSplitOK", "nSplitProp", "nRateOK", "nRateProp", "initialRate")
218	res$diagnostics=diagnostics
219	res$rate=as.data.frame(res$rate[,-1])
220	names(res$rate)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")
221	row.names(res$rate)=td$phy$edge[,2]
222	res$rateCat=as.data.frame(res$rateCat[,-1])
223	row.names(res$rateCat)=td$phy$edge[,2]
224	names(res$rateCat)=paste("gen",seq(from=sampleFreq, to=ngen, by=sampleFreq), sep=".")
225	res$phy=td$phy
226	res$dat=td$data[,1]
227    res
228}
229
230
231fixRateCat<-function(x)
232# assigns indicator variables to shared rate classes amongst branches
233{
234	f<-factor(x)
235	n<-nlevels(f)
236	o<-numeric(n)
237	for(i in 1:n) o[i]<-which(f==levels(f)[i])[1]
238	nf<-ordered(f, levels(f)[order(o)])	
239	nx<-as.numeric(nf)
240	nx
241}
242
243fitModel<-function(phy, data, model, rootMean)
244# derive likelihood for given tree, data, and model
245{
246	phy$edge.length=phy$edge.length*model
247	vcv<-vcv.phylo(phy)
248	mm<-rep(rootMean, nrow(vcv))  ## analogous to w in glssoln
249	return(lnldmv(data, mm, vcv))
250	
251	}
252	
253Stirling2 <- function(n,m)
254{
255    ## Purpose:  Stirling Numbers of the 2-nd kind
256    ## 		S^{(m)}_n = number of ways of partitioning a set of
257    ##                      $n$ elements into $m$ non-empty subsets
258    ## Author: Martin Maechler, Date:  May 28 1992, 23:42
259    ## ----------------------------------------------------------------
260    ## Abramowitz/Stegun: 24,1,4 (p. 824-5 ; Table 24.4, p.835)
261    ## Closed Form : p.824 "C."
262    ## ----------------------------------------------------------------
263
264    if (0 > m || m > n) stop("'m' must be in 0..n !")
265    k <- 0:m
266    sig <- rep(c(1,-1)*(-1)^m, length= m+1)# 1 for m=0; -1 1 (m=1)
267    ## The following gives rounding errors for (25,5) :
268    ## r <- sum( sig * k^n /(gamma(k+1)*gamma(m+1-k)) )
269    ga <- gamma(k+1)
270    round(sum( sig * k^n /(ga * rev(ga))))
271}
272
273lnldmv<-function(y, mean, sigma) {
274# vcv = sigma
275	num<- -t(y-mean)%*%solve(sigma)%*%(y-mean)/2
276	den<-sqrt((2*pi)^length(y)*det(sigma))
277	return((num-log(den))[1,1])
278	}