PageRenderTime 51ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/R/ripple.R

http://github.com/kbroman/qtl
R | 393 lines | 253 code | 51 blank | 89 comment | 81 complexity | 58389f4fb02bce5683561e6fe1756c95 MD5 | raw file
  1. ######################################################################
  2. #
  3. # ripple.R
  4. #
  5. # copyright (c) 2001-2019, Karl W Broman
  6. # last modified Dec, 2019
  7. # first written Oct, 2001
  8. #
  9. # This program is free software; you can redistribute it and/or
  10. # modify it under the terms of the GNU General Public License,
  11. # version 3, as published by the Free Software Foundation.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but without any warranty; without even the implied warranty of
  15. # merchantability or fitness for a particular purpose. See the GNU
  16. # General Public License, version 3, for more details.
  17. #
  18. # A copy of the GNU General Public License, version 3, is available
  19. # at http://www.r-project.org/Licenses/GPL-3
  20. #
  21. # Part of the R/qtl package
  22. # Contains: ripple, summary.ripple, print.summary.ripple
  23. # ripple.perm1, ripple.perm2, ripple.perm.sub
  24. #
  25. ######################################################################
  26. ######################################################################
  27. #
  28. # ripple: Check marker orders for a given chromosome, comparing all
  29. # possible permutations of a sliding window of markers
  30. #
  31. ######################################################################
  32. ripple <-
  33. function(cross, chr, window=4, method=c("countxo","likelihood"),
  34. error.prob=0.0001, map.function=c("haldane","kosambi","c-f","morgan"),
  35. maxit=4000, tol=1e-6, sex.sp=TRUE, verbose=TRUE, n.cluster=1)
  36. {
  37. if(!inherits(cross, "cross"))
  38. stop("Input should have class \"cross\".")
  39. # pull out relevant chromosome
  40. if(missing(chr)) {
  41. chr <- names(cross$geno)[1]
  42. warning("chr argument not provided; assuming you want chr ", chr)
  43. }
  44. else {
  45. if(length(chr) > 1)
  46. stop("ripple only works for one chromosome at a time.")
  47. if(!testchr(chr, names(cross$geno)))
  48. stop("Chr ", chr, " not found.")
  49. }
  50. cross <- subset(cross,chr=chr)
  51. chr.name <- names(cross$geno)[1]
  52. if(nmar(cross)[1] < 3) {
  53. warning("Less than three markers.")
  54. return(NULL)
  55. }
  56. # don't let error.prob be exactly zero (or >1)
  57. if(error.prob < 1e-50) error.prob <- 1e-50
  58. if(error.prob > 1) {
  59. error.prob <- 1-1e-50
  60. warning("error.prob shouldn't be > 1!")
  61. }
  62. # make sure window is an integer >= 2
  63. if(window < 2) {
  64. warning("The window argument must be > 1; using window=2.")
  65. window <- 2
  66. }
  67. window <- round(window)
  68. method <- match.arg(method)
  69. map.function <- match.arg(map.function)
  70. # get marker orders to test
  71. n.mar <- totmar(cross)
  72. if(n.mar <= window) # look at all possible orders
  73. orders <- ripple.perm2(n.mar)
  74. else {
  75. temp <- ripple.perm1(window)
  76. n <- nrow(temp)
  77. orders <- cbind(temp,matrix(rep((window+1):n.mar,n),
  78. byrow=TRUE,ncol=n.mar-window))
  79. for(i in 2:(n.mar-window+1)) {
  80. left <- matrix(rep(1:(i-1),n),byrow=TRUE,ncol=i-1)
  81. if(i < n.mar-window+1)
  82. right <- matrix(rep((i+window):n.mar,n),byrow=TRUE,ncol=n.mar-window-i+1)
  83. else
  84. right <- NULL
  85. orders <- rbind(orders,cbind(left,temp+i-1,right))
  86. }
  87. # keep only distinct orders
  88. orders <- as.numeric(unlist(strsplit(unique(apply(orders,1,paste,collapse=":")),":")))
  89. orders <- matrix(orders,ncol=n.mar,byrow=TRUE)
  90. }
  91. n.orders <- nrow(orders)
  92. # how often to print information about current order being considered
  93. if(n.orders > 49) print.by <- 10
  94. else if(n.orders > 14) print.by <- 5
  95. else print.by <- 2
  96. if(method=="likelihood") {
  97. # calculate log likelihoods (and est'd chr length) for each marker order
  98. loglik <- 1:n.orders
  99. chrlen <- 1:n.orders
  100. # create temporary cross
  101. m <- seq(0,by=5,length=n.mar)
  102. temcross <- cross
  103. if(is.matrix(cross$geno[[1]]$map))
  104. temcross$geno[[1]]$map <- rbind(m,m)
  105. else temcross$geno[[1]]$map <- m
  106. if(verbose) cat(" ", n.orders,"total orders\n")
  107. if(n.cluster > 1) {
  108. # parallelize
  109. if(n.orders <= n.cluster) n.cluster <- n.orders
  110. cl <- makeCluster(n.cluster)
  111. clusterStopped <- FALSE
  112. on.exit(if(!clusterStopped) stopCluster(cl))
  113. if(verbose) cat(" Running in", n.cluster, "clusters\n")
  114. clusterEvalQ(cl, library(qtl, quietly=TRUE))
  115. whclust <- sort(rep(1:n.cluster, ceiling(n.orders/n.cluster))[1:n.orders])
  116. order.split <- vector("list", n.cluster)
  117. for(i in 1:n.cluster)
  118. order.split[[i]] <- orders[whclust==i,,drop=FALSE]
  119. result <- parLapply(cl, order.split, rippleSnowLik, cross=temcross,
  120. error.prob=error.prob, map.function=map.function, maxit=maxit, tol=tol,
  121. sex.sp=sex.sp)
  122. loglik <- unlist(lapply(result, function(a) a$loglik))
  123. chrlen <- unlist(lapply(result, function(a) a$chrlen))
  124. }
  125. else {
  126. for(i in 1:n.orders) {
  127. if(verbose && (i %/% print.by)*print.by == i) cat(" --Order", i, "\n")
  128. temcross$geno[[1]]$data <- cross$geno[[1]]$data[,orders[i,]]
  129. newmap <- est.map(temcross, error.prob=error.prob, map.function=map.function,
  130. m=0, p=0, maxit=maxit, tol=tol, sex.sp=sex.sp, verbose=FALSE)
  131. loglik[i] <- attr(newmap[[1]],"loglik")
  132. chrlen[i] <- diff(range(newmap[[1]]))
  133. }
  134. }
  135. # re-scale log likelihoods and convert to lods
  136. loglik <- (loglik - loglik[1])/log(10)
  137. # sort orders by lod
  138. o <- order(loglik[-1], decreasing=TRUE)+1
  139. # create output
  140. orders <- cbind(orders,LOD=loglik,chrlen)[c(1,o),]
  141. }
  142. else { # count obligate crossovers for each order
  143. # which type of cross is this?
  144. type <- crosstype(cross)
  145. is.bcs <- type == "bcsft"
  146. if(is.bcs)
  147. is.bcs <- (attr(cross, "scheme")[2] == 0)
  148. if(type == "f2" || (type == "bcsft" && !is.bcs)) {
  149. if(chrtype(cross$geno[[1]]) == "A") # autosomal
  150. func <- "R_ripple_f2"
  151. else func <- "R_ripple_bc" # X chromsome
  152. }
  153. else if(type %in% c("bc", "riself", "risib", "dh", "haploid", "bcsft")) func <- "R_ripple_bc"
  154. else if(type == "4way") func <- "R_ripple_4way"
  155. else if(type=="ri4self" || type=="ri8self" || type=="ri4sib" || type=="ri8sib" || type=="bgmagic16")
  156. func <- "R_ripple_ril48"
  157. else
  158. stop("ripple not available for cross ", type)
  159. # data to be input
  160. genodat <- cross$geno[[1]]$data
  161. genodat[is.na(genodat)] <- 0
  162. n.ind <- nind(cross)
  163. if(verbose) cat(" ", n.orders,"total orders\n")
  164. if(n.cluster > 1) {
  165. # parallelize
  166. if(n.orders <= n.cluster) n.cluster <- n.orders
  167. cl <- makeCluster(n.cluster)
  168. clusterStopped <- FALSE
  169. on.exit(if(!clusterStopped) stopCluster(cl))
  170. if(verbose) cat(" Running in", n.cluster, "clusters\n")
  171. clusterEvalQ(cl, library(qtl, quietly=TRUE))
  172. whclust <- sort(rep(1:n.cluster, ceiling(n.orders/n.cluster))[1:n.orders])
  173. order.split <- vector("list", n.cluster)
  174. for(i in 1:n.cluster)
  175. order.split[[i]] <- orders[whclust==i,,drop=FALSE]
  176. oblxo <- unlist(parLapply(cl, order.split, rippleSnowCountxo, genodat=genodat, func=func))
  177. stopCluster(cl)
  178. clusterStopped <- TRUE
  179. }
  180. else {
  181. z <- .C(func,
  182. as.integer(n.ind),
  183. as.integer(n.mar),
  184. as.integer(genodat),
  185. as.integer(n.orders),
  186. as.integer(orders-1),
  187. oblxo=as.integer(rep(0,n.orders)),
  188. as.integer(print.by),
  189. PACKAGE="qtl")
  190. oblxo <- z$oblxo
  191. }
  192. # sort orders by lod
  193. o <- order(oblxo[-1])+1
  194. # create output
  195. orders <- cbind(orders,obligXO=oblxo)[c(1,o),]
  196. }
  197. rownames(orders) <- c("Initial", paste(1:(nrow(orders)-1)))
  198. class(orders) <- c("ripple","matrix")
  199. attr(orders,"chr") <- chr.name
  200. attr(orders,"window") <- window
  201. attr(orders,"error.prob") <- error.prob
  202. attr(orders,"method") <- method
  203. # make sure, for each order considered, that the proximal marker
  204. # (in the original order) is to the left of the distal marker
  205. # (in the original order)
  206. orders[,1:n.mar] <- t(apply(orders[,1:n.mar,drop=FALSE],1,
  207. function(a) {
  208. n <- length(a)
  209. if((1:n)[a==1] > (1:n)[a==n]) return(rev(a))
  210. else return(a) }))
  211. orders
  212. }
  213. ######################################################################
  214. # function for method="likelihood", for parallel processing (formerly with snow pkg)
  215. ######################################################################
  216. rippleSnowLik <-
  217. function(orders, cross, error.prob, map.function, maxit, tol, sex.sp)
  218. {
  219. n.orders <- nrow(orders)
  220. temcross <- cross
  221. loglik <- chrlen <- rep(NA, n.orders)
  222. for(i in 1:n.orders) {
  223. temcross$geno[[1]]$data <- cross$geno[[1]]$data[,orders[i,]]
  224. newmap <- est.map(temcross, error.prob=error.prob, map.function=map.function,
  225. m=0, p=0, maxit=maxit, tol=tol, sex.sp=sex.sp, verbose=FALSE)
  226. loglik[i] <- attr(newmap[[1]],"loglik")
  227. chrlen[i] <- diff(range(newmap[[1]]))
  228. }
  229. list(loglik=loglik, chrlen=chrlen)
  230. }
  231. ######################################################################
  232. # function for method="countxo", for parallel processing (formerly with snow pkg)
  233. ######################################################################
  234. rippleSnowCountxo <-
  235. function(orders, genodat, func)
  236. {
  237. func <- func # this avoids a Note from R CMD check
  238. .C(func,
  239. as.integer(nrow(genodat)),
  240. as.integer(ncol(genodat)),
  241. as.integer(genodat),
  242. as.integer(nrow(orders)),
  243. as.integer(orders-1),
  244. oblxo=as.integer(rep(0, nrow(orders))),
  245. as.integer(0),
  246. PACKAGE="qtl")$oblxo
  247. }
  248. ######################################################################
  249. #
  250. # summary.ripple: print top results from ripple(). We do this so
  251. # that we can return *all* results but allow easy
  252. # view of only the important ones
  253. #
  254. ######################################################################
  255. summary.ripple <-
  256. function(object, lod.cutoff = -1, ...)
  257. {
  258. if(!inherits(object, "ripple"))
  259. stop("Input should have class \"ripple\".")
  260. n <- ncol(object)
  261. if("obligXO" %in% colnames(object)) # counts of crossovers
  262. o <- (object[-1,n] <= (object[1,n] - lod.cutoff*2))
  263. else o <- (object[-1,n-1] >= lod.cutoff) # likelihood analysis
  264. if(!any(o)) object <- object[1:2,,drop=FALSE]
  265. else # make sure first row is included
  266. object <- object[c(TRUE,o),,drop=FALSE]
  267. rownames(object) <- c("Initial ", paste(1:(nrow(object)-1)))
  268. class(object) <- c("summary.ripple","matrix")
  269. object
  270. }
  271. ######################################################################
  272. #
  273. # print.summary.ripple
  274. #
  275. ######################################################################
  276. print.summary.ripple <-
  277. function(x, ...)
  278. {
  279. n <- ncol(x)
  280. x <- round(x,1)
  281. max.row <- 6
  282. if(!("obligXO" %in% colnames(x)))
  283. colnames(x)[n-1] <- " LOD"
  284. class(x) <- "matrix"
  285. if(nrow(x) > max.row) {
  286. print(x[1:max.row,])
  287. cat("... [", nrow(x)-max.row, " additional rows] ...\n")
  288. }
  289. else print(x)
  290. }
  291. ######################################################################
  292. #
  293. # ripple.perm1: Utility function for ripple(). Returns all possible
  294. # permutations of {1, 2, ..., n}
  295. #
  296. ######################################################################
  297. ripple.perm1 <-
  298. function(n)
  299. {
  300. if(n == 1) return(rbind(1))
  301. o <- rbind(c(n-1,n),c(n,n-1))
  302. if(n > 2)
  303. for(i in (n-2):1)
  304. o <- ripple.perm.sub(i,o)
  305. dimnames(o) <- NULL
  306. o
  307. }
  308. ######################################################################
  309. #
  310. # ripple.perm2: Utility function for ripple(). Returns all possible
  311. # permutations of {1, 2, ..., n}, up to orientation of
  312. # the entire group
  313. #
  314. ######################################################################
  315. ripple.perm2 <-
  316. function(n)
  317. {
  318. if(n < 3) return(rbind(1:n))
  319. o <- rbind(c(n-2,n-1,n),c(n-1,n-2,n),c(n-1,n,n-2))
  320. if(n > 3)
  321. for(i in (n-3):1)
  322. o <- ripple.perm.sub(i,o)
  323. dimnames(o) <- NULL
  324. o
  325. }
  326. ######################################################################
  327. #
  328. # ripple.perm.sub: Subroutine used for ripple(). I'm too tired to
  329. # explain.
  330. #
  331. ######################################################################
  332. ripple.perm.sub <-
  333. function(x,mat)
  334. {
  335. res <- cbind(x,mat)
  336. if(ncol(mat) > 1) {
  337. for(i in 1:ncol(mat))
  338. res <- rbind(res,cbind(mat[,1:i],x,mat[,-(1:i)]))
  339. }
  340. res
  341. }
  342. # end of ripple.R