/R/transform_seg_table.R

https://github.com/ShixiangWang/sigminer · R · 116 lines · 85 code · 7 blank · 24 comment · 11 complexity · a5a685141a679efb9351b35775abe378 MD5 · raw file

  1. #' Transform Copy Number Table
  2. #'
  3. #' @inheritParams get_cn_freq_table
  4. #' @inheritParams tidyr::pivot_wider
  5. #' @param ref_type annotation data type used for constructing matrix.
  6. #'
  7. #' @return a `data.table`.
  8. #' @export
  9. #'
  10. #' @examples
  11. #' load(system.file("extdata", "toy_copynumber.RData",
  12. #' package = "sigminer", mustWork = TRUE
  13. #' ))
  14. #' # Compute the mean segVal in each cytoband
  15. #' x <- transform_seg_table(cn, resolution_factor = 1)
  16. #' x
  17. #' # Compute the mean segVal in each half-cytoband
  18. #' x2 <- transform_seg_table(cn, resolution_factor = 2)
  19. #' x2
  20. #' @testexamples
  21. #' expect_is(x, "data.table")
  22. #' expect_is(x2, "data.table")
  23. transform_seg_table <- function(data,
  24. genome_build = c("hg19", "hg38", "mm10", "mm9"),
  25. ref_type = c("cytoband", "gene"),
  26. values_fill = NA,
  27. values_fn = function(x, ...) {
  28. round(mean(x, ...))
  29. },
  30. resolution_factor = 1L) {
  31. stopifnot(is.data.frame(data) | inherits(data, "CopyNumber"))
  32. if (is.data.frame(data)) {
  33. nc_cols <- c("chromosome", "start", "end", "segVal", "sample")
  34. if (!all(nc_cols %in% colnames(data))) {
  35. stop("Invalid input, it must contain columns: ", paste(nc_cols, collapse = " "))
  36. }
  37. }
  38. genome_build <- match.arg(genome_build)
  39. if (inherits(data, "CopyNumber")) {
  40. genome_build <- data@genome_build
  41. data <- data@data
  42. } else {
  43. data <- data.table::as.data.table(data)
  44. }
  45. ref_type <- match.arg(ref_type)
  46. # data$sample <- factor(data$sample, levels = unique(data$sample))
  47. data$chromosome <- ifelse(startsWith(data$chromosome, prefix = "chr"),
  48. data$chromosome,
  49. paste0("chr", data$chromosome)
  50. )
  51. if (ref_type == "cytoband") {
  52. annot <- get_genome_annotation(
  53. data_type = "cytobands",
  54. genome_build = genome_build
  55. )
  56. annot$start <- annot$start + 1L
  57. } else {
  58. annot_file <- system.file(
  59. "extdata",
  60. paste0(if (startsWith(genome_build, "mm")) "mouse_"
  61. else "human_", genome_build, "_gene_info.rds"),
  62. package = "sigminer", mustWork = TRUE)
  63. ok <- TRUE
  64. if (!file.exists(annot_file)) ok <- query_remote_data(basename(annot_file))
  65. if (!ok) {
  66. return(invisible(NULL))
  67. }
  68. annot <- readRDS(annot_file)
  69. annot <- annot[, c("chrom", "start", "end", "gene_name", "gene_type")]
  70. colnames(annot)[4] <- "band"
  71. }
  72. data.table::setDT(annot)
  73. ## Control the resolution
  74. if (resolution_factor > 1) {
  75. f <- function(x, y, n, chrom, band) {
  76. helper_create_chunks(x, y,
  77. n = n,
  78. chrom = chrom,
  79. band = paste(band, seq_len(n), sep = "-chunk-")
  80. )
  81. }
  82. annot <- purrr::pmap_df(
  83. data.frame(
  84. x = annot$start,
  85. y = annot$end,
  86. n = resolution_factor,
  87. chrom = annot$chrom,
  88. band = annot$band
  89. ),
  90. .f = f
  91. ) %>%
  92. data.table::as.data.table() %>%
  93. data.table::setcolorder(c("chrom", "start", "end", "band"))
  94. }
  95. data.table::setkey(annot, chrom, start, end)
  96. merge_dt <- data.table::foverlaps(data, annot,
  97. by.x = c("chromosome", "start", "end")
  98. )
  99. merge_dt <- merge_dt %>%
  100. dplyr::as_tibble() %>%
  101. dplyr::select(-c("i.start", "i.end")) %>%
  102. na.omit() %>%
  103. tidyr::pivot_wider(
  104. names_from = "sample", values_from = "segVal",
  105. values_fill = values_fill, values_fn = values_fn
  106. )
  107. colnames(merge_dt)[4] <- "label"
  108. merge_dt %>% data.table::as.data.table()
  109. }