/R/enrich_component_strand_bias.R

https://github.com/ShixiangWang/sigminer · R · 101 lines · 66 code · 11 blank · 24 comment · 2 complexity · d6d7fbfe9f2e87ffd24b381174d30370 MD5 · raw file

  1. #' Performs Strand Bias Enrichment Analysis for a Given Sample-by-Component Matrix
  2. #'
  3. #' See [sig_tally] for examples.
  4. #'
  5. #' @param mat a sample-by-component matrix from [sig_tally] with strand bias labels "T:" and "B:".
  6. #'
  7. #' @return a `data.table` sorted by `p_value`.
  8. #' @export
  9. enrich_component_strand_bias <- function(mat) {
  10. stopifnot(is.matrix(mat))
  11. ## mat: sample-by-component matrix with T: and U: labels
  12. T_cols <- startsWith(colnames(mat), "T:")
  13. U_cols <- startsWith(colnames(mat), "U:")
  14. if (!any(T_cols) & !any(U_cols)) {
  15. stop("No transcription labels found!")
  16. }
  17. types <- colnames(mat)[T_cols]
  18. types <- sub("T:", "", types)
  19. mat <- mat[, T_cols | U_cols]
  20. dt <- dplyr::bind_cols(
  21. dplyr::tibble(sample = rownames(mat)),
  22. dplyr::as_tibble(mat)
  23. )
  24. f4row <- function(df) {
  25. df <- df %>%
  26. tidyr::pivot_longer(dplyr::contains(":"),
  27. names_sep = ":",
  28. names_to = c("strand", "component"),
  29. values_to = "count"
  30. ) %>%
  31. dplyr::mutate(
  32. strand = ifelse(.data$strand == "T", "Trans", "UnTrans")
  33. ) %>%
  34. tidyr::pivot_wider(
  35. id_cols = "component",
  36. names_from = "strand",
  37. values_from = "count"
  38. ) %>%
  39. dplyr::mutate(
  40. Trans_Total = sum(.data$Trans),
  41. UnTrans_Total = sum(.data$UnTrans),
  42. Enrichment = .data$Trans / .data$UnTrans
  43. )
  44. df$p_value <- apply(df, 1, function(x) {
  45. x <- as.numeric(x[2:5])
  46. # The order is
  47. # U_Total-U, T_Total-T, U, T
  48. fisher.test(
  49. matrix(
  50. c(
  51. x[4] - x[2], x[3] - x[1],
  52. x[2], x[1]
  53. ),
  54. nrow = 2
  55. )
  56. )$p.value
  57. })
  58. df$fdr <- p.adjust(df$p_value, method = "fdr")
  59. return(df)
  60. }
  61. res <- dt %>%
  62. dplyr::group_by(.data$sample) %>%
  63. tidyr::nest() %>%
  64. dplyr::mutate(
  65. data = list(
  66. purrr::map_df(
  67. .data$data,
  68. f4row
  69. )
  70. )
  71. ) %>%
  72. tidyr::unnest("data") %>%
  73. ## Should adjust all samples for a type instead
  74. ## of adjust all types for a sample??
  75. # dplyr::group_by(.data$component) %>%
  76. # tidyr::nest() %>%
  77. # dplyr::mutate(
  78. # data = list(
  79. # purrr::map_df(
  80. # .data$data,
  81. # ~dplyr::mutate(.x, fdr = p.adjust(.data$p_value, method = "fdr"))
  82. # )
  83. # )
  84. # ) %>%
  85. # tidyr::unnest("data") %>%
  86. dplyr::arrange(.data$p_value, .data$fdr) %>%
  87. data.table::as.data.table()
  88. colnames(res)[3:6] <- c("N_T", "N_U", "Total_T", "Total_U")
  89. return(res)
  90. }