/R/plot_signature_strand_bias.R

https://github.com/UMCUGenetics/MutationalPatterns · R · 112 lines · 57 code · 8 blank · 47 comment · 3 complexity · bb90d9248fc9ed9f7037c7f40a4823b6 MD5 · raw file

  1. #' Plot signature strand bias
  2. #'
  3. #' Plot strand bias per mutation type for each signature.
  4. #'
  5. #' @param signatures_strand_bias Signature matrix with 192 features
  6. #' @return Barplot
  7. #'
  8. #' @import ggplot2
  9. #' @importFrom magrittr %>%
  10. #'
  11. #' @examples
  12. #' ## See the 'mut_matrix()' example for how we obtained the following
  13. #' ## mutation matrix.
  14. #' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
  15. #' package = "MutationalPatterns"
  16. #' ))
  17. #'
  18. #' ## Extracting signatures can be computationally intensive, so
  19. #' ## we use pre-computed data generated with the following command:
  20. #' # nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2)
  21. #'
  22. #' nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
  23. #' package = "MutationalPatterns"
  24. #' ))
  25. #'
  26. #' ## Provide column names for the plot.
  27. #' colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
  28. #'
  29. #' ## Creat figure
  30. #' plot_signature_strand_bias(nmf_res_strand$signatures)
  31. #'
  32. #' ## You can also plot the bias of samples
  33. #' plot_signature_strand_bias(mut_mat_s[, c(1, 2)])
  34. #' @seealso
  35. #' \code{link{extract_signatures}},
  36. #' \code{link{mut_matrix}}
  37. #'
  38. #' @export
  39. plot_signature_strand_bias <- function(signatures_strand_bias) {
  40. # These variables use non standard evaluation.
  41. # To avoid R CMD check complaints we initialize them to NULL.
  42. Signature <- type <- ratio <- transcribed <- untranscribed <- NULL
  43. `Group.1` <- `Group.2` <- significant <- pval <- log2_ratio <- NULL
  44. # check if there are 192 features in the signatures
  45. if (dim(signatures_strand_bias)[1] != 192) {
  46. stop(paste(
  47. "Input signature matrix does not have 192 features (96",
  48. "trinucleotide * 2 strands)."
  49. ))
  50. }
  51. # aggregate by strand and type
  52. sum_per_type <- stats::aggregate(signatures_strand_bias,
  53. by = list(STRAND, SUBSTITUTIONS_192),
  54. FUN = sum
  55. )
  56. # Make tibble longer
  57. stats_per_type <- sum_per_type %>%
  58. dplyr::rename(strand = `Group.1`, type = `Group.2`) %>%
  59. dplyr::mutate(strand = ifelse(strand == "T", "transcribed", "untranscribed")) %>%
  60. tidyr::pivot_longer(c(-strand, -type), names_to = "Signature") %>% # Combine signature columns
  61. tidyr::pivot_wider(names_from = strand) %>% # Split transcribed/untranscribed column for binom test.
  62. dplyr::mutate(
  63. observed = as.integer(transcribed),
  64. size = as.integer(transcribed + untranscribed),
  65. ratio = (transcribed / untranscribed),
  66. ratio = ifelse(is.nan(ratio), 1, ratio),
  67. log2_ratio = log2(ratio)
  68. )
  69. # Perform binomial test
  70. stats_per_type$pval <- purrr::map_dbl(seq_len(nrow(stats_per_type)), function(i) {
  71. row <- stats_per_type[i, ]
  72. binomial_test(0.5, row$size, row$observed)$pval
  73. })
  74. # Add stars when significant.
  75. stats_per_type <- dplyr::mutate(stats_per_type, significant = ifelse(pval < 0.05, "*", " "))
  76. # Find maximum y value for plotting
  77. ratios <- stats_per_type %>%
  78. dplyr::filter(is.finite(log2_ratio)) %>%
  79. dplyr::pull(log2_ratio)
  80. max <- round(max(abs(ratios)))
  81. # Plot
  82. plot <- ggplot(
  83. stats_per_type,
  84. aes(x = type, y = log2_ratio, fill = type)
  85. ) +
  86. geom_bar(stat = "identity", position = "dodge", color = "black") +
  87. scale_y_continuous(limits = c(-max, max)) +
  88. scale_fill_manual(values = COLORS6) +
  89. facet_grid(Signature ~ .) +
  90. ylab("log2(transcribed/untranscribed)") +
  91. theme_bw() +
  92. scale_x_discrete(breaks = NULL) +
  93. xlab("") +
  94. geom_text(
  95. aes(
  96. x = type,
  97. y = log2(ratio),
  98. label = significant,
  99. vjust = ifelse(sign(log2_ratio) > 0, 0.5, 1)
  100. ),
  101. size = 8, position = ggplot2::position_dodge(width = 1)
  102. )
  103. return(plot)
  104. }