/R/plot_signature_strand_bias.R
https://github.com/UMCUGenetics/MutationalPatterns · R · 112 lines · 57 code · 8 blank · 47 comment · 3 complexity · bb90d9248fc9ed9f7037c7f40a4823b6 MD5 · raw file
- #' Plot signature strand bias
- #'
- #' Plot strand bias per mutation type for each signature.
- #'
- #' @param signatures_strand_bias Signature matrix with 192 features
- #' @return Barplot
- #'
- #' @import ggplot2
- #' @importFrom magrittr %>%
- #'
- #' @examples
- #' ## See the 'mut_matrix()' example for how we obtained the following
- #' ## mutation matrix.
- #' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
- #' package = "MutationalPatterns"
- #' ))
- #'
- #' ## Extracting signatures can be computationally intensive, so
- #' ## we use pre-computed data generated with the following command:
- #' # nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2)
- #'
- #' nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
- #' package = "MutationalPatterns"
- #' ))
- #'
- #' ## Provide column names for the plot.
- #' colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
- #'
- #' ## Creat figure
- #' plot_signature_strand_bias(nmf_res_strand$signatures)
- #'
- #' ## You can also plot the bias of samples
- #' plot_signature_strand_bias(mut_mat_s[, c(1, 2)])
- #' @seealso
- #' \code{link{extract_signatures}},
- #' \code{link{mut_matrix}}
- #'
- #' @export
- plot_signature_strand_bias <- function(signatures_strand_bias) {
- # These variables use non standard evaluation.
- # To avoid R CMD check complaints we initialize them to NULL.
- Signature <- type <- ratio <- transcribed <- untranscribed <- NULL
- `Group.1` <- `Group.2` <- significant <- pval <- log2_ratio <- NULL
- # check if there are 192 features in the signatures
- if (dim(signatures_strand_bias)[1] != 192) {
- stop(paste(
- "Input signature matrix does not have 192 features (96",
- "trinucleotide * 2 strands)."
- ))
- }
- # aggregate by strand and type
- sum_per_type <- stats::aggregate(signatures_strand_bias,
- by = list(STRAND, SUBSTITUTIONS_192),
- FUN = sum
- )
- # Make tibble longer
- stats_per_type <- sum_per_type %>%
- dplyr::rename(strand = `Group.1`, type = `Group.2`) %>%
- dplyr::mutate(strand = ifelse(strand == "T", "transcribed", "untranscribed")) %>%
- tidyr::pivot_longer(c(-strand, -type), names_to = "Signature") %>% # Combine signature columns
- tidyr::pivot_wider(names_from = strand) %>% # Split transcribed/untranscribed column for binom test.
- dplyr::mutate(
- observed = as.integer(transcribed),
- size = as.integer(transcribed + untranscribed),
- ratio = (transcribed / untranscribed),
- ratio = ifelse(is.nan(ratio), 1, ratio),
- log2_ratio = log2(ratio)
- )
- # Perform binomial test
- stats_per_type$pval <- purrr::map_dbl(seq_len(nrow(stats_per_type)), function(i) {
- row <- stats_per_type[i, ]
- binomial_test(0.5, row$size, row$observed)$pval
- })
- # Add stars when significant.
- stats_per_type <- dplyr::mutate(stats_per_type, significant = ifelse(pval < 0.05, "*", " "))
- # Find maximum y value for plotting
- ratios <- stats_per_type %>%
- dplyr::filter(is.finite(log2_ratio)) %>%
- dplyr::pull(log2_ratio)
- max <- round(max(abs(ratios)))
- # Plot
- plot <- ggplot(
- stats_per_type,
- aes(x = type, y = log2_ratio, fill = type)
- ) +
- geom_bar(stat = "identity", position = "dodge", color = "black") +
- scale_y_continuous(limits = c(-max, max)) +
- scale_fill_manual(values = COLORS6) +
- facet_grid(Signature ~ .) +
- ylab("log2(transcribed/untranscribed)") +
- theme_bw() +
- scale_x_discrete(breaks = NULL) +
- xlab("") +
- geom_text(
- aes(
- x = type,
- y = log2(ratio),
- label = significant,
- vjust = ifelse(sign(log2_ratio) > 0, 0.5, 1)
- ),
- size = 8, position = ggplot2::position_dodge(width = 1)
- )
- return(plot)
- }