strand_bias_test.R - These variables use non standard evalu…

/R/strand_bias_test.R

https://github.com/UMCUGenetics/MutationalPatterns · R · 82 lines · 21 code · 6 blank · 55 comment · 0 complexity · 5567e03a8413f548c8c42be5e52a6820 MD5 · raw file

#' Significance test for strand asymmetry
#'
#' This function performs a two sided Poisson test for the ratio between
#' mutations on each strand. Multiple testing correction (fdr) is also
#' performed.
#'
#' @param strand_occurrences Dataframe with mutation count per strand, result
#' from 'strand_occurrences()'
#' @param p_cutoffs Significance cutoff(s) for the p value. Multiple (max 3)
#' cutoffs can be supplied to vary the number of significance stars.
#' Default: 0.05
#' @param fdr_cutoffs Significance cutoff(s) for the fdr. Multiple (max 3)
#' cutoffs can be supplied to vary the number of significance stars.
#' Default: 0.1
#' @return Dataframe with poisson test P value for the ratio between the
#' two strands per group per base substitution type. Next to the mutation
#' count of each strand it contains their sum ('total'), their ratio
#' ('ratio'), the P value ('p_poisson'), the fdr adjusted P value ('fdr')
#' and the significance stars of both ('significant', 'significant_fdr').
#' @importFrom magrittr  %>%
#'
#' @examples
#' ## See the 'mut_matrix_stranded()' example for how we obtained the
#' ## following mutation matrix.
#' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' tissue <- c(
#'   "colon", "colon", "colon",
#'   "intestine", "intestine", "intestine",
#'   "liver", "liver", "liver"
#' )
#'
#' ## Perform the strand bias test.
#' strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
#' strand_bias <- strand_bias_test(strand_counts)
#'
#' ## Use different significance cutoffs for the pvalue and fdr
#' strand_bias_strict <- strand_bias_test(strand_counts,
#'   p_cutoffs = 0.01, fdr_cutoffs = 0.05
#' )
#'
#' ## Use multiple (max 3) significance cutoffs.
#' ## This will vary the number of significance stars.
#' strand_bias_multistars <- strand_bias_test(strand_counts,
#'   p_cutoffs = c(0.05, 0.01, 0.005),
#'   fdr_cutoffs = c(0.1, 0.05, 0.01)
#' )
#' @seealso
#' \code{\link{mut_matrix_stranded}},
#' \code{\link{strand_occurrences}},
#' \code{\link{plot_strand_bias}}
#'
#' @export

strand_bias_test <- function(strand_occurrences,
                             p_cutoffs = 0.05,
                             fdr_cutoffs = 0.1) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  strand <- relative_contribution <- no_mutations <- NULL
  p_poisson <- fdr <- NULL

  # Make data wide: the mutation counts of each strand get their own column.
  df_strand <- strand_occurrences %>%
    dplyr::select(-relative_contribution) %>%
    tidyr::pivot_wider(names_from = strand, values_from = no_mutations)

  # The names of the strand columns come from the data, so the two count
  # columns are addressed by position (3 and 4) instead of by name.
  counts_first <- df_strand[[3]]
  counts_second <- df_strand[[4]]

  # Calculate total and ratio
  df_strand[["total"]] <- counts_first + counts_second
  df_strand[["ratio"]] <- counts_first / counts_second

  # poisson test for strand ratio
  # Is the same as binom in this scenario.
  # Since the output uses poisson in the name, we will keep using this.
  # The test is run per row on the numeric count vectors directly; calling
  # apply() on the data frame would first coerce every row to character.
  df_strand[["p_poisson"]] <- vapply(
    seq_len(nrow(df_strand)),
    function(i) {
      stats::poisson.test(
        c(as.numeric(counts_first[i]), as.numeric(counts_second[i])),
        r = 1, alternative = "two.sided"
      )$p.value
    },
    numeric(1)
  )

  # Add significance stars and do multiple testing correction.
  df_strand %>%
    dplyr::mutate(
      significant = .get_sig_star(p_poisson, p_cutoffs),
      fdr = stats::p.adjust(p_poisson, method = "fdr"),
      significant_fdr = .get_sig_star(fdr, fdr_cutoffs)
    )
}