/R/strand_bias_test.R

https://github.com/UMCUGenetics/MutationalPatterns · R · 82 lines · 21 code · 6 blank · 55 comment · 0 complexity · 5567e03a8413f548c8c42be5e52a6820 MD5 · raw file

  1. #' Significance test for strand asymmetry
  2. #'
  3. #' This function performs a two sided Poisson test for the ratio between mutations on
  4. #' each strand. Multiple testing correction is also performed.
  5. #'
  6. #' @param strand_occurrences Dataframe with mutation count per strand, result
  7. #' from 'strand_occurrences()'
  8. #' @param p_cutoffs Significance cutoff for the p value. Default: 0.05
  9. #' @param fdr_cutoffs Significance cutoff for the fdr. Default: 0.1
  10. #' @return Dataframe with poisson test P value for the ratio between the
  11. #' two strands per group per base substitution type.
  12. #' @importFrom magrittr %>%
  13. #'
  14. #' @examples
  15. #' ## See the 'mut_matrix_stranded()' example for how we obtained the
  16. #' ## following mutation matrix.
  17. #' mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
  18. #' package = "MutationalPatterns"
  19. #' ))
  20. #'
  21. #' tissue <- c(
  22. #' "colon", "colon", "colon",
  23. #' "intestine", "intestine", "intestine",
  24. #' "liver", "liver", "liver"
  25. #' )
  26. #'
  27. #' ## Perform the strand bias test.
  28. #' strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
  29. #' strand_bias <- strand_bias_test(strand_counts)
  30. #'
  31. #' ## Use different significance cutoffs for the pvalue and fdr
  32. #' strand_bias_strict <- strand_bias_test(strand_counts,
  33. #' p_cutoffs = 0.01, fdr_cutoffs = 0.05
  34. #' )
  35. #'
  36. #' ## Use multiple (max 3) significance cutoffs.
  37. #' ## This will vary the number of significance stars.
  38. #' strand_bias_multistars <- strand_bias_test(strand_counts,
  39. #' p_cutoffs = c(0.05, 0.01, 0.005),
  40. #' fdr_cutoffs = c(0.1, 0.05, 0.01)
  41. #' )
  42. #' @seealso
  43. #' \code{\link{mut_matrix_stranded}},
  44. #' \code{\link{strand_occurrences}},
  45. #' \code{\link{plot_strand_bias}}
  46. #'
  47. #' @export
  48. strand_bias_test <- function(strand_occurrences, p_cutoffs = 0.05, fdr_cutoffs = 0.1) {
  49. # These variables use non standard evaluation.
  50. # To avoid R CMD check complaints we initialize them to NULL.
  51. group <- type <- strand <- variable <- relative_contribution <- NULL
  52. fdr <- no_mutations <- p_poisson <- NULL
  53. # Make data long
  54. df_strand <- strand_occurrences %>%
  55. dplyr::select(-relative_contribution) %>%
  56. tidyr::pivot_wider(names_from = strand, values_from = no_mutations)
  57. # Calculate total and ratio
  58. df_strand[, "total"] <- df_strand[, 3] + df_strand[, 4]
  59. df_strand[, "ratio"] <- df_strand[, 3] / df_strand[, 4]
  60. # poisson test for strand ratio
  61. # Is the same as binom in this scenario.
  62. # Since the output uses poisson in the name, we will keep using this.
  63. df_strand$p_poisson <- apply(df_strand, 1, function(x) {
  64. stats::poisson.test(c(as.numeric(x[3]), as.numeric(x[4])),
  65. r = 1, alternative = "two.sided"
  66. )$p.value
  67. })
  68. # Add significance stars and do multiple testing correction.
  69. df_strand <- df_strand %>%
  70. dplyr::mutate(
  71. significant = .get_sig_star(p_poisson, p_cutoffs),
  72. fdr = stats::p.adjust(p_poisson, method = "fdr"),
  73. significant_fdr = .get_sig_star(fdr, fdr_cutoffs)
  74. )
  75. return(df_strand)
  76. }