enrich_component_strand_bias.R

/R/enrich_component_strand_bias.R

https://github.com/ShixiangWang/sigminer · R · 101 lines · 66 code · 11 blank · 24 comment · 2 complexity · d6d7fbfe9f2e87ffd24b381174d30370 MD5 · raw file

#' Performs Strand Bias Enrichment Analysis for a Given Sample-by-Component Matrix
#'
#' See [sig_tally] for examples.
#'
#' @param mat a sample-by-component matrix from [sig_tally] with strand bias labels "T:" and "B:".
#'
#' @return a `data.table` sorted by `p_value`.
#' @export
enrich_component_strand_bias <- function(mat) {
  stopifnot(is.matrix(mat))

  ## mat: sample-by-component matrix with T: and U: labels
  T_cols <- startsWith(colnames(mat), "T:")
  U_cols <- startsWith(colnames(mat), "U:")

  if (!any(T_cols) & !any(U_cols)) {
    stop("No transcription labels found!")
  }

  types <- colnames(mat)[T_cols]
  types <- sub("T:", "", types)
  mat <- mat[, T_cols | U_cols]

  dt <- dplyr::bind_cols(
    dplyr::tibble(sample = rownames(mat)),
    dplyr::as_tibble(mat)
  )

  f4row <- function(df) {
    df <- df %>%
      tidyr::pivot_longer(dplyr::contains(":"),
        names_sep = ":",
        names_to = c("strand", "component"),
        values_to = "count"
      ) %>%
      dplyr::mutate(
        strand = ifelse(.data$strand == "T", "Trans", "UnTrans")
      ) %>%
      tidyr::pivot_wider(
        id_cols = "component",
        names_from = "strand",
        values_from = "count"
      ) %>%
      dplyr::mutate(
        Trans_Total = sum(.data$Trans),
        UnTrans_Total = sum(.data$UnTrans),
        Enrichment = .data$Trans / .data$UnTrans
      )


    df$p_value <- apply(df, 1, function(x) {
      x <- as.numeric(x[2:5])
      # The order is
      # U_Total-U, T_Total-T, U, T
      fisher.test(
        matrix(
          c(
            x[4] - x[2], x[3] - x[1],
            x[2], x[1]
          ),
          nrow = 2
        )
      )$p.value
    })
    df$fdr <- p.adjust(df$p_value, method = "fdr")

    return(df)
  }

  res <- dt %>%
    dplyr::group_by(.data$sample) %>%
    tidyr::nest() %>%
    dplyr::mutate(
      data = list(
        purrr::map_df(
          .data$data,
          f4row
        )
      )
    ) %>%
    tidyr::unnest("data") %>%
    ## Should adjust all samples for a type instead
    ## of adjust all types for a sample??
    # dplyr::group_by(.data$component) %>%
    # tidyr::nest() %>%
    # dplyr::mutate(
    #   data = list(
    #     purrr::map_df(
    #       .data$data,
    #       ~dplyr::mutate(.x, fdr = p.adjust(.data$p_value, method = "fdr"))
    #     )
    #   )
    # ) %>%
    # tidyr::unnest("data") %>%
    dplyr::arrange(.data$p_value, .data$fdr) %>%
    data.table::as.data.table()

  colnames(res)[3:6] <- c("N_T", "N_U", "Total_T", "Total_U")

  return(res)
}