yardstick-metric-sets.R - DEFAULT FORECAST ACCURACY METRIC …

/R/yardstick-metric-sets.R

https://github.com/business-science/modeltime · R · 287 lines · 93 code · 45 blank · 149 comment · 5 complexity · c294776f4d2c8f1dd602a417f35d9bd7 MD5 · raw file

# DEFAULT FORECAST ACCURACY METRIC SET ----

#' Forecast Accuracy Metrics Sets
#'
#'
#' This is a wrapper for [metric_set()] with several common forecast / regression
#' accuracy metrics included. These are the default time series accuracy
#' metrics used with [modeltime_accuracy()].
#'
#' @param ... Add additional `yardstick` metrics
#'
#' @details
#'
#' # Default Forecast Accuracy Metric Set
#'
#' The primary purpose is to use the default accuracy metrics to calculate the following
#' forecast accuracy metrics using [modeltime_accuracy()]:
#'
#' - MAE   - Mean absolute error, [mae()]
#' - MAPE  - Mean absolute percentage error, [mape()]
#' - MASE  - Mean absolute scaled error, [mase()]
#' - SMAPE - Symmetric mean absolute percentage error, [smape()]
#' - RMSE  - Root mean squared error, [rmse()]
#' - RSQ   - R-squared, [rsq()]
#'
#' Adding additional metrics is possible via `...`.
#'
#' # Extended Forecast Accuracy Metric Set
#'
#' Extends the default metric set by adding:
#'
#' - MAAPE - Mean Arctangent Absolute Percentage Error, [maape()].
#'   MAAPE is designed for intermittent data where MAPE returns `Inf`.
#'
#'
#'
#' @seealso
#' - [yardstick::metric_tweak()] - For modifying `yardstick` metrics
#'
#' @examples
#' library(tibble)
#' library(dplyr)
#' library(timetk)
#' library(yardstick)
#'
#' fake_data <- tibble(
#'     y    = c(1:12, 2*1:12),
#'     yhat = c(1 + 1:12, 2*1:12 - 1)
#' )
#'
#' # ---- HOW IT WORKS ----
#'
#' # Default Forecast Accuracy Metric Specification
#' default_forecast_accuracy_metric_set()
#'
#' # Create a metric summarizer function from the metric set
#' calc_default_metrics <- default_forecast_accuracy_metric_set()
#'
#' # Apply the metric summarizer to new data
#' calc_default_metrics(fake_data, y, yhat)
#'
#' # ---- ADD MORE PARAMETERS ----
#'
#' # Can create a version of mase() with seasonality = 12 (monthly)
#' mase12 <- metric_tweak(.name = "mase12", .fn = mase, m = 12)
#'
#' # Add it to the default metric set
#' my_metric_set <- default_forecast_accuracy_metric_set(mase12)
#' my_metric_set
#'
#' # Apply the newly created metric set
#' my_metric_set(fake_data, y, yhat)
#'
#'
#' @name metric_sets

#' @importFrom yardstick mae mape mase smape rmse rsq metric_tweak
#' @export
#' @rdname metric_sets
default_forecast_accuracy_metric_set <- function(...) {
    yardstick::metric_set(
        yardstick::mae,
        yardstick::mape,
        yardstick::mase,
        yardstick::smape,
        yardstick::rmse,
        yardstick::rsq,
        ...
    )
}


# EXTENDED FORECAST ACCURACY METRIC SET ----


#' @importFrom yardstick mae mape mase smape rmse rsq metric_tweak
#' @export
#' @rdname metric_sets
extended_forecast_accuracy_metric_set <- function(...) {
    yardstick::metric_set(
        yardstick::mae,
        yardstick::mape,
        maape,
        yardstick::mase,
        yardstick::smape,
        yardstick::rmse,
        yardstick::rsq,
        ...
    )
}


# SUMMARIZE ACCURACY ----

#' Summarize Accuracy Metrics
#'
#' This is an internal function used by `modeltime_accuracy()`.
#'
#' @inheritParams modeltime_accuracy
#' @param data  A `data.frame` containing the truth and estimate columns.
#' @param truth The column identifier for the true results (that is numeric).
#' @param estimate The column identifier for the predicted results (that is also numeric).
#'
#' @examples
#' library(tibble)
#' library(dplyr)
#'
#' predictions_tbl <- tibble(
#'     group = c("model 1", "model 1", "model 1",
#'               "model 2", "model 2", "model 2"),
#'     truth = c(1, 2, 3,
#'               1, 2, 3),
#'     estimate = c(1.2, 2.0, 2.5,
#'                  0.9, 1.9, 3.3)
#' )
#'
#' predictions_tbl %>%
#'     group_by(group) %>%
#'     summarize_accuracy_metrics(
#'         truth, estimate,
#'         metric_set = default_forecast_accuracy_metric_set()
#'     )
#'
#' @export
summarize_accuracy_metrics <- function(data, truth, estimate, metric_set) {

    data_tbl <- data

    truth_expr    <- rlang::enquo(truth)
    estimate_expr <- rlang::enquo(estimate)

    metric_summarizer_fun <- metric_set

    group_nms <- dplyr::group_vars(data_tbl)

    data_tbl %>%
        metric_summarizer_fun(!! truth_expr, !! estimate_expr) %>%
        dplyr::select(-.estimator) %>%

        dplyr::group_by(!!! rlang::syms(group_nms)) %>%
        dplyr::mutate(.metric = make.unique(.metric, sep = "_")) %>%
        dplyr::ungroup() %>%

        tidyr::pivot_wider(
            names_from  = .metric,
            values_from = .estimate
        )

}


# UTILITIES ----

calc_accuracy_2 <- function(train_data = NULL, test_data = NULL, metric_set, by_id = FALSE, ...) {

    metrics <- metric_set

    # Training Metrics
    train_metrics_tbl <- tibble::tibble()

    # Testing Metrics
    test_metrics_tbl <- tibble::tibble()

    # Check by_id
    if (by_id) {
        if (length(names(test_data)) == 5) {
            id_col_text <- names(test_data)[5]
            test_data <- test_data %>%
                dplyr::group_by(!! rlang::ensym(id_col_text))
        } else {
            rlang::warn("The 'id' column in calibration data was not detected. Global accuracy is being returned.")
        }

    }

    if (!is.null(test_data)) {

        test_metrics_tbl <- test_data %>%
            summarize_accuracy_metrics(
                truth      = .actual,
                estimate   = .prediction,
                metric_set = metrics
            ) %>%
            dplyr::ungroup()

    }

    metrics_tbl <- dplyr::bind_rows(train_metrics_tbl, test_metrics_tbl)

    return(metrics_tbl)
}


# MAAPE ----

#' Mean Arctangent Absolute Percentage Error
#'
#' This is basically a wrapper to the function of `TSrepr::maape()`.
#'
#' @param truth The column identifier for the true results (that is numeric).
#' @param estimate The column identifier for the predicted results (that is also numeric).
#' @param na_rm Not in use...NA values managed by TSrepr::maape
#' @param ... Not currently in use
#'
#' @export
maape_vec <- function(truth, estimate, na_rm = TRUE, ...) {

    maape_impl <- function(truth, estimate) {
        TSrepr::maape(truth, estimate)
    }

    yardstick::metric_vec_template(
        metric_impl = maape_impl,
        truth = truth,
        estimate = estimate,
        na_rm = na_rm,
        cls = "numeric",
        ...
    )

}


# MAAPE ----

#' Mean Arctangent Absolute Percentage Error
#'
#' Useful when MAPE returns Inf typically due to intermittent data containing zeros.
#' This is a wrapper to the function of `TSrepr::maape()`.
#'
#' @param data  A `data.frame` containing the truth and estimate columns.
#' @param ... Not currently in use.
#'
#' @export

maape <- function(data, ...) {
    UseMethod("maape")
}

maape <- yardstick::new_numeric_metric(maape, direction = "minimize")

# MAAPE ----

#' Mean Arctangent Absolute Percentage Error
#'
#' This is basically a wrapper to the function of `TSrepr::maape()`.
#'
#' @param data  A `data.frame` containing the truth and estimate columns.
#' @param truth The column identifier for the true results (that is numeric).
#' @param estimate The column identifier for the predicted results (that is also numeric).
#' @param na_rm Not in use...NA values managed by TSrepr::maape
#' @param ... Not currently in use
#'
#' @export
maape.data.frame <- function(data, truth, estimate, na_rm = TRUE, ...) {

    yardstick::metric_summarizer(
        metric_nm = "maape",
        metric_fn = maape_vec,
        data = data,
        truth = !! enquo(truth),
        estimate = !! enquo(estimate),
        na_rm = na_rm,
        ...
    )

}