Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensembles for Nested Forecasting and at local IDs #13

Open
mdancho84 opened this issue Sep 12, 2021 · 1 comment
Open

Ensembles for Nested Forecasting and at local IDs #13

mdancho84 opened this issue Sep 12, 2021 · 1 comment

Comments

@mdancho84
Copy link
Contributor

Add method for working with Local ID and Nested Forecast Ensembles.

@mdancho84
Copy link
Contributor Author

mdancho84 commented Oct 12, 2021

I've added support for nested forecasting with ensembles. There are 2 new algorithms:

  1. Ensemble Nested Average
  2. Ensemble Nested Weighted

Example

# Setup

library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(timetk)

# Keep the series id, date and weekly sales under generic column names
data_tbl <- walmart_sales_weekly %>%
    select(id, date = Date, value = Weekly_Sales)

# Extend every series 52 periods into the future, nest per id (104 actual
# observations each), hold out 52 periods for testing, and keep only the
# first two series.
nested_data_tbl <- data_tbl %>%
    extend_timeseries(
        .id_var        = id,
        .date_var      = date,
        .length_future = 52
    ) %>%
    nest_timeseries(
        .id_var        = id,
        .length_future = 52,
        .length_actual = 52 * 2
    ) %>%
    split_nested_timeseries(.length_test = 52) %>%
    slice(1:2)


# Prophet workflow: model value as a function of date only
rec_prophet <- recipe(
    value ~ date,
    data = extract_nested_train_split(nested_data_tbl)
)

wflw_prophet <- workflow() %>%
    add_model(
        prophet_reg(mode = "regression", seasonality_yearly = TRUE) %>%
            set_engine("prophet")
    ) %>%
    add_recipe(rec_prophet)

# XGBoost workflow: replace the date with time series signature features,
# drop zero-variance predictors, and one-hot encode nominal predictors
rec_xgb <- recipe(
    value ~ .,
    data = extract_nested_train_split(nested_data_tbl)
) %>%
    step_timeseries_signature(date) %>%
    step_rm(date) %>%
    step_zv(all_predictors()) %>%
    step_dummy(all_nominal_predictors(), one_hot = TRUE)

wflw_xgb <- workflow() %>%
    add_model(
        boost_tree(mode = "regression") %>%
            set_engine("xgboost")
    ) %>%
    add_recipe(rec_xgb)

# Fit both workflows on the training data of every nested series
nested_modeltime_tbl <- nested_data_tbl %>%
    modeltime_nested_fit(wflw_prophet, wflw_xgb)
#> Fitting models on training data... ===============>--------------- 50% | ET...
#>



# ENSEMBLE AVERAGE TESTING ----

parallel_start(6)

# First pass: add a mean ensemble alongside the existing submodels
ensem <- nested_modeltime_tbl %>%
    ensemble_nested_average(
        type           = "mean",
        keep_submodels = TRUE,
        control        = control_nested_fit(allow_par = FALSE, verbose = TRUE)
    )

# Second pass: add a median ensemble built from model ids 1 and 2
ensem <- ensem %>%
    ensemble_nested_average(
        type           = "median",
        model_ids      = c(1, 2),
        keep_submodels = TRUE,
        control        = control_nested_fit(allow_par = FALSE, verbose = TRUE)
    )
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.125003 secs.
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.133003 secs.
#> 

# Print the nested modeltime table (one row per series id)
ensem
#> # Nested Modeltime Table
#> 
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#>   id    .actual_data       .future_data      .splits         .modeltime_tables  
#>   <fct> <list>             <list>            <list>          <list>             
#> 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~
#> 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~

# Show the modeltime table (submodels plus ensembles) for a single series
extract_nested_modeltime_table(ensem)
#> # A tibble: 4 x 6
#>   id    .model_id .model         .model_desc                 .type .calibration_da~
#>   <fct>     <dbl> <list>         <chr>                       <chr> <list>          
#> 1 1_1           1 <workflow>     PROPHET                     Test  <tibble [52 x 4~
#> 2 1_1           2 <workflow>     XGBOOST                     Test  <tibble [52 x 4~
#> 3 1_1           3 <ensemble [2]> ENSEMBLE (MEAN): 2 MODELS   Test  <tibble [52 x 4~
#> 4 1_1           4 <ensemble [2]> ENSEMBLE (MEDIAN): 2 MODELS Test  <tibble [52 x 4~

# Test-set accuracy for every model of every series
extract_nested_test_accuracy(ensem)
#> # A tibble: 8 x 10
#>   id    .model_id .model_desc        .type    mae  mape  mase smape   rmse   rsq
#>   <fct>     <dbl> <chr>              <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 1_1           1 PROPHET            Test  10071.  45.9  1.99  60.0 11777. 0.383
#> 2 1_1           2 XGBOOST            Test   6237.  25.3  1.23  24.6  9017. 0.191
#> 3 1_1           3 ENSEMBLE (MEAN): ~ Test   5419.  20.2  1.07  22.2  8655. 0.415
#> 4 1_1           4 ENSEMBLE (MEDIAN)~ Test   5419.  20.2  1.07  22.2  8655. 0.415
#> 5 1_3           1 PROPHET            Test   3540.  29.9  1.37  25.5  4708. 0.796
#> 6 1_3           2 XGBOOST            Test   3086.  18.8  1.20  20.4  5086. 0.787
#> 7 1_3           3 ENSEMBLE (MEAN): ~ Test   2662.  19.0  1.03  17.7  4038. 0.819
#> 8 1_3           4 ENSEMBLE (MEDIAN)~ Test   2662.  19.0  1.03  17.7  4038. 0.819


# ENSEMBLE WEIGHTED TESTING ----

parallel_start(6)

# Add a weighted ensemble; with "lowest_rmse" the larger loading is assigned
# to the submodel with the lower RMSE
ensem <- ensemble_nested_weighted(
    nested_modeltime_tbl,
    loadings       = c(2, 1),
    loading_method = "lowest_rmse",
    control        = control_nested_fit(allow_par = FALSE, verbose = TRUE)
)
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.278002 secs.
#> 

# Print the nested modeltime table, now including the weighted ensemble
ensem
#> # Nested Modeltime Table
#> 
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#>   id    .actual_data       .future_data      .splits         .modeltime_tables  
#>   <fct> <list>             <list>            <list>          <list>             
#> 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~
#> 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~

# Pull the third model (the weighted ensemble) out of the first series'
# modeltime table to inspect its loadings
extract_nested_modeltime_table(ensem, 1) %>%
    pluck(".model", 3)
#> -- Modeltime Ensemble -------------------------------------------
#> Ensemble of 2 Models (WEIGHTED)
#> 
#> # Modeltime Table
#> # A tibble: 2 x 6
#>   .model_id .model     .model_desc .type .calibration_data .loadings
#>       <int> <list>     <chr>       <chr> <list>                <dbl>
#> 1         1 <workflow> PROPHET     Test  <tibble [52 x 4]>     0.333
#> 2         2 <workflow> XGBOOST     Test  <tibble [52 x 4]>     0.667

# Test-set accuracy for the submodels and the weighted ensemble
extract_nested_test_accuracy(ensem)
#> # A tibble: 6 x 10
#>   id    .model_id .model_desc        .type    mae  mape  mase smape   rmse   rsq
#>   <fct>     <dbl> <chr>              <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 1_1           1 PROPHET            Test  10071.  45.9 1.99   60.0 11777. 0.383
#> 2 1_1           2 XGBOOST            Test   6237.  25.3 1.23   24.6  9017. 0.191
#> 3 1_1           3 ENSEMBLE (WEIGHTE~ Test   4414.  14.8 0.870  16.0  8321. 0.410
#> 4 1_3           1 PROPHET            Test   3540.  29.9 1.37   25.5  4708. 0.796
#> 5 1_3           2 XGBOOST            Test   3086.  18.8 1.20   20.4  5086. 0.787
#> 6 1_3           3 ENSEMBLE (WEIGHTE~ Test   2772.  21.2 1.08   19.1  4068. 0.820

# Plot the test forecasts as a static (non-interactive) chart, grouped by
# series id. Use FALSE rather than F: F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
ensem %>%
    extract_nested_test_forecast() %>%
    group_by(id) %>%
    plot_modeltime_forecast(.interactive = FALSE)

Created on 2021-10-12 by the reprex package (v2.0.1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant