https://github.com/mlr-org/mlr3forecast
Time series forecasting for mlr3
https://github.com/mlr-org/mlr3forecast
forecasting machine-learning mlr3 r r-package time-series
Last synced: 3 months ago
JSON representation
Time series forecasting for mlr3
- Host: GitHub
- URL: https://github.com/mlr-org/mlr3forecast
- Owner: mlr-org
- License: lgpl-3.0
- Created: 2024-07-16T16:33:00.000Z (12 months ago)
- Default Branch: main
- Last Pushed: 2025-03-30T18:00:21.000Z (3 months ago)
- Last Synced: 2025-04-02T12:12:50.535Z (3 months ago)
- Topics: forecasting, machine-learning, mlr3, r, r-package, time-series
- Language: R
- Homepage: http://mlr3forecast.mlr-org.com/
- Size: 2.28 MB
- Stars: 5
- Watchers: 5
- Forks: 0
- Open Issues: 5
-
Metadata Files:
- Readme: README.Rmd
- Changelog: NEWS.md
- License: LICENSE
Awesome Lists containing this project
README
---
output: github_document
---

```{r, include = FALSE}
# Default chunk options applied to every chunk of the rendered README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)

# Set the threshold of the "mlr3" logger to "warn".
lgr::get_logger("mlr3")$set_threshold("warn")
# Turn off the class and key output when data.tables are printed.
options(datatable.print.class = FALSE, datatable.print.keys = FALSE)
library(data.table)
library(mlr3misc)
```

# mlr3forecast
Extending mlr3 to time series forecasting.
[](https://lifecycle.r-lib.org/articles/stages.html#experimental)
[](https://github.com/mlr-org/mlr3forecast/actions/workflows/rcmdcheck.yaml)
[](https://CRAN.R-project.org/package=mlr3forecast)
[](https://stackoverflow.com/questions/tagged/mlr3)
[](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)

> This package is in an early stage of development and should be considered experimental.
> If you are interested in experimenting with it, we welcome your feedback!

## Installation
Install the development version from [GitHub](https://github.com/):
```{r, eval = FALSE}
# install.packages("pak")
# Install mlr3forecast from the "mlr-org/mlr3forecast" repository via pak
# (uncomment the line above first if pak itself is not installed yet).
pak::pak("mlr-org/mlr3forecast")
```

## Usage
The goal of mlr3forecast is to extend mlr3 to time series forecasting.
This is achieved by introducing new classes and methods for forecasting tasks,
learners, and resamplers. For now the forecasting task and learner is restricted
to time series regression tasks, but might be extended to classification tasks
in the future.

We have two goals: one is to support traditional forecasting learners, and the
other is to support machine learning forecasting, i.e. using regression
learners and applying them to forecasting tasks. The design of the latter is
still in flux and may change.

### Example: forecasting with forecast learner
Currently, we support native forecasting learners from the forecast package.
In the future, we plan to support more forecasting learners.

```{r, message = FALSE}
library(mlr3forecast)

# train a native forecast learner on the air passengers task
task = tsk("airpassengers")
learner = lrn("fcst.auto_arima")$train(task)

# predict rows 140:144 of the task and score with RMSE
prediction = learner$predict(task, 140:144)
prediction$score(msr("regr.rmse"))

# predict on newly generated data
# NOTE(review): presumably 12L is the number of future time steps -- confirm
newdata = generate_newdata(task, 12L)
learner$predict_newdata(newdata, task)

# works with quantile response
learner = lrn("fcst.auto_arima",
  predict_type = "quantiles",
  quantiles = c(0.1, 0.15, 0.5, 0.85, 0.9),
  quantile_response = 0.5
)$train(task)
learner$predict_newdata(newdata, task)
```

### Example: forecasting with regression learner
```{r, message = FALSE}
library(mlr3learners)

task = tsk("airpassengers")
# we have to remove the date feature for regression learners
task$select(setdiff(task$feature_names, "date"))

# wrap a regression learner as a forecast learner
# NOTE(review): presumably 1:12 are the target lags used as features -- confirm
flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)$train(task)

# predict on three new rows with an unknown (NA) target
newdata = data.frame(passengers = rep(NA_real_, 3L))
prediction = flrn$predict_newdata(newdata, task)
prediction

# predict rows 142:144 of the task and score with RMSE
prediction = flrn$predict(task, 142:144)
prediction
prediction$score(msr("regr.rmse"))

# evaluate a fresh (untrained) learner with the forecast holdout resampling
flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)
resampling = rsmp("forecast_holdout", ratio = 0.9)
rr = resample(task, flrn, resampling)
rr$aggregate(msr("regr.rmse"))

# evaluate the same learner with the forecast cross-validation resampling
resampling = rsmp("forecast_cv")
rr = resample(task, flrn, resampling)
rr$aggregate(msr("regr.rmse"))
```

Or with some feature engineering using mlr3pipelines:

```{r}
library(mlr3pipelines)

# convert Date columns to POSIXct, then derive date features;
# every date feature listed below is switched off
graph = ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_year = FALSE,
      day_of_month = FALSE,
      day_of_week = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )

task = tsk("airpassengers")
flrn = ForecastLearner$new(lrn("regr.ranger"), 1:12)
# chain the preprocessing graph and the forecast learner into one learner
glrn = as_learner(graph %>>% flrn)$train(task)
prediction = glrn$predict(task, 142:144)
prediction$score(msr("regr.rmse"))
```

### Example: forecasting electricity demand
```{r, message = FALSE}
library(mlr3learners)
library(mlr3pipelines)

task = tsk("electricity")

# convert Date columns to POSIXct, then derive date features;
# every date feature listed below is switched off
graph = ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )
flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3)
glrn = as_learner(graph %>>% flrn)$train(task)

# date in the last row of the task data
max_date = task$data()[.N, date]
# 14 new rows following max_date, with an unknown (NA) demand
newdata = data.frame(
  date = max_date + 1:14,
  demand = rep(NA_real_, 14L),
  temperature = 26,
  holiday = c(TRUE, rep(FALSE, 13L))
)
prediction = glrn$predict_newdata(newdata, task)
prediction
```

### Example: global forecasting (longitudinal data)
```{r, message = FALSE}
library(mlr3learners)
library(mlr3pipelines)
library(tsibble)

# build a forecast task keyed by state:
# lower-case the column names, convert month to Date,
# and sum count per state and month
task = tsibbledata::aus_livestock |>
  as.data.table() |>
  setnames(tolower) |>
  _[, month := as.Date(month)] |>
  _[, .(count = sum(count)), by = .(state, month)] |>
  setorder(state, month) |>
  as_task_fcst(
    id = "aus_livestock",
    target = "count",
    order = "month",
    key = "state",
    freq = "monthly"
  )

# convert Date columns to POSIXct, then derive date features;
# every date feature listed below is switched off
graph = ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_week = FALSE,
      day_of_month = FALSE,
      day_of_year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )
# apply the preprocessing graph and keep its first output as the new task
task = graph$train(task)[[1L]]

flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3)$train(task)
prediction = flrn$predict(task, 4460:4464)
prediction$score(msr("regr.rmse"))

# evaluate a fresh (untrained) learner with the forecast holdout resampling
flrn = ForecastLearner$new(lrn("regr.ranger"), 1:3)
resampling = rsmp("forecast_holdout", ratio = 0.9)
rr = resample(task, flrn, resampling)
rr$aggregate(msr("regr.rmse"))
```

### Example: global vs local forecasting
In machine learning forecasting, the difference between forecasting a single time series
and forecasting longitudinal data is often referred to as local and global forecasting.

```{r, eval = FALSE}
# TODO: find better task example, since the effect is minor here

# convert Date columns to POSIXct, then derive date features;
# every date feature listed below is switched off
graph = ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_week = FALSE,
      day_of_month = FALSE,
      day_of_year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )

# local forecasting
# keep only Western Australia and sum count per month
task = tsibbledata::aus_livestock |>
  as.data.table() |>
  setnames(tolower) |>
  _[, month := as.Date(month)] |>
  _[state == "Western Australia", .(count = sum(count)), by = .(month)] |>
  setorder(month) |>
  as_task_fcst(id = "aus_livestock", target = "count", order = "month")
task = graph$train(task)[[1L]]
flrn = ForecastLearner$new(lrn("regr.ranger"), 1L)$train(task)
# fetch the primary key and the "month.year" column to select rows by year
tab = task$backend$data(
  rows = task$row_ids,
  cols = c(task$backend$primary_key, "month.year")
)
setnames(tab, c("row_id", "year"))
row_ids = tab[year >= 2015, row_id]
prediction = flrn$predict(task, row_ids)
prediction$score(msr("regr.rmse"))

# global forecasting
# keep all states and sum count per state and month
task = tsibbledata::aus_livestock |>
  as.data.table() |>
  setnames(tolower) |>
  _[, month := as.Date(month)] |>
  _[, .(count = sum(count)), by = .(state, month)] |>
  setorder(state, month) |>
  as_task_fcst(id = "aus_livestock", target = "count", order = "month", key = "state")
task = graph$train(task)[[1L]]
# set the key column role to "state" on the transformed task
task$col_roles$key = "state"
flrn = ForecastLearner$new(lrn("regr.ranger"), 1L)$train(task)
# fetch the primary key, "month.year" and "state" to select the same rows
# as in the local example (year >= 2015, Western Australia)
tab = task$backend$data(
  rows = task$row_ids,
  cols = c(task$backend$primary_key, "month.year", "state")
)
setnames(tab, c("row_id", "year", "state"))
row_ids = tab[year >= 2015 & state == "Western Australia", row_id]
prediction = flrn$predict(task, row_ids)
prediction$score(msr("regr.rmse"))
```

### Example: Custom PipeOps
```{r, eval = FALSE}
library(mlr3learners)
library(mlr3pipelines)

# apply the lag PipeOp on its own and inspect the resulting task data
task = tsk("airpassengers")
pop = po("fcst.lag", lags = 1:12)
new_task = pop$train(list(task))[[1L]]
new_task$data()

# combine the lag PipeOp with date feature engineering;
# every date feature listed below is switched off
task = tsk("airpassengers")
graph = po("fcst.lag", lags = 1:12) %>>%
  ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_week = FALSE,
      day_of_month = FALSE,
      day_of_year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )
flrn = ForecastRecursiveLearner$new(lrn("regr.ranger"))
glrn = as_learner(graph %>>% flrn)$train(task)
prediction = glrn$predict(task, 142:144)
prediction$score(msr("regr.rmse"))

# predict on newly generated data
newdata = generate_newdata(task, 12L)
glrn$predict_newdata(newdata, task)
```

### Example: common target transformations
Some common target transformations in forecasting are:
- differencing (WIP)
- log transformation, see example below
- power transformations such as [Box-Cox](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_boxcox.html) and [Yeo-Johnson](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_yeojohnson.html),
  currently only supported as feature transformations and not as target transformations
- scaling/normalization, available [here](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_targettrafoscalerange.html)

```{r, eval = FALSE}
# log-transform the target; the inverter maps the response back with exp()
trafo = po("targetmutate",
  param_vals = list(
    trafo = function(x) log(x),
    inverter = function(x) list(response = exp(x$response))
  )
)

# lag features plus date feature engineering;
# every date feature listed below is switched off
graph = po("fcst.lag", lags = 1:12) %>>%
  ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_week = FALSE,
      day_of_month = FALSE,
      day_of_year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )

task = tsk("airpassengers")
flrn = ForecastRecursiveLearner$new(lrn("regr.ranger"))
glrn = as_learner(graph %>>% flrn)
# wrap the learner in the target transformation pipeline
pipeline = ppl("targettrafo", graph = glrn, trafo_pipeop = trafo)
glrn = as_learner(pipeline)$train(task)
prediction = glrn$predict(task, 142:144)
prediction$score(msr("regr.rmse"))
```

```{r, eval = FALSE}
# lag features plus date feature engineering;
# every date feature listed below is switched off
graph = po("fcst.lag", lags = 1:12) %>>%
  ppl("convert_types", "Date", "POSIXct") %>>%
  po("datefeatures",
    param_vals = list(
      week_of_year = FALSE,
      day_of_week = FALSE,
      day_of_month = FALSE,
      day_of_year = FALSE,
      is_day = FALSE,
      hour = FALSE,
      minute = FALSE,
      second = FALSE
    )
  )

task = tsk("airpassengers")
flrn = ForecastRecursiveLearner$new(lrn("regr.ranger"))
glrn = as_learner(graph %>>% flrn)
# use the target differencing PipeOp (lags = 12L) as the target transformation
trafo = po("fcst.targetdiff", lags = 12L)
pipeline = ppl("targettrafo", graph = glrn, trafo_pipeop = trafo)
glrn = as_learner(pipeline)$train(task)
prediction = glrn$predict(task, 142:144)
prediction$score(msr("regr.rmse"))
```