embed
embed copied to clipboard
Idea for a new step: dealing with correlation
- estimate the correlation structure of the predictors
- identify groups of highly correlated features
- replace each group with the principal components (PCs) computed from just those features
- …
- profit
Also look at the correlation filter (presumably `recipes::step_corr()`).
library(tidymodels)
# Names of the numeric columns of `ames` that have more than 1000 distinct
# values; only these columns are expanded into spline bases below.
spline_cols <- names(
  select(ames, where(\(col) is.numeric(col) && n_distinct(col) > 1000))
)

# Drop the nominal predictors, replace each selected column with a natural
# spline basis (10 degrees of freedom), then compute and plot the pairwise
# correlations of the baked training data.
recipe(~ ., data = ames) |>
  step_rm(all_nominal_predictors()) |>
  step_spline_natural(any_of(spline_cols), deg_free = 10) |>
  prep() |>
  bake(new_data = NULL) |>
  corrr::correlate() |>
  autoplot(method = "identity")
library(tidymodels)
# NOTE: despite its name, `spline_cols` holds a data frame here: the numeric
# columns of `ames` that have more than 1000 distinct values.
spline_cols <- ames |>
  select(where(\(col) is.numeric(col) && n_distinct(col) > 1000))

# Rotate the columns onto their principal components first, then expand each
# component into a natural spline basis (10 degrees of freedom) and plot the
# pairwise correlations of the baked training data.
recipe(~ ., data = spline_cols) |>
  # step_pca() neither centers nor scales by default (its prcomp options are
  # center = FALSE, scale. = FALSE). Without centering the component scores
  # are not uncorrelated, and without scaling the columns with the largest
  # variance dominate, so standardize before the PCA.
  step_normalize(all_predictors()) |>
  # threshold = 1 asks for enough components to cover all of the variance.
  step_pca(all_predictors(), threshold = 1) |>
  step_spline_natural(all_predictors(), deg_free = 10) |>
  prep() |>
  bake(new_data = NULL) |>
  corrr::correlate() |>
  autoplot(method = "identity")