library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidy.outliers)
library(recipes)
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#> 
#>     step

One neat application of tidy.outliers is using just the outlier score on its own, outside of a modeling workflow. This can be done easily, as shown below.

Mahalanobis Score

Define your recipe.

# Recipe over all mtcars columns with an outlier-score step applied to every
# numeric variable.
example_recipe <- step_outliers_maha(
  recipe(. ~ ., data = mtcars),
  all_numeric()
)

Prep the recipe, then bake it with `new_data = NULL`.

# Estimate the recipe, then bake with `new_data = NULL` and print the result.
bake(prep(example_recipe), new_data = NULL)
#> # A tibble: 32 × 12
#>      mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb .outliers…¹
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>       <dbl>
#>  1  21       6  160    110  3.9   2.62  16.5     0     1     4     4      0.373 
#>  2  21       6  160    110  3.9   2.88  17.0     0     1     4     4      0.313 
#>  3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1      0.372 
#>  4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1      0.133 
#>  5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2      0.0914
#>  6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1      0.367 
#>  7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4      0.391 
#>  8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2      0.472 
#>  9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2      0.980 
#> 10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4      0.665 
#> # … with 22 more rows, and abbreviated variable name ¹​.outliers_maha

You can even give it a new name!

# Same pipeline as a single expression, but the score column is named "maha"
# through `name_mutate`.
example_feature <- bake(
  prep(
    step_outliers_maha(
      recipe(. ~ ., data = mtcars),
      all_numeric(),
      name_mutate = "maha"
    )
  ),
  new_data = NULL
)

# Show the `maha` column first, followed by all remaining columns.
select(example_feature, maha, everything())
#> # A tibble: 32 × 12
#>      maha   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 0.373   21       6  160    110  3.9   2.62  16.5     0     1     4     4
#>  2 0.313   21       6  160    110  3.9   2.88  17.0     0     1     4     4
#>  3 0.372   22.8     4  108     93  3.85  2.32  18.6     1     1     4     1
#>  4 0.133   21.4     6  258    110  3.08  3.22  19.4     1     0     3     1
#>  5 0.0914  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
#>  6 0.367   18.1     6  225    105  2.76  3.46  20.2     1     0     3     1
#>  7 0.391   14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
#>  8 0.472   24.4     4  147.    62  3.69  3.19  20       1     0     4     2
#>  9 0.980   22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2
#> 10 0.665   19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4
#> # … with 22 more rows