Skip to contents

Predictions and DALEX SHAP attributions of an SVM model fit to the chocolates data, classifying the type of chocolate (milk/dark).

Usage

chocolates_svm_pred

chocolates_svm_shap

Format

chocolates_svm_pred is a length n = 88 factor holding the SVM model's predicted type of chocolate (Dark or Milk) for each observation. chocolates_svm_shap is an (88 x 10) data frame of the DALEX SHAP attributions of the SVM model for each observation.

Replicating

library(cheem)
library(e1071)
library(DALEX)
set.seed(135)

## Classification setup: explanatory columns 5:14, with the chocolate Type
## used as both the response (Y) and the class label (clas)
clas <- chocolates$Type
Y    <- clas
X    <- chocolates[, 5:14]

## Fit a linear-kernel C-classification SVM of Y on every column of X, then
## predict back onto the same observations the model was fit to
choc_svm_fit <- svm(
  formula     = Y ~ .,
  data        = data.frame(Y, X),
  type        = "C-classification",
  kernel      = "linear",
  probability = TRUE
)
chocolates_svm_pred <- predict(choc_svm_fit, newdata = data.frame(Y, X))

## SHAP via DALEX, versatile but slow
choc_svm_exp <- explain(choc_svm_fit, data = X, y = Y,
                        label = "Chocolates, svm")
## Note that cheem expects a full [n, p] attribution space
## Shap takes about ~30-40 sec for me
## Preallocate a numeric [n, p] matrix; row i is filled from observation i
chocolates_svm_shap <- matrix(NA_real_, nrow(X), ncol(X))
## A plain for loop fills the matrix in the calling scope: no `<<-`, no
## reliance on the magrittr pipe being attached, and no discarded sapply()
## result printed at top level. seq_len() also copes with a zero-row X.
for (i in seq_len(nrow(X))) {
  pps <- predict_parts_shap(choc_svm_exp, new_observation = X[i, ])
  ## Keep just the [n, p] local explanations: mean contribution per variable
  ## NOTE(review): values follow tapply()'s ordering of pps$variable --
  ## confirm this matches the column order of X
  chocolates_svm_shap[i, ] <- as.vector(
    tapply(pps$contribution, pps$variable, mean, na.rm = TRUE))
}
chocolates_svm_shap <- as.data.frame(chocolates_svm_shap)

## Guarded so that running this script never writes the .rda files under
## ./data; edit the condition by hand to regenerate them. The literal FALSE
## is used because `F` is an ordinary variable that can be reassigned.
if (FALSE) { ## Don't accidentally save
  save(chocolates_svm_pred, file = "./data/chocolates_svm_pred.rda")
  save(chocolates_svm_shap, file = "./data/chocolates_svm_shap.rda")
  #usethis::use_data(chocolates_svm_pred)
  #usethis::use_data(chocolates_svm_shap)
}

An object of class data.frame with 88 rows and 10 columns.

Examples

library(cheem)

## Classification setup: explanatory columns 5:14, with the chocolate Type
## used as both the response (Y) and the class label (clas)
clas <- chocolates$Type
Y    <- clas
X    <- chocolates[, 5:14]

## Precomputed predictions and shap attribution
## Lines starting with `#>` are the recorded console output of the call
## directly above them.
## Predictions: a named factor with levels "Dark" and "Milk", length 88
str(chocolates_svm_pred)
#>  Factor w/ 2 levels "Dark","Milk": 1 1 1 1 1 1 1 1 2 1 ...
#>  - attr(*, "names")= chr [1:88] "1" "2" "3" "4" ...
## SHAP attributions: 88 rows (observations) by 10 numeric columns V1..V10
str(chocolates_svm_shap)
#> 'data.frame':	88 obs. of  10 variables:
#>  $ V1 : num  -0.03852 -0.05926 0.00136 -0.02551 -0.01845 ...
#>  $ V2 : num  -0.005681 0.003371 -0.000133 0.000113 0.004603 ...
#>  $ V3 : num  0.01161 0.00343 -0.00449 0.0062 0.00262 ...
#>  $ V4 : num  -0.0156 -0.0021 -0.0257 -0.0195 -0.0222 ...
#>  $ V5 : num  -0.106 -0.112 -0.168 -0.258 -0.251 ...
#>  $ V6 : num  -0.0573 -0.0513 -0.0797 0.0294 0.0565 ...
#>  $ V7 : num  0.014471 0.012275 -0.000944 0.039095 0.039942 ...
#>  $ V8 : num  -0.01986 -0.03739 -0.00627 -0.00754 -0.02157 ...
#>  $ V9 : num  -0.0735 -0.0265 0.016 -0.0373 -0.0275 ...
#>  $ V10: num  -0.0396 -0.0596 0.0114 -0.0295 -0.0322 ...

## Cheem: bundle the data, response, attributions, predictions, and class
## labels into the object used by the visuals and the shiny app
choc_chm <- cheem_ls(
  X, Y, chocolates_svm_shap, chocolates_svm_pred, clas,
  label = "Chocolates, SVM, shap"
)

## The shiny app expects an rds file; this branch never runs unless the
## condition is edited by hand, so nothing is saved by accident.
if (FALSE) {
  saveRDS(choc_chm, file = "./cmh_chocolates_svm_shap.rds")
  ## Then select the saved rds file from the app's data dropdown.
  run_app()
}

## Cheem visuals, only produced in an interactive session
if (interactive()) {
  primary    <- 1
  comparison <- 2
  ## Global view for the chosen primary and comparison observations
  global_view(choc_chm, primary_obs = primary, comparison_obs = comparison)
  ## Suggested basis and manipulation variable for that pair of observations
  tour_basis <- sug_basis(chocolates_svm_shap, primary, comparison)
  tour_mvar  <- sug_manip_var(
    chocolates_svm_shap, primary_obs = primary, comparison
  )
  ## Build the radial tour from those suggestions, then animate it with plotly
  tour_gg <- radial_cheem_tour(
    choc_chm, basis = tour_basis, manip_var = tour_mvar
  )
  animate_plotly(tour_gg)
}
}