Chocolate svm model predictions and shap values
Source:R/7_presaved_attribution.r
chocolates_svm_pred.Rd
Predictions and DALEX SHAP attribution of an svm model of the Chocolates data, classifying the type of chocolate (Dark/Milk).
Format
chocolates_svm_pred
is a length n=88 vector of the predictions of an
svm model predicting the type of chocolate (a factor with levels "Dark" and "Milk").
chocolates_svm_shap
is a (88 x 10) data frame of the DALEX SHAP attribution
of the svm model for each observation.
Replicating
library(cheem)
library(e1071)
library(DALEX)
set.seed(135)

## Classification setup
## X: columns 5 to 14 of chocolates, used as the explanatory variables.
## Y: the chocolate Type, used as the response; clas holds the same labels.
X    <- chocolates[, 5:14]
Y    <- chocolates$Type
clas <- chocolates$Type

## Model and predict
## Linear-kernel C-classification svm of Y on all columns of X, then
## predictions on the same data the model was fit to.
choc_svm_fit <- svm(
  formula = Y ~ ., data = data.frame(Y, X),
  type = 'C-classification', kernel = 'linear', probability = TRUE)
chocolates_svm_pred <- predict(choc_svm_fit, data.frame(Y, X))

## SHAP via DALEX, versatile but slow
choc_svm_exp <- explain(choc_svm_fit, data = X, y = Y,
                        label = "Chocolates, svm")
## Note that cheem expects a full [n, p] attribution space
## Shap takes about ~30-40 sec for me
## Preallocate an [n, p] matrix and fill it one observation (row) at a time.
chocolates_svm_shap <- matrix(NA_real_, nrow(X), ncol(X))
for (i in seq_len(nrow(X))) {
  pps <- predict_parts_shap(choc_svm_exp, new_observation = X[i, ])
  ## Keep just the [n, p] local explanations: the mean contribution
  ## within each level of pps$variable.
  ## NOTE(review): tapply() returns groups in the order of the sorted levels
  ## of pps$variable, which may differ from the column order of X -- confirm.
  chocolates_svm_shap[i, ] <- as.vector(
    tapply(pps$contribution, pps$variable, mean, na.rm = TRUE))
}
chocolates_svm_shap <- as.data.frame(chocolates_svm_shap)

if (FALSE) { ## Don't accidentally save
  save(chocolates_svm_pred, file = "./data/chocolates_svm_pred.rda")
  save(chocolates_svm_shap, file = "./data/chocolates_svm_shap.rda")
  #usethis::use_data(chocolates_svm_pred)
  #usethis::use_data(chocolates_svm_shap)
}
An object of class data.frame
with 88 rows and 10 columns.
Examples
library(cheem)

## Classification setup
## Explanatory variables are columns 5 to 14; the response and the class
## labels are both the chocolate Type.
X <- chocolates[, 5:14]
Y <- clas <- chocolates$Type

## Precomputed predictions and shap attribution
str(chocolates_svm_pred)
#> Factor w/ 2 levels "Dark","Milk": 1 1 1 1 1 1 1 1 2 1 ...
#> - attr(*, "names")= chr [1:88] "1" "2" "3" "4" ...
str(chocolates_svm_shap)
#> 'data.frame': 88 obs. of 10 variables:
#> $ V1 : num -0.03852 -0.05926 0.00136 -0.02551 -0.01845 ...
#> $ V2 : num -0.005681 0.003371 -0.000133 0.000113 0.004603 ...
#> $ V3 : num 0.01161 0.00343 -0.00449 0.0062 0.00262 ...
#> $ V4 : num -0.0156 -0.0021 -0.0257 -0.0195 -0.0222 ...
#> $ V5 : num -0.106 -0.112 -0.168 -0.258 -0.251 ...
#> $ V6 : num -0.0573 -0.0513 -0.0797 0.0294 0.0565 ...
#> $ V7 : num 0.014471 0.012275 -0.000944 0.039095 0.039942 ...
#> $ V8 : num -0.01986 -0.03739 -0.00627 -0.00754 -0.02157 ...
#> $ V9 : num -0.0735 -0.0265 0.016 -0.0373 -0.0275 ...
#> $ V10: num -0.0396 -0.0596 0.0114 -0.0295 -0.0322 ...

## Cheem
## Bundle the data, response, attribution, predictions and class labels.
choc_chm <- cheem_ls(
  X, Y, chocolates_svm_shap, chocolates_svm_pred, clas,
  label = "Chocolates, SVM, shap"
)

## Save for use with shiny app (expects an rds file)
if (FALSE) { ## Don't accidentally save.
  saveRDS(choc_chm, file = "./cmh_chocolates_svm_shap.rds")
  run_app() ## Select the saved rds file from the data dropdown.
}

## Cheem visuals
## Only run in an interactive session.
if (interactive()) {
  ## Row numbers of the primary and comparison observations
  primary_idx    <- 1
  comparison_idx <- 2
  global_view(
    choc_chm,
    primary_obs = primary_idx, comparison_obs = comparison_idx
  )
  ## Suggested starting basis and manipulation variable for the tour
  tour_basis <- sug_basis(chocolates_svm_shap, primary_idx, comparison_idx)
  manip_idx  <- sug_manip_var(
    chocolates_svm_shap,
    primary_obs = primary_idx, comparison_idx
  )
  tour_plot <- radial_cheem_tour(
    choc_chm,
    basis = tour_basis, manip_var = manip_idx
  )
  animate_plotly(tour_plot)
}