Ames random forest model predictions and shap values
Source:R/7_presaved_attribution.r
ames_rf_pred.Rd
Predictions and treeshap attribution of a random forest model of North Ames house sales data regressing Sales Price from house and lot variables.
Format
ames_rf_pred
is a length n = 338 vector of the predictions of a
random forest model predicting the numeric house sale price in North Ames.
ames_rf_shap
is a (338 x 9) data frame of the treeshap SHAP attribution of
the random forest model for each observation.
Replicating
library(cheem)
library(randomForest)
library(treeshap)
set.seed(135)

## Regression setup: the first nine columns are the predictors and
## SalePrice is the response; SubclassMS is stored separately as `clas`.
dat  <- amesHousing2018_NorthAmes
X    <- dat[, 1:9]
Y    <- dat$SalePrice
clas <- dat$SubclassMS

## Model and treeSHAP
## mtry and nodesize depend on whether the response is discrete:
## sqrt(p) and 1 if so, otherwise p / 3 and 5; nodesize is also kept
## at no less than n / 500.
ames_rf_fit <- randomForest::randomForest(
  X, Y, ntree = 125,
  mtry = ifelse(is_discrete(Y), sqrt(ncol(X)), ncol(X) / 3),
  nodesize = max(ifelse(is_discrete(Y), 1, 5), nrow(X) / 500))
ames_rf_pred <- predict(ames_rf_fit, X)
## NOTE(review): the two positional FALSE arguments are presumably
## treeshap's `interactions` and `verbose` -- confirm against treeshap docs.
ames_rf_shap <- treeshap::treeshap(
  treeshap::randomForest.unify(ames_rf_fit, X), X, FALSE, FALSE)
## Keep only the `shaps` element of the treeshap result.
ames_rf_shap <- ames_rf_shap$shaps

if (FALSE) { ## Don't accidentally save
  save(ames_rf_pred, file = "./data/ames_rf_pred.rda")
  save(ames_rf_shap, file = "./data/ames_rf_shap.rda")
  #usethis::use_data(ames_rf_pred)
  #usethis::use_data(ames_rf_shap)
}
An object of class data.frame
with 338 rows and 9 columns.
Examples
library(cheem)

## Regression setup: predictors, response, and the SubclassMS column
dat <- amesHousing2018_NorthAmes
X <- dat[, 1:9]
Y <- dat$SalePrice
clas <- dat$SubclassMS

## Inspect the precomputed predictions and SHAP attribution
str(ames_rf_pred)
#> Named num [1:338] 203193 116048 167320 223589 139029 ...
#> - attr(*, "names")= chr [1:338] "1" "2" "3" "4" ...
str(ames_rf_shap)
#> 'data.frame': 338 obs. of 9 variables:
#> $ LotArea : num 27231 -374 14749 1687 -1036 ...
#> $ OverallQual: num 6828 -5011 6182 22641 -3156 ...
#> $ YearBuild : num 1881 294 -438 4182 2677 ...
#> $ LivingArea : num 13490 -16765 4160 28335 -3640 ...
#> $ Bathrms : num -824 -1310 1621 10436 -996 ...
#> $ Bedrms : num 528 -1363 360 744 672 ...
#> $ TotRms : num 1866 -3193 124 5314 1135 ...
#> $ GarageYrBlt: num 2847 -937 -559 791 1462 ...
#> $ GarageArea : num 5312 672 -2915 5423 -2125 ...

## Build the cheem list from the data, attribution, and predictions
ames_chm <- cheem_ls(
  X, Y, ames_rf_shap, ames_rf_pred, clas,
  label = "Ames, random forest, treeshap"
)

## Optionally write the cheem list to disk for the shiny app,
## which expects an rds file
if (FALSE) { ## Don't accidentally save.
  saveRDS(ames_chm, "./chm_ames_rf_tshap.rds")
  run_app() ## Select the saved rds file from the data dropdown.
}

## Cheem visuals, only drawn in an interactive session
if (interactive()) {
  primary <- 1
  comparison <- 2
  global_view(ames_chm, primary_obs = primary, comparison_obs = comparison)
  tour_basis <- sug_basis(ames_rf_shap, primary, comparison)
  manip <- sug_manip_var(ames_rf_shap, primary_obs = primary, comparison)
  tour <- radial_cheem_tour(ames_chm, basis = tour_basis, manip_var = manip)
  animate_plotly(tour)
}