Skip to contents

Predictions and treeshap attributions of a random forest model of North Ames house sales data, regressing sale price on house and lot variables.

Usage

ames_rf_pred

ames_rf_shap

Format

ames_rf_pred is a numeric vector of length n = 338 holding the predictions of a random forest model of house sale prices in North Ames. ames_rf_shap is a (338 x 9) data frame of the treeshap SHAP attributions of the random forest model for each observation.

Replicating

library(cheem)
library(randomForest)
library(treeshap)
## Fix the RNG state before fitting the random forest
set.seed(135)

## Regression setup
dat  <- amesHousing2018_NorthAmes
X    <- dat[, 1:9]      ## The first 9 columns are used as model inputs
Y    <- dat$SalePrice   ## Response passed to the random forest
clas <- dat$SubclassMS  ## Class labels; not used by the model fit below

## Model and treeSHAP
## mtry and nodesize are switched on is_discrete(Y): one pair of values for
## a discrete response, another for a continuous one.
ames_rf_fit <- randomForest::randomForest(
  X, Y, ntree = 125,
  mtry = ifelse(is_discrete(Y), sqrt(ncol(X)), ncol(X) / 3),
  nodesize = max(ifelse(is_discrete(Y), 1, 5), nrow(X) / 500))
## Predictions on the same observations the model was fit to
ames_rf_pred <- predict(ames_rf_fit, X)
## Unify the fitted forest, then compute treeshap on X; the two switches are
## named so the call is readable without looking up the signature.
ames_rf_shap <- treeshap::treeshap(
  treeshap::randomForest.unify(ames_rf_fit, X), X,
  interactions = FALSE, verbose = FALSE)
## Keep only the `shaps` element of the treeshap result
ames_rf_shap <- ames_rf_shap$shaps

## Use FALSE rather than F: F is an ordinary variable that can be reassigned,
## which would silently enable the save.
if (FALSE) { ## Don't accidentally save
  save(ames_rf_pred, file = "./data/ames_rf_pred.rda")
  save(ames_rf_shap, file = "./data/ames_rf_shap.rda")
  #usethis::use_data(ames_rf_pred)
  #usethis::use_data(ames_rf_shap)
}

An object of class data.frame with 338 rows and 9 columns.

Examples

library(cheem)

## Regression setup: model inputs, response and class labels
dat <- amesHousing2018_NorthAmes
X <- dat[, seq_len(9)]
Y <- dat$SalePrice
clas <- dat$SubclassMS

## Precomputed predictions and shap attribution
## Lines starting with `#>` are the recorded console output of the call above.
## ames_rf_pred: named numeric vector with 338 elements
str(ames_rf_pred)
#>  Named num [1:338] 203193 116048 167320 223589 139029 ...
#>  - attr(*, "names")= chr [1:338] "1" "2" "3" "4" ...
## ames_rf_shap: data frame with 338 rows and 9 numeric columns
str(ames_rf_shap)
#> 'data.frame':	338 obs. of  9 variables:
#>  $ LotArea    : num  27231 -374 14749 1687 -1036 ...
#>  $ OverallQual: num  6828 -5011 6182 22641 -3156 ...
#>  $ YearBuild  : num  1881 294 -438 4182 2677 ...
#>  $ LivingArea : num  13490 -16765 4160 28335 -3640 ...
#>  $ Bathrms    : num  -824 -1310 1621 10436 -996 ...
#>  $ Bedrms     : num  528 -1363 360 744 672 ...
#>  $ TotRms     : num  1866 -3193 124 5314 1135 ...
#>  $ GarageYrBlt: num  2847 -937 -559 791 1462 ...
#>  $ GarageArea : num  5312 672 -2915 5423 -2125 ...

## Build the cheem list from the data, attributions, predictions and classes
ames_chm <- cheem_ls(
  X, Y, ames_rf_shap, ames_rf_pred, clas,
  label = "Ames, random forest, treeshap"
)

## Optionally persist for the shiny app, which expects an rds file
if (FALSE) { ## Guarded so that nothing is written by accident.
  saveRDS(ames_chm, "./chm_ames_rf_tshap.rds")
  run_app() ## Pick the saved rds file from the data dropdown.
}

## Cheem visuals, only produced in an interactive session
if (interactive()) {
  ## Primary and comparison observations
  prim <- 1
  comp <- 2
  global_view(
    ames_chm,
    primary_obs = prim,
    comparison_obs = comp
  )
  ## Basis and manipulation variable suggested from the attributions
  bas <- sug_basis(ames_rf_shap, prim, comp)
  mv  <- sug_manip_var(ames_rf_shap, primary_obs = prim, comp)
  ## Radial tour built from the suggestions, then animated with plotly
  ggt <- radial_cheem_tour(ames_chm, basis = bas, manip_var = mv)
  animate_plotly(ggt)
}