Dec 4th, 2017
Model Selection
根據問題
與資料型態
Cross Validation
交叉驗證
Local Interpretable Model-Agnostic Explanations (LIME)
none
: Use all features for the explanation.
forward selection
: Features are added one by one based on their improvements to a ridge regression fit of the complex model outcome.
highest weights
: The m features with highest absolute weight in a ridge regression fit of the complex model outcome are chosen.
lasso
: The m features that are least prone to shrinkage, based on the regularization path of a lasso fit of the complex model outcome, are chosen.
tree
: A tree is fitted with log2(m) splits, so that at most m features are used. It may select fewer.
auto
: Uses forward selection if m <= 6, and highest weights otherwise.
# Load the packages used below: lime for the explanations, MASS for lda() and
# the biopsy data set.
library(lime)
library(MASS)

data(biopsy)

# First we'll clean up the data a bit
# Drop the ID column and discard every row that contains a missing value.
biopsy$ID <- NULL
biopsy <- na.omit(biopsy)

# Replace the column names with human-readable labels; the last column
# ("class") is the outcome.
names(biopsy) <- c(
  "clump thickness",
  "uniformity of cell size",
  "uniformity of cell shape",
  "marginal adhesion",
  "single epithelial cell size",
  "bare nuclei",
  "bland chromatin",
  "normal nucleoli",
  "mitoses",
  "class"
)
# Now we'll fit a linear discriminant model on all but 100 held-out cases
set.seed(4)  # fixed seed so the sampled test set is reproducible
test_set <- sample(seq_len(nrow(biopsy)), 100)

# Keep the outcome in its own vector and remove it from the predictors.
prediction <- biopsy$class
biopsy$class <- NULL

model <- lda(biopsy[-test_set, ], prediction[-test_set])

# Hold-out accuracy: share of test cases whose predicted class matches the
# true class. mean() avoids hard-coding the test-set size.
mean(predict(model, biopsy[test_set, ])$class == prediction[test_set])
## [1] 0.96
# Train the explainer on the training rows (everything outside test_set)
explainer <- lime(
  biopsy[-test_set, ],
  model,
  bin_continuous = TRUE,
  quantile_bins = FALSE
)

# Use the explainer on new observations: the first four held-out cases,
# requesting one label and four features per case
explanation <- explain(
  biopsy[test_set[1:4], ],
  explainer,
  n_labels = 1,
  n_features = 4
)

# Compact overview of the columns in the resulting explanation
tibble::glimpse(explanation)
## Observations: 16 ## Variables: 13 ## $ model_type <chr> "classification", "classification", "classifi... ## $ case <chr> "416", "416", "416", "416", "7", "7", "7", "7... ## $ label <chr> "benign", "benign", "benign", "benign", "beni... ## $ label_prob <dbl> 0.9964864, 0.9964864, 0.9964864, 0.9964864, 0... ## $ model_r2 <dbl> 0.5659044, 0.5659044, 0.5659044, 0.5659044, 0... ## $ model_intercept <dbl> 0.08837631, 0.08837631, 0.08837631, 0.0883763... ## $ model_prediction <dbl> 1.0244738, 1.0244738, 1.0244738, 1.0244738, 0... ## $ feature <chr> "normal nucleoli", "bare nuclei", "uniformity... ## $ feature_value <int> 5, 3, 3, 3, 1, 10, 1, 1, 5, 10, 10, 3, 1, 1, ... ## $ feature_weight <dbl> -0.018041571, 0.573050022, 0.202345467, 0.178... ## $ feature_desc <chr> "3.25 < normal nucleoli <= 5.50", "bare nucle... ## $ data <list> [[3, 3, 2, 6, 3, 3, 3, 5, 1], [3, 3, 2, 6, 3... ## $ prediction <list> [[0.9964864, 0.003513577], [0.9964864, 0.003...
# Explain the same four held-out cases again, this time with an explicit
# kernel width of 0.5 and feature_select = "auto"
explanation <- explain(
  biopsy[test_set[1:4], ],
  explainer,
  n_labels = 1,
  n_features = 4,
  kernel_width = 0.5,
  feature_select = "auto"
)

# Show columns 2 through 9 of the explanation
explanation[, 2:9]
## case label label_prob model_r2 model_intercept model_prediction ## 1 416 benign 0.9964864 0.4804993 0.4323631 1.0029394 ## 2 416 benign 0.9964864 0.4804993 0.4323631 1.0029394 ## 3 416 benign 0.9964864 0.4804993 0.4323631 1.0029394 ## 4 416 benign 0.9964864 0.4804993 0.4323631 1.0029394 ## 5 7 benign 0.9244742 0.4680113 0.3216358 0.6370384 ## 6 7 benign 0.9244742 0.4680113 0.3216358 0.6370384 ## 7 7 benign 0.9244742 0.4680113 0.3216358 0.6370384 ## 8 7 benign 0.9244742 0.4680113 0.3216358 0.6370384 ## 9 207 malignant 0.9999911 0.6543314 0.1583423 1.0001967 ## 10 207 malignant 0.9999911 0.6543314 0.1583423 1.0001967 ## 11 207 malignant 0.9999911 0.6543314 0.1583423 1.0001967 ## 12 207 malignant 0.9999911 0.6543314 0.1583423 1.0001967 ## 13 195 benign 0.9999981 0.4631399 0.5493353 1.0047116 ## 14 195 benign 0.9999981 0.4631399 0.5493353 1.0047116 ## 15 195 benign 0.9999981 0.4631399 0.5493353 1.0047116 ## 16 195 benign 0.9999981 0.4631399 0.5493353 1.0047116 ## feature feature_value ## 1 mitoses 1 ## 2 bare nuclei 3 ## 3 clump thickness 3 ## 4 uniformity of cell size 3 ## 5 mitoses 1 ## 6 bare nuclei 10 ## 7 clump thickness 1 ## 8 uniformity of cell size 1 ## 9 mitoses 1 ## 10 uniformity of cell size 10 ## 11 clump thickness 10 ## 12 uniformity of cell shape 9 ## 13 mitoses 1 ## 14 bare nuclei 1 ## 15 clump thickness 3 ## 16 uniformity of cell size 1
# Plot the explanations; ncol = 2 is passed through to plot_features()
# (presumably the number of panel columns -- confirm against the lime docs).
plot_features(
  explanation,
  ncol = 2
)