-
Notifications
You must be signed in to change notification settings - Fork 1
/
datasets.py
49 lines (41 loc) · 1.86 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import shap
import sklearn.datasets
import xgboost as xgb
import numpy as np
def get_regression(n_foreground, n_background):
    """Build a synthetic regression benchmark with reference SHAP values.

    A fixed dataset of 1000 rows and 10 features is generated with
    ``sklearn.datasets.make_regression`` (``noise=0.1``, ``random_state=47``),
    a default ``xgb.XGBRegressor`` is fitted on all of it, and the leading
    rows are explained with ``shap.TreeExplainer``.

    Args:
        n_foreground: Number of leading rows of the dataset to explain.
        n_background: Number of leading rows of the dataset passed to the
            explainer as background data.

    Returns:
        Tuple ``(model, X_background, X_foreground, exact_shap_values)``.

    Note:
        Both subsets are sliced from the start of the same array, so they
        overlap, and slicing caps each of them at the 1000 generated rows.
    """
    X, y = sklearn.datasets.make_regression(
        n_samples=1000,
        n_features=10,
        noise=0.1,
        random_state=47,
    )
    model = xgb.XGBRegressor().fit(X, y)
    X_foreground = X[:n_foreground]
    # Copy (as the other loaders in this file do) so the returned background
    # owns its memory; a plain slice would be a view sharing storage with
    # X_foreground, and an in-place edit of one would silently alter the other.
    X_background = X[:n_background].copy()
    tree_explainer = shap.TreeExplainer(model, X_background)
    exact_shap_values = tree_explainer.shap_values(X_foreground)
    return (model, X_background, X_foreground, exact_shap_values)
def get_cal_housing(n_foreground, n_background):
    """Fit XGBoost on California housing and compute reference SHAP values.

    The data comes from ``sklearn.datasets.fetch_california_housing`` with
    ``return_X_y=True``; a default ``xgb.XGBRegressor`` is trained on all of
    it.

    Args:
        n_foreground: Number of leading rows to explain.
        n_background: Number of leading rows copied and given to
            ``shap.TreeExplainer`` as background data.

    Returns:
        Tuple ``(model, X_background, X_foreground, exact_shap_values)``.
    """
    features, target = sklearn.datasets.fetch_california_housing(
        return_X_y=True
    )

    regressor = xgb.XGBRegressor()
    regressor.fit(features, target)

    # Both subsets start at row 0, so they overlap; only the background is
    # detached from the full feature array by copying.
    explained_rows = features[:n_foreground]
    reference_rows = features[:n_background].copy()

    explainer = shap.TreeExplainer(regressor, reference_rows)
    attributions = explainer.shap_values(explained_rows)
    return regressor, reference_rows, explained_rows, attributions
def get_adult(n_foreground, n_background):
    """Fit XGBoost on the OpenML "adult" data and compute reference SHAP values.

    The raw labels are turned into a boolean target that is ``True`` for
    every label different from the string ``'<=50K'``; a default
    ``xgb.XGBClassifier`` is then trained on the full dataset.

    Args:
        n_foreground: Number of leading rows to explain.
        n_background: Number of leading rows copied and given to
            ``shap.TreeExplainer`` as background data.

    Returns:
        Tuple ``(model, X_background, X_foreground, exact_shap_values)``.
    """
    # NOTE(review): no dataset version or output container is requested here,
    # so what fetch_openml returns presumably depends on the installed
    # scikit-learn release -- confirm against the pinned environment.
    features, raw_labels = sklearn.datasets.fetch_openml(
        "adult", return_X_y=True
    )
    targets = np.array([label != '<=50K' for label in raw_labels])

    classifier = xgb.XGBClassifier()
    classifier.fit(features, targets)

    # Both subsets start at row 0, so they overlap; only the background is
    # detached from the full feature set by copying.
    explained_rows = features[:n_foreground]
    reference_rows = features[:n_background].copy()

    explainer = shap.TreeExplainer(classifier, reference_rows)
    attributions = explainer.shap_values(explained_rows)
    return classifier, reference_rows, explained_rows, attributions
def get_breast_cancer(n_foreground, n_background):
    """Fit XGBoost on the breast-cancer data and compute reference SHAP values.

    The data comes from ``sklearn.datasets.load_breast_cancer`` with
    ``return_X_y=True``; a default ``xgb.XGBClassifier`` is trained on all of
    it.

    Args:
        n_foreground: Number of leading rows to explain.
        n_background: Number of leading rows copied and given to
            ``shap.TreeExplainer`` as background data.

    Returns:
        Tuple ``(model, X_background, X_foreground, exact_shap_values)``.
    """
    features, target = sklearn.datasets.load_breast_cancer(return_X_y=True)

    classifier = xgb.XGBClassifier()
    classifier.fit(features, target)

    # Both subsets start at row 0, so they overlap; only the background is
    # detached from the full feature array by copying.
    explained_rows = features[:n_foreground]
    reference_rows = features[:n_background].copy()

    explainer = shap.TreeExplainer(classifier, reference_rows)
    attributions = explainer.shap_values(explained_rows)
    return classifier, reference_rows, explained_rows, attributions