# crossvalidation_scikitlearn_pythoncodetutorial.py
# -*- coding: utf-8 -*-
"""CrossValidation-ScikitLearn_PythonCodeTutorial.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/15FFmKBlvdAFCP4-Ka2SoFsWC93PjdxJH
"""
# Load libraries
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Load the handwritten-digits dataset bundled with scikit-learn.
digits = datasets.load_digits()

# Feature matrix (one row per sample) and the matching target vector.
features = digits.data
target = digits.target

# Standardizer. It is placed inside the pipeline below so that
# cross_val_score fits it on the training part of each split only, rather
# than on the full dataset up front.
standard_scaler = StandardScaler()

# Logistic regression classifier with scikit-learn's default settings.
logit = LogisticRegression()

# Chain standardization and classification into a single estimator.
pipeline = make_pipeline(standard_scaler, logit)

# Define the splitting strategy: 11 shuffled folds with a fixed seed so the
# splits are reproducible. This only builds the splitter; the actual
# cross-validation happens in cross_val_score below.
kf = KFold(n_splits=11, shuffle=True, random_state=2)

# Run k-fold cross-validation; returns one score per fold.
cv_results = cross_val_score(
    pipeline,            # Estimator evaluated on every split
    features,            # Feature matrix
    target,              # Target vector
    cv=kf,               # Cross-validation splitter
    scoring="accuracy",  # Evaluation metric (a score, higher is better)
    n_jobs=-1,           # Use all CPU cores
)

# A bare expression is only echoed inside a notebook cell; print explicitly
# so the results are also visible when this file is run as a script.

# Scores for all 11 folds
print(cv_results)

# Mean score across the folds
print(cv_results.mean())