import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load scikit-learn's bundled digits dataset.
digits = datasets.load_digits()
X = digits.data

# Binary target: 1 where the digit is 9, 0 everywhere else. This deliberately
# produces a highly skewed label distribution. The dtype of the original
# target array is kept.
y = (digits.target == 9).astype(digits.target.dtype)

# Fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Fit a logistic-regression classifier with default settings.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
准确度
# Score the fitted classifier on the held-out test split.
log_reg.score(X_test, y_test)
输出:0.9755555555555555
混淆矩阵
# Predicted labels for the test split; reused by the metric calls that follow.
y_log_predict = log_reg.predict(X_test)
def TN(y_true, y_predict):
    """Count true negatives: samples labelled 0 that were also predicted 0.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where both the label and the prediction are 0.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to ndarrays so plain lists compare element-wise; ``some_list == 0``
    # is a single bool and would silently produce a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    # Note the single '&': element-wise AND of the two boolean masks.
    return np.sum((y_true == 0) & (y_predict == 0))
TN(y_test, y_log_predict) # 403
def FP(y_true, y_predict):
    """Count false positives: samples labelled 0 that were predicted 1.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where the label is 0 and the prediction is 1.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to ndarrays so plain lists compare element-wise; ``some_list == 0``
    # is a single bool and would silently produce a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))
FP(y_test, y_log_predict) # 2
def FN(y_true, y_predict):
    """Count false negatives: samples labelled 1 that were predicted 0.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where the label is 1 and the prediction is 0.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to ndarrays so plain lists compare element-wise; ``some_list == 1``
    # is a single bool and would silently produce a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))
FN(y_test, y_log_predict) # 9
def TP(y_true, y_predict):
    """Count true positives: samples labelled 1 that were also predicted 1.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The number of positions where both the label and the prediction are 1.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to ndarrays so plain lists compare element-wise; ``some_list == 1``
    # is a single bool and would silently produce a count of 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))
TP(y_test, y_log_predict) # 36
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix for binary labels.

    Layout (rows are the true class, columns the predicted class)::

        [[TN, FP],
         [FN, TP]]

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        A 2x2 ``numpy.ndarray`` of counts.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce once so plain lists are compared element-wise rather than as a
    # whole object (which would silently yield an all-zero matrix).
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)

    # Compute each boolean mask once and reuse it for both cells it feeds.
    actual_neg, actual_pos = y_true == 0, y_true == 1
    pred_neg, pred_pos = y_predict == 0, y_predict == 1

    return np.array([
        [np.sum(actual_neg & pred_neg), np.sum(actual_neg & pred_pos)],
        [np.sum(actual_pos & pred_neg), np.sum(actual_pos & pred_pos)],
    ])
confusion_matrix(y_test, y_log_predict)
输出结果:
array([[403, 2], [ 9, 36]])
精准率
def precision_score(y_true, y_predict):
    """Compute precision, TP / (TP + FP), for binary labels.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are actually positive, or
        ``0.0`` when nothing was predicted positive.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)

    predicted_pos = y_predict == 1
    tp = np.sum((y_true == 1) & predicted_pos)
    fp = np.sum((y_true == 0) & predicted_pos)

    # Check the denominator explicitly: dividing NumPy integers 0/0 does not
    # raise, it returns nan with a RuntimeWarning, so a try/except around the
    # division never takes the fallback branch.
    denominator = tp + fp
    if denominator == 0:
        return 0.0
    return tp / denominator
precision_score(y_test, y_log_predict)
输出结果:0.9473684210526315
召回率
def recall_score(y_true, y_predict):
    """Compute recall, TP / (TP + FN), for binary labels.

    Args:
        y_true: Ground-truth binary labels (0/1); any array-like.
        y_predict: Predicted binary labels, same length as ``y_true``.

    Returns:
        The fraction of actual positives that were predicted positive, or
        ``0.0`` when there are no actual positives.

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)

    actual_pos = y_true == 1
    tp = np.sum(actual_pos & (y_predict == 1))
    fn = np.sum(actual_pos & (y_predict == 0))

    # Check the denominator explicitly: dividing NumPy integers 0/0 does not
    # raise, it returns nan with a RuntimeWarning, so a try/except around the
    # division never takes the fallback branch.
    denominator = tp + fn
    if denominator == 0:
        return 0.0
    return tp / denominator
recall_score(y_test, y_log_predict)
输出结果:0.8
scikit-learn中的混淆矩阵、精准率、召回率
# NOTE: these imports rebind confusion_matrix, precision_score and
# recall_score, replacing the hand-written versions defined earlier in this
# file with scikit-learn's implementations.
from sklearn.metrics import confusion_matrix, precision_score, recall_score

confusion_matrix(y_test, y_log_predict)
precision_score(y_test, y_log_predict)
recall_score(y_test, y_log_predict)