import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the digits dataset; the feature matrix is used unchanged.
digits = datasets.load_digits()
X = digits.data

# Turn the 10-class problem into a heavily skewed binary one:
# digit 9 becomes the positive class (1), every other digit becomes 0.
is_nine = digits.target == 9
y = is_nine.astype(digits.target.dtype)

# Fixed random_state keeps the train/test split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Fit a logistic-regression classifier with default hyper-parameters.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

准确度

# Score the fitted classifier on the held-out test split (X_test, y_test).
log_reg.score(X_test, y_test)

输出:0.9755555555555555

混淆矩阵

# Predictions of the fitted classifier on the test split.
y_log_predict = log_reg.predict(X_test)


def _count_label_pairs(y_true, y_predict, true_label, predicted_label):
    """Count samples with the given (true label, predicted label) pair.

    Args:
        y_true: Sequence of ground-truth labels.
        y_predict: Sequence of predicted labels, same length as ``y_true``.
        true_label: Ground-truth label value to match.
        predicted_label: Predicted label value to match.

    Returns:
        The number of positions where ``y_true`` equals ``true_label`` and
        ``y_predict`` equals ``predicted_label`` (a NumPy integer).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    assert len(y_true) == len(y_predict)
    # Coerce to arrays so plain Python lists are compared element-wise;
    # without this, ``[0, 1] == 0`` is a single ``False`` and the count
    # would silently come out as 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    # Note the single '&': element-wise AND of the two boolean masks.
    return np.sum((y_true == true_label) & (y_predict == predicted_label))


def TN(y_true, y_predict):
    """Return the number of true negatives (true 0, predicted 0)."""
    return _count_label_pairs(y_true, y_predict, 0, 0)


TN(y_test, y_log_predict)  # 403


def FP(y_true, y_predict):
    """Return the number of false positives (true 0, predicted 1)."""
    return _count_label_pairs(y_true, y_predict, 0, 1)


FP(y_test, y_log_predict)  # 2


def FN(y_true, y_predict):
    """Return the number of false negatives (true 1, predicted 0)."""
    return _count_label_pairs(y_true, y_predict, 1, 0)


FN(y_test, y_log_predict)  # 9


def TP(y_true, y_predict):
    """Return the number of true positives (true 1, predicted 1)."""
    return _count_label_pairs(y_true, y_predict, 1, 1)


TP(y_test, y_log_predict)  # 36


def confusion_matrix(y_true, y_predict):
    """Return the 2x2 binary confusion matrix as a NumPy array.

    Rows are the true class (0, then 1) and columns are the predicted
    class (0, then 1), i.e. ``[[TN, FP], [FN, TP]]``.
    """
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)],
    ])


confusion_matrix(y_test, y_log_predict)

输出结果:
array([[403, 2], [ 9, 36]])

精准率

def precision_score(y_true, y_predict):
    """Return the precision TP / (TP + FP) of binary predictions.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_predict: Predicted labels (0 or 1), same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are truly positive, or
        ``0.0`` when nothing was predicted positive.
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positives = tp + fp
    # Test the denominator explicitly: the counts are NumPy integers, and
    # dividing those by zero yields nan with a RuntimeWarning instead of
    # raising, so a try/except around the division never triggers.
    if predicted_positives == 0:
        return 0.0
    return tp / predicted_positives


precision_score(y_test, y_log_predict)

输出结果:0.9473684210526315

召回率

def recall_score(y_true, y_predict):
    """Return the recall TP / (TP + FN) of binary predictions.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_predict: Predicted labels (0 or 1), same length as ``y_true``.

    Returns:
        The fraction of truly positive samples that were predicted
        positive, or ``0.0`` when there are no positive samples.
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positives = tp + fn
    # Test the denominator explicitly: the counts are NumPy integers, and
    # dividing those by zero yields nan with a RuntimeWarning instead of
    # raising, so a try/except around the division never triggers.
    if actual_positives == 0:
        return 0.0
    return tp / actual_positives


recall_score(y_test, y_log_predict)

输出结果:0.8

scikit-learn中的混淆矩阵、精准率、召回率

# scikit-learn's own implementations of the three metrics. Importing them
# rebinds confusion_matrix, precision_score and recall_score, replacing any
# same-named functions defined earlier in this file.
from sklearn.metrics import confusion_matrix, precision_score, recall_score

confusion_matrix(y_test, y_log_predict)
precision_score(y_test, y_log_predict)
recall_score(y_test, y_log_predict)