编写 test_sklearn_5.py 如下
# -*- coding: utf-8 -*-
"""Build a classification model with scikit-learn estimators and evaluate it.

The script loads the breast-cancer dataset bundled with scikit-learn, splits
it 80/20 into training and test sets, standardizes the features, fits an SVC,
and then reports accuracy, precision, recall, F1, Cohen's kappa, the full
classification report, and a ROC curve for the test set.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    cohen_kappa_score,
    f1_score,
    precision_score,
    recall_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# 1. Load the breast-cancer dataset shipped with sklearn.datasets.
cancer = datasets.load_breast_cancer()
# Feature matrix.
cancer_data = cancer['data']
# Class labels.
target = cancer['target']
# Feature names (not used below; kept for inspection).
feature_names = cancer['feature_names']

# 2. Split into training and test sets (20% held out, fixed seed so the
#    run is reproducible).
data_train, data_test, target_train, target_test = train_test_split(
    cancer_data, target, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak into the
# model.
stdScaler = StandardScaler().fit(data_train)
trainStd = stdScaler.transform(data_train)
testStd = stdScaler.transform(data_test)

# 3. Fit the SVM model.
svm = SVC(C=1.0).fit(trainStd, target_train)
print(" SVM model:\n", svm)

# Predict on the held-out TEST set.
cancer_target_pred = svm.predict(testStd)
print("前20个结果:\n", cancer_target_pred[:20])

# 4. Compare predictions with the ground truth by hand: number of correct
#    predictions, number of wrong predictions, and the resulting accuracy.
n_correct = np.sum(cancer_target_pred == target_test)
n_test = target_test.shape[0]
print("预测对的结果数为:", n_correct)
print("预测错的结果数为:", n_test - n_correct)
print("预测结果的准确率为:", n_correct / n_test)

# 5. Common evaluation metrics for classification models.
score = accuracy_score(target_test, cancer_target_pred)
print("用SVM 预测 breast_cancer 数据的准确率:", score)
score = precision_score(target_test, cancer_target_pred)
print("用SVM 预测 breast_cancer 数据的精确率:", score)
score = recall_score(target_test, cancer_target_pred)
print("用SVM 预测 breast_cancer 数据的召回率:", score)
score = f1_score(target_test, cancer_target_pred)
print("用SVM 预测 breast_cancer 数据的F1数值:", score)
score = cohen_kappa_score(target_test, cancer_target_pred)
print("用SVM 预测 breast_cancer 数据的 Cohen's Kappa 系数:", score)

# Per-class evaluation report.
print("用SVM 预测 breast_cancer 数据的分类评价报告:\n",
      classification_report(target_test, cancer_target_pred))

# 6. Plot the ROC curve.
# roc_curve needs a continuous score per sample so that it can sweep the
# decision threshold. Passing the hard 0/1 predictions would collapse the
# curve to a single operating point, so the signed distance to the
# separating hyperplane from decision_function is used instead.
cancer_target_score = svm.decision_function(testStd)
fpr, tpr, _thresholds = roc_curve(target_test, cancer_target_score)
plt.figure(figsize=(10, 6))
plt.xlim(0, 1)
plt.ylim(0.0, 1.1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.plot(fpr, tpr, linewidth=2, linestyle='-', color='red')
plt.show()
运行 python test_sklearn_5.py,输出如下:
- (base) D:\python> python test_sklearn_5.py
- SVM model:
- SVC()
- 前20个结果:
- [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0]
- 预测对的结果数为: 112
- 预测错的结果数为: 2
- 预测结果的准确率为: 0.9824561403508771
- 用SVM 预测 breast_cancer 数据的准确率: 0.9824561403508771
- 用SVM 预测 breast_cancer 数据的精确率: 0.9726027397260274
- 用SVM 预测 breast_cancer 数据的召回率: 1.0
- 用SVM 预测 breast_cancer 数据的F1数值: 0.9861111111111112
- 用SVM 预测 breast_cancer 数据的 Cohen's Kappa 系数: 0.9623140495867769
- 用SVM 预测 breast_cancer 数据的分类评价报告:
- precision recall f1-score support
- 0 1.00 0.95 0.98 43
- 1 0.97 1.00 0.99 71
- accuracy 0.98 114
- macro avg 0.99 0.98 0.98 114
- weighted avg 0.98 0.98 0.98 114
参考书:《Python 数据分析与应用》第6章 使用 scikit-learn 构建模型
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。