Files
coco 85d885e008 a
2026-07-03 16:29:47 +08:00

354 lines
12 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd \n",
"import seaborn as sns\n",
"from IPython.display import display\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"import sklearn\n",
"%matplotlib inline\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the training table for ONE analyte into `data`.\n",
"# Exactly one read_* line should be active; the commented lines are the other\n",
"# data files this notebook has been run against (\"done\" is the author's own marker).\n",
"try:\n",
"    # pH value - done\n",
"    #data = pd.read_csv(\"data-ph.csv\")\n",
"    # microalbumin - done\n",
"    #data = pd.read_csv(\"data-mau.csv\")\n",
"    # protein - done\n",
"    #data = pd.read_csv(\"data-pro.csv\")\n",
"    # nitrite - done\n",
"    #data = pd.read_csv(\"data-nit.csv\")\n",
"\n",
"    # creatinine\n",
"    #data = pd.read_csv(\"data-cre.csv\")\n",
"    # glucose\n",
"    #data = pd.read_csv(\"data-glu.csv\")\n",
"\n",
"    # Original note here says the data is incorrect; it presumably refers to the\n",
"    # ketone file below - TODO confirm which file is meant.\n",
"    data = pd.read_csv(\"mix-mau-data.csv\")\n",
"    #data = pd.read_excel(\"data-ket.xlsx\")\n",
"\n",
"    # specific gravity\n",
"    #data = pd.read_csv(\"data-sg.csv\")\n",
"    # ascorbic acid\n",
"    #data = pd.read_csv(\"data-vc.csv\")\n",
"\n",
"    # white blood cells - done\n",
"    #data = pd.read_csv(\"data-wbc.csv\")\n",
"    # urobilinogen - done\n",
"    #data = pd.read_csv(\"data-uro.csv\")\n",
"    # urinary calcium - done\n",
"    # (as written, data2 is never assigned in this snippet)\n",
"    #data1 = pd.read_csv(\"data-uca.csv\")\n",
"    #data = pd.read_csv(\"data-uca2.csv\")\n",
"    #data = data1.append(data2);\n",
"    # bilirubin - done\n",
"    #data = pd.read_csv(\"data-bil.csv\")\n",
"    # occult blood - done\n",
"    #data = pd.read_csv(\"data-bld.csv\")\n",
"except Exception:\n",
"    # Announce the failure, then re-raise: swallowing the error would let the\n",
"    # following cells fail with a NameError or silently reuse a stale `data`\n",
"    # left in the kernel by an earlier run.\n",
"    print (\"load data error !!!!!!!!!!\")\n",
"    raise\n",
"else:\n",
"    print (\"load data successful !!!!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Separate the loaded table into the feature columns and the target column \"index\".\n",
"train_features = data.drop(labels=[\"index\"], axis=\"columns\")\n",
"train_labels = data[\"index\"]\n",
"\n",
"# Summary statistics of the features, rendered as the cell output.\n",
"train_features.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Estimators and metrics used by the modelling cells below.\n",
"#from sklearn.model_selection import KFold\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"\n",
"\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split\n",
"# lives in sklearn.model_selection, the module the KFold cells already import from.\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out 40% of the rows for testing; the fixed random_state makes the split repeatable.\n",
"X_train ,X_test,y_train,y_test = train_test_split(train_features,train_labels,test_size = 0.4, random_state = 0)\n",
"#X_train ,X_test,y_train,y_test = train_test_split(train_features,train_labels,test_size = 0.2, random_state = 20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import classification_report,confusion_matrix\n",
"\n",
"# Baseline: one unpruned decision tree fitted on the hold-out training split.\n",
"# random_state pins the tie-breaking between equally good splits so reruns\n",
"# produce the same tree.\n",
"dtree = DecisionTreeClassifier(criterion='gini',max_depth=None,random_state=0)\n",
"dtree.fit(X_train,y_train)\n",
"predictions = dtree.predict(X_test)\n",
"\n",
"print(\"-----------classification_report----\\n\")\n",
"print(classification_report(y_test,predictions))\n",
"print(\"-----------confusion_matrix---------\\n\")\n",
"cm=confusion_matrix(y_test,predictions)\n",
"print(cm)\n",
"print(\"------------------------------------\\n\")\n",
"# Correct predictions are the whole diagonal of the confusion matrix. Summing only\n",
"# cm[0,0] and cm[1,1] undercounts with more than two classes and raises\n",
"# IndexError with a single class.\n",
"print (\"Accuracy of prediction:\",round(cm.trace()/cm.sum(),3))\n",
"print(\"-----------------------------------\\n\")\n",
"print (\"DecisionTree accuracy score:\" , accuracy_score(y_test,predictions))\n",
"print (\"f1 score:\" , f1_score(y_test,predictions,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,predictions,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,predictions,average='micro'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import classification_report\n",
"\n",
"# Random forest of 600 trees on the hold-out training split; random_state makes\n",
"# the bootstrap sampling, and therefore the scores below, repeatable.\n",
"rfc = RandomForestClassifier(n_estimators=600, random_state=0)\n",
"rfc.fit(X_train, y_train)\n",
"rfc_pred = rfc.predict(X_test)\n",
"# Report on the forest's own predictions; the previous code passed `predictions`,\n",
"# which holds the decision-tree output from the cell above.\n",
"cr = classification_report(y_test,rfc_pred)\n",
"print(cr)\n",
"cm = confusion_matrix(y_test,rfc_pred)\n",
"print(cm)\n",
"\n",
"print(\"---------------------------------\\n\")\n",
"# Diagonal of the confusion matrix = correctly classified samples for any\n",
"# number of classes (cm[0,0]+cm[1,1] is only right for exactly two).\n",
"print (\"Accuracy of prediction:\",round(cm.trace()/cm.sum(),3))\n",
"print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"print(\"---------------------------------\\n\")\n",
"print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"from sklearn.model_selection import KFold\n",
"\n",
"# 5-fold cross-validation of the random forest over the full feature table.\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"# NOTE(review): KFold is not shuffled here, so folds follow the row order of the\n",
"# input file - confirm the rows are not sorted by label.\n",
"kf = KFold(n_splits=5)\n",
"\n",
"print(kf) \n",
"\n",
"for train_index, test_index in kf.split(X):\n",
"    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
"    X_train, X_test = X[train_index], X[test_index]\n",
"    y_train, y_test = y[train_index], y[test_index]\n",
"\n",
"    training_start_time = datetime.now()\n",
"\n",
"    # `rfc` is rebound on every fold, so after this cell it refers to the\n",
"    # model fitted on the last fold; the rfc export cells read that name.\n",
"    rfc = RandomForestClassifier(n_estimators=600, random_state=0)\n",
"    rfc.fit(X_train, y_train)\n",
"    rfc_pred = rfc.predict(X_test)\n",
"    # The label used to say \"svm linear\" although this loop scores the random forest.\n",
"    print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"    print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"    print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"    print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
"\n",
"    training_stop_time = datetime.now()\n",
"\n",
"    # Elapsed time covers fit, predict and metric printing for this fold.\n",
"    print (\"runing time:\",(training_stop_time - training_start_time))\n",
"    print(\"\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"from sklearn.model_selection import KFold\n",
"\n",
"# 5-fold cross-validation of the linear SVM over the full feature table.\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"kf.get_n_splits(X)\n",
"\n",
"print(kf)\n",
"\n",
"# Metrics that are all reported with micro averaging, in print order.\n",
"micro_metrics = (\n",
"    (\"f1 score:\", f1_score),\n",
"    (\"precision_score:\", precision_score),\n",
"    (\"recall_score:\", recall_score),\n",
")\n",
"\n",
"for train_rows, test_rows in kf.split(X):\n",
"    print(\"TRAIN:\", train_rows, \"TEST:\", test_rows)\n",
"    X_train, y_train = X[train_rows], y[train_rows]\n",
"    X_test, y_test = X[test_rows], y[test_rows]\n",
"\n",
"    fold_started = datetime.now()\n",
"\n",
"    # fit() returns the estimator itself, so construction and fitting can be chained.\n",
"    clf_svm_linear = SVC(kernel='linear', gamma=0.02, C=1).fit(X_train, y_train)\n",
"    pred = clf_svm_linear.predict(X_test)\n",
"    print(\"svm linear accuracy score:\", accuracy_score(y_test, pred))\n",
"    for metric_label, metric_fn in micro_metrics:\n",
"        print(metric_label, metric_fn(y_test, pred, average='micro'))\n",
"\n",
"    fold_finished = datetime.now()\n",
"\n",
"    # Elapsed time covers fit, predict and metric printing for this fold.\n",
"    print(\"runing time:\", (fold_finished - fold_started))\n",
"    print(\"\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"# Fit and score the linear SVM on a 60/40 hold-out split. The timer spans the\n",
"# split, the fit, the prediction and the metric printing.\n",
"run_started = datetime.now()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    train_features, train_labels, test_size=0.4, random_state=0\n",
")\n",
"\n",
"# Alternative setting tried earlier:\n",
"#clf_svm_linear = SVC(kernel = 'linear',gamma=0.00001,C=0.01)\n",
"# This fitted estimator is the one the svm export cells pass to sklearn_porter.\n",
"clf_svm_linear = SVC(kernel='linear', gamma=0.02, C=1).fit(X_train, y_train)\n",
"pred = clf_svm_linear.predict(X_test)\n",
"\n",
"# For per-class precision/recall, pass average=None instead of 'micro'.\n",
"print(\"svm linear accuracy score:\", accuracy_score(y_test, pred))\n",
"for metric_label, metric_fn in (\n",
"    (\"f1 score:\", f1_score),\n",
"    (\"precision_score:\", precision_score),\n",
"    (\"recall_score:\", recall_score),\n",
"):\n",
"    print(metric_label, metric_fn(y_test, pred, average='micro'))\n",
"\n",
"run_finished = datetime.now()\n",
"print(\"runing clf_svm_linear time:\", (run_finished - run_started))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"# Transpile the fitted linear SVM to C source text with sklearn_porter.\n",
"porter_clf_svm_liner_14items = Porter(clf_svm_linear, language='c').export()\n",
"\n",
"#print(porter_clf_svm_liner_14items)\n",
"# The with-block closes the file even if the write raises; the previous\n",
"# open()/close() pair leaked the handle in that case.\n",
"with open(\"new14modal/svm_bil.c\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"# Transpile the fitted linear SVM to JavaScript source text with sklearn_porter.\n",
"porter_clf_svm_liner_14items = Porter(clf_svm_linear, language='js').export()\n",
"\n",
"#print(porter_clf_svm_liner_14items)\n",
"# Alternative output path used earlier:\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"# The with-block closes the file even if the write raises; the previous\n",
"# open()/close() pair leaked the handle in that case.\n",
"with open(\"new14modal/svm_bil.js\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"# Transpile the random forest to C source text with sklearn_porter.\n",
"# NOTE: despite its name, the variable below holds the random-forest export.\n",
"# When the notebook is run top to bottom, `rfc` is the model fitted on the last\n",
"# fold of the KFold random-forest cell, not the hold-out model.\n",
"porter_clf_svm_liner_14items = Porter(rfc, language='c').export()\n",
"\n",
"#print(porter_clf_svm_liner_14items)\n",
"# The with-block closes the file even if the write raises; the previous\n",
"# open()/close() pair leaked the handle in that case.\n",
"with open(\"new14modal/rfc_bil.c\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"# Transpile the random forest to JavaScript source text with sklearn_porter.\n",
"# NOTE: despite its name, the variable below holds the random-forest export.\n",
"# When the notebook is run top to bottom, `rfc` is the model fitted on the last\n",
"# fold of the KFold random-forest cell, not the hold-out model.\n",
"porter_clf_svm_liner_14items = Porter(rfc, language='js').export()\n",
"\n",
"#print(porter_clf_svm_liner_14items)\n",
"# Alternative output path used earlier:\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"# The with-block closes the file even if the write raises; the previous\n",
"# open()/close() pair leaked the handle in that case.\n",
"with open(\"new14modal/rfc_bil.js\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}