354 lines
12 KiB
Plaintext
354 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd \n",
|
|
"import seaborn as sns\n",
|
|
"from IPython.display import display\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from mpl_toolkits.mplot3d import Axes3D\n",
|
|
"import sklearn\n",
|
|
"%matplotlib inline\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Load the dataset to model. Exactly one read call is active; the commented-out calls are the\n",
"# other per-analyte files, and the \"- done\" suffixes are carried over from the original notes.\n",
"try:\n",
"    # pH - done\n",
"    #data = pd.read_csv(\"data-ph.csv\")\n",
"    # microalbumin - done\n",
"    #data = pd.read_csv(\"data-mau.csv\")\n",
"    # protein - done\n",
"    #data = pd.read_csv(\"data-pro.csv\")\n",
"    # nitrite - done\n",
"    #data = pd.read_csv(\"data-nit.csv\")\n",
"\n",
"    # creatinine\n",
"    #data = pd.read_csv(\"data-cre.csv\")\n",
"    # glucose\n",
"    #data = pd.read_csv(\"data-glu.csv\")\n",
"\n",
"    # presumably ketone bodies (original note: the data is not correct) - TODO confirm\n",
"    data = pd.read_csv(\"mix-mau-data.csv\")\n",
"    #data = pd.read_excel(\"data-ket.xlsx\")\n",
"\n",
"    # specific gravity\n",
"    #data = pd.read_csv(\"data-sg.csv\")\n",
"    # ascorbic acid (vitamin C)\n",
"    #data = pd.read_csv(\"data-vc.csv\")\n",
"\n",
"    # white blood cells - done\n",
"    #data = pd.read_csv(\"data-wbc.csv\")\n",
"    # urobilinogen - done\n",
"    #data = pd.read_csv(\"data-uro.csv\")\n",
"    # urinary calcium - done (two files merged into one frame)\n",
"    #data1 = pd.read_csv(\"data-uca.csv\")\n",
"    #data2 = pd.read_csv(\"data-uca2.csv\")\n",
"    #data = pd.concat([data1, data2])\n",
"    # bilirubin - done\n",
"    #data = pd.read_csv(\"data-bil.csv\")\n",
"    # occult blood - done\n",
"    #data = pd.read_csv(\"data-bld.csv\")\n",
"except Exception:\n",
"    # Report, then re-raise: swallowing the failure would hide the real cause and only\n",
"    # postpone the crash to a NameError on `data` in the cells that use it.\n",
"    print (\"load data error !!!!!!!!!!\")\n",
"    raise\n",
"else:\n",
"    print (\"load data successful !!!!!\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# The \"index\" column is the prediction target; every other column is a feature.\n",
"train_features = data.drop(\"index\", axis=\"columns\")\n",
"train_labels = data[\"index\"]\n",
"\n",
"# Summary statistics of the feature columns (rendered as the cell output).\n",
"train_features.describe()\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"# train_test_split lives in sklearn.model_selection; the old sklearn.cross_validation\n",
"# module was deprecated and later removed, so importing from it fails on current releases.\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.svm import SVC\n",
"\n",
"# Hold out 40% of the rows for testing; the fixed random_state makes the split repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    train_features, train_labels, test_size=0.4, random_state=0\n",
")\n",
"# Alternative split kept from the original notes: test_size=0.2, random_state=20\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import classification_report,confusion_matrix\n",
"\n",
"# Fit an unrestricted-depth decision tree (Gini criterion) on the hold-out split\n",
"# and score it on the test part.\n",
"dtree = DecisionTreeClassifier(criterion='gini',max_depth=None)\n",
"dtree.fit(X_train,y_train)\n",
"predictions = dtree.predict(X_test)\n",
"\n",
"print(\"-----------classification_report----\\n\")\n",
"print(classification_report(y_test,predictions))\n",
"print(\"-----------confusion_matrix---------\\n\")\n",
"cm=confusion_matrix(y_test,predictions)\n",
"print(cm)\n",
"print(\"------------------------------------\\n\")\n",
"# Correct predictions are the whole diagonal of the confusion matrix. Adding only\n",
"# cm[0,0] and cm[1,1] is right for exactly two classes and under-counts for more.\n",
"print (\"Accuracy of prediction:\",round(cm.trace()/cm.sum(),3))\n",
"print(\"-----------------------------------\\n\")\n",
"print (\"DecisionTree accuracy score:\" , accuracy_score(y_test,predictions))\n",
"print (\"f1 score:\" , f1_score(y_test,predictions,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,predictions,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,predictions,average='micro'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn.ensemble import RandomForestClassifier\n",
"# confusion_matrix is imported here as well so the cell does not depend on an earlier cell's import.\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"# Fit a 600-tree random forest on the hold-out split and score it on the test part.\n",
"rfc = RandomForestClassifier(n_estimators=600)\n",
"rfc.fit(X_train, y_train)\n",
"rfc_pred = rfc.predict(X_test)\n",
"# Report on the forest's own predictions (rfc_pred), not on `predictions`,\n",
"# which holds the decision-tree output of another cell.\n",
"cr = classification_report(y_test,rfc_pred)\n",
"print(cr)\n",
"cm = confusion_matrix(y_test,rfc_pred)\n",
"print(cm)\n",
"\n",
"print(\"---------------------------------\\n\")\n",
"# Correct predictions are the whole diagonal of the confusion matrix. Adding only\n",
"# cm[0,0] and cm[1,1] is right for exactly two classes and under-counts for more.\n",
"print (\"Accuracy of prediction:\",round(cm.trace()/cm.sum(),3))\n",
"print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"print(\"---------------------------------\\n\")\n",
"print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from datetime import datetime\n",
"\n",
"from sklearn.model_selection import KFold\n",
"\n",
"# 5-fold cross-validation of the random forest on the full feature/label arrays.\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"print(kf)\n",
"\n",
"# NOTE: the loop rebinds X_train/X_test/y_train/y_test and rfc on every fold, so after this\n",
"# cell they hold the last fold's arrays and model.\n",
"for train_index, test_index in kf.split(X):\n",
"    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
"    X_train, X_test = X[train_index], X[test_index]\n",
"    y_train, y_test = y[train_index], y[test_index]\n",
"\n",
"    training_start_time = datetime.now()\n",
"\n",
"    rfc = RandomForestClassifier(n_estimators=600)\n",
"    rfc.fit(X_train, y_train)\n",
"    rfc_pred = rfc.predict(X_test)\n",
"    # The label now names the model actually scored here (it used to say \"svm linear\").\n",
"    print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"    print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"    print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"    print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
"\n",
"    # The measured interval covers fitting, predicting and the metric printing above.\n",
"    training_stop_time = datetime.now()\n",
"\n",
"    print (\"runing time:\",(training_stop_time - training_start_time))\n",
"    print(\"\\n\\n\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from datetime import datetime\n",
"\n",
"from sklearn.model_selection import KFold\n",
"\n",
"# 5-fold cross-validation of the linear-kernel SVM on the full feature/label arrays.\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"print(kf)\n",
"\n",
"# NOTE: the loop rebinds X_train/X_test/y_train/y_test and clf_svm_linear on every fold,\n",
"# so after this cell they hold the last fold's arrays and model.\n",
"for train_index, test_index in kf.split(X):\n",
"    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
"    X_train, X_test = X[train_index], X[test_index]\n",
"    y_train, y_test = y[train_index], y[test_index]\n",
"\n",
"    training_start_time = datetime.now()\n",
"\n",
"    # gamma is passed through unchanged so the estimator keeps the same stored parameters.\n",
"    clf_svm_linear = SVC(kernel='linear', gamma=0.02, C=1)\n",
"    clf_svm_linear = clf_svm_linear.fit(X_train, y_train)\n",
"    pred = clf_svm_linear.predict(X_test)\n",
"    print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
"    print (\"f1 score:\" , f1_score(y_test,pred,average='micro'))\n",
"    print (\"precision_score:\" , precision_score(y_test,pred,average='micro'))\n",
"    print (\"recall_score:\" , recall_score(y_test,pred,average='micro'))\n",
"\n",
"    # The measured interval covers fitting, predicting and the metric printing above.\n",
"    training_stop_time = datetime.now()\n",
"\n",
"    print (\"runing time:\",(training_stop_time - training_start_time))\n",
"    print(\"\\n\\n\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from datetime import datetime\n",
"\n",
"# The timer starts before the split, so the reported duration covers splitting,\n",
"# fitting, predicting and scoring.\n",
"training_start_time = datetime.now()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    train_features, train_labels, test_size=0.4, random_state=0\n",
")\n",
"\n",
"# Previously tried setting, kept from the original notes: gamma=0.00001, C=0.01\n",
"# fit() returns the estimator itself, so construction and training chain into one assignment.\n",
"clf_svm_linear = SVC(kernel='linear', gamma=0.02, C=1).fit(X_train, y_train)\n",
"pred = clf_svm_linear.predict(X_test)\n",
"\n",
"print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
"# The three micro-averaged metrics share one call shape, so they are printed from a table.\n",
"for metric_label, metric_fn in ((\"f1 score:\", f1_score),\n",
"                                (\"precision_score:\", precision_score),\n",
"                                (\"recall_score:\", recall_score)):\n",
"    print (metric_label, metric_fn(y_test, pred, average='micro'))\n",
"\n",
"training_stop_time = datetime.now()\n",
"print (\"runing clf_svm_linear time:\",(training_stop_time - training_start_time))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn_porter import Porter\n",
"\n",
"# Export the trained linear SVM as C source text via sklearn-porter.\n",
"porter_clf_svm_liner_14items = Porter(clf_svm_linear, language='c').export()\n",
"\n",
"# The context manager closes the file even if write() raises; the previous\n",
"# open()/close() pair leaked the handle on error.\n",
"with open(\"new14modal/svm_bil.c\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn_porter import Porter\n",
"\n",
"# Export the trained linear SVM as JavaScript source text via sklearn-porter.\n",
"porter_clf_svm_liner_14items = Porter(clf_svm_linear, language='js').export()\n",
"\n",
"# The context manager closes the file even if write() raises; the previous\n",
"# open()/close() pair leaked the handle on error.\n",
"with open(\"new14modal/svm_bil.js\",'wb') as f:\n",
"    f.write(porter_clf_svm_liner_14items.encode())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn_porter import Porter\n",
"\n",
"# Export the random forest currently bound to `rfc` as C source text via sklearn-porter.\n",
"# The variable is named after the forest; it used to carry the SVM export's name.\n",
"porter_rfc_14items = Porter(rfc, language='c').export()\n",
"\n",
"# The context manager closes the file even if write() raises; the previous\n",
"# open()/close() pair leaked the handle on error.\n",
"with open(\"new14modal/rfc_bil.c\",'wb') as f:\n",
"    f.write(porter_rfc_14items.encode())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"from sklearn_porter import Porter\n",
"\n",
"# Export the random forest currently bound to `rfc` as JavaScript source text via sklearn-porter.\n",
"# The variable is named after the forest; it used to carry the SVM export's name.\n",
"porter_rfc_14items = Porter(rfc, language='js').export()\n",
"\n",
"# The context manager closes the file even if write() raises; the previous\n",
"# open()/close() pair leaked the handle on error.\n",
"with open(\"new14modal/rfc_bil.js\",'wb') as f:\n",
"    f.write(porter_rfc_14items.encode())"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|