{
 "cells": [
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 排卵试纸机器学习算法验证" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. **import modules**" ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "import numpy as np\n",
   "import pandas as pd \n",
   "import seaborn as sns\n",
   "from IPython.display import display\n",
   "import matplotlib.pyplot as plt\n",
   "from mpl_toolkits.mplot3d import Axes3D\n",
   "import sklearn\n",
   "%matplotlib inline\n"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 2. **load data**" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "# Load the source files behind data2, data3 and data4.\n",
   "try:\n",
   "    data4_1 = pd.read_csv(\"data-4-2-old.txt\")\n",
   "    data4_2 = pd.read_csv(\"data-4-old.txt\")\n",
   "    data2 = pd.read_csv(\"data-pro-0.3456.txt\")\n",
   "    data3 = pd.read_csv(\"data-pro-0.71020.txt\")\n",
   "    data4 = pd.read_csv(\"data-pro-3.txt\")\n",
   "    data4_3 = pd.read_csv(\"hw-pro-data-4-old.txt\")\n",
   "\n",
   "    print(\"load data successful !!!!!\")\n",
   "except Exception as err:\n",
   "    # Show the real cause; a bare except hid it and also swallowed KeyboardInterrupt.\n",
   "    print(\"load data error !!!!!!!!!!\", repr(err))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "data4 = pd.concat([data4, data4_1, data4_2, data4_3])\n",
   "#data10_all['index'].replace(2,1,inplace=True)\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Sort by the \"h\" column, largest first, and save the result.\n",
   "data4 = data4.sort_values(by=\"h\", ascending=False)\n",
   "data4.to_csv('data4_sorted.txt')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Same descending sort on \"h\" for data2 and data3, then save both.\n",
   "data2 = data2.sort_values(by=\"h\", ascending=False)\n",
   "data3 = data3.sort_values(by=\"h\", ascending=False)\n",
   "\n",
   "\n",
   "data2.to_csv('data2_sorted.txt')\n",
   "data3.to_csv('data3_sorted.txt')\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "# Load the source files behind data0 and data1 from the \"14\" directory.\n",
   "try:\n",
   "    apple0 = pd.read_csv(\"14/apple-0-old.txt\")\n",
   "    data0 = pd.read_csv(\"14/data-0-old.txt\")\n",
   "    data0_1 = pd.read_csv(\"14/data-pro-0.1.txt\")\n",
   "    data0_2 = pd.read_csv(\"14/data-pro-0.15.txt\")\n",
   "    data0_3 = pd.read_csv(\"14/hw-pro-data-0-old.txt\")\n",
   "    data1_1 = pd.read_csv(\"14/apple-1-old.txt\")\n",
   "    data1_2 = pd.read_csv(\"14/data-pro-0.2.txt\")\n",
   "\n",
   "    print(\"load data successful !!!!!\")\n",
   "except Exception as err:\n",
   "    # Show the real cause; a bare except hid it and also swallowed KeyboardInterrupt.\n",
   "    print(\"load data error !!!!!!!!!!\", repr(err))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "data0 = pd.concat([apple0, data0, data0_1, data0_2, data0_3])\n",
   "data1 = pd.concat([data1_1, data1_2])\n",
   "\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Sort by the \"h\" column, largest first, and save the results.\n",
   "data0 = data0.sort_values(by=\"h\", ascending=False)\n",
   "data1 = data1.sort_values(by=\"h\", ascending=False)\n",
   "\n",
   "\n",
   "data0.to_csv('data0_sorted.csv')\n",
   "data1.to_csv('data1_sorted.csv')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "try:\n",
   "# data_iphone6p_75_10 = pd.read_csv(\"20170912.pm.csv\")\n",
   "# data_iphone6p_1234 = pd.read_csv(\"20170920.pm.csv\")\n",
   "# data_iphone6p_5 = pd.read_csv(\"20170922.pm.csv\")\n",
   "# data_iphone6p_0 = pd.read_csv(\"20170925.am.csv\")\n",
   "# data_iphone6p_0_0 = pd.read_csv(\"20170925.pm.csv\")\n",
   "# data_iphone6p_246 = pd.read_csv(\"20171011.pm.csv\")\n",
   "\n",
   "# data1 = pd.read_csv(\"ovdata_reindex.csv\")\n",
   "# data2 = pd.read_csv(\"ovdataMore_reindex.csv\")\n",
   "# data3 = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
   "    ovdata = pd.read_csv(\"ovdata.csv\")\n",
   "    ovdataMore = pd.read_csv(\"ovdataMore.csv\")\n",
   "    ov_data_2020 = pd.read_csv(\"ov_data_2020.csv\")\n",
   "    data10more = pd.read_csv(\"data10more.csv\")\n",
   "\n",
   "    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "    data = pd.concat([ovdata, ovdataMore, ov_data_2020, data10more])\n",
   "    # Keep only the rows recorded with whiteBalance == 0.\n",
   "    data_all = data[data[\"whiteBalance\"] == 0]\n",
   "    print(data_all.describe())\n",
   "\n",
   "# data4 = pd.read_csv(\"10_25_renew.csv\")\n",
   "\n",
   "# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
   "# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
   "\n",
   "# data1 = pd.read_csv(\"ovdata.csv\")\n",
   "# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
   "# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
   "# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
   "# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
   "\n",
   "    print(\"load data successful !!!!!\")\n",
   "except Exception as err:\n",
   "    # Show the real cause; a bare except hid it and also swallowed KeyboardInterrupt.\n",
   "    print(\"load data error !!!!!!!!!!\", repr(err))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "\n",
   "# The same four frames as the previous cell, combined again so the labels can be remapped.\n",
   "data2019_2020_old = pd.concat([ovdata, ovdataMore, ov_data_2020, data10more])\n",
   "# Remap 4->7, 3->6, 2->4, 1->2 in this order so a freshly written label is never remapped again.\n",
   "# Assigning the result back (instead of chained inplace=True) also works under pandas Copy-on-Write.\n",
   "for _old_label, _new_label in [(4, 7), (3, 6), (2, 4), (1, 2)]:\n",
   "    data2019_2020_old['index'] = data2019_2020_old['index'].replace(_old_label, _new_label)\n",
   "\n",
   "data2019_2020_old.describe()\n",
   "data2019_2020_old.to_excel('data2019_2020_old.xlsx')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "try:\n",
   "# data_iphone6p_75_10 = pd.read_csv(\"20170912.pm.csv\")\n",
   "# data_iphone6p_1234 = pd.read_csv(\"20170920.pm.csv\")\n",
   "# data_iphone6p_5 = pd.read_csv(\"20170922.pm.csv\")\n",
   "# data_iphone6p_0 = pd.read_csv(\"20170925.am.csv\")\n",
   "# data_iphone6p_0_0 = pd.read_csv(\"20170925.pm.csv\")\n",
   "# data_iphone6p_246 = pd.read_csv(\"20171011.pm.csv\")\n",
   "\n",
   "# data1 = pd.read_csv(\"ovdata_reindex.csv\")\n",
   "# data2 = pd.read_csv(\"ovdataMore_reindex.csv\")\n",
   "# data3 = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
   "    d1 = pd.read_excel(\"data_all_2020514.xlsx\")\n",
   "    d2 = pd.read_excel(\"data2019_2020_old.xlsx\")\n",
   "\n",
   "    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "    data = pd.concat([d1, d2])  #.append(data3).append(data4)\n",
   "    # Keep only the rows recorded with whiteBalance == 0.\n",
   "    data_all = data[data[\"whiteBalance\"] == 0]\n",
   "    print(data_all.describe())\n",
   "# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
   "# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
   "\n",
   "# data1 = pd.read_csv(\"ovdata.csv\")\n",
   "# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
   "# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
   "# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
   "# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
   "\n",
   "    print(\"load data successful !!!!!\")\n",
   "except Exception as err:\n",
   "    # Show the real cause; a bare except hid it and also swallowed KeyboardInterrupt.\n",
   "    print(\"load data error !!!!!!!!!!\", repr(err))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Pan Xiaochun's old data; testing showed this data cannot be used with the new algorithm.\n",
   "try:\n",
   "    t1 = pd.read_csv(\"newData/0_5_10_50_70.csv\")\n",
   "    t2 = pd.read_csv(\"newData/5_10_25_50_70.csv\")\n",
   "\n",
   "    # DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "    ts = pd.concat([t1, t2])  #.append(data3).append(data4)\n",
   "    # Keep only the rows recorded with whiteBalance == 0.\n",
   "    ts_all = ts[ts[\"whiteBalance\"] == 0]\n",
   "    print(ts_all.describe())\n",
   "# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
   "# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
   "\n",
   "# data1 = pd.read_csv(\"ovdata.csv\")\n",
   "# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
   "# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
   "# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
   "# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
   "\n",
   "    print(\"load data successful !!!!!\")\n",
   "except Exception as err:\n",
   "    # Show the real cause; a bare except hid it and also swallowed KeyboardInterrupt.\n",
   "    print(\"load data error !!!!!!!!!!\", repr(err))"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 3. 
**分析数据**" ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# data4 = data_iphone6p_246[data_iphone6p_246[\"whiteBalance\"] == 0]\n",
   "# data2= data_iphone6p_1234[data_iphone6p_1234[\"whiteBalance\"] == 0 ]\n",
   "# data1 = data_iphone6p_75_10[data_iphone6p_75_10[\"whiteBalance\"] == 0 ]\n",
   "# data3 = data_iphone6p_5[data_iphone6p_5[\"whiteBalance\"] == 0]\n",
   "# data0 = data_iphone6p_0[data_iphone6p_0[\"whiteBalance\"] == 0]\n",
   "# data0_0 = data_iphone6p_0_0[data_iphone6p_0_0[\"whiteBalance\"] == 0]\n",
   "\n",
   "\n",
   "#data_all = data2.append(data1[data1[\"index\"] == 5 ]).append(data3).append(data1[data1[\"index\"] == 7 ]).append(data1[data1[\"index\"] == 8 ]).append(data0).append(data0_0).append(data4)\n",
   "#data1['index'].replace(4,6,inplace=True)\n",
   "#data1['index'].replace(3,5,inplace=True)\n",
   "#data1['index'].replace(2,4,inplace=True)\n",
   "#data1['index'].replace(1,2,inplace=True)\n",
   "\n",
   "#data2['index'].replace(4,6,inplace=True)\n",
   "#data2['index'].replace(3,5,inplace=True)\n",
   "#data2['index'].replace(2,4,inplace=True)\n",
   "#data2['index'].replace(1,2,inplace=True)\n",
   "\n",
   "#data3['index'].replace(4,6,inplace=True)\n",
   "#data3['index'].replace(3,5,inplace=True)\n",
   "#data3['index'].replace(2,4,inplace=True)\n",
   "#data3['index'].replace(1,2,inplace=True)\n",
   "\n",
   "#data4['index'].replace(2,1,inplace=True)\n",
   "#data4['index'].replace(4,2,inplace=True)\n",
   "\n",
   "#data1_0 = data1[data1[\"whiteBalance\"] == 0]\n",
   "#data2_0 = data2[data2[\"whiteBalance\"] == 0]\n",
   "#data3_0 = data3[data3[\"whiteBalance\"] == 0]\n",
   "\n",
   "#data_test_0 = data_test\n",
   "\n",
   "#data_all =data1_0.append(data2_0);\n",
   "#data_all =data1.append(data2).append(data3);\n",
   "\n",
   "#data_all.to_csv('data_all_2019_2020_reindex.csv')\n",
   "#data1.to_csv('ovdata_modifed.csv')\n",
   "#data2.to_csv('ovdataMore_modifed.csv')\n",
   "#data3.to_csv('ov_data_2020_modifed.csv')\n",
   "#data4.to_csv('10_25_renew.csv')\n",
   "\n",
   "# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row order and index.\n",
   "data = pd.concat([d1, d2])  #.append(data3).append(data4)\n",
   "# Keep only the rows recorded with whiteBalance == 0.\n",
   "data_all = data[data[\"whiteBalance\"] == 0]\n",
   "print(data_all.describe())\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "\n",
   "def _right_block_lab_min(label):\n",
   "    # Return the right block's l/a/b minimum columns for the rows of data_all whose \"index\" equals label.\n",
   "    rows = data_all[data_all[\"index\"] == label]\n",
   "    return rows[\"right_block_l_min\"], rows[\"right_block_a_min\"], rows[\"right_block_b_min\"]\n",
   "\n",
   "# NOTE: the whiteBlock_R/G/B names are kept for compatibility; the values are the\n",
   "# right block's Lab l/a/b minima, selected per class label (0, 1, 2, 4, 6, 7).\n",
   "whiteBlock_R_one, whiteBlock_G_one, whiteBlock_B_one = _right_block_lab_min(0)\n",
   "whiteBlock_R_two, whiteBlock_G_two, whiteBlock_B_two = _right_block_lab_min(1)\n",
   "whiteBlock_R_three, whiteBlock_G_three, whiteBlock_B_three = _right_block_lab_min(2)\n",
   "whiteBlock_R_four, whiteBlock_G_four, whiteBlock_B_four = _right_block_lab_min(4)\n",
   "whiteBlock_R_five, whiteBlock_G_five, whiteBlock_B_five = _right_block_lab_min(6)\n",
   "whiteBlock_R_six, whiteBlock_G_six, whiteBlock_B_six = _right_block_lab_min(7)\n",
   "\n",
   "fig = plt.figure()\n",
   "#plt.rcParams[\"figure.figsize\"] = 20,20\n",
   "# Axes3D(fig) no longer attaches itself to the figure on recent Matplotlib releases,\n",
   "# which leaves the plot blank; add_subplot works on both old and new versions.\n",
   "ax = fig.add_subplot(111, projection='3d')\n",
   "\n",
   "ax.set_xlim(0,255)\n",
   "ax.set_ylim(0,255)\n",
   "ax.set_zlim(0,255)\n",
   "# The plotted columns are right_block_{l,a,b}_min, so label the axes and title accordingly.\n",
   "ax.set_xlabel('L')\n",
   "ax.set_ylabel('a')\n",
   "ax.set_zlabel('b')\n",
   "ax.set_title('Lab colorspace OV right block min value')\n",
   "# ax.scatter(whiteBlock_R_zero, whiteBlock_G_zero, whiteBlock_B_zero,s = 15,c='y')\n",
   "ax.scatter(whiteBlock_R_one, whiteBlock_G_one, whiteBlock_B_one, s=15, c='r')\n",
   "\n",
   "ax.scatter(whiteBlock_R_two, whiteBlock_G_two, whiteBlock_B_two, s=15, c='g')\n",
   "ax.scatter(whiteBlock_R_three, whiteBlock_G_three, whiteBlock_B_three, s=15, c='b')\n",
   "\n",
   "ax.scatter(whiteBlock_R_four, whiteBlock_G_four, whiteBlock_B_four, s=15, c='y')\n",
   "ax.scatter(whiteBlock_R_five, whiteBlock_G_five, whiteBlock_B_five, s=15, c='pink')\n",
   "ax.scatter(whiteBlock_R_six, whiteBlock_G_six, whiteBlock_B_six, s=15, c='c')\n",
   "\n",
   "plt.show()"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "data_all.columns"
  ] },
  { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "hsv max min hist value h值要去掉" ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 预处理数据" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "# Labels are the \"index\" column; features are every column except dateTime, index and whiteBalance.\n",
   "train_labels = data_all[\"index\"]\n",
   "train_features = data_all.drop([\"dateTime\", \"index\", \"whiteBalance\"], axis=1)\n",
   "\n",
   "\n",
   "\n",
   "train_features.describe()\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Split data_all by class label (0..7) and export each subset to its own workbook.\n",
   "(data_0, data_1, data_2, data_3,\n",
   " data_4, data_5, data_6, data_7) = [data_all[data_all[\"index\"] == _label] for _label in range(8)]\n",
   "for _label, _subset in enumerate([data_0, data_1, data_2, data_3, data_4, data_5, data_6, data_7]):\n",
   "    _subset.to_excel('data_' + str(_label) + '.xlsx')"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "# Test data, do not use: invalid data.\n",
   "ts_labels = ts_all[\"index\"]\n",
   "ts_features = ts_all.drop([\"dateTime\", \"index\", \"whiteBalance\"], axis=1)\n",
   "\n",
   "\n",
   "\n",
   "ts_features.describe()"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
   "\n",
   "# Do not execute the code in this section.\n",
   "\n",
   "# Drop every H/S/V-derived column (plain value, stddev, hist, max, min) of the three blocks.\n",
   "_hsv_columns = [\n",
   "    _block + \"_\" + _ch + _stat\n",
   "    for _stat in (\"\", \"_stddev\", \"_hist\", \"_max\", \"_min\")\n",
   "    for _block in (\"left_block\", \"right_block\", \"whiteBlock\")\n",
   "    for _ch in (\"H\", \"S\", \"V\")\n",
   "]\n",
   "train_features = train_features.drop(_hsv_columns, axis=1)\n",
   "\n",
   "# Left-minus-right differences for the remaining RGB and Lab statistics.\n",
   "# H/S/V differences are not computed here because those columns were dropped above.\n",
   "# The 'lelf_' spelling of the new column names is kept as-is so existing consumers keep working.\n",
   "for _stat in (\"\", \"_stddev\", \"_hist\", \"_max\", \"_min\"):\n",
   "    for _ch in (\"R\", \"G\", \"B\", \"l\", \"a\", \"b\"):\n",
   "        train_features['lelf_right_' + _ch + _stat] = (\n",
   "            train_features['left_block_' + _ch + _stat] - train_features['right_block_' + _ch + _stat]\n",
   "        )\n",
   "\n",
   "# Gray-level differences: (new column suffix, source column suffix).\n",
   "for _name, _col in [(\"value\", \"grayValue\"), (\"stddev\", \"grayStddevValue\"), (\"hist\", \"grayHist\"), (\"max\", \"grayMax\"), (\"min\", \"grayMin\")]:\n",
   "    train_features['lelf_right_gray_' + _name] = train_features['left_' + _col] - train_features['right_' + _col]\n",
   "train_features.describe()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [ "# 
train_features_9是真正的训练数据" ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
   "# This original algorithm has the best fault tolerance!!!! 2020.5.22\n",
   "# Build the training matrix from left-minus-right block differences only.\n",
   "# Columns are created statistic by statistic (plain, stddev, hist, max, min), and within each\n",
   "# statistic in channel order R, G, B, H, S, V, l, a, b, followed by the five gray-level differences.\n",
   "# Raw per-block columns (left/right gray statistics, left-block stddev/min) are not included.\n",
   "# The 'lelf_' spelling of the column names is kept as-is so existing consumers keep working.\n",
   "train_features_9 = pd.DataFrame()\n",
   "for _stat in (\"\", \"_stddev\", \"_hist\", \"_max\", \"_min\"):\n",
   "    for _ch in (\"R\", \"G\", \"B\", \"H\", \"S\", \"V\", \"l\", \"a\", \"b\"):\n",
   "        _left = train_features['left_block_' + _ch + _stat]\n",
   "        _right = train_features['right_block_' + _ch + _stat]\n",
   "        train_features_9['lelf_right_' + _ch + _stat] = _left - _right\n",
   "\n",
   "# Gray-level differences: (new column suffix, source column suffix).\n",
   "_gray_pairs = [\n",
   "    (\"value\", \"grayValue\"),\n",
   "    (\"stddev\", \"grayStddevValue\"),\n",
   "    (\"hist\", \"grayHist\"),\n",
   "    (\"max\", \"grayMax\"),\n",
   "    (\"min\", \"grayMin\"),\n",
   "]\n",
   "for _name, _col in _gray_pairs:\n",
   "    train_features_9['lelf_right_gray_' + _name] = train_features['left_' + _col] - train_features['right_' + _col]\n",
   "#train_features_9['index'] = train_labels\n",
   "train_features_9.describe()\n"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "ts_features_9 = pd.DataFrame()\n", "ts_features_9['lelf_right_R'] = ts_features['left_block_R'] - ts_features['right_block_R']\n", "ts_features_9['lelf_right_G'] = ts_features['left_block_G'] - ts_features['right_block_G']\n", "ts_features_9['lelf_right_B'] = ts_features['left_block_B'] - 
ts_features['right_block_B']\n", "\n", "ts_features_9['lelf_right_H'] = ts_features['left_block_H'] - ts_features['right_block_H']\n", "ts_features_9['lelf_right_S'] = ts_features['left_block_S'] - ts_features['right_block_S']\n", "ts_features_9['lelf_right_V'] = ts_features['left_block_V'] - ts_features['right_block_V']\n", "\n", "ts_features_9['lelf_right_l'] = ts_features['left_block_l'] - ts_features['right_block_l']\n", "ts_features_9['lelf_right_a'] = ts_features['left_block_a'] - ts_features['right_block_a']\n", "ts_features_9['lelf_right_b'] = ts_features['left_block_b'] - ts_features['right_block_b']\n", "\n", "ts_features_9['lelf_right_R_stddev'] = ts_features['left_block_R_stddev'] - ts_features['right_block_R_stddev']\n", "ts_features_9['lelf_right_G_stddev'] = ts_features['left_block_G_stddev'] - ts_features['right_block_G_stddev']\n", "ts_features_9['lelf_right_B_stddev'] = ts_features['left_block_B_stddev'] - ts_features['right_block_B_stddev']\n", "\n", "ts_features_9['lelf_right_H_stddev'] = ts_features['left_block_H_stddev'] - ts_features['right_block_H_stddev']\n", "ts_features_9['lelf_right_S_stddev'] = ts_features['left_block_S_stddev'] - ts_features['right_block_S_stddev']\n", "ts_features_9['lelf_right_V_stddev'] = ts_features['left_block_V_stddev'] - ts_features['right_block_V_stddev']\n", "\n", "ts_features_9['lelf_right_l_stddev'] = ts_features['left_block_l_stddev'] - ts_features['right_block_l_stddev']\n", "ts_features_9['lelf_right_a_stddev'] = ts_features['left_block_a_stddev'] - ts_features['right_block_a_stddev']\n", "ts_features_9['lelf_right_b_stddev'] = ts_features['left_block_b_stddev'] - ts_features['right_block_b_stddev']\n", "\n", "ts_features_9['lelf_right_R_hist'] = ts_features['left_block_R_hist'] - ts_features['right_block_R_hist']\n", "ts_features_9['lelf_right_G_hist'] = ts_features['left_block_G_hist'] - ts_features['right_block_G_hist']\n", "ts_features_9['lelf_right_B_hist'] = ts_features['left_block_B_hist'] - 
ts_features['right_block_B_hist']\n", "\n", "ts_features_9['lelf_right_H_hist'] = ts_features['left_block_H_hist'] - ts_features['right_block_H_hist']\n", "ts_features_9['lelf_right_S_hist'] = ts_features['left_block_S_hist'] - ts_features['right_block_S_hist']\n", "ts_features_9['lelf_right_V_hist'] = ts_features['left_block_V_hist'] - ts_features['right_block_V_hist']\n", "\n", "ts_features_9['lelf_right_l_hist'] = ts_features['left_block_l_hist'] - ts_features['right_block_l_hist']\n", "ts_features_9['lelf_right_a_hist'] = ts_features['left_block_a_hist'] - ts_features['right_block_a_hist']\n", "ts_features_9['lelf_right_b_hist'] = ts_features['left_block_b_hist'] - ts_features['right_block_b_hist']\n", "\n", "ts_features_9['lelf_right_R_max'] = ts_features['left_block_R_max'] - ts_features['right_block_R_max']\n", "ts_features_9['lelf_right_G_max'] = ts_features['left_block_G_max'] - ts_features['right_block_G_max']\n", "ts_features_9['lelf_right_B_max'] = ts_features['left_block_B_max'] - ts_features['right_block_B_max']\n", "\n", "ts_features_9['lelf_right_H_max'] = ts_features['left_block_H_max'] - ts_features['right_block_H_max']\n", "ts_features_9['lelf_right_S_max'] = ts_features['left_block_S_max'] - ts_features['right_block_S_max']\n", "ts_features_9['lelf_right_V_max'] = ts_features['left_block_V_max'] - ts_features['right_block_V_max']\n", "\n", "ts_features_9['lelf_right_l_max'] = ts_features['left_block_l_max'] - ts_features['right_block_l_max']\n", "ts_features_9['lelf_right_a_max'] = ts_features['left_block_a_max'] - ts_features['right_block_a_max']\n", "ts_features_9['lelf_right_b_max'] = ts_features['left_block_b_max'] - ts_features['right_block_b_max']\n", "\n", "ts_features_9['lelf_right_R_min'] = ts_features['left_block_R_min'] - ts_features['right_block_R_min']\n", "ts_features_9['lelf_right_G_min'] = ts_features['left_block_G_min'] - ts_features['right_block_G_min']\n", "ts_features_9['lelf_right_B_min'] = ts_features['left_block_B_min'] - 
ts_features['right_block_B_min']\n", "\n", "ts_features_9['lelf_right_H_min'] = ts_features['left_block_H_min'] - ts_features['right_block_H_min']\n", "ts_features_9['lelf_right_S_min'] = ts_features['left_block_S_min'] - ts_features['right_block_S_min']\n", "ts_features_9['lelf_right_V_min'] = ts_features['left_block_V_min'] - ts_features['right_block_V_min']\n", "\n", "ts_features_9['lelf_right_l_min'] = ts_features['left_block_l_min'] - ts_features['right_block_l_min']\n", "ts_features_9['lelf_right_a_min'] = ts_features['left_block_a_min'] - ts_features['right_block_a_min']\n", "ts_features_9['lelf_right_b_min'] = ts_features['left_block_b_min'] - ts_features['right_block_b_min']\n", "\n", "# ts_features_9['left_grayValue']= ts_features['left_grayValue'];\n", "# ts_features_9['left_grayStddevValue']= ts_features['left_grayStddevValue'];\n", "# ts_features_9['left_grayHist']= ts_features['left_grayHist'];\n", "# ts_features_9['left_grayMax']= ts_features['left_grayMax'];\n", "# ts_features_9['left_grayMin']= ts_features['left_grayMin'];\n", "\n", "# ts_features_9['right_grayValue']= ts_features['right_grayValue'];\n", "# ts_features_9['right_grayStddevValue']= ts_features['right_grayStddevValue'];\n", "# ts_features_9['right_grayHist']= ts_features['right_grayHist'];\n", "# ts_features_9['right_grayMax']= ts_features['right_grayMax'];\n", "# ts_features_9['right_grayMin']= ts_features['right_grayMin'];\n", "\n", "# ts_features_9['lelf_R_stddev'] = ts_features['left_block_R_stddev'] \n", "# ts_features_9['lelf_G_stddev'] = ts_features['left_block_G_stddev'] \n", "# ts_features_9['lelf_B_stddev'] = ts_features['left_block_B_stddev'] \n", "\n", "# ts_features_9['left_block_R_min'] = ts_features['left_block_R_min'] \n", "# ts_features_9['left_block_G_min'] = ts_features['left_block_G_min'] \n", "# ts_features_9['left_block_B_min'] = ts_features['left_block_B_min'] \n", "\n", "ts_features_9['lelf_right_gray_value'] = ts_features['left_grayValue'] - 
ts_features['right_grayValue']\n", "ts_features_9['lelf_right_gray_stddev'] = ts_features['left_grayStddevValue'] - ts_features['right_grayStddevValue']\n", "ts_features_9['lelf_right_gray_hist'] = ts_features['left_grayHist'] - ts_features['right_grayHist']\n", "ts_features_9['lelf_right_gray_max'] = ts_features['left_grayMax'] - ts_features['right_grayMax']\n", "ts_features_9['lelf_right_gray_min'] = ts_features['left_grayMin'] - ts_features['right_grayMin']\n", "\n", "#ts_features_9['index'] = ts_labels\n", "ts_features_9.describe()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "#测试可以删除哪些项目\n", "train_features_9 = pd.DataFrame()\n", "\n", "#train_features_9['right_grayValue'] = train_features['right_grayValue']\n", "#train_features_9['left_grayStddevValue'] = train_features['left_grayStddevValue']\n", "#train_features_9['right_grayStddevValue'] = train_features['right_grayStddevValue']\n", "#train_features_9['right_grayHist'] = train_features['right_grayHist']\n", "train_features_9['left_right_gray_value'] = train_features['left_grayValue'] - train_features['right_grayValue']\n", "train_features_9['left_right_gray_stddev'] = train_features['left_grayStddevValue'] - train_features['right_grayStddevValue']\n", "train_features_9['left_right_gray_hist'] = train_features['left_grayHist'] - train_features['right_grayHist']\n", "train_features_9['left_right_gray_max'] = train_features['left_grayMax'] - train_features['right_grayMax']\n", "train_features_9['left_right_gray_min'] = train_features['left_grayMin'] - train_features['right_grayMin']\n", "\n", "#train_features_9['left_H_stddev'] = train_features['left_block_H_stddev']\n", "#train_features_9['left_S_stddev'] = train_features['left_block_S_stddev']\n", "#train_features_9['left_V_stddev'] = train_features['left_block_V_stddev']\n", "#train_features_9['right_H_stddev'] = train_features['right_block_H_stddev']\n", 
"#train_features_9['right_S_stddev'] = train_features['right_block_S_stddev']\n", "#train_features_9['right_V_stddev'] = train_features['right_block_V_stddev']\n", "train_features_9['left_right_H'] = train_features['left_block_H'] - train_features['right_block_H']\n", "train_features_9['left_right_S'] = train_features['left_block_S'] - train_features['right_block_S']\n", "train_features_9['left_right_V'] = train_features['left_block_V'] - train_features['right_block_V']\n", "train_features_9['left_right_H_stddev'] = train_features['left_block_H_stddev'] - train_features['right_block_H_stddev']\n", "train_features_9['left_right_S_stddev'] = train_features['left_block_S_stddev'] - train_features['right_block_S_stddev']\n", "train_features_9['left_right_V_stddev'] = train_features['left_block_V_stddev'] - train_features['right_block_V_stddev']\n", "train_features_9['left_right_H_hist'] = train_features['left_block_H_hist'] - train_features['right_block_H_hist']\n", "train_features_9['left_right_S_hist'] = train_features['left_block_S_hist'] - train_features['right_block_S_hist']\n", "train_features_9['left_right_V_hist'] = train_features['left_block_V_hist'] - train_features['right_block_V_hist']\n", "train_features_9['left_right_H_max'] = train_features['left_block_H_max'] - train_features['right_block_H_max']\n", "train_features_9['left_right_S_max'] = train_features['left_block_S_max'] - train_features['right_block_S_max']\n", "train_features_9['left_right_V_max'] = train_features['left_block_V_max'] - train_features['right_block_V_max']\n", "train_features_9['left_right_H_min'] = train_features['left_block_H_min'] - train_features['right_block_H_min']\n", "train_features_9['left_right_S_min'] = train_features['left_block_S_min'] - train_features['right_block_S_min']\n", "train_features_9['left_right_V_min'] = train_features['left_block_V_min'] - train_features['right_block_V_min']\n", "\n", "\n", "\n", "#train_features_9['index'] = train_labels\n", 
"train_features_9.describe()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**去掉左边块的方差和白块和右边块的特征**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# train_features = train_features.drop(\"left_block_R\",axis=1)\n", "# train_features = train_features.drop(\"left_block_G\",axis=1)\n", "# train_features = train_features.drop(\"left_block_B\",axis=1)\n", "\n", "# train_features = train_features.drop(\"left_block_R_hist\",axis=1)\n", "# train_features = train_features.drop(\"left_block_G_hist\",axis=1)\n", "# train_features = train_features.drop(\"left_block_B_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_R_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_G_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_B_stddev\",axis=1)\n", "\n", "# train_features = train_features.drop(\"left_block_R_max\",axis=1)\n", "# train_features = train_features.drop(\"left_block_G_max\",axis=1)\n", "# train_features = train_features.drop(\"left_block_B_max\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_H\",axis=1)\n", "train_features = train_features.drop(\"left_block_S\",axis=1)\n", "train_features = train_features.drop(\"left_block_V\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_H_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_S_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_V_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_H_hist\",axis=1)\n", "train_features = train_features.drop(\"left_block_S_hist\",axis=1)\n", "train_features = train_features.drop(\"left_block_V_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_H_max\",axis=1)\n", "train_features = train_features.drop(\"left_block_S_max\",axis=1)\n", "train_features = train_features.drop(\"left_block_V_max\",axis=1)\n", 
"\n", "train_features = train_features.drop(\"left_block_l\",axis=1)\n", "train_features = train_features.drop(\"left_block_a\",axis=1)\n", "train_features = train_features.drop(\"left_block_b\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_l_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_a_stddev\",axis=1)\n", "train_features = train_features.drop(\"left_block_b_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_l_hist\",axis=1)\n", "train_features = train_features.drop(\"left_block_a_hist\",axis=1)\n", "train_features = train_features.drop(\"left_block_b_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"left_block_l_max\",axis=1)\n", "train_features = train_features.drop(\"left_block_a_max\",axis=1)\n", "train_features = train_features.drop(\"left_block_b_max\",axis=1)\n", "##################################################################\n", "\n", "# train_features = train_features.drop(\"right_block_R\",axis=1)\n", "# train_features = train_features.drop(\"right_block_G\",axis=1)\n", "# train_features = train_features.drop(\"right_block_B\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_R_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_G_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_B_stddev\",axis=1)\n", "\n", "# train_features = train_features.drop(\"right_block_R_hist\",axis=1)\n", "# train_features = train_features.drop(\"right_block_G_hist\",axis=1)\n", "# train_features = train_features.drop(\"right_block_B_hist\",axis=1)\n", "\n", "# train_features = train_features.drop(\"right_block_R_max\",axis=1)\n", "# train_features = train_features.drop(\"right_block_G_max\",axis=1)\n", "# train_features = train_features.drop(\"right_block_B_max\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_H\",axis=1)\n", "train_features = train_features.drop(\"right_block_S\",axis=1)\n", 
"train_features = train_features.drop(\"right_block_V\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_H_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_S_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_V_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_H_hist\",axis=1)\n", "train_features = train_features.drop(\"right_block_S_hist\",axis=1)\n", "train_features = train_features.drop(\"right_block_V_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_H_max\",axis=1)\n", "train_features = train_features.drop(\"right_block_S_max\",axis=1)\n", "train_features = train_features.drop(\"right_block_V_max\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_l\",axis=1)\n", "train_features = train_features.drop(\"right_block_a\",axis=1)\n", "train_features = train_features.drop(\"right_block_b\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_l_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_a_stddev\",axis=1)\n", "train_features = train_features.drop(\"right_block_b_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_l_hist\",axis=1)\n", "train_features = train_features.drop(\"right_block_a_hist\",axis=1)\n", "train_features = train_features.drop(\"right_block_b_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"right_block_l_max\",axis=1)\n", "train_features = train_features.drop(\"right_block_a_max\",axis=1)\n", "train_features = train_features.drop(\"right_block_b_max\",axis=1)\n", "\n", "####################################################################\n", "\n", "train_features = train_features.drop(\"whiteBlock_R\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_G\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_B\",axis=1)\n", "\n", "train_features = 
train_features.drop(\"whiteBlock_R_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_G_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_B_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_R_hist\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_G_hist\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_B_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_R_max\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_G_max\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_B_max\",axis=1)\n", "\n", "\n", "train_features = train_features.drop(\"whiteBlock_H\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_S\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_V\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_H_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_S_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_V_stddev\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_H_hist\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_S_hist\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_V_hist\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_H_max\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_S_max\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_V_max\",axis=1)\n", "\n", "\n", "\n", "train_features = train_features.drop(\"whiteBlock_l\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_a\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_b\",axis=1)\n", "\n", "train_features = train_features.drop(\"whiteBlock_l_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_a_stddev\",axis=1)\n", "train_features = train_features.drop(\"whiteBlock_b_stddev\",axis=1)\n", "\n", "train_features = 
train_features.drop(\"whiteBlock_l_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_max\",axis=1)\n",
"\n",
"##################################################################\n",
"\n",
"\n",
"\n",
"train_features.describe()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**去掉所有块的方差特征**" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Drop the '_stddev' column of every channel (R,G,B,H,S,V,l,a,b) for the left,\n",
"# right and white blocks.\n",
"# errors='ignore' keeps the cell idempotent: columns already removed by an\n",
"# earlier drop cell (or by re-running this one) are skipped instead of raising\n",
"# KeyError, so the notebook survives Restart & Run All.\n",
"_stddev_cols = [\n",
"    _block + '_' + _ch + '_stddev'\n",
"    for _block in ['left_block', 'right_block', 'whiteBlock']\n",
"    for _ch in ['R', 'G', 'B', 'H', 'S', 'V', 'l', 'a', 'b']\n",
"]\n",
"train_features = train_features.drop(_stddev_cols, axis=1, errors='ignore')\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Drop the '_stddev' and '_max' columns of every channel (R,G,B,H,S,V,l,a,b)\n",
"# for the left, right and white blocks. The plain channel columns and the\n",
"# '_hist' columns are kept.\n",
"# errors='ignore' skips columns that an earlier drop cell already removed, so\n",
"# this cell no longer raises KeyError when the drop cells are run in sequence.\n",
"_stddev_max_cols = [\n",
"    _block + '_' + _ch + _stat\n",
"    for _stat in ['_stddev', '_max']\n",
"    for _block in ['left_block', 'right_block', 'whiteBlock']\n",
"    for _ch in ['R', 'G', 'B', 'H', 'S', 'V', 'l', 'a', 'b']\n",
"]\n",
"train_features = train_features.drop(_stddev_max_cols, axis=1, errors='ignore')"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "train_features.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# svc支持向量机算法" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"#from sklearn.model_selection import KFold\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"#集成学习(Ensemble Learning) \n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"#报错:ModuleNotFoundError: No module named 'sklearn.cross_validation'\n",
"#原因:当前 sklearn 版本中 cross_validation 已经替换成了 model_selection,但其中的函数功能并没有变化\n",
"#from sklearn.cross_validation import train_test_split\n",
"from 
sklearn.model_selection import train_test_split\n",
"X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.3, random_state = 20)\n"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
"##fit_transform,fit,transform区别和作用详解\n",
"###fit和transform没有任何关系,仅仅是数据处理的两个不同环节,之所以出来fit_transform这个函数名,仅仅是为了写代码方便,会高效一点。\n",
"###sklearn里的封装好的各种算法使用前都要fit,fit相对于整个代码而言,为后续API服务。fit之后,然后调用各种API方法,transform只是其中一个API方法,所以当你调用transform之外的方法,也必须要先fit。\n",
"###fit原义指的是安装、使适合的意思,其实有点train的含义,但是和train不同的是,它并不是一个训练的过程,而是一个适配的过程,过程都是确定的,最后得到一个可用于转换的有价值的信息。\n",
"###https://blog.csdn.net/weixin_38278334/article/details/82971752\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [
"# Legacy 5-fold experiment with a linear SVC (original note: no longer needed).\n",
"# KFold is imported here because its import in the import cell is commented out;\n",
"# without it this cell fails with NameError.\n",
"from datetime import datetime\n",
"from sklearn.model_selection import KFold\n",
"\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"kf.get_n_splits(X)\n",
"\n",
"print(kf)\n",
"\n",
"for train_index, test_index in kf.split(X):\n",
"    print('TRAIN:', train_index, 'TEST:', test_index)\n",
"    # Fold-local names: the global X_train/X_test/y_train/y_test produced by\n",
"    # train_test_split are used by later cells and must not be overwritten here.\n",
"    X_fold_train, X_fold_test = X[train_index], X[test_index]\n",
"    y_fold_train, y_fold_test = y[train_index], y[test_index]\n",
"\n",
"    training_start_time = datetime.now()\n",
"\n",
"    clf_svm_linear = SVC(kernel = 'linear',gamma=0.00001,C=0.1)\n",
"    clf_svm_linear = clf_svm_linear.fit(X_fold_train, y_fold_train)\n",
"    pred = clf_svm_linear.predict(X_fold_test)\n",
"    # print() calls instead of Python 2 print statements (SyntaxError on Python 3).\n",
"    print('svm linear accuracy score:', accuracy_score(y_fold_test, pred))\n",
"    print('f1 score:', f1_score(y_fold_test, pred, average='micro'))\n",
"\n",
"    training_stop_time = datetime.now()\n",
"\n",
"    print('runing time:', (training_stop_time - training_start_time))\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"from sklearn.model_selection import StratifiedKFold #交叉验证\n",
"from sklearn.model_selection import GridSearchCV #网格搜索\n",
"from sklearn.model_selection import train_test_split #将数据集分开成训练集和测试集\n",
"from xgboost 
import XGBClassifier #xgboost\n",
    "\n",
    "# Not needed any more!\n",
    "model = XGBClassifier()\n",
    "learning_rate = [0.0001,0.001,0.01,0.1,0.2,0.3]  # candidate learning rates\n",
    "gamma = [1, 0.1, 0.01, 0.001]\n",
    "param_grid = dict(learning_rate = learning_rate,gamma = gamma)  # GridSearchCV takes the grid as a dict\n",
    "kflod = StratifiedKFold(n_splits=10, shuffle = True,random_state=7)  # 10 shuffled, stratified folds\n",
    "# scoring picks the metric, n_jobs=-1 uses every CPU core, cv sets the cross-validation splitter\n",
    "grid_search = GridSearchCV(model,param_grid,scoring = 'neg_log_loss',n_jobs = -1,cv = kflod)\n",
    "grid_result = grid_search.fit(X_train, y_train)  # run the grid search\n",
    "print(\"Best: %f using %s\" % (grid_result.best_score_,grid_search.best_params_))\n",
    "# best_params_ is the best parameter combination and best_score_ its mean cross-validated score;\n",
    "# cv_results_ is a dict of columns whose 'params' entry lists the settings of every candidate.\n",
    "means = grid_result.cv_results_['mean_test_score']\n",
    "params = grid_result.cv_results_['params']\n",
    "for mean,param in zip(means,params):\n",
    "    print(\"%f with: %r\" % (mean,param))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import StratifiedKFold  # cross-validation splitter\n",
    "from sklearn.model_selection import GridSearchCV  # grid search\n",
    "from sklearn import metrics\n",
    "# https://blog.csdn.net/WJWFighting/article/details/80983022\n",
    "thresholds = np.linspace(0,0.1,20)  # evenly spaced candidate values for gamma\n",
    "param_grid = {'gamma':thresholds}\n",
    "clf = GridSearchCV(SVC(kernel='rbf'),param_grid,cv=5)\n",
    "clf.fit(X_train, y_train)\n",
    "\n",
    "print(\"best param: {0}\\nbest score: {1}\".format(clf.best_params_, clf.best_score_))\n",
    "\n",
    "y_pred = clf.predict(X_test)\n",
    "\n",
    "# scikit-learn metrics are called as metric(y_true, y_pred); passing them the other way\n",
    "# round swaps precision and recall. The default average='binary' only accepts two classes,\n",
    "# so macro averaging is used whenever more than two labels are present.\n",
    "average = 'binary' if len(np.union1d(y_test, y_pred)) <= 2 else 'macro'\n",
    "print(\"查准率:\",metrics.precision_score(y_test, y_pred, average=average))\n",
    "print(\"召回率:\",metrics.recall_score(y_test, y_pred, average=average))\n",
    "print(\"F1:\",metrics.f1_score(y_test, y_pred, average=average))\n",
    "\n",
    "print(\"最佳效果:%0.3f\"% clf.best_score_)\n",
    "print(\"最优参数组合:\")\n",
    "best_parameters=clf.best_estimator_.get_params()\n",
    "for param_name in sorted(param_grid.keys()):\n",
    "    print('\\t%s:%r' %(param_name,best_parameters[param_name]))\n",
    "\n",
    "\"\"\"\n",
    "SVC方法。常用的参数如下:\n",
    "C:默认为1.0,是对于错误的惩罚项。\n",
    "kernel:指定算法的核函数,默认为'rbf',常用的有'linear','poly','rbf','sigmoid','precomputed'。\n",
    "degree:多项式核函数的次数('poly'),默认为3。 其他核函数会将其忽略。\n",
    "gamma:'rbf','poly'和'sigmoid'的核系数。 如果gamma是'auto',那么将使用1 / n_features。\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#X_train = train_features_9\n",
    "#y_train = train_labels\n",
    "# X_test = test_features\n",
    "# y_test = test_labels\n",
    "#clf_svm_linear = SVC(kernel = 'linear',gamma=0.00001,C=0.01)\n",
    "#clf_svm_linear = SVC(kernel = 'linear',gamma=0.01,C=0.01)\n",
    "#svm linear accuracy score: 0.9746101835242169\n",
    "clf_svm_linear = SVC(kernel = 'linear',C=0.1)\n",
    "#svm linear accuracy score: 0.974885004599816\n",
    "clf_svm_linear = clf_svm_linear.fit(X_train, y_train)\n",
    "# pred = clf_svm_linear.predict(X_test)\n",
    "# print \"svm linear accuracy score:\" , accuracy_score(y_test,pred)\n",
    "# print \"f1 score:\" , f1_score(y_test,pred,average='micro')\n",
    "pred = clf_svm_linear.predict(X_test)\n",
    "print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
    "print (\"f1 score :\" , f1_score(y_test,pred,average=None))\n",
    "print (\"precision_score:\" , precision_score(y_test,pred,average=None))\n",
    "print (\"recall_score :\" , recall_score(y_test,pred,average=None))\n",
    "print(\"preds:\",pred[:10])\n",
    "print('trues:\\n',y_test[:10])\n",
    "print(\"\\n\")\n",
    "###针对同一份数据,\n",
    "clf_svc_poly = SVC(kernel='poly',degree=3,gamma=0.001,C=0.1)\n",
    "#clf_svc_poly = SVC(kernel='poly',degree=3,gamma=0.00001,C=0.1)\n",
    "\n",
    "##svm polynomial accuracy score: 0.37460901563937443\n",
    "clf_svc_poly.fit(X_train, y_train)\n",
    "pred_poly = clf_svc_poly.predict(X_test)\n",
    "print (\"svm polynomial accuracy score:\" , accuracy_score(y_test,pred_poly))\n",
    "print (\"f1 score :\" , f1_score(y_test,pred_poly,average=None))\n",
    "print (\"precision_score:\" , precision_score(y_test,pred_poly,average=None))\n",
    "print (\"recall_score :\" , recall_score(y_test,pred_poly,average=None))\n",
    "\n",
    "clf_svc_rbf = SVC(kernel='rbf', gamma=0.05,C=0.1)\n",
    "##svm rbf accuracy score: 0.284360625574977\n",
    "clf_svc_rbf.fit(X_train, y_train)\n",
    "pred_rbf = clf_svc_rbf.predict(X_test)\n",
    "print (\"svm rbf accuracy score:\" , accuracy_score(y_test,pred_rbf))\n",
    "print (\"f1 score :\" , f1_score(y_test,pred_rbf,average=None))\n",
    "print (\"precision_score:\" , precision_score(y_test,pred_rbf,average=None))\n",
    "print (\"recall_score :\" , recall_score(y_test,pred_rbf,average=None))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn_porter import Porter\n",
    "\n",
    "# Export the fitted linear SVM as C source code.\n",
    "porter_clf_svm_linear = Porter(clf_svm_linear, language='c').export()\n",
    "\n",
    "# The context manager closes the file even if encoding or writing fails.\n",
    "with open(\"clf/clf_svm_linear_50features_2020.cpp\",'wb') as f:\n",
    "    f.write(porter_clf_svm_linear.encode())\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 随机森林算法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#from sklearn.model_selection import KFold\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.metrics import f1_score\n",
    "from sklearn.metrics import precision_score\n",
    "from sklearn.metrics import recall_score\n",
    "\n",
    "\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.3, random_state = 20)\n",
    "#X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.2, random_state = 20)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import classification_report,confusion_matrix\n",
    "from sklearn.metrics import f1_score\n",
    "from sklearn.metrics import precision_score\n",
    "from sklearn.metrics import recall_score\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=600)\n",
    "\n",
    "rfc = RandomForestClassifier(n_estimators=20)\n",
    "#20 : 0.992901760363429 30:0.9937941104891701 40: 0.9944430923987994 50:0.9944836537681512\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=20)\n",
    "#RandomForest accuracy score: 0.9772012028813204/0.9803133086229806/0.9811874956290649/0.9852786908175397\n",
    "#50- 0.9648738541413158\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=50)\n",
    "#---- 0.9857629593575079\n",
    "#RandomForest accuracy score: 0.9955940974893349\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=20)\n",
    "#30-0.9595197533868743 40 - 0.9553013709742841 50 - 0.9528271274438225\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=100,min_samples_leaf=50)\n",
    "#RandomForest accuracy score: 0.97688649555913\n",
    "\n",
    "#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=100)\n",
    "#RandomForest accuracy score: 0.9669906986502552\n",
    "\n",
    "\n",
    "rfc.fit(X_train, y_train)\n",
    "rfc_pred = rfc.predict(X_test)\n",
    "cr = classification_report(y_test,rfc_pred)\n",
    "print(cr)\n",
    "\n",
    "print(\"---------------------------------\\n\")\n",
    "#print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
    "print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
    "print(\"---------------------------------\\n\")\n",
    "print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
    "print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
    "print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
    "cm = confusion_matrix(y_test,rfc_pred)\n",
    "print(cm)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn_porter import Porter\n",
    "\n",
    "# Export the fitted random forest as C source code.\n",
    "porter_clf_rfc = Porter(rfc, language='c').export()\n",
    "\n",
    "# The context manager closes the file even if encoding or writing fails.\n",
    "with open(\"clf/ov_rtree20_f50_2020522.cpp\",'wb') as f:\n",
    "    f.write(porter_clf_rfc.encode())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "\n",
    "X = train_features_9.values\n",
    "y = train_labels.values\n",
    "\n",
    "kf = KFold(n_splits=5)\n",
    "kf.get_n_splits(X)\n",
    "\n",
    "print(kf) 
\n",
    "\n",
    "from datetime import datetime\n",
    "\n",
    "# X_train/X_test/y_train/y_test and rfc are rebound on every fold, so the cells that\n",
    "# follow see the values of the last fold.\n",
    "for train_index, test_index in kf.split(X):\n",
    "    print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
    "    X_train, X_test = X[train_index], X[test_index]\n",
    "    y_train, y_test = y[train_index], y[test_index]\n",
    "\n",
    "    training_start_time = datetime.now()\n",
    "\n",
    "    rfc = RandomForestClassifier(n_estimators=600)\n",
    "    rfc.fit(X_train, y_train)\n",
    "    rfc_pred = rfc.predict(X_test)\n",
    "    # This loop scores the random forest, so the label says so (it used to say \"svm linear\").\n",
    "    print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
    "    print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
    "    print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
    "    print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
    "\n",
    "    training_stop_time = datetime.now()\n",
    "\n",
    "    print (\"runing time:\",(training_stop_time - training_start_time))\n",
    "    print(\"\\n\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sweep the forest size from 5 to 100 trees in steps of 5 and record the test accuracy.\n",
    "nsimu = 21\n",
    "accuracy=[0]*nsimu\n",
    "ntree = [0]*nsimu\n",
    "for i in range(1,nsimu):\n",
    "    rfc = RandomForestClassifier(n_estimators=i*5,min_samples_split=10,max_depth=None,criterion='gini')\n",
    "    rfc.fit(X_train, y_train)\n",
    "    rfc_pred = rfc.predict(X_test)\n",
    "    cm = confusion_matrix(y_test,rfc_pred)\n",
    "    # Correct predictions are the whole diagonal of the confusion matrix; summing only\n",
    "    # cm[0,0] and cm[1,1] under-counts as soon as there are more than two classes.\n",
    "    cm_accuracy = np.trace(cm)/cm.sum()\n",
    "    accuracy[i] = cm_accuracy\n",
    "    ntree[i]=i*5\n",
    "\n",
    "    print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
    "    print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
    "    print (\"Accuracy of prediction:\",round(cm_accuracy,3))\n",
    "\n",
    "\n",
    "plt.figure(figsize=(10,6))\n",
    "plt.scatter(x=ntree[1:nsimu],y=accuracy[1:nsimu],s=60,c='red')\n",
    "plt.title(\"Number of trees in the Random Forest vs. 
prediction accuracy (criterion: 'gini')\", fontsize=18)\n",
    "plt.xlabel(\"Number of trees\", fontsize=15)\n",
    "plt.ylabel(\"Prediction accuracy from confusion matrix\", fontsize=15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from sklearn.utils import shuffle\n",
    "\n",
    "# NOTE(review): test_features (and test_labels, used further down) are not built in this\n",
    "# cell; presumably they are prepared from a held-out frame earlier in the notebook -\n",
    "# confirm before running.\n",
    "\n",
    "# Left-minus-right difference of every block statistic. The frame is passed to\n",
    "# clf_svm_linear.predict() below, so columns are created in a fixed order: plain value\n",
    "# first, then stddev, hist, max and min, each over the channels R, G, B, H, V, l, a, b.\n",
    "# The S channel is not included.\n",
    "train_features_10 = pd.DataFrame()\n",
    "for stat_suffix in ('', '_stddev', '_hist', '_max', '_min'):\n",
    "    for channel in ('R', 'G', 'B', 'H', 'V', 'l', 'a', 'b'):\n",
    "        block_column = 'block_' + channel + stat_suffix\n",
    "        train_features_10['lelf_right_' + channel + stat_suffix] = (\n",
    "            test_features['left_' + block_column] - test_features['right_' + block_column]\n",
    "        )\n",
    "\n",
    "# Left-minus-right difference of the gray-level statistics.\n",
    "gray_columns = (\n",
    "    ('value', 'grayValue'),\n",
    "    ('stddev', 'grayStddevValue'),\n",
    "    ('hist', 'grayHist'),\n",
    "    ('max', 'grayMax'),\n",
    "    ('min', 'grayMin'),\n",
    ")\n",
    "for short_name, gray_column in gray_columns:\n",
    "    train_features_10['lelf_right_gray_' + short_name] = (\n",
    "        test_features['left_' + gray_column] - test_features['right_' + gray_column]\n",
    "    )\n",
    "\n",
    "train_features_10.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove every H/S/V column (plain value, stddev, hist, max, min) of the three blocks.\n",
    "hsv_columns = [\n",
    "    block + '_' + channel + stat_suffix\n",
    "    for stat_suffix in ('', '_stddev', '_hist', '_max', '_min')\n",
    "    for block in ('left_block', 'right_block', 'whiteBlock')\n",
    "    for channel in ('H', 'S', 'V')\n",
    "]\n",
    "test_features = test_features.drop(hsv_columns, axis=1)\n",
    "\n",
    "# Left-minus-right differences of the plain R, G and B block values.\n",
    "for channel in ('R', 'G', 'B'):\n",
    "    test_features['lelf_right_' + channel] = (\n",
    "        test_features['left_block_' + channel] - test_features['right_block_' + channel]\n",
    "    )\n",
    "\n",
    "# Difference features that are currently disabled:\n",
    "# 
test_features['lelf_right_G_min'] = test_features['left_block_G_min'] - test_features['right_block_G_min']\n",
    "# test_features['lelf_right_B_min'] = test_features['left_block_B_min'] - test_features['right_block_B_min']\n",
    "\n",
    "# test_features['lelf_right_H_min'] = test_features['left_block_H_min'] - test_features['right_block_H_min']\n",
    "# test_features['lelf_right_S_min'] = test_features['left_block_S_min'] - test_features['right_block_S_min']\n",
    "# test_features['lelf_right_V_min'] = test_features['left_block_V_min'] - test_features['right_block_V_min']\n",
    "\n",
    "# test_features['lelf_right_l_min'] = test_features['left_block_l_min'] - test_features['right_block_l_min']\n",
    "# test_features['lelf_right_a_min'] = test_features['left_block_a_min'] - test_features['right_block_a_min']\n",
    "# test_features['lelf_right_b_min'] = test_features['left_block_b_min'] - test_features['right_block_b_min']\n",
    "\n",
    "test_features['lelf_right_gray_value'] = test_features['left_grayValue'] - test_features['right_grayValue']\n",
    "test_features['lelf_right_gray_stddev'] = test_features['left_grayStddevValue'] - test_features['right_grayStddevValue']\n",
    "test_features['lelf_right_gray_hist'] = test_features['left_grayHist'] - test_features['right_grayHist']\n",
    "test_features['lelf_right_gray_max'] = test_features['left_grayMax'] - test_features['right_grayMax']\n",
    "test_features['lelf_right_gray_min'] = test_features['left_grayMin'] - test_features['right_grayMin']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def report_scores(y_true, y_pred):\n",
    "    \"\"\"Print accuracy and micro-averaged F1, recall and precision of y_pred against y_true.\"\"\"\n",
    "    print(\"svm linear accuracy score:\" , accuracy_score(y_true,y_pred))\n",
    "    print(\"f1 score:\" , f1_score(y_true,y_pred,average='micro'))\n",
    "    print(\"recall_score :\" , recall_score(y_true,y_pred,average='micro'))\n",
    "    print(\"precision_score :\" , precision_score(y_true,y_pred,average='micro'))\n",
    "\n",
    "\n",
    "pred = clf_svm_linear.predict(train_features_10)\n",
    "test_features_gray_stddev = test_features['left_grayStddevValue']\n",
    "test_features_np = test_features_gray_stddev.values\n",
    "report_scores(test_labels, pred)\n",
    "\n",
    "# Post-processing rule: a left gray stddev below 3 forces the prediction to class 0.\n",
    "pred[test_features_np < 3] = 0\n",
    "report_scores(test_labels, pred)\n",
    "\n",
    "\n",
    "print(\"preds:\",pred[120:130])\n",
    "print('trues:\\n',test_labels[120:130])\n",
    "test_labels_np = test_labels.values\n",
    "print(test_labels_np[0])\n",
    "\n",
    "# Samples where class 4 is either predicted or the true label, and how many of those are wrong.\n",
    "# NOTE(review): the nesting of the two ifs is the most plausible reading of the original - confirm.\n",
    "all_counter = 0\n",
    "counter = 0\n",
    "for predicted, actual in zip(pred, test_labels_np):\n",
    "    if predicted == 4 or actual == 4:\n",
    "        all_counter = all_counter + 1\n",
    "        if predicted != actual:\n",
    "            counter = counter + 1\n",
    "            print(predicted , actual)\n",
    "print(len(pred),all_counter, counter)\n",
    "\n",
    "# Every misclassified sample; all_counter is reset and never incremented here, so it prints 0.\n",
    "all_counter = 0\n",
    "counter = 0\n",
    "for predicted, actual in zip(pred, test_labels_np):\n",
    "    if predicted != actual:\n",
    "        counter = counter + 1\n",
    "        print(predicted , actual)\n",
    "print(len(pred),all_counter, counter)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## from sklearn.metrics import recall_score\n",
    "from sklearn.metrics import precision_score\n",
    "print \"accuracy score:\" , accuracy_score(y_test,pred)\n",
    "print \"recall_score :\" , recall_score(y_test,pred,average='macro')\n",
    "print \"precision_score :\" , precision_score(y_test,pred,average='macro')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn_porter import Porter\n",
    "\n",
    "# NOTE(review): clf_svm is not defined in any cell visible here; the linear SVM trained\n",
    "# above is named clf_svm_linear - confirm which estimator should be exported.\n",
    "porter_java = Porter(clf_svm, language='java').export()\n",
    "porter_c = Porter(clf_svm, language='c').export()\n",
    "\n",
    "# The exported source is text (the other export cells call .encode() on it), so it is\n",
    "# encoded before being written to a file opened in binary mode.\n",
    "with open(\"Protein_c.txt\",'wb') as f:\n",
    "    f.write(porter_c.encode())\n",
    "\n",
    "with open(\"Protein_svm_java.txt\",'wb') as f:\n",
    "    f.write(porter_java.encode())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Split data.csv into one CSV file per class label (the \"index\" column, values 0-4).\n",
    "try :\n",
    "    data = pd.read_csv(\"data.csv\")\n",
    "    print (\"load data successful !!!!!\")\n",
    "    data0 = data[data[\"index\"] == 0]\n",
    "    data1 = data[data[\"index\"] == 1]\n",
    "    data2 = data[data[\"index\"] == 2]\n",
    "    data3 = data[data[\"index\"] == 3]\n",
    "    data4 = data[data[\"index\"] == 4]\n",
    "    data0.to_csv('data0.csv')\n",
    "    data1.to_csv('data1.csv')\n",
    "    data2.to_csv('data2.csv')\n",
    "    data3.to_csv('data3.csv')\n",
    "    data4.to_csv('data4.csv')\n",
    "except Exception as load_error:\n",
    "    # Still best-effort, but no longer swallows KeyboardInterrupt and shows the real cause.\n",
    "    print (\"load data error !!!!!!!!!!\")\n",
    "    print (repr(load_error))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}