Files
coco 85d885e008 a
2026-07-03 16:29:47 +08:00

2098 lines
106 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 排卵试纸机器学习算法验证"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. **import moudle**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd \n",
"import seaborn as sns\n",
"from IPython.display import display\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"import sklearn\n",
"%matplotlib inline\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2. **load data**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"try :\n",
" data4_1 = pd.read_csv(\"data-4-2-old.txt\")\n",
" data4_2 = pd.read_csv(\"data-4-old.txt\")\n",
" data2 = pd.read_csv(\"data-pro-0.3456.txt\")\n",
" data3 = pd.read_csv(\"data-pro-0.71020.txt\")\n",
" data4 = pd.read_csv(\"data-pro-3.txt\")\n",
" data4_3 = pd.read_csv(\"hw-pro-data-4-old.txt\")\n",
" \n",
" print (\"load data successful !!!!!\")\n",
"except :\n",
" print (\"load data error !!!!!!!!!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data4 = data4.append(data4_1).append(data4_2).append(data4_3)\n",
"#data10_all['index'].replace(2,1,inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data4=data4.sort_values(by=\"h\" , ascending=False)\n",
"data4.to_csv('data4_sorted.txt')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data2 = data2.sort_values(by=\"h\" , ascending=False)\n",
"data3 = data3.sort_values(by=\"h\" , ascending=False)\n",
"\n",
"\n",
"data2.to_csv('data2_sorted.txt')\n",
"data3.to_csv('data3_sorted.txt')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"try :\n",
" apple0 = pd.read_csv(\"14/apple-0-old.txt\")\n",
" data0 = pd.read_csv(\"14/data-0-old.txt\")\n",
" data0_1 = pd.read_csv(\"14/data-pro-0.1.txt\")\n",
" data0_2 = pd.read_csv(\"14/data-pro-0.15.txt\")\n",
" data0_3 = pd.read_csv(\"14/hw-pro-data-0-old.txt\")\n",
" data1_1 = pd.read_csv(\"14/apple-1-old.txt\")\n",
" data1_2 = pd.read_csv(\"14/data-pro-0.2.txt\")\n",
" \n",
" print (\"load data successful !!!!!\")\n",
"except :\n",
" print (\"load data error !!!!!!!!!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data0 = apple0.append(data0).append(data0_1).append(data0_2).append(data0_3)\n",
"data1 = data1_1.append(data1_2)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data0 = data0.sort_values(by=\"h\" , ascending=False)\n",
"data1 = data1.sort_values(by=\"h\" , ascending=False)\n",
"\n",
"\n",
"data0.to_csv('data0_sorted.csv')\n",
"data1.to_csv('data1_sorted.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"try :\n",
"# data_iphone6p_75_10 = pd.read_csv(\"20170912.pm.csv\")\n",
"# data_iphone6p_1234 = pd.read_csv(\"20170920.pm.csv\")\n",
"# data_iphone6p_5 = pd.read_csv(\"20170922.pm.csv\")\n",
"# data_iphone6p_0 = pd.read_csv(\"20170925.am.csv\")\n",
"# data_iphone6p_0_0 = pd.read_csv(\"20170925.pm.csv\")\n",
"# data_iphone6p_246 = pd.read_csv(\"20171011.pm.csv\")\n",
" \n",
"# data1 = pd.read_csv(\"ovdata_reindex.csv\")\n",
"# data2 = pd.read_csv(\"ovdataMore_reindex.csv\")\n",
"# data3 = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
" ovdata = pd.read_csv(\"ovdata.csv\")\n",
" ovdataMore = pd.read_csv(\"ovdataMore.csv\")\n",
" ov_data_2020 = pd.read_csv(\"ov_data_2020.csv\")\n",
" data10more = pd.read_csv(\"data10more.csv\")\n",
"\n",
" data =ovdata.append(ovdataMore).append(ov_data_2020).append(data10more)\n",
" data_all = data[data[\"whiteBalance\"] == 0]\n",
" print(data_all.describe())\n",
"\n",
"# data4 = pd.read_csv(\"10_25_renew.csv\")\n",
"\n",
"# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
"# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
" \n",
"# data1 = pd.read_csv(\"ovdata.csv\")\n",
"# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
"# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
"# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
"# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
" \n",
" print (\"load data successful !!!!!\")\n",
"except :\n",
" print (\"load data error !!!!!!!!!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"data2019_2020_old = ovdata.append(ovdataMore).append(ov_data_2020).append(data10more)\n",
"data2019_2020_old['index'].replace(4,7,inplace=True)\n",
"data2019_2020_old['index'].replace(3,6,inplace=True)\n",
"data2019_2020_old['index'].replace(2,4,inplace=True)\n",
"data2019_2020_old['index'].replace(1,2,inplace=True)\n",
"\n",
"data2019_2020_old.describe()\n",
"data2019_2020_old.to_excel('data2019_2020_old.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"try :\n",
"# data_iphone6p_75_10 = pd.read_csv(\"20170912.pm.csv\")\n",
"# data_iphone6p_1234 = pd.read_csv(\"20170920.pm.csv\")\n",
"# data_iphone6p_5 = pd.read_csv(\"20170922.pm.csv\")\n",
"# data_iphone6p_0 = pd.read_csv(\"20170925.am.csv\")\n",
"# data_iphone6p_0_0 = pd.read_csv(\"20170925.pm.csv\")\n",
"# data_iphone6p_246 = pd.read_csv(\"20171011.pm.csv\")\n",
" \n",
"# data1 = pd.read_csv(\"ovdata_reindex.csv\")\n",
"# data2 = pd.read_csv(\"ovdataMore_reindex.csv\")\n",
"# data3 = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
" d1 = pd.read_excel(\"data_all_2020514.xlsx\")\n",
" d2 = pd.read_excel(\"data2019_2020_old.xlsx\")\n",
" \n",
" data =d1.append(d2)#.append(data3).append(data4)\n",
" data_all = data[data[\"whiteBalance\"] == 0]\n",
" print(data_all.describe())\n",
"# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
"# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
" \n",
"# data1 = pd.read_csv(\"ovdata.csv\")\n",
"# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
"# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
"# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
"# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
" \n",
" print (\"load data successful !!!!!\")\n",
"except :\n",
" print (\"load data error !!!!!!!!!!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#潘晓春的老数据,测试发现,这个数据不能使用在新算法中。\n",
"try :\n",
" t1 = pd.read_csv(\"newData/0_5_10_50_70.csv\")\n",
" t2 = pd.read_csv(\"newData/5_10_25_50_70.csv\")\n",
" \n",
" ts =t1.append(t2)#.append(data3).append(data4)\n",
" ts_all = ts[ts[\"whiteBalance\"] == 0]\n",
" print(ts_all.describe())\n",
"# data_all = pd.read_csv(\"data_all_2019_2020_reindex.csv\")\n",
"# data_all = pd.read_csv(\"ov_data_2020_reindex.csv\")\n",
" \n",
"# data1 = pd.read_csv(\"ovdata.csv\")\n",
"# data2 = pd.read_csv(\"ovdataMore.csv\")\n",
"# data3 = pd.read_csv(\"ov_data_2020.csv\")\n",
"# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
"# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
" \n",
" print (\"load data successful !!!!!\")\n",
"except :\n",
" print (\"load data error !!!!!!!!!!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3. **分析数据**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# data4 = data_iphone6p_246[data_iphone6p_246[\"whiteBalance\"] == 0]\n",
"# data2= data_iphone6p_1234[data_iphone6p_1234[\"whiteBalance\"] == 0 ]\n",
"# data1 = data_iphone6p_75_10[data_iphone6p_75_10[\"whiteBalance\"] == 0 ]\n",
"# data3 = data_iphone6p_5[data_iphone6p_5[\"whiteBalance\"] == 0]\n",
"# data0 = data_iphone6p_0[data_iphone6p_0[\"whiteBalance\"] == 0]\n",
"# data0_0 = data_iphone6p_0_0[data_iphone6p_0_0[\"whiteBalance\"] == 0]\n",
"\n",
"\n",
"#data_all = data2.append(data1[data1[\"index\"] == 5 ]).append(data3).append(data1[data1[\"index\"] == 7 ]).append(data1[data1[\"index\"] == 8 ]).append(data0).append(data0_0).append(data4)\n",
"#data1['index'].replace(4,6,inplace=True)\n",
"#data1['index'].replace(3,5,inplace=True)\n",
"#data1['index'].replace(2,4,inplace=True)\n",
"#data1['index'].replace(1,2,inplace=True)\n",
"\n",
"#data2['index'].replace(4,6,inplace=True)\n",
"#data2['index'].replace(3,5,inplace=True)\n",
"#data2['index'].replace(2,4,inplace=True)\n",
"#data2['index'].replace(1,2,inplace=True)\n",
"\n",
"#data3['index'].replace(4,6,inplace=True)\n",
"#data3['index'].replace(3,5,inplace=True)\n",
"#data3['index'].replace(2,4,inplace=True)\n",
"#data3['index'].replace(1,2,inplace=True)\n",
"\n",
"#data4['index'].replace(2,1,inplace=True)\n",
"#data4['index'].replace(4,2,inplace=True)\n",
"\n",
"#data1_0 = data1[data1[\"whiteBalance\"] == 0]\n",
"#data2_0 = data2[data2[\"whiteBalance\"] == 0]\n",
"#data3_0 = data3[data3[\"whiteBalance\"] == 0]\n",
"\n",
"#data_test_0 = data_test\n",
"\n",
"#data_all =data1_0.append(data2_0);\n",
"#data_all =data1.append(data2).append(data3);\n",
"\n",
"#data_all.to_csv('data_all_2019_2020_reindex.csv')\n",
"#data1.to_csv('ovdata_modifed.csv')\n",
"#data2.to_csv('ovdataMore_modifed.csv')\n",
"#data3.to_csv('ov_data_2020_modifed.csv')\n",
"#data4.to_csv('10_25_renew.csv')\n",
"\n",
"data =d1.append(d2)#.append(data3).append(data4)\n",
"data_all = data[data[\"whiteBalance\"] == 0]\n",
"print(data_all.describe())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"whiteBlock_R_one =data_all[data_all[\"index\"] == 0 ][\"right_block_l_min\"]\n",
"whiteBlock_G_one = data_all[data_all[\"index\"] == 0 ][\"right_block_a_min\"]\n",
"whiteBlock_B_one = data_all[data_all[\"index\"] == 0 ][\"right_block_b_min\"]\n",
"\n",
"whiteBlock_R_two = data_all[data_all[\"index\"] == 1 ][\"right_block_l_min\"]\n",
"whiteBlock_G_two = data_all[data_all[\"index\"] == 1 ][\"right_block_a_min\"]\n",
"whiteBlock_B_two = data_all[data_all[\"index\"] == 1 ][\"right_block_b_min\"]\n",
"\n",
"whiteBlock_R_three = data_all[data_all[\"index\"] == 2 ][\"right_block_l_min\"]\n",
"whiteBlock_G_three = data_all[data_all[\"index\"] == 2 ][\"right_block_a_min\"]\n",
"whiteBlock_B_three = data_all[data_all[\"index\"] == 2 ][\"right_block_b_min\"]\n",
"\n",
"whiteBlock_R_four = data_all[data_all[\"index\"] == 4 ][\"right_block_l_min\"]\n",
"whiteBlock_G_four = data_all[data_all[\"index\"] == 4 ][\"right_block_a_min\"]\n",
"whiteBlock_B_four = data_all[data_all[\"index\"] == 4 ][\"right_block_b_min\"]\n",
"\n",
"\n",
"whiteBlock_R_five = data_all[data_all[\"index\"] == 6 ][\"right_block_l_min\"]\n",
"whiteBlock_G_five = data_all[data_all[\"index\"] == 6 ][\"right_block_a_min\"]\n",
"whiteBlock_B_five = data_all[data_all[\"index\"] == 6 ][\"right_block_b_min\"]\n",
"\n",
"whiteBlock_R_six = data_all[data_all[\"index\"] == 7 ][\"right_block_l_min\"]\n",
"whiteBlock_G_six = data_all[data_all[\"index\"] == 7 ][\"right_block_a_min\"]\n",
"whiteBlock_B_six = data_all[data_all[\"index\"] == 7 ][\"right_block_b_min\"]\n",
"\n",
"fig = plt.figure()\n",
"#plt.rcParams[\"figure.figsize\"] = 20,20\n",
"ax = Axes3D(fig)\n",
"\n",
"ax.set_xlim(0,255)\n",
"ax.set_ylim(0,255)\n",
"ax.set_zlim(0,255)\n",
"ax.set_xlabel('H')\n",
"ax.set_ylabel('S')\n",
"ax.set_zlabel('V')\n",
"ax.set_title('HSV colorspace OV right block max value')\n",
"# ax.scatter(whiteBlock_R_zero, whiteBlock_G_zero, whiteBlock_B_zero,s = 15,c='y')\n",
"ax.scatter(whiteBlock_R_one, whiteBlock_G_one, whiteBlock_B_one,s = 15,c='r')\n",
"\n",
"ax.scatter(whiteBlock_R_two, whiteBlock_G_two, whiteBlock_B_two,s = 15,c='g')\n",
"ax.scatter(whiteBlock_R_three, whiteBlock_G_three, whiteBlock_B_three,s = 15,c='b')\n",
"\n",
"ax.scatter(whiteBlock_R_four, whiteBlock_G_four, whiteBlock_B_four,s = 15,c='y')\n",
"ax.scatter(whiteBlock_R_five, whiteBlock_G_five, whiteBlock_B_five,s = 15,c='pink')\n",
"ax.scatter(whiteBlock_R_six, whiteBlock_G_six, whiteBlock_B_six,s = 15,c='c')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_all.columns"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"hsv max min hist value h值要去掉"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 预处理数据"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"train_labels = data_all[\"index\"]\n",
"train_features = data_all.drop(\"dateTime\",axis=1)\n",
"train_features = train_features.drop(\"index\",axis=1)\n",
"train_features = train_features.drop(\"whiteBalance\",axis=1)\n",
"\n",
"\n",
"\n",
"train_features.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_0 = data_all[data_all[\"index\"] == 0]\n",
"data_1 = data_all[data_all[\"index\"] == 1]\n",
"data_2 = data_all[data_all[\"index\"] == 2]\n",
"data_3 = data_all[data_all[\"index\"] == 3]\n",
"data_4 = data_all[data_all[\"index\"] == 4]\n",
"data_5 = data_all[data_all[\"index\"] == 5]\n",
"data_6 = data_all[data_all[\"index\"] == 6]\n",
"data_7 = data_all[data_all[\"index\"] == 7]\n",
"data_0.to_excel('data_0.xlsx')\n",
"data_1.to_excel('data_1.xlsx')\n",
"data_2.to_excel('data_2.xlsx')\n",
"data_3.to_excel('data_3.xlsx')\n",
"data_4.to_excel('data_4.xlsx')\n",
"data_5.to_excel('data_5.xlsx')\n",
"data_6.to_excel('data_6.xlsx')\n",
"data_7.to_excel('data_7.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#测试数据,不要使用,无效数据\n",
"ts_labels = ts_all[\"index\"]\n",
"ts_features = ts_all.drop(\"dateTime\",axis=1)\n",
"ts_features = ts_features.drop(\"index\",axis=1)\n",
"ts_features = ts_features.drop(\"whiteBalance\",axis=1)\n",
"\n",
"\n",
"\n",
"ts_features.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"#这一节的代码,不要执行\n",
"\n",
"train_features = train_features.drop(\"left_block_H\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V\",axis=1)\n",
"\n",
"\n",
"train_features = train_features.drop(\"left_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_min\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_min\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_min\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_min\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_min\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_min\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_min\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_min\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_min\",axis=1)\n",
"\n",
"\n",
"\n",
"train_features['lelf_right_R'] = train_features['left_block_R'] - train_features['right_block_R']\n",
"train_features['lelf_right_G'] = train_features['left_block_G'] - train_features['right_block_G']\n",
"train_features['lelf_right_B'] = train_features['left_block_B'] - train_features['right_block_B']\n",
"\n",
"# train_features['lelf_right_H'] = train_features['left_block_H'] - train_features['right_block_H']\n",
"# train_features['lelf_right_S'] = train_features['left_block_S'] - train_features['right_block_S']\n",
"# train_features['lelf_right_V'] = train_features['left_block_V'] - train_features['right_block_V']\n",
"\n",
"train_features['lelf_right_l'] = train_features['left_block_l'] - train_features['right_block_l']\n",
"train_features['lelf_right_a'] = train_features['left_block_a'] - train_features['right_block_a']\n",
"train_features['lelf_right_b'] = train_features['left_block_b'] - train_features['right_block_b']\n",
"\n",
"train_features['lelf_right_R_stddev'] = train_features['left_block_R_stddev'] - train_features['right_block_R_stddev']\n",
"train_features['lelf_right_G_stddev'] = train_features['left_block_G_stddev'] - train_features['right_block_G_stddev']\n",
"train_features['lelf_right_B_stddev'] = train_features['left_block_B_stddev'] - train_features['right_block_B_stddev']\n",
"\n",
"# train_features['lelf_right_H_stddev'] = train_features['left_block_H_stddev'] - train_features['right_block_H_stddev']\n",
"# train_features['lelf_right_S_stddev'] = train_features['left_block_S_stddev'] - train_features['right_block_S_stddev']\n",
"# train_features['lelf_right_V_stddev'] = train_features['left_block_V_stddev'] - train_features['right_block_V_stddev']\n",
"\n",
"train_features['lelf_right_l_stddev'] = train_features['left_block_l_stddev'] - train_features['right_block_l_stddev']\n",
"train_features['lelf_right_a_stddev'] = train_features['left_block_a_stddev'] - train_features['right_block_a_stddev']\n",
"train_features['lelf_right_b_stddev'] = train_features['left_block_b_stddev'] - train_features['right_block_b_stddev']\n",
"\n",
"train_features['lelf_right_R_hist'] = train_features['left_block_R_hist'] - train_features['right_block_R_hist']\n",
"train_features['lelf_right_G_hist'] = train_features['left_block_G_hist'] - train_features['right_block_G_hist']\n",
"train_features['lelf_right_B_hist'] = train_features['left_block_B_hist'] - train_features['right_block_B_hist']\n",
"\n",
"# train_features['lelf_right_H_hist'] = train_features['left_block_H_hist'] - train_features['right_block_H_hist']\n",
"# train_features['lelf_right_S_hist'] = train_features['left_block_S_hist'] - train_features['right_block_S_hist']\n",
"# train_features['lelf_right_V_hist'] = train_features['left_block_V_hist'] - train_features['right_block_V_hist']\n",
"\n",
"train_features['lelf_right_l_hist'] = train_features['left_block_l_hist'] - train_features['right_block_l_hist']\n",
"train_features['lelf_right_a_hist'] = train_features['left_block_a_hist'] - train_features['right_block_a_hist']\n",
"train_features['lelf_right_b_hist'] = train_features['left_block_b_hist'] - train_features['right_block_b_hist']\n",
"\n",
"train_features['lelf_right_R_max'] = train_features['left_block_R_max'] - train_features['right_block_R_max']\n",
"train_features['lelf_right_G_max'] = train_features['left_block_G_max'] - train_features['right_block_G_max']\n",
"train_features['lelf_right_B_max'] = train_features['left_block_B_max'] - train_features['right_block_B_max']\n",
"\n",
"# train_features['lelf_right_H_max'] = train_features['left_block_H_max'] - train_features['right_block_H_max']\n",
"# train_features['lelf_right_S_max'] = train_features['left_block_S_max'] - train_features['right_block_S_max']\n",
"# train_features['lelf_right_V_max'] = train_features['left_block_V_max'] - train_features['right_block_V_max']\n",
"\n",
"train_features['lelf_right_l_max'] = train_features['left_block_l_max'] - train_features['right_block_l_max']\n",
"train_features['lelf_right_a_max'] = train_features['left_block_a_max'] - train_features['right_block_a_max']\n",
"train_features['lelf_right_b_max'] = train_features['left_block_b_max'] - train_features['right_block_b_max']\n",
"\n",
"\n",
"\n",
"train_features['lelf_right_R_min'] = train_features['left_block_R_min'] - train_features['right_block_R_min']\n",
"train_features['lelf_right_G_min'] = train_features['left_block_G_min'] - train_features['right_block_G_min']\n",
"train_features['lelf_right_B_min'] = train_features['left_block_B_min'] - train_features['right_block_B_min']\n",
"\n",
"# train_features['lelf_right_H_min'] = train_features['left_block_H_min'] - train_features['right_block_H_min']\n",
"# train_features['lelf_right_S_min'] = train_features['left_block_S_min'] - train_features['right_block_S_min']\n",
"# train_features['lelf_right_V_min'] = train_features['left_block_V_min'] - train_features['right_block_V_min']\n",
"\n",
"train_features['lelf_right_l_min'] = train_features['left_block_l_min'] - train_features['right_block_l_min']\n",
"train_features['lelf_right_a_min'] = train_features['left_block_a_min'] - train_features['right_block_a_min']\n",
"train_features['lelf_right_b_min'] = train_features['left_block_b_min'] - train_features['right_block_b_min']\n",
"\n",
"train_features['lelf_right_gray_value'] = train_features['left_grayValue'] - train_features['right_grayValue']\n",
"train_features['lelf_right_gray_stddev'] = train_features['left_grayStddevValue'] - train_features['right_grayStddevValue']\n",
"train_features['lelf_right_gray_hist'] = train_features['left_grayHist'] - train_features['right_grayHist']\n",
"train_features['lelf_right_gray_max'] = train_features['left_grayMax'] - train_features['right_grayMax']\n",
"train_features['lelf_right_gray_min'] = train_features['left_grayMin'] - train_features['right_grayMin']\n",
"train_features.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# train_features_9是真正的训练数据"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#这个原始算法的容错性最好!!!!2020.5.22\n",
"train_features_9 = pd.DataFrame()\n",
"train_features_9['lelf_right_R'] = train_features['left_block_R'] - train_features['right_block_R']\n",
"train_features_9['lelf_right_G'] = train_features['left_block_G'] - train_features['right_block_G']\n",
"train_features_9['lelf_right_B'] = train_features['left_block_B'] - train_features['right_block_B']\n",
"\n",
"train_features_9['lelf_right_H'] = train_features['left_block_H'] - train_features['right_block_H']\n",
"train_features_9['lelf_right_S'] = train_features['left_block_S'] - train_features['right_block_S']\n",
"train_features_9['lelf_right_V'] = train_features['left_block_V'] - train_features['right_block_V']\n",
"\n",
"train_features_9['lelf_right_l'] = train_features['left_block_l'] - train_features['right_block_l']\n",
"train_features_9['lelf_right_a'] = train_features['left_block_a'] - train_features['right_block_a']\n",
"train_features_9['lelf_right_b'] = train_features['left_block_b'] - train_features['right_block_b']\n",
"\n",
"train_features_9['lelf_right_R_stddev'] = train_features['left_block_R_stddev'] - train_features['right_block_R_stddev']\n",
"train_features_9['lelf_right_G_stddev'] = train_features['left_block_G_stddev'] - train_features['right_block_G_stddev']\n",
"train_features_9['lelf_right_B_stddev'] = train_features['left_block_B_stddev'] - train_features['right_block_B_stddev']\n",
"\n",
"train_features_9['lelf_right_H_stddev'] = train_features['left_block_H_stddev'] - train_features['right_block_H_stddev']\n",
"train_features_9['lelf_right_S_stddev'] = train_features['left_block_S_stddev'] - train_features['right_block_S_stddev']\n",
"train_features_9['lelf_right_V_stddev'] = train_features['left_block_V_stddev'] - train_features['right_block_V_stddev']\n",
"\n",
"train_features_9['lelf_right_l_stddev'] = train_features['left_block_l_stddev'] - train_features['right_block_l_stddev']\n",
"train_features_9['lelf_right_a_stddev'] = train_features['left_block_a_stddev'] - train_features['right_block_a_stddev']\n",
"train_features_9['lelf_right_b_stddev'] = train_features['left_block_b_stddev'] - train_features['right_block_b_stddev']\n",
"\n",
"train_features_9['lelf_right_R_hist'] = train_features['left_block_R_hist'] - train_features['right_block_R_hist']\n",
"train_features_9['lelf_right_G_hist'] = train_features['left_block_G_hist'] - train_features['right_block_G_hist']\n",
"train_features_9['lelf_right_B_hist'] = train_features['left_block_B_hist'] - train_features['right_block_B_hist']\n",
"\n",
"train_features_9['lelf_right_H_hist'] = train_features['left_block_H_hist'] - train_features['right_block_H_hist']\n",
"train_features_9['lelf_right_S_hist'] = train_features['left_block_S_hist'] - train_features['right_block_S_hist']\n",
"train_features_9['lelf_right_V_hist'] = train_features['left_block_V_hist'] - train_features['right_block_V_hist']\n",
"\n",
"train_features_9['lelf_right_l_hist'] = train_features['left_block_l_hist'] - train_features['right_block_l_hist']\n",
"train_features_9['lelf_right_a_hist'] = train_features['left_block_a_hist'] - train_features['right_block_a_hist']\n",
"train_features_9['lelf_right_b_hist'] = train_features['left_block_b_hist'] - train_features['right_block_b_hist']\n",
"\n",
"train_features_9['lelf_right_R_max'] = train_features['left_block_R_max'] - train_features['right_block_R_max']\n",
"train_features_9['lelf_right_G_max'] = train_features['left_block_G_max'] - train_features['right_block_G_max']\n",
"train_features_9['lelf_right_B_max'] = train_features['left_block_B_max'] - train_features['right_block_B_max']\n",
"\n",
"train_features_9['lelf_right_H_max'] = train_features['left_block_H_max'] - train_features['right_block_H_max']\n",
"train_features_9['lelf_right_S_max'] = train_features['left_block_S_max'] - train_features['right_block_S_max']\n",
"train_features_9['lelf_right_V_max'] = train_features['left_block_V_max'] - train_features['right_block_V_max']\n",
"\n",
"train_features_9['lelf_right_l_max'] = train_features['left_block_l_max'] - train_features['right_block_l_max']\n",
"train_features_9['lelf_right_a_max'] = train_features['left_block_a_max'] - train_features['right_block_a_max']\n",
"train_features_9['lelf_right_b_max'] = train_features['left_block_b_max'] - train_features['right_block_b_max']\n",
"\n",
"train_features_9['lelf_right_R_min'] = train_features['left_block_R_min'] - train_features['right_block_R_min']\n",
"train_features_9['lelf_right_G_min'] = train_features['left_block_G_min'] - train_features['right_block_G_min']\n",
"train_features_9['lelf_right_B_min'] = train_features['left_block_B_min'] - train_features['right_block_B_min']\n",
"\n",
"train_features_9['lelf_right_H_min'] = train_features['left_block_H_min'] - train_features['right_block_H_min']\n",
"train_features_9['lelf_right_S_min'] = train_features['left_block_S_min'] - train_features['right_block_S_min']\n",
"train_features_9['lelf_right_V_min'] = train_features['left_block_V_min'] - train_features['right_block_V_min']\n",
"\n",
"train_features_9['lelf_right_l_min'] = train_features['left_block_l_min'] - train_features['right_block_l_min']\n",
"train_features_9['lelf_right_a_min'] = train_features['left_block_a_min'] - train_features['right_block_a_min']\n",
"train_features_9['lelf_right_b_min'] = train_features['left_block_b_min'] - train_features['right_block_b_min']\n",
"\n",
"# train_features_9['left_grayValue']= train_features['left_grayValue'];\n",
"# train_features_9['left_grayStddevValue']= train_features['left_grayStddevValue'];\n",
"# train_features_9['left_grayHist']= train_features['left_grayHist'];\n",
"# train_features_9['left_grayMax']= train_features['left_grayMax'];\n",
"# train_features_9['left_grayMin']= train_features['left_grayMin'];\n",
"\n",
"# train_features_9['right_grayValue']= train_features['right_grayValue'];\n",
"# train_features_9['right_grayStddevValue']= train_features['right_grayStddevValue'];\n",
"# train_features_9['right_grayHist']= train_features['right_grayHist'];\n",
"# train_features_9['right_grayMax']= train_features['right_grayMax'];\n",
"# train_features_9['right_grayMin']= train_features['right_grayMin'];\n",
"\n",
"# train_features_9['lelf_R_stddev'] = train_features['left_block_R_stddev'] \n",
"# train_features_9['lelf_G_stddev'] = train_features['left_block_G_stddev'] \n",
"# train_features_9['lelf_B_stddev'] = train_features['left_block_B_stddev'] \n",
"\n",
"# train_features_9['left_block_R_min'] = train_features['left_block_R_min'] \n",
"# train_features_9['left_block_G_min'] = train_features['left_block_G_min'] \n",
"# train_features_9['left_block_B_min'] = train_features['left_block_B_min'] \n",
"\n",
"\n",
"train_features_9['lelf_right_gray_value'] = train_features['left_grayValue'] - train_features['right_grayValue']\n",
"train_features_9['lelf_right_gray_stddev'] = train_features['left_grayStddevValue'] - train_features['right_grayStddevValue']\n",
"train_features_9['lelf_right_gray_hist'] = train_features['left_grayHist'] - train_features['right_grayHist']\n",
"train_features_9['lelf_right_gray_max'] = train_features['left_grayMax'] - train_features['right_grayMax']\n",
"train_features_9['lelf_right_gray_min'] = train_features['left_grayMin'] - train_features['right_grayMin']\n",
"#train_features_9['index'] = train_labels\n",
"train_features_9.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Build the test-set feature frame: each column is a left-block statistic minus\n",
"# the matching right-block statistic. Column names keep the 'lelf_right_'\n",
"# spelling exactly as before so existing consumers of this frame keep working.\n",
"# Raw per-side gray / stddev / min columns and the label column are\n",
"# deliberately not copied into this frame.\n",
"_channels = ['R', 'G', 'B', 'H', 'S', 'V', 'l', 'a', 'b']\n",
"_stat_suffixes = ['', '_stddev', '_hist', '_max', '_min']\n",
"_gray_stats = [\n",
"    ('value', 'grayValue'),\n",
"    ('stddev', 'grayStddevValue'),\n",
"    ('hist', 'grayHist'),\n",
"    ('max', 'grayMax'),\n",
"    ('min', 'grayMin'),\n",
"]\n",
"\n",
"ts_features_9 = pd.DataFrame()\n",
"\n",
"# Colour statistics: mean first, then stddev, hist, max, min; RGB, HSV, Lab within each.\n",
"for _suffix in _stat_suffixes:\n",
"    for _ch in _channels:\n",
"        _stat = _ch + _suffix\n",
"        ts_features_9['lelf_right_' + _stat] = (\n",
"            ts_features['left_block_' + _stat] - ts_features['right_block_' + _stat]\n",
"        )\n",
"\n",
"# Gray-level statistics come last.\n",
"for _name, _col in _gray_stats:\n",
"    ts_features_9['lelf_right_gray_' + _name] = ts_features['left_' + _col] - ts_features['right_' + _col]\n",
"\n",
"ts_features_9.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Experiment: find out which inputs can be removed. Only the gray-level and\n",
"# HSV left-minus-right differences are kept in this variant.\n",
"# NOTE(review): the columns here are named 'left_right_*' (20 columns), while\n",
"# ts_features_9 uses 'lelf_right_*' and also carries RGB / Lab columns. A model\n",
"# fitted on this frame cannot be applied to ts_features_9 as-is - confirm intent.\n",
"_gray_stats = [\n",
"    ('value', 'grayValue'),\n",
"    ('stddev', 'grayStddevValue'),\n",
"    ('hist', 'grayHist'),\n",
"    ('max', 'grayMax'),\n",
"    ('min', 'grayMin'),\n",
"]\n",
"_hsv_channels = ['H', 'S', 'V']\n",
"_stat_suffixes = ['', '_stddev', '_hist', '_max', '_min']\n",
"\n",
"train_features_9 = pd.DataFrame()\n",
"\n",
"# Gray-level differences first.\n",
"for _name, _col in _gray_stats:\n",
"    train_features_9['left_right_gray_' + _name] = (\n",
"        train_features['left_' + _col] - train_features['right_' + _col]\n",
"    )\n",
"\n",
"# Then HSV differences: mean, stddev, hist, max, min.\n",
"for _suffix in _stat_suffixes:\n",
"    for _ch in _hsv_channels:\n",
"        _stat = _ch + _suffix\n",
"        train_features_9['left_right_' + _stat] = (\n",
"            train_features['left_block_' + _stat] - train_features['right_block_' + _stat]\n",
"        )\n",
"\n",
"train_features_9.describe()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**去掉左边块的方差和白块和右边块的特征**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Remove columns that are not wanted as model inputs:\n",
"#   * left / right block : the RGB stddev columns, plus mean / stddev / hist / max\n",
"#                          of the H, S, V, l, a, b channels\n",
"#   * white block        : mean / stddev / hist / max of every channel\n",
"# The RGB mean / hist / max columns of the left and right blocks are kept, and\n",
"# no '_min' column is touched.\n",
"_rgb = ['R', 'G', 'B']\n",
"_hsv_lab = ['H', 'S', 'V', 'l', 'a', 'b']\n",
"_stats = ['', '_stddev', '_hist', '_max']\n",
"\n",
"_drop_cols = []\n",
"for _side in ('left_block', 'right_block'):\n",
"    _drop_cols += [_side + '_' + _ch + '_stddev' for _ch in _rgb]\n",
"    _drop_cols += [_side + '_' + _ch + _s for _ch in _hsv_lab for _s in _stats]\n",
"_drop_cols += ['whiteBlock_' + _ch + _s for _ch in _rgb + _hsv_lab for _s in _stats]\n",
"\n",
"# errors='ignore' keeps the cell re-runnable: train_features is reassigned, so\n",
"# on a second execution the columns are already gone and a plain drop would\n",
"# raise KeyError.\n",
"train_features = train_features.drop(_drop_cols, axis=1, errors='ignore')\n",
"\n",
"train_features.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**去掉所有块的方差特征**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop the stddev column of every channel (RGB, HSV, Lab) for the left, right\n",
"# and white blocks.\n",
"_blocks = ['left_block', 'right_block', 'whiteBlock']\n",
"_channels = ['R', 'G', 'B', 'H', 'S', 'V', 'l', 'a', 'b']\n",
"_stddev_cols = [_b + '_' + _ch + '_stddev' for _b in _blocks for _ch in _channels]\n",
"\n",
"# errors='ignore': the preceding drop cell already removes several of these\n",
"# columns (e.g. left_block_R_stddev), so a strict drop raises KeyError when the\n",
"# notebook is run top to bottom or when this cell is re-run.\n",
"train_features = train_features.drop(_stddev_cols, axis=1, errors='ignore')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop the stddev and max columns of every channel (RGB, HSV, Lab) for the\n",
"# left, right and white blocks. The mean and hist columns are kept.\n",
"_blocks = ['left_block', 'right_block', 'whiteBlock']\n",
"_channels = ['R', 'G', 'B', 'H', 'S', 'V', 'l', 'a', 'b']\n",
"_stddev_max_cols = [\n",
"    _b + '_' + _ch + _s\n",
"    for _s in ('_stddev', '_max')\n",
"    for _b in _blocks\n",
"    for _ch in _channels\n",
"]\n",
"\n",
"# errors='ignore': earlier drop cells already remove many of these columns, so\n",
"# a strict drop raises KeyError when the notebook is run top to bottom or when\n",
"# this cell is re-run.\n",
"train_features = train_features.drop(_stddev_max_cols, axis=1, errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"train_features.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# svc支持向量机算法"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Model, metric and data-splitting imports used by the experiments below.\n",
"# Ensemble learners:\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"# Splitting helpers come from sklearn.model_selection; importing them from the\n",
"# old sklearn.cross_validation module fails with ModuleNotFoundError.\n",
"# KFold is imported here because a later cell calls KFold(n_splits=5).\n",
"from sklearn.model_selection import KFold\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.svm import SVC\n",
"\n",
"# Hold out 30% of the rows of train_features_9 for evaluation; the fixed\n",
"# random_state keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    train_features_9, train_labels, test_size=0.3, random_state=20\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## fit_transform,fit,transform区别和作用详解\n",
"\n",
"- fit和transform没有任何关系,仅仅是数据处理的两个不同环节,之所以出来fit_transform这个函数名,仅仅是为了写代码方便,会高效一点。\n",
"- sklearn里封装好的各种算法使用前都要fit,fit相对于整个代码而言,为后续API服务。fit之后,才能调用各种API方法,transform只是其中一个API方法,所以当你调用transform之外的方法时,也必须要先fit。\n",
"- fit原义指的是安装、使适合的意思,其实有点train的含义,但是和train不同的是,它并不是一个训练的过程,而是一个适配的过程,过程都是确定的,最后得到一个可用于转换的有价值的信息。\n",
"- 参考:https://blog.csdn.net/weixin_38278334/article/details/82971752\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Legacy experiment (marked by the author as no longer needed): 5-fold\n",
"# cross-validation of a linear SVM on train_features.\n",
"# SVC, accuracy_score and f1_score are expected to be imported by the earlier\n",
"# import cell; KFold and datetime are imported here so the cell does not depend\n",
"# on a commented-out import.\n",
"from datetime import datetime\n",
"from sklearn.model_selection import KFold\n",
"\n",
"X = train_features.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"print(kf)\n",
"\n",
"for train_index, test_index in kf.split(X):\n",
"    print('TRAIN:', train_index, 'TEST:', test_index)\n",
"    # Fold-local names: reusing X_train / X_test / y_train / y_test here would\n",
"    # overwrite the hold-out split made by train_test_split, which the\n",
"    # grid-search cells further down read.\n",
"    X_fold_train, X_fold_test = X[train_index], X[test_index]\n",
"    y_fold_train, y_fold_test = y[train_index], y[test_index]\n",
"\n",
"    training_start_time = datetime.now()\n",
"\n",
"    # gamma is not passed: it has no effect with kernel='linear'.\n",
"    clf_svm_linear = SVC(kernel='linear', C=0.1)\n",
"    clf_svm_linear = clf_svm_linear.fit(X_fold_train, y_fold_train)\n",
"    pred = clf_svm_linear.predict(X_fold_test)\n",
"    print('svm linear accuracy score:', accuracy_score(y_fold_test, pred))\n",
"    print('f1 score:', f1_score(y_fold_test, pred, average='micro'))\n",
"\n",
"    training_stop_time = datetime.now()\n",
"\n",
"    print('runing time:', (training_stop_time - training_start_time))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-validation splitter, grid search, train/test splitting and XGBoost.\n",
"from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split\n",
"from xgboost import XGBClassifier\n",
"\n",
"# Marked by the author as no longer needed: grid search over the XGBoost\n",
"# learning rate and gamma.\n",
"model = XGBClassifier()\n",
"learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]\n",
"gamma = [1, 0.1, 0.01, 0.001]\n",
"# GridSearchCV expects the candidates as a dict of parameter name -> values.\n",
"param_grid = {'learning_rate': learning_rate, 'gamma': gamma}\n",
"# 10 shuffled, stratified folds with a fixed seed.\n",
"kflod = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)\n",
"# scoring selects the metric, n_jobs=-1 uses every CPU, cv supplies the folds.\n",
"grid_search = GridSearchCV(\n",
"    model,\n",
"    param_grid,\n",
"    scoring='neg_log_loss',\n",
"    n_jobs=-1,\n",
"    cv=kflod,\n",
")\n",
"grid_result = grid_search.fit(X_train, y_train)\n",
"\n",
"# best_score_ is the best mean CV score seen; best_params_ the combination that produced it.\n",
"print('Best: %f using %s' % (grid_result.best_score_, grid_search.best_params_))\n",
"\n",
"# cv_results_ is a dict of column name -> values; 'params' lists every candidate setting.\n",
"means = grid_result.cv_results_['mean_test_score']\n",
"params = grid_result.cv_results_['params']\n",
"for param, mean in zip(params, means):\n",
"    print('%f with: %r' % (mean, param))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import StratifiedKFold #交叉验证\n",
"from sklearn.model_selection import GridSearchCV #网格搜索\n",
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn import metrics \n",
"#https://blog.csdn.net/WJWFighting/article/details/80983022\n",
"thresholds=np.linspace(0,0.1,20)#设置gamma参数列表,生成等差数列\n",
"thresholds\n",
"param_grid={'gamma':thresholds}\n",
"clf=GridSearchCV(SVC(kernel='rbf'),param_grid,cv=5)\n",
"clf.fit(X_train, y_train)\n",
"\n",
"print(\"best param: {0}\\nbest score: {1}\".format(clf.best_params_, clf.best_score_))\n",
"\n",
"y_pred = clf.predict(X_test)\n",
"\n",
"print(\"查准率:\",metrics.precision_score(y_pred, y_test))\n",
"print(\"召回率:\",metrics.recall_score(y_pred, y_test))\n",
"print(\"F1\",metrics.f1_score(y_pred, y_test))\n",
"\n",
"print(\"最佳效果:%0.3f\"% clf.best_score_)\n",
"print(\"最优参数组合:\")\n",
"best_parameters=clf.best_estimator_.get_params()\n",
"for param_name in sorted(param_grid.keys()):\n",
" print('\\t%s:%r' %(param_name,best_parameters[param_name]))\n",
"\n",
"#print(\"训练集评分:\",clf.score(x_train,y_train))\n",
"#print(\"测试集评分:\",clf.score(x_test,y_test))\n",
"\n",
"\"\"\"\n",
"SVC方法。常用的参数如下:\n",
"C:默认为1.0,是对于错误的惩罚项。\n",
"kernel:指定算法的核函数,默认为'rbf',常用的有'linear''poly''rbf''sigmoid''precomputed'。\n",
"degree:多项式核函数的次数('poly'),默认为3。 其他核函数会将其忽略。\n",
"gamma'rbf''poly'和'sigmoid'的核系数。 如果gamma是'auto',那么将使用1 / n_features。\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"#X_train = train_features_9\n",
"#y_train = train_labels\n",
"# X_test = test_features\n",
"# y_test = test_labels\n",
"#clf_svm_linear = SVC(kernel = 'linear',gamma=0.00001,C=0.01)\n",
"#clf_svm_linear = SVC(kernel = 'linear',gamma=0.01,C=0.01)\n",
"#svm linear accuracy score: 0.9746101835242169\n",
"clf_svm_linear = SVC(kernel = 'linear',C=0.1)\n",
"#svm linear accuracy score: 0.974885004599816\n",
"clf_svm_linear = clf_svm_linear.fit(X_train, y_train)\n",
"# pred = clf_svm_linear.predict(X_test)\n",
"# print \"svm linear accuracy score:\" , accuracy_score(y_test,pred)\n",
"# print \"f1 score:\" , f1_score(y_test,pred,average='micro')\n",
"pred = clf_svm_linear.predict(X_test)\n",
"print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
"print (\"f1 score :\" , f1_score(y_test,pred,average=None))\n",
"print (\"precision_score:\" , precision_score(y_test,pred,average=None))\n",
"print (\"recall_score :\" , recall_score(y_test,pred,average=None))\n",
"print(\"preds:\",pred[:10])\n",
"print('trues:\\n',y_test[:10])\n",
"print(\"\\n\")\n",
"###针对同一份数据,\n",
"clf_svc_poly = SVC(kernel='poly',degree=3,gamma=0.001,C=0.1)\n",
"#clf_svc_poly = SVC(kernel='poly',degree=3,gamma=0.00001,C=0.1)\n",
"\n",
"##svm polynomial accuracy score: 0.37460901563937443\n",
"clf_svc_poly.fit(X_train, y_train)\n",
"pred_poly = clf_svc_poly.predict(X_test)\n",
"print (\"svm polynomial accuracy score:\" , accuracy_score(y_test,pred_poly))\n",
"print (\"f1 score :\" , f1_score(y_test,pred_poly,average=None))\n",
"print (\"precision_score:\" , precision_score(y_test,pred_poly,average=None))\n",
"print (\"recall_score :\" , recall_score(y_test,pred_poly,average=None))\n",
"\n",
"clf_svc_rbf = SVC(kernel='rbf', gamma=0.05,C=0.1)\n",
"##svm rbf accuracy score: 0.284360625574977\n",
"clf_svc_rbf.fit(X_train, y_train)\n",
"pred_rbf = clf_svc_rbf.predict(X_test)\n",
"print (\"svm rbf accuracy score:\" , accuracy_score(y_test,pred_rbf))\n",
"print (\"f1 score :\" , f1_score(y_test,pred_rbf,average=None))\n",
"print (\"precision_score:\" , precision_score(y_test,pred_rbf,average=None))\n",
"print (\"recall_score :\" , recall_score(y_test,pred_rbf,average=None))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_clf_svm_linear = Porter(clf_svm_linear, language='c').export()\n",
"#porter_clf_svm_poly = Porter(clf_svm_poly, language='c').export()\n",
"# porter_clf_forest = Porter(clf_randomForest, language='c').export()\n",
"#porter_clf_extra_forest = Porter(clf_extra_forest, language='c').export()\n",
"\n",
"#print(porter_clf_svm_linear)\n",
"f = open(\"clf/clf_svm_linear_50features_2020.cpp\",'wb')\n",
"#f = open(\"clf/clf_svm_linear_50features_20171207.txt\",'wb')\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"f.write(porter_clf_svm_linear.encode())\n",
"f.close()\n",
"#f = open(\"clf_svm_poly_2457100_data.txt\",'wb')\n",
"#f.write(porter_clf_svm_poly)\n",
"#f.close()\n",
"# f = open(\"clf/clf_randomForest_27features_stddev_c_0_01.txt\",'wb')\n",
"# f.write(porter_clf_forest)\n",
"# f.close()\n",
"# f = open(\"oclf_extra_forest_2457100_data_0824.txt\",'wb')\n",
"# f.write(porter_clf_extra_forest)\n",
"# f.close()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 随机森林算法"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#from sklearn.model_selection import KFold\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"\n",
"\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.3, random_state = 20)\n",
"#X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.2, random_state = 20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import classification_report,confusion_matrix\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=600)\n",
"\n",
"rfc = RandomForestClassifier(n_estimators=20)\n",
"#20 : 0.992901760363429 300.9937941104891701 40 0.9944430923987994 500.9944836537681512\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=20)\n",
"#RandomForest accuracy score: 0.9772012028813204/0.9803133086229806/0.9811874956290649/0.9852786908175397\n",
"#50- 0.9648738541413158\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=50)\n",
"#---- 0.9857629593575079\n",
"#RandomForest accuracy score: 0.9955940974893349\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=20)\n",
"#30-0.9595197533868743 40 - 0.9553013709742841 50 - 0.9528271274438225\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=100,min_samples_leaf=50)\n",
"#RandomForest accuracy score: 0.97688649555913\n",
"\n",
"#rfc = RandomForestClassifier(n_estimators=50,min_samples_leaf=100)\n",
"#RandomForest accuracy score: 0.9669906986502552\n",
"\n",
"\n",
"rfc.fit(X_train, y_train)\n",
"rfc_pred = rfc.predict(X_test)\n",
"cr = classification_report(y_test,rfc_pred)\n",
"print(cr)\n",
"\n",
"print(\"---------------------------------\\n\")\n",
"#print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
"print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"print(\"---------------------------------\\n\")\n",
"print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
"cm = confusion_matrix(y_test,rfc_pred)\n",
"print(cm)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_clf_rfc = Porter(rfc, language='c').export()\n",
"#porter_clf_svm_poly = Porter(clf_svm_poly, language='c').export()\n",
"# porter_clf_forest = Porter(clf_randomForest, language='c').export()\n",
"#porter_clf_extra_forest = Porter(clf_extra_forest, language='c').export()\n",
"\n",
"#print(porter_clf_svm_linear)\n",
"f = open(\"clf/ov_rtree20_f50_2020522.cpp\",'wb')\n",
"#f = open(\"clf/clf_svm_linear_50features_20171207.txt\",'wb')\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"f.write(porter_clf_rfc.encode())\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"X = train_features_9.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"kf.get_n_splits(X)\n",
"\n",
"print(kf) \n",
"\n",
"for train_index, test_index in kf.split(X):\n",
" print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
" X_train, X_test = X[train_index], X[test_index]\n",
" y_train, y_test = y[train_index], y[test_index]\n",
" \n",
" \n",
" from datetime import datetime\n",
" trarining_start_time = datetime.now()\n",
"\n",
" rfc = RandomForestClassifier(n_estimators=600)\n",
" rfc.fit(X_train, y_train)\n",
" rfc_pred = rfc.predict(X_test) \n",
" print (\"svm linear accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
" print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
" print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
" print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
"\n",
" training_stop_time = datetime.now()\n",
"\n",
" print (\"runing time:\",(training_stop_time - trarining_start_time))\n",
" print(\"\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nsimu = 21\n",
"accuracy=[0]*nsimu\n",
"ntree = [0]*nsimu\n",
"for i in range(1,nsimu):\n",
" rfc = RandomForestClassifier(n_estimators=i*5,min_samples_split=10,max_depth=None,criterion='gini')\n",
" rfc.fit(X_train, y_train)\n",
" rfc_pred = rfc.predict(X_test)\n",
" cm = confusion_matrix(y_test,rfc_pred)\n",
" accuracy[i] = (cm[0,0]+cm[1,1])/cm.sum()\n",
" ntree[i]=i*5\n",
"\n",
" print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
" print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro')) \n",
" print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
"\n",
" \n",
"plt.figure(figsize=(10,6))\n",
"plt.scatter(x=ntree[1:nsimu],y=accuracy[1:nsimu],s=60,c='red')\n",
"plt.title(\"Number of trees in the Random Forest vs. prediction accuracy (criterion: 'gini')\", fontsize=18)\n",
"plt.xlabel(\"Number of trees\", fontsize=15)\n",
"plt.ylabel(\"Prediction accuracy from confusion matrix\", fontsize=15)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.utils import shuffle\n",
"\n",
"\n",
"# data_shuffle1 = shuffle(data1)\n",
"# #data_shuffle = data_all;\n",
"# test_labels = data_shuffle1[\"index\"]\n",
"# test_features = data_shuffle1.drop(\"dateTime\",axis=1)\n",
"# test_features = test_features.drop(\"index\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBalance\",axis=1)\n",
"\n",
"\n",
"# test_features = test_features.drop(\"left_block_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"left_block_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"left_block_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_b_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_b_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_b_stddev\",axis=1)\n",
"\n",
"train_features_10 = pd.DataFrame()\n",
"train_features_10['lelf_right_R'] = test_features['left_block_R'] - test_features['right_block_R']\n",
"train_features_10['lelf_right_G'] = test_features['left_block_G'] - test_features['right_block_G']\n",
"train_features_10['lelf_right_B'] = test_features['left_block_B'] - test_features['right_block_B']\n",
"\n",
"train_features_10['lelf_right_H'] = test_features['left_block_H'] - test_features['right_block_H']\n",
"# train_features_10['lelf_right_S'] = test_features['left_block_S'] - test_features['right_block_S']\n",
"train_features_10['lelf_right_V'] = test_features['left_block_V'] - test_features['right_block_V']\n",
"\n",
"train_features_10['lelf_right_l'] = test_features['left_block_l'] - test_features['right_block_l']\n",
"train_features_10['lelf_right_a'] = test_features['left_block_a'] - test_features['right_block_a']\n",
"train_features_10['lelf_right_b'] = test_features['left_block_b'] - test_features['right_block_b']\n",
"\n",
"train_features_10['lelf_right_R_stddev'] = test_features['left_block_R_stddev'] - test_features['right_block_R_stddev']\n",
"train_features_10['lelf_right_G_stddev'] = test_features['left_block_G_stddev'] - test_features['right_block_G_stddev']\n",
"train_features_10['lelf_right_B_stddev'] = test_features['left_block_B_stddev'] - test_features['right_block_B_stddev']\n",
"\n",
"train_features_10['lelf_right_H_stddev'] = test_features['left_block_H_stddev'] - test_features['right_block_H_stddev']\n",
"# train_features_10['lelf_right_S_stddev'] = test_features['left_block_S_stddev'] - test_features['right_block_S_stddev']\n",
"train_features_10['lelf_right_V_stddev'] = test_features['left_block_V_stddev'] - test_features['right_block_V_stddev']\n",
"\n",
"train_features_10['lelf_right_l_stddev'] = test_features['left_block_l_stddev'] - test_features['right_block_l_stddev']\n",
"train_features_10['lelf_right_a_stddev'] = test_features['left_block_a_stddev'] - test_features['right_block_a_stddev']\n",
"train_features_10['lelf_right_b_stddev'] = test_features['left_block_b_stddev'] - test_features['right_block_b_stddev']\n",
"\n",
"train_features_10['lelf_right_R_hist'] = test_features['left_block_R_hist'] - test_features['right_block_R_hist']\n",
"train_features_10['lelf_right_G_hist'] = test_features['left_block_G_hist'] - test_features['right_block_G_hist']\n",
"train_features_10['lelf_right_B_hist'] = test_features['left_block_B_hist'] - test_features['right_block_B_hist']\n",
"\n",
"train_features_10['lelf_right_H_hist'] = test_features['left_block_H_hist'] - test_features['right_block_H_hist']\n",
"# train_features_10['lelf_right_S_hist'] = test_features['left_block_S_hist'] - test_features['right_block_S_hist']\n",
"train_features_10['lelf_right_V_hist'] = test_features['left_block_V_hist'] - test_features['right_block_V_hist']\n",
"\n",
"train_features_10['lelf_right_l_hist'] = test_features['left_block_l_hist'] - test_features['right_block_l_hist']\n",
"train_features_10['lelf_right_a_hist'] = test_features['left_block_a_hist'] - test_features['right_block_a_hist']\n",
"train_features_10['lelf_right_b_hist'] = test_features['left_block_b_hist'] - test_features['right_block_b_hist']\n",
"\n",
"train_features_10['lelf_right_R_max'] = test_features['left_block_R_max'] - test_features['right_block_R_max']\n",
"train_features_10['lelf_right_G_max'] = test_features['left_block_G_max'] - test_features['right_block_G_max']\n",
"train_features_10['lelf_right_B_max'] = test_features['left_block_B_max'] - test_features['right_block_B_max']\n",
"\n",
"train_features_10['lelf_right_H_max'] = test_features['left_block_H_max'] - test_features['right_block_H_max']\n",
"# train_features_10['lelf_right_S_max'] = test_features['left_block_S_max'] - test_features['right_block_S_max']\n",
"train_features_10['lelf_right_V_max'] = test_features['left_block_V_max'] - test_features['right_block_V_max']\n",
"\n",
"train_features_10['lelf_right_l_max'] = test_features['left_block_l_max'] - test_features['right_block_l_max']\n",
"train_features_10['lelf_right_a_max'] = test_features['left_block_a_max'] - test_features['right_block_a_max']\n",
"train_features_10['lelf_right_b_max'] = test_features['left_block_b_max'] - test_features['right_block_b_max']\n",
"\n",
"\n",
"train_features_10['lelf_right_R_min'] = test_features['left_block_R_min'] - test_features['right_block_R_min']\n",
"train_features_10['lelf_right_G_min'] = test_features['left_block_G_min'] - test_features['right_block_G_min']\n",
"train_features_10['lelf_right_B_min'] = test_features['left_block_B_min'] - test_features['right_block_B_min']\n",
"\n",
"train_features_10['lelf_right_H_min'] = test_features['left_block_H_min'] - test_features['right_block_H_min']\n",
"# train_features_10['lelf_right_S_min'] = test_features['left_block_S_min'] - test_features['right_block_S_min']\n",
"train_features_10['lelf_right_V_min'] = test_features['left_block_V_min'] - test_features['right_block_V_min']\n",
"\n",
"train_features_10['lelf_right_l_min'] = test_features['left_block_l_min'] - test_features['right_block_l_min']\n",
"train_features_10['lelf_right_a_min'] = test_features['left_block_a_min'] - test_features['right_block_a_min']\n",
"train_features_10['lelf_right_b_min'] = test_features['left_block_b_min'] - test_features['right_block_b_min']\n",
"\n",
"# train_features_10['left_grayValue']= test_features['left_grayValue'];\n",
"# train_features_10['left_grayStddevValue']= test_features['left_grayStddevValue'];\n",
"# train_features_10['left_grayHist']= test_features['left_grayHist'];\n",
"# train_features_10['left_grayMax']= test_features['left_grayMax'];\n",
"# train_features_10['left_grayMin']= test_features['left_grayMin'];\n",
"\n",
"# train_features_10['right_grayValue']= test_features['right_grayValue'];\n",
"# train_features_10['right_grayStddevValue']= test_features['right_grayStddevValue'];\n",
"# train_features_10['right_grayHist']= test_features['right_grayHist'];\n",
"# train_features_10['right_grayMax']= test_features['right_grayMax'];\n",
"# train_features_10['right_grayMin']= test_features['right_grayMin'];\n",
"\n",
"# train_features_10['lelf_R_stddev'] = test_features['left_block_R_stddev'] \n",
"# train_features_10['lelf_G_stddev'] = test_features['left_block_G_stddev'] \n",
"# train_features_10['lelf_B_stddev'] = test_features['left_block_B_stddev'] \n",
"\n",
"# train_features_10['left_block_R_min'] = test_features['left_block_R_min'] \n",
"# train_features_10['left_block_G_min'] = test_features['left_block_G_min'] \n",
"# train_features_10['left_block_B_min'] = test_features['left_block_B_min'] \n",
"\n",
"\n",
"\n",
"train_features_10['lelf_right_gray_value'] = test_features['left_grayValue'] - test_features['right_grayValue']\n",
"train_features_10['lelf_right_gray_stddev'] = test_features['left_grayStddevValue'] - test_features['right_grayStddevValue']\n",
"train_features_10['lelf_right_gray_hist'] = test_features['left_grayHist'] - test_features['right_grayHist']\n",
"train_features_10['lelf_right_gray_max'] = test_features['left_grayMax'] - test_features['right_grayMax']\n",
"train_features_10['lelf_right_gray_min'] = test_features['left_grayMin'] - test_features['right_grayMin']\n",
"\n",
"train_features_10.describe()\n",
"\n",
"\n",
"# feature = feature.drop(\"left_block_H_hist\",axis=1)\n",
"# feature = feature.drop(\"right_block_H_hist\",axis=1)\n",
"# feature = feature.drop(\"whiteBlock_H_hist\",axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" \n",
"test_features = test_features.drop(\"left_block_H\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V\",axis=1)\n",
"\n",
"\n",
"test_features = test_features.drop(\"left_block_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_max\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_max\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_max\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_max\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_max\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_max\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_min\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_min\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_min\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_min\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_min\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_min\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_min\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_min\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_min\",axis=1)\n",
" \n",
" \n",
"test_features['lelf_right_R'] = test_features['left_block_R'] - test_features['right_block_R']\n",
"test_features['lelf_right_G'] = test_features['left_block_G'] - test_features['right_block_G']\n",
"test_features['lelf_right_B'] = test_features['left_block_B'] - test_features['right_block_B']\n",
"\n",
"# test_features['lelf_right_H'] = test_features['left_block_H'] - test_features['right_block_H']\n",
"# test_features['lelf_right_S'] = test_features['left_block_S'] - test_features['right_block_S']\n",
"# test_features['lelf_right_V'] = test_features['left_block_V'] - test_features['right_block_V']\n",
"\n",
"# test_features['lelf_right_l'] = test_features['left_block_l'] - test_features['right_block_l']\n",
"# test_features['lelf_right_a'] = test_features['left_block_a'] - test_features['right_block_a']\n",
"# test_features['lelf_right_b'] = test_features['left_block_b'] - test_features['right_block_b']\n",
"\n",
"# test_features['lelf_right_R_stddev'] = test_features['left_block_R_stddev'] - test_features['right_block_R_stddev']\n",
"# test_features['lelf_right_G_stddev'] = test_features['left_block_G_stddev'] - test_features['right_block_G_stddev']\n",
"# test_features['lelf_right_B_stddev'] = test_features['left_block_B_stddev'] - test_features['right_block_B_stddev']\n",
"\n",
"# test_features['lelf_right_H_stddev'] = test_features['left_block_H_stddev'] - test_features['right_block_H_stddev']\n",
"# test_features['lelf_right_S_stddev'] = test_features['left_block_S_stddev'] - test_features['right_block_S_stddev']\n",
"# test_features['lelf_right_V_stddev'] = test_features['left_block_V_stddev'] - test_features['right_block_V_stddev']\n",
"\n",
"# test_features['lelf_right_l_stddev'] = test_features['left_block_l_stddev'] - test_features['right_block_l_stddev']\n",
"# test_features['lelf_right_a_stddev'] = test_features['left_block_a_stddev'] - test_features['right_block_a_stddev']\n",
"# test_features['lelf_right_b_stddev'] = test_features['left_block_b_stddev'] - test_features['right_block_b_stddev']\n",
"\n",
"# test_features['lelf_right_R_hist'] = test_features['left_block_R_hist'] - test_features['right_block_R_hist']\n",
"# test_features['lelf_right_G_hist'] = test_features['left_block_G_hist'] - test_features['right_block_G_hist']\n",
"# test_features['lelf_right_B_hist'] = test_features['left_block_B_hist'] - test_features['right_block_B_hist']\n",
"\n",
"# test_features['lelf_right_H_hist'] = test_features['left_block_H_hist'] - test_features['right_block_H_hist']\n",
"# test_features['lelf_right_S_hist'] = test_features['left_block_S_hist'] - test_features['right_block_S_hist']\n",
"# test_features['lelf_right_V_hist'] = test_features['left_block_V_hist'] - test_features['right_block_V_hist']\n",
"\n",
"# test_features['lelf_right_l_hist'] = test_features['left_block_l_hist'] - test_features['right_block_l_hist']\n",
"# test_features['lelf_right_a_hist'] = test_features['left_block_a_hist'] - test_features['right_block_a_hist']\n",
"# test_features['lelf_right_b_hist'] = test_features['left_block_b_hist'] - test_features['right_block_b_hist']\n",
"\n",
"# test_features['lelf_right_R_max'] = test_features['left_block_R_max'] - test_features['right_block_R_max']\n",
"# test_features['lelf_right_G_max'] = test_features['left_block_G_max'] - test_features['right_block_G_max']\n",
"# test_features['lelf_right_B_max'] = test_features['left_block_B_max'] - test_features['right_block_B_max']\n",
"\n",
"# test_features['lelf_right_H_max'] = test_features['left_block_H_max'] - test_features['right_block_H_max']\n",
"# test_features['lelf_right_S_max'] = test_features['left_block_S_max'] - test_features['right_block_S_max']\n",
"# test_features['lelf_right_V_max'] = test_features['left_block_V_max'] - test_features['right_block_V_max']\n",
"\n",
"# test_features['lelf_right_l_max'] = test_features['left_block_l_max'] - test_features['right_block_l_max']\n",
"# test_features['lelf_right_a_max'] = test_features['left_block_a_max'] - test_features['right_block_a_max']\n",
"# test_features['lelf_right_b_max'] = test_features['left_block_b_max'] - test_features['right_block_b_max']\n",
"\n",
"\n",
"\n",
"# test_features['lelf_right_R_min'] = test_features['left_block_R_min'] - test_features['right_block_R_min']\n",
"# test_features['lelf_right_G_min'] = test_features['left_block_G_min'] - test_features['right_block_G_min']\n",
"# test_features['lelf_right_B_min'] = test_features['left_block_B_min'] - test_features['right_block_B_min']\n",
"\n",
"# test_features['lelf_right_H_min'] = test_features['left_block_H_min'] - test_features['right_block_H_min']\n",
"# test_features['lelf_right_S_min'] = test_features['left_block_S_min'] - test_features['right_block_S_min']\n",
"# test_features['lelf_right_V_min'] = test_features['left_block_V_min'] - test_features['right_block_V_min']\n",
"\n",
"# test_features['lelf_right_l_min'] = test_features['left_block_l_min'] - test_features['right_block_l_min']\n",
"# test_features['lelf_right_a_min'] = test_features['left_block_a_min'] - test_features['right_block_a_min']\n",
"# test_features['lelf_right_b_min'] = test_features['left_block_b_min'] - test_features['right_block_b_min']\n",
"\n",
"test_features['lelf_right_gray_value'] = test_features['left_grayValue'] - test_features['right_grayValue']\n",
"test_features['lelf_right_gray_stddev'] = test_features['left_grayStddevValue'] - test_features['right_grayStddevValue']\n",
"test_features['lelf_right_gray_hist'] = test_features['left_grayHist'] - test_features['right_grayHist']\n",
"test_features['lelf_right_gray_max'] = test_features['left_grayMax'] - test_features['right_grayMax']\n",
"test_features['lelf_right_gray_min'] = test_features['left_grayMin'] - test_features['right_grayMin']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Score the linear SVM, then re-score after a rule-based override that forces\n",
"# class 0 on samples whose left_grayStddevValue is below a threshold.\n",
"LOW_STDDEV_THRESHOLD = 3  # override cut-off applied to left_grayStddevValue\n",
"FOCUS_LABEL = 4           # class whose errors are listed separately below\n",
"\n",
"\n",
"def report_scores(y_true, y_pred):\n",
"    \"\"\"Print accuracy plus micro-averaged F1, recall and precision.\"\"\"\n",
"    print(\"svm linear accuracy score:\", accuracy_score(y_true, y_pred))\n",
"    print(\"f1 score:\", f1_score(y_true, y_pred, average='micro'))\n",
"    print(\"recall_score :\", recall_score(y_true, y_pred, average='micro'))\n",
"    print(\"precision_score :\", precision_score(y_true, y_pred, average='micro'))\n",
"\n",
"\n",
"# NOTE(review): predictions are made on train_features_10 but scored against\n",
"# test_labels -- confirm this is the intended feature matrix.\n",
"pred = clf_svm_linear.predict(train_features_10)\n",
"test_features_gray_stddev = test_features['left_grayStddevValue']\n",
"test_features_np = test_features_gray_stddev.values\n",
"test_labels_np = test_labels.values\n",
"\n",
"# Scores of the raw classifier output.\n",
"report_scores(test_labels, pred)\n",
"\n",
"# Apply the override in place, then score again.\n",
"pred[test_features_np < LOW_STDDEV_THRESHOLD] = 0\n",
"report_scores(test_labels, pred)\n",
"\n",
"print(\"preds:\", pred[120:130])\n",
"print('trues:\\n', test_labels[120:130])\n",
"print(test_labels_np[0])\n",
"\n",
"# Errors among samples where the prediction or the true label is FOCUS_LABEL.\n",
"focus_total = 0\n",
"focus_errors = 0\n",
"for predicted, actual in zip(pred, test_labels_np):\n",
"    if predicted == FOCUS_LABEL or actual == FOCUS_LABEL:\n",
"        focus_total += 1\n",
"        if predicted != actual:\n",
"            focus_errors += 1\n",
"            print(predicted, actual)\n",
"print(len(pred), focus_total, focus_errors)\n",
"\n",
"# Every misclassified sample, regardless of class.\n",
"total_errors = 0\n",
"for predicted, actual in zip(pred, test_labels_np):\n",
"    if predicted != actual:\n",
"        total_errors += 1\n",
"        print(predicted, actual)\n",
"print(len(pred), total_errors)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reference snippet (not executed, this is a markdown cell): macro-averaged recall and precision.\n",
"\n",
"```python\n",
"from sklearn.metrics import recall_score\n",
"from sklearn.metrics import precision_score\n",
"print \"accuracy score:\" , accuracy_score(y_test,pred)\n",
"print \"recall_score :\" , recall_score(y_test,pred,average='macro')\n",
"print \"precision_score :\" , precision_score(y_test,pred,average='macro')\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"# Transpile the trained classifier to Java and C source code.\n",
"porter_java = Porter(clf_svm, language='java').export()\n",
"porter_c = Porter(clf_svm, language='c').export()\n",
"\n",
"# export() returns the generated source as text (per the sklearn-porter docs),\n",
"# so the files are opened in text mode; writing str to a 'wb' handle raises\n",
"# TypeError on Python 3. `with` closes each file even if the write fails.\n",
"with open(\"Protein_c.txt\", 'w', encoding='utf-8') as c_file:\n",
"    c_file.write(porter_c)\n",
"\n",
"with open(\"Protein_svm_java.txt\", 'w', encoding='utf-8') as java_file:\n",
"    java_file.write(porter_java)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Split data.csv by the value of its \"index\" column (0..4) and write each\n",
"# subset to data<k>.csv.\n",
"try:\n",
"    data = pd.read_csv(\"data.csv\")\n",
"    print(\"load data successful !!!!!\")\n",
"    subsets = {k: data[data[\"index\"] == k] for k in range(5)}\n",
"    # Keep the per-class frames bound to their original names.\n",
"    # NOTE(review): this rebinds data2/data3/data4, which cells near the top of\n",
"    # the notebook use for differently sourced frames.\n",
"    data0, data1, data2, data3, data4 = (subsets[k] for k in range(5))\n",
"    for k, subset in subsets.items():\n",
"        subset.to_csv('data{}.csv'.format(k))\n",
"except Exception as err:\n",
"    # Still best-effort, but report the cause and let KeyboardInterrupt through.\n",
"    print(\"load data error !!!!!!!!!!\", err)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}