Files
yola/zaoYun/master/clf/hcg-LOCAL-2018-12-21.ipynb
coco 85d885e008 a
2026-07-03 16:29:47 +08:00

2430 lines
117 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
" *早早孕试纸机器学习算法验证*"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**import modules**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd \n",
"import seaborn as sns\n",
"from IPython.display import display\n",
"import matplotlib.pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"import sklearn\n",
"%matplotlib inline\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**load data**"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"load data successful !!!!!\n"
]
}
],
"source": [
"# Previously used input files, kept for reference (not loaded):\n",
"# data_iphone6p_75_10 = pd.read_csv(\"20170912.pm.csv\")\n",
"# data_iphone6p_1234 = pd.read_csv(\"20170920.pm.csv\")\n",
"# data_iphone6p_5 = pd.read_csv(\"20170922.pm.csv\")\n",
"# data_iphone6p_0 = pd.read_csv(\"20170925.am.csv\")\n",
"# data_iphone6p_0_0 = pd.read_csv(\"20170925.pm.csv\")\n",
"# data_iphone6p_246 = pd.read_csv(\"20171011.pm.csv\")\n",
"# data_test1 = pd.read_csv(\"./newData/test.csv\")\n",
"# data_test2 = pd.read_csv(\"./newData/nubia_test.csv\")\n",
"\n",
"# Later cells use data1 and data2 unconditionally, so a failed load must stop\n",
"# the run here instead of surfacing later as a NameError.\n",
"try:\n",
"    data1 = pd.read_csv(\"light.csv\")\n",
"    data2 = pd.read_csv(\"nature_light.csv\")\n",
"except Exception as load_error:\n",
"    # Report the cause, then re-raise so the traceback is preserved.\n",
"    print(\"load data error !!!!!!!!!!\", load_error)\n",
"    raise\n",
"else:\n",
"    print(\"load data successful !!!!!\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**分析数据**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" left_block_R left_block_G left_block_B left_block_H left_block_S \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 165.729696 134.358506 140.457706 178.764099 56.932838 \n",
"std 30.728156 39.645554 35.580123 62.957946 28.703214 \n",
"min 90.000000 51.000000 60.000000 7.000000 7.000000 \n",
"25% 145.000000 103.000000 114.000000 143.000000 31.000000 \n",
"50% 165.000000 135.000000 140.000000 202.000000 51.000000 \n",
"75% 186.000000 165.000000 167.000000 228.000000 83.000000 \n",
"max 247.000000 230.000000 233.000000 248.000000 119.000000 \n",
"\n",
" left_block_V left_block_l left_block_a left_block_b \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 166.771580 150.273458 140.877241 129.089342 \n",
"std 31.214224 35.806975 7.556304 2.173696 \n",
"min 90.000000 67.000000 124.000000 122.000000 \n",
"25% 146.000000 124.000000 135.000000 128.000000 \n",
"50% 166.000000 152.000000 141.000000 129.000000 \n",
"75% 188.000000 177.000000 148.000000 131.000000 \n",
"max 247.000000 231.000000 154.000000 135.000000 \n",
"\n",
" left_block_R_stddev ... right_grayHist right_grayMax \\\n",
"count 91301.000000 ... 91301.000000 91301.000000 \n",
"mean 11.584725 ... 124.195693 178.770342 \n",
"std 8.180077 ... 30.525042 22.532460 \n",
"min 0.000000 ... 49.000000 120.000000 \n",
"25% 4.000000 ... 104.000000 162.000000 \n",
"50% 11.000000 ... 122.000000 178.000000 \n",
"75% 18.000000 ... 144.000000 194.000000 \n",
"max 36.000000 ... 232.000000 251.000000 \n",
"\n",
" right_grayMin white_grayValue white_grayStddevValue white_grayHist \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 98.756312 192.484661 1.065191 193.134763 \n",
"std 27.773504 24.373492 3.633961 24.401654 \n",
"min 39.000000 102.000000 0.000000 0.000000 \n",
"25% 78.000000 175.000000 0.000000 175.000000 \n",
"50% 95.000000 194.000000 1.000000 195.000000 \n",
"75% 115.000000 208.000000 1.000000 208.000000 \n",
"max 196.000000 255.000000 52.000000 254.000000 \n",
"\n",
" white_grayMax white_grayMin whiteBalance index \n",
"count 91301.000000 91301.000000 91301.0 91301.000000 \n",
"mean 196.246043 189.544912 0.0 3.221476 \n",
"std 22.966470 26.690282 0.0 1.891621 \n",
"min 139.000000 49.000000 0.0 0.000000 \n",
"25% 179.000000 172.000000 0.0 2.000000 \n",
"50% 197.000000 192.000000 0.0 3.000000 \n",
"75% 211.000000 206.000000 0.0 5.000000 \n",
"max 255.000000 255.000000 0.0 6.000000 \n",
"\n",
"[8 rows x 152 columns]\n"
]
}
],
"source": [
"# data4 = data_iphone6p_246[data_iphone6p_246[\"whiteBalance\"] == 0]\n",
"# data2= data_iphone6p_1234[data_iphone6p_1234[\"whiteBalance\"] == 0 ]\n",
"# data1 = data_iphone6p_75_10[data_iphone6p_75_10[\"whiteBalance\"] == 0 ]\n",
"# data3 = data_iphone6p_5[data_iphone6p_5[\"whiteBalance\"] == 0]\n",
"# data0 = data_iphone6p_0[data_iphone6p_0[\"whiteBalance\"] == 0]\n",
"# data0_0 = data_iphone6p_0_0[data_iphone6p_0_0[\"whiteBalance\"] == 0]\n",
"\n",
"\n",
"#data_all = data2.append(data1[data1[\"index\"] == 5 ]).append(data3).append(data1[data1[\"index\"] == 7 ]).append(data1[data1[\"index\"] == 8 ]).append(data0).append(data0_0).append(data4)\n",
"\n",
"# Keep only the rows recorded with whiteBalance == 0 from each source file.\n",
"data1_0 = data1[data1[\"whiteBalance\"] == 0]\n",
"data2_0 = data2[data2[\"whiteBalance\"] == 0]\n",
"#data_test_0 = data_test\n",
"\n",
"# Stack both sources row-wise. DataFrame.append was removed in pandas 2.0;\n",
"# pd.concat with default arguments produces the same frame (original row labels kept).\n",
"data_all = pd.concat([data1_0, data2_0])\n",
"#data_all = pd.concat([data1, data2])\n",
"#data_all = data2\n",
"\n",
"\n",
"def _left_block_rgb_stddev(frame, level):\n",
"    \"\"\"Return the left-block R, G and B stddev columns of the rows whose `index` equals `level`.\"\"\"\n",
"    rows = frame[frame[\"index\"] == level]\n",
"    return (\n",
"        rows[\"left_block_R_stddev\"],\n",
"        rows[\"left_block_G_stddev\"],\n",
"        rows[\"left_block_B_stddev\"],\n",
"    )\n",
"\n",
"\n",
"# One (R, G, B) triple per `index` value 0..7.\n",
"# NOTE(review): the variables are named whiteBlock_* but read left_block_* columns -- confirm intent.\n",
"# The `eghit` spelling is kept so that any later reference to these names still resolves.\n",
"whiteBlock_R_one, whiteBlock_G_one, whiteBlock_B_one = _left_block_rgb_stddev(data_all, 0)\n",
"whiteBlock_R_two, whiteBlock_G_two, whiteBlock_B_two = _left_block_rgb_stddev(data_all, 1)\n",
"whiteBlock_R_three, whiteBlock_G_three, whiteBlock_B_three = _left_block_rgb_stddev(data_all, 2)\n",
"whiteBlock_R_four, whiteBlock_G_four, whiteBlock_B_four = _left_block_rgb_stddev(data_all, 3)\n",
"whiteBlock_R_five, whiteBlock_G_five, whiteBlock_B_five = _left_block_rgb_stddev(data_all, 4)\n",
"whiteBlock_R_six, whiteBlock_G_six, whiteBlock_B_six = _left_block_rgb_stddev(data_all, 5)\n",
"whiteBlock_R_seven, whiteBlock_G_seven, whiteBlock_B_seven = _left_block_rgb_stddev(data_all, 6)\n",
"whiteBlock_R_eghit, whiteBlock_G_eghit, whiteBlock_B_eghit = _left_block_rgb_stddev(data_all, 7)\n",
"\n",
"print(data_all.describe())\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['dateTime', 'left_block_R', 'left_block_G', 'left_block_B',\n",
" 'left_block_H', 'left_block_S', 'left_block_V', 'left_block_l',\n",
" 'left_block_a', 'left_block_b',\n",
" ...\n",
" 'right_grayHist', 'right_grayMax', 'right_grayMin', 'white_grayValue',\n",
" 'white_grayStddevValue', 'white_grayHist', 'white_grayMax',\n",
" 'white_grayMin', 'whiteBalance', 'index'],\n",
" dtype='object', length=153)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_all.columns"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"hsv max min hist value h值要去掉"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>left_block_R</th>\n",
" <th>left_block_G</th>\n",
" <th>left_block_B</th>\n",
" <th>left_block_H</th>\n",
" <th>left_block_S</th>\n",
" <th>left_block_V</th>\n",
" <th>left_block_l</th>\n",
" <th>left_block_a</th>\n",
" <th>left_block_b</th>\n",
" <th>left_block_R_stddev</th>\n",
" <th>...</th>\n",
" <th>right_grayValue</th>\n",
" <th>right_grayStddevValue</th>\n",
" <th>right_grayHist</th>\n",
" <th>right_grayMax</th>\n",
" <th>right_grayMin</th>\n",
" <th>white_grayValue</th>\n",
" <th>white_grayStddevValue</th>\n",
" <th>white_grayHist</th>\n",
" <th>white_grayMax</th>\n",
" <th>white_grayMin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>...</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>165.729696</td>\n",
" <td>134.358506</td>\n",
" <td>140.457706</td>\n",
" <td>178.764099</td>\n",
" <td>56.932838</td>\n",
" <td>166.771580</td>\n",
" <td>150.273458</td>\n",
" <td>140.877241</td>\n",
" <td>129.089342</td>\n",
" <td>11.584725</td>\n",
" <td>...</td>\n",
" <td>136.328233</td>\n",
" <td>20.844142</td>\n",
" <td>124.195693</td>\n",
" <td>178.770342</td>\n",
" <td>98.756312</td>\n",
" <td>192.484661</td>\n",
" <td>1.065191</td>\n",
" <td>193.134763</td>\n",
" <td>196.246043</td>\n",
" <td>189.544912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>30.728156</td>\n",
" <td>39.645554</td>\n",
" <td>35.580123</td>\n",
" <td>62.957946</td>\n",
" <td>28.703214</td>\n",
" <td>31.214224</td>\n",
" <td>35.806975</td>\n",
" <td>7.556304</td>\n",
" <td>2.173696</td>\n",
" <td>8.180077</td>\n",
" <td>...</td>\n",
" <td>25.583188</td>\n",
" <td>4.952730</td>\n",
" <td>30.525042</td>\n",
" <td>22.532460</td>\n",
" <td>27.773504</td>\n",
" <td>24.373492</td>\n",
" <td>3.633961</td>\n",
" <td>24.401654</td>\n",
" <td>22.966470</td>\n",
" <td>26.690282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>90.000000</td>\n",
" <td>51.000000</td>\n",
" <td>60.000000</td>\n",
" <td>7.000000</td>\n",
" <td>7.000000</td>\n",
" <td>90.000000</td>\n",
" <td>67.000000</td>\n",
" <td>124.000000</td>\n",
" <td>122.000000</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>81.000000</td>\n",
" <td>9.000000</td>\n",
" <td>49.000000</td>\n",
" <td>120.000000</td>\n",
" <td>39.000000</td>\n",
" <td>102.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>139.000000</td>\n",
" <td>49.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>145.000000</td>\n",
" <td>103.000000</td>\n",
" <td>114.000000</td>\n",
" <td>143.000000</td>\n",
" <td>31.000000</td>\n",
" <td>146.000000</td>\n",
" <td>124.000000</td>\n",
" <td>135.000000</td>\n",
" <td>128.000000</td>\n",
" <td>4.000000</td>\n",
" <td>...</td>\n",
" <td>119.000000</td>\n",
" <td>17.000000</td>\n",
" <td>104.000000</td>\n",
" <td>162.000000</td>\n",
" <td>78.000000</td>\n",
" <td>175.000000</td>\n",
" <td>0.000000</td>\n",
" <td>175.000000</td>\n",
" <td>179.000000</td>\n",
" <td>172.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>165.000000</td>\n",
" <td>135.000000</td>\n",
" <td>140.000000</td>\n",
" <td>202.000000</td>\n",
" <td>51.000000</td>\n",
" <td>166.000000</td>\n",
" <td>152.000000</td>\n",
" <td>141.000000</td>\n",
" <td>129.000000</td>\n",
" <td>11.000000</td>\n",
" <td>...</td>\n",
" <td>135.000000</td>\n",
" <td>21.000000</td>\n",
" <td>122.000000</td>\n",
" <td>178.000000</td>\n",
" <td>95.000000</td>\n",
" <td>194.000000</td>\n",
" <td>1.000000</td>\n",
" <td>195.000000</td>\n",
" <td>197.000000</td>\n",
" <td>192.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>186.000000</td>\n",
" <td>165.000000</td>\n",
" <td>167.000000</td>\n",
" <td>228.000000</td>\n",
" <td>83.000000</td>\n",
" <td>188.000000</td>\n",
" <td>177.000000</td>\n",
" <td>148.000000</td>\n",
" <td>131.000000</td>\n",
" <td>18.000000</td>\n",
" <td>...</td>\n",
" <td>153.000000</td>\n",
" <td>24.000000</td>\n",
" <td>144.000000</td>\n",
" <td>194.000000</td>\n",
" <td>115.000000</td>\n",
" <td>208.000000</td>\n",
" <td>1.000000</td>\n",
" <td>208.000000</td>\n",
" <td>211.000000</td>\n",
" <td>206.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>247.000000</td>\n",
" <td>230.000000</td>\n",
" <td>233.000000</td>\n",
" <td>248.000000</td>\n",
" <td>119.000000</td>\n",
" <td>247.000000</td>\n",
" <td>231.000000</td>\n",
" <td>154.000000</td>\n",
" <td>135.000000</td>\n",
" <td>36.000000</td>\n",
" <td>...</td>\n",
" <td>215.000000</td>\n",
" <td>35.000000</td>\n",
" <td>232.000000</td>\n",
" <td>251.000000</td>\n",
" <td>196.000000</td>\n",
" <td>255.000000</td>\n",
" <td>52.000000</td>\n",
" <td>254.000000</td>\n",
" <td>255.000000</td>\n",
" <td>255.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 150 columns</p>\n",
"</div>"
],
"text/plain": [
" left_block_R left_block_G left_block_B left_block_H left_block_S \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 165.729696 134.358506 140.457706 178.764099 56.932838 \n",
"std 30.728156 39.645554 35.580123 62.957946 28.703214 \n",
"min 90.000000 51.000000 60.000000 7.000000 7.000000 \n",
"25% 145.000000 103.000000 114.000000 143.000000 31.000000 \n",
"50% 165.000000 135.000000 140.000000 202.000000 51.000000 \n",
"75% 186.000000 165.000000 167.000000 228.000000 83.000000 \n",
"max 247.000000 230.000000 233.000000 248.000000 119.000000 \n",
"\n",
" left_block_V left_block_l left_block_a left_block_b \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 166.771580 150.273458 140.877241 129.089342 \n",
"std 31.214224 35.806975 7.556304 2.173696 \n",
"min 90.000000 67.000000 124.000000 122.000000 \n",
"25% 146.000000 124.000000 135.000000 128.000000 \n",
"50% 166.000000 152.000000 141.000000 129.000000 \n",
"75% 188.000000 177.000000 148.000000 131.000000 \n",
"max 247.000000 231.000000 154.000000 135.000000 \n",
"\n",
" left_block_R_stddev ... right_grayValue \\\n",
"count 91301.000000 ... 91301.000000 \n",
"mean 11.584725 ... 136.328233 \n",
"std 8.180077 ... 25.583188 \n",
"min 0.000000 ... 81.000000 \n",
"25% 4.000000 ... 119.000000 \n",
"50% 11.000000 ... 135.000000 \n",
"75% 18.000000 ... 153.000000 \n",
"max 36.000000 ... 215.000000 \n",
"\n",
" right_grayStddevValue right_grayHist right_grayMax right_grayMin \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 20.844142 124.195693 178.770342 98.756312 \n",
"std 4.952730 30.525042 22.532460 27.773504 \n",
"min 9.000000 49.000000 120.000000 39.000000 \n",
"25% 17.000000 104.000000 162.000000 78.000000 \n",
"50% 21.000000 122.000000 178.000000 95.000000 \n",
"75% 24.000000 144.000000 194.000000 115.000000 \n",
"max 35.000000 232.000000 251.000000 196.000000 \n",
"\n",
" white_grayValue white_grayStddevValue white_grayHist white_grayMax \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 192.484661 1.065191 193.134763 196.246043 \n",
"std 24.373492 3.633961 24.401654 22.966470 \n",
"min 102.000000 0.000000 0.000000 139.000000 \n",
"25% 175.000000 0.000000 175.000000 179.000000 \n",
"50% 194.000000 1.000000 195.000000 197.000000 \n",
"75% 208.000000 1.000000 208.000000 211.000000 \n",
"max 255.000000 52.000000 254.000000 255.000000 \n",
"\n",
" white_grayMin \n",
"count 91301.000000 \n",
"mean 189.544912 \n",
"std 26.690282 \n",
"min 49.000000 \n",
"25% 172.000000 \n",
"50% 192.000000 \n",
"75% 206.000000 \n",
"max 255.000000 \n",
"\n",
"[8 rows x 150 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"\n",
"# Labels come from the `index` column. Features are every other column except\n",
"# the dateTime column and whiteBalance (already filtered to 0 when data_all was built).\n",
"train_labels = data_all[\"index\"]\n",
"non_feature_columns = [\"dateTime\", \"index\", \"whiteBalance\"]\n",
"train_features = data_all.drop(non_feature_columns, axis=1)\n",
"\n",
"train_features.describe()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Suffixes of the per-channel statistics; \"\" is the plain channel value column.\n",
"STAT_SUFFIXES = [\"\", \"_stddev\", \"_hist\", \"_max\", \"_min\"]\n",
"\n",
"# Remove every H/S/V column (all statistics) of the left, right and white blocks.\n",
"# A single drop is atomic: if any column is missing, a KeyError is raised and\n",
"# train_features is left untouched. Re-running this cell without re-running the\n",
"# previous one therefore raises KeyError, because the columns are already gone.\n",
"hsv_columns = [\n",
"    region + \"_\" + channel + suffix\n",
"    for suffix in STAT_SUFFIXES\n",
"    for region in (\"left_block\", \"right_block\", \"whiteBlock\")\n",
"    for channel in \"HSV\"\n",
"]\n",
"train_features = train_features.drop(hsv_columns, axis=1)\n",
"\n",
"# Add left-minus-right differences for every remaining R/G/B and l/a/b statistic.\n",
"# HSV differences are not computed because those columns were dropped above.\n",
"# The \"lelf_right_\" spelling is the actual column name written to the frame; keep it.\n",
"for suffix in STAT_SUFFIXES:\n",
"    for channel in \"RGBlab\":\n",
"        stat = channel + suffix\n",
"        train_features[\"lelf_right_\" + stat] = (\n",
"            train_features[\"left_block_\" + stat] - train_features[\"right_block_\" + stat]\n",
"        )\n",
"\n",
"# The gray-level columns follow a different naming scheme, so map them explicitly:\n",
"# (suffix of the new column, suffix of the left_/right_ source columns).\n",
"gray_stats = [\n",
"    (\"value\", \"grayValue\"),\n",
"    (\"stddev\", \"grayStddevValue\"),\n",
"    (\"hist\", \"grayHist\"),\n",
"    (\"max\", \"grayMax\"),\n",
"    (\"min\", \"grayMin\"),\n",
"]\n",
"for short_name, source_name in gray_stats:\n",
"    train_features[\"lelf_right_gray_\" + short_name] = (\n",
"        train_features[\"left_\" + source_name] - train_features[\"right_\" + source_name]\n",
"    )\n",
"\n",
"train_features.describe()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>left_block_H</th>\n",
" <th>left_block_S</th>\n",
" <th>left_block_V</th>\n",
" <th>left_block_l</th>\n",
" <th>left_block_a</th>\n",
" <th>left_block_b</th>\n",
" <th>lelf_right_H</th>\n",
" <th>lelf_right_S</th>\n",
" <th>lelf_right_V</th>\n",
" <th>lelf_right_l</th>\n",
" <th>...</th>\n",
" <th>lelf_right_S_min</th>\n",
" <th>lelf_right_V_min</th>\n",
" <th>lelf_right_l_min</th>\n",
" <th>lelf_right_a_min</th>\n",
" <th>lelf_right_b_min</th>\n",
" <th>lelf_right_gray_value</th>\n",
" <th>lelf_right_gray_stddev</th>\n",
" <th>lelf_right_gray_hist</th>\n",
" <th>lelf_right_gray_max</th>\n",
" <th>lelf_right_gray_min</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>...</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" <td>91301.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>178.764099</td>\n",
" <td>56.932838</td>\n",
" <td>166.771580</td>\n",
" <td>150.273458</td>\n",
" <td>140.877241</td>\n",
" <td>129.089342</td>\n",
" <td>-30.762193</td>\n",
" <td>-11.383139</td>\n",
" <td>2.284028</td>\n",
" <td>8.190655</td>\n",
" <td>...</td>\n",
" <td>-1.485909</td>\n",
" <td>7.071325</td>\n",
" <td>16.983593</td>\n",
" <td>-1.059320</td>\n",
" <td>0.402131</td>\n",
" <td>8.089287</td>\n",
" <td>-3.907175</td>\n",
" <td>11.542732</td>\n",
" <td>2.554802</td>\n",
" <td>17.138443</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>62.957946</td>\n",
" <td>28.703214</td>\n",
" <td>31.214224</td>\n",
" <td>35.806975</td>\n",
" <td>7.556304</td>\n",
" <td>2.173696</td>\n",
" <td>64.432921</td>\n",
" <td>35.029158</td>\n",
" <td>29.075898</td>\n",
" <td>38.617100</td>\n",
" <td>...</td>\n",
" <td>8.281171</td>\n",
" <td>44.608714</td>\n",
" <td>53.477432</td>\n",
" <td>3.800465</td>\n",
" <td>2.222573</td>\n",
" <td>38.236717</td>\n",
" <td>12.018341</td>\n",
" <td>51.666312</td>\n",
" <td>14.254111</td>\n",
" <td>52.269917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>7.000000</td>\n",
" <td>7.000000</td>\n",
" <td>90.000000</td>\n",
" <td>67.000000</td>\n",
" <td>124.000000</td>\n",
" <td>122.000000</td>\n",
" <td>-222.000000</td>\n",
" <td>-84.000000</td>\n",
" <td>-52.000000</td>\n",
" <td>-56.000000</td>\n",
" <td>...</td>\n",
" <td>-38.000000</td>\n",
" <td>-71.000000</td>\n",
" <td>-65.000000</td>\n",
" <td>-12.000000</td>\n",
" <td>-9.000000</td>\n",
" <td>-57.000000</td>\n",
" <td>-31.000000</td>\n",
" <td>-126.000000</td>\n",
" <td>-33.000000</td>\n",
" <td>-65.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>143.000000</td>\n",
" <td>31.000000</td>\n",
" <td>146.000000</td>\n",
" <td>124.000000</td>\n",
" <td>135.000000</td>\n",
" <td>128.000000</td>\n",
" <td>-76.000000</td>\n",
" <td>-43.000000</td>\n",
" <td>-26.000000</td>\n",
" <td>-30.000000</td>\n",
" <td>...</td>\n",
" <td>-7.000000</td>\n",
" <td>-38.000000</td>\n",
" <td>-37.000000</td>\n",
" <td>-4.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>-30.000000</td>\n",
" <td>-16.000000</td>\n",
" <td>-40.000000</td>\n",
" <td>-10.000000</td>\n",
" <td>-35.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>202.000000</td>\n",
" <td>51.000000</td>\n",
" <td>166.000000</td>\n",
" <td>152.000000</td>\n",
" <td>141.000000</td>\n",
" <td>129.000000</td>\n",
" <td>-7.000000</td>\n",
" <td>-20.000000</td>\n",
" <td>12.000000</td>\n",
" <td>19.000000</td>\n",
" <td>...</td>\n",
" <td>-1.000000</td>\n",
" <td>18.000000</td>\n",
" <td>27.000000</td>\n",
" <td>-1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>19.000000</td>\n",
" <td>-5.000000</td>\n",
" <td>20.000000</td>\n",
" <td>5.000000</td>\n",
" <td>27.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>228.000000</td>\n",
" <td>83.000000</td>\n",
" <td>188.000000</td>\n",
" <td>177.000000</td>\n",
" <td>148.000000</td>\n",
" <td>131.000000</td>\n",
" <td>18.000000</td>\n",
" <td>22.000000</td>\n",
" <td>29.000000</td>\n",
" <td>44.000000</td>\n",
" <td>...</td>\n",
" <td>4.000000</td>\n",
" <td>51.000000</td>\n",
" <td>69.000000</td>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" <td>44.000000</td>\n",
" <td>8.000000</td>\n",
" <td>56.000000</td>\n",
" <td>15.000000</td>\n",
" <td>69.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>248.000000</td>\n",
" <td>119.000000</td>\n",
" <td>247.000000</td>\n",
" <td>231.000000</td>\n",
" <td>154.000000</td>\n",
" <td>135.000000</td>\n",
" <td>152.000000</td>\n",
" <td>46.000000</td>\n",
" <td>60.000000</td>\n",
" <td>80.000000</td>\n",
" <td>...</td>\n",
" <td>35.000000</td>\n",
" <td>93.000000</td>\n",
" <td>127.000000</td>\n",
" <td>11.000000</td>\n",
" <td>10.000000</td>\n",
" <td>80.000000</td>\n",
" <td>19.000000</td>\n",
" <td>120.000000</td>\n",
" <td>50.000000</td>\n",
" <td>127.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 41 columns</p>\n",
"</div>"
],
"text/plain": [
" left_block_H left_block_S left_block_V left_block_l left_block_a \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 178.764099 56.932838 166.771580 150.273458 140.877241 \n",
"std 62.957946 28.703214 31.214224 35.806975 7.556304 \n",
"min 7.000000 7.000000 90.000000 67.000000 124.000000 \n",
"25% 143.000000 31.000000 146.000000 124.000000 135.000000 \n",
"50% 202.000000 51.000000 166.000000 152.000000 141.000000 \n",
"75% 228.000000 83.000000 188.000000 177.000000 148.000000 \n",
"max 248.000000 119.000000 247.000000 231.000000 154.000000 \n",
"\n",
" left_block_b lelf_right_H lelf_right_S lelf_right_V lelf_right_l \\\n",
"count 91301.000000 91301.000000 91301.000000 91301.000000 91301.000000 \n",
"mean 129.089342 -30.762193 -11.383139 2.284028 8.190655 \n",
"std 2.173696 64.432921 35.029158 29.075898 38.617100 \n",
"min 122.000000 -222.000000 -84.000000 -52.000000 -56.000000 \n",
"25% 128.000000 -76.000000 -43.000000 -26.000000 -30.000000 \n",
"50% 129.000000 -7.000000 -20.000000 12.000000 19.000000 \n",
"75% 131.000000 18.000000 22.000000 29.000000 44.000000 \n",
"max 135.000000 152.000000 46.000000 60.000000 80.000000 \n",
"\n",
" ... lelf_right_S_min lelf_right_V_min \\\n",
"count ... 91301.000000 91301.000000 \n",
"mean ... -1.485909 7.071325 \n",
"std ... 8.281171 44.608714 \n",
"min ... -38.000000 -71.000000 \n",
"25% ... -7.000000 -38.000000 \n",
"50% ... -1.000000 18.000000 \n",
"75% ... 4.000000 51.000000 \n",
"max ... 35.000000 93.000000 \n",
"\n",
" lelf_right_l_min lelf_right_a_min lelf_right_b_min \\\n",
"count 91301.000000 91301.000000 91301.000000 \n",
"mean 16.983593 -1.059320 0.402131 \n",
"std 53.477432 3.800465 2.222573 \n",
"min -65.000000 -12.000000 -9.000000 \n",
"25% -37.000000 -4.000000 -1.000000 \n",
"50% 27.000000 -1.000000 0.000000 \n",
"75% 69.000000 2.000000 2.000000 \n",
"max 127.000000 11.000000 10.000000 \n",
"\n",
" lelf_right_gray_value lelf_right_gray_stddev lelf_right_gray_hist \\\n",
"count 91301.000000 91301.000000 91301.000000 \n",
"mean 8.089287 -3.907175 11.542732 \n",
"std 38.236717 12.018341 51.666312 \n",
"min -57.000000 -31.000000 -126.000000 \n",
"25% -30.000000 -16.000000 -40.000000 \n",
"50% 19.000000 -5.000000 20.000000 \n",
"75% 44.000000 8.000000 56.000000 \n",
"max 80.000000 19.000000 120.000000 \n",
"\n",
" lelf_right_gray_max lelf_right_gray_min \n",
"count 91301.000000 91301.000000 \n",
"mean 2.554802 17.138443 \n",
"std 14.254111 52.269917 \n",
"min -33.000000 -65.000000 \n",
"25% -10.000000 -35.000000 \n",
"50% 5.000000 27.000000 \n",
"75% 15.000000 69.000000 \n",
"max 50.000000 127.000000 \n",
"\n",
"[8 rows x 41 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_features_9 = pd.DataFrame()\n",
"#train_features_9['lelf_right_R'] = train_features['left_block_R'] - train_features['right_block_R']\n",
"#train_features_9['lelf_right_G'] = train_features['left_block_G'] - train_features['right_block_G']\n",
"#train_features_9['lelf_right_B'] = train_features['left_block_B'] - train_features['right_block_B']\n",
"\n",
"train_features_9['left_block_H'] = train_features['left_block_H']\n",
"train_features_9['left_block_S'] = train_features['left_block_S']\n",
"train_features_9['left_block_V'] = train_features['left_block_V']\n",
"train_features_9['left_block_l'] = train_features['left_block_l']\n",
"train_features_9['left_block_a'] = train_features['left_block_a']\n",
"train_features_9['left_block_b'] = train_features['left_block_b']\n",
"\n",
"train_features_9['lelf_right_H'] = train_features['left_block_H'] - train_features['right_block_H']\n",
"train_features_9['lelf_right_S'] = train_features['left_block_S'] - train_features['right_block_S']\n",
"train_features_9['lelf_right_V'] = train_features['left_block_V'] - train_features['right_block_V']\n",
"\n",
"train_features_9['lelf_right_l'] = train_features['left_block_l'] - train_features['right_block_l']\n",
"train_features_9['lelf_right_a'] = train_features['left_block_a'] - train_features['right_block_a']\n",
"train_features_9['lelf_right_b'] = train_features['left_block_b'] - train_features['right_block_b']\n",
"\n",
"#train_features_9['lelf_right_R_stddev'] = train_features['left_block_R_stddev'] - train_features['right_block_R_stddev']\n",
"#train_features_9['lelf_right_G_stddev'] = train_features['left_block_G_stddev'] - train_features['right_block_G_stddev']\n",
"#train_features_9['lelf_right_B_stddev'] = train_features['left_block_B_stddev'] - train_features['right_block_B_stddev']\n",
"\n",
"train_features_9['lelf_right_H_stddev'] = train_features['left_block_H_stddev'] - train_features['right_block_H_stddev']\n",
"train_features_9['lelf_right_S_stddev'] = train_features['left_block_S_stddev'] - train_features['right_block_S_stddev']\n",
"train_features_9['lelf_right_V_stddev'] = train_features['left_block_V_stddev'] - train_features['right_block_V_stddev']\n",
"\n",
"train_features_9['lelf_right_l_stddev'] = train_features['left_block_l_stddev'] - train_features['right_block_l_stddev']\n",
"train_features_9['lelf_right_a_stddev'] = train_features['left_block_a_stddev'] - train_features['right_block_a_stddev']\n",
"train_features_9['lelf_right_b_stddev'] = train_features['left_block_b_stddev'] - train_features['right_block_b_stddev']\n",
"\n",
"#train_features_9['lelf_right_R_hist'] = train_features['left_block_R_hist'] - train_features['right_block_R_hist']\n",
"#train_features_9['lelf_right_G_hist'] = train_features['left_block_G_hist'] - train_features['right_block_G_hist']\n",
"#train_features_9['lelf_right_B_hist'] = train_features['left_block_B_hist'] - train_features['right_block_B_hist']\n",
"\n",
"train_features_9['lelf_right_H_hist'] = train_features['left_block_H_hist'] - train_features['right_block_H_hist']\n",
"train_features_9['lelf_right_S_hist'] = train_features['left_block_S_hist'] - train_features['right_block_S_hist']\n",
"train_features_9['lelf_right_V_hist'] = train_features['left_block_V_hist'] - train_features['right_block_V_hist']\n",
"\n",
"train_features_9['lelf_right_l_hist'] = train_features['left_block_l_hist'] - train_features['right_block_l_hist']\n",
"train_features_9['lelf_right_a_hist'] = train_features['left_block_a_hist'] - train_features['right_block_a_hist']\n",
"train_features_9['lelf_right_b_hist'] = train_features['left_block_b_hist'] - train_features['right_block_b_hist']\n",
"\n",
"#train_features_9['lelf_right_R_max'] = train_features['left_block_R_max'] - train_features['right_block_R_max']\n",
"#train_features_9['lelf_right_G_max'] = train_features['left_block_G_max'] - train_features['right_block_G_max']\n",
"#train_features_9['lelf_right_B_max'] = train_features['left_block_B_max'] - train_features['right_block_B_max']\n",
"\n",
"train_features_9['lelf_right_H_max'] = train_features['left_block_H_max'] - train_features['right_block_H_max']\n",
"train_features_9['lelf_right_S_max'] = train_features['left_block_S_max'] - train_features['right_block_S_max']\n",
"train_features_9['lelf_right_V_max'] = train_features['left_block_V_max'] - train_features['right_block_V_max']\n",
"\n",
"train_features_9['lelf_right_l_max'] = train_features['left_block_l_max'] - train_features['right_block_l_max']\n",
"train_features_9['lelf_right_a_max'] = train_features['left_block_a_max'] - train_features['right_block_a_max']\n",
"train_features_9['lelf_right_b_max'] = train_features['left_block_b_max'] - train_features['right_block_b_max']\n",
"\n",
"#train_features_9['lelf_right_R_min'] = train_features['left_block_R_min'] - train_features['right_block_R_min']\n",
"#train_features_9['lelf_right_G_min'] = train_features['left_block_G_min'] - train_features['right_block_G_min']\n",
"#train_features_9['lelf_right_B_min'] = train_features['left_block_B_min'] - train_features['right_block_B_min']\n",
"\n",
"train_features_9['lelf_right_H_min'] = train_features['left_block_H_min'] - train_features['right_block_H_min']\n",
"train_features_9['lelf_right_S_min'] = train_features['left_block_S_min'] - train_features['right_block_S_min']\n",
"train_features_9['lelf_right_V_min'] = train_features['left_block_V_min'] - train_features['right_block_V_min']\n",
"\n",
"train_features_9['lelf_right_l_min'] = train_features['left_block_l_min'] - train_features['right_block_l_min']\n",
"train_features_9['lelf_right_a_min'] = train_features['left_block_a_min'] - train_features['right_block_a_min']\n",
"train_features_9['lelf_right_b_min'] = train_features['left_block_b_min'] - train_features['right_block_b_min']\n",
"\n",
"# train_features_9['left_grayValue']= train_features['left_grayValue'];\n",
"# train_features_9['left_grayStddevValue']= train_features['left_grayStddevValue'];\n",
"# train_features_9['left_grayHist']= train_features['left_grayHist'];\n",
"# train_features_9['left_grayMax']= train_features['left_grayMax'];\n",
"# train_features_9['left_grayMin']= train_features['left_grayMin'];\n",
"\n",
"# train_features_9['right_grayValue']= train_features['right_grayValue'];\n",
"# train_features_9['right_grayStddevValue']= train_features['right_grayStddevValue'];\n",
"# train_features_9['right_grayHist']= train_features['right_grayHist'];\n",
"# train_features_9['right_grayMax']= train_features['right_grayMax'];\n",
"# train_features_9['right_grayMin']= train_features['right_grayMin'];\n",
"\n",
"# train_features_9['lelf_R_stddev'] = train_features['left_block_R_stddev'] \n",
"# train_features_9['lelf_G_stddev'] = train_features['left_block_G_stddev'] \n",
"# train_features_9['lelf_B_stddev'] = train_features['left_block_B_stddev'] \n",
"\n",
"# train_features_9['left_block_R_min'] = train_features['left_block_R_min'] \n",
"# train_features_9['left_block_G_min'] = train_features['left_block_G_min'] \n",
"# train_features_9['left_block_B_min'] = train_features['left_block_B_min'] \n",
"\n",
"\n",
"train_features_9['lelf_right_gray_value'] = train_features['left_grayValue'] - train_features['right_grayValue']\n",
"train_features_9['lelf_right_gray_stddev'] = train_features['left_grayStddevValue'] - train_features['right_grayStddevValue']\n",
"train_features_9['lelf_right_gray_hist'] = train_features['left_grayHist'] - train_features['right_grayHist']\n",
"train_features_9['lelf_right_gray_max'] = train_features['left_grayMax'] - train_features['right_grayMax']\n",
"train_features_9['lelf_right_gray_min'] = train_features['left_grayMin'] - train_features['right_grayMin']\n",
"#train_features_9['index'] = train_labels\n",
"train_features_9.describe()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**去掉左边块的方差和白块和右边块的特征**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# train_features = train_features.drop(\"left_block_R\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_G\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_B\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_R_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_G_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_B_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_B_stddev\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_R_max\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_G_max\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_B_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_hist\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_max\",axis=1)\n",
"##################################################################\n",
"\n",
"# train_features = train_features.drop(\"right_block_R\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_G\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_B\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_B_stddev\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_R_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_G_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_B_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_R_max\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_G_max\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_B_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_hist\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_max\",axis=1)\n",
"\n",
"####################################################################\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_max\",axis=1)\n",
"\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_max\",axis=1)\n",
"\n",
"\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_hist\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_max\",axis=1)\n",
"\n",
"##################################################################\n",
"\n",
"\n",
"\n",
"train_features.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**去掉所有块的方差特征**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"train_features = train_features.drop(\"left_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_stddev\",axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# train_features = train_features.drop(\"left_block_R\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_G\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_B\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_H\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_S\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_V\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_l\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_a\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_b\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_R\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_G\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_B\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_H\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_S\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_V\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_l\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_a\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_b\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_R\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_G\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_B\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_H\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_S\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_V\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_l\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_a\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_b\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_R_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_G_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_B_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_H_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_S_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_V_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"left_block_l_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_a_hist\",axis=1)\n",
"# train_features = train_features.drop(\"left_block_b_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_R_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_G_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_B_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_H_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_S_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_V_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"right_block_l_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_a_hist\",axis=1)\n",
"# train_features = train_features.drop(\"right_block_b_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_R_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_G_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_B_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_H_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_S_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_V_hist\",axis=1)\n",
"\n",
"# train_features = train_features.drop(\"whiteBlock_l_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_a_hist\",axis=1)\n",
"# train_features = train_features.drop(\"whiteBlock_b_hist\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_stddev\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_stddev\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_R_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_G_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_B_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"left_block_l_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_a_max\",axis=1)\n",
"train_features = train_features.drop(\"left_block_b_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_R_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_G_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_B_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_H_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_S_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"right_block_l_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_a_max\",axis=1)\n",
"train_features = train_features.drop(\"right_block_b_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_R_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_G_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_B_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_H_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_S_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_V_max\",axis=1)\n",
"\n",
"train_features = train_features.drop(\"whiteBlock_l_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_a_max\",axis=1)\n",
"train_features = train_features.drop(\"whiteBlock_b_max\",axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"train_features.describe()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n",
" from numpy.core.umath_tests import inner1d\n",
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"source": [
"#from sklearn.model_selection import KFold\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import f1_score\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"\n",
"\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"from sklearn.cross_validation import train_test_split\n",
"X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.5, random_state = 0)\n",
"#X_train ,X_test,y_train,y_test = train_test_split(train_features_9,train_labels,test_size = 0.2, random_state = 20)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-----------classification_report----------------------\n",
"\n",
" precision recall f1-score support\n",
"\n",
" 0 0.00 0.00 0.00 7138\n",
" 1 0.00 0.00 0.00 0\n",
" 2 0.00 0.00 0.00 0\n",
" 5 0.73 0.99 0.84 4133\n",
" 6 0.99 0.78 0.87 6989\n",
"\n",
"avg / total 0.54 0.52 0.52 18260\n",
"\n",
"-----------cm----------------------\n",
"\n",
"[[ 0 7097 41 0 0]\n",
" [ 0 0 0 0 0]\n",
" [ 0 0 0 0 0]\n",
" [ 0 0 0 4072 61]\n",
" [ 0 0 0 1524 5465]]\n",
"---------------------------------\n",
"\n",
"Accuracy of prediction: 0.0\n",
"---------------------------------\n",
"\n",
"DecisionTree accuracy score: 0.522289156626506\n",
"f1 score: 0.522289156626506\n",
"precision_score: 0.522289156626506\n",
"recall_score: 0.522289156626506\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n",
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1137: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n",
" 'recall', 'true', average, warn_for)\n"
]
}
],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.metrics import classification_report,confusion_matrix\n",
"\n",
"dtree = DecisionTreeClassifier(criterion='gini',max_depth=None)\n",
"dtree.fit(X_train,y_train)\n",
"predictions = dtree.predict(X_test)\n",
"\n",
"print(\"-----------classification_report----------------------\\n\")\n",
"print(classification_report(y_test,predictions))\n",
"print(\"-----------cm----------------------\\n\")\n",
"cm=confusion_matrix(y_test,predictions)\n",
"print(cm)\n",
"print(\"---------------------------------\\n\")\n",
"print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
"print(\"---------------------------------\\n\")\n",
"print (\"DecisionTree accuracy score:\" , accuracy_score(y_test,predictions))\n",
"print (\"f1 score:\" , f1_score(y_test,predictions,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,predictions,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,predictions,average='micro'))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.00 0.00 0.00 7138\n",
" 1 0.00 0.00 0.00 0\n",
" 2 0.00 0.00 0.00 0\n",
" 5 0.73 0.99 0.84 4133\n",
" 6 0.99 0.78 0.87 6989\n",
"\n",
"avg / total 0.54 0.52 0.52 18260\n",
"\n",
"[[ 0 7138 0 0]\n",
" [ 0 0 0 0]\n",
" [ 0 0 4090 43]\n",
" [ 0 0 1967 5022]]\n",
"---------------------------------\n",
"\n",
"Accuracy of prediction: 0.0\n",
"RandomForest accuracy score: 0.49901423877327494\n",
"---------------------------------\n",
"\n",
"f1 score: 0.49901423877327494\n",
"precision_score: 0.49901423877327494\n",
"recall_score: 0.49901423877327494\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1135: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
" 'precision', 'predicted', average, warn_for)\n",
"d:\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1137: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples.\n",
" 'recall', 'true', average, warn_for)\n"
]
}
],
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import classification_report\n",
"\n",
"rfc = RandomForestClassifier(n_estimators=600)\n",
"rfc.fit(X_train, y_train)\n",
"rfc_pred = rfc.predict(X_test)\n",
"cr = classification_report(y_test,predictions)\n",
"print(cr)\n",
"cm = confusion_matrix(y_test,rfc_pred)\n",
"print(cm)\n",
"\n",
"print(\"---------------------------------\\n\")\n",
"print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
"print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
"print(\"---------------------------------\\n\")\n",
"print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KFold(n_splits=5, random_state=None, shuffle=False)\n",
"TRAIN: [18261 18262 18263 ... 91298 91299 91300] TEST: [ 0 1 2 ... 18258 18259 18260]\n",
"svm linear accuracy score: 0.8475987076282788\n",
"f1 score: 0.8475987076282787\n",
"precision_score: 0.8475987076282788\n",
"recall_score: 0.8475987076282788\n",
"runing time: 0:00:41.736420\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [18261 18262 18263 ... 36518 36519 36520]\n",
"svm linear accuracy score: 0.6246987951807229\n",
"f1 score: 0.6246987951807229\n",
"precision_score: 0.6246987951807229\n",
"recall_score: 0.6246987951807229\n",
"runing time: 0:00:44.495668\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [36521 36522 36523 ... 54778 54779 54780]\n",
"svm linear accuracy score: 0.6128148959474261\n",
"f1 score: 0.6128148959474261\n",
"precision_score: 0.6128148959474261\n",
"recall_score: 0.6128148959474261\n",
"runing time: 0:00:37.768042\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [54781 54782 54783 ... 73038 73039 73040]\n",
"svm linear accuracy score: 0.6917853231106244\n",
"f1 score: 0.6917853231106244\n",
"precision_score: 0.6917853231106244\n",
"recall_score: 0.6917853231106244\n",
"runing time: 0:00:40.198486\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 73038 73039 73040] TEST: [73041 73042 73043 ... 91298 91299 91300]\n",
"svm linear accuracy score: 0.512157721796276\n",
"f1 score: 0.512157721796276\n",
"precision_score: 0.512157721796276\n",
"recall_score: 0.512157721796276\n",
"runing time: 0:00:37.209128\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"X = train_features_9.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"kf.get_n_splits(X)\n",
"\n",
"print(kf) \n",
"\n",
"for train_index, test_index in kf.split(X):\n",
" print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
" X_train, X_test = X[train_index], X[test_index]\n",
" y_train, y_test = y[train_index], y[test_index]\n",
" \n",
" \n",
" from datetime import datetime\n",
" trarining_start_time = datetime.now()\n",
"\n",
" rfc = RandomForestClassifier(n_estimators=600)\n",
" rfc.fit(X_train, y_train)\n",
" rfc_pred = rfc.predict(X_test) \n",
" print (\"svm linear accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
" print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro'))\n",
" print (\"precision_score:\" , precision_score(y_test,rfc_pred,average='micro'))\n",
" print (\"recall_score:\" , recall_score(y_test,rfc_pred,average='micro'))\n",
"\n",
" training_stop_time = datetime.now()\n",
"\n",
" print (\"runing time:\",(training_stop_time - trarining_start_time))\n",
" print(\"\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"nsimu = 21\n",
"accuracy=[0]*nsimu\n",
"ntree = [0]*nsimu\n",
"for i in range(1,nsimu):\n",
" rfc = RandomForestClassifier(n_estimators=i*5,min_samples_split=10,max_depth=None,criterion='gini')\n",
" rfc.fit(X_train, y_train)\n",
" rfc_pred = rfc.predict(X_test)\n",
" cm = confusion_matrix(y_test,rfc_pred)\n",
" accuracy[i] = (cm[0,0]+cm[1,1])/cm.sum()\n",
" ntree[i]=i*5\n",
"\n",
" print (\"RandomForest accuracy score:\" , accuracy_score(y_test,rfc_pred))\n",
" print (\"f1 score:\" , f1_score(y_test,rfc_pred,average='micro')) \n",
" print (\"Accuracy of prediction:\",round((cm[0,0]+cm[1,1])/cm.sum(),3))\n",
"\n",
" \n",
"plt.figure(figsize=(10,6))\n",
"plt.scatter(x=ntree[1:nsimu],y=accuracy[1:nsimu],s=60,c='red')\n",
"plt.title(\"Number of trees in the Random Forest vs. prediction accuracy (criterion: 'gini')\", fontsize=18)\n",
"plt.xlabel(\"Number of trees\", fontsize=15)\n",
"plt.ylabel(\"Prediction accuracy from confusion matrix\", fontsize=15)\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"KFold(n_splits=5, random_state=None, shuffle=False)\n",
"TRAIN: [18261 18262 18263 ... 91298 91299 91300] TEST: [ 0 1 2 ... 18258 18259 18260]\n",
"svm linear accuracy score: 0.9905810196593834\n",
"f1 score: 0.9905810196593834\n",
"precision_score: 0.9905810196593834\n",
"recall_score: 0.9905810196593834\n",
"runing time: 0:00:01.629664\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [18261 18262 18263 ... 36518 36519 36520]\n",
"svm linear accuracy score: 0.6089813800657174\n",
"f1 score: 0.6089813800657174\n",
"precision_score: 0.6089813800657174\n",
"recall_score: 0.6089813800657174\n",
"runing time: 0:00:01.333435\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [36521 36522 36523 ... 54778 54779 54780]\n",
"svm linear accuracy score: 0.7424424972617744\n",
"f1 score: 0.7424424972617744\n",
"precision_score: 0.7424424972617744\n",
"recall_score: 0.7424424972617744\n",
"runing time: 0:00:01.253643\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 91298 91299 91300] TEST: [54781 54782 54783 ... 73038 73039 73040]\n",
"svm linear accuracy score: 0.7694414019715224\n",
"f1 score: 0.7694414019715224\n",
"precision_score: 0.7694414019715224\n",
"recall_score: 0.7694414019715224\n",
"runing time: 0:00:01.432155\n",
"\n",
"\n",
"\n",
"TRAIN: [ 0 1 2 ... 73038 73039 73040] TEST: [73041 73042 73043 ... 91298 91299 91300]\n",
"svm linear accuracy score: 0.5460021905805038\n",
"f1 score: 0.5460021905805038\n",
"precision_score: 0.5460021905805038\n",
"recall_score: 0.5460021905805038\n",
"runing time: 0:00:01.428182\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"from sklearn.model_selection import KFold\n",
"\n",
"X = train_features_9.values\n",
"y = train_labels.values\n",
"\n",
"kf = KFold(n_splits=5)\n",
"kf.get_n_splits(X)\n",
"\n",
"print(kf) \n",
"\n",
"for train_index, test_index in kf.split(X):\n",
" print(\"TRAIN:\", train_index, \"TEST:\", test_index)\n",
" X_train, X_test = X[train_index], X[test_index]\n",
" y_train, y_test = y[train_index], y[test_index]\n",
" \n",
" \n",
" from datetime import datetime\n",
" trarining_start_time = datetime.now()\n",
"\n",
" clf_svm_linear = SVC(kernel='linear', gamma=0.02, C=1)\n",
" clf_svm_linear = clf_svm_linear.fit(X_train, y_train)\n",
" #print(clf_svm_linear.predict(X_test))\n",
" pred = clf_svm_linear.predict(X_test)\n",
" print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
" print (\"f1 score:\" , f1_score(y_test,pred,average='micro'))\n",
" print (\"precision_score:\" , precision_score(y_test,pred,average='micro'))\n",
" print (\"recall_score:\" , recall_score(y_test,pred,average='micro'))\n",
"\n",
" training_stop_time = datetime.now()\n",
"\n",
" print (\"runing time:\",(training_stop_time - trarining_start_time))\n",
" print(\"\\n\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"svm linear accuracy score: 0.9999342840244464\n",
"f1 score: 0.9999342840244464\n",
"precision_score: 0.9999342840244464\n",
"recall_score: 0.9999342840244464\n",
"runing clf_svm_linear time: 0:00:01.601394\n"
]
}
],
"source": [
"#X_train = train_features_9\n",
"#y_train = train_labels\n",
"\n",
"# X_test = test_features\n",
"# y_test = test_labels\n",
"\n",
"from datetime import datetime\n",
"trarining_start_time = datetime.now()\n",
" \n",
"#clf_svm_linear = SVC(kernel = 'linear',gamma=0.00001,C=0.01)\n",
"clf_svm_linear = SVC(kernel = 'linear',gamma=0.02,C=1)\n",
"clf_svm_linear = clf_svm_linear.fit(X_train, y_train)\n",
"pred = clf_svm_linear.predict(X_test)\n",
"#print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
"#print (\"f1 score:\" , f1_score(y_test,pred,average='micro'))\n",
"#print (\"precision_score:\" , precision_score(y_test,pred,average=None))\n",
"#print (\"recall_score :\" , recall_score(y_test,pred,average=None))\n",
"print (\"svm linear accuracy score:\" , accuracy_score(y_test,pred))\n",
"print (\"f1 score:\" , f1_score(y_test,pred,average='micro'))\n",
"print (\"precision_score:\" , precision_score(y_test,pred,average='micro'))\n",
"print (\"recall_score:\" , recall_score(y_test,pred,average='micro'))\n",
"\n",
"training_stop_time = datetime.now()\n",
"print (\"runing clf_svm_linear time:\",(training_stop_time - trarining_start_time))\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_clf_svm_liner = Porter(clf_svm_linear, language='c').export()\n",
"#porter_clf_svm_poly = Porter(clf_svm_poly, language='c').export()\n",
"# porter_clf_forest = Porter(clf_randomForest, language='c').export()\n",
"#porter_clf_extra_forest = Porter(clf_extra_forest, language='c').export()\n",
"\n",
"#print(porter_clf_svm_linear)\n",
"f = open(\"clf/clf_svm_linear_50features_20181221.txt\",'wb')\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"f.write(porter_clf_svm_liner.encode())\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_clf_svm_liner = Porter(clf_svm_linear, language='js').export()\n",
"#porter_clf_svm_poly = Porter(clf_svm_poly, language='c').export()\n",
"# porter_clf_forest = Porter(clf_randomForest, language='c').export()\n",
"#porter_clf_extra_forest = Porter(clf_extra_forest, language='c').export()\n",
"\n",
"#print(porter_clf_svm_linear)\n",
"f = open(\"clf/clf_svm_linear_41features_js_20181221.js\",'wb')\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"f.write(porter_clf_svm_liner.encode())\n",
"f.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"pred = clf_svm_linear.predict(X_test)\n",
"print \"svm linear accuracy score:\" , accuracy_score(y_test,pred)\n",
"print \"f1 score :\" , f1_score(y_test,pred,average=None)\n",
"print \"precision_score:\" , precision_score(y_test,pred,average=None)\n",
"print \"recall_score :\" , recall_score(y_test,pred,average=None)\n",
"\n",
"print(\"preds:\",pred[:10])\n",
"print('trues:\\n',y_test[:10])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_clf_svm_linear = Porter(clf_svm_linear, language='c').export()\n",
"#porter_clf_svm_poly = Porter(clf_svm_poly, language='c').export()\n",
"# porter_clf_forest = Porter(clf_randomForest, language='c').export()\n",
"#porter_clf_extra_forest = Porter(clf_extra_forest, language='c').export()\n",
"\n",
"#print(porter_clf_svm_linear)\n",
"f = open(\"clf/clf_svm_linear_50features_20181207.txt\",'wb')\n",
"#f = open(\"clf_svm_linear_125100_low_feature_data.txt\",'wb')\n",
"f.write(porter_clf_svm_linear.encode())\n",
"f.close()\n",
"#f = open(\"clf_svm_poly_2457100_data.txt\",'wb')\n",
"#f.write(porter_clf_svm_poly)\n",
"#f.close()\n",
"# f = open(\"clf/clf_randomForest_27features_stddev_c_0_01.txt\",'wb')\n",
"# f.write(porter_clf_forest)\n",
"# f.close()\n",
"# f = open(\"oclf_extra_forest_2457100_data_0824.txt\",'wb')\n",
"# f.write(porter_clf_extra_forest)\n",
"# f.close()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"from sklearn.utils import shuffle\n",
"\n",
"\n",
"# data_shuffle1 = shuffle(data1)\n",
"# #data_shuffle = data_all;\n",
"# test_labels = data_shuffle1[\"index\"]\n",
"# test_features = data_shuffle1.drop(\"dateTime\",axis=1)\n",
"# test_features = test_features.drop(\"index\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBalance\",axis=1)\n",
"\n",
"\n",
"# test_features = test_features.drop(\"left_block_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"left_block_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"left_block_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"left_block_b_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"right_block_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"right_block_b_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_R_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_G_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_B_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"# test_features = test_features.drop(\"whiteBlock_l_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_a_stddev\",axis=1)\n",
"# test_features = test_features.drop(\"whiteBlock_b_stddev\",axis=1)\n",
"\n",
"train_features_10 = pd.DataFrame()\n",
"train_features_10['lelf_right_R'] = test_features['left_block_R'] - test_features['right_block_R']\n",
"train_features_10['lelf_right_G'] = test_features['left_block_G'] - test_features['right_block_G']\n",
"train_features_10['lelf_right_B'] = test_features['left_block_B'] - test_features['right_block_B']\n",
"\n",
"train_features_10['lelf_right_H'] = test_features['left_block_H'] - test_features['right_block_H']\n",
"# train_features_10['lelf_right_S'] = test_features['left_block_S'] - test_features['right_block_S']\n",
"train_features_10['lelf_right_V'] = test_features['left_block_V'] - test_features['right_block_V']\n",
"\n",
"train_features_10['lelf_right_l'] = test_features['left_block_l'] - test_features['right_block_l']\n",
"train_features_10['lelf_right_a'] = test_features['left_block_a'] - test_features['right_block_a']\n",
"train_features_10['lelf_right_b'] = test_features['left_block_b'] - test_features['right_block_b']\n",
"\n",
"train_features_10['lelf_right_R_stddev'] = test_features['left_block_R_stddev'] - test_features['right_block_R_stddev']\n",
"train_features_10['lelf_right_G_stddev'] = test_features['left_block_G_stddev'] - test_features['right_block_G_stddev']\n",
"train_features_10['lelf_right_B_stddev'] = test_features['left_block_B_stddev'] - test_features['right_block_B_stddev']\n",
"\n",
"train_features_10['lelf_right_H_stddev'] = test_features['left_block_H_stddev'] - test_features['right_block_H_stddev']\n",
"# train_features_10['lelf_right_S_stddev'] = test_features['left_block_S_stddev'] - test_features['right_block_S_stddev']\n",
"train_features_10['lelf_right_V_stddev'] = test_features['left_block_V_stddev'] - test_features['right_block_V_stddev']\n",
"\n",
"train_features_10['lelf_right_l_stddev'] = test_features['left_block_l_stddev'] - test_features['right_block_l_stddev']\n",
"train_features_10['lelf_right_a_stddev'] = test_features['left_block_a_stddev'] - test_features['right_block_a_stddev']\n",
"train_features_10['lelf_right_b_stddev'] = test_features['left_block_b_stddev'] - test_features['right_block_b_stddev']\n",
"\n",
"train_features_10['lelf_right_R_hist'] = test_features['left_block_R_hist'] - test_features['right_block_R_hist']\n",
"train_features_10['lelf_right_G_hist'] = test_features['left_block_G_hist'] - test_features['right_block_G_hist']\n",
"train_features_10['lelf_right_B_hist'] = test_features['left_block_B_hist'] - test_features['right_block_B_hist']\n",
"\n",
"train_features_10['lelf_right_H_hist'] = test_features['left_block_H_hist'] - test_features['right_block_H_hist']\n",
"# train_features_10['lelf_right_S_hist'] = test_features['left_block_S_hist'] - test_features['right_block_S_hist']\n",
"train_features_10['lelf_right_V_hist'] = test_features['left_block_V_hist'] - test_features['right_block_V_hist']\n",
"\n",
"train_features_10['lelf_right_l_hist'] = test_features['left_block_l_hist'] - test_features['right_block_l_hist']\n",
"train_features_10['lelf_right_a_hist'] = test_features['left_block_a_hist'] - test_features['right_block_a_hist']\n",
"train_features_10['lelf_right_b_hist'] = test_features['left_block_b_hist'] - test_features['right_block_b_hist']\n",
"\n",
"train_features_10['lelf_right_R_max'] = test_features['left_block_R_max'] - test_features['right_block_R_max']\n",
"train_features_10['lelf_right_G_max'] = test_features['left_block_G_max'] - test_features['right_block_G_max']\n",
"train_features_10['lelf_right_B_max'] = test_features['left_block_B_max'] - test_features['right_block_B_max']\n",
"\n",
"train_features_10['lelf_right_H_max'] = test_features['left_block_H_max'] - test_features['right_block_H_max']\n",
"# train_features_10['lelf_right_S_max'] = test_features['left_block_S_max'] - test_features['right_block_S_max']\n",
"train_features_10['lelf_right_V_max'] = test_features['left_block_V_max'] - test_features['right_block_V_max']\n",
"\n",
"train_features_10['lelf_right_l_max'] = test_features['left_block_l_max'] - test_features['right_block_l_max']\n",
"train_features_10['lelf_right_a_max'] = test_features['left_block_a_max'] - test_features['right_block_a_max']\n",
"train_features_10['lelf_right_b_max'] = test_features['left_block_b_max'] - test_features['right_block_b_max']\n",
"\n",
"\n",
"train_features_10['lelf_right_R_min'] = test_features['left_block_R_min'] - test_features['right_block_R_min']\n",
"train_features_10['lelf_right_G_min'] = test_features['left_block_G_min'] - test_features['right_block_G_min']\n",
"train_features_10['lelf_right_B_min'] = test_features['left_block_B_min'] - test_features['right_block_B_min']\n",
"\n",
"train_features_10['lelf_right_H_min'] = test_features['left_block_H_min'] - test_features['right_block_H_min']\n",
"# train_features_10['lelf_right_S_min'] = test_features['left_block_S_min'] - test_features['right_block_S_min']\n",
"train_features_10['lelf_right_V_min'] = test_features['left_block_V_min'] - test_features['right_block_V_min']\n",
"\n",
"train_features_10['lelf_right_l_min'] = test_features['left_block_l_min'] - test_features['right_block_l_min']\n",
"train_features_10['lelf_right_a_min'] = test_features['left_block_a_min'] - test_features['right_block_a_min']\n",
"train_features_10['lelf_right_b_min'] = test_features['left_block_b_min'] - test_features['right_block_b_min']\n",
"\n",
"# train_features_10['left_grayValue']= test_features['left_grayValue'];\n",
"# train_features_10['left_grayStddevValue']= test_features['left_grayStddevValue'];\n",
"# train_features_10['left_grayHist']= test_features['left_grayHist'];\n",
"# train_features_10['left_grayMax']= test_features['left_grayMax'];\n",
"# train_features_10['left_grayMin']= test_features['left_grayMin'];\n",
"\n",
"# train_features_10['right_grayValue']= test_features['right_grayValue'];\n",
"# train_features_10['right_grayStddevValue']= test_features['right_grayStddevValue'];\n",
"# train_features_10['right_grayHist']= test_features['right_grayHist'];\n",
"# train_features_10['right_grayMax']= test_features['right_grayMax'];\n",
"# train_features_10['right_grayMin']= test_features['right_grayMin'];\n",
"\n",
"# train_features_10['lelf_R_stddev'] = test_features['left_block_R_stddev'] \n",
"# train_features_10['lelf_G_stddev'] = test_features['left_block_G_stddev'] \n",
"# train_features_10['lelf_B_stddev'] = test_features['left_block_B_stddev'] \n",
"\n",
"# train_features_10['left_block_R_min'] = test_features['left_block_R_min'] \n",
"# train_features_10['left_block_G_min'] = test_features['left_block_G_min'] \n",
"# train_features_10['left_block_B_min'] = test_features['left_block_B_min'] \n",
"\n",
"\n",
"\n",
"train_features_10['lelf_right_gray_value'] = test_features['left_grayValue'] - test_features['right_grayValue']\n",
"train_features_10['lelf_right_gray_stddev'] = test_features['left_grayStddevValue'] - test_features['right_grayStddevValue']\n",
"train_features_10['lelf_right_gray_hist'] = test_features['left_grayHist'] - test_features['right_grayHist']\n",
"train_features_10['lelf_right_gray_max'] = test_features['left_grayMax'] - test_features['right_grayMax']\n",
"train_features_10['lelf_right_gray_min'] = test_features['left_grayMin'] - test_features['right_grayMin']\n",
"\n",
"train_features_10.describe()\n",
"\n",
"\n",
"# feature = feature.drop(\"left_block_H_hist\",axis=1)\n",
"# feature = feature.drop(\"right_block_H_hist\",axis=1)\n",
"# feature = feature.drop(\"whiteBlock_H_hist\",axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" \n",
"test_features = test_features.drop(\"left_block_H\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V\",axis=1)\n",
"\n",
"\n",
"test_features = test_features.drop(\"left_block_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_stddev\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_stddev\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_stddev\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_hist\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_hist\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_hist\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_max\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_max\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_max\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_max\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_max\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_max\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_max\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"left_block_H_min\",axis=1)\n",
"test_features = test_features.drop(\"left_block_S_min\",axis=1)\n",
"test_features = test_features.drop(\"left_block_V_min\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"right_block_H_min\",axis=1)\n",
"test_features = test_features.drop(\"right_block_S_min\",axis=1)\n",
"test_features = test_features.drop(\"right_block_V_min\",axis=1)\n",
"\n",
"test_features = test_features.drop(\"whiteBlock_H_min\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_S_min\",axis=1)\n",
"test_features = test_features.drop(\"whiteBlock_V_min\",axis=1)\n",
" \n",
" \n",
"test_features['lelf_right_R'] = test_features['left_block_R'] - test_features['right_block_R']\n",
"test_features['lelf_right_G'] = test_features['left_block_G'] - test_features['right_block_G']\n",
"test_features['lelf_right_B'] = test_features['left_block_B'] - test_features['right_block_B']\n",
"\n",
"# test_features['lelf_right_H'] = test_features['left_block_H'] - test_features['right_block_H']\n",
"# test_features['lelf_right_S'] = test_features['left_block_S'] - test_features['right_block_S']\n",
"# test_features['lelf_right_V'] = test_features['left_block_V'] - test_features['right_block_V']\n",
"\n",
"# test_features['lelf_right_l'] = test_features['left_block_l'] - test_features['right_block_l']\n",
"# test_features['lelf_right_a'] = test_features['left_block_a'] - test_features['right_block_a']\n",
"# test_features['lelf_right_b'] = test_features['left_block_b'] - test_features['right_block_b']\n",
"\n",
"# test_features['lelf_right_R_stddev'] = test_features['left_block_R_stddev'] - test_features['right_block_R_stddev']\n",
"# test_features['lelf_right_G_stddev'] = test_features['left_block_G_stddev'] - test_features['right_block_G_stddev']\n",
"# test_features['lelf_right_B_stddev'] = test_features['left_block_B_stddev'] - test_features['right_block_B_stddev']\n",
"\n",
"# test_features['lelf_right_H_stddev'] = test_features['left_block_H_stddev'] - test_features['right_block_H_stddev']\n",
"# test_features['lelf_right_S_stddev'] = test_features['left_block_S_stddev'] - test_features['right_block_S_stddev']\n",
"# test_features['lelf_right_V_stddev'] = test_features['left_block_V_stddev'] - test_features['right_block_V_stddev']\n",
"\n",
"# test_features['lelf_right_l_stddev'] = test_features['left_block_l_stddev'] - test_features['right_block_l_stddev']\n",
"# test_features['lelf_right_a_stddev'] = test_features['left_block_a_stddev'] - test_features['right_block_a_stddev']\n",
"# test_features['lelf_right_b_stddev'] = test_features['left_block_b_stddev'] - test_features['right_block_b_stddev']\n",
"\n",
"# test_features['lelf_right_R_hist'] = test_features['left_block_R_hist'] - test_features['right_block_R_hist']\n",
"# test_features['lelf_right_G_hist'] = test_features['left_block_G_hist'] - test_features['right_block_G_hist']\n",
"# test_features['lelf_right_B_hist'] = test_features['left_block_B_hist'] - test_features['right_block_B_hist']\n",
"\n",
"# test_features['lelf_right_H_hist'] = test_features['left_block_H_hist'] - test_features['right_block_H_hist']\n",
"# test_features['lelf_right_S_hist'] = test_features['left_block_S_hist'] - test_features['right_block_S_hist']\n",
"# test_features['lelf_right_V_hist'] = test_features['left_block_V_hist'] - test_features['right_block_V_hist']\n",
"\n",
"# test_features['lelf_right_l_hist'] = test_features['left_block_l_hist'] - test_features['right_block_l_hist']\n",
"# test_features['lelf_right_a_hist'] = test_features['left_block_a_hist'] - test_features['right_block_a_hist']\n",
"# test_features['lelf_right_b_hist'] = test_features['left_block_b_hist'] - test_features['right_block_b_hist']\n",
"\n",
"# test_features['lelf_right_R_max'] = test_features['left_block_R_max'] - test_features['right_block_R_max']\n",
"# test_features['lelf_right_G_max'] = test_features['left_block_G_max'] - test_features['right_block_G_max']\n",
"# test_features['lelf_right_B_max'] = test_features['left_block_B_max'] - test_features['right_block_B_max']\n",
"\n",
"# test_features['lelf_right_H_max'] = test_features['left_block_H_max'] - test_features['right_block_H_max']\n",
"# test_features['lelf_right_S_max'] = test_features['left_block_S_max'] - test_features['right_block_S_max']\n",
"# test_features['lelf_right_V_max'] = test_features['left_block_V_max'] - test_features['right_block_V_max']\n",
"\n",
"# test_features['lelf_right_l_max'] = test_features['left_block_l_max'] - test_features['right_block_l_max']\n",
"# test_features['lelf_right_a_max'] = test_features['left_block_a_max'] - test_features['right_block_a_max']\n",
"# test_features['lelf_right_b_max'] = test_features['left_block_b_max'] - test_features['right_block_b_max']\n",
"\n",
"\n",
"\n",
"# test_features['lelf_right_R_min'] = test_features['left_block_R_min'] - test_features['right_block_R_min']\n",
"# test_features['lelf_right_G_min'] = test_features['left_block_G_min'] - test_features['right_block_G_min']\n",
"# test_features['lelf_right_B_min'] = test_features['left_block_B_min'] - test_features['right_block_B_min']\n",
"\n",
"# test_features['lelf_right_H_min'] = test_features['left_block_H_min'] - test_features['right_block_H_min']\n",
"# test_features['lelf_right_S_min'] = test_features['left_block_S_min'] - test_features['right_block_S_min']\n",
"# test_features['lelf_right_V_min'] = test_features['left_block_V_min'] - test_features['right_block_V_min']\n",
"\n",
"# test_features['lelf_right_l_min'] = test_features['left_block_l_min'] - test_features['right_block_l_min']\n",
"# test_features['lelf_right_a_min'] = test_features['left_block_a_min'] - test_features['right_block_a_min']\n",
"# test_features['lelf_right_b_min'] = test_features['left_block_b_min'] - test_features['right_block_b_min']\n",
"\n",
"test_features['lelf_right_gray_value'] = test_features['left_grayValue'] - test_features['right_grayValue']\n",
"test_features['lelf_right_gray_stddev'] = test_features['left_grayStddevValue'] - test_features['right_grayStddevValue']\n",
"test_features['lelf_right_gray_hist'] = test_features['left_grayHist'] - test_features['right_grayHist']\n",
"test_features['lelf_right_gray_max'] = test_features['left_grayMax'] - test_features['right_grayMax']\n",
"test_features['lelf_right_gray_min'] = test_features['left_grayMin'] - test_features['right_grayMin']\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pred = clf_svm_linear.predict(train_features_10)\n",
"test_features_gray_stddev = test_features['left_grayStddevValue']\n",
"test_features_np = np.ndarray(test_features_gray_stddev.shape,dtype = np.float32)\n",
"\n",
"test_features_np = test_features_gray_stddev.values\n",
"print \"svm linear accuracy score:\" , accuracy_score(test_labels,pred)\n",
"print \"f1 score:\" , f1_score(test_labels,pred,average='micro')\n",
"print \"recall_score :\" , recall_score(test_labels,pred,average='micro')\n",
"print \"precision_score :\" , precision_score(test_labels,pred,average='micro')\n",
"\n",
"for i in range(0, len(test_features_np)):\n",
" if test_features_np[i] < 3:\n",
" pred[i] =0\n",
"print \"svm linear accuracy score:\" , accuracy_score(test_labels,pred)\n",
"print \"f1 score:\" , f1_score(test_labels,pred,average='micro')\n",
"print \"recall_score :\" , recall_score(test_labels,pred,average='micro')\n",
"print \"precision_score :\" , precision_score(test_labels,pred,average='micro')\n",
"\n",
"\n",
"print(\"preds:\",pred[120:130])\n",
"print('trues:\\n',test_labels[120:130])\n",
"test_labels_np = np.ndarray(test_labels.shape,dtype= np.int32)\n",
"test_labels_np = test_labels.values\n",
"print(test_labels_np[0])\n",
"all_counter = 0\n",
"counter = 0\n",
"for i in range(0 ,len(pred) ):\n",
" if (pred[i] == 4 or (pred[i] == 4 and test_labels_np[i] ==4 )or test_labels_np[i] ==4 ) :\n",
" all_counter = all_counter + 1\n",
" if pred[i] != test_labels_np[i] :\n",
" counter = counter+1\n",
" print(pred[i] , test_labels_np[i])\n",
"print(len(pred),all_counter, counter) \n",
"all_counter = 0\n",
"counter = 0\n",
"for i in range(0 ,len(pred) ):\n",
" if pred[i] != test_labels_np[i] :\n",
" counter = counter+1\n",
" print(pred[i] , test_labels_np[i])\n",
"print(len(pred),all_counter, counter) \n",
"\n",
"# print \"svm linear accuracy score:\" , accuracy_score(test_labels,pred)\n",
"# print \"f1 score:\" , f1_score(test_labels,pred,average='micro')\n",
"# print \"recall_score :\" , recall_score(test_labels,pred,average='micro')\n",
"# print \"precision_score :\" , precision_score(test_labels,pred,average='micro')\n",
"\n"
]
},
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Reference snippet (kept as markdown, not executed): macro-averaged metrics.\n",
    "\n",
    "```python\n",
    "from sklearn.metrics import recall_score\n",
    "from sklearn.metrics import precision_score\n",
    "print(\"accuracy score:\", accuracy_score(y_test, pred))\n",
    "print(\"recall_score :\", recall_score(y_test, pred, average='macro'))\n",
    "print(\"precision_score :\", precision_score(y_test, pred, average='macro'))\n",
    "```\n"
   ]
  },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn_porter import Porter\n",
"\n",
"porter_java = Porter(clf_svm, language='java').export()\n",
"porter_c = Porter(clf_svm, language='c').export()\n",
"\n",
"f = open(\"Protein_c.txt\",'wb')\n",
"f.write(porter_c)\n",
"f.close()\n",
"\n",
"f = open(\"Protein_svm_java.txt\",'wb')\n",
"f.write(porter_java)\n",
"f.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}