550 lines
16 KiB
C++
550 lines
16 KiB
C++
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
|
|
|
|
#include <dlib/svm.h>
|
|
#include <dlib/rand.h>
|
|
#include <dlib/statistics.h>
|
|
|
|
#include "tester.h"
|
|
|
|
|
|
namespace
|
|
{
|
|
|
|
using namespace test;
|
|
using namespace dlib;
|
|
using namespace std;
|
|
|
|
logger dlog("test.svm_c_linear_dcd");
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_sparse()
|
|
{
|
|
typedef std::map<unsigned long,double> sample_type;
|
|
|
|
|
|
typedef sparse_linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 0.2;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_relative_epsilon(1e-10);
|
|
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
decision_function<kernel_type> df, df2, df3;
|
|
|
|
dlib::rand rnd;
|
|
// Now lets go into a loop and randomly generate 10000 samples.
|
|
double label = +1;
|
|
for (int i = 0; i < 100; ++i)
|
|
{
|
|
// flip this flag
|
|
label *= -1;
|
|
|
|
sample.clear();
|
|
|
|
// now make a random sparse sample with at most 10 non-zero elements
|
|
for (int j = 0; j < 5; ++j)
|
|
{
|
|
int idx = rnd.get_random_32bit_number()%10;
|
|
double value = rnd.get_random_double();
|
|
|
|
sample[idx] = label*value;
|
|
}
|
|
|
|
// Also save the samples we are generating so we can let the svm_c_linear_trainer
|
|
// learn from them below.
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
if (samples.size() > 1)
|
|
{
|
|
linear_trainer_cpa.set_c_class1(C);
|
|
linear_trainer_cpa.set_c_class2(1.5*C);
|
|
linear_trainer.set_c_class1(C/samples.size());
|
|
linear_trainer.set_c_class2(1.5*C/samples.size());
|
|
|
|
df = linear_trainer.train(samples, labels, state);
|
|
df2 = linear_trainer_cpa.train(samples, labels);
|
|
df3 = linear_trainer.train(samples, labels);
|
|
|
|
DLIB_TEST_MSG( dlib::distance(df.basis_vectors(0), df2.basis_vectors(0)) < 1e-8, dlib::distance(df.basis_vectors(0), df2.basis_vectors(0)));
|
|
DLIB_TEST( std::abs(df.b - df2.b) < 1e-8);
|
|
DLIB_TEST( dlib::distance(df.basis_vectors(0), df3.basis_vectors(0)) < 1e-8);
|
|
DLIB_TEST( std::abs(df.b - df3.b) < 1e-8);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_normal_no_bias()
|
|
{
|
|
typedef matrix<double,10,1> sample_type;
|
|
|
|
|
|
typedef linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 1.0;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_relative_epsilon(1e-10);
|
|
|
|
linear_trainer.include_bias(false);
|
|
|
|
|
|
std::vector<sample_type> samples, samples_explict_bias;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
decision_function<kernel_type> df, df2, df3;
|
|
|
|
dlib::rand rnd;
|
|
// Now lets go into a loop and randomly generate 10000 samples.
|
|
double label = +1;
|
|
for (int i = 0; i < 100; ++i)
|
|
{
|
|
// flip this flag
|
|
label *= -1;
|
|
|
|
sample = 0;
|
|
|
|
// now make a random sparse sample with at most 10 non-zero elements
|
|
for (int j = 0; j < 5; ++j)
|
|
{
|
|
int idx = rnd.get_random_32bit_number()%9;
|
|
double value = rnd.get_random_double();
|
|
|
|
sample(idx) = label*value;
|
|
}
|
|
|
|
// Also save the samples we are generating so we can let the svm_c_linear_trainer
|
|
// learn from them below.
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
sample(9) = -1;
|
|
samples_explict_bias.push_back(sample);
|
|
|
|
if (samples.size() > 1)
|
|
{
|
|
linear_trainer_cpa.set_c_class1(C);
|
|
linear_trainer_cpa.set_c_class2(1.5*C);
|
|
linear_trainer.set_c_class1(C/samples.size());
|
|
linear_trainer.set_c_class2(1.5*C/samples.size());
|
|
|
|
df = linear_trainer.train(samples_explict_bias, labels, state);
|
|
df2 = linear_trainer_cpa.train(samples, labels);
|
|
df3 = linear_trainer.train(samples_explict_bias, labels);
|
|
|
|
DLIB_TEST( std::abs(df2.basis_vectors(0)(9)) < 1e-7);
|
|
DLIB_TEST_MSG( max(abs(colm(df.basis_vectors(0),0,9) - colm(df2.basis_vectors(0),0,9))) < 1e-6, max(abs(colm(df.basis_vectors(0),0,9) - colm(df2.basis_vectors(0),0,9))));
|
|
DLIB_TEST( std::abs(df.basis_vectors(0)(9) - df2.b) < 1e-6);
|
|
DLIB_TEST( max(abs(df.basis_vectors(0) - df3.basis_vectors(0))) < 1e-6);
|
|
DLIB_TEST( std::abs(df.b - df3.b) < 1e-7);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_normal()
|
|
{
|
|
typedef matrix<double,10,1> sample_type;
|
|
|
|
|
|
typedef linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 1;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_relative_epsilon(1e-10);
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
decision_function<kernel_type> df, df2, df3;
|
|
|
|
dlib::rand rnd;
|
|
// Now lets go into a loop and randomly generate 10000 samples.
|
|
double label = +1;
|
|
for (int i = 0; i < 100; ++i)
|
|
{
|
|
// flip this flag
|
|
label *= -1;
|
|
|
|
sample = 0;
|
|
|
|
// now make a random sparse sample with at most 10 non-zero elements
|
|
for (int j = 0; j < 5; ++j)
|
|
{
|
|
int idx = rnd.get_random_32bit_number()%10;
|
|
double value = rnd.get_random_double();
|
|
|
|
sample(idx) = label*value;
|
|
}
|
|
|
|
// Also save the samples we are generating so we can let the svm_c_linear_trainer
|
|
// learn from them below.
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
if (samples.size() > 1)
|
|
{
|
|
linear_trainer_cpa.set_c_class1(C);
|
|
linear_trainer_cpa.set_c_class2(1.5*C);
|
|
linear_trainer.set_c_class1(C/samples.size());
|
|
linear_trainer.set_c_class2(1.5*C/samples.size());
|
|
|
|
df = linear_trainer.train(samples, labels, state);
|
|
df2 = linear_trainer_cpa.train(samples, labels);
|
|
df3 = linear_trainer.train(samples, labels);
|
|
|
|
DLIB_TEST_MSG( max(abs(df.basis_vectors(0) - df2.basis_vectors(0))) < 1e-7, max(abs(df.basis_vectors(0) - df2.basis_vectors(0))));
|
|
DLIB_TEST( std::abs(df.b - df2.b) < 1e-7);
|
|
DLIB_TEST( max(abs(df.basis_vectors(0) - df3.basis_vectors(0))) < 1e-7);
|
|
DLIB_TEST( std::abs(df.b - df3.b) < 1e-7);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_normal_force_last_weight(bool have_bias, bool force_weight)
|
|
{
|
|
typedef matrix<double,10,1> sample_type;
|
|
dlog << LINFO << "have_bias: "<< have_bias << " force_weight: "<< force_weight;
|
|
|
|
|
|
typedef linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
svm_c_linear_trainer<kernel_type> linear_trainer_cpa;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 1;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer_cpa.set_epsilon(1e-11);
|
|
linear_trainer_cpa.set_relative_epsilon(1e-11);
|
|
|
|
linear_trainer_cpa.force_last_weight_to_1(force_weight);
|
|
|
|
linear_trainer.force_last_weight_to_1(force_weight);
|
|
linear_trainer.include_bias(have_bias);
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
decision_function<kernel_type> df, df2;
|
|
|
|
running_stats<double> rs;
|
|
|
|
dlib::rand rnd;
|
|
// Now lets go into a loop and randomly generate 10000 samples.
|
|
double label = +1;
|
|
for (int i = 0; i < 40; ++i)
|
|
{
|
|
// flip this flag
|
|
label *= -1;
|
|
|
|
sample = 0;
|
|
|
|
// now make a random sparse sample with at most 10 non-zero elements
|
|
for (int j = 0; j < 5; ++j)
|
|
{
|
|
int idx = rnd.get_random_32bit_number()%9;
|
|
double value = rnd.get_random_double();
|
|
|
|
sample(idx) = label*value + label;
|
|
}
|
|
|
|
sample(9) = 4;
|
|
|
|
// Also save the samples we are generating so we can let the svm_c_linear_trainer
|
|
// learn from them below.
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
linear_trainer.set_c(C);
|
|
linear_trainer_cpa.set_c(C*samples.size());
|
|
|
|
df = linear_trainer.train(samples, labels, state);
|
|
|
|
if (force_weight)
|
|
{
|
|
DLIB_TEST(std::abs(df.basis_vectors(0)(9) - 1) < 1e-8);
|
|
DLIB_TEST(std::abs(df.b) < 1e-8);
|
|
|
|
if (samples.size() > 1)
|
|
{
|
|
df2 = linear_trainer_cpa.train(samples, labels);
|
|
DLIB_TEST_MSG( max(abs(df.basis_vectors(0) - df2.basis_vectors(0))) < 1e-7, max(abs(df.basis_vectors(0) - df2.basis_vectors(0))));
|
|
DLIB_TEST( std::abs(df.b - df2.b) < 1e-7);
|
|
}
|
|
}
|
|
|
|
if (!have_bias)
|
|
DLIB_TEST(std::abs(df.b) < 1e-8);
|
|
|
|
|
|
for (unsigned long k = 0; k < samples.size(); ++k)
|
|
{
|
|
//cout << "pred: "<< labels[k]*df(samples[k]) << endl;
|
|
rs.add(labels[k]*df(samples[k]));
|
|
}
|
|
}
|
|
DLIB_TEST_MSG(std::abs(rs.min()-1) < 1e-7, std::abs(rs.min()-1));
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_normal_1_sample(double label)
|
|
{
|
|
typedef matrix<double,10,1> sample_type;
|
|
|
|
|
|
typedef linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 10;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer.set_c(C);
|
|
|
|
|
|
linear_trainer.force_last_weight_to_1(true);
|
|
linear_trainer.include_bias(false);
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
sample = 0;
|
|
sample(0) = -1;
|
|
sample(1) = -1;
|
|
sample(9) = 4;
|
|
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
for (int i = 0; i < 4; ++i)
|
|
{
|
|
decision_function<kernel_type> df;
|
|
df = linear_trainer.train(samples, labels);
|
|
|
|
if (label > 0)
|
|
{
|
|
DLIB_TEST(std::abs(df(samples[0])-4) < 1e-8);
|
|
}
|
|
else
|
|
{
|
|
DLIB_TEST(std::abs(df(samples[0])+1) < 1e-8);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_sparse_1_sample(double label)
|
|
{
|
|
typedef std::vector<std::pair<unsigned long,double> > sample_type;
|
|
|
|
|
|
typedef sparse_linear_kernel<sample_type> kernel_type;
|
|
|
|
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
|
|
|
|
const double C = 10;
|
|
linear_trainer.set_epsilon(1e-10);
|
|
linear_trainer.set_c(C);
|
|
|
|
|
|
linear_trainer.force_last_weight_to_1(true);
|
|
linear_trainer.include_bias(false);
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
sample.push_back(make_pair(0,-1));
|
|
sample.push_back(make_pair(1,1));
|
|
sample.push_back(make_pair(9,4));
|
|
|
|
for (int i = 0; i < 4; ++i)
|
|
{
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
|
|
decision_function<kernel_type> df;
|
|
df = linear_trainer.train(samples, labels);
|
|
|
|
|
|
if (label > 0)
|
|
{
|
|
DLIB_TEST(std::abs(df(samples[0])-4) < 1e-8);
|
|
}
|
|
else
|
|
{
|
|
DLIB_TEST(std::abs(df(samples[0])+1) < 1e-8);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
void test_l2_version ()
|
|
{
|
|
typedef std::map<unsigned long,double> sample_type;
|
|
typedef sparse_linear_kernel<sample_type> kernel_type;
|
|
|
|
svm_c_linear_dcd_trainer<kernel_type> linear_trainer;
|
|
linear_trainer.set_c(10);
|
|
linear_trainer.set_epsilon(1e-5);
|
|
|
|
std::vector<sample_type> samples;
|
|
std::vector<double> labels;
|
|
|
|
// make an instance of a sample vector so we can use it below
|
|
sample_type sample;
|
|
|
|
|
|
// Now let's go into a loop and randomly generate 10000 samples.
|
|
double label = +1;
|
|
for (int i = 0; i < 1000; ++i)
|
|
{
|
|
// flip this flag
|
|
label *= -1;
|
|
|
|
sample.clear();
|
|
|
|
// now make a random sparse sample with at most 10 non-zero elements
|
|
for (int j = 0; j < 10; ++j)
|
|
{
|
|
int idx = std::rand()%100;
|
|
double value = static_cast<double>(std::rand())/RAND_MAX;
|
|
|
|
sample[idx] = label*value;
|
|
}
|
|
|
|
// Also save the samples we are generating so we can let the svm_c_linear_trainer
|
|
// learn from them below.
|
|
samples.push_back(sample);
|
|
labels.push_back(label);
|
|
}
|
|
|
|
decision_function<kernel_type> df = linear_trainer.train(samples, labels);
|
|
|
|
sample.clear();
|
|
sample[4] = 0.3;
|
|
sample[10] = 0.9;
|
|
DLIB_TEST(df(sample) > 0);
|
|
|
|
sample.clear();
|
|
sample[83] = -0.3;
|
|
sample[26] = -0.9;
|
|
sample[58] = -0.7;
|
|
DLIB_TEST(df(sample) < 0);
|
|
|
|
sample.clear();
|
|
sample[0] = -0.2;
|
|
sample[9] = -0.8;
|
|
DLIB_TEST(df(sample) < 0);
|
|
}
|
|
|
|
class tester_svm_c_linear_dcd : public tester
|
|
{
|
|
public:
|
|
tester_svm_c_linear_dcd (
|
|
) :
|
|
tester ("test_svm_c_linear_dcd",
|
|
"Runs tests on the svm_c_linear_dcd_trainer.")
|
|
{}
|
|
|
|
void perform_test (
|
|
)
|
|
{
|
|
test_normal();
|
|
print_spinner();
|
|
test_normal_no_bias();
|
|
print_spinner();
|
|
test_sparse();
|
|
print_spinner();
|
|
test_normal_force_last_weight(false,false);
|
|
print_spinner();
|
|
test_normal_force_last_weight(false,true);
|
|
print_spinner();
|
|
test_normal_force_last_weight(true,false);
|
|
print_spinner();
|
|
test_normal_force_last_weight(true,true);
|
|
print_spinner();
|
|
test_normal_1_sample(+1);
|
|
print_spinner();
|
|
test_normal_1_sample(-1);
|
|
print_spinner();
|
|
test_sparse_1_sample(+1);
|
|
print_spinner();
|
|
test_sparse_1_sample(-1);
|
|
print_spinner();
|
|
|
|
test_l2_version();
|
|
}
|
|
} a;
|
|
|
|
}
|
|
|
|
|
|
|
|
|