713 lines
24 KiB
C++
713 lines
24 KiB
C++
// Copyright (C) 2012 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_
|
|
#define DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_
|
|
|
|
#include "svm_c_linear_dcd_trainer_abstract.h"
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include "../matrix.h"
|
|
#include "../algs.h"
|
|
#include "../rand.h"
|
|
#include "svm.h"
|
|
|
|
#include "function.h"
|
|
#include "kernel.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename K
|
|
>
|
|
class svm_c_linear_dcd_trainer
|
|
{
|
|
public:
|
|
typedef K kernel_type;
|
|
typedef typename kernel_type::scalar_type scalar_type;
|
|
typedef typename kernel_type::sample_type sample_type;
|
|
typedef typename kernel_type::mem_manager_type mem_manager_type;
|
|
typedef decision_function<kernel_type> trained_function_type;
|
|
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
|
|
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
|
|
|
|
// You are getting a compiler error on this line because you supplied a non-linear
|
|
// kernel to the svm_c_linear_dcd_trainer object. You have to use one of the
|
|
// linear kernels with this trainer.
|
|
COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
|
|
is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
|
|
|
|
svm_c_linear_dcd_trainer (
|
|
) :
|
|
Cpos(1),
|
|
Cneg(1),
|
|
eps(0.1),
|
|
max_iterations(10000),
|
|
verbose(false),
|
|
have_bias(true),
|
|
last_weight_1(false),
|
|
do_shrinking(true),
|
|
do_svm_l2(false)
|
|
{
|
|
}
|
|
|
|
explicit svm_c_linear_dcd_trainer (
|
|
const scalar_type& C_
|
|
) :
|
|
Cpos(C_),
|
|
Cneg(C_),
|
|
eps(0.1),
|
|
max_iterations(10000),
|
|
verbose(false),
|
|
have_bias(true),
|
|
last_weight_1(false),
|
|
do_shrinking(true),
|
|
do_svm_l2(false)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(0 < C_,
|
|
"\tsvm_c_trainer::svm_c_linear_dcd_trainer(kernel,C)"
|
|
<< "\n\t invalid inputs were given to this function"
|
|
<< "\n\t C_: " << C_
|
|
);
|
|
}
|
|
|
|
bool includes_bias (
|
|
) const
|
|
{
|
|
return have_bias;
|
|
}
|
|
|
|
void include_bias (
|
|
bool should_have_bias
|
|
)
|
|
{
|
|
have_bias = should_have_bias;
|
|
}
|
|
|
|
bool forces_last_weight_to_1 (
|
|
) const
|
|
{
|
|
return last_weight_1;
|
|
}
|
|
|
|
void force_last_weight_to_1 (
|
|
bool should_last_weight_be_1
|
|
)
|
|
{
|
|
last_weight_1 = should_last_weight_be_1;
|
|
}
|
|
|
|
bool shrinking_enabled (
|
|
) const { return do_shrinking; }
|
|
|
|
void enable_shrinking (
|
|
bool enabled
|
|
) { do_shrinking = enabled; }
|
|
|
|
bool solving_svm_l2_problem (
|
|
) const { return do_svm_l2; }
|
|
|
|
void solve_svm_l2_problem (
|
|
bool enabled
|
|
) { do_svm_l2 = enabled; }
|
|
|
|
void be_verbose (
|
|
)
|
|
{
|
|
verbose = true;
|
|
}
|
|
|
|
void be_quiet (
|
|
)
|
|
{
|
|
verbose = false;
|
|
}
|
|
|
|
void set_epsilon (
|
|
scalar_type eps_
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(eps_ > 0,
|
|
"\tvoid svm_c_linear_dcd_trainer::set_epsilon(eps_)"
|
|
<< "\n\t invalid inputs were given to this function"
|
|
<< "\n\t eps_: " << eps_
|
|
);
|
|
eps = eps_;
|
|
}
|
|
|
|
const scalar_type get_epsilon (
|
|
) const
|
|
{
|
|
return eps;
|
|
}
|
|
|
|
const kernel_type& get_kernel (
|
|
) const
|
|
{
|
|
return kernel_type();
|
|
}
|
|
|
|
unsigned long get_max_iterations (
|
|
) const { return max_iterations; }
|
|
|
|
void set_max_iterations (
|
|
unsigned long max_iter
|
|
)
|
|
{
|
|
max_iterations = max_iter;
|
|
}
|
|
|
|
void set_c (
|
|
scalar_type C
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(C > 0,
|
|
"\t void svm_c_linear_dcd_trainer::set_c()"
|
|
<< "\n\t C must be greater than 0"
|
|
<< "\n\t C: " << C
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
Cpos = C;
|
|
Cneg = C;
|
|
}
|
|
|
|
const scalar_type get_c_class1 (
|
|
) const
|
|
{
|
|
return Cpos;
|
|
}
|
|
|
|
const scalar_type get_c_class2 (
|
|
) const
|
|
{
|
|
return Cneg;
|
|
}
|
|
|
|
void set_c_class1 (
|
|
scalar_type C
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(C > 0,
|
|
"\t void svm_c_linear_dcd_trainer::set_c_class1()"
|
|
<< "\n\t C must be greater than 0"
|
|
<< "\n\t C: " << C
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
Cpos = C;
|
|
}
|
|
|
|
void set_c_class2 (
|
|
scalar_type C
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(C > 0,
|
|
"\t void svm_c_linear_dcd_trainer::set_c_class2()"
|
|
<< "\n\t C must be greater than 0"
|
|
<< "\n\t C: " << C
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
Cneg = C;
|
|
}
|
|
|
|
class optimizer_state
|
|
{
|
|
friend class svm_c_linear_dcd_trainer;
|
|
|
|
public:
|
|
optimizer_state() : did_init(false) {}
|
|
|
|
private:
|
|
|
|
template <
|
|
typename in_sample_vector_type,
|
|
typename in_scalar_vector_type
|
|
>
|
|
void init(
|
|
const in_sample_vector_type& x,
|
|
const in_scalar_vector_type& y,
|
|
bool have_bias_,
|
|
bool last_weight_1_,
|
|
bool do_svm_l2_,
|
|
scalar_type Cpos,
|
|
scalar_type Cneg
|
|
)
|
|
{
|
|
const long new_dims = max_index_plus_one(x);
|
|
long new_idx = 0;
|
|
|
|
if (did_init)
|
|
{
|
|
DLIB_CASSERT(have_bias_ == have_bias &&
|
|
last_weight_1_ == last_weight_1,
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)"
|
|
<< "\n\t The given state object is invalid because the previous trainer was configured differently."
|
|
<< "\n\t have_bias_: " << have_bias_
|
|
<< "\n\t have_bias: " << have_bias
|
|
<< "\n\t last_weight_1_: " << last_weight_1_
|
|
<< "\n\t last_weight_1: " << last_weight_1
|
|
);
|
|
|
|
DLIB_CASSERT( new_dims >= dims,
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)"
|
|
<< "\n\t The given state object is invalid because the training data dimensions have shrunk."
|
|
<< "\n\t new_dims: " << new_dims
|
|
<< "\n\t dims: " << dims
|
|
);
|
|
|
|
DLIB_CASSERT( x.size() >= static_cast<long>(alpha.size()),
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)"
|
|
<< "\n\t The given state object is invalid because the training data has fewer samples than previously."
|
|
<< "\n\t x.size(): " << x.size()
|
|
<< "\n\t alpha.size(): " << alpha.size()
|
|
);
|
|
|
|
// make sure we amortize the cost of growing the alpha vector.
|
|
if (alpha.capacity() < static_cast<unsigned long>(x.size()))
|
|
alpha.reserve(x.size()*2);
|
|
|
|
new_idx = alpha.size();
|
|
|
|
// Make sure alpha has the same length as x. So pad with extra zeros if
|
|
// necessary to make this happen.
|
|
alpha.resize(x.size(),0);
|
|
|
|
|
|
if (new_dims != dims)
|
|
{
|
|
// The only valid way the dimensions can be different here is if
|
|
// you are using a sparse vector type. This is because we might
|
|
// have had training samples which just happened to not include all
|
|
// the features previously. Therefore, max_index_plus_one() would
|
|
// have given too low of a result. But for dense vectors it is
|
|
// definitely a user error if the dimensions don't match.
|
|
|
|
DLIB_CASSERT(is_matrix<sample_type>::value == false,
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)"
|
|
<< "\n\t The given state object is invalid because the training data dimensions have changed."
|
|
<< "\n\t new_dims: " << new_dims
|
|
<< "\n\t dims: " << dims
|
|
);
|
|
|
|
// extend w by the right number of elements
|
|
if (have_bias && !last_weight_1)
|
|
{
|
|
// Splice some zeros into the w vector so it will have the
|
|
// right length. Here we are being careful to move the bias
|
|
// weight to the end of the resulting vector.
|
|
w = join_cols(join_cols(
|
|
colm(w,0,dims),
|
|
zeros_matrix<scalar_type>(new_dims-dims,1)),
|
|
uniform_matrix<scalar_type>(1,1,w(dims))
|
|
);
|
|
}
|
|
else
|
|
{
|
|
// Just concatenate the right number of zeros.
|
|
w = join_cols(w, zeros_matrix<scalar_type>(new_dims-dims,1));
|
|
}
|
|
dims = new_dims;
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
did_init = true;
|
|
have_bias = have_bias_;
|
|
last_weight_1 = last_weight_1_;
|
|
dims = new_dims;
|
|
|
|
alpha.resize(x.size());
|
|
|
|
index.reserve(x.size());
|
|
Q.reserve(x.size());
|
|
|
|
if (have_bias && !last_weight_1)
|
|
w.set_size(dims+1);
|
|
else
|
|
w.set_size(dims);
|
|
|
|
w = 0;
|
|
}
|
|
|
|
for (long i = new_idx; i < x.size(); ++i)
|
|
{
|
|
Q.push_back(length_squared(x(i)));
|
|
|
|
if (have_bias && !last_weight_1)
|
|
{
|
|
index.push_back(i);
|
|
Q.back() += 1;
|
|
}
|
|
else if (Q.back() != 0)
|
|
{
|
|
index.push_back(i);
|
|
}
|
|
|
|
if (do_svm_l2_)
|
|
{
|
|
if (y(i) > 0)
|
|
Q.back() += 1/(2*Cpos);
|
|
else
|
|
Q.back() += 1/(2*Cneg);
|
|
}
|
|
}
|
|
|
|
if (last_weight_1)
|
|
w(dims-1) = 1;
|
|
}
|
|
|
|
template <typename T>
|
|
typename enable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const
|
|
{
|
|
if (!last_weight_1)
|
|
{
|
|
return dlib::dot(x,x);
|
|
}
|
|
else
|
|
{
|
|
// skip the last dimension
|
|
return dlib::dot(colm(x,0,x.size()-1),
|
|
colm(x,0,x.size()-1));
|
|
}
|
|
|
|
}
|
|
|
|
template <typename T>
|
|
typename disable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const
|
|
{
|
|
if (!last_weight_1)
|
|
{
|
|
return dlib::dot(x,x);
|
|
}
|
|
else
|
|
{
|
|
scalar_type temp = 0;
|
|
typename T::const_iterator i;
|
|
for (i = x.begin(); i != x.end(); ++i)
|
|
{
|
|
// skip the last dimension
|
|
if (static_cast<long>(i->first) < dims-1)
|
|
temp += i->second*i->second;
|
|
}
|
|
return temp;
|
|
}
|
|
}
|
|
|
|
|
|
bool did_init;
|
|
bool have_bias;
|
|
bool last_weight_1;
|
|
std::vector<scalar_type> alpha;
|
|
scalar_vector_type w;
|
|
std::vector<scalar_type> Q;
|
|
std::vector<long> index;
|
|
long dims;
|
|
dlib::rand rnd;
|
|
|
|
public:
|
|
|
|
const std::vector<scalar_type>& get_alpha () const { return alpha; }
|
|
|
|
friend void serialize(const optimizer_state& item, std::ostream& out)
|
|
{
|
|
const int version = 1;
|
|
dlib::serialize(version, out);
|
|
dlib::serialize(item.did_init, out);
|
|
dlib::serialize(item.have_bias, out);
|
|
dlib::serialize(item.last_weight_1, out);
|
|
dlib::serialize(item.alpha, out);
|
|
dlib::serialize(item.w, out);
|
|
dlib::serialize(item.Q, out);
|
|
dlib::serialize(item.index, out);
|
|
dlib::serialize(item.dims, out);
|
|
dlib::serialize(item.rnd, out);
|
|
}
|
|
|
|
friend void deserialize(optimizer_state& item, std::istream& in)
|
|
{
|
|
int version = 0;
|
|
dlib::deserialize(version, in);
|
|
if (version != 1)
|
|
{
|
|
throw dlib::serialization_error(
|
|
"Error while deserializing dlib::svm_c_linear_dcd_trainer::optimizer_state, unexpected version."
|
|
);
|
|
}
|
|
|
|
dlib::deserialize(item.did_init, in);
|
|
dlib::deserialize(item.have_bias, in);
|
|
dlib::deserialize(item.last_weight_1, in);
|
|
dlib::deserialize(item.alpha, in);
|
|
dlib::deserialize(item.w, in);
|
|
dlib::deserialize(item.Q, in);
|
|
dlib::deserialize(item.index, in);
|
|
dlib::deserialize(item.dims, in);
|
|
dlib::deserialize(item.rnd, in);
|
|
}
|
|
|
|
};
|
|
|
|
template <
|
|
typename in_sample_vector_type,
|
|
typename in_scalar_vector_type
|
|
>
|
|
const decision_function<kernel_type> train (
|
|
const in_sample_vector_type& x,
|
|
const in_scalar_vector_type& y
|
|
) const
|
|
{
|
|
optimizer_state state;
|
|
return do_train(mat(x), mat(y), state);
|
|
}
|
|
|
|
template <
|
|
typename in_sample_vector_type,
|
|
typename in_scalar_vector_type
|
|
>
|
|
const decision_function<kernel_type> train (
|
|
const in_sample_vector_type& x,
|
|
const in_scalar_vector_type& y,
|
|
optimizer_state& state
|
|
) const
|
|
{
|
|
return do_train(mat(x), mat(y), state);
|
|
}
|
|
|
|
private:
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_sample_vector_type,
|
|
typename in_scalar_vector_type
|
|
>
|
|
const decision_function<kernel_type> do_train (
|
|
const in_sample_vector_type& x,
|
|
const in_scalar_vector_type& y,
|
|
optimizer_state& state
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(is_learning_problem(x,y) == true,
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y)"
|
|
<< "\n\t invalid inputs were given to this function"
|
|
<< "\n\t x.size(): " << x.size()
|
|
<< "\n\t y.size(): " << y.size()
|
|
<< "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y)
|
|
);
|
|
#ifdef ENABLE_ASSERTS
|
|
for (long i = 0; i < x.size(); ++i)
|
|
{
|
|
DLIB_ASSERT(y(i) == +1 || y(i) == -1,
|
|
"\t decision_function svm_c_linear_dcd_trainer::train(x,y)"
|
|
<< "\n\t invalid inputs were given to this function"
|
|
<< "\n\t y("<<i<<"): " << y(i)
|
|
);
|
|
}
|
|
#endif
|
|
|
|
state.init(x,y,have_bias,last_weight_1,do_svm_l2,Cpos,Cneg);
|
|
|
|
std::vector<scalar_type>& alpha = state.alpha;
|
|
scalar_vector_type& w = state.w;
|
|
std::vector<long>& index = state.index;
|
|
const long dims = state.dims;
|
|
|
|
|
|
unsigned long active_size = index.size();
|
|
|
|
scalar_type PG_max_prev = std::numeric_limits<scalar_type>::infinity();
|
|
scalar_type PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
|
|
|
|
const scalar_type Dii_pos = 1/(2*Cpos);
|
|
const scalar_type Dii_neg = 1/(2*Cneg);
|
|
|
|
// main loop
|
|
for (unsigned long iter = 0; iter < max_iterations; ++iter)
|
|
{
|
|
scalar_type PG_max = -std::numeric_limits<scalar_type>::infinity();
|
|
scalar_type PG_min = std::numeric_limits<scalar_type>::infinity();
|
|
|
|
// randomly shuffle the indices
|
|
for (unsigned long i = 0; i < active_size; ++i)
|
|
{
|
|
// pick a random index >= i
|
|
const long j = i + state.rnd.get_random_32bit_number()%(active_size-i);
|
|
std::swap(index[i], index[j]);
|
|
}
|
|
|
|
// for all the active training samples
|
|
for (unsigned long ii = 0; ii < active_size; ++ii)
|
|
{
|
|
const long i = index[ii];
|
|
|
|
scalar_type G = y(i)*dot(w, x(i)) - 1;
|
|
if (do_svm_l2)
|
|
{
|
|
if (y(i) > 0)
|
|
G += Dii_pos*alpha[i];
|
|
else
|
|
G += Dii_neg*alpha[i];
|
|
}
|
|
const scalar_type C = (y(i) > 0) ? Cpos : Cneg;
|
|
const scalar_type U = do_svm_l2 ? std::numeric_limits<scalar_type>::infinity() : C;
|
|
|
|
scalar_type PG = 0;
|
|
if (alpha[i] == 0)
|
|
{
|
|
if (G > PG_max_prev)
|
|
{
|
|
// shrink the active set of training examples
|
|
--active_size;
|
|
std::swap(index[ii], index[active_size]);
|
|
--ii;
|
|
continue;
|
|
}
|
|
|
|
if (G < 0)
|
|
PG = G;
|
|
}
|
|
else if (alpha[i] == U)
|
|
{
|
|
if (G < PG_min_prev)
|
|
{
|
|
// shrink the active set of training examples
|
|
--active_size;
|
|
std::swap(index[ii], index[active_size]);
|
|
--ii;
|
|
continue;
|
|
}
|
|
|
|
if (G > 0)
|
|
PG = G;
|
|
}
|
|
else
|
|
{
|
|
PG = G;
|
|
}
|
|
|
|
if (PG > PG_max)
|
|
PG_max = PG;
|
|
if (PG < PG_min)
|
|
PG_min = PG;
|
|
|
|
// if PG != 0
|
|
if (std::abs(PG) > 1e-12)
|
|
{
|
|
const scalar_type alpha_old = alpha[i];
|
|
alpha[i] = std::min(std::max(alpha[i] - G/state.Q[i], (scalar_type)0.0), U);
|
|
const scalar_type delta = (alpha[i]-alpha_old)*y(i);
|
|
add_to(w, x(i), delta);
|
|
if (have_bias && !last_weight_1)
|
|
w(w.size()-1) -= delta;
|
|
|
|
if (last_weight_1)
|
|
w(dims-1) = 1;
|
|
}
|
|
|
|
}
|
|
|
|
if (verbose)
|
|
{
|
|
using namespace std;
|
|
cout << "gap: " << PG_max - PG_min << endl;
|
|
cout << "active_size: " << active_size << endl;
|
|
cout << "iter: " << iter << endl;
|
|
cout << endl;
|
|
}
|
|
|
|
if (PG_max - PG_min <= eps)
|
|
{
|
|
// stop if we are within eps tolerance and the last iteration
|
|
// was over all the samples
|
|
if (active_size == index.size())
|
|
break;
|
|
|
|
// Turn off shrinking on the next iteration. We will stop if the
|
|
// tolerance is still <= eps when shrinking is off.
|
|
active_size = index.size();
|
|
PG_max_prev = std::numeric_limits<scalar_type>::infinity();
|
|
PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
|
|
}
|
|
else if (do_shrinking)
|
|
{
|
|
PG_max_prev = PG_max;
|
|
PG_min_prev = PG_min;
|
|
if (PG_max_prev <= 0)
|
|
PG_max_prev = std::numeric_limits<scalar_type>::infinity();
|
|
if (PG_min_prev >= 0)
|
|
PG_min_prev = -std::numeric_limits<scalar_type>::infinity();
|
|
}
|
|
|
|
} // end of main optimization loop
|
|
|
|
|
|
|
|
|
|
// put the solution into a decision function and then return it
|
|
decision_function<kernel_type> df;
|
|
if (have_bias && !last_weight_1)
|
|
df.b = w(w.size()-1);
|
|
else
|
|
df.b = 0;
|
|
|
|
df.basis_vectors.set_size(1);
|
|
// Copy the plane normal into the output basis vector. The output vector might
|
|
// be a sparse vector container so we need to use this special kind of copy to
|
|
// handle that case.
|
|
assign(df.basis_vectors(0), colm(w, 0, dims));
|
|
df.alpha.set_size(1);
|
|
df.alpha(0) = 1;
|
|
|
|
return df;
|
|
}
|
|
|
|
scalar_type dot (
|
|
const scalar_vector_type& w,
|
|
const sample_type& sample
|
|
) const
|
|
{
|
|
if (have_bias && !last_weight_1)
|
|
{
|
|
const long w_size_m1 = w.size()-1;
|
|
return dlib::dot(colm(w,0,w_size_m1), sample) - w(w_size_m1);
|
|
}
|
|
else
|
|
{
|
|
return dlib::dot(w, sample);
|
|
}
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
scalar_type Cpos;
|
|
scalar_type Cneg;
|
|
scalar_type eps;
|
|
unsigned long max_iterations;
|
|
bool verbose;
|
|
bool have_bias; // having a bias means we pretend all x vectors have an extra element which is always -1.
|
|
bool last_weight_1;
|
|
bool do_shrinking;
|
|
bool do_svm_l2;
|
|
|
|
}; // end of class svm_c_linear_dcd_trainer
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
|
|
}
|
|
|
|
#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_
|
|
|
|
|