469 lines
14 KiB
C++
469 lines
14 KiB
C++
// Copyright (C) 2007 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_SOLVE_QP2_USING_SMo_Hh_
|
|
#define DLIB_SOLVE_QP2_USING_SMo_Hh_
|
|
|
|
#include "optimization_solve_qp2_using_smo_abstract.h"
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include <sstream>
|
|
#include "../matrix.h"
|
|
#include "../algs.h"
|
|
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class invalid_nu_error : public dlib::error
|
|
{
|
|
public:
|
|
invalid_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
|
|
const double nu;
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename T
|
|
>
|
|
typename T::type maximum_nu_impl (
|
|
const T& y
|
|
)
|
|
{
|
|
typedef typename T::type scalar_type;
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(y.size() > 1 && is_col_vector(y),
|
|
"\ttypedef T::type maximum_nu(y)"
|
|
<< "\n\ty should be a column vector with more than one entry"
|
|
<< "\n\ty.nr(): " << y.nr()
|
|
<< "\n\ty.nc(): " << y.nc()
|
|
);
|
|
|
|
long pos_count = 0;
|
|
long neg_count = 0;
|
|
for (long r = 0; r < y.nr(); ++r)
|
|
{
|
|
if (y(r) == 1.0)
|
|
{
|
|
++pos_count;
|
|
}
|
|
else if (y(r) == -1.0)
|
|
{
|
|
++neg_count;
|
|
}
|
|
else
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0,
|
|
"\ttypedef T::type maximum_nu(y)"
|
|
<< "\n\ty should contain only 1 and 0 entries"
|
|
<< "\n\tr: " << r
|
|
<< "\n\ty(r): " << y(r)
|
|
);
|
|
}
|
|
}
|
|
return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
|
|
}
|
|
|
|
template <
|
|
typename T
|
|
>
|
|
typename T::type maximum_nu (
|
|
const T& y
|
|
)
|
|
{
|
|
return maximum_nu_impl(mat(y));
|
|
}
|
|
|
|
template <
|
|
typename T
|
|
>
|
|
typename T::value_type maximum_nu (
|
|
const T& y
|
|
)
|
|
{
|
|
return maximum_nu_impl(mat(y));
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename matrix_type
|
|
>
|
|
class solve_qp2_using_smo
|
|
{
|
|
public:
|
|
typedef typename matrix_type::mem_manager_type mem_manager_type;
|
|
typedef typename matrix_type::type scalar_type;
|
|
typedef typename matrix_type::layout_type layout_type;
|
|
typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
|
|
typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
|
|
|
|
|
|
template <
|
|
typename EXP1,
|
|
typename EXP2,
|
|
long NR
|
|
>
|
|
unsigned long operator() (
|
|
const matrix_exp<EXP1>& Q,
|
|
const matrix_exp<EXP2>& y,
|
|
const scalar_type nu,
|
|
matrix<scalar_type,NR,1,mem_manager_type, layout_type>& alpha,
|
|
scalar_type eps
|
|
)
|
|
{
|
|
DLIB_ASSERT(Q.nr() == Q.nc() && y.size() == Q.nr() && y.size() > 1 && is_col_vector(y) &&
|
|
sum((y == +1) + (y == -1)) == y.size() &&
|
|
0 < nu && nu <= 1 &&
|
|
eps > 0,
|
|
"\t void solve_qp2_using_smo::operator()"
|
|
<< "\n\t invalid arguments were given to this function"
|
|
<< "\n\t Q.nr(): " << Q.nr()
|
|
<< "\n\t Q.nc(): " << Q.nc()
|
|
<< "\n\t is_col_vector(y): " << is_col_vector(y)
|
|
<< "\n\t y.size(): " << y.size()
|
|
<< "\n\t sum((y == +1) + (y == -1)): " << sum((y == +1) + (y == -1))
|
|
<< "\n\t nu: " << nu
|
|
<< "\n\t eps: " << eps
|
|
);
|
|
|
|
alpha.set_size(Q.nr(),1);
|
|
df.set_size(Q.nr());
|
|
|
|
// now initialize alpha
|
|
set_initial_alpha(y, nu, alpha);
|
|
|
|
const scalar_type tau = 1e-12;
|
|
|
|
typedef typename colm_exp<EXP1>::type col_type;
|
|
|
|
set_all_elements(df, 0);
|
|
// initialize df. Compute df = Q*alpha
|
|
for (long r = 0; r < df.nr(); ++r)
|
|
{
|
|
if (alpha(r) != 0)
|
|
{
|
|
df += alpha(r)*matrix_cast<scalar_type>(colm(Q,r));
|
|
}
|
|
}
|
|
|
|
unsigned long count = 0;
|
|
|
|
// now perform the actual optimization of alpha
|
|
long i=0, j=0;
|
|
while (find_working_group(y,alpha,Q,df,tau,eps,i,j))
|
|
{
|
|
++count;
|
|
const scalar_type old_alpha_i = alpha(i);
|
|
const scalar_type old_alpha_j = alpha(j);
|
|
|
|
optimize_working_pair(alpha,Q,df,tau,i,j);
|
|
|
|
// update the df vector now that we have modified alpha(i) and alpha(j)
|
|
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
|
|
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
|
|
|
|
col_type Q_i = colm(Q,i);
|
|
col_type Q_j = colm(Q,j);
|
|
for(long k = 0; k < df.nr(); ++k)
|
|
df(k) += Q_i(k)*delta_alpha_i + Q_j(k)*delta_alpha_j;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
const column_matrix& get_gradient (
|
|
) const { return df; }
|
|
|
|
private:
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename scalar_type,
|
|
typename scalar_vector_type,
|
|
typename scalar_vector_type2
|
|
>
|
|
inline void set_initial_alpha (
|
|
const scalar_vector_type& y,
|
|
const scalar_type nu,
|
|
scalar_vector_type2& alpha
|
|
) const
|
|
{
|
|
set_all_elements(alpha,0);
|
|
const scalar_type l = y.nr();
|
|
scalar_type temp = nu*l/2;
|
|
long num = (long)std::floor(temp);
|
|
long num_total = (long)std::ceil(temp);
|
|
|
|
bool has_slack = false;
|
|
int count = 0;
|
|
for (int i = 0; i < alpha.nr(); ++i)
|
|
{
|
|
if (y(i) == 1)
|
|
{
|
|
if (count < num)
|
|
{
|
|
++count;
|
|
alpha(i) = 1;
|
|
}
|
|
else
|
|
{
|
|
has_slack = true;
|
|
if (num_total > num)
|
|
{
|
|
++count;
|
|
alpha(i) = temp - std::floor(temp);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (count != num_total || has_slack == false)
|
|
{
|
|
std::ostringstream sout;
|
|
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
|
|
throw invalid_nu_error(sout.str(),nu);
|
|
}
|
|
|
|
has_slack = false;
|
|
count = 0;
|
|
for (int i = 0; i < alpha.nr(); ++i)
|
|
{
|
|
if (y(i) == -1)
|
|
{
|
|
if (count < num)
|
|
{
|
|
++count;
|
|
alpha(i) = 1;
|
|
}
|
|
else
|
|
{
|
|
has_slack = true;
|
|
if (num_total > num)
|
|
{
|
|
++count;
|
|
alpha(i) = temp - std::floor(temp);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (count != num_total || has_slack == false)
|
|
{
|
|
std::ostringstream sout;
|
|
sout << "Invalid nu of " << nu << ". It is required that: 0 < nu < " << 2*(scalar_type)count/y.nr();
|
|
throw invalid_nu_error(sout.str(),nu);
|
|
}
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename scalar_vector_type,
|
|
typename scalar_type,
|
|
typename EXP,
|
|
typename U, typename V
|
|
>
|
|
inline bool find_working_group (
|
|
const V& y,
|
|
const U& alpha,
|
|
const matrix_exp<EXP>& Q,
|
|
const scalar_vector_type& df,
|
|
const scalar_type tau,
|
|
const scalar_type eps,
|
|
long& i_out,
|
|
long& j_out
|
|
) const
|
|
{
|
|
using namespace std;
|
|
long ip = 0;
|
|
long jp = 0;
|
|
long in = 0;
|
|
long jn = 0;
|
|
|
|
|
|
typedef typename colm_exp<EXP>::type col_type;
|
|
typedef typename diag_exp<EXP>::type diag_type;
|
|
|
|
scalar_type ip_val = -numeric_limits<scalar_type>::infinity();
|
|
scalar_type jp_val = numeric_limits<scalar_type>::infinity();
|
|
scalar_type in_val = -numeric_limits<scalar_type>::infinity();
|
|
scalar_type jn_val = numeric_limits<scalar_type>::infinity();
|
|
|
|
// loop over the alphas and find the maximum ip and in indices.
|
|
for (long i = 0; i < alpha.nr(); ++i)
|
|
{
|
|
if (y(i) == 1)
|
|
{
|
|
if (alpha(i) < 1.0)
|
|
{
|
|
if (-df(i) > ip_val)
|
|
{
|
|
ip_val = -df(i);
|
|
ip = i;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (alpha(i) > 0.0)
|
|
{
|
|
if (df(i) > in_val)
|
|
{
|
|
in_val = df(i);
|
|
in = i;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
scalar_type Mp = numeric_limits<scalar_type>::infinity();
|
|
scalar_type Mn = numeric_limits<scalar_type>::infinity();
|
|
|
|
// Pick out the columns and diagonal of Q we need below. Doing
|
|
// it this way is faster if Q is actually a symmetric_matrix_cache
|
|
// object.
|
|
col_type Q_ip = colm(Q,ip);
|
|
col_type Q_in = colm(Q,in);
|
|
diag_type Q_diag = diag(Q);
|
|
|
|
|
|
|
|
// now we need to find the minimum jp and jn indices
|
|
for (long j = 0; j < alpha.nr(); ++j)
|
|
{
|
|
if (y(j) == 1)
|
|
{
|
|
if (alpha(j) > 0.0)
|
|
{
|
|
scalar_type b = ip_val + df(j);
|
|
if (-df(j) < Mp)
|
|
Mp = -df(j);
|
|
|
|
if (b > 0)
|
|
{
|
|
scalar_type a = Q_ip(ip) + Q_diag(j) - 2*Q_ip(j);
|
|
if (a <= 0)
|
|
a = tau;
|
|
scalar_type temp = -b*b/a;
|
|
if (temp < jp_val)
|
|
{
|
|
jp_val = temp;
|
|
jp = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (alpha(j) < 1.0)
|
|
{
|
|
scalar_type b = in_val - df(j);
|
|
if (df(j) < Mn)
|
|
Mn = df(j);
|
|
|
|
if (b > 0)
|
|
{
|
|
scalar_type a = Q_in(in) + Q_diag(j) - 2*Q_in(j);
|
|
if (a <= 0)
|
|
a = tau;
|
|
scalar_type temp = -b*b/a;
|
|
if (temp < jn_val)
|
|
{
|
|
jn_val = temp;
|
|
jn = j;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// if we are at the optimal point then return false so the caller knows
|
|
// to stop optimizing
|
|
if (std::max(ip_val - Mp, in_val - Mn) < eps)
|
|
return false;
|
|
|
|
if (jp_val < jn_val)
|
|
{
|
|
i_out = ip;
|
|
j_out = jp;
|
|
}
|
|
else
|
|
{
|
|
i_out = in;
|
|
j_out = jn;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename EXP,
|
|
typename T, typename U
|
|
>
|
|
inline void optimize_working_pair (
|
|
T& alpha,
|
|
const matrix_exp<EXP>& Q,
|
|
const U& df,
|
|
const scalar_type tau,
|
|
const long i,
|
|
const long j
|
|
) const
|
|
{
|
|
scalar_type quad_coef = Q(i,i)+Q(j,j)-2*Q(j,i);
|
|
if (quad_coef <= 0)
|
|
quad_coef = tau;
|
|
scalar_type delta = (df(i)-df(j))/quad_coef;
|
|
scalar_type sum = alpha(i) + alpha(j);
|
|
alpha(i) -= delta;
|
|
alpha(j) += delta;
|
|
|
|
if(sum > 1)
|
|
{
|
|
if(alpha(i) > 1)
|
|
{
|
|
alpha(i) = 1;
|
|
alpha(j) = sum - 1;
|
|
}
|
|
else if(alpha(j) > 1)
|
|
{
|
|
alpha(j) = 1;
|
|
alpha(i) = sum - 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(alpha(j) < 0)
|
|
{
|
|
alpha(j) = 0;
|
|
alpha(i) = sum;
|
|
}
|
|
else if(alpha(i) < 0)
|
|
{
|
|
alpha(i) = 0;
|
|
alpha(j) = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
column_matrix df; // gradient of f(alpha)
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SOLVE_QP2_USING_SMo_Hh_
|
|
|
|
|