346 lines
11 KiB
C++
346 lines
11 KiB
C++
// Copyright (C) 2010 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_OPTIMIZATION_LEAST_SQuARES_H_h_
|
|
#define DLIB_OPTIMIZATION_LEAST_SQuARES_H_h_
|
|
|
|
#include "../matrix.h"
|
|
#include "optimization_trust_region.h"
|
|
#include "optimization_least_squares_abstract.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename column_vector_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type
|
|
>
|
|
class least_squares_function_model
|
|
{
|
|
public:
|
|
least_squares_function_model (
|
|
const funct_type& f_,
|
|
const funct_der_type& der_,
|
|
const vector_type& list_
|
|
) : f(f_), der(der_), list(list_)
|
|
{
|
|
S = 0;
|
|
last_f = 0;
|
|
last_f2 = 0;
|
|
|
|
r.set_size(list.size(),1);
|
|
}
|
|
|
|
const funct_type& f;
|
|
const funct_der_type& der;
|
|
const vector_type& list;
|
|
|
|
typedef typename column_vector_type::type type;
|
|
typedef typename column_vector_type::mem_manager_type mem_manager_type;
|
|
typedef typename column_vector_type::layout_type layout_type;
|
|
const static long NR = column_vector_type::NR;
|
|
|
|
typedef column_vector_type column_vector;
|
|
typedef matrix<type,NR,NR,mem_manager_type,layout_type> general_matrix;
|
|
|
|
|
|
type operator() (
|
|
const column_vector& x
|
|
) const
|
|
{
|
|
type result = 0;
|
|
for (long i = 0; i < list.size(); ++i)
|
|
{
|
|
const type temp = f(list(i), x);
|
|
// save the residual for later
|
|
r(i) = temp;
|
|
result += temp*temp;
|
|
}
|
|
|
|
last_f = 0.5*result;
|
|
return 0.5*result;
|
|
}
|
|
|
|
void get_derivative_and_hessian (
|
|
const column_vector& x,
|
|
column_vector& d,
|
|
general_matrix& h
|
|
) const
|
|
{
|
|
J.set_size(list.size(), x.size());
|
|
|
|
// compute the Jacobian
|
|
for (long i = 0; i < list.size(); ++i)
|
|
{
|
|
set_rowm(J,i) = trans(der(list(i), x));
|
|
}
|
|
|
|
// Compute the Levenberg-Marquardt gradient and hessian
|
|
d = trans(J)*r;
|
|
h = trans(J)*J;
|
|
|
|
if (S.size() == 0)
|
|
{
|
|
S.set_size(x.size(), x.size());
|
|
S = 0;
|
|
}
|
|
|
|
// If this isn't the first iteration then check if using
|
|
// a quasi-newton update helps things.
|
|
if (last_r.size() != 0)
|
|
{
|
|
|
|
s = x - last_x;
|
|
y = d - last_d;
|
|
yy = d - trans(last_J)*r;
|
|
|
|
const type ys = trans(y)*s;
|
|
vtemp = yy - S*s;
|
|
const type temp2 = std::abs(trans(s)*S*s);
|
|
type scale = (temp2 != 0) ? std::min<type>(1, std::abs(dot(s,yy))/temp2) : 1;
|
|
|
|
if (ys != 0)
|
|
S = scale*S + (vtemp*trans(y) + y*trans(vtemp))/(ys) - dot(vtemp,s)/ys/ys*y*trans(y);
|
|
else
|
|
S *= scale;
|
|
|
|
// check how well both the models fit the last change we saw in f()
|
|
const type measured_delta = last_f2 - last_f;
|
|
s = -s;
|
|
const type h_predicted_delta = 0.5*trans(s)*h*s + trans(d)*s;
|
|
const type s_predicted_delta = 0.5*trans(s)*(h+S)*s + trans(d)*s;
|
|
|
|
const type h_error = std::abs((h_predicted_delta/measured_delta) - 1);
|
|
const type s_error = std::abs((s_predicted_delta/measured_delta) - 1);
|
|
|
|
if (s_error < h_error && h_error > 0.01)
|
|
{
|
|
h += make_symmetric(S);
|
|
}
|
|
else if (s_error > 10)
|
|
{
|
|
S = 0;
|
|
}
|
|
|
|
// put r into last_r
|
|
r.swap(last_r);
|
|
}
|
|
else
|
|
{
|
|
// put r into last_r
|
|
last_r = r;
|
|
}
|
|
|
|
J.swap(last_J);
|
|
last_x = x;
|
|
last_d = d;
|
|
|
|
last_f2 = last_f;
|
|
}
|
|
|
|
mutable type last_f; // value of function we saw in last operator()
|
|
mutable type last_f2; // value of last_f we saw in get_derivative_and_hessian()
|
|
mutable matrix<type,0,1,mem_manager_type,layout_type> r;
|
|
mutable column_vector vtemp;
|
|
mutable column_vector s, y, yy;
|
|
|
|
mutable general_matrix S;
|
|
mutable column_vector last_x;
|
|
mutable column_vector last_d;
|
|
mutable matrix<type,0,1,mem_manager_type,layout_type> last_r;
|
|
mutable matrix<type,0,NR,mem_manager_type,layout_type> last_J;
|
|
mutable matrix<type,0,NR,mem_manager_type,layout_type> J;
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename column_vector_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type
|
|
>
|
|
least_squares_function_model<column_vector_type,funct_type,funct_der_type,vector_type> least_squares_model (
|
|
const funct_type& f,
|
|
const funct_der_type& der,
|
|
const vector_type& list
|
|
)
|
|
{
|
|
return least_squares_function_model<column_vector_type,funct_type,funct_der_type,vector_type>(f,der,list);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename stop_strategy_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type,
|
|
typename T
|
|
>
|
|
double solve_least_squares (
|
|
stop_strategy_type stop_strategy,
|
|
const funct_type& f,
|
|
const funct_der_type& der,
|
|
const vector_type& list,
|
|
T& x,
|
|
double radius = 1
|
|
)
|
|
{
|
|
// The starting point (i.e. x) must be a column vector.
|
|
COMPILE_TIME_ASSERT(T::NC <= 1);
|
|
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(is_vector(mat(list)) && list.size() > 0 &&
|
|
is_col_vector(x) && radius > 0,
|
|
"\t double solve_least_squares()"
|
|
<< "\n\t invalid arguments were given to this function"
|
|
<< "\n\t is_vector(list): " << is_vector(mat(list))
|
|
<< "\n\t list.size(): " << list.size()
|
|
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
|
<< "\n\t radius: " << radius
|
|
);
|
|
|
|
return find_min_trust_region(stop_strategy,
|
|
least_squares_model<T>(f, der, mat(list)),
|
|
x,
|
|
radius);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename column_vector_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type
|
|
>
|
|
class least_squares_lm_function_model
|
|
{
|
|
public:
|
|
least_squares_lm_function_model (
|
|
const funct_type& f_,
|
|
const funct_der_type& der_,
|
|
const vector_type& list_
|
|
) : f(f_), der(der_), list(list_)
|
|
{
|
|
r.set_size(list.size(),1);
|
|
}
|
|
|
|
const funct_type& f;
|
|
const funct_der_type& der;
|
|
const vector_type& list;
|
|
|
|
typedef typename column_vector_type::type type;
|
|
typedef typename column_vector_type::mem_manager_type mem_manager_type;
|
|
typedef typename column_vector_type::layout_type layout_type;
|
|
const static long NR = column_vector_type::NR;
|
|
|
|
typedef column_vector_type column_vector;
|
|
typedef matrix<type,NR,NR,mem_manager_type,layout_type> general_matrix;
|
|
|
|
mutable matrix<type,0,1,mem_manager_type,layout_type> r;
|
|
mutable column_vector vtemp;
|
|
|
|
type operator() (
|
|
const column_vector& x
|
|
) const
|
|
{
|
|
type result = 0;
|
|
for (long i = 0; i < list.size(); ++i)
|
|
{
|
|
const type temp = f(list(i), x);
|
|
// save the residual for later
|
|
r(i) = temp;
|
|
result += temp*temp;
|
|
}
|
|
|
|
return 0.5*result;
|
|
}
|
|
|
|
void get_derivative_and_hessian (
|
|
const column_vector& x,
|
|
column_vector& d,
|
|
general_matrix& h
|
|
) const
|
|
{
|
|
d = 0;
|
|
h = 0;
|
|
for (long i = 0; i < list.size(); ++i)
|
|
{
|
|
vtemp = der(list(i), x);
|
|
d += r(i)*vtemp;
|
|
h += vtemp*trans(vtemp);
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename column_vector_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type
|
|
>
|
|
least_squares_lm_function_model<column_vector_type,funct_type,funct_der_type,vector_type> least_squares_lm_model (
|
|
const funct_type& f,
|
|
const funct_der_type& der,
|
|
const vector_type& list
|
|
)
|
|
{
|
|
return least_squares_lm_function_model<column_vector_type,funct_type,funct_der_type,vector_type>(f,der,list);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename stop_strategy_type,
|
|
typename funct_type,
|
|
typename funct_der_type,
|
|
typename vector_type,
|
|
typename T
|
|
>
|
|
double solve_least_squares_lm (
|
|
stop_strategy_type stop_strategy,
|
|
const funct_type& f,
|
|
const funct_der_type& der,
|
|
const vector_type& list,
|
|
T& x,
|
|
double radius = 1
|
|
)
|
|
{
|
|
// The starting point (i.e. x) must be a column vector.
|
|
COMPILE_TIME_ASSERT(T::NC <= 1);
|
|
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(is_vector(mat(list)) && list.size() > 0 &&
|
|
is_col_vector(x) && radius > 0,
|
|
"\t double solve_least_squares_lm()"
|
|
<< "\n\t invalid arguments were given to this function"
|
|
<< "\n\t is_vector(list): " << is_vector(mat(list))
|
|
<< "\n\t list.size(): " << list.size()
|
|
<< "\n\t is_col_vector(x): " << is_col_vector(x)
|
|
<< "\n\t radius: " << radius
|
|
);
|
|
|
|
return find_min_trust_region(stop_strategy,
|
|
least_squares_lm_model<T>(f, der, mat(list)),
|
|
x,
|
|
radius);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_OPTIMIZATION_LEAST_SQuARES_H_h_
|
|
|
|
|