895 lines
28 KiB
C++
895 lines
28 KiB
C++
// Copyright (C) 2008 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
|
|
#define DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
|
|
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include "../matrix.h"
|
|
#include "../algs.h"
|
|
#include "optimization_line_search_abstract.h"
|
|
#include <utility>
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct, typename T>
|
|
class line_search_funct
|
|
{
|
|
public:
|
|
line_search_funct(const funct& f_, const T& start_, const T& direction_)
|
|
: f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(0)
|
|
{}
|
|
|
|
line_search_funct(const funct& f_, const T& start_, const T& direction_, T& r)
|
|
: f(f_),start(start_), direction(direction_), matrix_r(&r), scalar_r(0)
|
|
{}
|
|
|
|
line_search_funct(const funct& f_, const T& start_, const T& direction_, double& r)
|
|
: f(f_),start(start_), direction(direction_), matrix_r(0), scalar_r(&r)
|
|
{}
|
|
|
|
double operator()(const double& x) const
|
|
{
|
|
return get_value(f(start + x*direction));
|
|
}
|
|
|
|
private:
|
|
|
|
double get_value (const double& r) const
|
|
{
|
|
// save a copy of this value for later
|
|
if (scalar_r)
|
|
*scalar_r = r;
|
|
|
|
return r;
|
|
}
|
|
|
|
template <typename U>
|
|
double get_value (const U& r) const
|
|
{
|
|
// U should be a matrix type
|
|
COMPILE_TIME_ASSERT(is_matrix<U>::value);
|
|
|
|
// save a copy of this value for later
|
|
if (matrix_r)
|
|
*matrix_r = r;
|
|
|
|
return dot(r,direction);
|
|
}
|
|
|
|
const funct& f;
|
|
const T& start;
|
|
const T& direction;
|
|
T* matrix_r;
|
|
double* scalar_r;
|
|
};
|
|
|
|
template <typename funct, typename T>
|
|
const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction)
|
|
{
|
|
COMPILE_TIME_ASSERT(is_matrix<T>::value);
|
|
DLIB_ASSERT (
|
|
is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
|
|
"\tline_search_funct make_line_search_function(f,start,direction)"
|
|
<< "\n\tYou have to supply column vectors to this function"
|
|
<< "\n\tstart.nc(): " << start.nc()
|
|
<< "\n\tdirection.nc(): " << direction.nc()
|
|
<< "\n\tstart.nr(): " << start.nr()
|
|
<< "\n\tdirection.nr(): " << direction.nr()
|
|
);
|
|
return line_search_funct<funct,T>(f,start,direction);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct, typename T>
|
|
const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, double& f_out)
|
|
{
|
|
COMPILE_TIME_ASSERT(is_matrix<T>::value);
|
|
DLIB_ASSERT (
|
|
is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
|
|
"\tline_search_funct make_line_search_function(f,start,direction)"
|
|
<< "\n\tYou have to supply column vectors to this function"
|
|
<< "\n\tstart.nc(): " << start.nc()
|
|
<< "\n\tdirection.nc(): " << direction.nc()
|
|
<< "\n\tstart.nr(): " << start.nr()
|
|
<< "\n\tdirection.nr(): " << direction.nr()
|
|
);
|
|
return line_search_funct<funct,T>(f,start,direction, f_out);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct, typename T>
|
|
const line_search_funct<funct,T> make_line_search_function(const funct& f, const T& start, const T& direction, T& grad_out)
|
|
{
|
|
COMPILE_TIME_ASSERT(is_matrix<T>::value);
|
|
DLIB_ASSERT (
|
|
is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size(),
|
|
"\tline_search_funct make_line_search_function(f,start,direction)"
|
|
<< "\n\tYou have to supply column vectors to this function"
|
|
<< "\n\tstart.nc(): " << start.nc()
|
|
<< "\n\tdirection.nc(): " << direction.nc()
|
|
<< "\n\tstart.nr(): " << start.nr()
|
|
<< "\n\tdirection.nr(): " << direction.nr()
|
|
);
|
|
return line_search_funct<funct,T>(f,start,direction,grad_out);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double f1,
|
|
double d1,
|
|
double limit = 1
|
|
)
|
|
{
|
|
const double n = 3*(f1 - f0) - 2*d0 - d1;
|
|
const double e = d0 + d1 - 2*(f1 - f0);
|
|
|
|
|
|
// find the minimum of the derivative of the polynomial
|
|
|
|
double temp = std::max(n*n - 3*e*d0,0.0);
|
|
|
|
if (temp < 0)
|
|
return 0.5;
|
|
|
|
temp = std::sqrt(temp);
|
|
|
|
if (std::abs(e) <= std::numeric_limits<double>::epsilon())
|
|
return 0.5;
|
|
|
|
// figure out the two possible min values
|
|
double x1 = (temp - n)/(3*e);
|
|
double x2 = -(temp + n)/(3*e);
|
|
|
|
// compute the value of the interpolating polynomial at these two points
|
|
double y1 = f0 + d0*x1 + n*x1*x1 + e*x1*x1*x1;
|
|
double y2 = f0 + d0*x2 + n*x2*x2 + e*x2*x2*x2;
|
|
|
|
// pick the best point
|
|
double x;
|
|
if (y1 < y2)
|
|
x = x1;
|
|
else
|
|
x = x2;
|
|
|
|
// now make sure the minimum is within the allowed range of [0,limit]
|
|
return put_in_range(0,limit,x);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double f1
|
|
)
|
|
{
|
|
const double temp = 2*(f1 - f0 - d0);
|
|
if (std::abs(temp) <= d0*std::numeric_limits<double>::epsilon())
|
|
return 0.5;
|
|
|
|
const double alpha = -d0/temp;
|
|
|
|
// now make sure the minimum is within the allowed range of (0,1)
|
|
return put_in_range(0,1,alpha);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double x1,
|
|
double f_x1,
|
|
double x2,
|
|
double f_x2
|
|
)
|
|
{
|
|
DLIB_ASSERT(0 < x1 && x1 < x2,"Invalid inputs were given to this function.\n"
|
|
<< "x1: " << x1
|
|
<< " x2: " << x2
|
|
);
|
|
// The contents of this function follow the equations described on page 58 of the
|
|
// book Numerical Optimization by Nocedal and Wright, second edition.
|
|
matrix<double,2,2> m;
|
|
matrix<double,2,1> v;
|
|
|
|
const double aa2 = x2*x2;
|
|
const double aa1 = x1*x1;
|
|
m = aa2, -aa1,
|
|
-aa2*x2, aa1*x1;
|
|
v = f_x1 - f0 - d0*x1,
|
|
f_x2 - f0 - d0*x2;
|
|
|
|
|
|
double temp = aa2*aa1*(x1-x2);
|
|
|
|
// just take a guess if this happens
|
|
if (temp == 0 || std::fpclassify(temp) == FP_SUBNORMAL)
|
|
{
|
|
return x1/2.0;
|
|
}
|
|
|
|
matrix<double,2,1> temp2;
|
|
temp2 = m*v/temp;
|
|
const double a = temp2(0);
|
|
const double b = temp2(1);
|
|
|
|
temp = b*b - 3*a*d0;
|
|
if (temp < 0 || a == 0)
|
|
{
|
|
// This is probably a line so just pick the lowest point
|
|
if (f0 < f_x2)
|
|
return 0;
|
|
else
|
|
return x2;
|
|
}
|
|
temp = (-b + std::sqrt(temp))/(3*a);
|
|
return put_in_range(0, x2, temp);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double lagrange_poly_min_extrap (
|
|
double p1,
|
|
double p2,
|
|
double p3,
|
|
double f1,
|
|
double f2,
|
|
double f3
|
|
)
|
|
{
|
|
DLIB_ASSERT(p1 < p2 && p2 < p3 && f1 >= f2 && f2 <= f3,
|
|
" p1: " << p1
|
|
<< " p2: " << p2
|
|
<< " p3: " << p3
|
|
<< " f1: " << f1
|
|
<< " f2: " << f2
|
|
<< " f3: " << f3);
|
|
|
|
// This formula is out of the book Nonlinear Optimization by Andrzej Ruszczynski. See section 5.2.
|
|
double temp1 = f1*(p3*p3 - p2*p2) + f2*(p1*p1 - p3*p3) + f3*(p2*p2 - p1*p1);
|
|
double temp2 = 2*(f1*(p3 - p2) + f2*(p1 - p3) + f3*(p2 - p1) );
|
|
|
|
if (temp2 == 0)
|
|
{
|
|
return p2;
|
|
}
|
|
|
|
const double result = temp1/temp2;
|
|
|
|
// do a final sanity check to make sure the result is in the right range
|
|
if (p1 <= result && result <= p3)
|
|
{
|
|
return result;
|
|
}
|
|
else
|
|
{
|
|
return std::min(std::max(p1,result),p3);
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct,
|
|
typename funct_der
|
|
>
|
|
double line_search (
|
|
const funct& f,
|
|
const double f0,
|
|
const funct_der& der,
|
|
const double d0,
|
|
double rho,
|
|
double sigma,
|
|
double min_f,
|
|
unsigned long max_iter
|
|
)
|
|
{
|
|
DLIB_ASSERT (
|
|
0 < rho && rho < sigma && sigma < 1 && max_iter > 0,
|
|
"\tdouble line_search()"
|
|
<< "\n\tYou have given invalid arguments to this function"
|
|
<< "\n\t sigma: " << sigma
|
|
<< "\n\t rho: " << rho
|
|
<< "\n\t max_iter: " << max_iter
|
|
);
|
|
|
|
// The bracketing phase of this function is implemented according to block 2.6.2 from
|
|
// the book Practical Methods of Optimization by R. Fletcher. The sectioning
|
|
// phase is an implementation of 2.6.4 from the same book.
|
|
|
|
// 1 <= tau1a < tau1b. Controls the alpha jump size during the bracketing phase of
|
|
// the search.
|
|
const double tau1a = 1.4;
|
|
const double tau1b = 9;
|
|
|
|
// it must be the case that 0 < tau2 < tau3 <= 1/2 for the algorithm to function
|
|
// correctly but the specific values of tau2 and tau3 aren't super important.
|
|
const double tau2 = 1.0/10.0;
|
|
const double tau3 = 1.0/2.0;
|
|
|
|
|
|
// Stop right away and return a step size of 0 if the gradient is 0 at the starting point
|
|
if (std::abs(d0) <= std::abs(f0)*std::numeric_limits<double>::epsilon())
|
|
return 0;
|
|
|
|
// Stop right away if the current value is good enough according to min_f
|
|
if (f0 <= min_f)
|
|
return 0;
|
|
|
|
// Figure out a reasonable upper bound on how large alpha can get.
|
|
const double mu = (min_f-f0)/(rho*d0);
|
|
|
|
|
|
double alpha = 1;
|
|
if (mu < 0)
|
|
alpha = -alpha;
|
|
alpha = put_in_range(0, 0.65*mu, alpha);
|
|
|
|
|
|
double last_alpha = 0;
|
|
double last_val = f0;
|
|
double last_val_der = d0;
|
|
|
|
// The bracketing stage will find a range of points [a,b]
|
|
// that contains a reasonable solution to the line search
|
|
double a, b;
|
|
|
|
// These variables will hold the values and derivatives of f(a) and f(b)
|
|
double a_val, b_val, a_val_der, b_val_der;
|
|
|
|
// This thresh value represents the Wolfe curvature condition
|
|
const double thresh = std::abs(sigma*d0);
|
|
|
|
unsigned long itr = 0;
|
|
// do the bracketing stage to find the bracket range [a,b]
|
|
while (true)
|
|
{
|
|
++itr;
|
|
const double val = f(alpha);
|
|
const double val_der = der(alpha);
|
|
|
|
// we are done with the line search since we found a value smaller
|
|
// than the minimum f value
|
|
if (val <= min_f)
|
|
return alpha;
|
|
|
|
if (val > f0 + rho*alpha*d0 || val >= last_val)
|
|
{
|
|
a_val = last_val;
|
|
a_val_der = last_val_der;
|
|
b_val = val;
|
|
b_val_der = val_der;
|
|
|
|
a = last_alpha;
|
|
b = alpha;
|
|
break;
|
|
}
|
|
|
|
if (std::abs(val_der) <= thresh)
|
|
return alpha;
|
|
|
|
// if we are stuck not making progress then quit with the current alpha
|
|
if (last_alpha == alpha || itr >= max_iter)
|
|
return alpha;
|
|
|
|
if (val_der >= 0)
|
|
{
|
|
a_val = val;
|
|
a_val_der = val_der;
|
|
b_val = last_val;
|
|
b_val_der = last_val_der;
|
|
|
|
a = alpha;
|
|
b = last_alpha;
|
|
break;
|
|
}
|
|
|
|
|
|
|
|
const double temp = alpha;
|
|
// Pick a larger range [first, last]. We will pick the next alpha in that
|
|
// range.
|
|
double first, last;
|
|
if (mu > 0)
|
|
{
|
|
first = std::min(mu, alpha + tau1a*(alpha - last_alpha));
|
|
last = std::min(mu, alpha + tau1b*(alpha - last_alpha));
|
|
}
|
|
else
|
|
{
|
|
first = std::max(mu, alpha + tau1a*(alpha - last_alpha));
|
|
last = std::max(mu, alpha + tau1b*(alpha - last_alpha));
|
|
}
|
|
|
|
|
|
|
|
// pick a point between first and last by doing some kind of interpolation
|
|
if (last_alpha < alpha)
|
|
alpha = last_alpha + (alpha-last_alpha)*poly_min_extrap(last_val, last_val_der, val, val_der, 1e10);
|
|
else
|
|
alpha = alpha + (last_alpha-alpha)*poly_min_extrap(val, val_der, last_val, last_val_der, 1e10);
|
|
|
|
alpha = put_in_range(first,last,alpha);
|
|
|
|
last_alpha = temp;
|
|
|
|
last_val = val;
|
|
last_val_der = val_der;
|
|
|
|
}
|
|
|
|
|
|
// Now do the sectioning phase from 2.6.4
|
|
while (true)
|
|
{
|
|
++itr;
|
|
double first = a + tau2*(b-a);
|
|
double last = b - tau3*(b-a);
|
|
|
|
// use interpolation to pick alpha between first and last
|
|
alpha = a + (b-a)*poly_min_extrap(a_val, a_val_der, b_val, b_val_der);
|
|
alpha = put_in_range(first,last,alpha);
|
|
|
|
const double val = f(alpha);
|
|
const double val_der = der(alpha);
|
|
|
|
// we are done with the line search since we found a value smaller
|
|
// than the minimum f value or we ran out of iterations.
|
|
if (val <= min_f || itr >= max_iter)
|
|
return alpha;
|
|
|
|
// stop if the interval gets so small that it isn't shrinking any more due to rounding error
|
|
if (a == first || b == last)
|
|
{
|
|
return b;
|
|
}
|
|
|
|
// If alpha has basically become zero then just stop. Think of it like this,
|
|
// if we take the largest possible alpha step will the objective function
|
|
// change at all? If not then there isn't any point looking for a better
|
|
// alpha.
|
|
const double max_possible_alpha = std::max(std::abs(a),std::abs(b));
|
|
if (std::abs(max_possible_alpha*d0) <= std::abs(f0)*std::numeric_limits<double>::epsilon())
|
|
return alpha;
|
|
|
|
|
|
if (val > f0 + rho*alpha*d0 || val >= a_val)
|
|
{
|
|
b = alpha;
|
|
b_val = val;
|
|
b_val_der = val_der;
|
|
}
|
|
else
|
|
{
|
|
if (std::abs(val_der) <= thresh)
|
|
return alpha;
|
|
|
|
if ( (b-a)*val_der >= 0)
|
|
{
|
|
b = a;
|
|
b_val = a_val;
|
|
b_val_der = a_val_der;
|
|
}
|
|
|
|
a = alpha;
|
|
a_val = val;
|
|
a_val_der = val_der;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct
|
|
>
|
|
double backtracking_line_search (
|
|
const funct& f,
|
|
double f0,
|
|
double d0,
|
|
double alpha,
|
|
double rho,
|
|
unsigned long max_iter
|
|
)
|
|
{
|
|
DLIB_ASSERT (
|
|
0 < rho && rho < 1 && max_iter > 0,
|
|
"\tdouble backtracking_line_search()"
|
|
<< "\n\tYou have given invalid arguments to this function"
|
|
<< "\n\t rho: " << rho
|
|
<< "\n\t max_iter: " << max_iter
|
|
);
|
|
|
|
// make sure alpha is going in the right direction. That is, it should be opposite
|
|
// the direction of the gradient.
|
|
if ((d0 > 0 && alpha > 0) ||
|
|
(d0 < 0 && alpha < 0))
|
|
{
|
|
alpha *= -1;
|
|
}
|
|
|
|
bool have_prev_alpha = false;
|
|
double prev_alpha = 0;
|
|
double prev_val = 0;
|
|
unsigned long iter = 0;
|
|
while (true)
|
|
{
|
|
++iter;
|
|
const double val = f(alpha);
|
|
if (val <= f0 + alpha*rho*d0 || iter >= max_iter)
|
|
{
|
|
return alpha;
|
|
}
|
|
else
|
|
{
|
|
// Interpolate a new alpha. We also make sure the step by which we
|
|
// reduce alpha is not super small.
|
|
double step;
|
|
if (!have_prev_alpha)
|
|
{
|
|
if (d0 < 0)
|
|
step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, d0, val));
|
|
else
|
|
step = alpha*put_in_range(0.1,0.9, poly_min_extrap(f0, -d0, val));
|
|
have_prev_alpha = true;
|
|
}
|
|
else
|
|
{
|
|
if (d0 < 0)
|
|
step = put_in_range(0.1*alpha,0.9*alpha, poly_min_extrap(f0, d0, alpha, val, prev_alpha, prev_val));
|
|
else
|
|
step = put_in_range(0.1*alpha,0.9*alpha, -poly_min_extrap(f0, -d0, -alpha, val, -prev_alpha, prev_val));
|
|
}
|
|
|
|
prev_alpha = alpha;
|
|
prev_val = val;
|
|
|
|
alpha = step;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class optimize_single_variable_failure : public error {
|
|
public: optimize_single_variable_failure(const std::string& s):error(s){}
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct>
|
|
double find_min_single_variable (
|
|
const funct& f,
|
|
double& starting_point,
|
|
const double begin = -1e200,
|
|
const double end = 1e200,
|
|
const double eps = 1e-3,
|
|
const long max_iter = 100,
|
|
const double initial_search_radius = 1
|
|
)
|
|
{
|
|
DLIB_CASSERT( eps > 0 &&
|
|
max_iter > 1 &&
|
|
begin <= starting_point && starting_point <= end &&
|
|
initial_search_radius > 0,
|
|
"eps: " << eps
|
|
<< "\n max_iter: "<< max_iter
|
|
<< "\n begin: "<< begin
|
|
<< "\n end: "<< end
|
|
<< "\n starting_point: "<< starting_point
|
|
<< "\n initial_search_radius: "<< initial_search_radius
|
|
);
|
|
|
|
double search_radius = initial_search_radius;
|
|
|
|
double p1=0, p2=0, p3=0, f1=0, f2=0, f3=0;
|
|
long f_evals = 1;
|
|
|
|
if (begin == end)
|
|
{
|
|
return f(starting_point);
|
|
}
|
|
|
|
using std::abs;
|
|
using std::min;
|
|
using std::max;
|
|
|
|
// find three bracketing points such that f1 > f2 < f3. Do this by generating a sequence
|
|
// of points expanding away from 0. Also note that, in the following code, it is always the
|
|
// case that p1 < p2 < p3.
|
|
|
|
|
|
|
|
// The first thing we do is get a starting set of 3 points that are inside the [begin,end] bounds
|
|
p1 = max(starting_point-search_radius, begin);
|
|
p3 = min(starting_point+search_radius, end);
|
|
f1 = f(p1);
|
|
f3 = f(p3);
|
|
|
|
if (starting_point == p1 || starting_point == p3)
|
|
{
|
|
p2 = (p1+p3)/2;
|
|
f2 = f(p2);
|
|
}
|
|
else
|
|
{
|
|
p2 = starting_point;
|
|
f2 = f(starting_point);
|
|
}
|
|
|
|
f_evals += 2;
|
|
|
|
// Now we have 3 points on the function. Start looking for a bracketing set such that
|
|
// f1 > f2 < f3 is the case.
|
|
while ( !(f1 > f2 && f2 < f3))
|
|
{
|
|
// check for hitting max_iter or if the interval is now too small
|
|
if (f_evals >= max_iter)
|
|
{
|
|
throw optimize_single_variable_failure(
|
|
"The max number of iterations of single variable optimization have been reached\n"
|
|
"without converging.");
|
|
}
|
|
if (p3-p1 < eps)
|
|
{
|
|
if (f1 < min(f2,f3))
|
|
{
|
|
starting_point = p1;
|
|
return f1;
|
|
}
|
|
|
|
if (f2 < min(f1,f3))
|
|
{
|
|
starting_point = p2;
|
|
return f2;
|
|
}
|
|
|
|
starting_point = p3;
|
|
return f3;
|
|
}
|
|
|
|
// If the left most points are identical in function value then expand out the
|
|
// left a bit, unless it's already at bound or we would drop that left most
|
|
// point anyway because it's bad.
|
|
if (f1==f2 && f1<f3 && p1!=begin)
|
|
{
|
|
p1 = max(p1 - search_radius, begin);
|
|
f1 = f(p1);
|
|
++f_evals;
|
|
search_radius *= 2;
|
|
continue;
|
|
}
|
|
if (f2==f3 && f3<f1 && p3!=end)
|
|
{
|
|
p3 = min(p3 + search_radius, end);
|
|
f3 = f(p3);
|
|
++f_evals;
|
|
search_radius *= 2;
|
|
continue;
|
|
}
|
|
|
|
|
|
// if f1 is small then take a step to the left
|
|
if (f1 <= f3)
|
|
{
|
|
// check if the minimum is butting up against the bounds and if so then pick
|
|
// a point between p1 and p2 in the hopes that shrinking the interval will
|
|
// be a good thing to do. Or if p1 and p2 aren't differentiated then try and
|
|
// get them to obtain different values.
|
|
if (p1 == begin || (f1 == f2 && (end-begin) < search_radius ))
|
|
{
|
|
p3 = p2;
|
|
f3 = f2;
|
|
|
|
p2 = (p1+p2)/2.0;
|
|
f2 = f(p2);
|
|
}
|
|
else
|
|
{
|
|
// pick a new point to the left of our current bracket
|
|
p3 = p2;
|
|
f3 = f2;
|
|
|
|
p2 = p1;
|
|
f2 = f1;
|
|
|
|
p1 = max(p1 - search_radius, begin);
|
|
f1 = f(p1);
|
|
|
|
search_radius *= 2;
|
|
}
|
|
|
|
}
|
|
// otherwise f3 is small and we should take a step to the right
|
|
else
|
|
{
|
|
// check if the minimum is butting up against the bounds and if so then pick
|
|
// a point between p2 and p3 in the hopes that shrinking the interval will
|
|
// be a good thing to do. Or if p2 and p3 aren't differentiated then try and
|
|
// get them to obtain different values.
|
|
if (p3 == end || (f2 == f3 && (end-begin) < search_radius))
|
|
{
|
|
p1 = p2;
|
|
f1 = f2;
|
|
|
|
p2 = (p3+p2)/2.0;
|
|
f2 = f(p2);
|
|
}
|
|
else
|
|
{
|
|
// pick a new point to the right of our current bracket
|
|
p1 = p2;
|
|
f1 = f2;
|
|
|
|
p2 = p3;
|
|
f2 = f3;
|
|
|
|
p3 = min(p3 + search_radius, end);
|
|
f3 = f(p3);
|
|
|
|
search_radius *= 2;
|
|
}
|
|
}
|
|
|
|
++f_evals;
|
|
}
|
|
|
|
|
|
// Loop until we have done the max allowable number of iterations or
|
|
// the bracketing window is smaller than eps.
|
|
// Within this loop we maintain the invariant that: f1 > f2 < f3 and p1 < p2 < p3
|
|
const double tau = 0.1;
|
|
while( f_evals < max_iter && p3-p1 > eps)
|
|
{
|
|
double p_min = lagrange_poly_min_extrap(p1,p2,p3, f1,f2,f3);
|
|
|
|
|
|
// make sure p_min isn't too close to the three points we already have
|
|
if (p_min < p2)
|
|
{
|
|
const double min_dist = (p2-p1)*tau;
|
|
if (abs(p1-p_min) < min_dist)
|
|
{
|
|
p_min = p1 + min_dist;
|
|
}
|
|
else if (abs(p2-p_min) < min_dist)
|
|
{
|
|
p_min = p2 - min_dist;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const double min_dist = (p3-p2)*tau;
|
|
if (abs(p2-p_min) < min_dist)
|
|
{
|
|
p_min = p2 + min_dist;
|
|
}
|
|
else if (abs(p3-p_min) < min_dist)
|
|
{
|
|
p_min = p3 - min_dist;
|
|
}
|
|
}
|
|
|
|
// make sure one side of the bracket isn't super huge compared to the other
|
|
// side. If it is then contract it.
|
|
const double bracket_ratio = abs(p1-p2)/abs(p2-p3);
|
|
// Force p_min to be on a reasonable side. But only if lagrange_poly_min_extrap()
|
|
// didn't put it on a good side already.
|
|
if (bracket_ratio >= 10)
|
|
{
|
|
if (p_min > p2)
|
|
p_min = (p1+p2)/2;
|
|
}
|
|
else if (bracket_ratio <= 0.1)
|
|
{
|
|
if (p_min < p2)
|
|
p_min = (p2+p3)/2;
|
|
}
|
|
|
|
|
|
const double f_min = f(p_min);
|
|
|
|
|
|
// Remove one of the endpoints of our bracket depending on where the new point falls.
|
|
if (p_min < p2)
|
|
{
|
|
if (f1 > f_min && f_min < f2)
|
|
{
|
|
p3 = p2;
|
|
f3 = f2;
|
|
p2 = p_min;
|
|
f2 = f_min;
|
|
}
|
|
else
|
|
{
|
|
p1 = p_min;
|
|
f1 = f_min;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (f2 > f_min && f_min < f3)
|
|
{
|
|
p1 = p2;
|
|
f1 = f2;
|
|
p2 = p_min;
|
|
f2 = f_min;
|
|
}
|
|
else
|
|
{
|
|
p3 = p_min;
|
|
f3 = f_min;
|
|
}
|
|
}
|
|
|
|
|
|
++f_evals;
|
|
}
|
|
|
|
if (f_evals >= max_iter)
|
|
{
|
|
throw optimize_single_variable_failure(
|
|
"The max number of iterations of single variable optimization have been reached\n"
|
|
"without converging.");
|
|
}
|
|
|
|
starting_point = p2;
|
|
return f2;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct>
|
|
class negate_function_object
|
|
{
|
|
public:
|
|
negate_function_object(const funct& f_) : f(f_){}
|
|
|
|
template <typename T>
|
|
double operator()(const T& x) const
|
|
{
|
|
return -f(x);
|
|
}
|
|
|
|
private:
|
|
const funct& f;
|
|
};
|
|
|
|
template <typename funct>
|
|
const negate_function_object<funct> negate_function(const funct& f) { return negate_function_object<funct>(f); }
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename funct>
|
|
double find_max_single_variable (
|
|
const funct& f,
|
|
double& starting_point,
|
|
const double begin = -1e200,
|
|
const double end = 1e200,
|
|
const double eps = 1e-3,
|
|
const long max_iter = 100,
|
|
const double initial_search_radius = 1
|
|
)
|
|
{
|
|
return -find_min_single_variable(negate_function(f), starting_point, begin, end, eps, max_iter, initial_search_radius);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_OPTIMIZATIOn_LINE_SEARCH_H_
|
|
|