362 lines
13 KiB
C++
362 lines
13 KiB
C++
// Copyright (C) 2008 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#undef DLIB_OPTIMIZATIOn_ABSTRACT_
|
|
#ifdef DLIB_OPTIMIZATIOn_ABSTRACT_
|
|
|
|
#include <cmath>
|
|
#include <limits>
|
|
#include "../matrix/matrix_abstract.h"
|
|
#include "../algs.h"
|
|
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct,
|
|
typename T
|
|
>
|
|
class line_search_funct;
|
|
/*!
|
|
This object is a function object that represents a line search function.
|
|
|
|
Moreover, it represents a function with the signature:
|
|
double l(double x)
|
|
!*/
|
|
|
|
template <
|
|
typename funct,
|
|
typename T
|
|
>
|
|
const line_search_funct<funct,T> make_line_search_function (
|
|
const funct& f,
|
|
const T& start,
|
|
const T& direction
|
|
);
|
|
/*!
|
|
requires
|
|
- is_col_vector(start) && is_col_vector(direction) && start.size() == direction.size()
|
|
(i.e. start and direction should be column vectors of the same size)
|
|
- f must return either a double or a column vector the same length as start
|
|
- f(start + 1.5*direction) should be a valid expression
|
|
ensures
|
|
- if (f returns a double) then
|
|
- returns a line search function that computes l(x) == f(start + x*direction)
|
|
- else
|
|
- returns a line search function that computes l(x) == dot(f(start + x*direction),direction).
|
|
That is, we assume f is the derivative of some other function and that what
|
|
f returns is a gradient vector.
|
|
So the following two expressions both create the derivative of l(x):
|
|
- derivative(make_line_search_function(funct,start,direction))
|
|
- make_line_search_function(derivative(funct),start,direction)
|
|
!*/
|
|
|
|
template <
|
|
typename funct,
|
|
typename T
|
|
>
|
|
const line_search_funct<funct,T> make_line_search_function (
|
|
const funct& f,
|
|
const T& start,
|
|
const T& direction,
|
|
double& f_out
|
|
);
|
|
/*!
|
|
This function is identical to the above three argument version of make_line_search_function()
|
|
except that, if f() outputs a double, every time f() is evaluated its output is also stored
|
|
into f_out.
|
|
!*/
|
|
|
|
template <
|
|
typename funct,
|
|
typename T
|
|
>
|
|
const line_search_funct<funct,T> make_line_search_function (
|
|
const funct& f,
|
|
const T& start,
|
|
const T& direction,
|
|
T& gradient_out
|
|
);
|
|
/*!
|
|
This function is identical to the above three argument version of make_line_search_function()
|
|
except that, if f() outputs a column vector, every time f() is evaluated its output is also
|
|
stored into gradient_out.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double f1,
|
|
double d1,
|
|
double limit = 1
|
|
);
|
|
/*!
|
|
ensures
|
|
- let c(x) be a 3rd degree polynomial such that:
|
|
- c(0) == f0
|
|
- c(1) == f1
|
|
- derivative of c(x) at x==0 is d0
|
|
- derivative of c(x) at x==1 is d1
|
|
- returns the point in the range [0,limit] that minimizes the polynomial c(x)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double f1
|
|
);
|
|
/*!
|
|
ensures
|
|
- let c(x) be a 2nd degree polynomial such that:
|
|
- c(0) == f0
|
|
- c(1) == f1
|
|
- derivative of c(x) at x==0 is d0
|
|
- returns the point in the range [0,1] that minimizes the polynomial c(x)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double poly_min_extrap (
|
|
double f0,
|
|
double d0,
|
|
double x1,
|
|
double f_x1,
|
|
double x2,
|
|
double f_x2
|
|
)
|
|
/*!
|
|
requires
|
|
- 0 < x1 < x2
|
|
ensures
|
|
- let f(x) be a 3rd degree polynomial such that:
|
|
- f(0) == f0
|
|
- derivative of f(x) at x==0 is d0
|
|
- f(x1) == f_x1
|
|
- f(x2) == f_x2
|
|
- returns the point in the range [0,x2] that minimizes the polynomial f(x)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double lagrange_poly_min_extrap (
|
|
double p1,
|
|
double p2,
|
|
double p3,
|
|
double f1,
|
|
double f2,
|
|
double f3
|
|
);
|
|
/*!
|
|
requires
|
|
- f1 >= f2 <= f3
|
|
- p1 < p2 < p3
|
|
ensures
|
|
- let c(x) be the second order Lagrange polynomial that interpolates the
|
|
points p1, p2, and p3 where c(p1)==f1, c(p2)==f2, and c(p3)==f3
|
|
- this function returns the point in the range [p1,p3] that minimizes
|
|
the polynomial c(x)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct,
|
|
typename funct_der
|
|
>
|
|
double line_search (
|
|
const funct& f,
|
|
const double f0,
|
|
const funct_der& der,
|
|
const double d0,
|
|
double rho,
|
|
double sigma,
|
|
double min_f,
|
|
unsigned long max_iter
|
|
)
|
|
/*!
|
|
requires
|
|
- 0 < rho < sigma < 1
|
|
- f and der are scalar functions of scalars
|
|
(e.g. line_search_funct objects)
|
|
- der is the derivative of f
|
|
- f0 == f(0)
|
|
- d0 == der(0)
|
|
- max_iter > 0
|
|
ensures
|
|
- Performs a line search and uses the strong Wolfe conditions to decide when
|
|
the search can stop.
|
|
- rho == the parameter of the Wolfe sufficient decrease condition
|
|
- sigma == the parameter of the Wolfe curvature condition
|
|
- max_iter == the maximum number of iterations allowable. After this
|
|
many evaluations of f() line_search() is guaranteed to terminate.
|
|
- returns a value alpha such that f(alpha) is significantly closer to
|
|
the minimum of f than f(0).
|
|
- It is assumed that the minimum possible value of f(x) is min_f. So if
|
|
an alpha is found such that f(alpha) <= min_f then the search stops
|
|
immediately.
|
|
- This function is also optimized for the case where der(0) is negative. I.e.
|
|
positive values of the argument to f() should be in a descent direction.
|
|
- When this function makes calls to f() and der() it always does so by
|
|
first calling f() and then calling der(). That is, these two functions
|
|
are always called in pairs with f() being called first and then der()
|
|
being called second.
|
|
!*/
|
|
|
|
/*
|
|
A good discussion of the Wolfe conditions and line search algorithms in
|
|
general can be found in the book Practical Methods of Optimization by R. Fletcher
|
|
and also in the more recent book Numerical Optimization by Nocedal and Wright.
|
|
*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct
|
|
>
|
|
double backtracking_line_search (
|
|
const funct& f,
|
|
double f0,
|
|
double d0,
|
|
double alpha,
|
|
double rho,
|
|
unsigned long max_iter
|
|
);
|
|
/*!
|
|
requires
|
|
- 0 < rho < 1
|
|
- f is a scalar function of scalars
|
|
(e.g. a line_search_funct object)
|
|
- f0 == f(0)
|
|
- d0 == the derivative of f() at f(0).
|
|
- max_iter > 0
|
|
ensures
|
|
- Performs a backtracking line search and uses the Armijo sufficient decrease
|
|
rule to decide when the search can stop.
|
|
- rho == the parameter of the sufficient decrease condition.
|
|
- max_iter == the maximum number of iterations allowable. After this many
|
|
evaluations of f() backtracking_line_search() is guaranteed to terminate.
|
|
- The line search starts with the input alpha value and then backtracks until
|
|
it finds a good enough alpha value. Once found, it returns the alpha value
|
|
such that f(alpha) is significantly closer to the minimum of f than f(0).
|
|
- The returned value of alpha will always be the last value of alpha which was
|
|
passed to f(). That is, it will always be the case that the last call to f()
|
|
made by backtracking_line_search() was f(alpha) where alpha is the return
|
|
value from this function.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct
|
|
>
|
|
const negate_function_object<funct> negate_function(
|
|
const funct& f
|
|
);
|
|
/*!
|
|
requires
|
|
- f == a function that returns a scalar
|
|
ensures
|
|
- returns a function that represents the negation of f. That is,
|
|
the returned function object represents g(x) == -f(x)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class optimize_single_variable_failure : public error;
|
|
/*!
|
|
This is the exception class used by the functions defined below.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct
|
|
>
|
|
double find_min_single_variable (
|
|
const funct& f,
|
|
double& starting_point,
|
|
const double begin = -1e200,
|
|
const double end = 1e200,
|
|
const double eps = 1e-3,
|
|
const long max_iter = 100,
|
|
const double initial_search_radius = 1
|
|
)
|
|
/*!
|
|
requires
|
|
- eps > 0
|
|
- max_iter > 1
|
|
- begin <= starting_point <= end
|
|
- f must be a function of a double that returns a double
|
|
(e.g. f(starting_point) should be a valid expression that evaluates to a double)
|
|
- initial_search_radius > 0
|
|
ensures
|
|
- Finds a point P such that:
|
|
- P is a local minimum of the function f().
|
|
- begin <= P <= end
|
|
- Evaluates f() no more than max_iter times
|
|
- Stops searching when the window around the minimum point is smaller than eps.
|
|
The search will begin with the given starting_point and expand out to the
|
|
left and right by initial_search_radius sized steps. So if you think the
|
|
minimum is likely to be found within X distance from the starting_point then
|
|
set initial_search_radius to X.
|
|
- #starting_point == P
|
|
- returns f(P)
|
|
throws
|
|
- optimize_single_variable_failure
|
|
This exception is thrown if max_iter iterations are performed without
|
|
determining the min point to the requested accuracy of eps.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct
|
|
>
|
|
double find_max_single_variable (
|
|
const funct& f,
|
|
double& starting_point,
|
|
const double begin = -1e200,
|
|
const double end = 1e200,
|
|
const double eps = 1e-3,
|
|
const long max_iter = 100,
|
|
const double initial_search_radius = 1
|
|
)
|
|
/*!
|
|
requires
|
|
- eps > 0
|
|
- max_iter > 1
|
|
- begin <= starting_point <= end
|
|
- f must be a function of a double that returns a double
|
|
(e.g. f(starting_point) should be a valid expression that evaluates to a double)
|
|
- initial_search_radius > 0
|
|
ensures
|
|
- Finds a point P such that:
|
|
- P is a local maximum of the function f().
|
|
- begin <= P <= end
|
|
- Evaluates f() no more than max_iter times
|
|
- Stops searching when the window around the maximum point is smaller than eps.
|
|
The search will begin with the given starting_point and expand out to the
|
|
left and right by initial_search_radius sized steps. So if you think the
|
|
maximum is likely to be found within X distance from the starting_point then
|
|
set initial_search_radius to X.
|
|
- #starting_point == P
|
|
- returns f(P)
|
|
throws
|
|
- optimize_single_variable_failure
|
|
This exception is thrown if max_iter iterations are performed without
|
|
determining the max point to the requested accuracy of eps.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_OPTIMIZATIOn_ABSTRACT_
|
|
|