// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
|
|
#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
|
|
|
|
|
|
#include "structural_svm_problem_abstract.h"
|
|
#include "../optimization/optimization_oca_abstract.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class svm_struct_processing_node : noncopyable
|
|
{
|
|
/*!
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object is a tool for distributing the work involved in solving
|
|
a dlib::structural_svm_problem across many computers. It is used in
|
|
conjunction with the svm_struct_controller_node defined below.
|
|
!*/
|
|
|
|
public:
|
|
|
|
template <
|
|
typename T,
|
|
typename U
|
|
>
|
|
svm_struct_processing_node (
|
|
const structural_svm_problem<T,U>& problem,
|
|
unsigned short port,
|
|
unsigned short num_threads
|
|
);
|
|
/*!
|
|
requires
|
|
- port != 0
|
|
- problem.get_num_samples() != 0
|
|
- problem.get_num_dimensions() != 0
|
|
ensures
|
|
- This object will listen on the given port for a TCP connection from a
|
|
svm_struct_controller_node. Once connected, the controller node will
|
|
be able to access the given problem.
|
|
- Will use num_threads threads at a time to make concurrent calls to the
|
|
problem.separation_oracle() routine. You should set this parameter equal
|
|
to the number of available processing cores.
|
|
- Note that the following parameters within the given problem are ignored:
|
|
- problem.get_c()
|
|
- problem.get_epsilon()
|
|
- problem.get_cache_based_epsilon()
|
|
- problem.num_nuclear_norm_regularizers()
|
|
- weather the problem is verbose or not
|
|
Instead, they are defined by the svm_struct_controller_node. Note, however,
|
|
that the problem.get_max_cache_size() parameter is meaningful and controls
|
|
the size of the separation oracle cache within a svm_struct_processing_node.
|
|
!*/
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class svm_struct_controller_node : noncopyable
|
|
{
|
|
/*!
|
|
INITIAL VALUE
|
|
- get_num_processing_nodes() == 0
|
|
- get_epsilon() == 0.001
|
|
- get_max_iterations() == 10000
|
|
- get_c() == 1
|
|
- This object will not be verbose
|
|
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object is a tool for distributing the work involved in solving a
|
|
dlib::structural_svm_problem across many computers. The best way to understand
|
|
its use is via example:
|
|
|
|
First, suppose you have defined a structural_svm_problem object by inheriting from
|
|
it and defining the appropriate virtual functions. You could solve it by passing
|
|
an instance to the oca optimizer. However, if your separation oracle takes a long
|
|
time to evaluate then the optimization will take a long time to solve. To speed
|
|
this up we can distribute the calls to the separation oracle across many computers.
|
|
|
|
To make this concrete, lets imagine you want to distribute the work across three
|
|
computers. You can accomplish this by creating four programs. One containing a
|
|
svm_struct_controller_node and three containing svm_struct_processing_nodes.
|
|
|
|
The programs might look like this:
|
|
|
|
Controller program:
|
|
int main()
|
|
{
|
|
svm_struct_controller_node cont;
|
|
cont.set_c(100);
|
|
// Tell cont where the processing nodes are on your network.
|
|
cont.add_processing_node("192.168.1.10:12345");
|
|
cont.add_processing_node("192.168.1.11:12345");
|
|
cont.add_processing_node("192.168.1.12:12345");
|
|
matrix<double> w;
|
|
oca solver;
|
|
cont(solver, w); // Run the optimization.
|
|
// After this finishes w will contain the solution vector.
|
|
}
|
|
|
|
Processing programs (they are all the same, except that each loads a different subset
|
|
of the training data):
|
|
int main()
|
|
{
|
|
// Put one third of your data into this problem object. How you do this depends on your problem.
|
|
your_structural_svm_problem problem;
|
|
svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer);
|
|
cout << "hit enter to terminate this program" << endl;
|
|
cin.get();
|
|
}
|
|
|
|
!*/
|
|
|
|
public:
|
|
|
|
svm_struct_controller_node (
|
|
);
|
|
/*!
|
|
ensures
|
|
- this object is properly initialized
|
|
!*/
|
|
|
|
void set_epsilon (
|
|
double eps
|
|
);
|
|
/*!
|
|
requires
|
|
- eps > 0
|
|
ensures
|
|
- #get_epsilon() == eps
|
|
!*/
|
|
|
|
double get_epsilon (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the error epsilon that determines when training should stop.
|
|
Smaller values may result in a more accurate solution but take longer
|
|
to execute. Specifically, the algorithm stops when the average sample
|
|
risk (i.e. R(w) as defined by the dlib::structural_svm_problem object) is
|
|
within epsilon of its optimal value.
|
|
|
|
Also note that sample risk is an upper bound on a sample's loss. So
|
|
you can think of this epsilon value as saying "solve the optimization
|
|
problem until the average loss per sample is within epsilon of its
|
|
optimal value".
|
|
!*/
|
|
|
|
double get_cache_based_epsilon (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- if (get_max_cache_size() != 0) then
|
|
- The solver will not stop when the average sample risk is within
|
|
get_epsilon() of its optimal value. Instead, it will keep running
|
|
but will run the optimizer completely on the cache until the average
|
|
sample risk is within #get_cache_based_epsilon() of its optimal
|
|
value. This means that it will perform this additional refinement in
|
|
the solution accuracy without making any additional calls to the
|
|
separation_oracle(). This is useful when using a nuclear norm
|
|
regularization term because it allows you to quickly solve the
|
|
optimization problem to a high precision, which in the case of a
|
|
nuclear norm regularized problem means that many of the learned
|
|
matrices will be low rank or very close to low rank due to the
|
|
nuclear norm regularizer. This may not happen without solving the
|
|
problem to a high accuracy or their ranks may be difficult to
|
|
determine, so the extra accuracy given by the cache based refinement
|
|
is very useful. Finally, note that we include the nuclear norm term
|
|
as part of the "risk" for the purposes of determining when to stop.
|
|
- else
|
|
- The value of #get_cache_based_epsilon() has no effect.
|
|
!*/
|
|
|
|
void set_cache_based_epsilon (
|
|
double eps
|
|
);
|
|
/*!
|
|
requires
|
|
- eps > 0
|
|
ensures
|
|
- #get_cache_based_epsilon() == eps
|
|
!*/
|
|
|
|
void set_max_iterations (
|
|
unsigned long max_iter
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_max_iterations() == max_iter
|
|
!*/
|
|
|
|
unsigned long get_max_iterations (
|
|
);
|
|
/*!
|
|
ensures
|
|
- returns the maximum number of iterations the SVM optimizer is allowed to
|
|
run before it is required to stop and return a result.
|
|
!*/
|
|
|
|
void add_nuclear_norm_regularizer (
|
|
long first_dimension,
|
|
long rows,
|
|
long cols,
|
|
double regularization_strength
|
|
);
|
|
/*!
|
|
requires
|
|
- 0 <= first_dimension < number of dimensions in problem
|
|
- 0 <= rows
|
|
- 0 <= cols
|
|
- first_dimension+rows*cols <= number of dimensions in problem
|
|
- 0 < regularization_strength
|
|
ensures
|
|
- Adds a nuclear norm regularization term to the optimization problem
|
|
solved by this object. That is, instead of solving:
|
|
Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
|
|
this object will solve:
|
|
Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
|
|
where "part of w" is the part of w indicated by the arguments to this
|
|
function. In particular, the part of w included in the nuclear norm is
|
|
exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
|
|
Therefore, if you think of the w vector as being the concatenation of a
|
|
bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer()
|
|
to add nuclear norm regularization terms to any of the matrices packed into w.
|
|
- #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
|
|
!*/
|
|
|
|
unsigned long num_nuclear_norm_regularizers (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of nuclear norm regularizers that are currently a part
|
|
of this optimization problem. That is, returns the number of times
|
|
add_nuclear_norm_regularizer() has been called since the last call to
|
|
clear_nuclear_norm_regularizers() or object construction, whichever is
|
|
most recent.
|
|
!*/
|
|
|
|
void clear_nuclear_norm_regularizers (
|
|
);
|
|
/*!
|
|
ensures
|
|
- #num_nuclear_norm_regularizers() == 0
|
|
!*/
|
|
|
|
void be_verbose (
|
|
);
|
|
/*!
|
|
ensures
|
|
- This object will print status messages to standard out so that a
|
|
user can observe the progress of the algorithm.
|
|
!*/
|
|
|
|
void be_quiet(
|
|
);
|
|
/*!
|
|
ensures
|
|
- this object will not print anything to standard out
|
|
!*/
|
|
|
|
double get_c (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the SVM regularization parameter. It is the parameter that
|
|
determines the trade off between trying to fit the training data
|
|
exactly or allowing more errors but hopefully improving the
|
|
generalization of the resulting classifier. Larger values encourage
|
|
exact fitting while smaller values of C may encourage better
|
|
generalization.
|
|
!*/
|
|
|
|
void set_c (
|
|
double C
|
|
);
|
|
/*!
|
|
requires
|
|
- C > 0
|
|
ensures
|
|
- #get_c() == C
|
|
!*/
|
|
|
|
void add_processing_node (
|
|
const network_address& addr
|
|
);
|
|
/*!
|
|
requires
|
|
- addr.port != 0
|
|
ensures
|
|
- if (this address hasn't already been added) then
|
|
- #get_num_processing_nodes() == get_num_processing_nodes() + 1
|
|
- When operator() is invoked to solve the structural svm problem this
|
|
object will connect to the svm_struct_processing_node located at the
|
|
given network address and will include it in the distributed
|
|
optimization.
|
|
!*/
|
|
|
|
void add_processing_node (
|
|
const std::string& ip_or_hostname,
|
|
unsigned short port
|
|
);
|
|
/*!
|
|
requires
|
|
- port != 0
|
|
ensures
|
|
- invokes: add_processing_node(network_address(ip_or_hostname, port))
|
|
!*/
|
|
|
|
unsigned long get_num_processing_nodes (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of remote processing nodes that have been
|
|
registered with this object.
|
|
!*/
|
|
|
|
void remove_processing_nodes (
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_num_processing_nodes() == 0
|
|
!*/
|
|
|
|
class invalid_problem : public error {};
|
|
|
|
template <typename matrix_type>
|
|
double operator() (
|
|
const oca& solver,
|
|
matrix_type& w
|
|
) const;
|
|
/*!
|
|
requires
|
|
- get_num_processing_nodes() != 0
|
|
- matrix_type == a dlib::matrix capable of storing column vectors
|
|
ensures
|
|
- connects to the processing nodes and begins optimizing the structural
|
|
svm problem using the given oca solver.
|
|
- stores the solution in #w
|
|
- returns the objective value at the solution #w
|
|
throws
|
|
- invalid_problem
|
|
This exception is thrown if the svm_struct_processing_nodes disagree
|
|
on the dimensionality of the problem. That is, if they disagree on
|
|
the value of structural_svm_problem::get_num_dimensions().
|
|
!*/
|
|
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_
|
|
|
|
|