431 lines
20 KiB
C++
431 lines
20 KiB
C++
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#undef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_
|
|
#ifdef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_
|
|
|
|
#include <vector>
|
|
#include "../matrix.h"
|
|
#include "kernel_abstract.h"
|
|
#include "function_abstract.h"
|
|
#include "linearly_independent_subset_finder_abstract.h"
|
|
#include <vector>
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename kernel_type,
|
|
typename EXP
|
|
>
|
|
const decision_function<kernel_type> convert_to_decision_function (
|
|
const projection_function<kernel_type>& project_funct,
|
|
const matrix_exp<EXP>& vect
|
|
);
|
|
/*!
|
|
requires
|
|
- is_vector(vect) == true
|
|
- vect.size() == project_funct.out_vector_size()
|
|
- project_funct.out_vector_size() > 0
|
|
- project_funct.weights.nc() == project_funct.basis_vectors.size()
|
|
ensures
|
|
- This function interprets the given vector as a point in the kernel feature space defined
|
|
by the given projection function. The return value of this function is a decision
|
|
function, DF, that represents the given vector in the following sense:
|
|
- for all possible sample_type objects, S, it is the case that DF(S) == dot(project_funct(S), vect)
|
|
(i.e. the returned decision function computes dot products, in kernel feature space,
|
|
between vect and any argument you give it. Note also that this equality is exact, even
|
|
for sample_type objects not in the span of the basis_vectors.)
|
|
- DF.kernel_function == project_funct.kernel_function
|
|
- DF.b == 0
|
|
- DF.basis_vectors == project_funct.basis_vectors.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename kern_type
|
|
>
|
|
class empirical_kernel_map
|
|
{
|
|
/*!
|
|
REQUIREMENTS ON kern_type
|
|
- must be a kernel function object as defined in dlib/svm/kernel_abstract.h
|
|
|
|
INITIAL VALUE
|
|
- out_vector_size() == 0
|
|
- basis_size() == 0
|
|
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object represents a map from objects of sample_type (the kind of object
|
|
a kernel function operates on) to finite dimensional column vectors which
|
|
represent points in the kernel feature space defined by whatever kernel
|
|
is used with this object.
|
|
|
|
To use the empirical_kernel_map you supply it with a particular kernel and a set of
|
|
basis samples. After that you can present it with new samples and it will project
|
|
them into the part of kernel feature space spanned by your basis samples.
|
|
|
|
This means the empirical_kernel_map is a tool you can use to very easily kernelize
|
|
any algorithm that operates on column vectors. All you have to do is select a
|
|
set of basis samples and then use the empirical_kernel_map to project all your
|
|
data points into the part of kernel feature space spanned by those basis samples.
|
|
Then just run your normal algorithm on the output vectors and it will be effectively
|
|
kernelized.
|
|
|
|
Regarding methods to select a set of basis samples, if you are working with only a
|
|
few thousand samples then you can just use all of them as basis samples.
|
|
Alternatively, the linearly_independent_subset_finder often works well for
|
|
selecting a basis set. I also find that picking a random subset typically works
|
|
well.
|
|
|
|
|
|
The empirical kernel map is something that has been around in the kernel methods
|
|
literature for a long time but is seemingly not well known. Anyway, one of the
|
|
best books on the subject is the following:
|
|
Learning with Kernels: Support Vector Machines, Regularization, Optimization,
|
|
and Beyond by Bernhard Schlkopf, Alexander J. Smola
|
|
The authors discuss the empirical kernel map as well as many other interesting
|
|
topics.
|
|
!*/
|
|
|
|
public:
|
|
|
|
typedef kern_type kernel_type;
|
|
typedef typename kernel_type::sample_type sample_type;
|
|
typedef typename kernel_type::scalar_type scalar_type;
|
|
typedef typename kernel_type::mem_manager_type mem_manager_type;
|
|
|
|
struct empirical_kernel_map_error : public error;
|
|
/*!
|
|
This is an exception class used to indicate a failure to create a
|
|
kernel map from data given by the user.
|
|
!*/
|
|
|
|
empirical_kernel_map (
|
|
);
|
|
/*!
|
|
ensures
|
|
- this object is properly initialized
|
|
!*/
|
|
|
|
void clear (
|
|
);
|
|
/*!
|
|
ensures
|
|
- this object has its initial value
|
|
!*/
|
|
|
|
template <typename T>
|
|
void load(
|
|
const kernel_type& kernel,
|
|
const T& basis_samples
|
|
);
|
|
/*!
|
|
requires
|
|
- T must be a dlib::matrix type or something convertible to a matrix via mat()
|
|
(e.g. a std::vector)
|
|
- is_vector(basis_samples) == true
|
|
- basis_samples.size() > 0
|
|
- kernel must be capable of operating on the elements of basis_samples. That is,
|
|
expressions such as kernel(basis_samples(0), basis_samples(0)) should make sense.
|
|
ensures
|
|
- 0 < #out_vector_size() <= basis_samples.size()
|
|
- #basis_size() == basis_samples.size()
|
|
- #get_kernel() == kernel
|
|
- This function constructs a map between normal sample_type objects and the
|
|
subspace of the kernel feature space defined by the given kernel and the
|
|
given set of basis samples. So after this function has been called you
|
|
will be able to project sample_type objects into kernel feature space
|
|
and obtain the resulting vector as a regular column matrix.
|
|
- The basis samples are loaded into this object in the order in which they
|
|
are stored in basis_samples. That is:
|
|
- for all valid i: (*this)[i] == basis_samples(i)
|
|
throws
|
|
- empirical_kernel_map_error
|
|
This exception is thrown if we are unable to create a kernel map.
|
|
If this happens then this object will revert back to its initial value.
|
|
!*/
|
|
|
|
void load(
|
|
const linearly_independent_subset_finder<kernel_type>& lisf
|
|
);
|
|
/*!
|
|
ensures
|
|
- #out_vector_size() == lisf.dictionary_size()
|
|
- #basis_size() == lisf.dictionary_size()
|
|
- #get_kernel() == lisf.get_kernel()
|
|
- Uses the dictionary vectors from lisf as a basis set. Thus, this function
|
|
constructs a map between normal sample_type objects and the subspace of
|
|
the kernel feature space defined by the given kernel and the given set
|
|
of basis samples. So after this function has been called you will be
|
|
able to project sample_type objects into kernel feature space and obtain
|
|
the resulting vector as a regular column matrix.
|
|
- The basis samples are loaded into this object in the order in which they
|
|
are stored in lisf. That is:
|
|
- for all valid i: (*this)[i] == lisf[i]
|
|
throws
|
|
- empirical_kernel_map_error
|
|
This exception is thrown if we are unable to create a kernel map.
|
|
E.g. if the lisf.size() == 0.
|
|
If this happens then this object will revert back to its initial value.
|
|
!*/
|
|
|
|
const kernel_type get_kernel (
|
|
) const;
|
|
/*!
|
|
requires
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- returns a copy of the kernel used by this object
|
|
!*/
|
|
|
|
long out_vector_size (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- if (this object has been loaded with basis samples) then
|
|
- returns the dimensionality of the vectors output by the project() function.
|
|
- else
|
|
- returns 0
|
|
!*/
|
|
|
|
unsigned long basis_size (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of basis vectors in projection_functions created
|
|
by this object. This is also equal to the number of basis vectors
|
|
given to the load() function.
|
|
!*/
|
|
|
|
const sample_type& operator[] (
|
|
unsigned long idx
|
|
) const;
|
|
/*!
|
|
requires
|
|
- idx < basis_size()
|
|
ensures
|
|
- returns a const reference to the idx'th basis vector contained inside
|
|
this object.
|
|
!*/
|
|
|
|
const matrix<scalar_type,0,1,mem_manager_type>& project (
|
|
const sample_type& sample
|
|
) const;
|
|
/*!
|
|
requires
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- takes the given sample and projects it into the kernel feature space
|
|
of out_vector_size() dimensions defined by this kernel map and
|
|
returns the resulting vector.
|
|
- in more precise terms, this function returns a vector such that:
|
|
- The returned vector will contain out_vector_size() elements.
|
|
- for any sample_type object S, the following equality is approximately true:
|
|
- get_kernel()(sample,S) == dot(project(sample), project(S)).
|
|
- The approximation error in the above equality will be zero (within rounding error)
|
|
if both sample_type objects involved are within the span of the set of basis
|
|
samples given to the load() function. If they are not then there will be some
|
|
approximation error. Note that all the basis samples are always within their
|
|
own span. So the equality is always exact for the samples given to the load()
|
|
function.
|
|
!*/
|
|
|
|
const matrix<scalar_type,0,1,mem_manager_type>& project (
|
|
const sample_type& samp,
|
|
scalar_type& projection_error
|
|
) const;
|
|
/*!
|
|
requires
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- This function returns project(samp)
|
|
(i.e. it returns the same thing as the above project() function)
|
|
- #projection_error == the square of the distance between the point samp
|
|
gets projected onto and samp's true image in kernel feature space.
|
|
That is, this value is equal to:
|
|
pow(convert_to_distance_function(project(samp))(samp),2)
|
|
!*/
|
|
|
|
template <typename EXP>
|
|
const decision_function<kernel_type> convert_to_decision_function (
|
|
const matrix_exp<EXP>& vect
|
|
) const;
|
|
/*!
|
|
requires
|
|
- is_vector(vect) == true
|
|
- vect.size() == out_vector_size()
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- This function interprets the given vector as a point in the kernel feature space defined
|
|
by this empirical_kernel_map. The return value of this function is a decision
|
|
function, DF, that represents the given vector in the following sense:
|
|
- for all possible sample_type objects, S, it is the case that DF(S) == dot(project(S), vect)
|
|
(i.e. the returned decision function computes dot products, in kernel feature space,
|
|
between vect and any argument you give it. Note also that this equality is exact, even
|
|
for sample_type objects not in the span of the basis samples.)
|
|
- DF.kernel_function == get_kernel()
|
|
- DF.b == 0
|
|
- DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note
|
|
that it is possible for there to be fewer basis_vectors than basis samples given to load().
|
|
- DF.basis_vectors.size() == basis_size()
|
|
!*/
|
|
|
|
template <typename EXP>
|
|
const distance_function<kernel_type> convert_to_distance_function (
|
|
const matrix_exp<EXP>& vect
|
|
) const
|
|
/*!
|
|
requires
|
|
- is_vector(vect) == true
|
|
- vect.size() == out_vector_size()
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- This function interprets the given vector as a point in the kernel feature space defined
|
|
by this empirical_kernel_map. The return value of this function is a distance
|
|
function, DF, that represents the given vector in the following sense:
|
|
- for any sample_type object S, the following equality is approximately true:
|
|
- DF(S) == length(project(S) - vect)
|
|
(i.e. the returned distance function computes distances, in kernel feature space,
|
|
between vect and any argument you give it. )
|
|
- The approximation error in the above equality will be zero (within rounding error)
|
|
if S is within the span of the set of basis samples given to the load() function.
|
|
If it is not then there will be some approximation error. Note that all the basis
|
|
samples are always within their own span. So the equality is always exact for the
|
|
samples given to the load() function. Note further that the distance computed
|
|
by DF(S) is always the correct distance in kernel feature space between vect and
|
|
the true projection of S. That is, the above equality is approximate only because
|
|
of potential error in the project() function, not in DF(S).
|
|
- DF.kernel_function == get_kernel()
|
|
- DF.b == dot(vect,vect)
|
|
- DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note
|
|
that it is possible for there to be fewer basis_vectors than basis samples given to load().
|
|
- DF.basis_vectors.size() == basis_size()
|
|
!*/
|
|
|
|
const projection_function<kernel_type> get_projection_function (
|
|
) const;
|
|
/*!
|
|
requires
|
|
- out_vector_size() != 0
|
|
ensures
|
|
- returns a projection_function, PF, that computes the same projection as project().
|
|
That is, calling PF() on any sample will produce the same output vector as calling
|
|
this->project() on that sample.
|
|
- PF.basis_vectors.size() == basis_size()
|
|
!*/
|
|
|
|
const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to (
|
|
const empirical_kernel_map& target
|
|
) const;
|
|
/*!
|
|
requires
|
|
- get_kernel() == target.get_kernel()
|
|
- out_vector_size() != 0
|
|
- target.out_vector_size() != 0
|
|
ensures
|
|
- A point in the kernel feature space defined by the kernel get_kernel() typically
|
|
has different representations with respect to different empirical_kernel_maps.
|
|
This function lets you obtain a transformation matrix that will allow you
|
|
to project between these different representations. That is, this function returns
|
|
a matrix M with the following properties:
|
|
- M maps vectors represented according to *this into the representation used by target.
|
|
- M.nr() == target.out_vector_size()
|
|
- M.nc() == this->out_vector_size()
|
|
- Let V be a vector of this->out_vector_size() length. Then define two distance_functions
|
|
DF1 = this->convert_to_distance_function(V)
|
|
DF2 = target.convert_to_distance_function(M*V)
|
|
|
|
Then DF1(DF2) == 0 // i.e. the distance between these two points should be 0
|
|
|
|
That is, DF1 and DF2 both represent the same point in kernel feature space. Note
|
|
that the above equality is only approximate. If the vector V represents a point in
|
|
kernel space that isn't in the span of the basis samples used by target then the
|
|
equality is approximate. However, if it is in their span then the equality will
|
|
be exact. For example, if target's basis samples are a superset of the basis samples
|
|
used by *this then the equality will always be exact (within rounding error).
|
|
!*/
|
|
|
|
void get_transformation_to (
|
|
const empirical_kernel_map& target,
|
|
matrix<scalar_type, 0, 0, mem_manager_type>& tmat,
|
|
projection_function<kernel_type>& partial_projection
|
|
) const;
|
|
/*!
|
|
requires
|
|
- get_kernel() == target.get_kernel()
|
|
- out_vector_size() != 0
|
|
- target.out_vector_size() != 0
|
|
- basis_size() < target.basis_size()
|
|
- for all i < basis_size(): (*this)[i] == target[i]
|
|
i.e. target must contain a superset of the basis vectors contained in *this. Moreover,
|
|
it must contain them in the same order.
|
|
ensures
|
|
- The single argument version of get_transformation_to() allows you to project
|
|
vectors from one empirical_kernel_map representation to another. This version
|
|
provides a somewhat different capability. Assuming target's basis vectors form a
|
|
superset of *this's basis vectors then this form of get_transformation_to() allows
|
|
you to reuse a vector from *this ekm to speed up the projection performed by target.
|
|
The defining relation is given below.
|
|
- for any sample S:
|
|
- target.project(S) == #tmat * this->project(S) + #partial_projection(S)
|
|
(this is always true to within rounding error for any S)
|
|
- #partial_projection.basis_vectors.size() == target.basis_vectors.size() - this->basis_vectors.size()
|
|
- #tmat.nr() == target.out_vector_size()
|
|
- #tmat.nc() == this->out_vector_size()
|
|
!*/
|
|
|
|
void swap (
|
|
empirical_kernel_map& item
|
|
);
|
|
/*!
|
|
ensures
|
|
- swaps the state of *this and item
|
|
!*/
|
|
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename kernel_type
|
|
>
|
|
void swap (
|
|
empirical_kernel_map<kernel_type>& a,
|
|
empirical_kernel_map<kernel_type>& b
|
|
) { a.swap(b); }
|
|
/*!
|
|
provides a global swap function
|
|
!*/
|
|
|
|
template <
|
|
typename kernel_type
|
|
>
|
|
void serialize (
|
|
const empirical_kernel_map<kernel_type>& item,
|
|
std::ostream& out
|
|
);
|
|
/*!
|
|
provides serialization support for empirical_kernel_map objects
|
|
!*/
|
|
|
|
template <
|
|
typename kernel_type
|
|
>
|
|
void deserialize (
|
|
empirical_kernel_map<kernel_type>& item,
|
|
std::istream& in
|
|
);
|
|
/*!
|
|
provides serialization support for empirical_kernel_map objects
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_
|
|
|