// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
|
|
#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
|
|
|
|
#include "structural_svm_object_detection_problem_abstract.h"
|
|
#include "../image_processing/object_detector_abstract.h"
|
|
#include "../image_processing/box_overlap_testing_abstract.h"
|
|
#include "../image_processing/full_object_detection_abstract.h"
|
|
#include <chrono>
|
|
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_scanner_type
|
|
>
|
|
class structural_object_detection_trainer : noncopyable
|
|
{
|
|
/*!
|
|
REQUIREMENTS ON image_scanner_type
|
|
image_scanner_type must be an implementation of
|
|
dlib/image_processing/scan_fhog_pyramid_abstract.h or
|
|
dlib/image_processing/scan_image_custom_abstract.h or
|
|
dlib/image_processing/scan_image_pyramid_abstract.h or
|
|
dlib/image_processing/scan_image_boxes_abstract.h
|
|
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object is a tool for learning to detect objects in images based on a
|
|
set of labeled images. The training procedure produces an object_detector
|
|
which can be used to predict the locations of objects in new images.
|
|
|
|
Note that this is just a convenience wrapper around the structural_svm_object_detection_problem
|
|
to make it look similar to all the other trainers in dlib.
|
|
!*/
|
|
|
|
public:
|
|
typedef double scalar_type;
|
|
typedef default_memory_manager mem_manager_type;
|
|
typedef object_detector<image_scanner_type> trained_function_type;
|
|
|
|
|
|
explicit structural_object_detection_trainer (
|
|
const image_scanner_type& scanner
|
|
);
|
|
/*!
|
|
requires
|
|
- scanner.get_num_detection_templates() > 0
|
|
ensures
|
|
- #get_c() == 1
|
|
- this object isn't verbose
|
|
- #get_epsilon() == 0.1
|
|
- #get_num_threads() == 2
|
|
- #get_max_cache_size() == 5
|
|
- #get_match_eps() == 0.5
|
|
- #get_loss_per_missed_target() == 1
|
|
- #get_loss_per_false_alarm() == 1
|
|
- This object will attempt to learn a model for the given
|
|
scanner object when train() is called.
|
|
- #get_scanner() == scanner
|
|
(note that only the "configuration" of scanner is copied.
|
|
I.e. the copy is done using copy_configuration())
|
|
- #auto_set_overlap_tester() == true
|
|
- #get_max_runtime() == std::chrono::hours(24*356*290)
|
|
(i.e. 290 years, so basically forever)
|
|
!*/
|
|
|
|
const image_scanner_type& get_scanner (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the image scanner used by this object.
|
|
!*/
|
|
|
|
bool auto_set_overlap_tester (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- if (this object will automatically determine an appropriate
|
|
state for the overlap tester used for non-max suppression.) then
|
|
- returns true
|
|
- In this case, it is determined using the find_tight_overlap_tester()
|
|
routine based on the truth_object_detections given to the
|
|
structural_object_detection_trainer::train() method.
|
|
- else
|
|
- returns false
|
|
!*/
|
|
|
|
void set_overlap_tester (
|
|
const test_box_overlap& tester
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_overlap_tester() == tester
|
|
- #auto_set_overlap_tester() == false
|
|
!*/
|
|
|
|
test_box_overlap get_overlap_tester (
|
|
) const;
|
|
/*!
|
|
requires
|
|
- auto_set_overlap_tester() == false
|
|
ensures
|
|
- returns the overlap tester object which will be used to perform non-max suppression.
|
|
In particular, this function returns the overlap tester which will populate the
|
|
object_detector returned by train().
|
|
!*/
|
|
|
|
void set_num_threads (
|
|
unsigned long num
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_num_threads() == num
|
|
!*/
|
|
|
|
unsigned long get_num_threads (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of threads used during training. You should
|
|
usually set this equal to the number of processing cores on your
|
|
machine.
|
|
!*/
|
|
|
|
void set_epsilon (
|
|
scalar_type eps
|
|
);
|
|
/*!
|
|
requires
|
|
- eps > 0
|
|
ensures
|
|
- #get_epsilon() == eps
|
|
!*/
|
|
|
|
const scalar_type get_epsilon (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the error epsilon that determines when training should stop.
|
|
Smaller values may result in a more accurate solution but take longer
|
|
to train. You can think of this epsilon value as saying "solve the
|
|
optimization problem until the average loss per sample is within epsilon
|
|
of its optimal value".
|
|
!*/
|
|
|
|
void set_max_runtime (
|
|
const std::chrono::nanoseconds& max_runtime
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_max_runtime() == max_runtime
|
|
!*/
|
|
|
|
std::chrono::nanoseconds get_max_runtime (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the maximum amount of time we will let .train() run before
|
|
making it terminate.
|
|
!*/
|
|
|
|
void set_max_cache_size (
|
|
unsigned long max_size
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_max_cache_size() == max_size
|
|
!*/
|
|
|
|
unsigned long get_max_cache_size (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- During training, this object basically runs the object detector on
|
|
each image, over and over. To speed this up, it is possible to cache
|
|
the results of these detector invocations. This function returns the
|
|
number of cache elements per training sample kept in the cache. Note
|
|
that a value of 0 means caching is not used at all. Note also that
|
|
each cache element takes up about sizeof(double)*scanner.get_num_dimensions()
|
|
memory (where scanner is the scanner given to this object's constructor).
|
|
!*/
|
|
|
|
void be_verbose (
|
|
);
|
|
/*!
|
|
ensures
|
|
- This object will print status messages to standard out so that a
|
|
user can observe the progress of the algorithm.
|
|
!*/
|
|
|
|
void be_quiet (
|
|
);
|
|
/*!
|
|
ensures
|
|
- this object will not print anything to standard out
|
|
!*/
|
|
|
|
void set_oca (
|
|
const oca& item
|
|
);
|
|
/*!
|
|
ensures
|
|
- #get_oca() == item
|
|
!*/
|
|
|
|
const oca get_oca (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns a copy of the optimizer used to solve the structural SVM problem.
|
|
!*/
|
|
|
|
void set_c (
|
|
scalar_type C
|
|
);
|
|
/*!
|
|
requires
|
|
- C > 0
|
|
ensures
|
|
- #get_c() = C
|
|
!*/
|
|
|
|
const scalar_type get_c (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the SVM regularization parameter. It is the parameter
|
|
that determines the trade-off between trying to fit the training
|
|
data (i.e. minimize the loss) or allowing more errors but hopefully
|
|
improving the generalization of the resulting detector. Larger
|
|
values encourage exact fitting while smaller values of C may encourage
|
|
better generalization.
|
|
!*/
|
|
|
|
void set_match_eps (
|
|
double eps
|
|
);
|
|
/*!
|
|
requires
|
|
- 0 < eps < 1
|
|
ensures
|
|
- #get_match_eps() == eps
|
|
!*/
|
|
|
|
double get_match_eps (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the amount of alignment necessary for a detection to be considered
|
|
as matching with a ground truth rectangle. If it doesn't match then
|
|
it is considered to be a false alarm. To define this precisely, let
|
|
A and B be two rectangles, then A and B match if and only if:
|
|
A.intersect(B).area()/(A+B).area() > get_match_eps()
|
|
!*/
|
|
|
|
double get_loss_per_missed_target (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the amount of loss experienced for failing to detect one of the
|
|
targets. If you care more about finding targets than having a low false
|
|
alarm rate then you can increase this value.
|
|
!*/
|
|
|
|
void set_loss_per_missed_target (
|
|
double loss
|
|
);
|
|
/*!
|
|
requires
|
|
- loss > 0
|
|
ensures
|
|
- #get_loss_per_missed_target() == loss
|
|
!*/
|
|
|
|
double get_loss_per_false_alarm (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the amount of loss experienced for emitting a false alarm detection.
|
|
Or in other words, the loss for generating a detection that doesn't correspond
|
|
to one of the truth rectangles. If you care more about having a low false
|
|
alarm rate than finding all the targets then you can increase this value.
|
|
!*/
|
|
|
|
void set_loss_per_false_alarm (
|
|
double loss
|
|
);
|
|
/*!
|
|
requires
|
|
- loss > 0
|
|
ensures
|
|
- #get_loss_per_false_alarm() == loss
|
|
!*/
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
const trained_function_type train (
|
|
const image_array_type& images,
|
|
const std::vector<std::vector<full_object_detection> >& truth_object_detections
|
|
) const;
|
|
/*!
|
|
requires
|
|
- is_learning_problem(images, truth_object_detections) == true
|
|
- it must be valid to pass images[0] into the image_scanner_type::load() method.
|
|
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
|
|
- for all valid i, j:
|
|
- truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
|
|
- all_parts_in_rect(truth_object_detections[i][j]) == true
|
|
ensures
|
|
- Uses the structural_svm_object_detection_problem to train an object_detector
|
|
on the given images and truth_object_detections.
|
|
- returns a function F with the following properties:
|
|
- F(new_image) == A prediction of what objects are present in new_image. This
|
|
is a set of rectangles indicating their positions.
|
|
!*/
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
const trained_function_type train (
|
|
const image_array_type& images,
|
|
const std::vector<std::vector<rectangle> >& truth_object_detections
|
|
) const;
|
|
/*!
|
|
requires
|
|
- is_learning_problem(images, truth_object_detections) == true
|
|
- it must be valid to pass images[0] into the image_scanner_type::load() method.
|
|
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
|
|
- get_scanner().get_num_movable_components_per_detection_template() == 0
|
|
ensures
|
|
- This function is identical to the above train(), except that it converts
|
|
each element of truth_object_detections into a full_object_detection by
|
|
passing it to full_object_detection's constructor taking only a rectangle.
|
|
Therefore, this version of train() is a convenience function for for the
|
|
case where you don't have any movable components of the detection templates.
|
|
!*/
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
const trained_function_type train (
|
|
const image_array_type& images,
|
|
const std::vector<std::vector<full_object_detection> >& truth_object_detections,
|
|
const std::vector<std::vector<rectangle> >& ignore,
|
|
const test_box_overlap& ignore_overlap_tester = test_box_overlap()
|
|
) const;
|
|
/*!
|
|
requires
|
|
- is_learning_problem(images, truth_object_detections) == true
|
|
- it must be valid to pass images[0] into the image_scanner_type::load() method.
|
|
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
|
|
- ignore.size() == images.size()
|
|
- for all valid i, j:
|
|
- truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template()
|
|
- all_parts_in_rect(truth_object_detections[i][j]) == true
|
|
ensures
|
|
- Uses the structural_svm_object_detection_problem to train an object_detector
|
|
on the given images and truth_object_detections.
|
|
- for all valid i:
|
|
- Within images[i] any detections that match against a rectangle in
|
|
ignore[i], according to ignore_overlap_tester, are ignored. That is,
|
|
the optimizer doesn't care if the detector outputs a detection that
|
|
matches any of the ignore rectangles or if it fails to output a
|
|
detection for an ignore rectangle. Therefore, if there are objects
|
|
in your dataset that you are unsure if you want to detect or otherwise
|
|
don't care if the detector gets or doesn't then you can mark them
|
|
with ignore rectangles and the optimizer will simply ignore them.
|
|
- returns a function F with the following properties:
|
|
- F(new_image) == A prediction of what objects are present in new_image. This
|
|
is a set of rectangles indicating their positions.
|
|
!*/
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
const trained_function_type train (
|
|
const image_array_type& images,
|
|
const std::vector<std::vector<rectangle> >& truth_object_detections,
|
|
const std::vector<std::vector<rectangle> >& ignore,
|
|
const test_box_overlap& ignore_overlap_tester = test_box_overlap()
|
|
) const;
|
|
/*!
|
|
requires
|
|
- is_learning_problem(images, truth_object_detections) == true
|
|
- ignore.size() == images.size()
|
|
- it must be valid to pass images[0] into the image_scanner_type::load() method.
|
|
(also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h)
|
|
- get_scanner().get_num_movable_components_per_detection_template() == 0
|
|
ensures
|
|
- This function is identical to the above train(), except that it converts
|
|
each element of truth_object_detections into a full_object_detection by
|
|
passing it to full_object_detection's constructor taking only a rectangle.
|
|
Therefore, this version of train() is a convenience function for for the
|
|
case where you don't have any movable components of the detection templates.
|
|
!*/
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_
|
|
|
|
|