// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
|
#ifdef DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
|
|
|
#include "../geometry.h"
|
|
#include <vector>
|
|
#include "box_overlap_testing_abstract.h"
|
|
#include "full_object_detection_abstract.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
struct rect_detection
|
|
{
|
|
double detection_confidence;
|
|
unsigned long weight_index;
|
|
rectangle rect;
|
|
};
|
|
|
|
struct full_detection
|
|
{
|
|
double detection_confidence;
|
|
unsigned long weight_index;
|
|
full_object_detection rect;
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_scanner_type_
|
|
>
|
|
class object_detector
|
|
{
|
|
/*!
|
|
REQUIREMENTS ON image_scanner_type_
|
|
image_scanner_type_ must be an implementation of
|
|
dlib/image_processing/scan_image_pyramid_abstract.h or
|
|
dlib/image_processing/scan_fhog_pyramid.h or
|
|
dlib/image_processing/scan_image_custom.h or
|
|
dlib/image_processing/scan_image_boxes_abstract.h
|
|
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object is a tool for detecting the positions of objects in an image.
|
|
In particular, it is a simple container to aggregate an instance of an image
|
|
scanner (i.e. scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or
|
|
scan_image_boxes), the weight vector needed by one of these image scanners,
|
|
and finally an instance of test_box_overlap. The test_box_overlap object
|
|
is used to perform non-max suppression on the output of the image scanner
|
|
object.
|
|
|
|
Note further that this object can contain multiple weight vectors. In this
|
|
case, it will run the image scanner multiple times, once with each of the
|
|
weight vectors. Then it will aggregate the results from all runs, perform
|
|
non-max suppression and then return the results. Therefore, the object_detector
|
|
can also be used as a container for a set of object detectors that all use
|
|
the same image scanner but different weight vectors. This is useful since
|
|
the object detection procedure has two parts. A loading step where the
|
|
image is loaded into the scanner, then a detect step which uses the weight
|
|
vector to locate objects in the image. Since the loading step is independent
|
|
of the weight vector it is most efficient to run multiple detectors by
|
|
performing one load into a scanner followed by multiple detect steps. This
|
|
avoids unnecessarily loading the same image into the scanner multiple times.
|
|
!*/
|
|
public:
|
|
typedef image_scanner_type_ image_scanner_type;
|
|
typedef typename image_scanner_type::feature_vector_type feature_vector_type;
|
|
|
|
object_detector (
|
|
);
|
|
/*!
|
|
ensures
|
|
- This detector won't generate any detections when
|
|
presented with an image.
|
|
- #num_detectors() == 0
|
|
!*/
|
|
|
|
object_detector (
|
|
const object_detector& item
|
|
);
|
|
/*!
|
|
ensures
|
|
- #*this is a copy of item
|
|
- #get_scanner() == item.get_scanner()
|
|
(note that only the "configuration" of item.get_scanner() is copied.
|
|
I.e. the copy is done using copy_configuration())
|
|
!*/
|
|
|
|
object_detector (
|
|
const image_scanner_type& scanner,
|
|
const test_box_overlap& overlap_tester,
|
|
const feature_vector_type& w
|
|
);
|
|
/*!
|
|
requires
|
|
- w.size() == scanner.get_num_dimensions() + 1
|
|
- scanner.get_num_detection_templates() > 0
|
|
ensures
|
|
- When the operator() member function is called it will
|
|
invoke scanner.detect(w,dets,w(w.size()-1)), suppress
|
|
overlapping detections, and then report the results.
|
|
- when #*this is used to detect objects, the set of
|
|
output detections will never contain any overlaps
|
|
with respect to overlap_tester. That is, for all
|
|
pairs of returned detections A and B, we will always
|
|
have: overlap_tester(A,B) == false
|
|
- #get_w() == w
|
|
- #get_overlap_tester() == overlap_tester
|
|
- #get_scanner() == scanner
|
|
(note that only the "configuration" of scanner is copied.
|
|
I.e. the copy is done using copy_configuration())
|
|
- #num_detectors() == 1
|
|
!*/
|
|
|
|
object_detector (
|
|
const image_scanner_type& scanner,
|
|
const test_box_overlap& overlap_tester,
|
|
const std::vector<feature_vector_type>& w
|
|
);
|
|
/*!
|
|
requires
|
|
- for all valid i:
|
|
- w[i].size() == scanner.get_num_dimensions() + 1
|
|
- scanner.get_num_detection_templates() > 0
|
|
- w.size() > 0
|
|
ensures
|
|
- When the operator() member function is called it will invoke
|
|
get_scanner().detect(w[i],dets,w[i](w[i].size()-1)) for all valid i. Then it
|
|
will take all the detections output by the calls to detect() and suppress
|
|
overlapping detections, and finally report the results.
|
|
- when #*this is used to detect objects, the set of output detections will
|
|
never contain any overlaps with respect to overlap_tester. That is, for
|
|
all pairs of returned detections A and B, we will always have:
|
|
overlap_tester(A,B) == false
|
|
- for all valid i:
|
|
- #get_w(i) == w[i]
|
|
- #num_detectors() == w.size()
|
|
- #get_overlap_tester() == overlap_tester
|
|
- #get_scanner() == scanner
|
|
(note that only the "configuration" of scanner is copied.
|
|
I.e. the copy is done using copy_configuration())
|
|
!*/
|
|
|
|
explicit object_detector (
|
|
const std::vector<object_detector>& detectors
|
|
);
|
|
/*!
|
|
requires
|
|
- detectors.size() != 0
|
|
- All the detectors must use compatibly configured scanners. That is, it
|
|
must make sense for the weight vector from one detector to be used with
|
|
the scanner from any other.
|
|
- for all valid i:
|
|
- detectors[i].get_scanner().get_num_dimensions() == detectors[0].get_scanner().get_num_dimensions()
|
|
(i.e. all the detectors use scanners that use the same kind of feature vectors.)
|
|
ensures
|
|
- Very much like the above constructor, this constructor takes all the
|
|
given detectors and packs them into #*this. That is, invoking operator()
|
|
on #*this will run all the detectors, perform non-max suppression, and
|
|
then report the results.
|
|
- When #*this is used to detect objects, the set of output detections will
|
|
never contain any overlaps with respect to overlap_tester. That is, for
|
|
all pairs of returned detections A and B, we will always have:
|
|
overlap_tester(A,B) == false
|
|
- #num_detectors() == The sum of detectors[i].num_detectors() for all valid i.
|
|
- #get_overlap_tester() == detectors[0].get_overlap_tester()
|
|
- #get_scanner() == detectors[0].get_scanner()
|
|
(note that only the "configuration" of scanner is copied. I.e. the copy
|
|
is done using copy_configuration())
|
|
!*/
|
|
|
|
unsigned long num_detectors (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of weight vectors in this object. Since each weight
|
|
vector logically represents an object detector, this returns the number
|
|
of object detectors contained in this object.
|
|
!*/
|
|
|
|
const feature_vector_type& get_w (
|
|
unsigned long idx = 0
|
|
) const;
|
|
/*!
|
|
requires
|
|
- idx < num_detectors()
|
|
ensures
|
|
- returns the idx-th weight vector loaded into this object. All the weight vectors
|
|
have the same dimension and logically each represents a different detector.
|
|
!*/
|
|
|
|
const test_box_overlap& get_overlap_tester (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the overlap tester used by this object
|
|
!*/
|
|
|
|
const image_scanner_type& get_scanner (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the image scanner used by this object.
|
|
!*/
|
|
|
|
object_detector& operator= (
|
|
const object_detector& item
|
|
);
|
|
/*!
|
|
ensures
|
|
- #*this is a copy of item
|
|
- #get_scanner() == item.get_scanner()
|
|
(note that only the "configuration" of item.get_scanner() is
|
|
copied. I.e. the copy is done using copy_configuration())
|
|
- returns #*this
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void operator() (
|
|
const image_type& img,
|
|
std::vector<rect_detection>& dets,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- Performs object detection on the given image and stores the detected
|
|
objects into #dets. In particular, we will have that:
|
|
- #dets is sorted such that the highest confidence detections come
|
|
first. E.g. element 0 is the best detection, element 1 the next
|
|
best, and so on.
|
|
- #dets.size() == the number of detected objects.
|
|
- #dets[i].detection_confidence == The strength of the i-th detection.
|
|
Larger values indicate that the detector is more confident that
|
|
#dets[i] is a correct detection rather than being a false alarm.
|
|
Moreover, the detection_confidence is equal to the detection value
|
|
output by the scanner minus the threshold value stored at the end of
|
|
the weight vector in get_w(#dets[i].weight_index).
|
|
- #dets[i].weight_index == the index for the weight vector that
|
|
generated this detection.
|
|
- #dets[i].rect == the bounding box for the i-th detection.
|
|
- #get_scanner() will have been loaded with img. Therefore, you can call
|
|
#get_scanner().get_feature_vector() to obtain the feature vectors or
|
|
#get_scanner().get_full_object_detection() to get the
|
|
full_object_detections for the resulting object detection boxes.
|
|
- The detection threshold is adjusted by having adjust_threshold added to
|
|
it. Therefore, an adjust_threshold value > 0 makes detecting objects
|
|
harder while a negative value makes it easier. Moreover, the following
|
|
will be true for all valid i:
|
|
- #dets[i].detection_confidence >= adjust_threshold
|
|
This means that, for example, you can obtain the maximum possible number
|
|
of detections by setting adjust_threshold equal to negative infinity.
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void operator() (
|
|
const image_type& img,
|
|
std::vector<full_detection>& dets,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- This function is identical to the above operator() routine, except that
|
|
it outputs full_object_detections instead of rectangles. This means that
|
|
the output includes part locations. In particular, calling this function
|
|
is the same as calling the above operator() routine and then using
|
|
get_scanner().get_full_object_detection() to resolve all the rectangles
|
|
into full_object_detections. Therefore, this version of operator() is
|
|
simply a convenience function for performing this set of operations.
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
std::vector<rectangle> operator() (
|
|
const image_type& img,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- This function is identical to the above operator() routine, except that
|
|
it returns a std::vector<rectangle> which contains just the bounding
|
|
boxes of all the detections.
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void operator() (
|
|
const image_type& img,
|
|
std::vector<std::pair<double, rectangle> >& dets,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- performs object detection on the given image and stores the
|
|
detected objects into #dets. In particular, we will have that:
|
|
- #dets is sorted such that the highest confidence detections
|
|
come first. E.g. element 0 is the best detection, element 1
|
|
the next best, and so on.
|
|
- #dets.size() == the number of detected objects.
|
|
- #dets[i].first gives the "detection confidence", of the i-th
|
|
detection. This is the detection value output by the scanner minus
|
|
the threshold value stored at the end of the weight vector in get_w().
|
|
- #dets[i].second == the bounding box for the i-th detection.
|
|
- #get_scanner() will have been loaded with img. Therefore, you can call
|
|
#get_scanner().get_feature_vector() to obtain the feature vectors or
|
|
#get_scanner().get_full_object_detection() to get the
|
|
full_object_detections for the resulting object detection boxes.
|
|
- The detection threshold is adjusted by having adjust_threshold added to
|
|
it. Therefore, an adjust_threshold value > 0 makes detecting objects
|
|
harder while a negative value makes it easier. Moreover, the following
|
|
will be true for all valid i:
|
|
- #dets[i].first >= adjust_threshold
|
|
This means that, for example, you can obtain the maximum possible number
|
|
of detections by setting adjust_threshold equal to negative infinity.
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void operator() (
|
|
const image_type& img,
|
|
std::vector<std::pair<double, full_object_detection> >& dets,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- This function is identical to the above operator() routine, except that
|
|
it outputs full_object_detections instead of rectangles. This means that
|
|
the output includes part locations. In particular, calling this function
|
|
is the same as calling the above operator() routine and then using
|
|
get_scanner().get_full_object_detection() to resolve all the rectangles
|
|
into full_object_detections. Therefore, this version of operator() is
|
|
simply a convenience function for performing this set of operations.
|
|
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void operator() (
|
|
const image_type& img,
|
|
std::vector<full_object_detection>& dets,
|
|
double adjust_threshold = 0
|
|
);
|
|
/*!
|
|
requires
|
|
- img == an object which can be accepted by image_scanner_type::load()
|
|
ensures
|
|
- This function is identical to the above operator() routine, except that
|
|
it doesn't include a double valued score. That is, it just outputs the
|
|
full_object_detections.
|
|
!*/
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename T>
|
|
void serialize (
|
|
const object_detector<T>& item,
|
|
std::ostream& out
|
|
);
|
|
/*!
|
|
provides serialization support. Note that this function only saves the
|
|
configuration part of item.get_scanner(). That is, we use the scanner's
|
|
copy_configuration() function to get a copy of the scanner that doesn't contain any
|
|
loaded image data and we then save just the configuration part of the scanner.
|
|
This means that any serialized object_detectors won't remember any images they have
|
|
processed but will otherwise contain all their state and be able to detect objects
|
|
in new images.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename T>
|
|
void deserialize (
|
|
object_detector<T>& item,
|
|
std::istream& in
|
|
);
|
|
/*!
|
|
provides deserialization support
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_OBJECT_DeTECTOR_ABSTRACT_Hh_
|
|
|