// Copyright (C) 2014  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
|
#ifdef DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
|
|
|
#include "full_object_detection_abstract.h"
|
|
#include "../matrix.h"
|
|
#include "../geometry.h"
|
|
#include "../pixel.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class shape_predictor
|
|
{
|
|
/*!
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object is a tool that takes in an image region containing some object
|
|
and outputs a set of point locations that define the pose of the object.
|
|
The classic example of this is human face pose prediction, where you take
|
|
an image of a human face as input and are expected to identify the
|
|
locations of important facial landmarks such as the corners of the mouth
|
|
and eyes, tip of the nose, and so forth.
|
|
|
|
To create useful instantiations of this object you need to use the
|
|
shape_predictor_trainer object defined in the
|
|
shape_predictor_trainer_abstract.h file to train a shape_predictor using a
|
|
set of training images, each annotated with shapes you want to predict.
|
|
|
|
THREAD SAFETY
|
|
No synchronization is required when using this object. In particular, a
|
|
single instance of this object can be used from multiple threads at the
|
|
same time.
|
|
!*/
|
|
|
|
public:
|
|
|
|
shape_predictor (
|
|
);
|
|
/*!
|
|
ensures
|
|
- #num_parts() == 0
|
|
- #num_features() == 0
|
|
!*/
|
|
|
|
unsigned long num_parts (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- returns the number of parts in the shapes predicted by this object.
|
|
!*/
|
|
|
|
unsigned long num_features (
|
|
) const;
|
|
/*!
|
|
ensures
|
|
- Returns the dimensionality of the feature vector output by operator().
|
|
This number is the total number of trees in this object times the number
|
|
of leaves on each tree.
|
|
!*/
|
|
|
|
template <typename image_type, typename T, typename U>
|
|
full_object_detection operator()(
|
|
const image_type& img,
|
|
const rectangle& rect,
|
|
std::vector<std::pair<T,U> >& feats
|
|
) const;
|
|
/*!
|
|
requires
|
|
- image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- T is some unsigned integral type (e.g. unsigned int).
|
|
- U is any scalar type capable of storing the value 1 (e.g. float).
|
|
ensures
|
|
- Runs the shape prediction algorithm on the part of the image contained in
|
|
the given bounding rectangle. So it will try and fit the shape model to
|
|
the contents of the given rectangle in the image. For example, if there
|
|
is a human face inside the rectangle and you use a face landmarking shape
|
|
model then this function will return the locations of the face landmarks
|
|
as the parts. So the return value is a full_object_detection DET such
|
|
that:
|
|
- DET.get_rect() == rect
|
|
- DET.num_parts() == num_parts()
|
|
- for all valid i:
|
|
- DET.part(i) == the location in img for the i-th part of the shape
|
|
predicted by this object.
|
|
- #feats == a sparse vector that records which leaf each tree used to make
|
|
the shape prediction. Moreover, it is an indicator vector, Therefore,
|
|
for all valid i:
|
|
- #feats[i].second == 1
|
|
Further, #feats is a vector from the space of num_features() dimensional
|
|
vectors. The output shape positions can be represented as the dot
|
|
product between #feats and a weight vector. Therefore, #feats encodes
|
|
all the information from img that was used to predict the returned shape
|
|
object.
|
|
!*/
|
|
|
|
template <typename image_type>
|
|
full_object_detection operator()(
|
|
const image_type& img,
|
|
const rectangle& rect
|
|
) const;
|
|
/*!
|
|
requires
|
|
- image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
ensures
|
|
- Calling this function is equivalent to calling (*this)(img, rect, ignored)
|
|
where the 3d argument is discarded.
|
|
!*/
|
|
|
|
};
|
|
|
|
void serialize (const shape_predictor& item, std::ostream& out);
|
|
void deserialize (shape_predictor& item, std::istream& in);
|
|
/*!
|
|
provides serialization support
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array
|
|
>
|
|
double test_shape_predictor (
|
|
const shape_predictor& sp,
|
|
const image_array& images,
|
|
const std::vector<std::vector<full_object_detection> >& objects,
|
|
const std::vector<std::vector<double> >& scales
|
|
);
|
|
/*!
|
|
requires
|
|
- image_array is a dlib::array of image objects where each image object
|
|
implements the interface defined in dlib/image_processing/generic_image.h
|
|
- images.size() == objects.size()
|
|
- for all valid i and j:
|
|
- objects[i][j].num_parts() == sp.num_parts()
|
|
- if (scales.size() != 0) then
|
|
- There must be a scale value for each full_object_detection in objects.
|
|
That is, it must be the case that:
|
|
- scales.size() == objects.size()
|
|
- for all valid i:
|
|
- scales[i].size() == objects[i].size()
|
|
ensures
|
|
- Tests the given shape_predictor by running it on each of the given objects and
|
|
checking how well it recovers the part positions. In particular, for all
|
|
valid i and j we perform:
|
|
sp(images[i], objects[i][j].get_rect())
|
|
and compare the result with the truth part positions in objects[i][j]. We
|
|
then return the average distance (measured in pixels) between a predicted
|
|
part location and its true position.
|
|
- Note that any parts in objects that are set to OBJECT_PART_NOT_PRESENT are
|
|
simply ignored.
|
|
- if (scales.size() != 0) then
|
|
- Each time we compute the distance between a predicted part location and
|
|
its true location in objects[i][j] we divide the distance by
|
|
scales[i][j]. Therefore, if you want the reported error to be the
|
|
average pixel distance then give an empty scales vector, but if you want
|
|
the returned value to be something else like the average distance
|
|
normalized by some feature of each object (e.g. the interocular distance)
|
|
then you can supply those normalizing values via scales.
|
|
!*/
|
|
|
|
template <
|
|
typename image_array
|
|
>
|
|
double test_shape_predictor (
|
|
const shape_predictor& sp,
|
|
const image_array& images,
|
|
const std::vector<std::vector<full_object_detection> >& objects
|
|
);
|
|
/*!
|
|
requires
|
|
- image_array is a dlib::array of image objects where each image object
|
|
implements the interface defined in dlib/image_processing/generic_image.h
|
|
- images.size() == objects.size()
|
|
- for all valid i and j:
|
|
- objects[i][j].num_parts() == sp.num_parts()
|
|
ensures
|
|
- returns test_shape_predictor(sp, images, objects, no_scales) where no_scales
|
|
is an empty vector. So this is just a convenience function for calling the
|
|
above test_shape_predictor() routine without a scales argument.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SHAPE_PREDICToR_ABSTRACT_H_
|
|
|