// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_SCAN_IMaGE_PYRAMID_Hh_
|
|
#define DLIB_SCAN_IMaGE_PYRAMID_Hh_
|
|
|
|
#include "scan_image_pyramid_abstract.h"
|
|
#include "../matrix.h"
|
|
#include "../geometry.h"
|
|
#include "scan_image.h"
|
|
#include "../array2d.h"
|
|
#include <vector>
|
|
#include "full_object_detection.h"
|
|
#include "../image_processing/generic_image.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
class scan_image_pyramid : noncopyable
|
|
{
|
|
|
|
public:
|
|
|
|
typedef matrix<double,0,1> feature_vector_type;
|
|
|
|
typedef Pyramid_type pyramid_type;
|
|
typedef Feature_extractor_type feature_extractor_type;
|
|
|
|
scan_image_pyramid (
|
|
);
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void load (
|
|
const image_type& img
|
|
);
|
|
|
|
inline bool is_loaded_with_image (
|
|
) const;
|
|
|
|
inline void copy_configuration(
|
|
const feature_extractor_type& fe
|
|
);
|
|
|
|
inline void copy_configuration (
|
|
const scan_image_pyramid& item
|
|
);
|
|
|
|
const Feature_extractor_type& get_feature_extractor (
|
|
) const { return feats_config; }
|
|
|
|
void add_detection_template (
|
|
const rectangle& object_box,
|
|
const std::vector<rectangle>& stationary_feature_extraction_regions,
|
|
const std::vector<rectangle>& movable_feature_extraction_regions
|
|
);
|
|
|
|
void add_detection_template (
|
|
const rectangle& object_box,
|
|
const std::vector<rectangle>& stationary_feature_extraction_regions
|
|
);
|
|
|
|
inline unsigned long get_num_detection_templates (
|
|
) const;
|
|
|
|
inline unsigned long get_num_movable_components_per_detection_template (
|
|
) const;
|
|
|
|
inline unsigned long get_num_stationary_components_per_detection_template (
|
|
) const;
|
|
|
|
inline unsigned long get_num_components_per_detection_template (
|
|
) const;
|
|
|
|
inline long get_num_dimensions (
|
|
) const;
|
|
|
|
unsigned long get_max_pyramid_levels (
|
|
) const;
|
|
|
|
void set_max_pyramid_levels (
|
|
unsigned long max_levels
|
|
);
|
|
|
|
inline unsigned long get_max_detections_per_template (
|
|
) const;
|
|
|
|
void set_min_pyramid_layer_size (
|
|
unsigned long width,
|
|
unsigned long height
|
|
);
|
|
|
|
inline unsigned long get_min_pyramid_layer_width (
|
|
) const;
|
|
|
|
inline unsigned long get_min_pyramid_layer_height (
|
|
) const;
|
|
|
|
void set_max_detections_per_template (
|
|
unsigned long max_dets
|
|
);
|
|
|
|
void detect (
|
|
const feature_vector_type& w,
|
|
std::vector<std::pair<double, rectangle> >& dets,
|
|
const double thresh
|
|
) const;
|
|
|
|
void get_feature_vector (
|
|
const full_object_detection& obj,
|
|
feature_vector_type& psi
|
|
) const;
|
|
|
|
full_object_detection get_full_object_detection (
|
|
const rectangle& rect,
|
|
const feature_vector_type& w
|
|
) const;
|
|
|
|
const rectangle get_best_matching_rect (
|
|
const rectangle& rect
|
|
) const;
|
|
|
|
template <typename T, typename U>
|
|
friend void serialize (
|
|
const scan_image_pyramid<T,U>& item,
|
|
std::ostream& out
|
|
);
|
|
|
|
template <typename T, typename U>
|
|
friend void deserialize (
|
|
scan_image_pyramid<T,U>& item,
|
|
std::istream& in
|
|
);
|
|
|
|
private:
|
|
static bool compare_pair_rect (
|
|
const std::pair<double, rectangle>& a,
|
|
const std::pair<double, rectangle>& b
|
|
)
|
|
{
|
|
return a.first < b.first;
|
|
}
|
|
|
|
struct detection_template
|
|
{
|
|
rectangle object_box; // always centered at (0,0)
|
|
std::vector<rectangle> rects; // template with respect to (0,0)
|
|
std::vector<rectangle> movable_rects;
|
|
};
|
|
|
|
friend void serialize(const detection_template& item, std::ostream& out)
|
|
{
|
|
int version = 1;
|
|
serialize(version, out);
|
|
serialize(item.object_box, out);
|
|
serialize(item.rects, out);
|
|
serialize(item.movable_rects, out);
|
|
}
|
|
friend void deserialize(detection_template& item, std::istream& in)
|
|
{
|
|
int version = 0;
|
|
deserialize(version, in);
|
|
if (version != 1)
|
|
throw serialization_error("Unexpected version found while deserializing a dlib::scan_image_pyramid::detection_template object.");
|
|
|
|
deserialize(item.object_box, in);
|
|
deserialize(item.rects, in);
|
|
deserialize(item.movable_rects, in);
|
|
}
|
|
|
|
void get_mapped_rect_and_metadata (
|
|
const unsigned long number_pyramid_levels,
|
|
rectangle rect,
|
|
rectangle& mapped_rect,
|
|
detection_template& best_template,
|
|
rectangle& object_box,
|
|
unsigned long& best_level,
|
|
unsigned long& detection_template_idx
|
|
) const;
|
|
|
|
double get_match_score (
|
|
rectangle r1,
|
|
rectangle r2
|
|
) const
|
|
{
|
|
// make the rectangles overlap as much as possible before computing the match score.
|
|
r1 = move_rect(r1, r2.tl_corner());
|
|
return (r1.intersect(r2).area())/(double)(r1 + r2).area();
|
|
}
|
|
|
|
void test_coordinate_transforms()
|
|
{
|
|
for (long x = -10; x <= 10; x += 10)
|
|
{
|
|
for (long y = -10; y <= 10; y += 10)
|
|
{
|
|
const rectangle rect = centered_rect(x,y,5,6);
|
|
rectangle a;
|
|
|
|
a = feats_config.image_to_feat_space(rect);
|
|
if (a.width() > 10000000 || a.height() > 10000000 )
|
|
{
|
|
DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly "
|
|
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
|
|
}
|
|
a = feats_config.feat_to_image_space(rect);
|
|
if (a.width() > 10000000 || a.height() > 10000000 )
|
|
{
|
|
DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly "
|
|
<< "\nlarge size. This means there is probably a bug in your feature extractor.");
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
feature_extractor_type feats_config; // just here to hold configuration. use it to populate the feats elements.
|
|
array<feature_extractor_type> feats;
|
|
std::vector<detection_template> det_templates;
|
|
unsigned long max_dets_per_template;
|
|
unsigned long max_pyramid_levels;
|
|
unsigned long min_pyramid_layer_width;
|
|
unsigned long min_pyramid_layer_height;
|
|
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename T, typename U>
|
|
void serialize (
|
|
const scan_image_pyramid<T,U>& item,
|
|
std::ostream& out
|
|
)
|
|
{
|
|
int version = 3;
|
|
serialize(version, out);
|
|
serialize(item.feats_config, out);
|
|
serialize(item.feats, out);
|
|
serialize(item.det_templates, out);
|
|
serialize(item.max_dets_per_template, out);
|
|
serialize(item.max_pyramid_levels, out);
|
|
serialize(item.min_pyramid_layer_width, out);
|
|
serialize(item.min_pyramid_layer_height, out);
|
|
serialize(item.get_num_dimensions(), out);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename T, typename U>
|
|
void deserialize (
|
|
scan_image_pyramid<T,U>& item,
|
|
std::istream& in
|
|
)
|
|
{
|
|
int version = 0;
|
|
deserialize(version, in);
|
|
if (version != 3)
|
|
throw serialization_error("Unsupported version found when deserializing a scan_image_pyramid object.");
|
|
|
|
deserialize(item.feats_config, in);
|
|
deserialize(item.feats, in);
|
|
deserialize(item.det_templates, in);
|
|
deserialize(item.max_dets_per_template, in);
|
|
deserialize(item.max_pyramid_levels, in);
|
|
deserialize(item.min_pyramid_layer_width, in);
|
|
deserialize(item.min_pyramid_layer_height, in);
|
|
|
|
// When developing some feature extractor, it's easy to accidentally change its
|
|
// number of dimensions and then try to deserialize data from an older version of
|
|
// your extractor into the current code. This check is here to catch that kind of
|
|
// user error.
|
|
long dims;
|
|
deserialize(dims, in);
|
|
if (item.get_num_dimensions() != dims)
|
|
throw serialization_error("Number of dimensions in serialized scan_image_pyramid doesn't match the expected number.");
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// scan_image_pyramid member functions
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
scan_image_pyramid (
|
|
) :
|
|
max_dets_per_template(10000),
|
|
max_pyramid_levels(1000),
|
|
min_pyramid_layer_width(20),
|
|
min_pyramid_layer_height(20)
|
|
{
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
template <
|
|
typename image_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
load (
|
|
const image_type& img
|
|
)
|
|
{
|
|
unsigned long levels = 0;
|
|
rectangle rect = get_rect(img);
|
|
|
|
// figure out how many pyramid levels we should be using based on the image size
|
|
pyramid_type pyr;
|
|
do
|
|
{
|
|
rect = pyr.rect_down(rect);
|
|
++levels;
|
|
} while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height &&
|
|
levels < max_pyramid_levels);
|
|
|
|
if (feats.max_size() < levels)
|
|
feats.set_max_size(levels);
|
|
feats.set_size(levels);
|
|
|
|
for (unsigned long i = 0; i < feats.size(); ++i)
|
|
feats[i].copy_configuration(feats_config);
|
|
|
|
// build our feature pyramid
|
|
feats[0].load(img);
|
|
if (feats.size() > 1)
|
|
{
|
|
image_type temp1, temp2;
|
|
pyr(img, temp1);
|
|
feats[1].load(temp1);
|
|
swap(temp1,temp2);
|
|
|
|
for (unsigned long i = 2; i < feats.size(); ++i)
|
|
{
|
|
pyr(temp2, temp1);
|
|
feats[i].load(temp1);
|
|
swap(temp1,temp2);
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_max_detections_per_template (
|
|
) const
|
|
{
|
|
return max_dets_per_template;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
set_max_detections_per_template (
|
|
unsigned long max_dets
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(max_dets > 0 ,
|
|
"\t void scan_image_pyramid::set_max_detections_per_template()"
|
|
<< "\n\t The max number of possible detections can't be zero. "
|
|
<< "\n\t max_dets: " << max_dets
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
max_dets_per_template = max_dets;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
bool scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
is_loaded_with_image (
|
|
) const
|
|
{
|
|
return feats.size() != 0;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
copy_configuration(
|
|
const feature_extractor_type& fe
|
|
)
|
|
{
|
|
test_coordinate_transforms();
|
|
feats_config.copy_configuration(fe);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
copy_configuration (
|
|
const scan_image_pyramid& item
|
|
)
|
|
{
|
|
feats_config.copy_configuration(item.feats_config);
|
|
det_templates = item.det_templates;
|
|
max_dets_per_template = item.max_dets_per_template;
|
|
max_pyramid_levels = item.max_pyramid_levels;
|
|
min_pyramid_layer_width = item.min_pyramid_layer_width;
|
|
min_pyramid_layer_height = item.min_pyramid_layer_height;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
add_detection_template (
|
|
const rectangle& object_box,
|
|
const std::vector<rectangle>& stationary_feature_extraction_regions,
|
|
const std::vector<rectangle>& movable_feature_extraction_regions
|
|
)
|
|
{
|
|
#ifdef ENABLE_ASSERTS
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT((get_num_detection_templates() == 0 ||
|
|
(get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size() &&
|
|
get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size())) &&
|
|
center(object_box) == point(0,0),
|
|
"\t void scan_image_pyramid::add_detection_template()"
|
|
<< "\n\t The number of rects in this new detection template doesn't match "
|
|
<< "\n\t the number in previous detection templates."
|
|
<< "\n\t get_num_stationary_components_per_detection_template(): " << get_num_stationary_components_per_detection_template()
|
|
<< "\n\t stationary_feature_extraction_regions.size(): " << stationary_feature_extraction_regions.size()
|
|
<< "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
|
|
<< "\n\t movable_feature_extraction_regions.size(): " << movable_feature_extraction_regions.size()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
for (unsigned long i = 0; i < movable_feature_extraction_regions.size(); ++i)
|
|
{
|
|
DLIB_ASSERT(center(movable_feature_extraction_regions[i]) == point(0,0),
|
|
"Invalid inputs were given to this function."
|
|
<< "\n\t center(movable_feature_extraction_regions["<<i<<"]): " << center(movable_feature_extraction_regions[i])
|
|
<< "\n\t this: " << this
|
|
);
|
|
}
|
|
#endif
|
|
|
|
detection_template temp;
|
|
temp.object_box = object_box;
|
|
temp.rects = stationary_feature_extraction_regions;
|
|
temp.movable_rects = movable_feature_extraction_regions;
|
|
det_templates.push_back(temp);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
add_detection_template (
|
|
const rectangle& object_box,
|
|
const std::vector<rectangle>& stationary_feature_extraction_regions
|
|
)
|
|
{
|
|
// an empty set of movable feature regions
|
|
const std::vector<rectangle> movable_feature_extraction_regions;
|
|
add_detection_template(object_box, stationary_feature_extraction_regions,
|
|
movable_feature_extraction_regions);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_num_detection_templates (
|
|
) const
|
|
{
|
|
return det_templates.size();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_num_stationary_components_per_detection_template (
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 ,
|
|
"\t unsigned long scan_image_pyramid::get_num_stationary_components_per_detection_template()"
|
|
<< "\n\t You need to give some detection templates before calling this function. "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
return det_templates[0].rects.size();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_num_movable_components_per_detection_template (
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 ,
|
|
"\t unsigned long scan_image_pyramid::get_num_movable_components_per_detection_template()"
|
|
<< "\n\t You need to give some detection templates before calling this function. "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
return det_templates[0].movable_rects.size();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_num_components_per_detection_template (
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 ,
|
|
"\t unsigned long scan_image_pyramid::get_num_components_per_detection_template()"
|
|
<< "\n\t You need to give some detection templates before calling this function. "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
return get_num_movable_components_per_detection_template() +
|
|
get_num_stationary_components_per_detection_template();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_num_dimensions (
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 ,
|
|
"\t long scan_image_pyramid::get_num_dimensions()"
|
|
<< "\n\t You need to give some detection templates before calling this function. "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
return feats_config.get_num_dimensions()*get_num_components_per_detection_template() + get_num_detection_templates();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_max_pyramid_levels (
|
|
) const
|
|
{
|
|
return max_pyramid_levels;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
set_max_pyramid_levels (
|
|
unsigned long max_levels
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(max_levels > 0 ,
|
|
"\t void scan_image_pyramid::set_max_pyramid_levels()"
|
|
<< "\n\t You can't have zero levels. "
|
|
<< "\n\t max_levels: " << max_levels
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
max_pyramid_levels = max_levels;
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
detect (
|
|
const feature_vector_type& w,
|
|
std::vector<std::pair<double, rectangle> >& dets,
|
|
const double thresh
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 &&
|
|
is_loaded_with_image() &&
|
|
w.size() >= get_num_dimensions(),
|
|
"\t void scan_image_pyramid::detect()"
|
|
<< "\n\t Invalid inputs were given to this function "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
|
<< "\n\t w.size(): " << w.size()
|
|
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
dets.clear();
|
|
|
|
array<array2d<double> > saliency_images;
|
|
saliency_images.set_max_size(get_num_components_per_detection_template());
|
|
saliency_images.set_size(get_num_components_per_detection_template());
|
|
std::vector<std::pair<unsigned int,rectangle> > stationary_region_rects(get_num_stationary_components_per_detection_template());
|
|
std::vector<std::pair<unsigned int,rectangle> > movable_region_rects(get_num_movable_components_per_detection_template());
|
|
pyramid_type pyr;
|
|
std::vector<std::pair<double, point> > point_dets;
|
|
|
|
// for all pyramid levels
|
|
for (unsigned long l = 0; l < feats.size(); ++l)
|
|
{
|
|
for (unsigned long i = 0; i < saliency_images.size(); ++i)
|
|
{
|
|
saliency_images[i].set_size(feats[l].nr(), feats[l].nc());
|
|
const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*i;
|
|
|
|
// build saliency images for pyramid level l
|
|
for (long r = 0; r < feats[l].nr(); ++r)
|
|
{
|
|
for (long c = 0; c < feats[l].nc(); ++c)
|
|
{
|
|
const typename feature_extractor_type::descriptor_type& descriptor = feats[l](r,c);
|
|
|
|
double sum = 0;
|
|
for (unsigned long k = 0; k < descriptor.size(); ++k)
|
|
{
|
|
sum += w(descriptor[k].first + offset)*descriptor[k].second;
|
|
}
|
|
saliency_images[i][r][c] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
// now search the saliency images
|
|
for (unsigned long i = 0; i < det_templates.size(); ++i)
|
|
{
|
|
const point offset = -feats[l].image_to_feat_space(point(0,0));
|
|
for (unsigned long j = 0; j < stationary_region_rects.size(); ++j)
|
|
{
|
|
stationary_region_rects[j] = std::make_pair(j, translate_rect(feats[l].image_to_feat_space(det_templates[i].rects[j]),offset));
|
|
}
|
|
for (unsigned long j = 0; j < movable_region_rects.size(); ++j)
|
|
{
|
|
// Scale the size of the movable rectangle but make sure its center
|
|
// stays at point(0,0).
|
|
const rectangle temp = feats[l].image_to_feat_space(det_templates[i].movable_rects[j]);
|
|
movable_region_rects[j] = std::make_pair(j+stationary_region_rects.size(),
|
|
centered_rect(point(0,0),temp.width(), temp.height()));
|
|
}
|
|
|
|
// Scale the object box into the feature extraction image, but keeping it
|
|
// centered at point(0,0).
|
|
rectangle scaled_object_box = feats[l].image_to_feat_space(det_templates[i].object_box);
|
|
scaled_object_box = centered_rect(point(0,0),scaled_object_box.width(), scaled_object_box.height());
|
|
|
|
// Each detection template gets its own special threshold in addition to
|
|
// the global detection threshold. This allows us to model the fact that
|
|
// some detection templates might be more prone to false alarming or since
|
|
// their size is different naturally require a larger or smaller threshold
|
|
// (since they integrate over a larger or smaller region of the image).
|
|
const double template_specific_thresh = w(i);
|
|
|
|
scan_image_movable_parts(point_dets, saliency_images, scaled_object_box,
|
|
stationary_region_rects, movable_region_rects,
|
|
thresh+template_specific_thresh, max_dets_per_template);
|
|
|
|
// convert all the point detections into rectangles at the original image scale and coordinate system
|
|
for (unsigned long j = 0; j < point_dets.size(); ++j)
|
|
{
|
|
const double score = point_dets[j].first-template_specific_thresh;
|
|
point p = point_dets[j].second;
|
|
p = feats[l].feat_to_image_space(p);
|
|
rectangle rect = translate_rect(det_templates[i].object_box, p);
|
|
rect = pyr.rect_up(rect, l);
|
|
|
|
dets.push_back(std::make_pair(score, rect));
|
|
}
|
|
}
|
|
}
|
|
|
|
std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_best_matching_rect (
|
|
const rectangle& rect
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 ,
|
|
"\t const rectangle scan_image_pyramid::get_best_matching_rect()"
|
|
<< "\n\t Invalid inputs were given to this function "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
rectangle mapped_rect, object_box;
|
|
detection_template best_template;
|
|
unsigned long best_level, junk;
|
|
get_mapped_rect_and_metadata(max_pyramid_levels, rect, mapped_rect, best_template, object_box, best_level, junk);
|
|
return mapped_rect;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_mapped_rect_and_metadata (
|
|
const unsigned long number_pyramid_levels,
|
|
rectangle rect,
|
|
rectangle& mapped_rect,
|
|
detection_template& best_template,
|
|
rectangle& object_box,
|
|
unsigned long& best_level,
|
|
unsigned long& detection_template_idx
|
|
) const
|
|
{
|
|
pyramid_type pyr;
|
|
// Figure out the pyramid level which best matches rect against one of our
|
|
// detection template object boxes.
|
|
best_level = 0;
|
|
double best_match_score = -1;
|
|
|
|
|
|
// Find the best matching detection template for rect
|
|
for (unsigned long l = 0; l < number_pyramid_levels; ++l)
|
|
{
|
|
const rectangle temp = pyr.rect_down(rect,l);
|
|
if (temp.area() <= 1)
|
|
break;
|
|
|
|
// At this pyramid level, what matches best?
|
|
for (unsigned long t = 0; t < det_templates.size(); ++t)
|
|
{
|
|
const double match_score = get_match_score(det_templates[t].object_box, temp);
|
|
if (match_score > best_match_score)
|
|
{
|
|
best_match_score = match_score;
|
|
best_level = l;
|
|
best_template = det_templates[t];
|
|
detection_template_idx = t;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// Now we translate best_template into the right spot (it should be centered at the location
|
|
// determined by rect) and convert it into the feature image coordinate system.
|
|
rect = pyr.rect_down(rect,best_level);
|
|
const point offset = -feats_config.image_to_feat_space(point(0,0));
|
|
const point origin = feats_config.image_to_feat_space(center(rect)) + offset;
|
|
for (unsigned long k = 0; k < best_template.rects.size(); ++k)
|
|
{
|
|
rectangle temp = best_template.rects[k];
|
|
temp = feats_config.image_to_feat_space(temp);
|
|
temp = translate_rect(temp, origin);
|
|
best_template.rects[k] = temp;
|
|
}
|
|
for (unsigned long k = 0; k < best_template.movable_rects.size(); ++k)
|
|
{
|
|
rectangle temp = best_template.movable_rects[k];
|
|
temp = feats_config.image_to_feat_space(temp);
|
|
temp = centered_rect(point(0,0), temp.width(), temp.height());
|
|
best_template.movable_rects[k] = temp;
|
|
}
|
|
|
|
const rectangle scaled_object_box = feats_config.image_to_feat_space(best_template.object_box);
|
|
object_box = centered_rect(origin-offset, scaled_object_box.width(), scaled_object_box.height());
|
|
|
|
// The input rectangle was mapped to one of the detection templates. Reverse the process
|
|
// to figure out what the mapped rectangle is in the original input space.
|
|
mapped_rect = translate_rect(best_template.object_box, feats_config.feat_to_image_space(origin-offset));
|
|
mapped_rect = pyr.rect_up(mapped_rect, best_level);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
full_object_detection scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_full_object_detection (
|
|
const rectangle& rect,
|
|
const feature_vector_type& w
|
|
) const
|
|
{
|
|
// fill in movable part positions.
|
|
|
|
rectangle mapped_rect;
|
|
detection_template best_template;
|
|
unsigned long best_level, junk;
|
|
rectangle object_box;
|
|
get_mapped_rect_and_metadata(feats.size(), rect, mapped_rect, best_template, object_box, best_level, junk);
|
|
|
|
Pyramid_type pyr;
|
|
|
|
array2d<double> saliency_image, sum_img;
|
|
|
|
double total_temp_score = 0;
|
|
// convert into feature space.
|
|
object_box = object_box.intersect(get_rect(feats[best_level]));
|
|
|
|
std::vector<point> movable_parts;
|
|
movable_parts.reserve(get_num_movable_components_per_detection_template());
|
|
for (unsigned long i = 0; i < get_num_movable_components_per_detection_template(); ++i)
|
|
{
|
|
// make the saliency_image for the ith movable part.
|
|
|
|
const rectangle part_rect = best_template.movable_rects[i];
|
|
const rectangle area = grow_rect(object_box,
|
|
part_rect.width()/2,
|
|
part_rect.height()/2).intersect(get_rect(feats[best_level]));
|
|
|
|
saliency_image.set_size(area.height(), area.width());
|
|
const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*(i+get_num_stationary_components_per_detection_template());
|
|
|
|
// build saliency image for pyramid level best_level
|
|
for (long r = area.top(); r <= area.bottom(); ++r)
|
|
{
|
|
for (long c = area.left(); c <= area.right(); ++c)
|
|
{
|
|
const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);
|
|
|
|
double sum = 0;
|
|
for (unsigned long k = 0; k < descriptor.size(); ++k)
|
|
{
|
|
sum += w(descriptor[k].first + offset)*descriptor[k].second;
|
|
}
|
|
saliency_image[r-area.top()][c-area.left()] = sum;
|
|
}
|
|
}
|
|
|
|
sum_img.set_size(saliency_image.nr(), saliency_image.nc());
|
|
sum_filter_assign(saliency_image, sum_img, part_rect);
|
|
// Figure out where the maximizer is in sum_img. Note that we
|
|
// only look in the part of sum_img that corresponds to a location inside
|
|
// object_box.
|
|
rectangle valid_area = get_rect(sum_img);
|
|
valid_area.left() += object_box.left() - area.left();
|
|
valid_area.top() += object_box.top() - area.top();
|
|
valid_area.right() += object_box.right() - area.right();
|
|
valid_area.bottom() += object_box.bottom() - area.bottom();
|
|
double max_val = 0;
|
|
point max_loc;
|
|
for (long r = valid_area.top(); r <= valid_area.bottom(); ++r)
|
|
{
|
|
for (long c = valid_area.left(); c <= valid_area.right(); ++c)
|
|
{
|
|
if (sum_img[r][c] > max_val)
|
|
{
|
|
//if (object_box.contains(point(c,r) + area.tl_corner()))
|
|
{
|
|
max_loc = point(c,r);
|
|
max_val = sum_img[r][c];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (max_val <= 0)
|
|
{
|
|
max_loc = OBJECT_PART_NOT_PRESENT;
|
|
}
|
|
else
|
|
{
|
|
total_temp_score += max_val;
|
|
// convert max_loc back into feature image space from our cropped image.
|
|
max_loc += area.tl_corner();
|
|
|
|
// now convert from feature space to image space.
|
|
max_loc = feats[best_level].feat_to_image_space(max_loc);
|
|
max_loc = pyr.point_up(max_loc, best_level);
|
|
max_loc = nearest_point(rect, max_loc);
|
|
}
|
|
|
|
movable_parts.push_back(max_loc);
|
|
}
|
|
|
|
return full_object_detection(rect, movable_parts);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_feature_vector (
|
|
const full_object_detection& obj,
|
|
feature_vector_type& psi
|
|
) const
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(get_num_detection_templates() > 0 &&
|
|
is_loaded_with_image() &&
|
|
psi.size() >= get_num_dimensions() &&
|
|
obj.num_parts() == get_num_movable_components_per_detection_template(),
|
|
"\t void scan_image_pyramid::get_feature_vector()"
|
|
<< "\n\t Invalid inputs were given to this function "
|
|
<< "\n\t get_num_detection_templates(): " << get_num_detection_templates()
|
|
<< "\n\t is_loaded_with_image(): " << is_loaded_with_image()
|
|
<< "\n\t psi.size(): " << psi.size()
|
|
<< "\n\t get_num_dimensions(): " << get_num_dimensions()
|
|
<< "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
|
|
<< "\n\t obj.num_parts(): " << obj.num_parts()
|
|
<< "\n\t this: " << this
|
|
);
|
|
DLIB_ASSERT(all_parts_in_rect(obj),
|
|
"\t void scan_image_pyramid::get_feature_vector()"
|
|
<< "\n\t Invalid inputs were given to this function "
|
|
<< "\n\t obj.get_rect(): " << obj.get_rect()
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
|
|
|
|
rectangle mapped_rect;
|
|
detection_template best_template;
|
|
unsigned long best_level, detection_template_idx;
|
|
rectangle object_box;
|
|
get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, best_template, object_box, best_level, detection_template_idx);
|
|
|
|
psi(detection_template_idx) -= 1;
|
|
|
|
Pyramid_type pyr;
|
|
|
|
// put the movable rects at the places indicated by obj.
|
|
std::vector<rectangle> rects = best_template.rects;
|
|
for (unsigned long i = 0; i < obj.num_parts(); ++i)
|
|
{
|
|
if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
|
|
{
|
|
// map from the original image to scaled feature space.
|
|
point loc = feats[best_level].image_to_feat_space(pyr.point_down(obj.part(i), best_level));
|
|
// Make sure the movable part always stays within the object_box.
|
|
// Otherwise it would be at a place that the detect() function can never
|
|
// look.
|
|
loc = nearest_point(object_box, loc);
|
|
rects.push_back(translate_rect(best_template.movable_rects[i], loc));
|
|
}
|
|
else
|
|
{
|
|
// add an empty rectangle since this part wasn't observed.
|
|
rects.push_back(rectangle());
|
|
}
|
|
}
|
|
|
|
// pull features out of all the boxes in rects.
|
|
for (unsigned long j = 0; j < rects.size(); ++j)
|
|
{
|
|
const rectangle rect = rects[j].intersect(get_rect(feats[best_level]));
|
|
const unsigned long template_region_id = j;
|
|
const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*template_region_id;
|
|
for (long r = rect.top(); r <= rect.bottom(); ++r)
|
|
{
|
|
for (long c = rect.left(); c <= rect.right(); ++c)
|
|
{
|
|
const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);
|
|
for (unsigned long k = 0; k < descriptor.size(); ++k)
|
|
{
|
|
psi(descriptor[k].first + offset) += descriptor[k].second;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
set_min_pyramid_layer_size (
|
|
unsigned long width,
|
|
unsigned long height
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(width > 0 && height > 0 ,
|
|
"\t void scan_image_pyramid::set_min_pyramid_layer_size()"
|
|
<< "\n\t These sizes can't be zero. "
|
|
<< "\n\t width: " << width
|
|
<< "\n\t height: " << height
|
|
<< "\n\t this: " << this
|
|
);
|
|
|
|
min_pyramid_layer_width = width;
|
|
min_pyramid_layer_height = height;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_min_pyramid_layer_width (
|
|
) const
|
|
{
|
|
return min_pyramid_layer_width;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename Pyramid_type,
|
|
typename Feature_extractor_type
|
|
>
|
|
unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
|
|
get_min_pyramid_layer_height (
|
|
) const
|
|
{
|
|
return min_pyramid_layer_height;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SCAN_IMaGE_PYRAMID_Hh_
|
|
|
|
|