284 lines
13 KiB
C++
284 lines
13 KiB
C++
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#undef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
|
#ifdef DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
|
|
|
#include <vector>
|
|
#include <utility>
|
|
#include "../algs.h"
|
|
#include "../image_processing/generic_image.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
bool all_images_same_size (
|
|
const image_array_type& images
|
|
);
|
|
/*!
    requires
        - image_array_type == an implementation of array/array_kernel_abstract.h
        - image_array_type::type == an image object that implements the interface
          defined in dlib/image_processing/generic_image.h
    ensures
        - if (all elements of images have the same dimensions (i.e.
          for all i and j: get_rect(images[i]) == get_rect(images[j]))) then
            - returns true
        - else
            - returns false
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
double sum_of_rects_in_images (
|
|
const image_array_type& images,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
|
const point& position
|
|
);
|
|
/*!
    requires
        - image_array_type == an implementation of array/array_kernel_abstract.h
        - image_array_type::type == an image object that implements the interface
          defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
          contain a scalar pixel type (e.g. int rather than rgb_pixel)
        - all_images_same_size(images) == true
        - for all valid i: rects[i].first < images.size()
          (i.e. all the rectangles must reference valid elements of images)
    ensures
        - returns the sum of the pixels inside the given rectangles.  To be precise,
          let RECT_SUM[i] = sum of pixels inside the rectangle translate_rect(rects[i].second, position)
          from the image images[rects[i].first].  Then this function returns the
          sum of RECT_SUM[i] for all the valid values of i.
        - In other words, for each element of rects, .first is the index of the image
          to read from and .second is the rectangle, which is translated by position
          before its pixels are summed.
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
double sum_of_rects_in_images_movable_parts (
|
|
const image_array_type& images,
|
|
const rectangle& window,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
|
const point& position
|
|
);
|
|
/*!
    requires
        - image_array_type == an implementation of array/array_kernel_abstract.h
        - image_array_type::type == an image object that implements the interface
          defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
          contain a scalar pixel type (e.g. int rather than rgb_pixel)
        - all_images_same_size(images) == true
        - center(window) == point(0,0)
        - for all valid i:
            - fixed_rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
        - for all valid i:
            - movable_rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
            - center(movable_rects[i].second) == point(0,0)
    ensures
        - returns the sum of the pixels inside fixed_rects as well as the sum of the pixels
          inside movable_rects when these latter rectangles are placed at their highest
          scoring locations inside the given window.  To be precise:
            - let RECT_SUM(r,x) = sum of pixels inside the rectangle translate_rect(r.second, x)
              from the image images[r.first].
            - let WIN_MAX(i) = the maximum value of RECT_SUM(movable_rects[i],X) when maximizing
              over all the X such that translate_rect(window,position).contains(X) == true.

            - let TOTAL_FIXED = sum over all elements R in fixed_rects of: RECT_SUM(R,position)
            - let TOTAL_MOVABLE = sum over all valid i of: max(WIN_MAX(i), 0)

          Then this function returns TOTAL_FIXED + TOTAL_MOVABLE.
        - Note that, because of the max(WIN_MAX(i), 0) term, a movable rectangle never
          contributes a negative amount to the returned value.
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
void find_points_above_thresh (
|
|
std::vector<std::pair<double, point> >& dets,
|
|
const image_type& img,
|
|
const double thresh,
|
|
const unsigned long max_dets
|
|
);
|
|
/*!
    requires
        - image_type == an image object that implements the interface defined in
          dlib/image_processing/generic_image.h.  Moreover, it must contain a
          scalar pixel type (e.g. int rather than rgb_pixel)
    ensures
        - #dets == a list of points from img which had pixel values >= thresh.
        - Specifically, we have:
            - #dets.size() <= max_dets
              (note that dets is cleared before new detections are added by find_points_above_thresh())
            - for all valid i:
                - #dets[i].first == img[#dets[i].second.y()][#dets[i].second.x()]
                  (i.e. the first field contains the value of the pixel at this detection location)
                - #dets[i].first >= thresh
            - if (there are more than max_dets locations that pass the above threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
            - else
                - #dets == all the points which passed the threshold test.
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
std::vector<point> find_peaks (
|
|
const image_type& img,
|
|
const double non_max_suppression_radius,
|
|
const typename pixel_traits<typename image_traits<image_type>::pixel_type>::basic_pixel_type& thresh
|
|
);
|
|
/*!
    requires
        - image_type == an image object that implements the interface defined in
          dlib/image_processing/generic_image.h.  Moreover, it must contain a
          scalar pixel type (e.g. int rather than rgb_pixel)
        - non_max_suppression_radius >= 0
    ensures
        - Scans the given image and finds all pixels with values >= thresh that are
          also local maxima within their 8-connected neighborhood of the image.  Such
          pixels are collected, sorted in decreasing order of their pixel values, and
          then non-maximum suppression is applied to this list of points using the
          given non_max_suppression_radius.  The final list of peaks is then returned.
          (Note that thresh is given in the basic_pixel_type of img's pixel type
          rather than as a double.)

          Therefore, the returned list, V, will have these properties:
            - V.size() == the number of peaks found in the image.
            - When measured in image coordinates, no elements of V are within
              non_max_suppression_radius distance of each other.  That is, for all valid i!=j
              it is true that length(V[i]-V[j]) > non_max_suppression_radius.
            - For each element of V, that element has the maximum pixel value of all
              pixels in the ball centered on that pixel with radius
              non_max_suppression_radius.
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
std::vector<point> find_peaks (
|
|
const image_type& img
|
|
);
|
|
/*!
    ensures
        - performs: return find_peaks(img, 0, partition_pixels(img))
          (i.e. the three-argument find_peaks() is called with a
          non_max_suppression_radius of 0 and with the value returned by
          partition_pixels(img) as the threshold.  Since the call is forwarded,
          the requirements of that overload apply to img here as well.)
!*/
|
|
|
|
template <
|
|
typename image_type
|
|
>
|
|
std::vector<point> find_peaks (
|
|
const image_type& img,
|
|
const double non_max_suppression_radius
|
|
);
|
|
/*!
    ensures
        - performs: return find_peaks(img, non_max_suppression_radius, partition_pixels(img))
          (i.e. the three-argument find_peaks() is called with the value returned
          by partition_pixels(img) as the threshold.  Since the call is forwarded,
          the requirements of that overload apply to img and
          non_max_suppression_radius here as well.)
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
void scan_image (
|
|
std::vector<std::pair<double, point> >& dets,
|
|
const image_array_type& images,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& rects,
|
|
const double thresh,
|
|
const unsigned long max_dets
|
|
);
|
|
/*!
    requires
        - image_array_type == an implementation of array/array_kernel_abstract.h
        - image_array_type::type == an image object that implements the interface
          defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
          contain a scalar pixel type (e.g. int rather than rgb_pixel)
        - images.size() > 0
        - rects.size() > 0
        - all_images_same_size(images) == true
        - for all valid i: rects[i].first < images.size()
          (i.e. all the rectangles must reference valid elements of images)
    ensures
        - slides the set of rectangles over the image space and reports the locations
          which give a sum greater than or equal to thresh.
        - Specifically, we have:
            - #dets.size() <= max_dets
              (note that dets is cleared before new detections are added by scan_image())
            - for all valid i:
                - #dets[i].first == sum_of_rects_in_images(images,rects,#dets[i].second) >= thresh
            - if (there are more than max_dets locations that pass the threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename image_array_type
|
|
>
|
|
void scan_image_movable_parts (
|
|
std::vector<std::pair<double, point> >& dets,
|
|
const image_array_type& images,
|
|
const rectangle& window,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& fixed_rects,
|
|
const std::vector<std::pair<unsigned int, rectangle> >& movable_rects,
|
|
const double thresh,
|
|
const unsigned long max_dets
|
|
);
|
|
/*!
    requires
        - image_array_type == an implementation of array/array_kernel_abstract.h
        - image_array_type::type == an image object that implements the interface
          defined in dlib/image_processing/generic_image.h.  Moreover, these objects must
          contain a scalar pixel type (e.g. int rather than rgb_pixel)
        - images.size() > 0
        - all_images_same_size(images) == true
        - center(window) == point(0,0)
        - window.area() > 0
        - for all valid i:
            - fixed_rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
        - for all valid i:
            - movable_rects[i].first < images.size()
              (i.e. all the rectangles must reference valid elements of images)
            - center(movable_rects[i].second) == point(0,0)
            - movable_rects[i].second.area() > 0
    ensures
        - Scans the given window over the images and reports the locations with a score
          greater than or equal to thresh.
        - Specifically, we have:
            - #dets.size() <= max_dets
              (note that dets is cleared before new detections are added by scan_image_movable_parts())
            - for all valid i:
                - #dets[i].first == sum_of_rects_in_images_movable_parts(images,
                                                                         window,
                                                                         fixed_rects,
                                                                         movable_rects,
                                                                         #dets[i].second) >= thresh
            - if (there are more than max_dets locations that pass the above threshold test) then
                - #dets == a random subsample of all the locations which passed the threshold
                  test.
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SCAN_iMAGE_ABSTRACT_Hh_
|
|
|
|
|
|
|