169 lines
8.1 KiB
C++
169 lines
8.1 KiB
C++
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#undef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
|
|
#ifdef DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
|
|
|
|
#include <vector>
|
|
#include "../matrix.h"
|
|
#include "../image_processing/generic_image.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename out_image_type
|
|
>
|
|
void segment_image (
|
|
const in_image_type& in_img,
|
|
out_image_type& out_img,
|
|
const double k = 200,
|
|
const unsigned long min_size = 10
|
|
);
|
|
/*!
|
|
requires
|
|
- in_image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- out_image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- in_image_type can contain any pixel type with a pixel_traits specialization
|
|
or a dlib matrix object representing a row or column vector.
|
|
- out_image_type must contain an unsigned integer pixel type.
|
|
- is_same_object(in_img, out_img) == false
|
|
ensures
|
|
- Attempts to segment in_img into regions which have some visual consistency to
|
|
them. In particular, this function implements the algorithm described in the
|
|
paper: Efficient Graph-Based Image Segmentation by Felzenszwalb and Huttenlocher.
|
|
- #out_img.nr() == in_img.nr()
|
|
- #out_img.nc() == in_img.nc()
|
|
- for all valid r and c:
|
|
- #out_img[r][c] == an integer value indicating the identity of the segment
|
|
containing the pixel in_img[r][c].
|
|
- The k parameter is a measure used to influence how large the segment regions
|
|
will be. Larger k generally results in larger segments being produced. For
|
|
a deeper discussion of the k parameter you should consult the above
|
|
referenced paper.
|
|
- min_size is a lower bound on the size of the output segments. That is, it is
|
|
guaranteed that all output segments will have at least min_size pixels in
|
|
them (unless the whole image contains fewer than min_size pixels, in this
|
|
case the entire image will be put into a single segment).
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename EXP
|
|
>
|
|
void find_candidate_object_locations (
|
|
const in_image_type& in_img,
|
|
std::vector<rectangle>& rects,
|
|
const matrix_exp<EXP>& kvals = linspace(50, 200, 3),
|
|
const unsigned long min_size = 20,
|
|
const unsigned long max_merging_iterations = 50
|
|
);
|
|
/*!
|
|
requires
|
|
- in_image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- is_vector(kvals) == true
|
|
- kvals.size() > 0
|
|
ensures
|
|
- This function takes an input image and generates a set of candidate
|
|
rectangles which are expected to bound any objects in the image. It does
|
|
this by running a version of the segment_image() routine on the image and
|
|
then reports rectangles containing each of the segments as well as rectangles
|
|
containing unions of adjacent segments. The basic idea is described in the
|
|
paper:
|
|
Segmentation as Selective Search for Object Recognition by Koen E. A. van de Sande, et al.
|
|
Note that this function deviates from what is described in the paper slightly.
|
|
See the code for details.
|
|
- The basic segmentation is performed kvals.size() times, each time with the k
|
|
parameter (see segment_image() and the Felzenszwalb paper for details on k)
|
|
set to a different value from kvals.
|
|
- When doing the basic segmentations prior to any box merging, we discard all
|
|
rectangles that have an area < min_size. Therefore, all outputs and
|
|
subsequent merged rectangles are built out of rectangles that contain at
|
|
least min_size pixels. Note that setting min_size to a smaller value than
|
|
you might otherwise be interested in using can be useful since it allows a
|
|
larger number of possible merged boxes to be created.
|
|
- There are max_merging_iterations rounds of neighboring blob merging.
|
|
Therefore, this parameter has some effect on the number of output rectangles
|
|
you get, with larger values of the parameter giving more output rectangles.
|
|
- This function appends the output rectangles into #rects. This means that any
|
|
rectangles in rects before this function was called will still be in there
|
|
after it terminates. Note further that #rects will not contain any duplicate
|
|
rectangles. That is, for all valid i and j where i != j it will be true
|
|
that:
|
|
- #rects[i] != rects[j]
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
template T,
|
|
typename alloc
|
|
>
|
|
void remove_duplicates (
|
|
std::vector<T,alloc>& items
|
|
);
|
|
/*!
|
|
requires
|
|
- T is comparable via operator != and std::less.
|
|
ensures
|
|
- This function finds any duplicate objects in items and removes the extra
|
|
instances. This way, the result is that items contains only unique
|
|
instances. It does this by sorting items and removing neighboring elements
|
|
unless they compare != according to operator !=.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename out_image_type
|
|
>
|
|
void min_barrier_distance(
|
|
const in_image_type& img,
|
|
out_image_type& dist,
|
|
size_t iterations = 10,
|
|
bool do_left_right_scans = true
|
|
);
|
|
/*!
|
|
requires
|
|
- in_image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- out_image_type == an image object that implements the interface defined in
|
|
dlib/image_processing/generic_image.h
|
|
- pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale == true
|
|
(i.e. dist must be a grayscale image)
|
|
- iterations > 0
|
|
ensures
|
|
- This function implements the salient object detection method described in the paper:
|
|
"Minimum barrier salient object detection at 80 fps" by Zhang, Jianming, et al.
|
|
In particular, we compute the minimum barrier distance between the borders of
|
|
the image and all the other pixels. The result is stored in dist. Note that
|
|
the paper talks about a bunch of other things you could do beyond computing
|
|
the minimum barrier distance, but this function doesn't do any of that. It's
|
|
just the vanilla MBD.
|
|
- We will perform iterations iterations of MBD passes over the image. Larger
|
|
values might give better results but run slower.
|
|
- During each MBD iteration we make raster scans over the image. These pass
|
|
from top->bottom, bottom->top, left->right, and right->left. If
|
|
do_left_right_scans==false then the left/right passes are not executed.
|
|
Skipping them makes the algorithm about 2x faster but might reduce the
|
|
quality of the output.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SEGMENT_ImAGE_ABSTRACT_Hh_
|
|
|
|
|