981 lines
37 KiB
C++
981 lines
37 KiB
C++
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_SEGMENT_ImAGE_Hh_
|
|
#define DLIB_SEGMENT_ImAGE_Hh_
|
|
|
|
#include "segment_image_abstract.h"
|
|
#include "../algs.h"
|
|
#include <vector>
|
|
#include "../geometry.h"
|
|
#include "../disjoint_subsets.h"
|
|
#include "assign_image.h"
|
|
#include "../set.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
namespace impl
|
|
{
|
|
template <typename T>
|
|
inline T edge_diff_uint(
|
|
const T& a,
|
|
const T& b
|
|
)
|
|
{
|
|
if (a > b)
|
|
return a - b;
|
|
else
|
|
return b - a;
|
|
}
|
|
|
|
// ----------------------------------------
|
|
|
|
template <typename T, typename enabled = void>
|
|
struct edge_diff_funct
|
|
{
|
|
typedef double diff_type;
|
|
|
|
template <typename pixel_type>
|
|
double operator()(
|
|
const pixel_type& a,
|
|
const pixel_type& b
|
|
) const
|
|
{
|
|
return length(pixel_to_vector<double>(a) - pixel_to_vector<double>(b));
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct edge_diff_funct<uint8,void>
|
|
{
|
|
typedef uint8 diff_type;
|
|
uint8 operator()( const uint8& a, const uint8& b) const { return edge_diff_uint(a,b); }
|
|
};
|
|
|
|
template <>
|
|
struct edge_diff_funct<uint16,void>
|
|
{
|
|
typedef uint16 diff_type;
|
|
uint16 operator()( const uint16& a, const uint16& b) const { return edge_diff_uint(a,b); }
|
|
};
|
|
|
|
template <>
|
|
struct edge_diff_funct<uint32,void>
|
|
{
|
|
typedef uint32 diff_type;
|
|
uint32 operator()( const uint32& a, const uint32& b) const { return edge_diff_uint(a,b); }
|
|
};
|
|
|
|
template <>
|
|
struct edge_diff_funct<double,void>
|
|
{
|
|
typedef double diff_type;
|
|
double operator()( const double& a, const double& b) const { return std::abs(a-b); }
|
|
};
|
|
|
|
template <typename T>
|
|
struct edge_diff_funct<T, typename enable_if<is_matrix<T> >::type>
|
|
{
|
|
typedef double diff_type;
|
|
double operator()(
|
|
const T& a,
|
|
const T& b
|
|
) const
|
|
{
|
|
return length(a-b);
|
|
}
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <typename T>
|
|
struct graph_image_segmentation_data_T
|
|
{
|
|
graph_image_segmentation_data_T() : component_size(1), internal_diff(0) {}
|
|
unsigned long component_size;
|
|
T internal_diff;
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <typename T>
|
|
struct segment_image_edge_data_T
|
|
{
|
|
segment_image_edge_data_T (){}
|
|
|
|
segment_image_edge_data_T (
|
|
const rectangle& rect,
|
|
const point& p1,
|
|
const point& p2,
|
|
const T& diff_
|
|
) :
|
|
idx1(p1.y()*rect.width() + p1.x()),
|
|
idx2(p2.y()*rect.width() + p2.x()),
|
|
diff(diff_)
|
|
{}
|
|
|
|
bool operator<(const segment_image_edge_data_T& item) const
|
|
{ return diff < item.diff; }
|
|
|
|
unsigned long idx1;
|
|
unsigned long idx2;
|
|
T diff;
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <typename image_view_type>
|
|
struct uint8_or_uint16_pixels
|
|
{
|
|
typedef typename image_view_type::pixel_type pixel_type;
|
|
const static bool value = is_same_type<pixel_type,uint8>::value ||
|
|
is_same_type<pixel_type,uint16>::value;
|
|
};
|
|
|
|
// This is an overload of get_pixel_edges() that is optimized to segment images
|
|
// with 8bit or 16bit pixels very quickly. We do this by using a radix sort
|
|
// instead of quicksort.
|
|
template <typename in_image_type, typename T>
|
|
typename enable_if<uint8_or_uint16_pixels<in_image_type> >::type
|
|
get_pixel_edges (
|
|
const in_image_type& in_img,
|
|
std::vector<segment_image_edge_data_T<T> >& sorted_edges
|
|
)
|
|
{
|
|
typedef typename in_image_type::pixel_type ptype;
|
|
typedef T diff_type;
|
|
std::vector<unsigned long> counts(std::numeric_limits<ptype>::max()+1, 0);
|
|
|
|
edge_diff_funct<ptype> edge_diff;
|
|
|
|
border_enumerator be(get_rect(in_img), 1);
|
|
// we are going to do a radix sort on the edge weights. So the first step
|
|
// is to accumulate them into count.
|
|
const rectangle area = get_rect(in_img);
|
|
while (be.move_next())
|
|
{
|
|
const long r = be.element().y();
|
|
const long c = be.element().x();
|
|
const ptype pix = in_img[r][c];
|
|
if (area.contains(c-1,r)) counts[edge_diff(pix, in_img[r ][c-1])] += 1;
|
|
if (area.contains(c+1,r)) counts[edge_diff(pix, in_img[r ][c+1])] += 1;
|
|
if (area.contains(c ,r-1)) counts[edge_diff(pix, in_img[r-1][c ])] += 1;
|
|
if (area.contains(c ,r+1)) counts[edge_diff(pix, in_img[r+1][c ])] += 1;
|
|
}
|
|
for (long r = 1; r+1 < in_img.nr(); ++r)
|
|
{
|
|
for (long c = 1; c+1 < in_img.nc(); ++c)
|
|
{
|
|
const ptype pix = in_img[r][c];
|
|
counts[edge_diff(pix, in_img[r-1][c+1])] += 1;
|
|
counts[edge_diff(pix, in_img[r ][c+1])] += 1;
|
|
counts[edge_diff(pix, in_img[r+1][c ])] += 1;
|
|
counts[edge_diff(pix, in_img[r+1][c+1])] += 1;
|
|
}
|
|
}
|
|
|
|
const unsigned long num_edges = shrink_rect(area,1).area()*4 + in_img.nr()*2*3 - 4 + (in_img.nc()-2)*2*3;
|
|
typedef segment_image_edge_data_T<T> segment_image_edge_data;
|
|
sorted_edges.resize(num_edges);
|
|
|
|
// integrate counts. The idea is to have sorted_edges[counts[i]] be the location that edges
|
|
// with an edge_diff of i go. So counts[0] == 0, counts[1] == number of 0 edge diff edges, etc.
|
|
unsigned long prev = counts[0];
|
|
for (unsigned long i = 1; i < counts.size(); ++i)
|
|
{
|
|
const unsigned long temp = counts[i];
|
|
counts[i] += counts[i-1];
|
|
counts[i-1] -= prev;
|
|
prev = temp;
|
|
}
|
|
counts[counts.size()-1] -= prev;
|
|
|
|
|
|
// now build a sorted list of all the edges
|
|
be.reset();
|
|
while(be.move_next())
|
|
{
|
|
const point p = be.element();
|
|
const long r = p.y();
|
|
const long c = p.x();
|
|
const ptype pix = in_img[r][c];
|
|
if (area.contains(c-1,r))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r ][c-1]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c-1,r),diff);
|
|
}
|
|
|
|
if (area.contains(c+1,r))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r ][c+1]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
|
|
}
|
|
|
|
if (area.contains(c ,r-1))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r-1][c ]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r-1),diff);
|
|
}
|
|
|
|
if (area.contains(c ,r+1))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r+1][c ]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
|
|
}
|
|
}
|
|
// same thing as the above loop but now we do it on the interior of the image and therefore
|
|
// don't have to include the boundary checking if statements used above.
|
|
for (long r = 1; r+1 < in_img.nr(); ++r)
|
|
{
|
|
for (long c = 1; c+1 < in_img.nc(); ++c)
|
|
{
|
|
const point p(c,r);
|
|
const ptype pix = in_img[r][c];
|
|
diff_type diff;
|
|
|
|
diff = edge_diff(pix, in_img[r ][c+1]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r),diff);
|
|
diff = edge_diff(pix, in_img[r-1][c+1]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r-1),diff);
|
|
diff = edge_diff(pix, in_img[r+1][c+1]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c+1,r+1),diff);
|
|
diff = edge_diff(pix, in_img[r+1][c ]);
|
|
sorted_edges[counts[diff]++] = segment_image_edge_data(area,p,point(c ,r+1),diff);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
// This is the general purpose version of get_pixel_edges(). It handles all pixel types.
|
|
template <typename in_image_type, typename T>
|
|
typename disable_if<uint8_or_uint16_pixels<in_image_type> >::type
|
|
get_pixel_edges (
|
|
const in_image_type& in_img,
|
|
std::vector<segment_image_edge_data_T<T> >& sorted_edges
|
|
)
|
|
{
|
|
const rectangle area = get_rect(in_img);
|
|
sorted_edges.reserve(area.area()*4);
|
|
|
|
typedef typename in_image_type::pixel_type ptype;
|
|
edge_diff_funct<ptype> edge_diff;
|
|
typedef T diff_type;
|
|
typedef segment_image_edge_data_T<T> segment_image_edge_data;
|
|
|
|
border_enumerator be(get_rect(in_img), 1);
|
|
|
|
// now build a sorted list of all the edges
|
|
be.reset();
|
|
while(be.move_next())
|
|
{
|
|
const point p = be.element();
|
|
const long r = p.y();
|
|
const long c = p.x();
|
|
const ptype& pix = in_img[r][c];
|
|
if (area.contains(c-1,r))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r ][c-1]);
|
|
sorted_edges.push_back(segment_image_edge_data(area,p,point(c-1,r),diff));
|
|
}
|
|
|
|
if (area.contains(c+1,r))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r ][c+1]);
|
|
sorted_edges.push_back(segment_image_edge_data(area,p,point(c+1,r),diff));
|
|
}
|
|
|
|
if (area.contains(c ,r-1))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r-1][c ]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r-1),diff));
|
|
}
|
|
if (area.contains(c ,r+1))
|
|
{
|
|
const diff_type diff = edge_diff(pix, in_img[r+1][c ]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
|
|
}
|
|
}
|
|
// same thing as the above loop but now we do it on the interior of the image and therefore
|
|
// don't have to include the boundary checking if statements used above.
|
|
for (long r = 1; r+1 < in_img.nr(); ++r)
|
|
{
|
|
for (long c = 1; c+1 < in_img.nc(); ++c)
|
|
{
|
|
const point p(c,r);
|
|
const ptype& pix = in_img[r][c];
|
|
diff_type diff;
|
|
|
|
diff = edge_diff(pix, in_img[r ][c+1]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r),diff));
|
|
diff = edge_diff(pix, in_img[r+1][c+1]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r+1),diff));
|
|
diff = edge_diff(pix, in_img[r+1][c ]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c ,r+1),diff));
|
|
diff = edge_diff(pix, in_img[r-1][c+1]);
|
|
sorted_edges.push_back( segment_image_edge_data(area,p,point(c+1,r-1),diff));
|
|
}
|
|
}
|
|
|
|
std::sort(sorted_edges.begin(), sorted_edges.end());
|
|
|
|
}
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
} // end of namespace impl
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename out_image_type
|
|
>
|
|
void segment_image (
|
|
const in_image_type& in_img_,
|
|
out_image_type& out_img_,
|
|
const double k = 200,
|
|
const unsigned long min_size = 10
|
|
)
|
|
{
|
|
using namespace dlib::impl;
|
|
typedef typename image_traits<in_image_type>::pixel_type ptype;
|
|
typedef typename edge_diff_funct<ptype>::diff_type diff_type;
|
|
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(is_same_object(in_img_, out_img_) == false,
|
|
"\t void segment_image()"
|
|
<< "\n\t The input images can't be the same object."
|
|
);
|
|
|
|
COMPILE_TIME_ASSERT(is_unsigned_type<typename image_traits<out_image_type>::pixel_type>::value);
|
|
|
|
const_image_view<in_image_type> in_img(in_img_);
|
|
image_view<out_image_type> out_img(out_img_);
|
|
|
|
out_img.set_size(in_img.nr(), in_img.nc());
|
|
// don't bother doing anything if the image is too small
|
|
if (in_img.nr() < 2 || in_img.nc() < 2)
|
|
{
|
|
assign_all_pixels(out_img,0);
|
|
return;
|
|
}
|
|
|
|
disjoint_subsets sets;
|
|
sets.set_size(in_img.size());
|
|
|
|
std::vector<segment_image_edge_data_T<diff_type> > sorted_edges;
|
|
get_pixel_edges(in_img, sorted_edges);
|
|
|
|
std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size());
|
|
|
|
// now start connecting blobs together to make a minimum spanning tree.
|
|
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
|
|
{
|
|
const unsigned long idx1 = sorted_edges[i].idx1;
|
|
const unsigned long idx2 = sorted_edges[i].idx2;
|
|
|
|
unsigned long set1 = sets.find_set(idx1);
|
|
unsigned long set2 = sets.find_set(idx2);
|
|
if (set1 != set2)
|
|
{
|
|
const diff_type diff = sorted_edges[i].diff;
|
|
const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size);
|
|
const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size);
|
|
|
|
const diff_type mint = std::min(data[set1].internal_diff + tau1,
|
|
data[set2].internal_diff + tau2);
|
|
if (diff <= mint)
|
|
{
|
|
const unsigned long new_set = sets.merge_sets(set1, set2);
|
|
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
|
|
data[new_set].internal_diff = diff;
|
|
}
|
|
}
|
|
}
|
|
|
|
// now merge any really small blobs
|
|
if (min_size != 0)
|
|
{
|
|
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
|
|
{
|
|
const unsigned long idx1 = sorted_edges[i].idx1;
|
|
const unsigned long idx2 = sorted_edges[i].idx2;
|
|
|
|
unsigned long set1 = sets.find_set(idx1);
|
|
unsigned long set2 = sets.find_set(idx2);
|
|
if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
|
|
{
|
|
const unsigned long new_set = sets.merge_sets(set1, set2);
|
|
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
|
|
//data[new_set].internal_diff = sorted_edges[i].diff;
|
|
}
|
|
}
|
|
}
|
|
|
|
unsigned long idx = 0;
|
|
for (long r = 0; r < out_img.nr(); ++r)
|
|
{
|
|
for (long c = 0; c < out_img.nc(); ++c)
|
|
{
|
|
out_img[r][c] = sets.find_set(idx++);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
// Candidate object location generation code.
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
namespace impl
|
|
{
|
|
struct edge_data
|
|
{
|
|
double edge_diff;
|
|
unsigned long set1;
|
|
unsigned long set2;
|
|
bool operator<(const edge_data& item) const
|
|
{
|
|
return edge_diff < item.edge_diff;
|
|
}
|
|
};
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename diff_type
|
|
>
|
|
void find_basic_candidate_object_locations (
|
|
const in_image_type& in_img,
|
|
const std::vector<dlib::impl::segment_image_edge_data_T<diff_type> >& sorted_edges,
|
|
std::vector<rectangle>& out_rects,
|
|
std::vector<edge_data>& edges,
|
|
const double k,
|
|
const unsigned long min_size
|
|
)
|
|
{
|
|
using namespace dlib::impl;
|
|
|
|
std::vector<dlib::impl::segment_image_edge_data_T<diff_type> > rejected_edges;
|
|
rejected_edges.reserve(sorted_edges.size());
|
|
|
|
out_rects.clear();
|
|
edges.clear();
|
|
|
|
// don't bother doing anything if the image is too small
|
|
if (in_img.nr() < 2 || in_img.nc() < 2)
|
|
{
|
|
return;
|
|
}
|
|
|
|
disjoint_subsets sets;
|
|
sets.set_size(in_img.size());
|
|
|
|
|
|
std::vector<graph_image_segmentation_data_T<diff_type> > data(in_img.size());
|
|
|
|
|
|
|
|
std::pair<unsigned long,unsigned long> last_blob_edge(std::numeric_limits<unsigned long>::max(),
|
|
std::numeric_limits<unsigned long>::max());;
|
|
// now start connecting blobs together to make a minimum spanning tree.
|
|
for (unsigned long i = 0; i < sorted_edges.size(); ++i)
|
|
{
|
|
const unsigned long idx1 = sorted_edges[i].idx1;
|
|
const unsigned long idx2 = sorted_edges[i].idx2;
|
|
|
|
unsigned long set1 = sets.find_set(idx1);
|
|
unsigned long set2 = sets.find_set(idx2);
|
|
if (set1 != set2)
|
|
{
|
|
const diff_type diff = sorted_edges[i].diff;
|
|
const diff_type tau1 = static_cast<diff_type>(k/data[set1].component_size);
|
|
const diff_type tau2 = static_cast<diff_type>(k/data[set2].component_size);
|
|
|
|
const diff_type mint = std::min(data[set1].internal_diff + tau1,
|
|
data[set2].internal_diff + tau2);
|
|
if (diff <= mint)
|
|
{
|
|
const unsigned long new_set = sets.merge_sets(set1, set2);
|
|
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
|
|
data[new_set].internal_diff = diff;
|
|
}
|
|
else
|
|
{
|
|
// Don't bother keeping multiple edges from the same pair of blobs, we
|
|
// only need one for what we will do later.
|
|
if (std::make_pair(set1,set2) != last_blob_edge)
|
|
{
|
|
segment_image_edge_data_T<diff_type> temp = sorted_edges[i];
|
|
temp.idx1 = set1;
|
|
temp.idx2 = set2;
|
|
rejected_edges.push_back(temp);
|
|
last_blob_edge = std::make_pair(set1,set2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// merge small blobs
|
|
for (unsigned long i = 0; i < rejected_edges.size(); ++i)
|
|
{
|
|
const unsigned long idx1 = rejected_edges[i].idx1;
|
|
const unsigned long idx2 = rejected_edges[i].idx2;
|
|
|
|
unsigned long set1 = sets.find_set(idx1);
|
|
unsigned long set2 = sets.find_set(idx2);
|
|
rejected_edges[i].idx1 = set1;
|
|
rejected_edges[i].idx2 = set2;
|
|
if (set1 != set2 && (data[set1].component_size < min_size || data[set2].component_size < min_size))
|
|
{
|
|
const unsigned long new_set = sets.merge_sets(set1, set2);
|
|
data[new_set].component_size = data[set1].component_size + data[set2].component_size;
|
|
data[new_set].internal_diff = rejected_edges[i].diff;
|
|
}
|
|
}
|
|
|
|
// find bounding boxes of each blob
|
|
std::map<unsigned long, rectangle> boxes;
|
|
std::map<unsigned long, unsigned long> box_id_map;
|
|
unsigned long idx = 0;
|
|
for (long r = 0; r < in_img.nr(); ++r)
|
|
{
|
|
for (long c = 0; c < in_img.nc(); ++c)
|
|
{
|
|
const unsigned long id = sets.find_set(idx++);
|
|
// Accumulate the current point into its box and if it is the first point
|
|
// in the box then also record the id number for this box.
|
|
if ((boxes[id] += point(c,r)).area() == 1)
|
|
box_id_map[id] = boxes.size()-1;
|
|
}
|
|
}
|
|
|
|
// copy boxes into out_rects
|
|
out_rects.resize(boxes.size());
|
|
for (std::map<unsigned long,rectangle>::iterator i = boxes.begin(); i != boxes.end(); ++i)
|
|
{
|
|
out_rects[box_id_map[i->first]] = i->second;
|
|
}
|
|
|
|
// Now find the edges between the boxes
|
|
typedef dlib::memory_manager<char>::kernel_2c mm_type;
|
|
dlib::set<std::pair<unsigned long, unsigned long>, mm_type>::kernel_1a neighbors_final;
|
|
for (unsigned long i = 0; i < rejected_edges.size(); ++i)
|
|
{
|
|
const unsigned long idx1 = rejected_edges[i].idx1;
|
|
const unsigned long idx2 = rejected_edges[i].idx2;
|
|
|
|
unsigned long set1 = sets.find_set(idx1);
|
|
unsigned long set2 = sets.find_set(idx2);
|
|
if (set1 != set2)
|
|
{
|
|
std::pair<unsigned long, unsigned long> p = std::make_pair(set1,set2);
|
|
if (!neighbors_final.is_member(p))
|
|
{
|
|
neighbors_final.add(p);
|
|
|
|
edge_data temp;
|
|
const diff_type mint = std::min(data[set1].internal_diff ,
|
|
data[set2].internal_diff );
|
|
temp.edge_diff = rejected_edges[i].diff - mint;
|
|
temp.set1 = box_id_map[set1];
|
|
temp.set2 = box_id_map[set2];
|
|
edges.push_back(temp);
|
|
}
|
|
}
|
|
}
|
|
|
|
std::sort(edges.begin(), edges.end());
|
|
}
|
|
} // end namespace impl
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <typename T, typename alloc>
|
|
void remove_duplicates (
|
|
std::vector<T,alloc>& items
|
|
)
|
|
{
|
|
std::sort(items.begin(), items.end(), std::less<T>());
|
|
unsigned long num_unique = 1;
|
|
for (unsigned long i = 1; i < items.size(); ++i)
|
|
{
|
|
if (items[i] != items[i-1])
|
|
{
|
|
items[num_unique++] = items[i];
|
|
}
|
|
}
|
|
if (items.size() != 0)
|
|
items.resize(num_unique);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename EXP
|
|
>
|
|
void find_candidate_object_locations (
|
|
const in_image_type& in_img_,
|
|
std::vector<rectangle>& rects,
|
|
const matrix_exp<EXP>& kvals,
|
|
const unsigned long min_size = 20,
|
|
const unsigned long max_merging_iterations = 50
|
|
)
|
|
{
|
|
// make sure requires clause is not broken
|
|
DLIB_ASSERT(is_vector(kvals) && kvals.size() > 0,
|
|
"\t void find_candidate_object_locations()"
|
|
<< "\n\t Invalid inputs were given to this function."
|
|
<< "\n\t is_vector(kvals): " << is_vector(kvals)
|
|
<< "\n\t kvals.size(): " << kvals.size()
|
|
);
|
|
|
|
typedef dlib::memory_manager<char>::kernel_2c mm_type;
|
|
typedef dlib::set<rectangle, mm_type>::kernel_1a set_of_rects;
|
|
|
|
using namespace dlib::impl;
|
|
typedef typename image_traits<in_image_type>::pixel_type ptype;
|
|
typedef typename edge_diff_funct<ptype>::diff_type diff_type;
|
|
|
|
const_image_view<in_image_type> in_img(in_img_);
|
|
|
|
// don't bother doing anything if the image is too small
|
|
if (in_img.nr() < 2 || in_img.nc() < 2)
|
|
{
|
|
return;
|
|
}
|
|
|
|
std::vector<edge_data> edges;
|
|
std::vector<rectangle> working_rects;
|
|
std::vector<segment_image_edge_data_T<diff_type> > sorted_edges;
|
|
get_pixel_edges(in_img, sorted_edges);
|
|
|
|
disjoint_subsets sets;
|
|
|
|
for (long j = 0; j < kvals.size(); ++j)
|
|
{
|
|
const double k = kvals(j);
|
|
|
|
find_basic_candidate_object_locations(in_img, sorted_edges, working_rects, edges, k, min_size);
|
|
rects.insert(rects.end(), working_rects.begin(), working_rects.end());
|
|
|
|
|
|
// Now iteratively merge all the rectangles we have and record the results.
|
|
// Note that, unlike what is described in the paper
|
|
// Segmentation as Selective Search for Object Recognition" by Koen E. A. van de Sande, et al.
|
|
// we don't use any kind of histogram/SIFT like thing to order the edges
|
|
// between the blobs. Here we simply order by the pixel difference value.
|
|
// Additionally, note that we keep progressively merging boxes in the outer
|
|
// loop rather than performing just a single iteration as indicated in the
|
|
// paper.
|
|
set_of_rects detected_rects;
|
|
bool did_merge = true;
|
|
for (unsigned long iter = 0; did_merge && iter < max_merging_iterations; ++iter)
|
|
{
|
|
did_merge = false;
|
|
sets.clear();
|
|
sets.set_size(working_rects.size());
|
|
|
|
// recursively merge neighboring blobs until we have merged everything
|
|
for (unsigned long i = 0; i < edges.size(); ++i)
|
|
{
|
|
edge_data temp = edges[i];
|
|
|
|
temp.set1 = sets.find_set(temp.set1);
|
|
temp.set2 = sets.find_set(temp.set2);
|
|
if (temp.set1 != temp.set2)
|
|
{
|
|
rectangle merged_rect = working_rects[temp.set1] + working_rects[temp.set2];
|
|
// Skip merging this pair of blobs if it was merged in a previous
|
|
// iteration. Doing this lets us consider other possible blob
|
|
// merges.
|
|
if (!detected_rects.is_member(merged_rect))
|
|
{
|
|
const unsigned long new_set = sets.merge_sets(temp.set1, temp.set2);
|
|
rects.push_back(merged_rect);
|
|
working_rects[new_set] = merged_rect;
|
|
did_merge = true;
|
|
detected_rects.add(merged_rect);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
remove_duplicates(rects);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type
|
|
>
|
|
void find_candidate_object_locations (
|
|
const in_image_type& in_img,
|
|
std::vector<rectangle>& rects
|
|
)
|
|
{
|
|
find_candidate_object_locations(in_img, rects, linspace(50, 200, 3));
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
namespace impl
|
|
{
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename funct1_t,
|
|
typename funct2_t
|
|
>
|
|
inline void mbd_raster_scan_top_bottom(
|
|
const rectangle& area,
|
|
funct1_t funct1,
|
|
funct2_t funct2
|
|
)
|
|
{
|
|
// scan top to bottom
|
|
for (long r = area.top(); r <= area.bottom(); ++r)
|
|
{
|
|
for (long c = area.left(); c <= area.right(); ++c)
|
|
{
|
|
funct1(r,c, r-1,c);
|
|
funct2(r,c, r,c-1);
|
|
}
|
|
}
|
|
|
|
// scan bottom to top
|
|
for (long r = area.bottom(); r >= area.top(); --r)
|
|
{
|
|
for (long c = area.right(); c >= area.left(); --c)
|
|
{
|
|
funct2(r,c, r+1,c);
|
|
funct2(r,c, r,c+1);
|
|
}
|
|
}
|
|
}
|
|
|
|
template <
|
|
typename funct_t
|
|
>
|
|
inline void mbd_raster_scan_left_right(
|
|
const rectangle& area,
|
|
funct_t funct
|
|
)
|
|
{
|
|
// scan left to right
|
|
for (long c = area.left(); c <= area.right(); ++c)
|
|
{
|
|
for (long r = area.top(); r <= area.bottom(); ++r)
|
|
{
|
|
funct(r,c, r-1,c);
|
|
funct(r,c, r,c-1);
|
|
}
|
|
}
|
|
|
|
// scan right to left
|
|
for (long c = area.right(); c >= area.left(); --c)
|
|
{
|
|
for (long r = area.bottom(); r >= area.top(); --r)
|
|
{
|
|
funct(r,c, r+1,c);
|
|
funct(r,c, r,c+1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename out_image_type
|
|
>
|
|
typename disable_if_c<is_rgb_image<in_image_type>::value>::type min_barrier_distance(
|
|
const in_image_type& img_,
|
|
out_image_type& dist_,
|
|
size_t iterations = 10,
|
|
bool do_left_right_scans = true
|
|
)
|
|
{
|
|
DLIB_CASSERT(iterations > 0);
|
|
|
|
static_assert(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,
|
|
"min_barrier_distance() requires a grayscale output image.");
|
|
|
|
const_image_view<in_image_type> img(img_);
|
|
image_view<out_image_type> dist(dist_);
|
|
|
|
typedef typename image_traits<in_image_type>::pixel_type pixel_type;
|
|
typedef typename pixel_traits<pixel_type>::basic_pixel_type basic_pixel_type;
|
|
|
|
|
|
dist.set_size(img.nr(), img.nc());
|
|
array2d<basic_pixel_type> lower, upper;
|
|
assign_all_pixels(dist, pixel_traits<pixel_type>::max());
|
|
zero_border_pixels(dist,1,1);
|
|
assign_image(lower, img);
|
|
assign_image(upper, img);
|
|
|
|
|
|
auto check_neighbor = [&](long r, long c, long neighbor_r, long neighbor_c)
|
|
{
|
|
auto l = std::min(lower[neighbor_r][neighbor_c], get_pixel_intensity(img[r][c]));
|
|
auto u = std::max(upper[neighbor_r][neighbor_c], get_pixel_intensity(img[r][c]));
|
|
auto d = u-l;
|
|
if (d < dist[r][c])
|
|
{
|
|
lower[r][c] = l;
|
|
upper[r][c] = u;
|
|
dist[r][c] = d;
|
|
}
|
|
};
|
|
|
|
// The very first pass we make needs to use this version. We do this because the
|
|
// initial setting of dist is the max pixel value, but that value might naturally
|
|
// occur in the data, especially for 8bit images. So we use this version on the
|
|
// first pass to make sure that the lower and upper bounds get set with real data.
|
|
auto check_neighbor_init = [&](long r, long c, long neighbor_r, long neighbor_c)
|
|
{
|
|
auto l = std::min(lower[neighbor_r][neighbor_c], get_pixel_intensity(img[r][c]));
|
|
auto u = std::max(upper[neighbor_r][neighbor_c], get_pixel_intensity(img[r][c]));
|
|
auto d = u-l;
|
|
lower[r][c] = l;
|
|
upper[r][c] = u;
|
|
dist[r][c] = d;
|
|
};
|
|
|
|
auto area = shrink_rect(get_rect(img),1);
|
|
|
|
impl::mbd_raster_scan_top_bottom(area, check_neighbor_init, check_neighbor);
|
|
if (do_left_right_scans)
|
|
impl::mbd_raster_scan_left_right(area, check_neighbor);
|
|
for (size_t i = 1; i < iterations; ++i)
|
|
{
|
|
impl::mbd_raster_scan_top_bottom(area, check_neighbor, check_neighbor);
|
|
if (do_left_right_scans)
|
|
impl::mbd_raster_scan_left_right(area, check_neighbor);
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename in_image_type,
|
|
typename out_image_type
|
|
>
|
|
typename enable_if_c<is_rgb_image<in_image_type>::value>::type min_barrier_distance(
|
|
const in_image_type& img_,
|
|
out_image_type& dist_,
|
|
size_t iterations = 10,
|
|
bool do_left_right_scans = true
|
|
)
|
|
{
|
|
DLIB_CASSERT(iterations > 0);
|
|
|
|
static_assert(pixel_traits<typename image_traits<out_image_type>::pixel_type>::grayscale,
|
|
"min_barrier_distance() requires a grayscale output image.");
|
|
|
|
const_image_view<in_image_type> img(img_);
|
|
|
|
typedef typename image_traits<in_image_type>::pixel_type pixel_type;
|
|
|
|
|
|
array2d<rgb_pixel> dist(img.nr(), img.nc());
|
|
array2d<rgb_pixel> lower, upper;
|
|
assign_all_pixels(dist, pixel_traits<pixel_type>::max());
|
|
zero_border_pixels(dist,1,1);
|
|
assign_image(lower, img);
|
|
assign_image(upper, img);
|
|
|
|
auto check_neighbor = [&](long r, long c, long neighbor_r, long neighbor_c)
|
|
{
|
|
auto l = std::min(lower[neighbor_r][neighbor_c].red, img[r][c].red);
|
|
auto u = std::max(upper[neighbor_r][neighbor_c].red, img[r][c].red);
|
|
auto d = u-l;
|
|
if (d < dist[r][c].red)
|
|
{
|
|
lower[r][c].red = l;
|
|
upper[r][c].red = u;
|
|
dist[r][c].red = d;
|
|
}
|
|
|
|
l = std::min(lower[neighbor_r][neighbor_c].green, img[r][c].green);
|
|
u = std::max(upper[neighbor_r][neighbor_c].green, img[r][c].green);
|
|
d = u-l;
|
|
if (d < dist[r][c].green)
|
|
{
|
|
lower[r][c].green = l;
|
|
upper[r][c].green = u;
|
|
dist[r][c].green = d;
|
|
}
|
|
|
|
l = std::min(lower[neighbor_r][neighbor_c].blue, img[r][c].blue);
|
|
u = std::max(upper[neighbor_r][neighbor_c].blue, img[r][c].blue);
|
|
d = u-l;
|
|
if (d < dist[r][c].blue)
|
|
{
|
|
lower[r][c].blue = l;
|
|
upper[r][c].blue = u;
|
|
dist[r][c].blue = d;
|
|
}
|
|
};
|
|
|
|
// The very first pass we make needs to use this version. We do this because the
|
|
// initial setting of dist is the max pixel value, but that value might naturally
|
|
// occur in the data, especially for 8bit images. So we use this version on the
|
|
// first pass to make sure that the lower and upper bounds get set with real data.
|
|
auto check_neighbor_init = [&](long r, long c, long neighbor_r, long neighbor_c)
|
|
{
|
|
auto l = std::min(lower[neighbor_r][neighbor_c].red, img[r][c].red);
|
|
auto u = std::max(upper[neighbor_r][neighbor_c].red, img[r][c].red);
|
|
auto d = u-l;
|
|
lower[r][c].red = l;
|
|
upper[r][c].red = u;
|
|
dist[r][c].red = d;
|
|
|
|
l = std::min(lower[neighbor_r][neighbor_c].green, img[r][c].green);
|
|
u = std::max(upper[neighbor_r][neighbor_c].green, img[r][c].green);
|
|
d = u-l;
|
|
lower[r][c].green = l;
|
|
upper[r][c].green = u;
|
|
dist[r][c].green = d;
|
|
|
|
l = std::min(lower[neighbor_r][neighbor_c].blue, img[r][c].blue);
|
|
u = std::max(upper[neighbor_r][neighbor_c].blue, img[r][c].blue);
|
|
d = u-l;
|
|
lower[r][c].blue = l;
|
|
upper[r][c].blue = u;
|
|
dist[r][c].blue = d;
|
|
};
|
|
|
|
auto area = shrink_rect(get_rect(img),1);
|
|
|
|
impl::mbd_raster_scan_top_bottom(area, check_neighbor_init, check_neighbor);
|
|
if (do_left_right_scans)
|
|
impl::mbd_raster_scan_left_right(area, check_neighbor);
|
|
for (size_t i = 1; i < iterations; ++i)
|
|
{
|
|
impl::mbd_raster_scan_top_bottom(area, check_neighbor, check_neighbor);
|
|
if (do_left_right_scans)
|
|
impl::mbd_raster_scan_left_right(area, check_neighbor);
|
|
}
|
|
|
|
// convert to grayscale for output.
|
|
assign_image(dist_,dist);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_SEGMENT_ImAGE_Hh_
|
|
|