137 lines
4.4 KiB
C++
137 lines
4.4 KiB
C++
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
|
|
#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
|
|
|
|
#include "../matrix.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
template <
|
|
typename EXP
|
|
>
|
|
unsigned long bottom_up_cluster (
|
|
const matrix_exp<EXP>& dists,
|
|
std::vector<unsigned long>& labels,
|
|
unsigned long min_num_clusters,
|
|
double max_dist = std::numeric_limits<double>::infinity()
|
|
);
|
|
/*!
|
|
requires
|
|
- dists.nr() == dists.nc()
|
|
- min_num_clusters > 0
|
|
- dists == trans(dists)
|
|
(l.e. dists should be symmetric)
|
|
ensures
|
|
- Runs a bottom up agglomerative clustering algorithm.
|
|
- Interprets dists as a matrix that gives the distances between dists.nr()
|
|
items. In particular, we take dists(i,j) to be the distance between the ith
|
|
and jth element of some set. This function clusters the elements of this set
|
|
into at least min_num_clusters (or dists.nr() if there aren't enough
|
|
elements). Additionally, within each cluster, the maximum pairwise distance
|
|
between any two cluster elements is <= max_dist.
|
|
- returns the number of clusters found.
|
|
- #labels.size() == dists.nr()
|
|
- for all valid i:
|
|
- #labels[i] == the cluster ID of the node with index i (i.e. the node
|
|
corresponding to the distances dists(i,*)).
|
|
- 0 <= #labels[i] < the number of clusters found
|
|
(i.e. cluster IDs are assigned contiguously and start at 0)
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
struct snl_range
|
|
{
|
|
/*!
|
|
WHAT THIS OBJECT REPRESENTS
|
|
This object represents an interval on the real number line. It is used
|
|
to store the outputs of the segment_number_line() routine defined below.
|
|
!*/
|
|
|
|
snl_range(
|
|
);
|
|
/*!
|
|
ensures
|
|
- #lower == 0
|
|
- #upper == 0
|
|
!*/
|
|
|
|
snl_range(
|
|
double val
|
|
);
|
|
/*!
|
|
ensures
|
|
- #lower == val
|
|
- #upper == val
|
|
!*/
|
|
|
|
snl_range(
|
|
double l,
|
|
double u
|
|
);
|
|
/*!
|
|
requires
|
|
- l <= u
|
|
ensures
|
|
- #lower == l
|
|
- #upper == u
|
|
!*/
|
|
|
|
double lower;
|
|
double upper;
|
|
|
|
double width(
|
|
) const { return upper-lower; }
|
|
/*!
|
|
ensures
|
|
- returns the width of this interval on the number line.
|
|
!*/
|
|
|
|
bool operator<(const snl_range& item) const { return lower < item.lower; }
|
|
/*!
|
|
ensures
|
|
- provides a total ordering of snl_range objects assuming they are
|
|
non-overlapping.
|
|
!*/
|
|
};
|
|
|
|
std::ostream& operator<< (std::ostream& out, const snl_range& item );
|
|
/*!
|
|
ensures
|
|
- prints item to out in the form [lower,upper].
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
std::vector<snl_range> segment_number_line (
|
|
const std::vector<double>& x,
|
|
const double max_range_width
|
|
);
|
|
/*!
|
|
requires
|
|
- max_range_width >= 0
|
|
ensures
|
|
- Finds a clustering of the values in x and returns the ranges that define the
|
|
clustering. This routine uses a combination of bottom up clustering and a
|
|
simple greedy scan to try and find the most compact set of ranges that
|
|
contain all the values in x.
|
|
- This routine has approximately linear runtime.
|
|
- Every value in x will be contained inside one of the returned snl_range
|
|
objects;
|
|
- All returned snl_range object's will have a width() <= max_range_width and
|
|
will also be non-overlapping.
|
|
!*/
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
|
|
|
|
|