AndroidCamera/FaceBlit/dlib-19.21/dlib/image_processing/scan_image_pyramid.h

// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_SCAN_IMaGE_PYRAMID_Hh_
#define DLIB_SCAN_IMaGE_PYRAMID_Hh_

#include "scan_image_pyramid_abstract.h"
#include "../matrix.h"
#include "../geometry.h"
#include "scan_image.h"
#include "../array2d.h"
#include <vector>
#include "full_object_detection.h"
#include "../image_processing/generic_image.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    class scan_image_pyramid : noncopyable
    {

    public:

        typedef matrix<double,0,1> feature_vector_type;

        typedef Pyramid_type pyramid_type;
        typedef Feature_extractor_type feature_extractor_type;

        scan_image_pyramid (
        );

        template <
            typename image_type
            >
        void load (
            const image_type& img
        );

        inline bool is_loaded_with_image (
        ) const;

        inline void copy_configuration(
            const feature_extractor_type& fe
        );

        inline void copy_configuration (
            const scan_image_pyramid& item
        );

        const Feature_extractor_type& get_feature_extractor (
        ) const { return feats_config; }

        void add_detection_template (
            const rectangle& object_box,
            const std::vector<rectangle>& stationary_feature_extraction_regions,
            const std::vector<rectangle>& movable_feature_extraction_regions
        );

        void add_detection_template (
            const rectangle& object_box,
            const std::vector<rectangle>& stationary_feature_extraction_regions
        );

        inline unsigned long get_num_detection_templates (
        ) const;

        inline unsigned long get_num_movable_components_per_detection_template (
        ) const;

        inline unsigned long get_num_stationary_components_per_detection_template (
        ) const;

        inline unsigned long get_num_components_per_detection_template (
        ) const;

        inline long get_num_dimensions (
        ) const;

        unsigned long get_max_pyramid_levels (
        ) const;

        void set_max_pyramid_levels (
            unsigned long max_levels
        );

        inline unsigned long get_max_detections_per_template (
        ) const;

        void set_min_pyramid_layer_size (
            unsigned long width,
            unsigned long height
        );

        inline unsigned long get_min_pyramid_layer_width (
        ) const;

        inline unsigned long get_min_pyramid_layer_height (
        ) const;

        void set_max_detections_per_template (
            unsigned long max_dets
        );

        void detect (
            const feature_vector_type& w,
            std::vector<std::pair<double, rectangle> >& dets,
            const double thresh
        ) const;

        void get_feature_vector (
            const full_object_detection& obj,
            feature_vector_type& psi
        ) const;

        full_object_detection get_full_object_detection (
            const rectangle& rect,
            const feature_vector_type& w
        ) const;

        const rectangle get_best_matching_rect (
            const rectangle& rect
        ) const;

        template <typename T, typename U>
        friend void serialize (
            const scan_image_pyramid<T,U>& item,
            std::ostream& out
        );

        template <typename T, typename U>
        friend void deserialize (
            scan_image_pyramid<T,U>& item,
            std::istream& in
        );

    private:
        static bool compare_pair_rect (
            const std::pair<double, rectangle>& a,
            const std::pair<double, rectangle>& b
        )
        {
            return a.first < b.first;
        }

        struct detection_template
        {
            rectangle object_box; // always centered at (0,0)
            std::vector<rectangle> rects; // template with respect to (0,0)
            std::vector<rectangle> movable_rects;
        };

        friend void serialize(const detection_template& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.object_box, out);
            serialize(item.rects, out);
            serialize(item.movable_rects, out);
        }
        friend void deserialize(detection_template& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing a dlib::scan_image_pyramid::detection_template object.");

            deserialize(item.object_box, in);
            deserialize(item.rects, in);
            deserialize(item.movable_rects, in);
        }

        void get_mapped_rect_and_metadata (
            const unsigned long number_pyramid_levels,
            rectangle rect,
            rectangle& mapped_rect,
            detection_template& best_template,
            rectangle& object_box,
            unsigned long& best_level,
            unsigned long& detection_template_idx
        ) const;

        double get_match_score (
            rectangle r1,
            rectangle r2
        ) const
        {
            // make the rectangles overlap as much as possible before computing the match score.
            r1 = move_rect(r1, r2.tl_corner());
            return (r1.intersect(r2).area())/(double)(r1 + r2).area();
        }

        void test_coordinate_transforms()
        {
            for (long x = -10; x <= 10; x += 10)
            {
                for (long y = -10; y <= 10; y += 10)
                {
                    const rectangle rect = centered_rect(x,y,5,6);
                    rectangle a;

                    a = feats_config.image_to_feat_space(rect);
                    if (a.width()  > 10000000 || a.height() > 10000000 )
                    {
                        DLIB_CASSERT(false, "The image_to_feat_space() routine is outputting rectangles of an implausibly "
                                     << "\nlarge size.  This means there is probably a bug in your feature extractor.");
                    }
                    a = feats_config.feat_to_image_space(rect);
                    if (a.width()  > 10000000 || a.height() > 10000000 )
                    {
                        DLIB_CASSERT(false, "The feat_to_image_space() routine is outputting rectangles of an implausibly "
                                     << "\nlarge size.  This means there is probably a bug in your feature extractor.");
                    }
                }
            }

        }

        feature_extractor_type feats_config; // just here to hold configuration.  use it to populate the feats elements.
        array<feature_extractor_type> feats;
        std::vector<detection_template> det_templates;
        unsigned long max_dets_per_template;
        unsigned long max_pyramid_levels;
        unsigned long min_pyramid_layer_width;
        unsigned long min_pyramid_layer_height;

    };

// ----------------------------------------------------------------------------------------

    template <typename T, typename U>
    void serialize (
        const scan_image_pyramid<T,U>& item,
        std::ostream& out
    )
    {
        int version = 3;
        serialize(version, out);
        serialize(item.feats_config, out);
        serialize(item.feats, out);
        serialize(item.det_templates, out);
        serialize(item.max_dets_per_template, out);
        serialize(item.max_pyramid_levels, out);
        serialize(item.min_pyramid_layer_width, out);
        serialize(item.min_pyramid_layer_height, out);
        serialize(item.get_num_dimensions(), out);
    }

// ----------------------------------------------------------------------------------------

    template <typename T, typename U>
    void deserialize (
        scan_image_pyramid<T,U>& item,
        std::istream& in
    )
    {
        int version = 0;
        deserialize(version, in);
        if (version != 3)
            throw serialization_error("Unsupported version found when deserializing a scan_image_pyramid object.");

        deserialize(item.feats_config, in);
        deserialize(item.feats, in);
        deserialize(item.det_templates, in);
        deserialize(item.max_dets_per_template, in);
        deserialize(item.max_pyramid_levels, in);
        deserialize(item.min_pyramid_layer_width, in);
        deserialize(item.min_pyramid_layer_height, in);

        // When developing some feature extractor, it's easy to accidentally change its
        // number of dimensions and then try to deserialize data from an older version of
        // your extractor into the current code.  This check is here to catch that kind of
        // user error.
        long dims;
        deserialize(dims, in);
        if (item.get_num_dimensions() != dims)
            throw serialization_error("Number of dimensions in serialized scan_image_pyramid doesn't match the expected number.");
    }

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
//                         scan_image_pyramid member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    scan_image_pyramid (
    ) :
        max_dets_per_template(10000),
        max_pyramid_levels(1000),
        min_pyramid_layer_width(20),
        min_pyramid_layer_height(20)
    {
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    template <
        typename image_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    load (
        const image_type& img
    )
    {
        unsigned long levels = 0;
        rectangle rect = get_rect(img);

        // figure out how many pyramid levels we should be using based on the image size
        pyramid_type pyr;
        do
        {
            rect = pyr.rect_down(rect);
            ++levels;
        } while (rect.width() >= min_pyramid_layer_width && rect.height() >= min_pyramid_layer_height &&
                 levels < max_pyramid_levels);

        if (feats.max_size() < levels)
            feats.set_max_size(levels);
        feats.set_size(levels);

        for (unsigned long i = 0; i < feats.size(); ++i)
            feats[i].copy_configuration(feats_config);

        // build our feature pyramid
        feats[0].load(img);
        if (feats.size() > 1)
        {
            image_type temp1, temp2;
            pyr(img, temp1);
            feats[1].load(temp1);
            swap(temp1,temp2);

            for (unsigned long i = 2; i < feats.size(); ++i)
            {
                pyr(temp2, temp1);
                feats[i].load(temp1);
                swap(temp1,temp2);
            }
        }


    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_max_detections_per_template (
    ) const
    {
        return max_dets_per_template;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    set_max_detections_per_template (
        unsigned long max_dets
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(max_dets > 0 ,
            "\t void scan_image_pyramid::set_max_detections_per_template()"
            << "\n\t The max number of possible detections can't be zero. "
            << "\n\t max_dets: " << max_dets
            << "\n\t this: " << this
            );

        max_dets_per_template = max_dets;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    bool scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    is_loaded_with_image (
    ) const
    {
        return feats.size() != 0;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    copy_configuration(
        const feature_extractor_type& fe
    )
    {
        test_coordinate_transforms();
        feats_config.copy_configuration(fe);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    copy_configuration (
        const scan_image_pyramid& item
    )
    {
        feats_config.copy_configuration(item.feats_config);
        det_templates = item.det_templates;
        max_dets_per_template = item.max_dets_per_template;
        max_pyramid_levels = item.max_pyramid_levels;
        min_pyramid_layer_width = item.min_pyramid_layer_width;
        min_pyramid_layer_height = item.min_pyramid_layer_height;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    add_detection_template (
        const rectangle& object_box,
        const std::vector<rectangle>& stationary_feature_extraction_regions,
        const std::vector<rectangle>& movable_feature_extraction_regions
    )
    {
#ifdef ENABLE_ASSERTS
        // make sure requires clause is not broken
        DLIB_ASSERT((get_num_detection_templates() == 0 ||
                        (get_num_stationary_components_per_detection_template() == stationary_feature_extraction_regions.size() &&
                        get_num_movable_components_per_detection_template() == movable_feature_extraction_regions.size())) &&
                        center(object_box) == point(0,0),
            "\t void scan_image_pyramid::add_detection_template()"
            << "\n\t The number of rects in this new detection template doesn't match "
            << "\n\t the number in previous detection templates."
            << "\n\t get_num_stationary_components_per_detection_template(): " << get_num_stationary_components_per_detection_template()
            << "\n\t stationary_feature_extraction_regions.size():           " << stationary_feature_extraction_regions.size()
            << "\n\t get_num_movable_components_per_detection_template():    " << get_num_movable_components_per_detection_template()
            << "\n\t movable_feature_extraction_regions.size():              " << movable_feature_extraction_regions.size()
            << "\n\t this: " << this
            );

        for (unsigned long i = 0; i < movable_feature_extraction_regions.size(); ++i)
        {
            DLIB_ASSERT(center(movable_feature_extraction_regions[i]) == point(0,0),
                        "Invalid inputs were given to this function."
                        << "\n\t center(movable_feature_extraction_regions["<<i<<"]): " << center(movable_feature_extraction_regions[i])
                        << "\n\t this: " << this
            );
        }
#endif

        detection_template temp;
        temp.object_box = object_box;
        temp.rects = stationary_feature_extraction_regions;
        temp.movable_rects = movable_feature_extraction_regions;
        det_templates.push_back(temp);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    add_detection_template (
        const rectangle& object_box,
        const std::vector<rectangle>& stationary_feature_extraction_regions
    )
    {
        // an empty set of movable feature regions
        const std::vector<rectangle> movable_feature_extraction_regions;
        add_detection_template(object_box, stationary_feature_extraction_regions,
                               movable_feature_extraction_regions);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_num_detection_templates (
    ) const
    {
        return det_templates.size();
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_num_stationary_components_per_detection_template (
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 ,
            "\t unsigned long scan_image_pyramid::get_num_stationary_components_per_detection_template()"
            << "\n\t You need to give some detection templates before calling this function. "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t this: " << this
            );

        return det_templates[0].rects.size();
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_num_movable_components_per_detection_template (
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 ,
            "\t unsigned long scan_image_pyramid::get_num_movable_components_per_detection_template()"
            << "\n\t You need to give some detection templates before calling this function. "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t this: " << this
            );

        return det_templates[0].movable_rects.size();
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_num_components_per_detection_template (
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 ,
            "\t unsigned long scan_image_pyramid::get_num_components_per_detection_template()"
            << "\n\t You need to give some detection templates before calling this function. "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t this: " << this
            );

        return get_num_movable_components_per_detection_template() +
               get_num_stationary_components_per_detection_template();
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_num_dimensions (
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 ,
            "\t long scan_image_pyramid::get_num_dimensions()"
            << "\n\t You need to give some detection templates before calling this function. "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t this: " << this
            );

        return feats_config.get_num_dimensions()*get_num_components_per_detection_template() + get_num_detection_templates();
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_max_pyramid_levels (
    ) const
    {
        return max_pyramid_levels;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    set_max_pyramid_levels (
        unsigned long max_levels
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(max_levels > 0 ,
            "\t void scan_image_pyramid::set_max_pyramid_levels()"
            << "\n\t You can't have zero levels. "
            << "\n\t max_levels: " << max_levels
            << "\n\t this: " << this
            );

        max_pyramid_levels = max_levels;

    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    detect (
        const feature_vector_type& w,
        std::vector<std::pair<double, rectangle> >& dets,
        const double thresh
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 &&
                    is_loaded_with_image() &&
                    w.size() >= get_num_dimensions(),
            "\t void scan_image_pyramid::detect()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
            << "\n\t w.size():               " << w.size()
            << "\n\t get_num_dimensions():   " << get_num_dimensions()
            << "\n\t this: " << this
            );

        dets.clear();

        array<array2d<double> > saliency_images;
        saliency_images.set_max_size(get_num_components_per_detection_template());
        saliency_images.set_size(get_num_components_per_detection_template());
        std::vector<std::pair<unsigned int,rectangle> > stationary_region_rects(get_num_stationary_components_per_detection_template());
        std::vector<std::pair<unsigned int,rectangle> > movable_region_rects(get_num_movable_components_per_detection_template());
        pyramid_type pyr;
        std::vector<std::pair<double, point> > point_dets;

        // for all pyramid levels
        for (unsigned long l = 0; l < feats.size(); ++l)
        {
            for (unsigned long i = 0; i < saliency_images.size(); ++i)
            {
                saliency_images[i].set_size(feats[l].nr(), feats[l].nc());
                const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*i;

                // build saliency images for pyramid level l
                for (long r = 0; r < feats[l].nr(); ++r)
                {
                    for (long c = 0; c < feats[l].nc(); ++c)
                    {
                        const typename feature_extractor_type::descriptor_type& descriptor = feats[l](r,c);

                        double sum = 0;
                        for (unsigned long k = 0; k < descriptor.size(); ++k)
                        {
                            sum += w(descriptor[k].first + offset)*descriptor[k].second;
                        }
                        saliency_images[i][r][c] = sum;
                    }
                }
            }

            // now search the saliency images
            for (unsigned long i = 0; i < det_templates.size(); ++i)
            {
                const point offset = -feats[l].image_to_feat_space(point(0,0));
                for (unsigned long j = 0; j < stationary_region_rects.size(); ++j)
                {
                    stationary_region_rects[j] = std::make_pair(j, translate_rect(feats[l].image_to_feat_space(det_templates[i].rects[j]),offset));
                }
                for (unsigned long j = 0; j < movable_region_rects.size(); ++j)
                {
                    // Scale the size of the movable rectangle but make sure its center
                    // stays at point(0,0).
                    const rectangle temp = feats[l].image_to_feat_space(det_templates[i].movable_rects[j]);
                    movable_region_rects[j] = std::make_pair(j+stationary_region_rects.size(),
                                                             centered_rect(point(0,0),temp.width(), temp.height()));
                }

                // Scale the object box into the feature extraction image, but keeping it
                // centered at point(0,0).
                rectangle scaled_object_box = feats[l].image_to_feat_space(det_templates[i].object_box);
                scaled_object_box = centered_rect(point(0,0),scaled_object_box.width(), scaled_object_box.height());

                // Each detection template gets its own special threshold in addition to
                // the global detection threshold.  This allows us to model the fact that
                // some detection templates might be more prone to false alarming or since
                // their size is different naturally require a larger or smaller threshold
                // (since they integrate over a larger or smaller region of the image).
                const double template_specific_thresh = w(i);

                scan_image_movable_parts(point_dets, saliency_images, scaled_object_box,
                                         stationary_region_rects, movable_region_rects,
                                         thresh+template_specific_thresh, max_dets_per_template);

                // convert all the point detections into rectangles at the original image scale and coordinate system
                for (unsigned long j = 0; j < point_dets.size(); ++j)
                {
                    const double score = point_dets[j].first-template_specific_thresh;
                    point p = point_dets[j].second;
                    p = feats[l].feat_to_image_space(p);
                    rectangle rect = translate_rect(det_templates[i].object_box, p);
                    rect = pyr.rect_up(rect, l);

                    dets.push_back(std::make_pair(score, rect));
                }
            }
        }

        std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    const rectangle scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_best_matching_rect (
        const rectangle& rect
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 ,
            "\t const rectangle scan_image_pyramid::get_best_matching_rect()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t this: " << this
            );

        rectangle mapped_rect, object_box;
        detection_template best_template;
        unsigned long best_level, junk;
        get_mapped_rect_and_metadata(max_pyramid_levels, rect, mapped_rect, best_template, object_box, best_level, junk);
        return mapped_rect;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_mapped_rect_and_metadata (
        const unsigned long number_pyramid_levels,
        rectangle rect,
        rectangle& mapped_rect,
        detection_template& best_template,
        rectangle& object_box,
        unsigned long& best_level,
        unsigned long& detection_template_idx
    ) const
    {
        pyramid_type pyr;
        // Figure out the pyramid level which best matches rect against one of our
        // detection template object boxes.
        best_level = 0;
        double best_match_score = -1;


        // Find the best matching detection template for rect
        for (unsigned long l = 0; l < number_pyramid_levels; ++l)
        {
            const rectangle temp = pyr.rect_down(rect,l);
            if (temp.area() <= 1)
                break;

            // At this pyramid level, what matches best?
            for (unsigned long t = 0; t < det_templates.size(); ++t)
            {
                const double match_score = get_match_score(det_templates[t].object_box, temp);
                if (match_score > best_match_score)
                {
                    best_match_score = match_score;
                    best_level = l;
                    best_template = det_templates[t];
                    detection_template_idx = t;
                }
            }
        }


        // Now we translate best_template into the right spot (it should be centered at the location
        // determined by rect) and convert it into the feature image coordinate system.
        rect = pyr.rect_down(rect,best_level);
        const point offset = -feats_config.image_to_feat_space(point(0,0));
        const point origin = feats_config.image_to_feat_space(center(rect)) + offset;
        for (unsigned long k = 0; k < best_template.rects.size(); ++k)
        {
            rectangle temp = best_template.rects[k];
            temp = feats_config.image_to_feat_space(temp);
            temp = translate_rect(temp, origin);
            best_template.rects[k] = temp;
        }
        for (unsigned long k = 0; k < best_template.movable_rects.size(); ++k)
        {
            rectangle temp = best_template.movable_rects[k];
            temp = feats_config.image_to_feat_space(temp);
            temp = centered_rect(point(0,0), temp.width(), temp.height());
            best_template.movable_rects[k] = temp;
        }

        const rectangle scaled_object_box = feats_config.image_to_feat_space(best_template.object_box);
        object_box = centered_rect(origin-offset, scaled_object_box.width(), scaled_object_box.height());

        // The input rectangle was mapped to one of the detection templates.  Reverse the process
        // to figure out what the mapped rectangle is in the original input space.
        mapped_rect = translate_rect(best_template.object_box, feats_config.feat_to_image_space(origin-offset));
        mapped_rect = pyr.rect_up(mapped_rect, best_level);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    full_object_detection scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_full_object_detection (
        const rectangle& rect,
        const feature_vector_type& w
    ) const
    {
        // fill in movable part positions.

        rectangle mapped_rect;
        detection_template best_template;
        unsigned long best_level, junk;
        rectangle object_box;
        get_mapped_rect_and_metadata(feats.size(), rect, mapped_rect, best_template, object_box, best_level, junk);

        Pyramid_type pyr;

        array2d<double> saliency_image, sum_img;

        double total_temp_score = 0;
        // convert into feature space.
        object_box = object_box.intersect(get_rect(feats[best_level]));

        std::vector<point> movable_parts;
        movable_parts.reserve(get_num_movable_components_per_detection_template());
        for (unsigned long i = 0; i < get_num_movable_components_per_detection_template(); ++i)
        {
            // make the saliency_image for the ith movable part.

            const rectangle part_rect = best_template.movable_rects[i];
            const rectangle area = grow_rect(object_box,
                                             part_rect.width()/2,
                                             part_rect.height()/2).intersect(get_rect(feats[best_level]));

            saliency_image.set_size(area.height(), area.width());
            const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*(i+get_num_stationary_components_per_detection_template());

            // build saliency image for pyramid level best_level
            for (long r = area.top(); r <= area.bottom(); ++r)
            {
                for (long c = area.left(); c <= area.right(); ++c)
                {
                    const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);

                    double sum = 0;
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        sum += w(descriptor[k].first + offset)*descriptor[k].second;
                    }
                    saliency_image[r-area.top()][c-area.left()] = sum;
                }
            }

            sum_img.set_size(saliency_image.nr(), saliency_image.nc());
            sum_filter_assign(saliency_image, sum_img, part_rect);
            // Figure out where the maximizer is in sum_img.  Note that we
            // only look in the part of sum_img that corresponds to a location inside
            // object_box.
            rectangle valid_area = get_rect(sum_img);
            valid_area.left()   += object_box.left()   - area.left();
            valid_area.top()    += object_box.top()    - area.top();
            valid_area.right()  += object_box.right()  - area.right();
            valid_area.bottom() += object_box.bottom() - area.bottom();
            double max_val = 0;
            point max_loc;
            for (long r = valid_area.top(); r <= valid_area.bottom(); ++r)
            {
                for (long c = valid_area.left(); c <= valid_area.right(); ++c)
                {
                    if (sum_img[r][c] > max_val)
                    {
                        //if (object_box.contains(point(c,r) + area.tl_corner()))
                        {
                            max_loc = point(c,r);
                            max_val = sum_img[r][c];
                        }
                    }
                }
            }

            if (max_val <= 0)
            {
                max_loc = OBJECT_PART_NOT_PRESENT;
            }
            else
            {
                total_temp_score += max_val;
                // convert max_loc back into feature image space from our cropped image.
                max_loc += area.tl_corner();

                // now convert from feature space to image space.
                max_loc = feats[best_level].feat_to_image_space(max_loc);
                max_loc = pyr.point_up(max_loc, best_level);
                max_loc = nearest_point(rect, max_loc);
            }

            movable_parts.push_back(max_loc);
        }

        return full_object_detection(rect, movable_parts);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_feature_vector (
        const full_object_detection& obj,
        feature_vector_type& psi
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(get_num_detection_templates() > 0 &&
                    is_loaded_with_image() &&
                    psi.size() >= get_num_dimensions() &&
                    obj.num_parts() == get_num_movable_components_per_detection_template(),
            "\t void scan_image_pyramid::get_feature_vector()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t get_num_detection_templates(): " << get_num_detection_templates()
            << "\n\t is_loaded_with_image(): " << is_loaded_with_image()
            << "\n\t psi.size():             " << psi.size()
            << "\n\t get_num_dimensions():   " << get_num_dimensions()
            << "\n\t get_num_movable_components_per_detection_template(): " << get_num_movable_components_per_detection_template()
            << "\n\t obj.num_parts():                            " << obj.num_parts()
            << "\n\t this: " << this
            );
        DLIB_ASSERT(all_parts_in_rect(obj),
            "\t void scan_image_pyramid::get_feature_vector()"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t obj.get_rect(): " << obj.get_rect()
            << "\n\t this: " << this
        );


        rectangle mapped_rect;
        detection_template best_template;
        unsigned long best_level, detection_template_idx;
        rectangle object_box;
        get_mapped_rect_and_metadata(feats.size(), obj.get_rect(), mapped_rect, best_template, object_box, best_level, detection_template_idx);

        psi(detection_template_idx) -= 1;

        Pyramid_type pyr;

        // put the movable rects at the places indicated by obj.
        std::vector<rectangle> rects = best_template.rects;
        for (unsigned long i = 0; i < obj.num_parts(); ++i)
        {
            if (obj.part(i) != OBJECT_PART_NOT_PRESENT)
            {
                // map from the original image to scaled feature space.
                point loc = feats[best_level].image_to_feat_space(pyr.point_down(obj.part(i), best_level));
                // Make sure the movable part always stays within the object_box.
                // Otherwise it would be at a place that the detect() function can never
                // look.
                loc = nearest_point(object_box, loc);
                rects.push_back(translate_rect(best_template.movable_rects[i], loc));
            }
            else
            {
                // add an empty rectangle since this part wasn't observed.
                rects.push_back(rectangle());
            }
        }

        // pull features out of all the boxes in rects.
        for (unsigned long j = 0; j < rects.size(); ++j)
        {
            const rectangle rect = rects[j].intersect(get_rect(feats[best_level]));
            const unsigned long template_region_id = j;
            const unsigned long offset = get_num_detection_templates() + feats_config.get_num_dimensions()*template_region_id;
            for (long r = rect.top(); r <= rect.bottom(); ++r)
            {
                for (long c = rect.left(); c <= rect.right(); ++c)
                {
                    const typename feature_extractor_type::descriptor_type& descriptor = feats[best_level](r,c);
                    for (unsigned long k = 0; k < descriptor.size(); ++k)
                    {
                        psi(descriptor[k].first + offset) += descriptor[k].second;
                    }
                }
            }
        }

    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    void scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    set_min_pyramid_layer_size (
        unsigned long width,
        unsigned long height
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(width > 0 && height > 0 ,
            "\t void scan_image_pyramid::set_min_pyramid_layer_size()"
            << "\n\t These sizes can't be zero. "
            << "\n\t width:  " << width
            << "\n\t height: " << height
            << "\n\t this:   " << this
            );

        min_pyramid_layer_width = width;
        min_pyramid_layer_height = height;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_min_pyramid_layer_width (
    ) const
    {
        return min_pyramid_layer_width;
    }

// ----------------------------------------------------------------------------------------

    template <
        typename Pyramid_type,
        typename Feature_extractor_type
        >
    unsigned long scan_image_pyramid<Pyramid_type,Feature_extractor_type>::
    get_min_pyramid_layer_height (
    ) const
    {
        return min_pyramid_layer_height;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SCAN_IMaGE_PYRAMID_Hh_