392 lines
10 KiB
C++
392 lines
10 KiB
C++
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_DNN_CuDNN_H_
|
|
#define DLIB_DNN_CuDNN_H_
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
|
|
#include "cuda_errors.h"
|
|
#include <memory>
|
|
#include "cuda_data_ptr.h"
|
|
|
|
namespace dlib
|
|
{
|
|
class tensor;
|
|
class resizable_tensor;
|
|
|
|
namespace cuda
|
|
{
|
|
|
|
// -----------------------------------------------------------------------------------
|
|
|
|
class tensor_descriptor
|
|
{
|
|
/*!
|
|
Each tensor object will carry a tensor_descriptor in it when compiled with
|
|
CUDA.
|
|
!*/
|
|
|
|
public:
|
|
// not copyable
|
|
tensor_descriptor(const tensor_descriptor&) = delete;
|
|
tensor_descriptor& operator=(const tensor_descriptor&) = delete;
|
|
// but is movable
|
|
tensor_descriptor(tensor_descriptor&& item) : tensor_descriptor() { swap(item); }
|
|
tensor_descriptor& operator=(tensor_descriptor&& item) { swap(item); return *this; }
|
|
|
|
tensor_descriptor();
|
|
~tensor_descriptor();
|
|
|
|
void set_size(
|
|
int n,
|
|
int k,
|
|
int nr,
|
|
int nc
|
|
);
|
|
/*!
|
|
ensures
|
|
- if any of the arguments are 0 then they are all set to 0 in the tensor.
|
|
!*/
|
|
|
|
void get_size (
|
|
int& n,
|
|
int& k,
|
|
int& nr,
|
|
int& nc
|
|
) const;
|
|
|
|
const void* get_handle (
|
|
) const { return handle; }
|
|
|
|
private:
|
|
|
|
void swap(tensor_descriptor& item) { std::swap(handle, item.handle); }
|
|
|
|
void* handle;
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void add(
|
|
float beta,
|
|
tensor& dest,
|
|
float alpha,
|
|
const tensor& src
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void assign_conv_bias_gradient (
|
|
tensor& grad,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void batch_normalize_inference (
|
|
const double eps,
|
|
resizable_tensor& dest,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
const tensor& beta,
|
|
const tensor& running_means,
|
|
const tensor& running_variances
|
|
);
|
|
|
|
void batch_normalize (
|
|
const double eps,
|
|
resizable_tensor& dest,
|
|
resizable_tensor& means,
|
|
resizable_tensor& invstds,
|
|
const double averaging_factor,
|
|
resizable_tensor& running_means,
|
|
resizable_tensor& running_variances,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
const tensor& beta
|
|
);
|
|
|
|
void batch_normalize_gradient(
|
|
const double eps,
|
|
const tensor& gradient_input,
|
|
const tensor& means,
|
|
const tensor& invstds,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
tensor& src_grad,
|
|
tensor& gamma_grad,
|
|
tensor& beta_grad
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void batch_normalize_conv_inference (
|
|
const double eps,
|
|
resizable_tensor& dest,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
const tensor& beta,
|
|
const tensor& running_means,
|
|
const tensor& running_variances
|
|
);
|
|
|
|
void batch_normalize_conv (
|
|
const double eps,
|
|
resizable_tensor& dest,
|
|
resizable_tensor& means,
|
|
resizable_tensor& invstds,
|
|
const double averaging_factor,
|
|
resizable_tensor& running_means,
|
|
resizable_tensor& running_variances,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
const tensor& beta
|
|
);
|
|
|
|
void batch_normalize_conv_gradient(
|
|
const double eps,
|
|
const tensor& gradient_input,
|
|
const tensor& means,
|
|
const tensor& invstds,
|
|
const tensor& src,
|
|
const tensor& gamma,
|
|
tensor& src_grad,
|
|
tensor& gamma_grad,
|
|
tensor& beta_grad
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
class tensor_conv
|
|
{
|
|
public:
|
|
tensor_conv(const tensor_conv&) = delete;
|
|
tensor_conv& operator=(const tensor_conv&) = delete;
|
|
|
|
tensor_conv();
|
|
|
|
void clear(
|
|
);
|
|
|
|
~tensor_conv (
|
|
);
|
|
|
|
void operator() (
|
|
const bool add_to_output,
|
|
tensor& output,
|
|
const tensor& data,
|
|
const tensor& filters
|
|
);
|
|
|
|
void operator() (
|
|
const bool add_to_output,
|
|
resizable_tensor& output,
|
|
const tensor& data,
|
|
const tensor& filters
|
|
);
|
|
|
|
void get_gradient_for_data (
|
|
const bool add_to_output,
|
|
const tensor& gradient_input,
|
|
const tensor& filters,
|
|
tensor& data_gradient
|
|
);
|
|
|
|
void get_gradient_for_filters (
|
|
const bool add_to_output,
|
|
const tensor& gradient_input,
|
|
const tensor& data,
|
|
tensor& filters_gradient
|
|
);
|
|
|
|
void setup(
|
|
const tensor& data,
|
|
const tensor& filters,
|
|
int stride_y,
|
|
int stride_x,
|
|
int padding_y,
|
|
int padding_x
|
|
);
|
|
|
|
private:
|
|
|
|
// These variables record the type of data given to the last call to setup().
|
|
int stride_y;
|
|
int stride_x;
|
|
int padding_y;
|
|
int padding_x;
|
|
long data_num_samples, data_k, data_nr, data_nc;
|
|
long filters_num_samples, filters_k, filters_nr, filters_nc;
|
|
|
|
|
|
void* filter_handle;
|
|
void* conv_handle;
|
|
|
|
// dimensions of the output tensor from operator()
|
|
int out_num_samples;
|
|
int out_k;
|
|
int out_nr;
|
|
int out_nc;
|
|
|
|
// sets the three _algo fields.
|
|
void select_best_algorithms(const tensor& data, const tensor_descriptor& dest_desc);
|
|
int forward_algo;
|
|
int backward_data_algo;
|
|
int backward_filters_algo;
|
|
|
|
size_t forward_workspace_size_in_bytes;
|
|
size_t backward_data_workspace_size_in_bytes;
|
|
size_t backward_filters_workspace_size_in_bytes;
|
|
cuda_data_void_ptr forward_workspace;
|
|
cuda_data_void_ptr backward_data_workspace;
|
|
cuda_data_void_ptr backward_filters_workspace;
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
class pooling
|
|
{
|
|
public:
|
|
|
|
pooling(const pooling&) = delete;
|
|
pooling& operator=(const pooling&) = delete;
|
|
|
|
pooling (
|
|
);
|
|
|
|
~pooling(
|
|
);
|
|
|
|
void clear(
|
|
);
|
|
|
|
void setup_max_pooling(
|
|
int window_height,
|
|
int window_width,
|
|
int stride_y,
|
|
int stride_x,
|
|
int padding_y,
|
|
int padding_x
|
|
);
|
|
|
|
void setup_avg_pooling(
|
|
int window_height,
|
|
int window_width,
|
|
int stride_y,
|
|
int stride_x,
|
|
int padding_y,
|
|
int padding_x
|
|
);
|
|
|
|
bool does_max_pooling(
|
|
) const { return do_max_pooling; }
|
|
|
|
void operator() (
|
|
resizable_tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void get_gradient(
|
|
const tensor& gradient_input,
|
|
const tensor& dest,
|
|
const tensor& src,
|
|
tensor& grad
|
|
);
|
|
|
|
private:
|
|
|
|
void setup(
|
|
int window_height,
|
|
int window_width,
|
|
int stride_y,
|
|
int stride_x,
|
|
int padding_y,
|
|
int padding_x,
|
|
int pooling_mode
|
|
);
|
|
|
|
void* handle;
|
|
int window_height;
|
|
int window_width;
|
|
int stride_y;
|
|
int stride_x;
|
|
int padding_y;
|
|
int padding_x;
|
|
bool do_max_pooling;
|
|
};
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void softmax (
|
|
tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void softmax_gradient (
|
|
tensor& grad,
|
|
const tensor& dest,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void softmax_all (
|
|
tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void softmax_all_gradient (
|
|
tensor& grad,
|
|
const tensor& dest,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void sigmoid (
|
|
tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void sigmoid_gradient (
|
|
tensor& grad,
|
|
const tensor& dest,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void relu (
|
|
tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void relu_gradient (
|
|
tensor& grad,
|
|
const tensor& dest,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
void tanh (
|
|
tensor& dest,
|
|
const tensor& src
|
|
);
|
|
|
|
void tanh_gradient (
|
|
tensor& grad,
|
|
const tensor& dest,
|
|
const tensor& gradient_input
|
|
);
|
|
|
|
// ------------------------------------------------------------------------------------
|
|
|
|
}
|
|
}
|
|
|
|
#endif // DLIB_USE_CUDA
|
|
|
|
#endif // DLIB_DNN_CuDNN_H_
|
|
|