687 lines
21 KiB
C++
687 lines
21 KiB
C++
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_DNn_TENSOR_H_
|
|
#define DLIB_DNn_TENSOR_H_
|
|
|
|
#include "tensor_abstract.h"
|
|
#include <cstring>
|
|
#include "../matrix.h"
|
|
#include "cudnn_dlibapi.h"
|
|
#include "gpu_data.h"
|
|
#include "../byte_orderer.h"
|
|
#include <memory>
|
|
#include "../any.h"
|
|
|
|
namespace dlib
|
|
{
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
// Forward declaration so the CUDA helpers below can take a tensor by reference
// before the class itself has been defined.
class tensor;

namespace cuda
{
    // Only declared here.  tensor::operator=(float) calls this in CUDA builds
    // when the tensor's data reports device_ready().
    void set_tensor (tensor& t, float value);

    // Only declared here.  tensor::operator*=(float) calls this in CUDA builds.
    void scale_tensor (tensor& t, float value);
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class tensor
|
|
{
|
|
public:
|
|
|
|
tensor (
|
|
) :
|
|
m_n(0), m_k(0), m_nr(0), m_nc(0), m_size(0)
|
|
{
|
|
}
|
|
|
|
virtual ~tensor() {}
|
|
|
|
long long num_samples() const { return m_n; }
|
|
long long k() const { return m_k; }
|
|
long long nr() const { return m_nr; }
|
|
long long nc() const { return m_nc; }
|
|
size_t size() const { return m_size; }
|
|
|
|
typedef float* iterator;
|
|
typedef const float* const_iterator;
|
|
iterator begin() { return host(); }
|
|
const_iterator begin() const { return host(); }
|
|
iterator end() { return host()+size(); }
|
|
const_iterator end() const { return host()+size(); }
|
|
|
|
void async_copy_to_device() const
|
|
{
|
|
data().async_copy_to_device();
|
|
}
|
|
|
|
virtual const float* host() const = 0;
|
|
virtual float* host() = 0;
|
|
virtual float* host_write_only() = 0;
|
|
virtual const float* device() const = 0;
|
|
virtual float* device() = 0;
|
|
virtual float* device_write_only() = 0;
|
|
|
|
virtual const any& annotation() const = 0;
|
|
virtual any& annotation() = 0;
|
|
|
|
int device_id() const { return data().device_id(); }
|
|
|
|
tensor& operator= (float val)
|
|
{
|
|
#ifdef DLIB_USE_CUDA
|
|
// If you are using CUDA then presumably you will be mostly using tensors on
|
|
// the GPU. So unless you seem to be actively working with the host side's
|
|
// data then we do this initialization on the device side since this avoids a
|
|
// host to device transfer that would likely immediately follow.
|
|
if (data().device_ready())
|
|
{
|
|
cuda::set_tensor(*this, val);
|
|
return *this;
|
|
}
|
|
#endif
|
|
auto d = host_write_only();
|
|
for (size_t i = 0; i < size(); ++i)
|
|
d[i] = val;
|
|
|
|
return *this;
|
|
}
|
|
|
|
tensor& operator*= (float val)
|
|
{
|
|
#ifdef DLIB_USE_CUDA
|
|
cuda::scale_tensor(*this, val);
|
|
return *this;
|
|
#else
|
|
for (auto& d : *this)
|
|
d *= val;
|
|
|
|
return *this;
|
|
#endif
|
|
}
|
|
|
|
tensor& operator/= (float val)
|
|
{
|
|
*this *= 1.0/val;
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
tensor& operator= (const matrix_exp<EXP>& item)
|
|
{
|
|
DLIB_CASSERT(num_samples() == item.nr() &&
|
|
nr()*nc()*k() == item.nc());
|
|
static_assert((is_same_type<float, typename EXP::type>::value == true),
|
|
"To assign a matrix to a tensor the matrix must contain float values");
|
|
|
|
set_ptrm(host_write_only(), m_n, m_nr*m_nc*m_k) = item;
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
tensor& operator+= (const matrix_exp<EXP>& item)
|
|
{
|
|
DLIB_CASSERT(num_samples() == item.nr() &&
|
|
nr()*nc()*k() == item.nc());
|
|
static_assert((is_same_type<float, typename EXP::type>::value == true),
|
|
"To assign a matrix to a tensor the matrix must contain float values");
|
|
set_ptrm(host(), m_n, m_nr*m_nc*m_k) += item;
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
tensor& operator-= (const matrix_exp<EXP>& item)
|
|
{
|
|
DLIB_CASSERT(num_samples() == item.nr() &&
|
|
nr()*nc()*k() == item.nc());
|
|
static_assert((is_same_type<float, typename EXP::type>::value == true),
|
|
"To assign a matrix to a tensor the matrix must contain float values");
|
|
set_ptrm(host(), m_n, m_nr*m_nc*m_k) -= item;
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
void set_sample (
|
|
unsigned long long idx,
|
|
const matrix_exp<EXP>& item
|
|
)
|
|
{
|
|
DLIB_CASSERT(idx < (unsigned long long)num_samples());
|
|
DLIB_CASSERT(item.size() == nr()*nc()*k());
|
|
static_assert((is_same_type<float, typename EXP::type>::value == true),
|
|
"To assign a matrix to a tensor the matrix must contain float values");
|
|
set_ptrm(host()+idx*item.size(), item.nr(), item.nc()) = item;
|
|
}
|
|
|
|
|
|
template <typename EXP>
|
|
void add_to_sample (
|
|
unsigned long long idx,
|
|
const matrix_exp<EXP>& item
|
|
)
|
|
{
|
|
DLIB_CASSERT(idx < (unsigned long long)num_samples());
|
|
DLIB_CASSERT(item.size() == nr()*nc()*k());
|
|
static_assert((is_same_type<float, typename EXP::type>::value == true),
|
|
"To assign a matrix to a tensor the matrix must contain float values");
|
|
set_ptrm(host()+idx*item.size(), item.nr(), item.nc()) += item;
|
|
}
|
|
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
|
|
) const = 0;
|
|
#endif
|
|
|
|
friend void memcpy (
|
|
tensor& dest,
|
|
const tensor& src
|
|
)
|
|
{
|
|
DLIB_CASSERT(dest.size() == src.size());
|
|
memcpy(dest.data(), dest.get_alias_offset(),
|
|
src.data(), src.get_alias_offset(),
|
|
src.size());
|
|
}
|
|
|
|
|
|
protected:
|
|
|
|
friend class alias_tensor;
|
|
|
|
virtual gpu_data& data() = 0;
|
|
virtual const gpu_data& data() const = 0;
|
|
virtual size_t get_alias_offset() const { return 0; } // needed by alias_tensor.
|
|
|
|
long long m_n;
|
|
long long m_k;
|
|
long long m_nr;
|
|
long long m_nc;
|
|
long long m_size; // always equal to m_n*m_k*m_nr*m_nc
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline bool is_vector (
|
|
const tensor& t
|
|
)
|
|
{
|
|
return t.size() == (size_t)t.num_samples() ||
|
|
t.size() == (size_t)t.k() ||
|
|
t.size() == (size_t)t.nr() ||
|
|
t.size() == (size_t)t.nc();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
// Returns a matrix expression that views the host data of t as an nr x nc
// matrix.  nr and nc must be non-negative and nr*nc must equal t.size().
inline const matrix_op<op_pointer_to_mat<float> > mat (
    const tensor& t,
    long long nr,
    long long nc
)
{
    DLIB_ASSERT(nr >= 0 && nc >= 0 ,
        "\tconst matrix_exp mat(tensor, nr, nc)"
        << "\n\t nr and nc must be >= 0"
        << "\n\t nr: " << nr
        << "\n\t nc: " << nc
    );
    DLIB_ASSERT(nr*nc == (long long)t.size() ,
        "\tconst matrix_exp mat(tensor, nr, nc)"
        << "\n\t The sizes don't match up."
        << "\n\t nr*nc: " << nr*nc
        << "\n\t t.size(): " << t.size()
    );

    using pointer_op = op_pointer_to_mat<float>;
    const float* const elements = t.host();
    return matrix_op<pointer_op>(pointer_op(elements, nr, nc));
}
|
|
|
|
// Returns a matrix view of t with one row per sample, i.e.
// num_samples() rows of size()/num_samples() columns.  An empty tensor yields
// a 0x0 view over a null pointer.
inline const matrix_op<op_pointer_to_mat<float> > mat (
    const tensor& t
)
{
    // Handle the empty case first so the division below never sees a zero
    // sample count.
    if (t.size() == 0)
        return mat((float*)0,0,0);

    return mat(t, t.num_samples(), t.size()/t.num_samples());
}
|
|
|
|
// Returns an nr() x nc() matrix view of a single plane of t, selected by the
// sample index and the k index.  Both indices must be in range and t must not
// be empty.
inline const matrix_op<op_pointer_to_mat<float> > image_plane (
    const tensor& t,
    long long sample = 0,
    long long k = 0
)
{
    DLIB_ASSERT(0 <= sample && sample < t.num_samples() &&
                0 <= k && k < t.k() &&
                t.size() != 0,
        "\tconst matrix_exp image_plane(tensor,sample,k)"
        << "\n\t Invalid arguments were given to this function."
        << "\n\t sample: " << sample
        << "\n\t k: " << k
        << "\n\t t.num_samples(): " << t.num_samples()
        << "\n\t t.k(): " << t.k()
        << "\n\t t.size(): " << t.size()
    );

    using pointer_op = op_pointer_to_mat<float>;

    // Number of floats that precede the requested plane in the host buffer.
    const long long plane_offset = ((sample*t.k() + k)*t.nr())*t.nc();
    const float* const plane_begin = t.host() + plane_offset;

    return matrix_op<pointer_op>(pointer_op(plane_begin, t.nr(), t.nc()));
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline bool have_same_dimensions (
|
|
const tensor& a,
|
|
const tensor& b
|
|
)
|
|
{
|
|
return a.num_samples() == b.num_samples() &&
|
|
a.k() == b.k() &&
|
|
a.nr() == b.nr() &&
|
|
a.nc() == b.nc();
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class resizable_tensor : public tensor
|
|
{
|
|
public:
|
|
resizable_tensor(
|
|
)
|
|
{}
|
|
|
|
template <typename EXP>
|
|
resizable_tensor(
|
|
const matrix_exp<EXP>& item
|
|
)
|
|
{
|
|
set_size(item.nr(), item.nc());
|
|
*this = item;
|
|
}
|
|
|
|
explicit resizable_tensor(
|
|
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
|
|
)
|
|
{
|
|
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
|
|
|
|
set_size(n_,k_,nr_,nc_);
|
|
}
|
|
|
|
resizable_tensor(const resizable_tensor& item) : _annotation(item.annotation())
|
|
{
|
|
copy_size(item);
|
|
memcpy(*this, item);
|
|
}
|
|
resizable_tensor(const tensor& item) : _annotation(item.annotation())
|
|
{
|
|
copy_size(item);
|
|
memcpy(*this, item);
|
|
}
|
|
|
|
resizable_tensor(resizable_tensor&& item) { swap(item); }
|
|
resizable_tensor& operator=(resizable_tensor&& item) { swap(item); return *this; }
|
|
|
|
virtual const float* host() const { return data_instance.host(); }
|
|
virtual float* host() { return data_instance.host(); }
|
|
virtual float* host_write_only() { return data_instance.host_write_only(); }
|
|
virtual const float* device() const { return data_instance.device(); }
|
|
virtual float* device() { return data_instance.device(); }
|
|
virtual float* device_write_only() { return data_instance.device_write_only(); }
|
|
|
|
virtual const any& annotation() const { return _annotation; }
|
|
virtual any& annotation() { return _annotation; }
|
|
|
|
void clear(
|
|
)
|
|
{
|
|
set_size(0,0,0,0);
|
|
_annotation.clear();
|
|
// free underlying memory
|
|
data_instance.set_size(0);
|
|
}
|
|
|
|
void copy_size (
|
|
const tensor& item
|
|
)
|
|
{
|
|
set_size(item.num_samples(), item.k(), item.nr(), item.nc());
|
|
}
|
|
|
|
resizable_tensor& operator= (float val)
|
|
{
|
|
tensor::operator=(val);
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
resizable_tensor& operator= (
|
|
const matrix_exp<EXP>& item
|
|
)
|
|
{
|
|
if (!(num_samples() == item.nr() && k()*nr()*nc() == item.nc()))
|
|
set_size(item.nr(), item.nc());
|
|
tensor::operator=(item);
|
|
return *this;
|
|
}
|
|
|
|
void set_size(
|
|
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
|
|
)
|
|
{
|
|
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
|
|
|
|
m_n = n_;
|
|
m_k = k_;
|
|
m_nr = nr_;
|
|
m_nc = nc_;
|
|
m_size = n_*k_*nr_*nc_;
|
|
if ((long long)data_instance.size() < m_size)
|
|
data_instance.set_size(m_size);
|
|
#ifdef DLIB_USE_CUDA
|
|
cudnn_descriptor.set_size(m_n,m_k,m_nr,m_nc);
|
|
#endif
|
|
}
|
|
|
|
|
|
resizable_tensor& operator= (const resizable_tensor& item)
|
|
{
|
|
resizable_tensor temp(item);
|
|
temp.swap(*this);
|
|
return *this;
|
|
}
|
|
|
|
resizable_tensor& operator= (const tensor& item)
|
|
{
|
|
resizable_tensor temp(item);
|
|
temp.swap(*this);
|
|
return *this;
|
|
}
|
|
|
|
|
|
void swap(resizable_tensor& item)
|
|
{
|
|
std::swap(m_n, item.m_n);
|
|
std::swap(m_k, item.m_k);
|
|
std::swap(m_nr, item.m_nr);
|
|
std::swap(m_nc, item.m_nc);
|
|
std::swap(m_size, item.m_size);
|
|
std::swap(data_instance, item.data_instance);
|
|
std::swap(_annotation, item._annotation);
|
|
#ifdef DLIB_USE_CUDA
|
|
std::swap(cudnn_descriptor, item.cudnn_descriptor);
|
|
#endif
|
|
}
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
|
|
) const { return cudnn_descriptor; }
|
|
#endif
|
|
|
|
private:
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
cuda::tensor_descriptor cudnn_descriptor;
|
|
#endif
|
|
|
|
gpu_data data_instance;
|
|
any _annotation;
|
|
virtual gpu_data& data() { return data_instance; }
|
|
virtual const gpu_data& data() const { return data_instance; }
|
|
};
|
|
|
|
inline void serialize(const tensor& item, std::ostream& out)
|
|
{
|
|
int version = 2;
|
|
serialize(version, out);
|
|
serialize(item.num_samples(), out);
|
|
serialize(item.k(), out);
|
|
serialize(item.nr(), out);
|
|
serialize(item.nc(), out);
|
|
byte_orderer bo;
|
|
auto sbuf = out.rdbuf();
|
|
for (auto d : item)
|
|
{
|
|
// Write out our data as 4byte little endian IEEE floats rather than using
|
|
// dlib's default float serialization. We do this because it will result in
|
|
// more compact outputs. It's slightly less portable but it seems doubtful
|
|
// that any CUDA enabled platform isn't going to use IEEE floats. But if one
|
|
// does we can just update the serialization code here to handle it if such a
|
|
// platform is encountered.
|
|
bo.host_to_little(d);
|
|
static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
|
|
sbuf->sputn((char*)&d, sizeof(d));
|
|
}
|
|
}
|
|
|
|
// Reads a tensor previously written by serialize(const tensor&, ...): a version
// number (must be 2), the four dimensions, and then size() raw 4 byte little
// endian floats.
//
// Throws serialization_error if the version is unexpected, if any dimension is
// negative, or if the stream ends before all the elements have been read (in
// which case badbit is also set on in).
inline void deserialize(resizable_tensor& item, std::istream& in)
{
    int version = 0;
    deserialize(version, in);
    if (version != 2)
        throw serialization_error("Unexpected version found while deserializing dlib::resizable_tensor.");

    long long num_samples=0, k=0, nr=0, nc=0;
    deserialize(num_samples, in);
    deserialize(k, in);
    deserialize(nr, in);
    deserialize(nc, in);

    // The dimensions come straight from the stream.  set_size() only checks them
    // with DLIB_ASSERT, which is compiled out of non-debug builds, and a negative
    // value would make size() wrap around to a huge number and send the read loop
    // below far past the end of the buffer.  So reject bad input explicitly.
    if (num_samples < 0 || k < 0 || nr < 0 || nc < 0)
        throw serialization_error("Invalid negative dimension found while deserializing dlib::resizable_tensor.");

    item.set_size(num_samples, k, nr, nc);

    byte_orderer bo;
    auto sbuf = in.rdbuf();
    for (auto& d : item)
    {
        static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
        if (sbuf->sgetn((char*)&d,sizeof(d)) != sizeof(d))
        {
            in.setstate(std::ios::badbit);
            throw serialization_error("Error reading data while deserializing dlib::resizable_tensor.");
        }
        // Stored as little endian; convert to the host's byte order.
        bo.little_to_host(d);
    }
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline double dot(
|
|
const tensor& a,
|
|
const tensor& b
|
|
)
|
|
{
|
|
DLIB_CASSERT(a.size() == b.size());
|
|
const float* da = a.host();
|
|
const float* db = b.host();
|
|
double sum = 0;
|
|
for (size_t i = 0; i < a.size(); ++i)
|
|
sum += da[i]*db[i];
|
|
return sum;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class alias_tensor_instance : public tensor
|
|
{
|
|
alias_tensor_instance(
|
|
) : data_instance(0), _annotation(0), data_offset(0) {}
|
|
|
|
public:
|
|
friend class alias_tensor;
|
|
friend class alias_tensor_const_instance;
|
|
|
|
alias_tensor_instance& operator= (float val)
|
|
{
|
|
tensor::operator=(val);
|
|
return *this;
|
|
}
|
|
|
|
template <typename EXP>
|
|
alias_tensor_instance& operator= (const matrix_exp<EXP>& item)
|
|
{
|
|
tensor::operator=(item);
|
|
return *this;
|
|
}
|
|
|
|
virtual const float* host() const { return data_instance->host()+data_offset; }
|
|
virtual float* host() { return data_instance->host()+data_offset; }
|
|
virtual float* host_write_only() { return data_instance->host()+data_offset; }
|
|
virtual const float* device() const { return data_instance->device()+data_offset; }
|
|
virtual float* device() { return data_instance->device()+data_offset; }
|
|
virtual float* device_write_only() { return data_instance->device()+data_offset; }
|
|
|
|
virtual const any& annotation() const { return *_annotation; }
|
|
virtual any& annotation() { return *_annotation; }
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
|
|
) const { return *cudnn_descriptor; }
|
|
#endif
|
|
private:
|
|
|
|
virtual size_t get_alias_offset() const { return data_offset; }
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
std::shared_ptr<cuda::tensor_descriptor> cudnn_descriptor;
|
|
#endif
|
|
gpu_data* data_instance;
|
|
any* _annotation;
|
|
size_t data_offset;
|
|
virtual gpu_data& data() { return *data_instance; }
|
|
virtual const gpu_data& data() const { return *data_instance; }
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class alias_tensor_const_instance
|
|
{
|
|
public:
|
|
const tensor& get() const { return inst; }
|
|
operator const tensor& () { return inst; }
|
|
|
|
alias_tensor_const_instance(const alias_tensor_instance& item) : inst(item) {}
|
|
|
|
private:
|
|
alias_tensor_instance inst;
|
|
|
|
friend class alias_tensor;
|
|
alias_tensor_const_instance() {}
|
|
};
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
class alias_tensor
|
|
{
|
|
public:
|
|
|
|
alias_tensor (
|
|
) {}
|
|
|
|
alias_tensor (
|
|
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
|
|
)
|
|
{
|
|
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
|
|
|
|
inst.m_n = n_;
|
|
inst.m_k = k_;
|
|
inst.m_nr = nr_;
|
|
inst.m_nc = nc_;
|
|
inst.m_size = n_*k_*nr_*nc_;
|
|
}
|
|
|
|
long long num_samples(
|
|
) const { return inst.m_n; }
|
|
|
|
long long k(
|
|
) const { return inst.m_k; }
|
|
|
|
long long nr(
|
|
) const { return inst.m_nr; }
|
|
|
|
long long nc(
|
|
) const { return inst.m_nc; }
|
|
|
|
size_t size(
|
|
) const { return inst.m_size; }
|
|
|
|
alias_tensor_instance operator() (
|
|
tensor& t,
|
|
size_t offset = 0
|
|
) const
|
|
{
|
|
DLIB_CASSERT(offset+size() <= t.size(),
|
|
"offset: "<<offset <<"\n"<<
|
|
"size(): "<<size() <<"\n"<<
|
|
"t.size(): "<<t.size() <<"\n");
|
|
|
|
#ifdef DLIB_USE_CUDA
|
|
if (!inst.cudnn_descriptor)
|
|
{
|
|
inst.cudnn_descriptor = std::make_shared<cuda::tensor_descriptor>();
|
|
inst.cudnn_descriptor->set_size(inst.m_n, inst.m_k, inst.m_nr, inst.m_nc);
|
|
}
|
|
#endif
|
|
inst.data_instance = &t.data();
|
|
inst._annotation = &t.annotation();
|
|
// Note that t might already be an aliasing tensor so we need to take that into
|
|
// account.
|
|
inst.data_offset = t.get_alias_offset()+offset;
|
|
return inst;
|
|
}
|
|
|
|
alias_tensor_const_instance operator() (
|
|
const tensor& t,
|
|
size_t offset = 0
|
|
) const
|
|
{
|
|
alias_tensor_const_instance temp;
|
|
temp.inst = (*this)(const_cast<tensor&>(t),offset);
|
|
return temp;
|
|
}
|
|
|
|
private:
|
|
mutable alias_tensor_instance inst;
|
|
};
|
|
|
|
inline void serialize(const alias_tensor& item, std::ostream& out)
|
|
{
|
|
int version = 1;
|
|
serialize(version, out);
|
|
serialize(item.num_samples(), out);
|
|
serialize(item.k(), out);
|
|
serialize(item.nr(), out);
|
|
serialize(item.nc(), out);
|
|
}
|
|
|
|
// Reads an alias_tensor previously written by serialize(const alias_tensor&, ...):
// a version number (must be 1) followed by the four dimensions.
//
// Throws serialization_error if the version is unexpected or if any of the
// dimensions read from the stream is negative.
inline void deserialize(alias_tensor& item, std::istream& in)
{
    int version = 0;
    deserialize(version, in);
    if (version != 1)
        throw serialization_error("Unexpected version found while deserializing dlib::alias_tensor.");

    // Initialized so they never hold indeterminate values, matching what the
    // resizable_tensor deserializer does.
    long long num_samples=0, k=0, nr=0, nc=0;
    deserialize(num_samples, in);
    deserialize(k, in);
    deserialize(nr, in);
    deserialize(nc, in);

    // The alias_tensor constructor only checks its arguments with DLIB_ASSERT,
    // which is compiled out of non-debug builds, so validate the input here.
    if (num_samples < 0 || k < 0 || nr < 0 || nc < 0)
        throw serialization_error("Invalid negative dimension found while deserializing dlib::alias_tensor.");

    item = alias_tensor(num_samples, k, nr, nc);
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_DNn_TENSOR_H_
|
|
|