// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CPU_H_
#define DLIB_DNN_CPU_H_
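// This file declares the CPU implementations of the GPU-based functions in cuda_dlib.h
// and cudnn_dlibapi.h.  Most definitions are not in this header; cpu_dlib.cpp is
// included at the bottom of this file when NO_MAKEFILE is defined.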
#include "tensor.h"
#include "../geometry/rectangle.h"
namespace dlib
{
namespace cpu
{
// -----------------------------------------------------------------------------------
// multiply: declaration only; the definition is not in this header.
// Presumably writes the element-wise product of src1 and src2 into dest, and
// accumulates into the existing contents of dest when add_to is true.
// TODO confirm against the definition (likely cpu_dlib.cpp), including any
// size/broadcasting requirements on the three tensors.
void multiply (
bool add_to,
tensor& dest,
const tensor& src1,
const tensor& src2
);
// multiply_conv: declaration only; the definition is not in this header.
// Same parameter list as multiply(). Presumably the variant used for
// convolutional layouts (one multiplier per channel), accumulating into dest
// when add_to is true.
// NOTE(review): inferred from the "_conv" suffix only; confirm in the definition.
void multiply_conv (
bool add_to,
tensor& dest,
const tensor& src1,
const tensor& src2
);
// multiply_zero_padded: declaration only; the definition is not in this header.
// Same parameter list as multiply(). Presumably multiplies src1 and src2 while
// treating elements missing from a smaller tensor as zero, accumulating into
// dest when add_to is true.
// NOTE(review): inferred from the name; confirm in the definition.
void multiply_zero_padded (
bool add_to,
tensor& dest,
const tensor& src1,
const tensor& src2
);
// scale_channels: declaration only; the definition is not in this header.
// Presumably multiplies each channel of src by the matching entry of scales and
// stores (or, when add_to is true, accumulates) the result in dest.
// TODO confirm the expected shape of scales against the definition.
void scale_channels (
bool add_to,
tensor& dest,
const tensor& src,
const tensor& scales
);
// add (scaled overload): declaration only; the definition is not in this header.
// Presumably computes dest = beta*dest + alpha*src.
// NOTE(review): inferred from the parameter names and order; confirm in the definition.
// A second overload, add(dest, src1, src2), is declared further down in this
// namespace.
void add(
float beta,
tensor& dest,
float alpha,
const tensor& src
);
// assign_bias_gradient: declaration only; the definition is not in this header.
// Presumably reduces gradient_input into grad to form the gradient of a bias
// term, overwriting grad.
// NOTE(review): the reduction axes are not visible here; confirm in the definition.
void assign_bias_gradient (
tensor& grad,
const tensor& gradient_input
);
// add (three-tensor overload): declaration only; the definition is not in this
// header.
// Presumably computes dest = src1 + src2.
// NOTE(review): how mismatched tensor dimensions are handled cannot be
// determined from this declaration; confirm in the definition.
void add (
tensor& dest,
const tensor& src1,
const tensor& src2
);
// assign_conv_bias_gradient: declaration only; the definition is not in this
// header.
// Same parameter list as assign_bias_gradient(). Presumably the convolutional
// variant, producing one bias gradient per channel.
// NOTE(review): inferred from the name; confirm in the definition.
void assign_conv_bias_gradient (
tensor& grad,
const tensor& gradient_input
);
// -----------------------------------------------------------------------------------
// affine_transform (one source, scalar coefficients): declaration only; the
// definition is not in this header.
// Presumably computes dest = A*src + B element-wise. TODO confirm in the
// definition.
void affine_transform(
tensor& dest,
const tensor& src,
const float A,
const float B
);
// affine_transform (two sources, scalar coefficients): declaration only; the
// definition is not in this header.
// Presumably computes dest = A*src1 + B*src2 + C element-wise. TODO confirm in
// the definition.
void affine_transform(
tensor& dest,
const tensor& src1,
const tensor& src2,
const float A,
const float B,
const float C
);
// affine_transform (three sources, scalar coefficients): declaration only; the
// definition is not in this header.
// Presumably computes dest = A*src1 + B*src2 + C*src3 + D element-wise. TODO
// confirm in the definition.
void affine_transform(
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C,
const float D
);
// affine_transform_range: declaration only; the definition is not in this header.
// Takes three sources and three scalar coefficients (no constant term) plus an
// element range given by begin and end.
// Presumably computes dest = A*src1 + B*src2 + C*src3 only for element indices
// in [begin, end).
// NOTE(review): whether end is exclusive, and what happens to elements outside
// the range, must be confirmed in the definition.
void affine_transform_range(
size_t begin,
size_t end,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
const float A,
const float B,
const float C
);
// -----------------------------------------------------------------------------------
// affine_transform (tensor coefficients): declaration only; the definition is
// not in this header.
// Here A and B are tensors rather than scalars. Presumably computes
// dest = A*src + B with the coefficients applied element-wise.
// NOTE(review): the required shapes of A and B relative to src are not visible
// here; confirm in the definition.
void affine_transform(
tensor& dest,
const tensor& src,
const tensor& A,
const tensor& B
);
// -----------------------------------------------------------------------------------
// affine_transform_conv: declaration only; the definition is not in this header.
// Same parameter list as the tensor-coefficient affine_transform(). Presumably
// applies one (A, B) pair per channel, as used by convolutional layers.
// NOTE(review): inferred from the "_conv" suffix; confirm in the definition.
void affine_transform_conv(
tensor& dest,
const tensor& src,
const tensor& A,
const tensor& B
);
// -----------------------------------------------------------------------------------
// affine_transform (rectangle overload): declaration only; the definition is
// not in this header.
// Presumably computes dest = A*src1 + B*src2 + C*src3 restricted to the
// sub-region selected by rect.
// NOTE(review): how rect maps onto tensor coordinates, and whether elements
// outside rect are left untouched, must be confirmed in the definition.
void affine_transform(
const rectangle& rect,
tensor& dest,
const tensor& src1,
const tensor& src2,
const tensor& src3,
float A,
float B,
float C
);
// -----------------------------------------------------------------------------------
// compute_adam_update: declaration only; the definition is not in this header.
// Presumably performs one step of the Adam optimizer over element indices
// [begin, end). TODO confirm in the definition.
//   s            - output tensor; presumably receives the parameter update.
//   m, v         - in/out tensors; presumably the running first and second
//                  moment estimates.
//   t            - presumably the iteration counter used for bias correction.
//   learning_rate, weight_decay, momentum1, momentum2 - optimizer hyper-parameters.
//   params       - current parameter values (read-only).
//   params_grad  - gradient with respect to params (read-only).
void compute_adam_update (
size_t begin,
size_t end,
tensor& s,
tensor& m,
tensor& v,
const float t,
const float learning_rate,
const float weight_decay,
const float momentum1,
const float momentum2,
const tensor& params,
const tensor& params_grad
);
// -----------------------------------------------------------------------------------
// batch_normalize_inference: declaration only; the definition is not in this
// header.
// Presumably normalizes src with the supplied running_means and
// running_variances, then applies gamma (scale) and beta (shift), writing the
// result to dest. TODO confirm in the definition.
//   eps  - presumably a small constant added to the variance for numerical
//          stability.
//   dest - a resizable_tensor, so the callee is able to resize it.
void batch_normalize_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_variances
);
// batch_normalize: declaration only; the definition is not in this header.
// Presumably the training-time batch normalization. It would compute batch
// statistics of src, write the normalized, gamma-scaled and beta-shifted result
// to dest, and blend the batch statistics into running_means and
// running_variances using averaging_factor. TODO confirm in the definition.
//   means, invstds - outputs; presumably the per-batch means and inverse
//                    standard deviations, saved for the gradient pass.
//   eps            - presumably a small stabilising constant for the variance.
void batch_normalize (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
// batch_normalize_gradient: declaration only; the definition is not in this
// header.
// Presumably the backward pass for batch_normalize(). It takes gradient_input
// and the means/invstds saved by the forward pass, and produces gradients for
// src, gamma and beta.
// NOTE(review): whether src_grad, gamma_grad and beta_grad are overwritten or
// accumulated into cannot be determined from this declaration; confirm in the
// definition.
void batch_normalize_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// batch_normalize_conv_inference: declaration only; the definition is not in
// this header.
// Same parameter list as batch_normalize_inference(). Presumably the
// convolutional variant, with one set of statistics and one gamma/beta per
// channel.
// NOTE(review): inferred from the "_conv" infix; confirm in the definition.
void batch_normalize_conv_inference (
const double eps,
resizable_tensor& dest,
const tensor& src,
const tensor& gamma,
const tensor& beta,
const tensor& running_means,
const tensor& running_variances
);
// batch_normalize_conv: declaration only; the definition is not in this header.
// Same parameter list as batch_normalize(). Presumably the convolutional
// (per-channel) training-time variant.
// NOTE(review): inferred from the "_conv" suffix; confirm in the definition.
void batch_normalize_conv (
const double eps,
resizable_tensor& dest,
resizable_tensor& means,
resizable_tensor& invstds,
const double averaging_factor,
resizable_tensor& running_means,
resizable_tensor& running_variances,
const tensor& src,
const tensor& gamma,
const tensor& beta
);
// batch_normalize_conv_gradient: declaration only; the definition is not in
// this header.
// Same parameter list as batch_normalize_gradient(). Presumably the backward
// pass that matches batch_normalize_conv().
// NOTE(review): overwrite-vs-accumulate behaviour for the three output
// gradients must be confirmed in the definition.
void batch_normalize_conv_gradient (
const double eps,
const tensor& gradient_input,
const tensor& means,
const tensor& invstds,
const tensor& src,
const tensor& gamma,
tensor& src_grad,
tensor& gamma_grad,
tensor& beta_grad
);
// -----------------------------------------------------------------------------------
// threshold: declaration only; the definition is not in this header.
// data is the only tensor argument and is non-const, so the operation is
// presumably performed in place.
// NOTE(review): the values written for elements above/below thresh are not
// visible here; confirm in the definition.
void threshold (
tensor& data,
float thresh
);
// dot: declaration only; the definition is not in this header.
// Presumably computes the dot product of a and b and stores it in element idx
// of result.
// NOTE(review): whether the value overwrites or is added to result[idx] cannot
// be determined from this declaration; confirm in the definition.
void dot (
const tensor& a,
const tensor& b,
tensor& result,
size_t idx
);
// -----------------------------------------------------------------------------------
// softmax: declaration only; the definition is not in this header.
// Presumably writes the softmax of src into dest.
// NOTE(review): the axis the normalization runs over is not visible here;
// compare with softmax_all() and confirm in the definition.
void softmax (
tensor& dest,
const tensor& src
);
// softmax_gradient: declaration only; the definition is not in this header.
// Presumably the backward pass for softmax(). It would take dest (the forward
// output) and gradient_input (the gradient with respect to that output), and
// write the gradient with respect to the forward input into grad.
// TODO confirm in the definition, including overwrite-vs-accumulate on grad.
void softmax_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
// softmax_all: declaration only; the definition is not in this header.
// Same parameter list as softmax(). Presumably normalizes over all elements of
// each sample rather than over a single axis.
// NOTE(review): inferred from the "_all" suffix; confirm in the definition.
void softmax_all (
tensor& dest,
const tensor& src
);
// softmax_all_gradient: declaration only; the definition is not in this header.
// Same parameter list as softmax_gradient(). Presumably the backward pass that
// matches softmax_all(). TODO confirm in the definition.
void softmax_all_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
// sigmoid: declaration only; the definition is not in this header.
// Presumably applies the logistic sigmoid to every element of src and writes
// the result to dest. TODO confirm in the definition.
void sigmoid (
tensor& dest,
const tensor& src
);
// sigmoid_gradient: declaration only; the definition is not in this header.
// Presumably the backward pass for sigmoid(), computed from the forward output
// dest and the incoming gradient_input, with the result placed in grad.
// NOTE(review): overwrite-vs-accumulate on grad must be confirmed in the
// definition.
void sigmoid_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ------------------------------------------------------------------------------------
// relu: declaration only; the definition is not in this header.
// Presumably applies the rectified linear unit, max(0, x), to every element of
// src and writes the result to dest. TODO confirm in the definition.
void relu (
tensor& dest,
const tensor& src
);
// relu_gradient: declaration only; the definition is not in this header.
// Presumably the backward pass for relu(), computed from the forward output
// dest and the incoming gradient_input, with the result placed in grad.
// NOTE(review): overwrite-vs-accumulate on grad must be confirmed in the
// definition.
void relu_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ----------------------------------------------------------------------------------------
// prelu: declaration only; the definition is not in this header.
// Presumably applies a parametric ReLU to src. Non-negative inputs would pass
// through unchanged and negative inputs would be scaled by the value held in
// param.
// NOTE(review): the expected size of param is not visible here; confirm in the
// definition.
void prelu (
tensor& dest,
const tensor& src,
const tensor& param
);
// prelu_gradient: declaration only; the definition is not in this header.
// Unlike the other *_gradient declarations in this header, this one takes the
// forward input src rather than the forward output.
// Presumably writes the gradient with respect to src into grad, and the
// gradient with respect to param into params_grad.
// NOTE(review): overwrite-vs-accumulate behaviour of both outputs must be
// confirmed in the definition.
void prelu_gradient (
tensor& grad,
const tensor& src,
const tensor& gradient_input,
const tensor& param,
tensor& params_grad
);
// ------------------------------------------------------------------------------------
// tanh: declaration only; the definition is not in this header.
// Presumably applies the hyperbolic tangent to every element of src and writes
// the result to dest. TODO confirm in the definition.
// This is dlib::cpu::tanh, which is distinct from the standard-library tanh.
void tanh (
tensor& dest,
const tensor& src
);
// tanh_gradient: declaration only; the definition is not in this header.
// Presumably the backward pass for tanh(), computed from the forward output
// dest and the incoming gradient_input, with the result placed in grad.
// NOTE(review): overwrite-vs-accumulate on grad must be confirmed in the
// definition.
void tanh_gradient (
tensor& grad,
const tensor& dest,
const tensor& gradient_input
);
// ----------------------------------------------------------------------------------------
// resize_bilinear (strided overload): declaration only; the definition is not
// in this header.
// Presumably resamples src into dest with bilinear interpolation. The explicit
// row and channel strides presumably allow both tensors to be addressed with a
// non-default memory layout. TODO confirm in the definition.
// The two-argument overload in this file calls this one with nc() as the row
// stride and nr()*nc() as the channel stride for each tensor.
void resize_bilinear (
tensor& dest,
long dest_row_stride,
long dest_channel_stride,
const tensor& src,
long src_row_stride,
long src_channel_stride
);
// resize_bilinear_gradient (strided overload): declaration only; the definition
// is not in this header.
// Presumably the backward pass for the strided resize_bilinear(), propagating
// gradient_input back into grad with the given row and channel strides.
// NOTE(review): overwrite-vs-accumulate on grad must be confirmed in the
// definition.
// The two-argument overload in this file calls this one with nc() as the row
// stride and nr()*nc() as the channel stride for each tensor.
void resize_bilinear_gradient (
tensor& grad,
long grad_row_stride,
long grad_channel_stride,
const tensor& gradient_input,
long gradient_input_row_stride,
long gradient_input_channel_stride
);
// Convenience overload: forwards to the strided resize_bilinear() using, for
// each tensor, nc() as the row stride and nr()*nc() as the channel stride.
inline void resize_bilinear (
    tensor& dest,
    const tensor& src
)
{
    // Strides derived from each tensor's own dimensions.
    const long dest_row_stride     = dest.nc();
    const long dest_channel_stride = dest.nr()*dest.nc();
    const long src_row_stride      = src.nc();
    const long src_channel_stride  = src.nr()*src.nc();

    resize_bilinear(dest, dest_row_stride, dest_channel_stride,
                    src,  src_row_stride,  src_channel_stride);
}
// Convenience overload: forwards to the strided resize_bilinear_gradient()
// using, for each tensor, nc() as the row stride and nr()*nc() as the channel
// stride.
inline void resize_bilinear_gradient (
    tensor& grad,
    const tensor& gradient_input
)
{
    // Strides derived from each tensor's own dimensions.
    const long grad_row_stride     = grad.nc();
    const long grad_channel_stride = grad.nr()*grad.nc();
    const long gi_row_stride       = gradient_input.nc();
    const long gi_channel_stride   = gradient_input.nr()*gradient_input.nc();

    resize_bilinear_gradient(grad, grad_row_stride, grad_channel_stride,
                             gradient_input, gi_row_stride, gi_channel_stride);
}
// -----------------------------------------------------------------------------------
// pooling: CPU pooling operator that is configured once and then applied.
// Objects are non-copyable. Except for does_max_pooling(), every member
// function is only declared here and defined outside this header.
// NOTE(review): the data members have no in-class initializers, so their
// initial values depend entirely on the out-of-line default constructor.
// Confirm that it initialises all of them.
class pooling
{
public:
// Copying is disabled.
pooling(const pooling&) = delete;
pooling& operator=(const pooling&) = delete;
// Default constructor; defined outside this header.
pooling (
);
// Presumably resets the object to its unconfigured state. TODO confirm in the
// definition.
void clear(
);
// Presumably configures the object for max pooling with the given window
// size, strides and padding. TODO confirm validation rules in the definition.
void setup_max_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x,
int padding_y,
int padding_x
);
// Presumably configures the object for average pooling with the given window
// size, strides and padding. TODO confirm validation rules in the definition.
void setup_avg_pooling(
int window_height,
int window_width,
int stride_y,
int stride_x,
int padding_y,
int padding_x
);
// Returns the current value of the do_max_pooling flag.
bool does_max_pooling(
) const { return do_max_pooling; }
// Presumably applies the configured pooling to src and writes the result to
// dest. dest is a resizable_tensor, so the callee is able to resize it.
void operator() (
resizable_tensor& dest,
const tensor& src
);
// Presumably the backward pass. It takes gradient_input, the forward output
// dest and the forward input src, and produces grad.
// NOTE(review): overwrite-vs-accumulate on grad must be confirmed in the
// definition.
void get_gradient(
const tensor& gradient_input,
const tensor& dest,
const tensor& src,
tensor& grad
);
private:
// Pooling configuration; presumably assigned by the setup_*_pooling() calls.
int window_height;
int window_width;
int stride_y;
int stride_x;
int padding_y;
int padding_x;
// Mode flag reported by does_max_pooling().
bool do_max_pooling;
};
// -----------------------------------------------------------------------------------
// tensor_conv: CPU convolution operator.
// Objects are non-copyable. setup() records the stride and padding in the
// last_* members; the convolution and gradient member functions are only
// declared here and defined outside this header.
class tensor_conv
{
public:
// Copying is disabled.
tensor_conv(const tensor_conv&) = delete;
tensor_conv& operator=(const tensor_conv&) = delete;
// The members start at 0 through their in-class initializers.
tensor_conv() {}
// No-op in this implementation.
void clear(
) {}
// Validates and stores the stride/padding configuration.
// Asserts (via DLIB_CASSERT) that both strides are positive and that each
// padding value lies in [0, filters.nr()) or [0, filters.nc()) respectively.
void setup(
const tensor& data, /* not used but required for interface */
const tensor& filters, /* only nr()/nc() are read, to bound-check the padding */
int stride_y,
int stride_x,
int padding_y,
int padding_x
)
{
(void)data; /* silence compiler */
DLIB_CASSERT(stride_y > 0 && stride_x > 0);
// Padding must be non-negative and strictly smaller than the filter extent.
DLIB_CASSERT(0 <= padding_y && padding_y < filters.nr());
DLIB_CASSERT(0 <= padding_x && padding_x < filters.nc());
last_stride_y = stride_y;
last_stride_x = stride_x;
last_padding_y = padding_y;
last_padding_x = padding_x;
}
// Presumably convolves data with filters into output, adding to the existing
// contents when add_to_output is true. This overload takes a
// resizable_tensor, so the callee is able to resize output.
// TODO confirm in the definition.
void operator() (
const bool add_to_output,
resizable_tensor& output,
const tensor& data,
const tensor& filters
);
// Same as above but output is a plain tensor.
// NOTE(review): presumably output must already have the right dimensions;
// confirm in the definition.
void operator() (
const bool add_to_output,
tensor& output,
const tensor& data,
const tensor& filters
);
// Presumably computes the gradient of the convolution with respect to its
// data input, adding to data_gradient when add_to_output is true.
// TODO confirm in the definition.
void get_gradient_for_data (
const bool add_to_output,
const tensor& gradient_input,
const tensor& filters,
tensor& data_gradient
);
// Presumably computes the gradient of the convolution with respect to the
// filters, adding to filters_gradient when add_to_output is true.
// TODO confirm in the definition.
void get_gradient_for_filters (
const bool add_to_output,
const tensor& gradient_input,
const tensor& data,
tensor& filters_gradient
);
private:
// Configuration recorded by the most recent setup() call (0 until then).
long last_stride_y = 0;
long last_stride_x = 0;
long last_padding_y = 0;
long last_padding_x = 0;
};
// -----------------------------------------------------------------------------------
// copy_tensor: declaration only; the definition is not in this header.
// Presumably copies count_k channels from src, starting at channel
// src_k_offset, into dest starting at channel dest_k_offset. When add_to is
// true it would add to the existing contents of dest instead of overwriting.
// NOTE(review): "k" is assumed to be the channel dimension; confirm this and
// the size requirements on the other dimensions in the definition.
void copy_tensor(
bool add_to,
tensor& dest,
size_t dest_k_offset,
const tensor& src,
size_t src_k_offset,
size_t count_k
);
// -----------------------------------------------------------------------------------
}
}
#ifdef NO_MAKEFILE
#include "cpu_dlib.cpp"
#endif
#endif // DLIB_DNN_CPU_H_