
// Repository viewer metadata captured together with this file (not part of the source):
// 243 lines, 8.4 KiB; timestamp shown by the viewer: 2020-03-18 06:42:46 +00:00
// Copyright 2016, Tobias Hermann.
// Distributed under the MIT License.
// (See accompanying LICENSE file or at
//  https://opensource.org/licenses/MIT)
#pragma once
#include "fdeep/common.hpp"
#include "fdeep/filter.hpp"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>
// Opens both namespaces; they are closed together on the last line of the file.
namespace fdeep { namespace internal
{
struct im2col_filter_matrix
ColMajorMatrixXf mat_;
shape5 filter_shape_;
std::size_t filter_count_;
// Packs equally shaped filters into a single matrix for GEMM-based convolution.
// Row i holds the weights of filters[i], flattened with z varying fastest,
// then x, then y, and the bias of that filter in the final column.
// Returns the matrix together with the common filter shape and the filter count.
// NOTE(review): the head of the shape assertion and the row increment were
// missing from the mangled source and have been restored - confirm against upstream.
inline im2col_filter_matrix generate_im2col_filter_matrix(
    const std::vector<filter>& filters)
{
    // front() is used below, so an empty filter list has to be rejected first.
    assertion(!filters.empty(), "at least one filter is required");
    assertion(fplus::all_the_same_on(
        fplus_c_mem_fn_t(filter, shape, shape5), filters),
        "all filters must have the same shape");

    const std::size_t fy = filters.front().shape().height_;
    const std::size_t fx = filters.front().shape().width_;
    const std::size_t fz = filters.front().shape().depth_;

    // One extra column per row for the bias term.
    ColMajorMatrixXf b(filters.size(), fy * fx * fz + 1);
    EigenIndex b_y = 0;
    for (const auto& current : filters)
    {
        EigenIndex b_x = 0;
        for (std::size_t yf = 0; yf < fy; ++yf)
        {
            for (std::size_t xf = 0; xf < fx; ++xf)
            {
                for (std::size_t zf = 0; zf < fz; ++zf)
                {
                    b(b_y, b_x++) = current.get(yf, xf, zf);
                }
            }
        }
        b(b_y, b_x) = current.get_bias();
        // Advance to the row of the next filter.
        ++b_y;
    }
    return {b, filters.front().shape(), filters.size()};
}
// Convenience wrapper: packs exactly one filter by forwarding a one-element
// filter_vec to generate_im2col_filter_matrix.
inline im2col_filter_matrix generate_im2col_single_filter_matrix(
    const filter& filter)
{
    return generate_im2col_filter_matrix(filter_vec(1, filter));
}
// GEMM convolution, faster but uses more RAM.
//
// Builds a matrix `a` with one column per output position. Each column holds
// the input values under the filter window (y outermost, z innermost - the
// same order generate_im2col_filter_matrix uses for the weights) followed by
// a constant 1 that multiplies the bias column of filter_mat.mat_.
// The result is filter_mat.mat_ * a, returned as a tensor of shape
// (1, 1, out_height, out_width, out_depth).
//
// in_padded is read at (offset + stride * position + filter index); the
// caller is responsible for having padded it accordingly.
// NOTE(review): the last index of in_padded.get, the column increment and the
// Eigen::Map arguments were missing from the mangled source and have been
// restored - confirm against upstream.
inline tensor5 convolve_im2col(
    std::size_t out_height,
    std::size_t out_width,
    std::size_t strides_y,
    std::size_t strides_x,
    std::size_t offset_y,
    std::size_t offset_x,
    const im2col_filter_matrix& filter_mat,
    const tensor5& in_padded)
{
    const auto fy = filter_mat.filter_shape_.height_;
    const auto fx = filter_mat.filter_shape_.width_;
    const auto fz = filter_mat.filter_shape_.depth_;
    const std::size_t out_positions = out_height * out_width;

    // +1 row for the constant that picks up the bias.
    ColMajorMatrixXf a(fy * fx * fz + 1, out_positions);
    EigenIndex a_x = 0;
    for (std::size_t y = 0; y < out_height; ++y)
    {
        for (std::size_t x = 0; x < out_width; ++x)
        {
            EigenIndex a_y = 0;
            for (std::size_t yf = 0; yf < fy; ++yf)
            {
                for (std::size_t xf = 0; xf < fx; ++xf)
                {
                    for (std::size_t zf = 0; zf < fz; ++zf)
                    {
                        a(a_y++, a_x) = in_padded.get(0, 0,
                            offset_y + strides_y * y + yf,
                            offset_x + strides_x * x + xf,
                            zf);
                    }
                }
            }
            a(a_y, a_x) = static_cast<float_type>(1);
            // Next output position gets the next column.
            ++a_x;
        }
    }

    // The product has one row per filter, so the output depth is the row
    // count. Reading it directly (instead of dividing the value count by
    // out_positions) keeps an empty output from dividing by zero.
    const std::size_t out_depth =
        static_cast<std::size_t>(filter_mat.mat_.rows());
    const std::size_t val_cnt =
        static_cast<std::size_t>(filter_mat.mat_.rows() * a.cols());
    assertion(val_cnt == out_depth * out_positions,
        "Invalid target size");

    shared_float_vec res_vec = fplus::make_shared_ref<float_vec>();
    res_vec->resize(val_cnt);

    // Write the product straight into the result buffer, avoiding a copy.
    Eigen::Map<ColMajorMatrixXf, Eigen::Unaligned> out_mat_map(
        res_vec->data(),
        filter_mat.mat_.rows(),
        a.cols());
    out_mat_map.noalias() = filter_mat.mat_ * a;

    return tensor5(shape5(1, 1, out_height, out_width, out_depth), res_vec);
}
// Border handling for a convolution. With `same`, preprocess_convolution
// computes padding amounts; with `valid`, all padding amounts stay zero.
enum class padding
{
    valid,
    same
};
// Geometry of one convolution as computed by preprocess_convolution:
// padding per border, sampling offset, and output size.
struct convolution_config
{
    std::size_t pad_top_;
    std::size_t pad_bottom_;
    std::size_t pad_left_;
    std::size_t pad_right_;
    // Start position added to every sampled input coordinate.
    std::size_t offset_y_;
    std::size_t offset_x_;
    std::size_t out_height_;
    std::size_t out_width_;
};
// Derives the geometry of a 2D convolution from filter size, strides,
// padding mode and input size.
//
// - padding::valid: no padding; the output size is
//   ceil((in - filter + 1) / stride).
// - padding::same: the output size is ceil(in / stride); the total padding
//   along an axis is max(filter - stride, 0) when the input size is a
//   multiple of the stride, otherwise max(filter - (in % stride), 0).
//   The smaller half goes to top/left, the remainder to bottom/right.
// - use_offset: when set, half of the positions left over by the stride
//   (rounded down) is used as the start offset along each axis.
//
// All results are converted with fplus::integral_cast_throw<std::size_t>.
// NOTE(review): the `else` branches of the pad computation were missing from
// the mangled source; without them the second assignment would always
// overwrite the first. Restored as if/else - confirm against upstream.
inline convolution_config preprocess_convolution(
    const shape2& filter_shape,
    const shape2& strides,
    padding pad_type,
    bool use_offset,
    const shape5& input_shape)
{
    const int filter_height = static_cast<int>(filter_shape.height_);
    const int filter_width = static_cast<int>(filter_shape.width_);
    const int in_height = static_cast<int>(input_shape.height_);
    const int in_width = static_cast<int>(input_shape.width_);
    const int strides_y = static_cast<int>(strides.height_);
    const int strides_x = static_cast<int>(strides.width_);

    // A zero stride would divide by zero in the size and remainder
    // computations below.
    assertion(strides_y > 0 && strides_x > 0, "strides must be positive");

    // The small epsilon keeps exact quotients from being rounded up.
    int out_height = fplus::ceil(static_cast<float>(in_height - filter_height + 1) / static_cast<float>(strides_y) - 0.001);
    int out_width = fplus::ceil(static_cast<float>(in_width - filter_width + 1) / static_cast<float>(strides_x) - 0.001);
    int pad_along_height = 0;
    int pad_along_width = 0;

    if (pad_type == padding::same)
    {
        out_height = fplus::ceil(static_cast<float>(in_height) / static_cast<float>(strides_y) - 0.001);
        out_width = fplus::ceil(static_cast<float>(in_width) / static_cast<float>(strides_x) - 0.001);

        const int rest_y = in_height % strides_y;
        const int rest_x = in_width % strides_x;
        pad_along_height =
            std::max(filter_height - (rest_y == 0 ? strides_y : rest_y), 0);
        pad_along_width =
            std::max(filter_width - (rest_x == 0 ? strides_x : rest_x), 0);
    }

    // An odd total puts the extra element at the bottom/right.
    const int pad_top = pad_along_height / 2;
    const int pad_bottom = pad_along_height - pad_top;
    const int pad_left = pad_along_width / 2;
    const int pad_right = pad_along_width - pad_left;

    int offset_y = 0;
    int offset_x = 0;
    if (use_offset)
    {
        offset_y = ((in_height + pad_top + pad_bottom - filter_height) % strides_y) / 2;
        offset_x = ((in_width + pad_left + pad_right - filter_width) % strides_x) / 2;
    }

    const std::size_t out_height_size_t = fplus::integral_cast_throw<std::size_t>(out_height);
    const std::size_t out_width_size_t = fplus::integral_cast_throw<std::size_t>(out_width);
    const std::size_t offset_y_size_t = fplus::integral_cast_throw<std::size_t>(offset_y);
    const std::size_t offset_x_size_t = fplus::integral_cast_throw<std::size_t>(offset_x);
    const std::size_t pad_top_size_t = fplus::integral_cast_throw<std::size_t>(pad_top);
    const std::size_t pad_bottom_size_t = fplus::integral_cast_throw<std::size_t>(pad_bottom);
    const std::size_t pad_left_size_t = fplus::integral_cast_throw<std::size_t>(pad_left);
    const std::size_t pad_right_size_t = fplus::integral_cast_throw<std::size_t>(pad_right);

    return {pad_top_size_t, pad_bottom_size_t,
        pad_left_size_t, pad_right_size_t,
        offset_y_size_t, offset_x_size_t,
        out_height_size_t, out_width_size_t};
}
// Convolves `input` with the pre-packed filters in `filter_mat`.
//
// Steps: check that filter depth and input depth agree, derive padding,
// offset and output size via preprocess_convolution, pad the input with 0,
// then run the GEMM-based convolve_im2col on the padded tensor.
//
// NOTE(review): two arguments were missing from the mangled source and have
// been restored: the 2D filter shape passed to preprocess_convolution
// (assumes shape5 offers without_depth() returning a shape2 - confirm) and
// `input` as the tensor handed to pad_tensor5.
inline tensor5 convolve(
    const shape2& strides,
    const padding& pad_type,
    bool use_offset,
    const im2col_filter_matrix& filter_mat,
    const tensor5& input)
{
    assertion(filter_mat.filter_shape_.depth_ == input.shape().depth_,
        "invalid filter depth");

    const auto conv_cfg = preprocess_convolution(
        filter_mat.filter_shape_.without_depth(),
        strides, pad_type, use_offset, input.shape());

    const auto in_padded = pad_tensor5(0,
        conv_cfg.pad_top_, conv_cfg.pad_bottom_, conv_cfg.pad_left_, conv_cfg.pad_right_,
        input);

    return convolve_im2col(
        conv_cfg.out_height_, conv_cfg.out_width_,
        strides.height_, strides.width_,
        conv_cfg.offset_y_, conv_cfg.offset_x_,
        filter_mat, in_padded);
}
} } // namespace fdeep, namespace internal