kendryte-standalone-sdk/lib/nncase/v1/include/nncase/kernels/kernel_utils.h

/* Copyright 2020 Canaan Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <nncase/runtime/datatypes.h>
#include <numeric>

#ifdef __GNUC__
#define CXX_RESTRICT __restrict__
#elif defined(_MSC_VER)
#define CXX_RESTRICT __restrict
#else
#define CXX_RESTRICT
#endif

BEGIN_NS_NNCASE_KERNELS
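
// element_offset: inner product of an index range with the matching strides.
// Only the trailing `size` dimensions participate, where `size` is the shorter
// of the index range and the stride vector, so a short index is treated as
// addressing the innermost dimensions.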
template <class offset_type, class S, class It>
inline offset_type element_offset(const S &strides, It first, It last) noexcept
{
    using difference_type = typename std::iterator_traits<It>::difference_type;
    auto size = static_cast<difference_type>((std::min)(static_cast<typename S::size_type>(std::distance(first, last)), strides.size()));
    return std::inner_product(last - size, last, strides.cend() - size, offset_type(0));
}
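
// offset: flat element offset of a full multi-dimensional `index` given
// per-dimension `strides`; both containers must have the same rank.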
template <class TShape>
size_t offset(const TShape &strides, const TShape &index)
{
    assert(strides.size() == index.size());
    return element_offset<size_t>(strides, index.begin(), index.end());
}
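
// reshape_linear_index: decomposes a flat row-major index into a
// multi-dimensional index for `new_shape`, working from the innermost
// dimension outwards. For example, with new_shape {2, 3} and index 4 the
// result is {1, 1}, since 4 == 1 * 3 + 1.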
template <class TShape>
TShape reshape_linear_index(const TShape &new_shape, size_t index)
{
    TShape new_index(new_shape.size());
    size_t i = new_shape.size() - 1;
    for (auto it = new_shape.rbegin(); it != new_shape.rend(); ++it)
    {
        new_index[i--] = index % *it;
        index /= *it;
    }

    return new_index;
}
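
// linear_index: the inverse of reshape_linear_index; folds a
// multi-dimensional `index` into a flat row-major index over `shape`.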
template <class TShape>
size_t linear_index(const TShape &shape, const TShape &index)
{
    assert(index.size() == shape.size());
    size_t new_index = index[0];
    for (size_t i = 1; i < shape.size(); i++)
        new_index = new_index * shape[i] + index[i];
    return new_index;
}

namespace detail
{
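// get_windowed_output_size: output extent of a sliding-window op (convolution,
// pooling). With the effective filter size e = (filter - 1) * dilation + 1,
// this computes floor((size + padding.before + padding.after - e) / stride) + 1;
// adding `stride` before dividing is an equivalent way to express the
// trailing "+ 1" in integer arithmetic.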
inline size_t get_windowed_output_size(size_t size, int32_t filter, int32_t stride, int32_t dilation, const padding &padding)
{
    auto effective_filter_size = (filter - 1) * dilation + 1;
    return (size_t)((int32_t)size + padding.before + padding.after - effective_filter_size + stride) / stride;
}
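
// get_binary_output_shape: numpy-style broadcasting. Shapes are aligned at
// their trailing dimensions, missing leading dimensions are treated as 1,
// and a 1 broadcasts against any extent. For example, {3, 1, 5} and {4, 1}
// broadcast to {3, 4, 5}.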
inline runtime_shape_t get_binary_output_shape(const runtime_shape_t &input_a_shape, const runtime_shape_t &input_b_shape)
{
    runtime_shape_t out_shape;

    const auto dest_dims = (int32_t)std::max(input_a_shape.size(), input_b_shape.size());
    const auto in_a_ext = dest_dims - (int32_t)input_a_shape.size();
    const auto in_b_ext = dest_dims - (int32_t)input_b_shape.size();

    for (int32_t i = 0; i < dest_dims; i++)
    {
        const auto in_a_dim = i - (int32_t)in_a_ext;
        const auto in_b_dim = i - (int32_t)in_b_ext;

        const auto in_a = in_a_dim < 0 ? 1 : input_a_shape[in_a_dim];
        const auto in_b = in_b_dim < 0 ? 1 : input_b_shape[in_b_dim];
        if (in_a == in_b)
            out_shape.push_back(in_a);
        else if (in_a == 1)
            out_shape.push_back(in_b);
        else if (in_b == 1)
            out_shape.push_back(in_a);
        else
            assert(!"inputs are not compatible for broadcasting");
    }

    return out_shape;
}
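
// compute_size: total element count of a shape, i.e. the product of all
// dimensions (1 for an empty shape, matching a scalar).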
template <class TShape>
size_t compute_size(const TShape &shape)
{
    return std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<void>());
}
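
// clamp/apply_activation: clamping into a value_range is how fused
// activations such as ReLU (min = 0) or ReLU6 (range [0, 6]) are applied
// to a kernel's raw output.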
template <class T>
inline T clamp(T value, T min, T max)
{
    return std::max(std::min(value, max), min);
}

template <class T>
inline T apply_activation(T value, value_range<T> activation)
{
    return clamp(value, activation.min, activation.max);
}
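
// get_reduced_offset (broadcast form): maps an offset in the full output back
// into an input whose shape was broadcast up from `reduced_shape`; any
// dimension the input does not actually have (extent 1) is pinned to 0.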
template <class TShape>
TShape get_reduced_offset(const TShape &in_offset, const TShape &reduced_shape)
{
    TShape off(reduced_shape.size());
    const auto dims_ext = in_offset.size() - reduced_shape.size();
    for (size_t i = 0; i < reduced_shape.size(); i++)
    {
        if (in_offset[i + dims_ext] >= reduced_shape[i])
            off[i] = 0;
        else
            off[i] = in_offset[i + dims_ext];
    }

    return off;
}
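
// get_reduced_shape: shape after reducing over `axis`. Reduced dimensions are
// dropped, or kept as 1 when keep_dims is set. For example, {2, 3, 4} reduced
// over axis {1} gives {2, 4}, or {2, 1, 4} with keep_dims. A fully reduced
// shape collapses to {1}.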
template <class TShape>
TShape get_reduced_shape(const TShape &in_shape, const TShape &axis, bool keep_dims)
{
    TShape shape;
    shape.reserve(in_shape.size() - (keep_dims ? 0 : axis.size()));
    for (size_t i = 0; i < in_shape.size(); i++)
    {
        if (std::find(axis.begin(), axis.end(), i) == axis.end())
        {
            shape.push_back(in_shape[i]);
        }
        else
        {
            if (keep_dims)
                shape.push_back(1);
        }
    }

    if (shape.empty())
        shape.push_back(1);
    return shape;
}
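
// get_reduce_block_size: number of input elements folded into each output
// element, i.e. the product of the extents along the reduced axes.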
template <class TShape>
size_t get_reduce_block_size(const TShape &in_shape, const TShape &axis)
{
    size_t size = 1;
    for (size_t i = 0; i < in_shape.size(); i++)
    {
        if (std::find(axis.begin(), axis.end(), i) != axis.end())
        {
            size *= in_shape[i];
        }
    }

    return size;
}
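
// get_reduced_offset (axis form): projects an input offset onto the reduced
// output; reduced axes are dropped (or pinned to 0 when keep_dims is set),
// mirroring get_reduced_shape.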
template <class TShape>
TShape get_reduced_offset(const TShape &in_offset, const TShape &axis, bool keep_dims)
{
    TShape off;
    off.reserve(in_offset.size() - (keep_dims ? 0 : axis.size()));
    for (size_t i = 0; i < in_offset.size(); i++)
    {
        if (std::find(axis.begin(), axis.end(), i) == axis.end())
        {
            off.push_back(in_offset[i]);
        }
        else
        {
            if (keep_dims)
                off.push_back(0);
        }
    }

    if (off.empty())
        off.push_back(0);
    return off;
}
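
// default_ptr_getter: customization point for turning a range handle into a
// raw pointer. The default assumes TRange already is (or converts to) a T*;
// kernels working with other range types can supply their own getter.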
template <class T, class TRange>
struct default_ptr_getter
{
    T *operator()(const TRange &range) const noexcept { return range; }
};
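
// to_signed: sign-extends a Bits-wide two's-complement value stored in a
// wider unsigned integer. If the sign bit (bit Bits - 1) is set, the high
// bits are filled with ones. For example, to_signed<4>(0xFu) == -1.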
template <int32_t Bits>
int32_t to_signed(uint32_t value)
{
    auto mask = uint32_t(1) << (Bits - 1);
    if (Bits != 32 && (value & mask) != 0)
    {
        auto sign = 0xFFFFFFFF << Bits;
        return (int32_t)(value | sign);
    }

    return (int32_t)value;
}

template <int32_t Bits>
int64_t to_signed(uint64_t value)
{
    auto mask = uint64_t(1) << (Bits - 1);
    if (Bits != 64 && (value & mask) != 0)
    {
        auto sign = 0xFFFFFFFFFFFFFFFF << Bits;
        return (int64_t)(value | sign);
    }

    return (int64_t)value;
}
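
// quantize: affine quantization q = round(value / scale + zero_point),
// clamped to the representable range of the target type T so out-of-range
// values saturate instead of wrapping.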
template <class T>
constexpr T quantize(float value, const quant_param_t &param) noexcept
{
    return (T)clamp((int32_t)lrintf(value / param.scale + param.zero_point), (int32_t)std::numeric_limits<T>::lowest(), (int32_t)std::numeric_limits<T>::max());
}
} // namespace detail

END_NS_NNCASE_KERNELS