241 lines
6.8 KiB
C++
241 lines
6.8 KiB
C++
/* Copyright 2020 Canaan Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
#pragma once
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstddef>
|
|
#include <nncase/runtime/datatypes.h>
|
|
#include <numeric>
|
|
|
|
#ifdef __GNUC__
|
|
#define CXX_RESTRICT __restrict__
|
|
#elif _MSC_VER
|
|
#define CXX_RESTRICT __restrict
|
|
#else
|
|
#define CXX_RESTRICT
|
|
#endif
|
|
|
|
BEGIN_NS_NNCASE_KERNELS
|
|
|
|
// Dot product of the trailing entries of the index range [first, last) with
// the trailing entries of `strides`. Only the last min(distance(first, last),
// strides.size()) entries of each sequence take part; the sum is accumulated
// in offset_type starting from offset_type(0).
template <class offset_type, class S, class It>
inline offset_type element_offset(const S &strides, It first, It last) noexcept
{
    using index_diff_t = typename std::iterator_traits<It>::difference_type;
    using stride_count_t = typename S::size_type;

    // Number of trailing positions both sequences have in common.
    const stride_count_t index_count = static_cast<stride_count_t>(std::distance(first, last));
    const stride_count_t shared_count = (std::min)(index_count, strides.size());
    const index_diff_t tail = static_cast<index_diff_t>(shared_count);

    // Align both ranges on their last element and reduce.
    const It index_begin = last - tail;
    const auto stride_begin = strides.cend() - tail;
    return std::inner_product(index_begin, last, stride_begin, offset_type(0));
}
|
|
|
|
template <class TShape>
|
|
size_t offset(const TShape &strides, const TShape &index)
|
|
{
|
|
assert(strides.size() == index.size());
|
|
return element_offset<size_t>(strides, index.begin(), index.end());
|
|
}
|
|
|
|
// Splits a flat `index` into one coordinate per axis of `new_shape`.
// Axes are processed from the last to the first: each coordinate is the
// remainder by that axis' extent, and the quotient is carried to the next axis.
template <class TShape>
TShape reshape_linear_index(const TShape &new_shape, size_t index)
{
    const size_t rank = new_shape.size();
    TShape coords(rank);

    for (size_t axis = rank; axis-- > 0;)
    {
        const auto extent = new_shape[axis];
        coords[axis] = index % extent;
        index /= extent;
    }

    return coords;
}
|
|
|
|
// Flattens a multi-dimensional `index` into a single linear position for a
// tensor of the given `shape`, using Horner's scheme:
//   result = (...((index[0]) * shape[1] + index[1]) * shape[2] + ...)
// `shape` and `index` are asserted to have the same length.
// An empty shape/index yields 0 instead of reading index[0] out of bounds.
template <class TShape>
size_t linear_index(const TShape &shape, const TShape &index)
{
    assert(index.size() == shape.size());

    // Starting from 0 makes the first iteration produce index[0]
    // (0 * shape[0] + index[0]), so non-empty inputs give the same result
    // as seeding the accumulator with index[0].
    size_t flat = 0;
    for (size_t axis = 0; axis < shape.size(); axis++)
        flat = flat * shape[axis] + index[axis];
    return flat;
}
|
|
|
|
namespace detail
|
|
{
|
|
inline size_t get_windowed_output_size(size_t size, int32_t filter, int32_t stride, int32_t dilation, const padding &padding)
|
|
{
|
|
auto effective_filter_size = (filter - 1) * dilation + 1;
|
|
return (size_t)((int32_t)size + padding.before + padding.after - effective_filter_size + stride) / stride;
|
|
}
|
|
|
|
// Builds the broadcast result shape of two input shapes.
// The shorter shape is treated as if it were left-padded with 1s. For every
// output axis: equal extents are kept, an extent of 1 gives way to the other
// extent, and any other combination trips the assertion (in which case
// nothing is appended for that axis).
inline runtime_shape_t get_binary_output_shape(const runtime_shape_t &input_a_shape, const runtime_shape_t &input_b_shape)
{
    const auto rank_a = (int32_t)input_a_shape.size();
    const auto rank_b = (int32_t)input_b_shape.size();
    const auto out_rank = (int32_t)std::max(input_a_shape.size(), input_b_shape.size());

    // Number of implicit leading 1-axes on each input.
    const auto lead_a = out_rank - rank_a;
    const auto lead_b = out_rank - rank_b;

    runtime_shape_t out_shape;
    for (int32_t axis = 0; axis < out_rank; ++axis)
    {
        const auto axis_a = axis - lead_a;
        const auto axis_b = axis - lead_b;

        // A negative axis means this input has no such dimension: use extent 1.
        const auto extent_a = axis_a < 0 ? 1 : input_a_shape[axis_a];
        const auto extent_b = axis_b < 0 ? 1 : input_b_shape[axis_b];

        if (extent_a == extent_b || extent_b == 1)
            out_shape.push_back(extent_a);
        else if (extent_a == 1)
            out_shape.push_back(extent_b);
        else
            assert(!"inputs are not compatible to broadcast");
    }

    return out_shape;
}
|
|
|
|
// Product of all entries of `shape`, accumulated in size_t.
// An empty shape yields 1.
template <class TShape>
size_t compute_size(const TShape &shape)
{
    size_t total = 1;
    for (auto it = shape.begin(); it != shape.end(); ++it)
        total = total * (*it);
    return total;
}
|
|
|
|
// Limits `value` to the range [min, max]: first caps it at `max`, then
// raises the capped result to `min` if it is still below it.
template <class T>
inline T clamp(T value, T min, T max)
{
    const T capped = (max < value) ? max : value;
    return (capped < min) ? min : capped;
}
|
|
|
|
template <class T>
|
|
inline T apply_activation(T value, value_range<T> activation)
|
|
{
|
|
return clamp(value, activation.min, activation.max);
|
|
}
|
|
|
|
// Maps `in_offset` onto a tensor of shape `reduced_shape`.
// The two are aligned on their last axis; for each axis of `reduced_shape`
// the matching coordinate is kept when it is inside the reduced extent and
// replaced by 0 otherwise.
template <class TShape>
TShape get_reduced_offset(const TShape &in_offset, const TShape &reduced_shape)
{
    const size_t reduced_rank = reduced_shape.size();
    // Count of leading axes of in_offset without a counterpart in reduced_shape.
    const auto lead = in_offset.size() - reduced_rank;

    TShape result(reduced_rank);
    for (size_t axis = 0; axis < reduced_rank; ++axis)
    {
        const auto coord = in_offset[axis + lead];
        result[axis] = coord >= reduced_shape[axis] ? 0 : coord;
    }

    return result;
}
|
|
|
|
// Shape that results from reducing `in_shape` over the axes listed in `axis`.
// Axes not listed keep their extent. Listed axes become 1 when `keep_dims`
// is true and are dropped otherwise. If nothing remains, the result is {1}.
template <class TShape>
TShape get_reduced_shape(const TShape &in_shape, const TShape &axis, bool keep_dims)
{
    const size_t rank = in_shape.size();

    TShape result;
    result.reserve(rank - (keep_dims ? 0 : axis.size()));

    for (size_t dim = 0; dim < rank; ++dim)
    {
        const bool is_reduced = std::find(axis.begin(), axis.end(), dim) != axis.end();
        if (!is_reduced)
            result.push_back(in_shape[dim]);
        else if (keep_dims)
            result.push_back(1);
    }

    if (result.empty())
        result.push_back(1);
    return result;
}
|
|
|
|
// Product of the extents of `in_shape` along the axes listed in `axis`.
// Returns 1 when no axis of `in_shape` is listed.
template <class TShape>
size_t get_reduce_block_size(const TShape &in_shape, const TShape &axis)
{
    const auto axis_first = axis.begin();
    const auto axis_last = axis.end();

    size_t block = 1;
    for (size_t dim = 0; dim != in_shape.size(); ++dim)
    {
        // Axes that are not reduced do not contribute.
        if (std::find(axis_first, axis_last, dim) == axis_last)
            continue;
        block *= in_shape[dim];
    }

    return block;
}
|
|
|
|
// Offset into the reduced tensor that corresponds to `in_offset`.
// Coordinates on axes not listed in `axis` are copied. Listed axes become 0
// when `keep_dims` is true and are dropped otherwise. If nothing remains,
// the result is {0}.
template <class TShape>
TShape get_reduced_offset(const TShape &in_offset, const TShape &axis, bool keep_dims)
{
    const size_t rank = in_offset.size();
    const auto is_reduced_axis = [&axis](size_t dim) {
        return std::find(axis.begin(), axis.end(), dim) != axis.end();
    };

    TShape result;
    result.reserve(rank - (keep_dims ? 0 : axis.size()));

    for (size_t dim = 0; dim < rank; ++dim)
    {
        if (is_reduced_axis(dim))
        {
            if (keep_dims)
                result.push_back(0);
            continue;
        }
        result.push_back(in_offset[dim]);
    }

    if (result.empty())
        result.push_back(0);
    return result;
}
|
|
|
|
// Function object that obtains a T* from a TRange by implicit conversion.
template <class T, class TRange>
struct default_ptr_getter
{
    T *operator()(const TRange &range) const noexcept
    {
        T *ptr = range;
        return ptr;
    }
};
|
|
|
|
// Interprets `value` as a Bits-wide two's-complement number and sign-extends
// it to int32_t. If bit (Bits - 1) is clear the value is returned unchanged.
// Bits must be in [1, 32].
template <int32_t Bits>
int32_t to_signed(uint32_t value)
{
    static_assert(Bits >= 1 && Bits <= 32, "Bits must be in the range [1, 32]");

    const uint32_t sign_bit = uint32_t(1) << (Bits - 1);
    if ((value & sign_bit) == 0)
        return (int32_t)value;

    // ~(sign_bit - 1) has every bit from the sign position upward set. The
    // sign bit is already set in `value`, so OR-ing fills exactly the bits
    // above it. This avoids a shift by `Bits`, which would be a full-width
    // shift when Bits == 32; in that case the mask adds nothing new.
    const uint32_t extension = ~(sign_bit - uint32_t(1));
    return (int32_t)(value | extension);
}
|
|
|
|
// Interprets `value` as a Bits-wide two's-complement number and sign-extends
// it to int64_t. If bit (Bits - 1) is clear the value is returned unchanged.
// Bits must be in [1, 64].
template <int32_t Bits>
int64_t to_signed(uint64_t value)
{
    static_assert(Bits >= 1 && Bits <= 64, "Bits must be in the range [1, 64]");

    const uint64_t sign_bit = uint64_t(1) << (Bits - 1);
    if ((value & sign_bit) == 0)
        return (int64_t)value;

    // ~(sign_bit - 1) has every bit from the sign position upward set. The
    // sign bit is already set in `value`, so OR-ing fills exactly the bits
    // above it. Unlike shifting an all-ones constant left by `Bits`, this
    // stays well-defined when Bits == 64 (a 64-bit shift by 64 is undefined).
    const uint64_t extension = ~(sign_bit - uint64_t(1));
    return (int64_t)(value | extension);
}
|
|
|
|
template <class T>
|
|
constexpr T quantize(float value, const quant_param_t ¶m) noexcept
|
|
{
|
|
return (T)clamp((int32_t)lrintf(value / param.scale + param.zero_point), (int32_t)std::numeric_limits<T>::lowest(), (int32_t)std::numeric_limits<T>::max());
|
|
}
|
|
}
|
|
END_NS_NNCASE_KERNELS
|