kendryte-standalone-sdk/lib/nncase/v1/include/nncase/runtime/runtime_op_utility.h

/* Copyright 2020 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "datatypes.h"
#include "result.h"
BEGIN_NS_NNCASE_RUNTIME
inline constexpr size_t get_bytes(datatype_t type)
{
    return nncase::detail::datatype_bytes(type);
}

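// Number of elements in a densely packed tensor of the given shape
// (product of all dimensions).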
inline size_t compute_size(const runtime_shape_t &shape)
{
    return std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<size_t>());
}

inline size_t get_bytes(datatype_t type, const runtime_shape_t &shape)
{
    return compute_size(shape) * get_bytes(type);
}

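// Number of elements spanned by a strided tensor: extent of the dimension
// with the largest stride (dimensions of extent 1 are ignored). Returns at
// least 1 so that rank-0 / fully-broadcast tensors still occupy one element.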
inline size_t compute_size(const runtime_shape_t &shape, const runtime_shape_t &strides)
{
    size_t max_stride = 0, max_shape = 0;
    for (size_t i = 0; i < shape.size(); i++)
    {
        if ((shape[i] == 1 ? 0 : strides[i]) > max_stride)
        {
            max_stride = strides[i];
            max_shape = shape[i];
        }
    }

    size_t size = max_stride * max_shape;
    return size ? size : 1;
}

inline size_t get_bytes(datatype_t type, const runtime_shape_t &shape, const runtime_shape_t &strides)
{
    return compute_size(shape, strides) * get_bytes(type);
}

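// Row-major (C-order) stride computation. The std::nullptr_t overload of
// adapt_strides zeroes the stride of any dimension of extent 1, so broadcast
// dimensions never advance the element pointer; compute_strides returns the
// total number of elements.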
namespace detail
{
    template <class shape_type, class strides_type>
    inline void adapt_strides(const shape_type &shape, strides_type &strides,
        std::nullptr_t, typename strides_type::size_type i) noexcept
    {
        if (shape[i] == 1)
        {
            strides[i] = 0;
        }
    }

    template <class shape_type, class strides_type, class bs_ptr>
    inline std::size_t compute_strides(const shape_type &shape,
        strides_type &strides, bs_ptr bs)
    {
        using strides_value_type = typename std::decay_t<strides_type>::value_type;
        strides_value_type data_size = 1;
        for (std::size_t i = shape.size(); i != 0; --i)
        {
            strides[i - 1] = data_size;
            data_size = strides[i - 1] * static_cast<strides_value_type>(shape[i - 1]);
            adapt_strides(shape, strides, bs, i - 1);
        }
        return static_cast<std::size_t>(data_size);
    }
}

template <class shape_type, class strides_type>
inline std::size_t compute_strides(const shape_type &shape, strides_type &strides)
{
    return detail::compute_strides(shape, strides, nullptr);
}

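// Default (dense, row-major) strides for a shape, e.g. {2, 3, 4} yields
// {12, 4, 1}; dimensions of extent 1 get stride 0 (see adapt_strides above).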
inline runtime_shape_t get_default_strides(const runtime_shape_t &shape)
{
    runtime_shape_t strides(shape.size());
    compute_strides(shape, strides);
    return strides;
}

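// Shape of the same buffer reinterpreted with a different element type: the
// innermost dimension is rescaled by the ratio of the element sizes (integer
// division, so a conversion that does not divide evenly truncates).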
template <class TShape>
TShape convert_shape_type(const TShape &shape, datatype_t src, datatype_t dest)
{
    const auto src_size = get_bytes(src);
    const auto dest_size = get_bytes(dest);
    TShape new_shape = shape;
    if (!new_shape.empty())
    {
        auto &v = new_shape.back();
        v = new_shape.back() * src_size / dest_size;
    }
    return new_shape;
}

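// Strides counterpart of convert_shape_type. When the element sizes differ,
// only a dense (1) or broadcast (0) innermost stride can be converted; any
// other innermost stride yields std::errc::not_supported. Outer strides of 0
// are promoted to 1 before rescaling; the innermost stride is left unchanged.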
template <class TShape>
result<TShape> convert_strides_type(const TShape &strides, datatype_t src, datatype_t dest)
{
    const auto src_size = get_bytes(src);
    const auto dest_size = get_bytes(dest);
    if (src_size == dest_size)
        return ok(strides);

    TShape new_strides = strides;
    // 1. Except last dim
    for (size_t i = 0; i + 1 < new_strides.size(); i++)
    {
        auto &v = new_strides[i];
        if (v == 0)
            v = 1;
        v = v * src_size / dest_size;
    }

    // 2. Last dim
    if (!new_strides.empty())
    {
        // 2.1. If last dim is not 0 or 1, unsupported
        auto last_dim = new_strides.back();
        if (last_dim != 0 && last_dim != 1)
            return err(std::errc::not_supported);
    }

    return ok(new_strides);
}

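// Counts the consecutive zero bits starting from bit position Bits - 1 (the
// most significant bit of a Bits-wide value) down to the first set bit.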
template <int32_t Bits, class T>
uint8_t count_leading_zeros(T value)
{
    uint8_t num_zeroes = 0;
    for (int32_t i = Bits - 1; i >= 0; i--)
    {
        if ((value & (1ULL << i)) == 0)
            ++num_zeroes;
        else
            break;
    }
    return num_zeroes;
}

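// Mask with the low `shift` bits set, e.g. bit_mask(3) == 0b111.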
template <class T = uint64_t>
inline T bit_mask(uint8_t shift)
{
    return (T(1) << shift) - 1;
}

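// Rounding arithmetic shift. For shift > 0 the value is shifted right with
// rounding: round-half-to-even when Banker is true, otherwise halves round up
// toward +infinity (e.g. 5 >> 1 gives 3 with half-up rounding but 2 with
// banker's rounding). For shift < 0 the value is shifted left; shift == 0
// returns it unchanged.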
template <class T, bool Banker = false>
T carry_shift(T value, int32_t shift)
{
    if (shift > 0)
    {
        if (Banker)
        {
            // Sign | Int (T - shift - 1 bits) | Frac (shift bits)
            // S IIII FFF
            auto integral = value >> shift;
            auto fractional = value & bit_mask(shift);
            auto half = size_t(1) << (shift - 1);

            // frac < 0.5: keep the floor
            if (fractional < half)
            {
                return integral;
            }
            // frac > 0.5: round up to the next integer
            else if (fractional > half)
            {
                return integral + 1;
            }
            // frac == 0.5: round to the even neighbor
            else
            {
                if (integral & 1)
                    return integral + 1;
                else
                    return integral;
            }
        }
        else
        {
            value += T(1) << (shift - 1);
            value >>= shift;
        }
    }
    else if (shift < 0)
    {
        value = value << (-shift);
    }

    return value;
}

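// Widens to 64 bits, multiplies, and applies carry_shift, i.e. a fixed-point
// multiply-and-rescale of `value` by mul / 2^shift.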
template <bool Banker = false>
inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, int32_t shift)
{
    return (int32_t)carry_shift<int64_t, Banker>((int64_t)value * mul, shift);
}

template <class T>
inline T clamp(T value, T min, T max)
{
    return std::min(max, std::max(value, min));
}

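// Clamps to the range of a signed Bits-bit integer, e.g. clamp<8>(300) == 127
// and clamp<8>(-300) == -128.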
template <uint8_t Bits>
inline int32_t clamp(int32_t value)
{
    auto min = std::numeric_limits<int32_t>::lowest() >> (32 - Bits);
    auto max = std::numeric_limits<int32_t>::max() >> (32 - Bits);
    return clamp(value, min, max);
}

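// True when the strides are exactly the default dense row-major strides for
// the shape.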
template <class TShape>
inline bool is_contiguous(const TShape &shape, const TShape &strides)
{
    return get_default_strides(shape) == strides;
}

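// Scans from the innermost dimension outwards and returns one past the index
// of the innermost dimension whose stride differs from the default (i.e. the
// first dimension of the dense inner tail), or -1 if every stride already
// matches the defaults.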
inline int get_last_not_contiguous_index(const runtime_shape_t &strides, const runtime_shape_t &default_strides)
{
    for (int i = strides.size() - 1; i >= 0; --i)
    {
        if (strides[i] != default_strides[i])
        {
            return i + 1;
        }
    }
    return -1;
}

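// is_not_equal<A, B> is an integral_constant that is true when A != B;
// DefaultCallable is an empty placeholder type. Both appear to be helpers for
// template/tag dispatch elsewhere in the runtime.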
template <size_t A, size_t B>
constexpr auto is_not_equal = std::integral_constant<bool, std::not_equal_to<size_t> {}(A, B)> {};

struct DefaultCallable {};

END_NS_NNCASE_RUNTIME