Update nncaseruntime to v1.0.0beta2

develop
sunnycase 2021-09-17 10:59:50 +08:00
parent 9b61893150
commit cd966ff9bc
33 changed files with 1058 additions and 134 deletions

View File

@ -34,6 +34,15 @@ NNCASE_API result<void> copy(datatype_t type, const gsl::byte *src, gsl::byte *d
const runtime_shape_t &shape, const runtime_shape_t &src_strides, const runtime_shape_t &dest_strides,
int dims_offset, copy_impl_select impl_select, kernel_context &context) noexcept;
NNCASE_API result<void> conv2d(const float *input, const float *weights, const float *bias, float *output,
const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &w_shape, const runtime_shape_t &w_strides,
const runtime_shape_t &bias_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w,
int32_t groups, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range<float> fused_activation, kernel_context &context) noexcept;
NNCASE_API result<void> dequantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output,
const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias,
kernel_context &context) noexcept;
NNCASE_API result<void> gather(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, const runtime_shape_t &out_shape,
const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const int32_t *indices, const runtime_shape_t &indices_shape, size_t axis,
kernel_context &context = default_kernel_context()) noexcept;
@ -45,6 +54,10 @@ NNCASE_API result<void> gather_nd(datatype_t type, const gsl::byte *input, gsl::
NNCASE_API result<void> onehot(datatype_t type, const int32_t *indices, gsl::byte *output, const runtime_shape_t &indices_shape, const runtime_shape_t &out_shape,
const runtime_shape_t &out_strides, gsl::byte *depth, gsl::byte *off_value, gsl::byte *on_value, size_t axis, onehot_mode_t mode, kernel_context &context) noexcept;
NNCASE_API result<void> quantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output,
const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias,
kernel_context &context) noexcept;
NNCASE_API result<void> slice(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape,
const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_shape_t &begins, const runtime_shape_t &ends, const runtime_axis_t &strides,
kernel_context &context = default_kernel_context()) noexcept;

View File

@ -37,8 +37,8 @@ BEGIN_NS_NNCASE_KERNELS_CPU_REF
namespace detail
{
template <class Callable>
result<void> apply_impl(Callable &&callable, runtime_shape_t index_prefix, runtime_shape_t::const_iterator index_begin, runtime_shape_t::const_iterator index_end) noexcept
template <class TShape, class Callable, class TIt>
result<void> apply_impl(Callable &&callable, TShape index_prefix, TIt index_begin, TIt index_end) noexcept
{
const auto head = *index_begin++;
index_prefix.push_back(0);
@ -63,10 +63,10 @@ result<void> apply_impl(Callable &&callable, runtime_shape_t index_prefix, runti
}
}
template <class Callable>
result<void> apply(const runtime_shape_t &shape, Callable &&callable) noexcept
template <class TShape, class Callable>
result<void> apply(const TShape &shape, Callable &&callable) noexcept
{
return detail::apply_impl(std::forward<Callable>(callable), runtime_shape_t(), shape.cbegin(), shape.cend());
return detail::apply_impl(std::forward<Callable>(callable), TShape(), shape.cbegin(), shape.cend());
}
END_NS_NNCASE_KERNELS_CPU_REF

View File

@ -152,9 +152,7 @@ void kpu_conv2d(const uint8_t *input, int64_t *workspace, uint8_t *output, const
for (size_t i = 0; i < channel_size; i++)
{
auto value = (*src_it++ * bn.mul >> bn.shift) + bn.add;
auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const runtime::k210::kpu_activation_segment &seg) {
return value > seg.start_x;
});
auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const runtime::k210::kpu_activation_segment &seg) { return value > seg.start_x; });
auto act_value = runtime::carry_shift<int64_t, true>((value - seg.start_x) * seg.mul, seg.shift) + seg.add;
*out_it++ = (uint8_t)kernels::detail::clamp(act_value, int64_t(0), int64_t(255));
}

View File

@ -53,7 +53,7 @@ template <class TShape>
size_t offset(const TShape &strides, const TShape &index)
{
assert(strides.size() == index.size());
return element_offset<size_t>(strides, index.begin(), index.end());
return kernels::element_offset<size_t>(strides, index.begin(), index.end());
}
template <class TShape>

View File

@ -22,11 +22,6 @@
namespace nncase
{
struct half
{
uint16_t value;
};
struct from_raw_t
{
explicit from_raw_t() = default;
@ -165,7 +160,17 @@ public:
static constexpr bfloat16 nan() noexcept
{
return from_raw(NAN_VALUE);
return from_raw(0x7fc0);
}
static constexpr bfloat16 quiet_NaN() noexcept
{
return from_raw(0x7fc0);
}
static constexpr bfloat16 signaling_NaN() noexcept
{
return from_raw(0x7f81);
}
static constexpr bfloat16 infinity() noexcept
@ -291,12 +296,12 @@ struct numeric_limits<nncase::bfloat16>
NNCASE_UNUSED static constexpr nncase::bfloat16 quiet_NaN() noexcept
{
return nncase::bfloat16::nan();
return nncase::bfloat16::quiet_NaN();
}
NNCASE_UNUSED static constexpr nncase::bfloat16 signaling_NaN() noexcept
{
return nncase::bfloat16::nan();
return nncase::bfloat16::signaling_NaN();
}
static constexpr int digits = 8;

View File

@ -25,7 +25,7 @@
#define NNCASE_API
#endif
#else
#define NNCASE_API
#define NNCASE_API __attribute__((visibility("default")))
#endif
#if defined(_MSC_VER)
@ -71,16 +71,17 @@ using invoke_result_t = std::result_of_t<Callable(Args...)>;
} \
}
#define BEGIN_NS_NNCASE_RT_STACKVM \
namespace nncase \
{ \
namespace runtime \
{ \
namespace stackvm \
#define BEGIN_NS_NNCASE_RT_MODULE(MODULE) \
namespace nncase \
{ \
namespace runtime \
{ \
namespace MODULE \
{
#define END_NS_NNCASE_RT_STACKVM \
} \
} \
#define END_NS_NNCASE_RT_MODULE \
} \
} \
}
#define BEGIN_NS_NNCASE_KERNELS \

View File

@ -15,6 +15,7 @@
#pragma once
#include "bfloat16.h"
#include "compiler_defs.h"
#include "half.h"
#include "small_vector.hpp"
#include <array>
#include <cmath>
@ -114,7 +115,7 @@ struct value_range
static constexpr value_range<T> full() noexcept
{
if (std::is_floating_point<T>::value || std::is_same<T, bfloat16>::value)
if (std::is_floating_point<T>::value || std::is_same<T, bfloat16>::value || std::is_same<T, half>::value)
return { -std::numeric_limits<T>::infinity(), std::numeric_limits<T>::infinity() };
else
return { std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max() };
@ -324,6 +325,8 @@ NNCASE_INLINE_VAR constexpr memory_location_t mem_input = 0;
NNCASE_INLINE_VAR constexpr memory_location_t mem_output = 1;
NNCASE_INLINE_VAR constexpr memory_location_t mem_rdata = 2;
NNCASE_INLINE_VAR constexpr memory_location_t mem_data = 3;
NNCASE_INLINE_VAR constexpr memory_location_t mem_shared_data = 4;
NNCASE_INLINE_VAR constexpr memory_location_t mem_private_base = 64;
using runtime_shape_t = itlib::small_vector<size_t, 4>;
using runtime_axis_t = itlib::small_vector<int32_t, 4>;
@ -378,6 +381,12 @@ struct scalar
as<bfloat16>() = value;
}
// Builds a scalar from a half-precision value: tags it as dt_float16 and
// writes the value through the as<half>() accessor.
scalar(half value) noexcept
{
type = dt_float16;
as<half>() = value;
}
scalar(float value) noexcept
{
type = dt_float32;
@ -395,7 +404,7 @@ struct memory_range
{
memory_location_t memory_location;
datatype_t datatype;
uint16_t reserved0;
uint16_t shared_module;
uint32_t start;
uint32_t size;
};
@ -456,3 +465,16 @@ inline bool operator!=(const scalar &lhs, const scalar &rhs) noexcept
return lhs.type != rhs.type || memcmp(&lhs.storage, &rhs.storage, valid_bytes);
}
}
// std::hash specialization for nncase::module_type_t.
template <>
struct std::hash<nncase::module_type_t>
{
    // Folds every element of the key, in order, into a base-31 polynomial hash.
    auto operator()(const nncase::module_type_t &key) const noexcept
    {
        constexpr size_t multiplier = 31;
        size_t hashed = 0;
        for (const auto element : key)
        {
            hashed *= multiplier;
            hashed += element;
        }
        return hashed;
    }
};

View File

@ -172,7 +172,7 @@ struct type_tag
{
};
template <int &... ExplicitArgumentBarrier, typename T>
template <int &...ExplicitArgumentBarrier, typename T>
std::string get_type_name(type_tag<T>)
{
namespace pf = pretty_function;
@ -772,7 +772,8 @@ inline bool pretty_print(std::ostream &stream,
const std::variant<Ts...> &value)
{
stream << "{";
std::visit([&stream](auto &&arg) { pretty_print(stream, arg); }, value);
std::visit([&stream](auto &&arg) { pretty_print(stream, arg); },
value);
stream << "}";
return true;
@ -849,7 +850,7 @@ public:
template <typename... T>
auto print(std::initializer_list<expr_t> exprs,
std::initializer_list<std::string> types,
T &&... values) -> last_t<T...>
T &&...values) -> last_t<T...>
{
if (exprs.size() != sizeof...(values))
{
@ -861,6 +862,15 @@ public:
return print_impl(exprs.begin(), types.begin(), std::forward<T>(values)...);
}
// Writes `message` to std::cerr, prefixed with m_location and wrapped between
// the ANSI_WARN and ANSI_RESET escape sequences, then ends the line (std::endl
// also flushes the stream).
template <typename T>
void print_err(T &&message)
{
    auto &out = std::cerr;
    out << m_location;
    out << ansi(ANSI_WARN) << message << ansi(ANSI_RESET);
    out << std::endl;
}
template <typename T>
T &&checked_print(std::initializer_list<expr_t> exprs,
std::initializer_list<std::string> types,
@ -912,7 +922,7 @@ private:
auto print_impl(const expr_t *exprs,
const std::string *types,
T &&value,
U &&... rest) -> last_t<T, U...>
U &&...rest) -> last_t<T, U...>
{
print_impl(exprs, types, std::forward<T>(value));
return print_impl(exprs + 1, types + 1, std::forward<U>(rest)...);
@ -954,7 +964,7 @@ T &&identity(T &&t)
}
template <typename T, typename... U>
auto identity(T &&, U &&... u) -> last_t<U...>
auto identity(T &&, U &&...u) -> last_t<U...>
{
return identity(std::forward<U>(u)...);
}
@ -1026,6 +1036,48 @@ auto identity(T &&, U &&... u) -> last_t<U...>
if (!CHECK(x)) \
return nncase::err(e)
// Evaluates `x` once. If the result reports !is_ok(), the error message is
// printed through dbg::DebugOutput::print_err (tagged with the current file,
// line and function) and the enclosing function returns nncase::err(...) built
// from the moved error.
// NOTE(review): expands to a bare `{ ... }` block, so `checked_try(x);` used as
// the sole statement of an unbraced if/else leaves a stray `;` before `else`.
#define checked_try(x) \
{ \
auto v = (x); \
if (!v.is_ok()) \
{ \
dbg::DebugOutput(__FILE__, __LINE__, __func__) \
.print_err(v.unwrap_err().message()); \
return nncase::err(std::move(v.unwrap_err())); \
} \
}
// Declares a local `name` of type decltype((x))::traits::ok_type, then
// evaluates `x`: on success `name` receives the moved unwrap() value; otherwise
// the error message is printed through dbg::DebugOutput::print_err and the
// enclosing function returns nncase::err(...).
// NOTE(review): `name` is declared without an initializer and assigned later,
// so ok_type has to be default-constructible and assignable. The declaration is
// emitted into the caller's scope, so this macro cannot be the sole statement
// of an unbraced if/else.
#define checked_try_var(name, x) \
typename decltype((x))::traits::ok_type name; \
{ \
auto v = (x); \
if (v.is_ok()) \
{ \
name = std::move(v.unwrap()); \
} \
else \
{ \
dbg::DebugOutput(__FILE__, __LINE__, __func__) \
.print_err(v.unwrap_err().message()); \
return nncase::err(std::move(v.unwrap_err())); \
} \
}
// Evaluates `x` once and, on success, move-assigns the unwrapped value to the
// already-existing lvalue `name`. On failure the error message is printed
// through dbg::DebugOutput::print_err and the enclosing function returns
// nncase::err(...); `name` is left untouched in that case.
// NOTE(review): expands to a bare `{ ... }` block; brace the call site when it
// is used inside an if/else.
#define checked_try_set(name, x) \
{ \
auto v = (x); \
if (v.is_ok()) \
{ \
name = std::move(v.unwrap()); \
} \
else \
{ \
dbg::DebugOutput(__FILE__, __LINE__, __func__) \
.print_err(v.unwrap_err().message()); \
return nncase::err(std::move(v.unwrap_err())); \
} \
}
#define dbg(...) \
dbg::DebugOutput(__FILE__, __LINE__, __func__) \
.print({ DBG_MAP(DBG_STRINGIFY, __VA_ARGS__) }, \

View File

@ -0,0 +1,369 @@
/* Copyright 2019-2021 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cmath>
#include <cstdint>
#include <float.h>
#include <functional>
#include <limits>
#include <nncase/runtime/compiler_defs.h>
namespace nncase
{
// Tag type that selects the half constructor taking a raw 16-bit pattern
// instead of converting a numeric value.
struct fp16_from_raw_t
{
explicit fp16_from_raw_t() = default;
};
// Tag instance to pass as the first argument of half(fp16_from_raw_t, uint16_t).
NNCASE_INLINE_VAR constexpr fp16_from_raw_t fp16_from_raw {};
/// IEEE 754 binary16 ("half precision") value kept as its raw 16-bit pattern
/// (1 sign bit, 5 exponent bits, 10 stored mantissa bits).
/// Conversion to and from float is done in software by the members below.
struct half
{
private:
    // A 32-bit float viewed either as its value or as its bit pattern.
    union fp32
    {
        uint32_t u32;
        float f32;

        // 16-bit word of u32 at index 1 when NNCASE_LITTLE_ENDIAN is set, index 0 otherwise.
        uint16_t u16() const noexcept
        {
            constexpr size_t index = NNCASE_LITTLE_ENDIAN ? 1 : 0;
            return reinterpret_cast<const uint16_t *>(&u32)[index];
        }

        // Mutable counterpart of u16() const.
        uint16_t &u16() noexcept
        {
            constexpr size_t index = NNCASE_LITTLE_ENDIAN ? 1 : 0;
            return reinterpret_cast<uint16_t *>(&u32)[index];
        }
    };

    // Bit pattern of +0; zero() masks the sign bit before comparing against it.
    static constexpr uint16_t ZERO_VALUE = 0;

    // Bit pattern of the quiet NaN (a signaling NaN is only needed to raise a signal).
    static constexpr uint16_t NAN_VALUE = 0x7e00;

public:
    /// Defaulted: default-initialization leaves the bit pattern indeterminate.
    half() noexcept = default;

    /// Converts a float, rounding to the nearest representable half (ties to even).
    explicit half(float v) noexcept
        : value_(round_to_half(v).value_) { }

    /// Converts any integral or floating-point value by first casting it to float.
    template <class T, class = std::enable_if_t<std::is_integral<T>::value || std::is_floating_point<T>::value>>
    explicit half(const T &val) noexcept
        : half(static_cast<float>(val)) { }

    /// Wraps an existing raw bit pattern without any conversion.
    constexpr half(fp16_from_raw_t, uint16_t value) noexcept
        : value_(value) { }

    /// Widens to float. Every half value (including subnormals, infinities and
    /// NaNs) is representable as a float, so no rounding takes place.
    operator float() const noexcept
    {
        const fp32 magic = { 113 << 23 };
        const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
        fp32 o;

        o.u32 = (value_ & 0x7fff) << 13; // exponent/mantissa bits
        unsigned int exp = shifted_exp & o.u32; // just the exponent
        o.u32 += (127 - 15) << 23; // exponent adjust

        // handle exponent special cases
        if (exp == shifted_exp)
        { // Inf/NaN?
            o.u32 += (128 - 16) << 23; // extra exp adjust
        }
        else if (exp == 0)
        { // Zero/Denormal?
            o.u32 += 1 << 23; // extra exp adjust
            o.f32 -= magic.f32; // renormalize
        }

        o.u32 |= (value_ & 0x8000) << 16; // sign bit
        return o.f32;
    }

    /// Read-only access to the stored bit pattern.
    const uint16_t &raw() const noexcept { return value_; }

    /// Mutable access to the stored bit pattern.
    uint16_t &raw() noexcept { return value_; }

    /// Builds a half directly from a raw bit pattern.
    static constexpr half from_raw(uint16_t v) noexcept
    {
        return half(nncase::fp16_from_raw, v);
    }

    /// Rounds a float to half precision (round-to-nearest-even).
    /// Magnitudes too large for half become infinity and every NaN becomes
    /// the quiet NaN 0x7e00; the sign bit of the input is always preserved.
    /// Only performs integer and float arithmetic, hence noexcept.
    static half round_to_half(float v) noexcept
    {
        fp32 f;
        f.f32 = v;

        const fp32 f32infy = { 255 << 23 };
        const fp32 f16max = { (127 + 16) << 23 };
        const fp32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
        unsigned int sign_mask = 0x80000000u;

        // Strip the sign so the remaining comparisons work on the magnitude.
        unsigned int sign = f.u32 & sign_mask;
        f.u32 ^= sign;

        // NOTE all the integer compares in this function can be safely
        // compiled into signed compares since all operands are below
        // 0x80000000. Important if you want fast straight SSE2 code
        // (since there's no unsigned PCMPGTD).
        half o;
        if (f.u32 >= f16max.u32) // result is Inf or NaN (all exponent bits set)
        {
            o.value_ = (f.u32 > f32infy.u32) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
        }
        else
        {
            if (f.u32 < (113 << 23))
            { // resulting FP16 is subnormal or zero
                // use a magic value to align our 10 mantissa bits at the bottom of
                // the float. as long as FP addition is round-to-nearest-even this
                // just works.
                f.f32 += denorm_magic.f32;

                // and one integer subtract of the bias later, we have our final float!
                o.value_ = static_cast<uint16_t>(f.u32 - denorm_magic.u32);
            }
            else
            {
                unsigned int mant_odd = (f.u32 >> 13) & 1; // resulting mantissa is odd

                // update exponent, rounding bias part 1
                // Equivalent to `f.u32 += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
                // without arithmetic overflow.
                f.u32 += 0xc8000fffU;
                // rounding bias part 2
                f.u32 += mant_odd;
                // take the bits!
                o.value_ = static_cast<uint16_t>(f.u32 >> 13);
            }
        }

        o.value_ |= static_cast<uint16_t>(sign >> 16);
        return o;
    }

    /// Machine epsilon: the gap between 1.0 (0x3c00) and the next representable
    /// value (0x3c01), i.e. 2^-10. Encoded with biased exponent 5 and zero mantissa.
    static constexpr half epsilon() noexcept
    {
        return from_raw(0x1400);
    }

    /// Largest finite value (65504).
    static constexpr half highest() noexcept
    {
        return from_raw(0x7bff);
    }

    /// Smallest positive normal value (2^-14).
    static constexpr half min() noexcept
    {
        return from_raw(0x0400);
    }

    /// Most negative finite value (-65504).
    static constexpr half lowest() noexcept
    {
        return from_raw(0xfbff);
    }

    /// Quiet NaN (exponent all ones, top mantissa bit set).
    static constexpr half quiet_NaN() noexcept
    {
        return from_raw(0x7e00);
    }

    /// Signaling NaN (exponent all ones, top mantissa bit clear, non-zero payload).
    static constexpr half signaling_NaN() noexcept
    {
        return from_raw(0x7d00);
    }

    /// Positive infinity.
    static constexpr half infinity() noexcept
    {
        return from_raw(0x7c00);
    }

    /// True for both +0 and -0: the sign bit is masked off before the comparison.
    constexpr bool zero() const noexcept { return (value_ & 0x7FFF) == ZERO_VALUE; }

private:
    uint16_t value_;
};
// Generates `half operator x(half, half)`: both operands are widened to float,
// the operation is carried out in float and the result is rounded back to half.
#define DEFINE_FP16_BINARY_FP16RET(x) \
inline half operator x(half a, half b) noexcept \
{ \
return half::round_to_half(float(a) x float(b)); \
}
// Generates `bool operator x(half, half)` as the same comparison applied to the
// float conversions of both operands.
#define DEFINE_FP16_BINARY_BOOLRET(x) \
inline bool operator x(half a, half b) noexcept \
{ \
return float(a) x float(b); \
}
// Arithmetic operators.
DEFINE_FP16_BINARY_FP16RET(+)
DEFINE_FP16_BINARY_FP16RET(-)
DEFINE_FP16_BINARY_FP16RET(*)
DEFINE_FP16_BINARY_FP16RET(/)
// Ordering operators (== and != are defined separately on the raw bits).
DEFINE_FP16_BINARY_BOOLRET(<)
DEFINE_FP16_BINARY_BOOLRET(<=)
DEFINE_FP16_BINARY_BOOLRET(>=)
DEFINE_FP16_BINARY_BOOLRET(>)
// Generates the compound assignment `half &operator x(half &, half)` in terms
// of the corresponding binary operator `op`; returns the updated left operand.
#define DEFINE_FP16_BINARY_SELF_MOD(x, op) \
inline half &operator x(half &a, half b) noexcept \
{ \
a = a op b; \
return a; \
}
DEFINE_FP16_BINARY_SELF_MOD(+=, +)
DEFINE_FP16_BINARY_SELF_MOD(-=, -)
DEFINE_FP16_BINARY_SELF_MOD(*=, *)
DEFINE_FP16_BINARY_SELF_MOD(/=, /)
// Unary minus: negates the float conversion of `a` and rounds the result back
// to half.
inline half operator-(half a) noexcept
{
    const float negated = -static_cast<float>(a);
    return half::round_to_half(negated);
}
// Equality on the raw bit patterns (not on the float values): two NaNs with
// identical bits compare equal, while +0 (0x0000) and -0 (0x8000) do not.
inline bool operator==(const half &lhs, const half &rhs) noexcept
{
    const uint16_t left_bits = lhs.raw();
    const uint16_t right_bits = rhs.raw();
    return left_bits == right_bits;
}
// Inequality on the raw bit patterns; exact complement of the bitwise equality.
inline bool operator!=(const half &lhs, const half &rhs) noexcept
{
    const bool same_bits = (lhs.raw() == rhs.raw());
    return !same_bits;
}
}
namespace std
{
// Hashes a half by widening it to float and delegating to hash<float>.
template <>
struct hash<nncase::half>
{
    size_t operator()(const nncase::half &v) const
    {
        const float widened = static_cast<float>(v);
        return hash<float> {}(widened);
    }
};
template <>
struct numeric_limits<nncase::half>
{
static constexpr float_denorm_style has_denorm = std::denorm_present;
static constexpr bool has_infinity = true;
static constexpr bool has_quiet_NaN = true;
static constexpr bool has_signaling_NaN = true;
static constexpr bool is_bounded = false;
static constexpr bool is_iec559 = true;
static constexpr bool is_signed = true;
static constexpr bool is_specialized = true;
static constexpr float_round_style round_style = std::round_to_nearest;
static constexpr int radix = FLT_RADIX;
NNCASE_UNUSED static constexpr nncase::half(min)() noexcept
{
return nncase::half::min();
}
NNCASE_UNUSED static constexpr nncase::half(max)() noexcept
{
return nncase::half::highest();
}
NNCASE_UNUSED static constexpr nncase::half lowest() noexcept
{
return nncase::half::lowest();
}
NNCASE_UNUSED static constexpr nncase::half epsilon() noexcept
{
return nncase::half::epsilon();
}
NNCASE_UNUSED static nncase::half round_error() noexcept
{
return nncase::half((double)0.5);
}
NNCASE_UNUSED static constexpr nncase::half denorm_min() noexcept
{
return nncase::half::min();
}
NNCASE_UNUSED static constexpr nncase::half infinity() noexcept
{
return nncase::half::infinity();
}
NNCASE_UNUSED static constexpr nncase::half quiet_NaN() noexcept
{
return nncase::half::quiet_NaN();
}
NNCASE_UNUSED static constexpr nncase::half signaling_NaN() noexcept
{
return nncase::half::signaling_NaN();
}
static constexpr int digits = 11;
static const int min_exponent = -13;
static const int min_exponent10 = -4;
static const int max_exponent = 16;
static const int max_exponent10 = 4;
};
using nncase::half;
// <cmath>-style overloads for half. Each one widens its argument(s) to float,
// calls the float routine, and (where the result is a half) rounds the answer
// back with half::round_to_half.

// Classification.
inline bool isinf(const half &a)
{
    return std::isinf(static_cast<float>(a));
}
inline bool isnan(const half &a)
{
    return std::isnan(static_cast<float>(a));
}
inline bool isfinite(const half &a)
{
    return std::isfinite(static_cast<float>(a));
}

// Absolute value, exponentials, logarithms and powers.
inline half abs(const half &a)
{
    return half::round_to_half(fabsf(static_cast<float>(a)));
}
inline half exp(const half &a)
{
    return half::round_to_half(expf(static_cast<float>(a)));
}
inline half log(const half &a)
{
    return half::round_to_half(logf(static_cast<float>(a)));
}
inline half log10(const half &a)
{
    return half::round_to_half(log10f(static_cast<float>(a)));
}
inline half sqrt(const half &a)
{
    return half::round_to_half(sqrtf(static_cast<float>(a)));
}
inline half pow(const half &a, const half &b)
{
    const float base = static_cast<float>(a);
    const float exponent = static_cast<float>(b);
    return half::round_to_half(powf(base, exponent));
}

// Trigonometric and hyperbolic functions.
inline half sin(const half &a)
{
    return half::round_to_half(sinf(static_cast<float>(a)));
}
inline half cos(const half &a)
{
    return half::round_to_half(cosf(static_cast<float>(a)));
}
inline half tan(const half &a)
{
    return half::round_to_half(tanf(static_cast<float>(a)));
}
inline half tanh(const half &a)
{
    return half::round_to_half(tanhf(static_cast<float>(a)));
}

// Rounding to integral values.
inline half floor(const half &a)
{
    return half::round_to_half(floorf(static_cast<float>(a)));
}
inline half ceil(const half &a)
{
    return half::round_to_half(ceilf(static_cast<float>(a)));
}
inline half round(const half &a)
{
    return half::round_to_half(roundf(static_cast<float>(a)));
}
inline half nearbyint(const half &a)
{
    return half::round_to_half(nearbyintf(static_cast<float>(a)));
}
inline long lrint(const half &a)
{
    return lrintf(static_cast<float>(a));
}
}

View File

@ -0,0 +1,369 @@
/**
* @file incbin.h
* @author Dale Weiler
* @brief Utility for including binary files
*
* Facilities for including binary files into the current translation unit and
* making use of them externally in other translation units.
*/
// clang-format off
#ifndef INCBIN_HDR
#define INCBIN_HDR
#include <limits.h>
#if defined(__AVX512BW__) || \
defined(__AVX512CD__) || \
defined(__AVX512DQ__) || \
defined(__AVX512ER__) || \
defined(__AVX512PF__) || \
defined(__AVX512VL__) || \
defined(__AVX512F__)
# define INCBIN_ALIGNMENT_INDEX 6
#elif defined(__AVX__) || \
defined(__AVX2__)
# define INCBIN_ALIGNMENT_INDEX 5
#elif defined(__SSE__) || \
defined(__SSE2__) || \
defined(__SSE3__) || \
defined(__SSSE3__) || \
defined(__SSE4_1__) || \
defined(__SSE4_2__) || \
defined(__neon__)
# define INCBIN_ALIGNMENT_INDEX 4
#elif ULONG_MAX != 0xffffffffu
# define INCBIN_ALIGNMENT_INDEX 3
# else
# define INCBIN_ALIGNMENT_INDEX 2
#endif
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
#define INCBIN_ALIGN_SHIFT_0 1
#define INCBIN_ALIGN_SHIFT_1 2
#define INCBIN_ALIGN_SHIFT_2 4
#define INCBIN_ALIGN_SHIFT_3 8
#define INCBIN_ALIGN_SHIFT_4 16
#define INCBIN_ALIGN_SHIFT_5 32
#define INCBIN_ALIGN_SHIFT_6 64
/* Actual alignment value */
#define INCBIN_ALIGNMENT \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
INCBIN_ALIGNMENT_INDEX)
/* Stringize */
#define INCBIN_STR(X) \
#X
#define INCBIN_STRINGIZE(X) \
INCBIN_STR(X)
/* Concatenate */
#define INCBIN_CAT(X, Y) \
X ## Y
#define INCBIN_CONCATENATE(X, Y) \
INCBIN_CAT(X, Y)
/* Deferred macro expansion */
#define INCBIN_EVAL(X) \
X
#define INCBIN_INVOKE(N, ...) \
INCBIN_EVAL(N(__VA_ARGS__))
/* Green Hills uses a different directive for including binary data */
#if defined(__ghs__)
# if (__ghs_asm == 2)
# define INCBIN_MACRO ".file"
/* Or consider the ".myrawdata" entry in the ld file */
# else
# define INCBIN_MACRO "\tINCBIN"
# endif
#else
# define INCBIN_MACRO ".incbin"
#endif
#ifndef _MSC_VER
# define INCBIN_ALIGN \
__attribute__((aligned(INCBIN_ALIGNMENT)))
#else
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
#endif
#if defined(__arm__) || /* GNU C and RealView */ \
defined(__arm) || /* Diab */ \
defined(_ARM) /* ImageCraft */
# define INCBIN_ARM
#endif
#ifdef __GNUC__
/* Utilize .balign where supported */
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".balign 1\n"
#elif defined(INCBIN_ARM)
/*
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
* the shift count. This is the value passed to `.align'
*/
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
# define INCBIN_ALIGN_BYTE ".align 0\n"
#else
/* We assume other inline assemblers treat `.align' as `.balign' */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".align 1\n"
#endif
/* INCBIN_CONST is used by incbin.c generated files */
#if defined(__cplusplus)
# define INCBIN_EXTERNAL extern "C"
# define INCBIN_CONST extern const
#else
# define INCBIN_EXTERNAL extern
# define INCBIN_CONST const
#endif
/**
* @brief Optionally override the linker section into which data is emitted.
*
* @warning If you use this facility, you'll have to deal with platform-specific linker output
* section naming on your own
*
* Overriding the default linker output section, e.g. for esp8266/Arduino:
* @code
* #define INCBIN_OUTPUT_SECTION ".irom.text"
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
* // Data is emitted into program memory that never gets copied to RAM
* @endcode
*/
#if !defined(INCBIN_OUTPUT_SECTION)
# if defined(__APPLE__)
# define INCBIN_OUTPUT_SECTION ".const_data"
# else
# define INCBIN_OUTPUT_SECTION ".rodata"
# endif
#endif
#if defined(__APPLE__)
/* The directives are different for Apple branded compilers */
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# define INCBIN_INT ".long "
# define INCBIN_MANGLE "_"
# define INCBIN_BYTE ".byte "
# define INCBIN_TYPE(...)
#else
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# if defined(__ghs__)
# define INCBIN_INT ".word "
# else
# define INCBIN_INT ".int "
# endif
# if defined(__USER_LABEL_PREFIX__)
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
# else
# define INCBIN_MANGLE ""
# endif
# if defined(INCBIN_ARM)
/* On arm assemblers, `@' is used as a line comment token */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
# elif defined(__MINGW32__) || defined(__MINGW64__)
/* Mingw doesn't support this directive either */
# define INCBIN_TYPE(NAME)
# else
/* It's safe to use `@' on other architectures */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
# endif
# define INCBIN_BYTE ".byte "
#endif
/* List of style types used for symbol names */
#define INCBIN_STYLE_CAMEL 0
#define INCBIN_STYLE_SNAKE 1
/**
* @brief Specify the prefix to use for symbol names.
*
* By default this is `g', producing symbols of the form:
* @code
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char gFooData[];
* // const unsigned char *const gFooEnd;
* // const unsigned int gFooSize;
* @endcode
*
* If however you specify a prefix before including: e.g:
* @code
* #define INCBIN_PREFIX incbin
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols instead:
* // const unsigned char incbinFooData[];
* // const unsigned char *const incbinFooEnd;
* // const unsigned int incbinFooSize;
* @endcode
*/
#if !defined(INCBIN_PREFIX)
# define INCBIN_PREFIX g
#endif
/**
* @brief Specify the style used for symbol names.
*
* Possible options are
* - INCBIN_STYLE_CAMEL "CamelCase"
* - INCBIN_STYLE_SNAKE "snake_case"
*
* Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
* @code
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>FooData[];
* // const unsigned char *const <prefix>FooEnd;
* // const unsigned int <prefix>FooSize;
* @endcode
*
* If however you specify a style before including: e.g:
* @code
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
* #include "incbin.h"
* INCBIN(foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>foo_data[];
* // const unsigned char *const <prefix>foo_end;
* // const unsigned int <prefix>foo_size;
* @endcode
*/
#if !defined(INCBIN_STYLE)
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
#endif
/* Style lookup tables */
#define INCBIN_STYLE_0_DATA Data
#define INCBIN_STYLE_0_END End
#define INCBIN_STYLE_0_SIZE Size
#define INCBIN_STYLE_1_DATA _data
#define INCBIN_STYLE_1_END _end
#define INCBIN_STYLE_1_SIZE _size
/* Style lookup: returning identifier */
#define INCBIN_STYLE_IDENT(TYPE) \
INCBIN_CONCATENATE( \
INCBIN_STYLE_, \
INCBIN_CONCATENATE( \
INCBIN_EVAL(INCBIN_STYLE), \
INCBIN_CONCATENATE(_, TYPE)))
/* Style lookup: returning string literal.
 * INCBIN_STYLE_IDENT(TYPE) is expanded first and the resulting identifier is
 * then stringized. The definition ends on the last line below: no trailing
 * line continuation, so whatever follows is never spliced into the macro. */
#define INCBIN_STYLE_STRING(TYPE) \
    INCBIN_STRINGIZE( \
        INCBIN_STYLE_IDENT(TYPE))
/* Generate the global labels by indirectly invoking the macro with our style
* type and concatenating the name against them. */
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
INCBIN_INVOKE( \
INCBIN_GLOBAL, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE))) \
INCBIN_INVOKE( \
INCBIN_TYPE, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE)))
/**
* @brief Externally reference binary data included in another translation unit.
*
* Produces three external symbols that reference the binary data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name given for the binary data
*
* @code
* INCBIN_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const unsigned char <prefix>FooData[];
* // extern const unsigned char *const <prefix>FooEnd;
* // extern const unsigned int <prefix>FooSize;
* @endcode
*/
#define INCBIN_EXTERN(NAME) \
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(DATA))[]; \
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(END)); \
INCBIN_EXTERNAL const unsigned int \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(SIZE))
/**
* @brief Include a binary file into the current translation unit.
*
* Includes a binary file into the current translation unit, producing three symbols
* for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCBIN(Icon, "icon.png");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>IconData[];
* // const unsigned char *const <prefix>IconEnd;
* // const unsigned int <prefix>IconSize;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCBIN_EXTERN.
*/
#ifdef _MSC_VER
#define INCBIN(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
#define INCBIN(NAME, FILENAME) \
__asm__(INCBIN_SECTION \
INCBIN_GLOBAL_LABELS(NAME, DATA) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
INCBIN_MACRO " \"" FILENAME "\"\n" \
INCBIN_GLOBAL_LABELS(NAME, END) \
INCBIN_ALIGN_BYTE \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
INCBIN_BYTE "1\n" \
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
INCBIN_ALIGN_HOST \
".text\n" \
); \
INCBIN_EXTERN(NAME)
#endif
#endif

View File

@ -74,7 +74,7 @@ public:
private:
std::vector<std::unique_ptr<runtime_module>> modules_;
runtime_module *main_module_;
runtime_function *entry_function_;
options_dict options_;
};

View File

@ -22,21 +22,9 @@
#define NNCASE_MODULES_K210_API __declspec(dllimport)
#endif
#else
#define NNCASE_MODULES_K210_API
#define NNCASE_MODULES_K210_API __attribute__((visibility("default")))
#endif
#define BEGIN_NS_NNCASE_RT_K210 \
namespace nncase \
{ \
namespace runtime \
{ \
namespace k210 \
{
#define END_NS_NNCASE_RT_K210 \
} \
} \
}
#define BEGIN_NS_NNCASE_KERNELS_K210 \
namespace nncase \
{ \

View File

@ -16,7 +16,7 @@
#include "compiler_defs.h"
#include <nncase/runtime/error.h>
BEGIN_NS_NNCASE_RT_K210
BEGIN_NS_NNCASE_RT_MODULE(k210)
enum class nncase_k210_errc
{
@ -26,7 +26,7 @@ enum class nncase_k210_errc
NNCASE_MODULES_K210_API const std::error_category &nncase_k210_category() noexcept;
NNCASE_MODULES_K210_API std::error_condition make_error_condition(nncase_k210_errc code);
END_NS_NNCASE_RT_K210
END_NS_NNCASE_RT_MODULE
namespace std
{

View File

@ -17,7 +17,7 @@
#include <nncase/runtime/result.h>
#include <nncase/runtime/span_reader.h>
BEGIN_NS_NNCASE_RT_K210
BEGIN_NS_NNCASE_RT_MODULE(k210)
class NNCASE_MODULES_K210_API op_visitor
{
@ -44,4 +44,4 @@ private:
result<void> next() noexcept;
};
END_NS_NNCASE_RT_K210
END_NS_NNCASE_RT_MODULE

View File

@ -16,10 +16,11 @@
#include "compiler_defs.h"
#include <nncase/runtime/runtime_module.h>
BEGIN_NS_NNCASE_RT_K210
BEGIN_NS_NNCASE_RT_MODULE(k210)
NNCASE_INLINE_VAR constexpr module_type_t k210_module_type = to_module_type("k210");
NNCASE_INLINE_VAR constexpr uint32_t k210_module_version = 1;
NNCASE_MODULES_K210_API result<std::unique_ptr<runtime_module>> create_k210_runtime_module();
END_NS_NNCASE_RT_K210
END_NS_NNCASE_RT_MODULE

View File

@ -15,7 +15,7 @@
#pragma once
#include "runtime_types.h"
BEGIN_NS_NNCASE_RT_K210
BEGIN_NS_NNCASE_RT_MODULE(k210)
struct kpu_layout
{
@ -184,4 +184,4 @@ inline std::array<int32_t, 2> get_kpu_select_pool_offset(kpu_pool_type_t pool_ty
}
}
END_NS_NNCASE_RT_K210
END_NS_NNCASE_RT_MODULE

View File

@ -16,9 +16,9 @@
#include "compiler_defs.h"
#include <nncase/runtime/datatypes.h>
BEGIN_NS_NNCASE_RT_K210
BEGIN_NS_NNCASE_RT_MODULE(k210)
NNCASE_INLINE_VAR constexpr memory_location_t mem_kpu = 4;
NNCASE_INLINE_VAR constexpr memory_location_t mem_kpu = mem_private_base + 0;
NNCASE_INLINE_VAR constexpr size_t KPU_RAM_SIZE = 2 * 1024 * 1024; // 2MB
typedef struct
@ -341,4 +341,4 @@ struct copy_options
kpu_shape_t out_strides;
};
END_NS_NNCASE_RT_K210
END_NS_NNCASE_RT_MODULE

View File

@ -24,26 +24,49 @@ struct model_header
{
uint32_t identifier;
uint32_t version;
uint32_t header_size;
uint32_t flags;
uint32_t alignment;
uint32_t modules;
uint32_t main_module;
uint32_t entry_module;
uint32_t entry_function;
};
struct function_header
{
uint32_t header_size;
uint32_t size;
uint32_t input_pool_size;
uint32_t output_pool_size;
uint32_t inputs;
uint32_t outputs;
uint32_t entrypoint;
uint32_t text_size;
};
struct module_header
{
module_type_t type;
uint32_t version;
uint32_t header_size;
uint32_t size;
uint32_t mempools;
uint32_t inputs;
uint32_t outputs;
uint32_t shared_mempools;
uint32_t sections;
uint32_t functions;
uint32_t reserved0;
};
struct mempool_desc
{
memory_location_t location;
uint8_t reserved0[3];
uint32_t size;
};
struct shared_mempool_desc
{
uint32_t module;
uint32_t size;
};
@ -51,8 +74,8 @@ struct section_header
{
char name[MAX_SECTION_NAME_LENGTH];
uint32_t flags;
uint32_t start;
uint32_t size;
uint32_t body_start;
uint32_t body_size;
uint32_t reserved0;
};

View File

@ -72,7 +72,7 @@ struct Ok
: value(value) { }
template <class... Args>
constexpr explicit Ok(mpark::in_place_t, Args &&... args)
constexpr explicit Ok(mpark::in_place_t, Args &&...args)
: value(std::forward<Args>(args)...) { }
T value;
@ -101,7 +101,7 @@ inline constexpr Ok<void> ok()
}
template <class T, class... Args>
constexpr Ok<T> ok(Args &&... args)
constexpr Ok<T> ok(Args &&...args)
{
return Ok<T>(mpark::in_place, std::forward<Args>(args)...);
}

View File

@ -0,0 +1,86 @@
/* Copyright 2019-2021 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "model.h"
#include "result.h"
#include "runtime_tensor.h"
BEGIN_NS_NNCASE_RUNTIME
class interpreter;
class runtime_module;
struct runtime_module_init_context;
// Abstract context handed to runtime_function::initialize_core() while a function
// is being set up. It gives the implementation access to the enclosing module's
// init context and to this function's header and body bytes.
struct NNCASE_API runtime_function_init_context
{
// Init context of the module that this function is being initialized within.
virtual runtime_module_init_context &module_init_context() noexcept = 0;
// Header record describing this function.
virtual const function_header &header() noexcept = 0;
// Read-only byte view of the function body.
// NOTE(review): the exact layout of the body is not visible from this header - confirm against the model format.
virtual gsl::span<const gsl::byte> body() noexcept = 0;
};
// Abstract base class for a function that belongs to a runtime_module.
// It stores the function header and per-input/per-output tensor records, and
// exposes accessors for them; concrete runtimes supply the pure-virtual hooks in
// the protected section (initialize_core, allocate_*/validate_* and invoke_core).
class NNCASE_API runtime_function
{
private:
// Record kept for every input and every output of the function.
struct inout_tensor_info
{
runtime_shape_t shape;
runtime_shape_t strides;
memory_range range;
// NOTE(review): presumably bind_tensor is the tensor supplied by the caller, and
// staging_tensor/device_tensor are the copies used to reach device memory -
// confirm against the implementation file.
runtime_tensor bind_tensor;
runtime_tensor staging_tensor;
runtime_tensor device_tensor;
};
public:
// Constructs a function bound to the module that owns it.
runtime_function(runtime_module &rt_module);
// Copy construction and copy assignment are deleted.
runtime_function(const runtime_function &) = delete;
virtual ~runtime_function() = default;
runtime_function &operator=(const runtime_function &) = delete;
// Initializes the function from its serialized payload bytes, given the init
// context of the enclosing module. Failures are reported through result<void>.
result<void> initialize(gsl::span<const gsl::byte> payload, runtime_module_init_context &module_init_context) noexcept;
// The module this function was constructed with.
runtime_module &module() const noexcept;
// Input accessors: count, then shape / memory range / tensor by index.
uint32_t inputs_size() const noexcept;
const runtime_shape_t &input_shape(size_t index) const noexcept;
const memory_range &input_desc(size_t index) const noexcept;
// Getter overload: returns the tensor for the given input index.
result<runtime_tensor> input_tensor(size_t index) noexcept;
// Setter overload: supplies a tensor for the given input index.
result<void> input_tensor(size_t index, runtime_tensor tensor) noexcept;
// Output accessors, mirroring the input accessors above.
uint32_t outputs_size() const noexcept;
const runtime_shape_t &output_shape(size_t index) const noexcept;
const memory_range &output_desc(size_t index) const noexcept;
result<runtime_tensor> output_tensor(size_t index) noexcept;
result<void> output_tensor(size_t index, runtime_tensor tensor) noexcept;
// Runs the function.
// NOTE(review): presumably prepares the tensors and then calls invoke_core() - confirm in the implementation.
result<void> invoke() noexcept;
protected:
// Hooks that a concrete runtime must implement.
virtual result<void> initialize_core(runtime_function_init_context &context) noexcept = 0;
virtual result<runtime_tensor> allocate_input_tensor(size_t index) noexcept = 0;
virtual result<runtime_tensor> allocate_output_tensor(size_t index) noexcept = 0;
virtual result<void> validate_input_tensor(size_t index, runtime_tensor tensor) noexcept = 0;
virtual result<void> validate_output_tensor(size_t index, runtime_tensor tensor) noexcept = 0;
// Non-virtual helpers available to derived classes for reaching the device-side
// tensor of an input/output by index.
result<runtime_tensor> device_input_tensor(size_t index) noexcept;
result<runtime_tensor> device_output_tensor(size_t index) noexcept;
virtual result<void> invoke_core() noexcept = 0;
private:
function_header header_;
std::vector<inout_tensor_info> input_tensors_;
std::vector<inout_tensor_info> output_tensors_;
// Held by reference, so the module must outlive this function object.
runtime_module &rt_module_;
};
END_NS_NNCASE_RUNTIME

View File

@ -15,6 +15,7 @@
#pragma once
#include "model.h"
#include "result.h"
#include "runtime_function.h"
#include "runtime_tensor.h"
BEGIN_NS_NNCASE_RUNTIME
@ -31,26 +32,15 @@ struct NNCASE_API runtime_module_init_context
class NNCASE_API runtime_module
{
private:
struct inout_tensor_info
{
runtime_shape_t shape;
runtime_shape_t strides;
memory_range range;
runtime_tensor bind_tensor;
runtime_tensor staging_tensor;
runtime_tensor device_tensor;
};
public:
static result<std::unique_ptr<runtime_module>> create(const module_type_t &type);
runtime_module() = default;
runtime_module(runtime_module &) = delete;
runtime_module(const runtime_module &) = delete;
virtual ~runtime_module() = default;
runtime_module &operator=(const runtime_module &) = delete;
result<void> initialize(const module_header &header, interpreter &interp) noexcept;
virtual result<void> initialize_inter_modules(interpreter &interp) noexcept;
result<void> initialize(gsl::span<const gsl::byte> payload, interpreter &interp) noexcept;
const module_type_t &type() const noexcept;
interpreter &interp() const noexcept { return *interp_; }
@ -59,35 +49,20 @@ public:
const mempool_desc &mempool(size_t index) const noexcept;
mempool_desc mempool(memory_location_t location) const noexcept;
uint32_t inputs_size() const noexcept;
const runtime_shape_t &input_shape(size_t index) const noexcept;
const memory_range &input_desc(size_t index) const noexcept;
result<runtime_tensor> input_tensor(size_t index) noexcept;
result<void> input_tensor(size_t index, runtime_tensor tensor) noexcept;
uint32_t outputs_size() const noexcept;
const runtime_shape_t &output_shape(size_t index) const noexcept;
const memory_range &output_desc(size_t index) const noexcept;
result<runtime_tensor> output_tensor(size_t index) noexcept;
result<void> output_tensor(size_t index, runtime_tensor tensor) noexcept;
result<void> run() noexcept;
result<runtime_function *> find_function_by_id(size_t index) noexcept;
protected:
virtual result<void> initialize_core(runtime_module_init_context &context) noexcept = 0;
virtual result<runtime_tensor> allocate_input_tensor(size_t index) noexcept = 0;
virtual result<runtime_tensor> allocate_output_tensor(size_t index) noexcept = 0;
virtual result<void> validate_input_tensor(size_t index, runtime_tensor tensor) noexcept = 0;
virtual result<void> validate_output_tensor(size_t index, runtime_tensor tensor) noexcept = 0;
result<runtime_tensor> device_input_tensor(size_t index) noexcept;
result<runtime_tensor> device_output_tensor(size_t index) noexcept;
virtual result<void> run_core() noexcept = 0;
virtual result<void> initialize_before_functions(runtime_module_init_context &context) noexcept;
virtual result<void> initialize_after_functions(runtime_module_init_context &context) noexcept;
virtual result<std::unique_ptr<runtime_function>> create_function() noexcept = 0;
gsl::span<std::unique_ptr<runtime_function>> functions() noexcept { return functions_; }
private:
module_header header_;
std::vector<mempool_desc> mempools_;
std::vector<inout_tensor_info> input_tensors_;
std::vector<inout_tensor_info> output_tensors_;
std::vector<mempool_desc> shared_mempools_;
std::vector<std::unique_ptr<runtime_function>> functions_;
interpreter *interp_ = nullptr;
};

View File

@ -23,17 +23,20 @@ inline constexpr size_t get_bytes(datatype_t type)
return nncase::detail::datatype_bytes(type);
}
inline size_t compute_size(const runtime_shape_t &shape)
// Returns the number of elements described by `shape`: the product of all of its
// dimensions, or 1 when the shape is empty.
//
// TShape is any container exposing begin()/end() over values convertible to size_t.
template <class TShape>
inline size_t compute_size(const TShape &shape)
{
    // std::accumulate keeps its running value in the type of the seed argument.
    // Seeding with the int literal 1 would narrow every partial product to int,
    // so the seed is spelled as size_t to keep the whole product in size_t.
    return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
}
inline size_t get_bytes(datatype_t type, const runtime_shape_t &shape)
// Total byte size of a densely packed tensor: element count of `shape` times the
// byte width of one element of `type`.
template <class TShape>
inline size_t get_bytes(datatype_t type, const TShape &shape)
{
    const size_t element_count = compute_size(shape);
    const size_t element_bytes = get_bytes(type);
    return element_count * element_bytes;
}
inline size_t compute_size(const runtime_shape_t &shape, const runtime_shape_t &strides)
template <class TShape>
inline size_t compute_size(const TShape &shape, const TShape &strides)
{
size_t max_stride = 0, max_shape = 0;
for (size_t i = 0; i < shape.size(); i++)
@ -48,7 +51,8 @@ inline size_t compute_size(const runtime_shape_t &shape, const runtime_shape_t &
return size ? size : 1;
}
inline size_t get_bytes(datatype_t type, const runtime_shape_t &shape, const runtime_shape_t &strides)
// Total byte size of a strided tensor: the element extent computed from `shape`
// and `strides`, times the byte width of one element of `type`.
template <class TShape>
inline size_t get_bytes(datatype_t type, const TShape &shape, const TShape &strides)
{
    const size_t element_count = compute_size(shape, strides);
    const size_t element_bytes = get_bytes(type);
    return element_count * element_bytes;
}

View File

@ -690,7 +690,7 @@ public:
}
template <typename... Args>
iterator emplace(const_iterator position, Args &&... args)
iterator emplace(const_iterator position, Args &&...args)
{
auto pos = grow_at(position, 1);
atraits::construct(get_alloc(), pos, std::forward<Args>(args)...);
@ -721,7 +721,7 @@ public:
}
template <typename... Args>
reference emplace_back(Args &&... args)
reference emplace_back(Args &&...args)
{
auto pos = grow_at(m_end, 1);
atraits::construct(get_alloc(), pos, std::forward<Args>(args)...);

View File

@ -88,18 +88,16 @@ public:
}
template <class T>
T peek()
T peek_with_offset(size_t offset)
{
auto value = *reinterpret_cast<const T *>(span_.data());
auto value = *reinterpret_cast<const T *>(span_.data() + offset);
return value;
}
template <class T>
T peek_unaligned()
T peek()
{
T value;
std::memcpy(&value, span_.data(), sizeof(T));
return value;
return peek_with_offset<T>(0);
}
template <class T>
@ -110,6 +108,12 @@ public:
return value;
}
template <class T>
T peek_unaligned()
{
return peek_unaligned_with_offset<T>(0);
}
template <class T>
const T *get_ref()
{

View File

@ -1,4 +1,4 @@
/* This file is generated by tools/stackvm_gen/IsaGen at 2021/7/14 19:17:48 +08:00.
/* This file is generated by tools/stackvm_gen/IsaGen at 2021/8/11 17:40:11 +08:00.
*
* Copyright 2019-2021 Canaan Inc.
*
@ -20,7 +20,7 @@
#include "../span_reader.h"
#include "opcode.h"
BEGIN_NS_NNCASE_RT_STACKVM
BEGIN_NS_NNCASE_RT_MODULE(stackvm)
template <class TOp>
struct op_reader;
@ -1141,7 +1141,8 @@ struct op_reader<tensor_call_op_t>
tensor_call_op_t op(default_init);
op.opcode = static_cast<opcode_t>(reader.read_unaligned<uint8_t>());
op.funct = static_cast<tensor_function_t>(reader.read_unaligned<uint16_t>());
op.module_id = reader.read_unaligned<uint32_t>();
op.function_id = reader.read_unaligned<uint32_t>();
op.module_id = reader.read_unaligned<uint16_t>();
op.num_src = reader.read_unaligned<uint8_t>();
op.num_dst = reader.read_unaligned<uint8_t>();
return op;
@ -1583,4 +1584,4 @@ private:
result<void> next() noexcept;
};
END_NS_NNCASE_RT_STACKVM
END_NS_NNCASE_RT_MODULE

View File

@ -1,4 +1,4 @@
/* This file is generated by tools/stackvm_gen/IsaGen at 2021/7/14 19:17:48 +08:00.
/* This file is generated by tools/stackvm_gen/IsaGen at 2021/8/11 17:40:11 +08:00.
*
* Copyright 2019-2021 Canaan Inc.
*
@ -17,7 +17,7 @@
#pragma once
#include "../datatypes.h"
BEGIN_NS_NNCASE_RT_STACKVM
BEGIN_NS_NNCASE_RT_MODULE(stackvm)
// Enums
@ -1265,13 +1265,14 @@ struct tensor_call_op_t
{
opcode_t opcode;
tensor_function_t funct;
uint32_t module_id;
uint32_t function_id;
uint16_t module_id;
uint8_t num_src;
uint8_t num_dst;
tensor_call_op_t(default_init_t) noexcept { }
explicit tensor_call_op_t(uint32_t module_id, uint8_t num_src, uint8_t num_dst) noexcept
: opcode(opcode_t::TENSOR), funct(tensor_function_t::CALL), module_id(module_id), num_src(num_src), num_dst(num_dst)
explicit tensor_call_op_t(uint32_t function_id, uint16_t module_id, uint8_t num_src, uint8_t num_dst) noexcept
: opcode(opcode_t::TENSOR), funct(tensor_function_t::CALL), function_id(function_id), module_id(module_id), num_src(num_src), num_dst(num_dst)
{
}
};
@ -1576,4 +1577,4 @@ struct tensor_transpose_op_t
}
};
END_NS_NNCASE_RT_STACKVM
END_NS_NNCASE_RT_MODULE

View File

@ -15,10 +15,11 @@
#pragma once
#include "../runtime_module.h"
BEGIN_NS_NNCASE_RT_STACKVM
BEGIN_NS_NNCASE_RT_MODULE(stackvm)
NNCASE_INLINE_VAR constexpr module_type_t stackvm_module_type = to_module_type("stackvm");
NNCASE_INLINE_VAR constexpr uint32_t stackvm_module_version = 1;
NNCASE_API result<std::unique_ptr<runtime_module>> create_stackvm_runtime_module();
END_NS_NNCASE_RT_STACKVM
END_NS_NNCASE_RT_MODULE

View File

@ -14,4 +14,4 @@
*/
#pragma once
#define NNCASE_VERSION "1.0.0"
#define NNCASE_VERSION_SUFFIX "-8c384a4"
#define NNCASE_VERSION_SUFFIX "-9fd39f9"

View File

@ -6,5 +6,3 @@ endif()
if(NOT TARGET gsl-lite)
find_package(gsl-lite REQUIRED)
endif()

View File

@ -15,6 +15,16 @@ set_target_properties(nncaseruntime PROPERTIES
list(APPEND _IMPORT_CHECK_TARGETS nncaseruntime )
list(APPEND _IMPORT_CHECK_FILES_FOR_nncaseruntime "${_IMPORT_PREFIX}/lib/libnncase.runtime.a" )
# Import target "kendryte" for configuration "Release"
set_property(TARGET kendryte APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(kendryte PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "C"
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libkendryte.a"
)
list(APPEND _IMPORT_CHECK_TARGETS kendryte )
list(APPEND _IMPORT_CHECK_FILES_FOR_kendryte "${_IMPORT_PREFIX}/lib/libkendryte.a" )
# Import target "nncase_rt_modules_k210" for configuration "Release"
set_property(TARGET nncase_rt_modules_k210 APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(nncase_rt_modules_k210 PROPERTIES

View File

@ -4,7 +4,7 @@ if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
message(FATAL_ERROR "CMake >= 2.6.0 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.6...3.18)
cmake_policy(VERSION 2.6...3.19)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
@ -16,7 +16,7 @@ set(CMAKE_IMPORT_FILE_VERSION 1)
set(_targetsDefined)
set(_targetsNotDefined)
set(_expectedTargets)
foreach(_expectedTarget kernels runtime nncaseruntime runtime_stackvm kernels_k210 runtime_k210 nncase_rt_modules_k210)
foreach(_expectedTarget kernels runtime nncaseruntime runtime_stackvm kernels_k210 kendryte runtime_k210 nncase_rt_modules_k210)
list(APPEND _expectedTargets ${_expectedTarget})
if(NOT TARGET ${_expectedTarget})
list(APPEND _targetsNotDefined ${_expectedTarget})
@ -83,14 +83,17 @@ set_target_properties(runtime_stackvm PROPERTIES
add_library(kernels_k210 INTERFACE IMPORTED)
set_target_properties(kernels_k210 PROPERTIES
INTERFACE_LINK_LIBRARIES "nncaseruntime"
INTERFACE_LINK_LIBRARIES "nncaseruntime;\$<LINK_ONLY:kendryte>"
)
# Create imported target kendryte
add_library(kendryte STATIC IMPORTED)
# Create imported target runtime_k210
add_library(runtime_k210 INTERFACE IMPORTED)
set_target_properties(runtime_k210 PROPERTIES
INTERFACE_LINK_LIBRARIES "nncaseruntime"
INTERFACE_LINK_LIBRARIES "nncaseruntime;\$<LINK_ONLY:kernels_k210>;\$<LINK_ONLY:kendryte>"
)
# Create imported target nncase_rt_modules_k210