diff --git a/CMakeLists.txt b/CMakeLists.txt index 47947c2..0565b1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,10 @@ include(./cmake/macros.internal.cmake) header_directories(${SDK_ROOT}/lib) header_directories(src/${PROJ}) header_directories(kendryte-standalone-demo/${PROJ}) + +add_subdirectory(third_party/gsl-lite) +add_subdirectory(third_party/mpark-variant) +add_subdirectory(third_party/nlohmann_json) # build library first add_subdirectory(lib) diff --git a/lds/kendryte.ld b/lds/kendryte.ld index 7e1100e..2e106e4 100644 --- a/lds/kendryte.ld +++ b/lds/kendryte.ld @@ -113,7 +113,7 @@ SECTIONS { PROVIDE_HIDDEN (__init_array_start = .); KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*))) - *(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors) + KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) PROVIDE_HIDDEN (__init_array_end = .); } >ram AT>ram :ram_ro diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 3276378..038d8ab 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -42,5 +42,5 @@ SET_SOURCE_FILES_PROPERTIES(${ASSEMBLY_FILES} PROPERTIES COMPILE_FLAGS "-x assem ADD_LIBRARY(kendryte ${LIB_SRC} ) -TARGET_LINK_LIBRARIES(kendryte PUBLIC nncase) +TARGET_LINK_LIBRARIES(kendryte PUBLIC nncase-wrapper) SET_TARGET_PROPERTIES(kendryte PROPERTIES LINKER_LANGUAGE C) diff --git a/lib/bsp/syscalls.c b/lib/bsp/syscalls.c index 7b9b018..b3b57a7 100644 --- a/lib/bsp/syscalls.c +++ b/lib/bsp/syscalls.c @@ -574,6 +574,7 @@ handle_breakpoint(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t uintptr_t __attribute__((weak)) handle_misaligned_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]) { + dump_core("misaligned load", cause, epc, regs, fregs); /* notice this function only support 16bit or 32bit instruction */ bool compressed = (*(unsigned short *)epc & 3) != 3; @@ -665,6 +666,7 @@ handle_fault_load(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t uintptr_t __attribute__((weak)) handle_misaligned_store(uintptr_t cause, uintptr_t epc, uintptr_t regs[32], uintptr_t fregs[32]) { + dump_core("misaligned store", cause, epc, regs, fregs); /* notice this function only support 16bit or 32bit instruction */ bool compressed = (*(unsigned short *)epc & 3) != 3; diff --git a/lib/drivers/include/kpu.h b/lib/drivers/include/kpu.h index 474d14e..a7e31cc 100644 --- a/lib/drivers/include/kpu.h +++ b/lib/drivers/include/kpu.h @@ -691,6 +691,7 @@ typedef struct struct { void* nncase_ctx; + uint32_t nncase_version; }; }; } kpu_model_context_t; diff --git a/lib/nncase/CMakeLists.txt b/lib/nncase/CMakeLists.txt index d136ffa..d0b5ae9 100644 --- a/lib/nncase/CMakeLists.txt +++ b/lib/nncase/CMakeLists.txt @@ -1,11 +1,6 @@ -include_directories(${SDK_ROOT}/third_party/xtl/include ${CMAKE_CURRENT_LIST_DIR}/nncase/include) +add_subdirectory(v0) +add_subdirectory(v1) -FILE(GLOB_RECURSE NNCASE_SRC - "${CMAKE_CURRENT_LIST_DIR}/*.c" - "${CMAKE_CURRENT_LIST_DIR}/*.cpp" - ) - -ADD_LIBRARY(nncase - ${NNCASE_SRC} - ) -TARGET_COMPILE_OPTIONS(nncase PRIVATE -O2) \ No newline at end of file +add_library(nncase-wrapper STATIC nncase.cpp) +target_link_libraries(nncase-wrapper PRIVATE nncase-v0 nncase-v1) +target_include_directories(nncase-wrapper PUBLIC include) diff --git a/lib/nncase/nncase.cpp b/lib/nncase/nncase.cpp index 619886b..b4fb0de 100644 --- a/lib/nncase/nncase.cpp +++ b/lib/nncase/nncase.cpp @@ -12,172 +12,51 @@ * See the License for the specific 
language governing permissions and * limitations under the License. */ -#include -#include -#include -#include +#include "v0/nncase_v0.h" +#include "v1/nncase_v1.h" #include +#include +#include #include -using namespace nncase; -using namespace nncase::runtime; - -#define NNCASE_DEBUG 0 - -namespace +extern "C" { -void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata) -{ - if (is_memory_cache((uintptr_t)src)) + struct model_header { - std::copy_n(src, input_size, dest); - src -= 0x40000000; + uint32_t identifier; + uint32_t version; + }; + + int nncase_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer) + { + auto header = reinterpret_cast(buffer); + if (header->version == 4) + return nncase_v0_load_kmodel(ctx, buffer); + else + return nncase_v1_load_kmodel(ctx, buffer); } - dmac_set_irq(dma_ch, callback, userdata, 1); - dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT, - DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8); - dmac_wait_done(dma_ch); -} -} - -class nncase_context -{ -public: - int load_kmodel(const uint8_t *buffer) + int nncase_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size) { - int ret = interpreter_.try_load_model(buffer) ? 0 : -1; - - uint32_t size = interpreter_.model_size(buffer); - uint8_t *buffer_iomem = (uint8_t *)((uintptr_t)buffer - IOMEM); - const uint8_t *buffer_cache = buffer; - memcpy(buffer_iomem, buffer_cache, size); - for (int i = 0; i < size; i++) - { - if (buffer_iomem[i] != buffer_cache[i]) - { - printf("flush model fail:%d %x %x \n", i, buffer_iomem[i], buffer_cache[i]); - while (1) - ; - } - } - return ret; + if (ctx->nncase_version == 0) + return nncase_v0_get_output(ctx, index, data, size); + else + return nncase_v1_get_output(ctx, index, data, size); } - int get_output(uint32_t index, uint8_t **data, size_t *size) + void nncase_model_free(kpu_model_context_t *ctx) { - if (index >= interpreter_.outputs_size()) - return -1; - - auto mem = interpreter_.memory_at(interpreter_.output_at(index)); - *data = mem.data(); - *size = mem.size(); - return 0; + if (ctx->nncase_version == 0) + return nncase_v0_model_free(ctx); + else + return nncase_v1_model_free(ctx); } - int run_kmodel(const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata) + int nncase_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata) { - done_callback_ = done_callback; - userdata_ = userdata; - interpreter_.dma_ch(dma_ch); - - auto input = interpreter_.input_at(0); - auto mem = interpreter_.memory_at(input); - if (input.memory_type == mem_main) - { - std::copy(src, src + mem.size(), mem.begin()); - interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); - return 0; - } - else if (input.memory_type == mem_k210_kpu) - { - auto shape = interpreter_.input_shape_at(0); - kernels::k210::kpu_upload(src, mem.data(), shape); - on_upload_done(); - - return 0; - } - - return -1; - } - -private: - void on_done() - { -#if NNCASE_DEBUG - printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6); -#endif - - if (done_callback_) - done_callback_(userdata_); - } - - void on_upload_done() - { - interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); - } - - static void done_thunk(void *userdata) - { - reinterpret_cast(userdata)->on_done(); - } - - static 
void on_error_thunk(const char *err, void *userdata) - { -#if NNCASE_DEBUG - printf("Fatal: %s\n", err); -#endif - } - - static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata) - { -#if NNCASE_DEBUG - printf("%s: %fms\n", node_opcode_names(op).data(), duration.count() / 1e6); -#endif - } - - static int upload_done_thunk(void *userdata) - { - reinterpret_cast(userdata)->on_upload_done(); - return 0; - } - -private: - interpreter_t interpreter_; - kpu_done_callback_t done_callback_; - void *userdata_; -}; - -int nncase_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer) -{ - auto nnctx = new (std::nothrow) nncase_context(); - if (ctx) - { - ctx->is_nncase = 1; - ctx->nncase_ctx = nnctx; - return nnctx->load_kmodel(buffer); - } - else - { - return -1; + if (ctx->nncase_version == 0) + return nncase_v0_run_kmodel(ctx, src, dma_ch, done_callback, userdata); + else + return nncase_v1_run_kmodel(ctx, src, dma_ch, done_callback, userdata); } } - -int nncase_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size) -{ - auto nnctx = reinterpret_cast(ctx->nncase_ctx); - return nnctx->get_output(index, data, size); -} - -void nncase_model_free(kpu_model_context_t *ctx) -{ - auto nnctx = reinterpret_cast(ctx->nncase_ctx); - delete nnctx; - ctx->nncase_ctx = nullptr; -} - -int nncase_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata) -{ - auto nnctx = reinterpret_cast(ctx->nncase_ctx); - return nnctx->run_kmodel(src, dma_ch, done_callback, userdata); -} diff --git a/lib/nncase/v0/CMakeLists.txt b/lib/nncase/v0/CMakeLists.txt new file mode 100644 index 0000000..2c97aad --- /dev/null +++ b/lib/nncase/v0/CMakeLists.txt @@ -0,0 +1,11 @@ +include_directories(${SDK_ROOT}/third_party/xtl/include ${CMAKE_CURRENT_LIST_DIR}/nncase/include) + +FILE(GLOB_RECURSE NNCASE_SRC + "${CMAKE_CURRENT_LIST_DIR}/*.c" + "${CMAKE_CURRENT_LIST_DIR}/*.cpp" + ) + +ADD_LIBRARY(nncase-v0 + ${NNCASE_SRC} + ) +TARGET_COMPILE_OPTIONS(nncase-v0 PRIVATE -O2) \ No newline at end of file diff --git a/lib/nncase/include/datatypes.h b/lib/nncase/v0/include/datatypes.h similarity index 100% rename from lib/nncase/include/datatypes.h rename to lib/nncase/v0/include/datatypes.h diff --git a/lib/nncase/include/io_utils.h b/lib/nncase/v0/include/io_utils.h similarity index 100% rename from lib/nncase/include/io_utils.h rename to lib/nncase/v0/include/io_utils.h diff --git a/lib/nncase/include/kernels/cpu/cpu_kernels.h b/lib/nncase/v0/include/kernels/cpu/cpu_kernels.h similarity index 100% rename from lib/nncase/include/kernels/cpu/cpu_kernels.h rename to lib/nncase/v0/include/kernels/cpu/cpu_kernels.h diff --git a/lib/nncase/include/kernels/k210/k210_kernels.h b/lib/nncase/v0/include/kernels/k210/k210_kernels.h similarity index 100% rename from lib/nncase/include/kernels/k210/k210_kernels.h rename to lib/nncase/v0/include/kernels/k210/k210_kernels.h diff --git a/lib/nncase/include/kernels/kernel_utils.h b/lib/nncase/v0/include/kernels/kernel_utils.h similarity index 100% rename from lib/nncase/include/kernels/kernel_utils.h rename to lib/nncase/v0/include/kernels/kernel_utils.h diff --git a/lib/nncase/include/kernels/neutral/neutral_kernels.h b/lib/nncase/v0/include/kernels/neutral/neutral_kernels.h similarity index 100% rename from lib/nncase/include/kernels/neutral/neutral_kernels.h rename to lib/nncase/v0/include/kernels/neutral/neutral_kernels.h diff --git 
a/lib/nncase/include/kernels/riscv/neutral_kernels.h b/lib/nncase/v0/include/kernels/riscv/neutral_kernels.h similarity index 100% rename from lib/nncase/include/kernels/riscv/neutral_kernels.h rename to lib/nncase/v0/include/kernels/riscv/neutral_kernels.h diff --git a/lib/nncase/v0/include/quantize.h b/lib/nncase/v0/include/quantize.h new file mode 100644 index 0000000..829a4c2 --- /dev/null +++ b/lib/nncase/v0/include/quantize.h @@ -0,0 +1,109 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "datatypes.h" +#include +#include +#include + +namespace nncase +{ +namespace quant +{ + template + value_range get_range(TIt begin, TIt end) + { + float min = std::numeric_limits::max(); + float max = std::numeric_limits::min(); + while (begin != end) + { + auto value = *begin++; + auto fc = std::fpclassify(value); + if (fc == FP_NORMAL || fc == FP_SUBNORMAL || fc == FP_ZERO) + { + min = std::min(min, value); + max = std::max(max, value); + } + } + + return { min, max }; + } + + inline value_range fixup_range(value_range range) + { + if (range.min < -1e3) + range.min = -1e3; + if (range.max > 1e3) + range.max = 1e3; + auto r = range.max - range.min; + if (r == 0) + r = 0.1f; + else if (r < 0.01f) + r = 0.01f; + range.max = range.min + r; + + if (range.max < 0) + range.max = 0; + if (range.min > 0) + range.min = 0; + return range; + } + + inline quant_param_t get_quant_param(value_range range, int32_t bits) + { + range = fixup_range(range); + auto r = range.max - range.min; + auto scale = ((1LL << bits) - 1) / r; + auto bias = std::round(-range.min * scale); + assert(bias >= 0); + return { static_cast(bias), scale }; + } + + inline fixed_mul get_fixed_mul(float value, int32_t max_bits, uint8_t max_shift, bool is_signed) + { + assert(!is_signed || value >= 0); + + auto bits = is_signed ? 
max_bits - 1 : max_bits; + int32_t shift = 0; + float mul = 0; + + if (std::abs(value) > 1) + { + int mul_shift; + mul = std::frexp(value, &mul_shift); + shift = std::min((int32_t)max_shift, bits - mul_shift); + mul = mul * std::pow(2.f, shift + mul_shift); + } + else if (value == 0) + { + mul = 0; + shift = 0; + } + else + { + int mul_shift; + mul = std::frexp(value, &mul_shift); + shift = std::min(max_shift + mul_shift, bits); + mul = mul * std::pow(2.f, shift); + shift -= mul_shift; + } + + assert(std::abs(mul) < std::pow(2, bits)); + assert(shift >= 0 && shift <= max_shift); + assert(std::abs(value - mul * std::pow(2, -shift)) <= std::numeric_limits::epsilon()); + return { mul, static_cast(shift) }; + } +} +} diff --git a/lib/nncase/include/runtime/binary_reader.h b/lib/nncase/v0/include/runtime/binary_reader.h similarity index 100% rename from lib/nncase/include/runtime/binary_reader.h rename to lib/nncase/v0/include/runtime/binary_reader.h diff --git a/lib/nncase/include/runtime/binary_writer.h b/lib/nncase/v0/include/runtime/binary_writer.h similarity index 100% rename from lib/nncase/include/runtime/binary_writer.h rename to lib/nncase/v0/include/runtime/binary_writer.h diff --git a/lib/nncase/include/runtime/cpu/cpu_ops_body.h b/lib/nncase/v0/include/runtime/cpu/cpu_ops_body.h similarity index 100% rename from lib/nncase/include/runtime/cpu/cpu_ops_body.h rename to lib/nncase/v0/include/runtime/cpu/cpu_ops_body.h diff --git a/lib/nncase/include/runtime/cpu/interpreter.h b/lib/nncase/v0/include/runtime/cpu/interpreter.h similarity index 100% rename from lib/nncase/include/runtime/cpu/interpreter.h rename to lib/nncase/v0/include/runtime/cpu/interpreter.h diff --git a/lib/nncase/include/runtime/interpreter.h b/lib/nncase/v0/include/runtime/interpreter.h similarity index 100% rename from lib/nncase/include/runtime/interpreter.h rename to lib/nncase/v0/include/runtime/interpreter.h diff --git a/lib/nncase/include/runtime/k210/interpreter.h b/lib/nncase/v0/include/runtime/k210/interpreter.h similarity index 100% rename from lib/nncase/include/runtime/k210/interpreter.h rename to lib/nncase/v0/include/runtime/k210/interpreter.h diff --git a/lib/nncase/include/runtime/k210/k210_ops_body.h b/lib/nncase/v0/include/runtime/k210/k210_ops_body.h similarity index 100% rename from lib/nncase/include/runtime/k210/k210_ops_body.h rename to lib/nncase/v0/include/runtime/k210/k210_ops_body.h diff --git a/lib/nncase/include/runtime/k210/k210_runtime_op_utility.h b/lib/nncase/v0/include/runtime/k210/k210_runtime_op_utility.h similarity index 100% rename from lib/nncase/include/runtime/k210/k210_runtime_op_utility.h rename to lib/nncase/v0/include/runtime/k210/k210_runtime_op_utility.h diff --git a/lib/nncase/include/runtime/k210/k210_sim_types.h b/lib/nncase/v0/include/runtime/k210/k210_sim_types.h similarity index 100% rename from lib/nncase/include/runtime/k210/k210_sim_types.h rename to lib/nncase/v0/include/runtime/k210/k210_sim_types.h diff --git a/lib/nncase/include/runtime/kernel_registry.h b/lib/nncase/v0/include/runtime/kernel_registry.h similarity index 100% rename from lib/nncase/include/runtime/kernel_registry.h rename to lib/nncase/v0/include/runtime/kernel_registry.h diff --git a/lib/nncase/include/runtime/model.h b/lib/nncase/v0/include/runtime/model.h similarity index 100% rename from lib/nncase/include/runtime/model.h rename to lib/nncase/v0/include/runtime/model.h diff --git a/lib/nncase/include/runtime/neutral/neutral_ops_body.h 
b/lib/nncase/v0/include/runtime/neutral/neutral_ops_body.h similarity index 100% rename from lib/nncase/include/runtime/neutral/neutral_ops_body.h rename to lib/nncase/v0/include/runtime/neutral/neutral_ops_body.h diff --git a/lib/nncase/include/runtime/neutral/neutral_sim_types.h b/lib/nncase/v0/include/runtime/neutral/neutral_sim_types.h similarity index 100% rename from lib/nncase/include/runtime/neutral/neutral_sim_types.h rename to lib/nncase/v0/include/runtime/neutral/neutral_sim_types.h diff --git a/lib/nncase/include/runtime/nnil.h b/lib/nncase/v0/include/runtime/nnil.h similarity index 100% rename from lib/nncase/include/runtime/nnil.h rename to lib/nncase/v0/include/runtime/nnil.h diff --git a/lib/nncase/include/runtime/node_body.h b/lib/nncase/v0/include/runtime/node_body.h similarity index 100% rename from lib/nncase/include/runtime/node_body.h rename to lib/nncase/v0/include/runtime/node_body.h diff --git a/lib/nncase/include/runtime/runtime_op.def b/lib/nncase/v0/include/runtime/runtime_op.def similarity index 100% rename from lib/nncase/include/runtime/runtime_op.def rename to lib/nncase/v0/include/runtime/runtime_op.def diff --git a/lib/nncase/include/runtime/runtime_op.h b/lib/nncase/v0/include/runtime/runtime_op.h similarity index 100% rename from lib/nncase/include/runtime/runtime_op.h rename to lib/nncase/v0/include/runtime/runtime_op.h diff --git a/lib/nncase/include/runtime/runtime_op_utility.h b/lib/nncase/v0/include/runtime/runtime_op_utility.h similarity index 100% rename from lib/nncase/include/runtime/runtime_op_utility.h rename to lib/nncase/v0/include/runtime/runtime_op_utility.h diff --git a/lib/nncase/include/runtime/span_reader.h b/lib/nncase/v0/include/runtime/span_reader.h similarity index 100% rename from lib/nncase/include/runtime/span_reader.h rename to lib/nncase/v0/include/runtime/span_reader.h diff --git a/lib/nncase/include/runtime/target_interpreter.h b/lib/nncase/v0/include/runtime/target_interpreter.h similarity index 100% rename from lib/nncase/include/runtime/target_interpreter.h rename to lib/nncase/v0/include/runtime/target_interpreter.h diff --git a/lib/nncase/include/target_config.h b/lib/nncase/v0/include/target_config.h similarity index 100% rename from lib/nncase/include/target_config.h rename to lib/nncase/v0/include/target_config.h diff --git a/lib/nncase/include/targets/target.h b/lib/nncase/v0/include/targets/target.h similarity index 100% rename from lib/nncase/include/targets/target.h rename to lib/nncase/v0/include/targets/target.h diff --git a/lib/nncase/v0/nncase_v0.cpp b/lib/nncase/v0/nncase_v0.cpp new file mode 100644 index 0000000..1952fd9 --- /dev/null +++ b/lib/nncase/v0/nncase_v0.cpp @@ -0,0 +1,184 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include + +using namespace nncase; +using namespace nncase::runtime; + +#define NNCASE_DEBUG 0 + +namespace +{ +void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata) +{ + if (is_memory_cache((uintptr_t)src)) + { + std::copy_n(src, input_size, dest); + src -= 0x40000000; + } + + dmac_set_irq(dma_ch, callback, userdata, 1); + dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT, + DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8); + dmac_wait_done(dma_ch); +} +} + +class nncase_context +{ +public: + int load_kmodel(const uint8_t *buffer) + { + int ret = interpreter_.try_load_model(buffer) ? 0 : -1; + + uint32_t size = interpreter_.model_size(buffer); + uint8_t *buffer_iomem = (uint8_t *)((uintptr_t)buffer - IOMEM); + const uint8_t *buffer_cache = buffer; + memcpy(buffer_iomem, buffer_cache, size); + for (int i = 0; i < size; i++) + { + if (buffer_iomem[i] != buffer_cache[i]) + { + printf("flush model fail:%d %x %x \n", i, buffer_iomem[i], buffer_cache[i]); + while (1) + ; + } + } + return ret; + } + + int get_output(uint32_t index, uint8_t **data, size_t *size) + { + if (index >= interpreter_.outputs_size()) + return -1; + + auto mem = interpreter_.memory_at(interpreter_.output_at(index)); + *data = mem.data(); + *size = mem.size(); + return 0; + } + + int run_kmodel(const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata) + { + done_callback_ = done_callback; + userdata_ = userdata; + interpreter_.dma_ch(dma_ch); + + auto input = interpreter_.input_at(0); + auto mem = interpreter_.memory_at(input); + if (input.memory_type == mem_main) + { + std::copy(src, src + mem.size(), mem.begin()); + interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); + return 0; + } + else if (input.memory_type == mem_k210_kpu) + { + auto shape = interpreter_.input_shape_at(0); + kernels::k210::kpu_upload(src, mem.data(), shape); + on_upload_done(); + + return 0; + } + + return -1; + } + +private: + void on_done() + { +#if NNCASE_DEBUG + printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6); +#endif + + if (done_callback_) + done_callback_(userdata_); + } + + void on_upload_done() + { + interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this); + } + + static void done_thunk(void *userdata) + { + reinterpret_cast(userdata)->on_done(); + } + + static void on_error_thunk(const char *err, void *userdata) + { +#if NNCASE_DEBUG + printf("Fatal: %s\n", err); +#endif + } + + static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata) + { +#if NNCASE_DEBUG + printf("%s: %fms\n", node_opcode_names(op).data(), duration.count() / 1e6); +#endif + } + + static int upload_done_thunk(void *userdata) + { + reinterpret_cast(userdata)->on_upload_done(); + return 0; + } + +private: + interpreter_t interpreter_; + kpu_done_callback_t done_callback_; + void *userdata_; +}; + +int nncase_v0_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer) +{ + auto nnctx = new (std::nothrow) nncase_context(); + if (ctx) + { + ctx->is_nncase = 1; + ctx->nncase_ctx = nnctx; + ctx->nncase_version = 0; + return nnctx->load_kmodel(buffer); + } + else + { + return -1; + } +} + +int nncase_v0_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size) +{ + auto nnctx = reinterpret_cast(ctx->nncase_ctx); 
+ return nnctx->get_output(index, data, size); +} + +void nncase_v0_model_free(kpu_model_context_t *ctx) +{ + auto nnctx = reinterpret_cast(ctx->nncase_ctx); + delete nnctx; + ctx->nncase_ctx = nullptr; +} + +int nncase_v0_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata) +{ + auto nnctx = reinterpret_cast(ctx->nncase_ctx); + return nnctx->run_kmodel(src, dma_ch, done_callback, userdata); +} diff --git a/lib/nncase/v0/nncase_v0.h b/lib/nncase/v0/nncase_v0.h new file mode 100644 index 0000000..227cec2 --- /dev/null +++ b/lib/nncase/v0/nncase_v0.h @@ -0,0 +1,33 @@ +/* Copyright 2019 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef _NNCASE_V0_H +#define _NNCASE_V0_H + +#include "kpu.h" + +#ifdef __cplusplus +extern "C" { +#endif + +int nncase_v0_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer); +int nncase_v0_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size); +void nncase_v0_model_free(kpu_model_context_t *ctx); +int nncase_v0_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/lib/nncase/runtime/cpu/cpu_ops.cpp b/lib/nncase/v0/runtime/cpu/cpu_ops.cpp similarity index 100% rename from lib/nncase/runtime/cpu/cpu_ops.cpp rename to lib/nncase/v0/runtime/cpu/cpu_ops.cpp diff --git a/lib/nncase/runtime/interpreter.cpp b/lib/nncase/v0/runtime/interpreter.cpp similarity index 100% rename from lib/nncase/runtime/interpreter.cpp rename to lib/nncase/v0/runtime/interpreter.cpp diff --git a/lib/nncase/runtime/k210/interpreter.cpp b/lib/nncase/v0/runtime/k210/interpreter.cpp similarity index 100% rename from lib/nncase/runtime/k210/interpreter.cpp rename to lib/nncase/v0/runtime/k210/interpreter.cpp diff --git a/lib/nncase/runtime/k210/k210_ops.cpp b/lib/nncase/v0/runtime/k210/k210_ops.cpp similarity index 100% rename from lib/nncase/runtime/k210/k210_ops.cpp rename to lib/nncase/v0/runtime/k210/k210_ops.cpp diff --git a/lib/nncase/runtime/kernel_registry.cpp b/lib/nncase/v0/runtime/kernel_registry.cpp similarity index 100% rename from lib/nncase/runtime/kernel_registry.cpp rename to lib/nncase/v0/runtime/kernel_registry.cpp diff --git a/lib/nncase/runtime/neutral/neutral_ops.cpp b/lib/nncase/v0/runtime/neutral/neutral_ops.cpp similarity index 100% rename from lib/nncase/runtime/neutral/neutral_ops.cpp rename to lib/nncase/v0/runtime/neutral/neutral_ops.cpp diff --git a/lib/nncase/v1/CMakeLists.txt b/lib/nncase/v1/CMakeLists.txt new file mode 100644 index 0000000..7c5b100 --- /dev/null +++ b/lib/nncase/v1/CMakeLists.txt @@ -0,0 +1,6 @@ + +set(nncaseruntime_DIR ${CMAKE_CURRENT_LIST_DIR}/lib/cmake/nncaseruntime) +find_package(nncaseruntime REQUIRED) + +add_library(nncase-v1 STATIC nncase_v1.cpp) +target_link_libraries(nncase-v1 PRIVATE -Wl,-start-group nncaseruntime 
nncase_rt_modules_k210 -Wl,-end-group) \ No newline at end of file diff --git a/lib/nncase/v1/include/nncase/kernels/convolution.h b/lib/nncase/v1/include/nncase/kernels/convolution.h new file mode 100644 index 0000000..3617e07 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/convolution.h @@ -0,0 +1,28 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS + +NNCASE_API result conv2d(const float *input, const float *weights, const float *bias, float *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &w_shape, const runtime_shape_t &w_strides, + const runtime_shape_t &bias_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w, + int32_t groups, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/optimized/convolution.h b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/convolution.h new file mode 100644 index 0000000..5a7a972 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/convolution.h @@ -0,0 +1,28 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "runtime_types.h" +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS_CPU_OPT + +result conv2d(const float *input, const float *weights, const float *bias, float *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &w_shape, const runtime_shape_t &w_strides, + const runtime_shape_t &bias_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w, + int32_t groups, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range fused_activation, kernel_context &context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_OPT \ No newline at end of file diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/optimized/runtime_types.h b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/runtime_types.h new file mode 100644 index 0000000..b0fe0f7 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/runtime_types.h @@ -0,0 +1,54 @@ +/* Copyright 2020 Canaan Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include +#include +#include +#include + +#define BEGIN_NS_NNCASE_KERNELS_CPU_OPT \ + namespace nncase \ + { \ + namespace kernels \ + { \ + namespace cpu \ + { \ + namespace optimized \ + { + +#define END_NS_NNCASE_KERNELS_CPU_OPT \ + } \ + } \ + } \ + } + +#define TYPE_IMPL_SELECT(type, IMPL) \ + switch (runtime::get_bytes(type)) \ + { \ + IMPL(1, uint8_t); \ + IMPL(2, uint16_t); \ + IMPL(4, uint32_t); \ + IMPL(8, uint64_t); \ + default: \ + return err(std::errc::not_supported); \ + } + +enum copy_impl_select +{ + all_contiguous, + src_contiguous, + dest_contiguous +}; diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/optimized/tensor_compute.h b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/tensor_compute.h new file mode 100644 index 0000000..4a0b07b --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/optimized/tensor_compute.h @@ -0,0 +1,33 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "runtime_types.h" +#include + +BEGIN_NS_NNCASE_KERNELS_CPU_OPT + +NNCASE_API result concat(datatype_t type, gsl::span inputs, gsl::byte *output, const runtime_shape_t &out_shape, + gsl::span in_strides, const runtime_shape_t &out_strides, size_t axis, const runtime_shape_t &concat_dims, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result slice(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_shape_t &begins, const runtime_shape_t &ends, const runtime_axis_t &strides, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result copy(datatype_t type, const gsl::byte *src, gsl::byte *dest, + const runtime_shape_t &shape, const runtime_shape_t &src_strides, const runtime_shape_t &dest_strides, + int dims_offset, copy_impl_select impl_select, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_OPT diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/reference/convolution.h b/lib/nncase/v1/include/nncase/kernels/cpu/reference/convolution.h new file mode 100644 index 0000000..95295a1 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/reference/convolution.h @@ -0,0 +1,26 @@ +/* Copyright 2020 Canaan Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include "runtime_types.h" + +BEGIN_NS_NNCASE_KERNELS_CPU_REF + +NNCASE_API result conv2d(const float *input, const float *weights, const float *bias, float *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &w_shape, const runtime_shape_t &w_strides, + const runtime_shape_t &bias_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w, + int32_t groups, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_REF diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/reference/nnil.h b/lib/nncase/v1/include/nncase/kernels/cpu/reference/nnil.h new file mode 100644 index 0000000..29e924e --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/reference/nnil.h @@ -0,0 +1,23 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "runtime_types.h" +#include + +BEGIN_NS_NNCASE_KERNELS_CPU_REF + +NNCASE_API result nnil_unary_method(const float *input, float *output, size_t count, gsl::span body, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_REF diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/reference/reduce_window.h b/lib/nncase/v1/include/nncase/kernels/cpu/reference/reduce_window.h new file mode 100644 index 0000000..422911a --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/reference/reduce_window.h @@ -0,0 +1,25 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include "runtime_types.h" +#include + +BEGIN_NS_NNCASE_KERNELS_CPU_REF + +NNCASE_API result reduce_window2d(reduce_op_t op, const float *input, float init_value, float *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w, + int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_REF diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/reference/runtime_types.h b/lib/nncase/v1/include/nncase/kernels/cpu/reference/runtime_types.h new file mode 100644 index 0000000..f12a0b9 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/reference/runtime_types.h @@ -0,0 +1,72 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include + +#define BEGIN_NS_NNCASE_KERNELS_CPU_REF \ + namespace nncase \ + { \ + namespace kernels \ + { \ + namespace cpu \ + { \ + namespace reference \ + { + +#define END_NS_NNCASE_KERNELS_CPU_REF \ + } \ + } \ + } \ + } + +BEGIN_NS_NNCASE_KERNELS_CPU_REF + +namespace detail +{ +template +result apply_impl(Callable &&callable, runtime_shape_t index_prefix, runtime_shape_t::const_iterator index_begin, runtime_shape_t::const_iterator index_end) noexcept +{ + const auto head = *index_begin++; + index_prefix.push_back(0); + if (index_begin == index_end) + { + for (size_t i = 0; i < head; i++) + { + index_prefix.back() = i; + try_(callable(index_prefix)); + } + } + else + { + for (size_t i = 0; i < head; i++) + { + index_prefix.back() = i; + try_(apply_impl(std::forward(callable), index_prefix, index_begin, index_end)); + } + } + + return ok(); +} +} + +template +result apply(const runtime_shape_t &shape, Callable &&callable) noexcept +{ + return detail::apply_impl(std::forward(callable), runtime_shape_t(), shape.cbegin(), shape.cend()); +} + +END_NS_NNCASE_KERNELS_CPU_REF diff --git a/lib/nncase/v1/include/nncase/kernels/cpu/reference/tensor_compute.h b/lib/nncase/v1/include/nncase/kernels/cpu/reference/tensor_compute.h new file mode 100644 index 0000000..261b9cf --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/cpu/reference/tensor_compute.h @@ -0,0 +1,70 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include +#include "runtime_types.h" + +BEGIN_NS_NNCASE_KERNELS_CPU_REF + +NNCASE_API result batch_to_space(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &block_shape, const runtime_paddings_t &crops, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result broadcast(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_shape, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result concat(datatype_t type, gsl::span inputs, gsl::byte *output, const runtime_shape_t &out_shape, + gsl::span in_strides, const runtime_shape_t &out_strides, size_t axis, const runtime_shape_t &concat_dims, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result convert(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result copy(datatype_t type, const gsl::byte *src, gsl::byte *dest, + const runtime_shape_t &shape, const runtime_shape_t &src_strides, const runtime_shape_t &dest_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result transpose(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &perm, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result binary(binary_op_t op, const float *input_a, const float *input_b, float *output, + const runtime_shape_t &in_a_shape, const runtime_shape_t &in_a_strides, const runtime_shape_t &in_b_shape, + const runtime_shape_t &in_b_strides, const runtime_shape_t &out_strides, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result dequantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result lut1d(datatype_t type, const gsl::byte *input, const gsl::byte *table, gsl::byte *output, const runtime_shape_t &shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const scalar &min, const scalar &max) noexcept; + +NNCASE_API result pad(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_paddings_t &paddings, pad_mode_t mode, const scalar &pad_value, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result quantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result unary(unary_op_t op, const float *input, float *output, const runtime_shape_t &shape, + const runtime_shape_t &in_strides, const 
runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result reduce(reduce_op_t op, float init_value, const float *input, float *output, const runtime_shape_t &in_shape, const runtime_shape_t &axis, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, bool keep_dims, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result slice(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_shape_t &begins, const runtime_shape_t &ends, const runtime_axis_t &strides, + kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS_CPU_REF diff --git a/lib/nncase/v1/include/nncase/kernels/k210/k210_kernels.h b/lib/nncase/v1/include/nncase/kernels/k210/k210_kernels.h new file mode 100644 index 0000000..cafccd4 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/k210/k210_kernels.h @@ -0,0 +1,321 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS_K210 + +namespace detail +{ +template +struct pool_partial_type; + +template <> +struct pool_partial_type +{ + using type = uint32_t; +}; + +template <> +struct pool_partial_type +{ + using type = float; +}; + +template +using pool_partial_type_t = typename pool_partial_type::type; +} + +result kpu_upload(const uint8_t *src, uint8_t *dest, const runtime::k210::kpu_shape_t &in_shape, uint32_t dma_ch); + +inline result kpu_download(const uint8_t *src, uint8_t *dest, const runtime::k210::kpu_shape_t &in_shape) +{ + using namespace runtime::k210; + + if (in_shape[3] % 64 == 0) + { + std::copy(src, src + kernels::detail::compute_size(in_shape), dest); + } + else + { + auto layout = get_kpu_row_layout(in_shape[3]); + auto fmap_size = get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]); + + for (uint32_t batch = 0; batch < in_shape[0]; batch++) + { + auto batch_origin = src + (size_t)batch * fmap_size; + for (uint32_t oc = 0; oc < in_shape[1]; oc++) + { + auto channel_origin = batch_origin + (size_t)oc / layout.groups * layout.row_len * in_shape[2] * 64 + (size_t)oc % layout.groups * layout.row_pitch; + for (uint32_t y = 0; y < in_shape[2]; y++) + { + auto y_origin = channel_origin + (size_t)y * layout.row_len * 64; + for (uint32_t x = 0; x < in_shape[3]; x++) + *dest++ = y_origin[x]; + } + } + } + } + + return ok(); +} + +template +void kpu_conv2d(const uint8_t *input, int64_t *workspace, uint8_t *output, const uint8_t *weights, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, uint8_t pad_value, int32_t arg_x, + int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const runtime::k210::kpu_batchnorm_segment *batchnorm, const runtime::k210::kpu_activation_table_t &activation) +{ + const auto channel_size = size_t(in_h) * in_w; + // conv + { + 
auto out_it = workspace; + const auto pad = FilterSize == 1 ? 0 : 1; + const auto groups = IsDepthwise ? out_channels : 1; + const auto g_ic = IsDepthwise ? 1 : in_channels / groups; + const auto g_oc = IsDepthwise ? 1 : out_channels; + + for (int32_t og = 0; og < groups; og++) + { + const uint8_t *w_group_p = weights + (size_t)og * g_oc * g_ic * FilterSize * FilterSize; + + for (int32_t oc = 0; oc < g_oc; oc++) + { + const uint8_t *w_oc_p = w_group_p + (size_t)oc * g_ic * FilterSize * FilterSize; + + for (int32_t oy = 0; oy < in_h; oy++) + { + for (int32_t ox = 0; ox < in_w; ox++) + { + const int32_t in_y_origin = oy - pad; + const int32_t in_x_origin = ox - pad; + int64_t value = 0; + int64_t sum_x = 0, sum_w = 0; + + for (int32_t ic = 0; ic < g_ic; ic++) + { + const uint8_t *in_c_p = input + ((size_t)og * g_ic + ic) * in_h * in_w; + const uint8_t *w_ic_p = w_oc_p + (size_t)ic * FilterSize * FilterSize; + + for (int32_t ky = 0; ky < FilterSize; ky++) + { + for (int32_t kx = 0; kx < FilterSize; kx++) + { + const int32_t in_y = in_y_origin + ky; + const int32_t in_x = in_x_origin + kx; + + uint8_t x; + if (in_x < 0 || in_x >= in_w + || in_y < 0 || in_y >= in_h) + x = pad_value; + else + x = in_c_p[in_y * in_w + in_x]; + + uint8_t w = w_ic_p[ky * FilterSize + kx]; + + sum_x += x; + sum_w += w; + value += (int32_t)x * w; + } + } + } + + *out_it++ = value + (arg_x * sum_x >> shift_x) + (arg_w * sum_w >> shift_w) + arg_add * g_ic; + } + } + } + } + } + + // bn act + { + auto src_it = workspace; + auto out_it = output; + for (int32_t oc = 0; oc < out_channels; oc++) + { + const auto &bn = batchnorm[oc]; + for (size_t i = 0; i < channel_size; i++) + { + auto value = (*src_it++ * bn.mul >> bn.shift) + bn.add; + auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const runtime::k210::kpu_activation_segment &seg) { + return value > seg.start_x; + }); + auto act_value = runtime::carry_shift((value - seg.start_x) * seg.mul, seg.shift) + seg.add; + *out_it++ = (uint8_t)kernels::detail::clamp(act_value, int64_t(0), int64_t(255)); + } + } + } +} + +template +inline void kpu_pool2d(const T *input, T *output, int32_t in_h, int32_t in_w, int32_t in_channels, runtime::k210::kpu_pool_type_t pool_type) +{ + using namespace runtime::k210; + using partial_t = detail::pool_partial_type_t; + + const auto filter = get_kpu_filter_size(pool_type); + const auto stride = get_kpu_filter_stride(pool_type); + const auto out_h = get_kpu_pool_output_size(in_h, pool_type); + const auto out_w = get_kpu_pool_output_size(in_w, pool_type); + + for (int32_t oc = 0; oc < in_channels; oc++) + { + auto in_c_p = input + (size_t)oc * in_h * in_w; + + for (int32_t oy = 0; oy < out_h; oy++) + { + for (int32_t ox = 0; ox < out_w; ox++) + { + const int32_t in_y_origin = oy * stride; + const int32_t in_x_origin = ox * stride; + partial_t value = 0; + + switch (pool_type) + { + case kpu_pool_bypass: + { + const int32_t in_y = in_y_origin; + const int32_t in_x = in_x_origin; + + value = in_c_p[in_y * in_w + in_x]; + break; + } + case kpu_pool_max_2_s2: + case kpu_pool_max_2_s1: + case kpu_pool_max_4_s4: + { + value = std::numeric_limits::lowest(); + for (int32_t ky = 0; ky < filter; ky++) + { + for (int32_t kx = 0; kx < filter; kx++) + { + const int32_t in_y = in_y_origin + ky; + const int32_t in_x = in_x_origin + kx; + partial_t in_v; + + if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w) + in_v = std::numeric_limits::lowest(); + else + in_v = in_c_p[in_y * in_w + in_x]; + + value = std::max(value, in_v); + } + 
} + + break; + } + case kpu_pool_mean_2_s2: + case kpu_pool_mean_2_s1: + case kpu_pool_mean_4_s4: + { + for (int32_t ky = 0; ky < filter; ky++) + { + for (int32_t kx = 0; kx < filter; kx++) + { + const int32_t in_y = kernels::detail::clamp(in_y_origin + ky, 0, in_h - 1); + const int32_t in_x = kernels::detail::clamp(in_x_origin + kx, 0, in_w - 1); + const T in_v = in_c_p[in_y * in_w + in_x]; + + value += in_v; + } + } + + value /= filter * filter; + break; + } + case kpu_pool_left_top_2_s2: + case kpu_pool_left_top_4_s4: + case kpu_pool_right_top_2_s2: + { + auto k_off = get_kpu_select_pool_offset(pool_type); + const int32_t in_y = in_y_origin + k_off[0]; + const int32_t in_x = in_x_origin + k_off[1]; + partial_t in_v; + + if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w) + in_v = 0; + else + in_v = in_c_p[in_y * in_w + in_x]; + + value = in_v; + break; + } + } + + *output++ = (T)value; + } + } + } +} + +template +void fake_kpu_conv2d(const float *input, float *output, const float *weights, const float *bias, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, value_range fused_activation) +{ + const auto pad = FilterSize == 1 ? 0 : 1; + const auto groups = IsDepthwise ? out_channels : 1; + const auto g_ic = IsDepthwise ? 1 : in_channels / groups; + const auto g_oc = IsDepthwise ? 1 : out_channels; + + for (int32_t og = 0; og < groups; og++) + { + const auto *w_group_p = weights + (size_t)og * g_oc * g_ic * FilterSize * FilterSize; + + for (int32_t oc = 0; oc < g_oc; oc++) + { + const auto *w_oc_p = w_group_p + (size_t)oc * g_ic * FilterSize * FilterSize; + + for (int32_t oy = 0; oy < in_h; oy++) + { + for (int32_t ox = 0; ox < in_w; ox++) + { + const int32_t in_y_origin = oy - pad; + const int32_t in_x_origin = ox - pad; + const int32_t filter_y_start = std::max(0, -in_y_origin); + const int32_t filter_y_end = std::min(FilterSize, in_h - in_y_origin); + const int32_t filter_x_start = std::max(0, -in_x_origin); + const int32_t filter_x_end = std::min(FilterSize, in_w - in_x_origin); + float value = bias[og * g_oc + oc]; + + for (int32_t ic = 0; ic < g_ic; ic++) + { + const auto *in_c_p = input + ((size_t)og * g_ic + ic) * in_h * in_w; + const auto *w_ic_p = w_oc_p + (size_t)ic * FilterSize * FilterSize; + + for (int32_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (int32_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const int32_t in_y = in_y_origin + ky; + const int32_t in_x = in_x_origin + kx; + + const auto in_v = in_c_p[in_y * in_w + in_x]; + const auto w = w_ic_p[ky * FilterSize + kx]; + + value += in_v * w; + } + } + } + + *output++ = kernels::detail::apply_activation(value, fused_activation); + } + } + } + } +} + +END_NS_NNCASE_KERNELS_K210 diff --git a/lib/nncase/v1/include/nncase/kernels/kernel_context.h b/lib/nncase/v1/include/nncase/kernels/kernel_context.h new file mode 100644 index 0000000..1f2b34e --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/kernel_context.h @@ -0,0 +1,27 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include + +BEGIN_NS_NNCASE_KERNELS + +struct NNCASE_API kernel_context +{ + +}; + +NNCASE_UNUSED static kernel_context default_kernel_context; + +END_NS_NNCASE_KERNELS \ No newline at end of file diff --git a/lib/nncase/v1/include/nncase/kernels/kernel_utils.h b/lib/nncase/v1/include/nncase/kernels/kernel_utils.h new file mode 100644 index 0000000..4414ebb --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/kernel_utils.h @@ -0,0 +1,240 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include +#include + +#ifdef __GNUC__ +#define CXX_RESTRICT __restrict__ +#elif _MSC_VER +#define CXX_RESTRICT __restrict +#else +#define CXX_RESTRICT +#endif + +BEGIN_NS_NNCASE_KERNELS + +template +inline offset_type element_offset(const S &strides, It first, It last) noexcept +{ + using difference_type = typename std::iterator_traits::difference_type; + auto size = static_cast((std::min)(static_cast(std::distance(first, last)), strides.size())); + return std::inner_product(last - size, last, strides.cend() - size, offset_type(0)); +} + +template +size_t offset(const TShape &strides, const TShape &index) +{ + assert(strides.size() == index.size()); + return element_offset(strides, index.begin(), index.end()); +} + +template +TShape reshape_linear_index(const TShape &new_shape, size_t index) +{ + TShape new_index(new_shape.size()); + size_t i = new_shape.size() - 1; + for (auto it = new_shape.rbegin(); it != new_shape.rend(); ++it) + { + new_index[i--] = index % *it; + index /= *it; + } + + return new_index; +} + +template +size_t linear_index(const TShape &shape, const TShape &index) +{ + assert(index.size() == shape.size()); + size_t new_index = index[0]; + for (size_t i = 1; i < shape.size(); i++) + new_index = new_index * shape[i] + index[i]; + return new_index; +} + +namespace detail +{ +inline size_t get_windowed_output_size(size_t size, int32_t filter, int32_t stride, int32_t dilation, const padding &padding) +{ + auto effective_filter_size = (filter - 1) * dilation + 1; + return (size_t)((int32_t)size + padding.before + padding.after - effective_filter_size + stride) / stride; +} + +inline runtime_shape_t get_binary_output_shape(const runtime_shape_t &input_a_shape, const runtime_shape_t &input_b_shape) +{ + runtime_shape_t out_shape; + + const auto dest_dims = (int32_t)std::max(input_a_shape.size(), input_b_shape.size()); + const auto in_a_ext = dest_dims - (int32_t)input_a_shape.size(); + const auto in_b_ext = dest_dims - (int32_t)input_b_shape.size(); + + for (int32_t i = 0; i < dest_dims; i++) + { + const auto in_a_dim = i - (int32_t)in_a_ext; + const auto in_b_dim = i - (int32_t)in_b_ext; + + const auto in_a = in_a_dim < 0 ? 1 : input_a_shape[in_a_dim]; + const auto in_b = in_b_dim < 0 ? 
1 : input_b_shape[in_b_dim]; + if (in_a == in_b) + out_shape.push_back(in_a); + else if (in_a == 1) + out_shape.push_back(in_b); + else if (in_b == 1) + out_shape.push_back(in_a); + else + assert(!"inputs are not compatible to broadcast"); + } + + return out_shape; +} + +template +size_t compute_size(const TShape &shape) +{ + return std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies()); +} + +template +inline T clamp(T value, T min, T max) +{ + return std::max(std::min(value, max), min); +} + +template +inline T apply_activation(T value, value_range activation) +{ + return clamp(value, activation.min, activation.max); +} + +template +TShape get_reduced_offset(const TShape &in_offset, const TShape &reduced_shape) +{ + TShape off(reduced_shape.size()); + const auto dims_ext = in_offset.size() - reduced_shape.size(); + for (size_t i = 0; i < reduced_shape.size(); i++) + { + if (in_offset[i + dims_ext] >= reduced_shape[i]) + off[i] = 0; + else + off[i] = in_offset[i + dims_ext]; + } + + return off; +} + +template +TShape get_reduced_shape(const TShape &in_shape, const TShape &axis, bool keep_dims) +{ + TShape shape; + shape.reserve(in_shape.size() - (keep_dims ? 0 : axis.size())); + for (size_t i = 0; i < in_shape.size(); i++) + { + if (std::find(axis.begin(), axis.end(), i) == axis.end()) + { + shape.push_back(in_shape[i]); + } + else + { + if (keep_dims) + shape.push_back(1); + } + } + + if (shape.empty()) + shape.push_back(1); + return shape; +} + +template +size_t get_reduce_block_size(const TShape &in_shape, const TShape &axis) +{ + size_t size = 1; + for (size_t i = 0; i < in_shape.size(); i++) + { + if (std::find(axis.begin(), axis.end(), i) != axis.end()) + { + size *= in_shape[i]; + } + } + + return size; +} + +template +TShape get_reduced_offset(const TShape &in_offset, const TShape &axis, bool keep_dims) +{ + TShape off; + off.reserve(in_offset.size() - (keep_dims ? 0 : axis.size())); + for (size_t i = 0; i < in_offset.size(); i++) + { + if (std::find(axis.begin(), axis.end(), i) == axis.end()) + { + off.push_back(in_offset[i]); + } + else + { + if (keep_dims) + off.push_back(0); + } + } + + if (off.empty()) + off.push_back(0); + return off; +} + +template +struct default_ptr_getter +{ + T *operator()(const TRange &range) const noexcept { return range; } +}; + +template +int32_t to_signed(uint32_t value) +{ + auto mask = uint32_t(1) << (Bits - 1); + if (Bits != 32 && (value & mask) != 0) + { + auto sign = 0xFFFFFFFF << Bits; + return (int)(value | sign); + } + + return (int32_t)value; +} + +template +int64_t to_signed(uint64_t value) +{ + auto mask = uint64_t(1) << (Bits - 1); + if ((value & mask) != 0) + { + auto sign = 0xFFFFFFFFFFFFFFFF << Bits; + return (int64_t)(value | sign); + } + + return (int64_t)value; +} + +template +constexpr T quantize(float value, const quant_param_t ¶m) noexcept +{ + return (T)clamp((int32_t)lrintf(value / param.scale + param.zero_point), (int32_t)std::numeric_limits::lowest(), (int32_t)std::numeric_limits::max()); +} +} +END_NS_NNCASE_KERNELS diff --git a/lib/nncase/v1/include/nncase/kernels/neutral/neutral_kernels.h b/lib/nncase/v1/include/nncase/kernels/neutral/neutral_kernels.h new file mode 100644 index 0000000..05b0bfc --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/neutral/neutral_kernels.h @@ -0,0 +1,795 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "../kernel_utils.h" +#include +#include +#include +#include +#ifdef __riscv +#include "../riscv/neutral_kernels.h" +#endif + +namespace nncase::kernels::neutral +{ +template +void binary(const float *input_a, const float *input_b, float *output, const TShape &in_a_shape, + const TShape &in_b_shape, const TShape &out_shape, const value_range &fused_activation, TOp &&op) +{ + // opt. no broadcast + if (in_a_shape == in_b_shape) + { + auto size = kernels::detail::compute_size(in_a_shape); + for (size_t i = 0; i < size; i++) + { + const auto a = input_a[i]; + const auto b = input_b[i]; + output[i] = kernels::detail::apply_activation(op(a, b), fused_activation); + } + } + // fallback + else + { + for (size_t d0 = 0; d0 < out_shape[0]; d0++) + { + for (size_t d1 = 0; d1 < out_shape[1]; d1++) + { + for (size_t d2 = 0; d2 < out_shape[2]; d2++) + { + for (size_t d3 = 0; d3 < out_shape[3]; d3++) + { + TShape in_off = { d0, d1, d2, d3 }; + const auto in_a_off = kernels::detail::get_reduced_offset(in_off, in_a_shape); + const auto in_b_off = kernels::detail::get_reduced_offset(in_off, in_b_shape); + const auto a = input_a[offset(in_a_shape, in_a_off)]; + const auto b = input_b[offset(in_b_shape, in_b_off)]; + output[offset(out_shape, in_off)] = kernels::detail::apply_activation(op(a, b), fused_activation); + } + } + } + } + } +} + +template +void quantized_binary(const uint8_t *input_a, const uint8_t *input_b, uint8_t *output, const TShape &in_a_shape, + const TShape &in_b_shape, const TShape &out_shape, int32_t input_a_offset, int32_t input_a_mul, int32_t input_a_shift, + int32_t input_b_offset, int32_t input_b_mul, int32_t input_b_shift, int32_t output_mul, int32_t output_shift, int32_t output_offset, TOp &&op) +{ + // opt. 
no broadcast + if (in_a_shape == in_b_shape) + { + auto size = kernels::detail::compute_size(in_a_shape); + for (size_t i = 0; i < size; i++) + { + auto a = (int32_t)input_a[i]; + auto b = (int32_t)input_b[i]; + a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift); + b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift); + + auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift); + output[i] = (uint8_t)std::clamp(output_val + output_offset, 0, 255); + } + } + // fallback + else + { + for (int32_t d0 = 0; d0 < out_shape[0]; d0++) + { + for (int32_t d1 = 0; d1 < out_shape[1]; d1++) + { + for (int32_t d2 = 0; d2 < out_shape[2]; d2++) + { + for (int32_t d3 = 0; d3 < out_shape[3]; d3++) + { + TShape in_off = { d0, d1, d2, d3 }; + const auto in_a_off = kernels::detail::get_reduced_offset(in_off, in_a_shape); + const auto in_b_off = kernels::detail::get_reduced_offset(in_off, in_b_shape); + auto a = (int32_t)input_a[offset(in_a_shape, in_a_off)]; + auto b = (int32_t)input_b[offset(in_b_shape, in_b_off)]; + a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift); + b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift); + + auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift); + output[offset(out_shape, in_off)] = (uint8_t)std::clamp(output_val + output_offset, 0, 255); + } + } + } + } + } +} + +template > +inline void concat(xtl::span inputs, uint8_t *output, xtl::span concat_dims, size_t inner_size, size_t outer_size, TPtrGetter getter = {}) +{ + for (size_t oc = 0; oc < outer_size; oc++) + { + for (size_t i = 0; i < inputs.size(); i++) + { + auto size = inner_size * concat_dims[i]; + auto src = getter(inputs[i]) + oc * size; + std::copy(src, src + size, output); + output += size; + } + } +} + +template +void conv2d(const float *input, float *output, const float *weights, const float *bias, const TShape &in_shape, + int32_t groups, int32_t out_channels, int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, + const padding &padding_h, const padding &padding_w, const value_range &fused_activation) +{ + const auto out_h = detail::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h); + const auto out_w = detail::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w); + const auto g_ic = in_shape[1] / groups; + const auto g_oc = (size_t)out_channels / groups; + + for (size_t batch = 0; batch < in_shape[0]; batch++) + { + const float *in_batch_p = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3]; + + for (size_t og = 0; og < (size_t)groups; og++) + { + const float *in_group_p = in_batch_p + (size_t)og * g_ic * in_shape[2] * in_shape[3]; + const float *w_group_p = weights + (size_t)og * g_oc * g_ic * filter_h * filter_w; + + for (size_t oc = 0; oc < g_oc; oc++) + { + const float *w_oc_p = w_group_p + (size_t)oc * g_ic * filter_h * filter_w; + + for (size_t oy = 0; oy < out_h; oy++) + { + for (size_t ox = 0; ox < out_w; ox++) + { + const int32_t in_y_origin = (oy * stride_h) - padding_h.before; + const int32_t in_x_origin = (ox * stride_w) - padding_w.before; + const size_t filter_y_start = (size_t)std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_y_end = (size_t)std::min(filter_h, ((int32_t)in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_x_start = 
(size_t)std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w); + const size_t filter_x_end = (size_t)std::min(filter_w, ((int32_t)in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w); + float value = bias[og * g_oc + oc]; + + for (size_t ic = 0; ic < g_ic; ic++) + { + const float *in_c_p = in_group_p + (size_t)ic * in_shape[2] * in_shape[3]; + const float *w_ic_p = w_oc_p + (size_t)ic * filter_h * filter_w; + + for (size_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (size_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const size_t in_y = in_y_origin + dilation_h * ky; + const size_t in_x = in_x_origin + dilation_w * kx; + + const float in_v = in_c_p[in_y * in_shape[3] + in_x]; + const float w = w_ic_p[ky * filter_w + kx]; + + value += in_v * w; + } + } + } + + *output++ = detail::apply_activation(value, fused_activation); + } + } + } + } + } +} + +template +void quantized_conv2d(const uint8_t *input, uint8_t *output, const uint8_t *weights, const int32_t *bias, int32_t input_offset, int32_t filter_offset, + int32_t output_mul, int32_t output_shift, int32_t output_offset, const TShape &in_shape, int32_t groups, int32_t out_channels, + int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, + const padding &padding_h, const padding &padding_w) +{ + const auto out_h = detail::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h); + const auto out_w = detail::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w); + const auto g_ic = in_shape[1] / groups; + const auto g_oc = out_channels / groups; + + for (int32_t batch = 0; batch < in_shape[0]; batch++) + { + const uint8_t *in_batch_p = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3]; + + for (int32_t og = 0; og < groups; og++) + { + const uint8_t *in_group_p = in_batch_p + (size_t)og * g_ic * in_shape[2] * in_shape[3]; + const uint8_t *w_group_p = weights + (size_t)og * g_oc * g_ic * filter_h * filter_w; + + for (int32_t oc = 0; oc < g_oc; oc++) + { + const uint8_t *w_oc_p = w_group_p + (size_t)oc * g_ic * filter_h * filter_w; + + for (int32_t oy = 0; oy < out_h; oy++) + { + for (int32_t ox = 0; ox < out_w; ox++) + { + const int32_t in_y_origin = (oy * stride_h) - padding_h.before; + const int32_t in_x_origin = (ox * stride_w) - padding_w.before; + const int32_t filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h); + const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h); + const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w); + const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w); + int32_t value = bias[og * g_oc + oc]; + + for (int32_t ic = 0; ic < g_ic; ic++) + { + const uint8_t *in_c_p = in_group_p + (size_t)ic * in_shape[2] * in_shape[3]; + const uint8_t *w_ic_p = w_oc_p + (size_t)ic * filter_h * filter_w; + + for (int32_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (int32_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const int32_t in_y = in_y_origin + dilation_h * ky; + const int32_t in_x = in_x_origin + dilation_w * kx; + + const int32_t in_v = (int32_t)in_c_p[in_y * in_shape[3] + in_x] + input_offset; + const int32_t w = (int32_t)w_ic_p[ky * filter_w + kx] + filter_offset; + + value += in_v * w; + } + } + } + + auto output_val = static_cast(runtime::mul_and_carry_shift(value, output_mul, output_shift)); 
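+ // Requantization of the int32 accumulator (descriptive note; exact helper semantics assumed): mul_and_carry_shift
+ // is taken to be a fixed-point multiply by output_mul followed by a rounding arithmetic right shift by output_shift;
+ // the zero point output_offset is added next and the result is clamped to the uint8 range [0, 255].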
+ output_val += output_offset; + *output++ = (uint8_t)std::clamp(output_val, 0, 255); + } + } + } + } + } +} + +template +void conv2d_transpose(const float *input, float *output, const float *weights, [[maybe_unused]] const float *bias, const TShape &in_shape, + int32_t groups, const TShape &out_shape, int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, + const padding &padding_h, const padding &padding_w, const value_range &fused_activation) +{ + std::fill(output, output + kernels::detail::compute_size(out_shape), 0.f); + const auto g_ic = in_shape[1] / groups; + const auto g_oc = out_shape[1] / groups; + + for (size_t batch = 0; batch < in_shape[0]; batch++) + { + float *out_batch_p = output + (size_t)batch * out_shape[1] * out_shape[2] * out_shape[3]; + + for (size_t g = 0; g < (size_t)groups; g++) + { + float *out_group_p = out_batch_p + (size_t)g * g_oc * out_shape[2] * out_shape[3]; + const float *w_group_p = weights + (size_t)g * g_oc * g_ic * filter_h * filter_w; + + for (size_t ic = 0; ic < g_ic; ic++) + { + for (size_t iy = 0; iy < in_shape[2]; iy++) + { + for (size_t ix = 0; ix < in_shape[3]; ix++) + { + const int32_t out_y_origin = (iy * stride_h) - padding_h.before; + const int32_t out_x_origin = (ix * stride_w) - padding_w.before; + const size_t filter_y_start = (size_t)std::max(0, (-out_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_y_end = (size_t)std::min(filter_h, ((int32_t)out_shape[2] - out_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_x_start = (size_t)std::max(0, (-out_x_origin + dilation_w - 1) / dilation_w); + const size_t filter_x_end = (size_t)std::min(filter_w, ((int32_t)out_shape[3] - out_x_origin + dilation_w - 1) / dilation_w); + const float in_v = *input++; + + for (size_t oc = 0; oc < g_oc; oc++) + { + assert(bias[g * g_oc + oc] == 0.f); + float *out_c_p = out_group_p + (size_t)oc * out_shape[2] * out_shape[3]; + const float *w_oc_p = w_group_p + (size_t)oc * g_ic * filter_h * filter_w; + const float *w_ic_p = w_oc_p + (size_t)ic * filter_h * filter_w; + + for (size_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (size_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const int32_t out_y = out_y_origin + dilation_h * ky; + const int32_t out_x = out_x_origin + dilation_w * kx; + + const float w = w_ic_p[ky * filter_w + kx]; + + out_c_p[out_y * out_shape[3] + out_x] += in_v * w; + } + } + } + } + } + } + } + } + + if (fused_activation != value_range::full()) + { + for (size_t i = 0; i < kernels::detail::compute_size(out_shape); i++) + output[i] = detail::apply_activation(output[i], fused_activation); + } +} + +template +void dequantize(const TQ *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, const quant_param_t ¶m) +{ +#if __riscv + riscv_dequantize(input, output, count, param); +#else + for (size_t i = 0; i < count; i++) + { + output[i] = (input[i] - param.zero_point) * param.scale; + } + +#endif +} + +inline void matmul(const float *input_a, const float *input_b, float *output, const float *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, const value_range &fused_activation) +{ + for (int32_t oy = 0; oy < a_rows; oy++) + { + for (int32_t ox = 0; ox < b_cols; ox++) + { + float value = bias[ox]; + + for (int32_t i = 0; i < a_cols; i++) + { + const auto a = input_a[oy * a_cols + i]; + const auto b = input_b[i * b_cols + ox]; + value += a * b; + } + + output[oy * b_cols + ox] = detail::apply_activation(value, fused_activation); + } + } 
+} + +inline void quantized_matmul(const uint8_t *input_a, const uint8_t *input_b, uint8_t *output, const int32_t *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, int32_t input_a_offset, int32_t input_b_offset, + int32_t output_mul, int32_t output_shift, int32_t output_offset) +{ + for (int32_t oy = 0; oy < a_rows; oy++) + { + for (int32_t ox = 0; ox < b_cols; ox++) + { + int32_t value = bias[ox]; + for (int32_t i = 0; i < a_cols; i++) + { + const auto a = (int32_t)input_a[oy * a_cols + i] + input_a_offset; + const auto b = (int32_t)input_b[i * b_cols + ox] + input_b_offset; + value += a * b; + } + + auto output_val = static_cast(runtime::mul_and_carry_shift(value, output_mul, output_shift)); + output_val += output_offset; + output[oy * b_cols + ox] = (uint8_t)std::clamp(output_val, 0, 255); + } + } +} + +template +void pad(const T *input, T *output, const TShape &in_shape, const TPaddings &paddings, T pad_value) +{ + TShape out_shape = { in_shape[0] + paddings[0].sum(), + in_shape[1] + paddings[1].sum(), + in_shape[2] + paddings[2].sum(), + in_shape[3] + paddings[3].sum() }; + + for (int d0 = 0; d0 < out_shape[0]; d0++) + { + auto d0_origin = -paddings[0].before; + auto in0 = input + ((size_t)d0_origin + d0) * in_shape[1] * in_shape[2] * in_shape[3]; + + for (int d1 = 0; d1 < out_shape[1]; d1++) + { + auto d1_origin = -paddings[1].before; + auto in1 = in0 + ((size_t)d1_origin + d1) * in_shape[2] * in_shape[3]; + + for (int d2 = 0; d2 < out_shape[2]; d2++) + { + auto d2_origin = -paddings[2].before; + auto in2 = in1 + ((size_t)d2_origin + d2) * in_shape[3]; + + for (int d3 = 0; d3 < out_shape[3]; d3++) + { + auto d3_origin = -paddings[3].before; + + if (d0 < paddings[0].before || d0 >= out_shape[0] - paddings[0].after + || d1 < paddings[1].before || d1 >= out_shape[1] - paddings[1].after + || d2 < paddings[2].before || d2 >= out_shape[2] - paddings[2].after + || d3 < paddings[3].before || d3 >= out_shape[3] - paddings[3].after) + *output++ = pad_value; + else + *output++ = in2[d3_origin + d3]; + } + } + } + } +} + +template +void quantize(const float *CXX_RESTRICT input, TQ *CXX_RESTRICT output, size_t count, const quant_param_t ¶m) +{ +#if __riscv + riscv_quantize(input, output, count, param); +#else + for (size_t i = 0; i < count; i++) + { + auto v = (int32_t)std::nearbyintf(input[i] / param.scale + param.zero_point); + output[i] = (TQ)std::clamp(v, (int32_t)std::numeric_limits::lowest(), (int32_t)std::numeric_limits::max()); + } +#endif +} + +template +void reduce(const float *input, float *output, float init_value, const TShape &in_shape, const TShape &reduced_shape, TReducer &&reducer) +{ + std::fill(output, output + kernels::detail::compute_size(reduced_shape), init_value); + + for (size_t d0 = 0; d0 < in_shape[0]; d0++) + { + for (size_t d1 = 0; d1 < in_shape[1]; d1++) + { + for (size_t d2 = 0; d2 < in_shape[2]; d2++) + { + for (size_t d3 = 0; d3 < in_shape[3]; d3++) + { + runtime_shape_t in_off = { d0, d1, d2, d3 }; + auto out_off = kernels::detail::get_reduced_offset(in_off, reduced_shape); + const auto a = input[offset(in_shape, in_off)]; + auto &b = output[offset(reduced_shape, out_off)]; + b = reducer(b, a); + } + } + } + } +} + +template +void unary(const float *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, TOp &&op) +{ + for (size_t i = 0; i < count; i++) + output[i] = op(input[i]); +} + +template +void reduce_window2d(const float *input, float *output, float init_value, const TShape &in_shape, int32_t filter_h, int32_t filter_w, + int32_t stride_h, 
int32_t stride_w, int32_t dilation_h, int32_t dilation_w, const padding &padding_h, const padding &padding_w, + const value_range &fused_activation, TBinaryOp &&binary_op, TOutputOp &&window_op) +{ + const auto out_h = kernels::detail::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h); + const auto out_w = kernels::detail::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w); + runtime_shape_t out_shape { in_shape[0], in_shape[1], out_h, out_w }; + + for (size_t batch = 0; batch < in_shape[0]; batch++) + { + for (size_t oc = 0; oc < in_shape[1]; oc++) + { + for (size_t oy = 0; oy < out_h; oy++) + { + for (size_t ox = 0; ox < out_w; ox++) + { + const int32_t in_y_origin = ((int32_t)oy * stride_h) - padding_h.before; + const int32_t in_x_origin = ((int32_t)ox * stride_w) - padding_w.before; + const size_t filter_y_start = (size_t)std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_y_end = (size_t)std::min(filter_h, ((int32_t)in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h); + const size_t filter_x_start = (size_t)std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w); + const size_t filter_x_end = (size_t)std::min(filter_w, ((int32_t)in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w); + float value = init_value; + int32_t kernel_count = 0; + + for (size_t ky = filter_y_start; ky < filter_y_end; ky++) + { + for (size_t kx = filter_x_start; kx < filter_x_end; kx++) + { + const size_t in_y = in_y_origin + dilation_h * ky; + const size_t in_x = in_x_origin + dilation_w * kx; + + const float in_v = input[offset(in_shape, { batch, oc, in_y, in_x })]; + + value = binary_op(value, in_v); + kernel_count++; + } + } + + output[offset(out_shape, { batch, oc, oy, ox })] = kernels::detail::apply_activation(window_op(value, kernel_count), fused_activation); + } + } + } + } +} + +template +void resize_nearest_neighbor(const T *input, T *output, const TShape &in_shape, int32_t out_h, int32_t out_w) +{ + auto height_scale = (float)in_shape[2] / out_h; + auto width_scale = (float)in_shape[3] / out_w; + + for (size_t batch = 0; batch < in_shape[0]; batch++) + { + auto in_batch = input + batch * in_shape[1] * in_shape[2] * in_shape[3]; + + for (size_t oc = 0; oc < in_shape[1]; oc++) + { + auto in_c = in_batch + oc * in_shape[2] * in_shape[3]; + + for (size_t oy = 0; oy < (size_t)out_h; oy++) + { + auto in_y = std::min((size_t)floorf(oy * height_scale), in_shape[2] - 1); + auto in_row = in_c + in_y * in_shape[3]; + + for (size_t ox = 0; ox < (size_t)out_w; ox++) + { + auto in_x = std::min((size_t)floorf(ox * width_scale), in_shape[3] - 1); + *output++ = in_row[in_x]; + } + } + } + } +} + +template +inline void resize_bilinear(const T *input, T *output, const TShape &in_shape, int32_t out_h, int32_t out_w, bool align_corners) +{ + auto height_scale = (float)in_shape[2] / out_h; + auto width_scale = (float)in_shape[3] / out_w; + if (align_corners && out_h > 1) + height_scale = (float)(in_shape[2] - 1) / (out_h - 1); + if (align_corners && out_w > 1) + width_scale = (float)(in_shape[3] - 1) / (out_w - 1); + + auto destIdx = 0; + for (size_t batch = 0; batch < in_shape[0]; batch++) + { + auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3]; + + for (size_t oc = 0; oc < in_shape[1]; oc++) + { + auto in_c = in_batch + (size_t)oc * in_shape[2] * in_shape[3]; + + for (size_t oy = 0; oy < (size_t)out_h; oy++) + { + auto in_y = oy * height_scale; + auto in_y0 = (size_t)floorf(in_y); + auto 
in_y1 = std::min(in_y0 + 1, in_shape[2] - 1); + + for (size_t ox = 0; ox < (size_t)out_w; ox++) + { + auto in_x = ox * width_scale; + auto in_x0 = (size_t)floorf(in_x); + auto in_x1 = std::min(in_x0 + 1, in_shape[3] - 1); + + auto v0 = in_c[in_y0 * in_shape[3] + in_x0]; + auto v1 = in_c[in_y1 * in_shape[3] + in_x0]; + auto v2 = in_c[in_y0 * in_shape[3] + in_x1]; + auto v3 = in_c[in_y1 * in_shape[3] + in_x1]; + + auto a0 = (1 - (in_y - in_y0)) * (1 - (in_x - in_x0)); + auto a1 = (in_y - in_y0) * (1 - (in_x - in_x0)); + auto a2 = (1 - (in_y - in_y0)) * (in_x - in_x0); + auto a3 = (in_y - in_y0) * (in_x - in_x0); + + output[destIdx++] = T(v0 * a0 + v1 * a1 + v2 * a2 + v3 * a3); + } + } + } + } +} + +inline void softmax(const float *input, float *output, float beta, int32_t outer_size, size_t inner_size) +{ + for (int32_t batch = 0; batch < outer_size; batch++) + { + auto src = input + batch * inner_size; + auto dest = output + batch * inner_size; + + auto max = *std::max_element(src, src + inner_size); + float sum = 0; + + for (size_t i = 0; i < inner_size; i++) + { + auto value = expf((src[i] - max) * beta); + sum += value; + dest[i] = value; + } + + for (size_t i = 0; i < inner_size; i++) + dest[i] /= sum; + } +} + +template +void transpose(const T *CXX_RESTRICT input, T *CXX_RESTRICT output, const TShape &in_shape, const TShape &in_strides, const TShape &out_strides, const TShape &perm) +{ + runtime_shape_t out_shape(in_shape.size()); + for (size_t i = 0; i < in_shape.size(); i++) + out_shape[i] = in_shape[perm[i]]; + + runtime_shape_t i(4), o(4); + for (o[3] = 0; o[3] < out_shape[3]; o[3]++) + { + i[perm[3]] = o[3]; + for (o[2] = 0; o[2] < out_shape[2]; o[2]++) + { + i[perm[2]] = o[2]; + for (o[1] = 0; o[1] < out_shape[1]; o[1]++) + { + i[perm[1]] = o[1]; + for (o[0] = 0; o[0] < out_shape[0]; o[0]++) + { + i[perm[0]] = o[0]; + output[offset(out_strides, o)] = input[offset(in_strides, i)]; + } + } + } + } +} + +template +void strided_slice(const T *CXX_RESTRICT input, T *CXX_RESTRICT output, const TShape &in_shape, const TShape &begin, const TShape &end, const TShape &strides) +{ + auto loop_cond = [](int32_t i, int32_t stop, int32_t stride) { + return stride > 0 ? 
i < stop : i > stop; + }; + + for (int32_t d0 = begin[0]; loop_cond(d0, end[0], strides[0]); d0 += strides[0]) + { + auto d0_origin = input + (size_t)d0 * in_shape[1] * in_shape[2] * in_shape[3]; + for (int d1 = begin[1]; loop_cond(d1, end[1], strides[1]); d1 += strides[1]) + { + auto d1_origin = d0_origin + (size_t)d1 * in_shape[2] * in_shape[3]; + for (int32_t d2 = begin[2]; loop_cond(d2, end[2], strides[2]); d2 += strides[2]) + { + auto d2_origin = d1_origin + (size_t)d2 * in_shape[3]; + for (int32_t d3 = begin[3]; loop_cond(d3, end[3], strides[3]); d3 += strides[3]) + *output++ = d2_origin[d3]; + } + } + } +} + +inline void nnil_unary_method(const float *input, float *output, size_t count, gsl::span body) +{ + using namespace nncase::runtime; + + for (size_t i = 0; i < count; i++) + { + nnil_evalstack stack; + span_reader sr(body); + nnil_reader reader(sr); + bool ret = false; + + while (reader.avail() && !ret) + { + auto op = reader.next(); + switch (op.opcode) + { + case nnil_nop: + break; + case nnil_dup: + stack.dup(); + break; + case nnil_pop: + stack.pop(); + break; + case nnil_lda_0: + stack.push(input[i]); + break; + case nnil_ldc_r4_0: + stack.push(0.f); + break; + case nnil_ldc_r4_1: + stack.push(1.f); + break; + case nnil_ldc_r4: + stack.push(op.ldc_r4.r4); + break; + case nnil_abs: + stack.push(fabsf(stack.pop())); + break; + case nnil_ceil: + stack.push(ceilf(stack.pop())); + break; + case nnil_cos: + stack.push(cosf(stack.pop())); + break; + case nnil_exp: + stack.push(expf(stack.pop())); + break; + case nnil_floor: + stack.push(floorf(stack.pop())); + break; + case nnil_log: + stack.push(logf(stack.pop())); + break; + case nnil_neg: + stack.push(-stack.pop()); + break; + case nnil_rsqrt: + stack.push(1.f / sqrtf(stack.pop())); + break; + case nnil_sin: + stack.push(sinf(stack.pop())); + break; + case nnil_square: + { + auto v = stack.pop(); + stack.push(v * v); + break; + } + case nnil_add: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(a + b); + break; + } + case nnil_sub: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(a - b); + break; + } + case nnil_mul: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(a * b); + break; + } + case nnil_div: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(a / b); + break; + } + case nnil_min: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(std::min(a, b)); + break; + } + case nnil_max: + { + auto b = stack.pop(); + auto a = stack.pop(); + stack.push(std::max(a, b)); + break; + } + case nnil_clamp: + { + auto high = stack.pop(); + auto low = stack.pop(); + auto v = stack.pop(); + stack.push(std::clamp(v, low, high)); + break; + } + case nnil_ret: + output[i] = stack.pop(); + ret = true; + break; + default: + throw std::runtime_error("Invalid nnil op"); + } + } + } +} + +inline void table_lookup1d(const uint8_t *CXX_RESTRICT input, uint8_t *CXX_RESTRICT output, size_t size, const uint8_t *CXX_RESTRICT table) +{ + for (size_t i = 0; i < size; i++) + output[i] = table[input[i]]; +} +} diff --git a/lib/nncase/v1/include/nncase/kernels/nnil.h b/lib/nncase/v1/include/nncase/kernels/nnil.h new file mode 100644 index 0000000..ccdbc32 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/nnil.h @@ -0,0 +1,25 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS + +NNCASE_API result nnil_unary_method(const float *input, float *output, size_t count, gsl::span body, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS diff --git a/lib/nncase/v1/include/nncase/kernels/reduce_window.h b/lib/nncase/v1/include/nncase/kernels/reduce_window.h new file mode 100644 index 0000000..d224a81 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/reduce_window.h @@ -0,0 +1,26 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS + +NNCASE_API result reduce_window2d(reduce_op_t op, const float *input, float init_value, float *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const padding &padding_h, const padding &padding_w, + int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS diff --git a/lib/nncase/v1/include/nncase/kernels/riscv/neutral_kernels.h b/lib/nncase/v1/include/nncase/kernels/riscv/neutral_kernels.h new file mode 100644 index 0000000..c0e08fb --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/riscv/neutral_kernels.h @@ -0,0 +1,83 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include "../kernel_utils.h" +#include +#include +#include + +namespace nncase +{ +namespace kernels +{ + namespace neutral + { + template + void riscv_dequantize(const TQ *CXX_RESTRICT input, float *CXX_RESTRICT output, size_t count, const quant_param_t ¶m) + { + float scale = 1.f / param.scale; + float zero = -param.zero_point * scale; + + for (size_t i = 0; i < count / 2; i++) + { + // handwritten pipeline for in order CPU + auto in1_q = input[i * 2]; + auto in2_q = input[i * 2 + 1]; + auto in1 = (float)in1_q; + auto in2 = (float)in2_q; + auto out1 = in1 * scale + zero; + auto out2 = in2 * scale + zero; + + output[i * 2] = out1; + output[i * 2 + 1] = out2; + } + + if (count % 2) + output[count - 1] = input[count - 1] * scale + zero; + } + + template + void riscv_quantize(const float *CXX_RESTRICT input, TQ *CXX_RESTRICT output, size_t count, const quant_param_t ¶m) + { + float scale = param.scale; + float zero = param.zero_point; + + for (size_t i = 0; i < count / 2; i++) + { + auto in1 = input[i * 2]; + auto in2 = input[i * 2 + 1]; + in1 = in1 * scale + zero; + in2 = in2 * scale + zero; + int32_t out1, out2; + asm volatile("fcvt.w.s %0, %1, rne" + : "=r"(out1) + : "f"(in1)); + asm volatile("fcvt.w.s %0, %1, rne" + : "=r"(out2) + : "f"(in2)); + + output[i * 2] = std::clamp(out1, (int32_t)std::numeric_limits::lowest(), (int32_t)std::numeric_limits::max()); + output[i * 2 + 1] = std::clamp(out2, (int32_t)std::numeric_limits::lowest(), (int32_t)std::numeric_limits::max()); + } + + if (count % 2) + { + auto in = (int32_t)roundf(input[count - 1] * scale + zero); + output[count - 1] = std::clamp(in, (int32_t)std::numeric_limits::lowest(), (int32_t)std::numeric_limits::max()); + } + } + } +} +} diff --git a/lib/nncase/v1/include/nncase/kernels/tensor_compute.h b/lib/nncase/v1/include/nncase/kernels/tensor_compute.h new file mode 100644 index 0000000..e17f961 --- /dev/null +++ b/lib/nncase/v1/include/nncase/kernels/tensor_compute.h @@ -0,0 +1,72 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include +#include +#include +#include + +BEGIN_NS_NNCASE_KERNELS + +NNCASE_API result batch_to_space(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &block_shape, const runtime_paddings_t &crops, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result broadcast(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_shape, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result concat(datatype_t type, gsl::span inputs, gsl::byte *output, const runtime_shape_t &out_shape, + gsl::span in_strides, const runtime_shape_t &out_strides, size_t axis, const runtime_shape_t &concat_dims, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result convert(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result copy(datatype_t type, const gsl::byte *src, gsl::byte *dest, + const runtime_shape_t &shape, const runtime_shape_t &src_strides, const runtime_shape_t &dest_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result transpose(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &perm, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result binary(binary_op_t op, const float *input_a, const float *input_b, float *output, + const runtime_shape_t &in_a_shape, const runtime_shape_t &in_a_strides, const runtime_shape_t &in_b_shape, + const runtime_shape_t &in_b_strides, const runtime_shape_t &out_strides, value_range fused_activation, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result dequantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result lut1d(datatype_t type, const gsl::byte *input, const gsl::byte *table, gsl::byte *output, const runtime_shape_t &shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const scalar &min, const scalar &max) noexcept; + +NNCASE_API result pad(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_paddings_t &paddings, pad_mode_t mode, const scalar &pad_value, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result quantize(datatype_t in_type, datatype_t out_type, const gsl::byte *input, gsl::byte *output, + const runtime_shape_t &in_shape, const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, float scale, float bias, + kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result unary(unary_op_t op, const float *input, float *output, const runtime_shape_t &shape, + const runtime_shape_t &in_strides, const 
runtime_shape_t &out_strides, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result reduce(reduce_op_t op, float init_value, const float *input, float *output, const runtime_shape_t &in_shape, const runtime_shape_t &axis, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, bool keep_dims, kernel_context &context = default_kernel_context) noexcept; + +NNCASE_API result slice(datatype_t type, const gsl::byte *input, gsl::byte *output, const runtime_shape_t &in_shape, + const runtime_shape_t &in_strides, const runtime_shape_t &out_strides, const runtime_shape_t &begins, const runtime_shape_t &ends, const runtime_axis_t &strides, + kernel_context &context = default_kernel_context) noexcept; + +END_NS_NNCASE_KERNELS diff --git a/lib/nncase/v1/include/nncase/runtime/aixlog.hpp b/lib/nncase/v1/include/nncase/runtime/aixlog.hpp new file mode 100644 index 0000000..3d0d30c --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/aixlog.hpp @@ -0,0 +1,1251 @@ +/*** + __ __ _ _ __ __ ___ + / _\ ( )( \/ )( ) / \ / __) + / \ )( ) ( / (_/\( O )( (_ \ + \_/\_/(__)(_/\_)\____/ \__/ \___/ + version 1.5.0 + https://github.com/badaix/aixlog + + This file is part of aixlog + Copyright (C) 2017-2021 Johannes Pohl + + This software may be modified and distributed under the terms + of the MIT license. See the LICENSE file for details. +***/ + +/// inspired by "eater": +/// https://stackoverflow.com/questions/2638654/redirect-c-stdclog-to-syslog-on-unix + +#ifndef AIX_LOG_HPP +#define AIX_LOG_HPP + +#ifndef _WIN32 +#define HAS_SYSLOG_ 1 +#endif + +#ifdef __APPLE__ +#ifdef __MAC_OS_X_VERSION_MAX_ALLOWED +#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 1012 +#define HAS_APPLE_UNIFIED_LOG_ 1 +#endif +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __ANDROID__ +#include +#endif + +#ifdef _WIN32 +#include +// ERROR macro is defined in Windows header +// To avoid conflict between these macro and declaration of ERROR / DEBUG in SEVERITY enum +// We save macro and undef it +#pragma push_macro("ERROR") +#pragma push_macro("DEBUG") +#undef ERROR +#undef DEBUG +#endif + +#ifdef HAS_APPLE_UNIFIED_LOG_ +#include +#endif + +#ifdef HAS_SYSLOG_ +#include +#endif + +#ifdef __ANDROID__ +// fix for bug "Android NDK __func__ definition is inconsistent with glibc and C++99" +// https://bugs.chromium.org/p/chromium/issues/detail?id=631489 +#ifdef __GNUC__ +#define AIXLOG_INTERNAL__FUNC __FUNCTION__ +#else +#define AIXLOG_INTERNAL__FUNC __func__ +#endif +#else +#define AIXLOG_INTERNAL__FUNC __func__ +#endif + +/// Internal helper macros (exposed, but shouldn't be used directly) +#define AIXLOG_INTERNAL__LOG_SEVERITY(SEVERITY_) std::clog << static_cast(SEVERITY_) << TAG() +#define AIXLOG_INTERNAL__LOG_SEVERITY_TAG(SEVERITY_, TAG_) std::clog << static_cast(SEVERITY_) << TAG(TAG_) + +#define AIXLOG_INTERNAL__ONE_COLOR(FG_) AixLog::Color::FG_ +#define AIXLOG_INTERNAL__TWO_COLOR(FG_, BG_) AixLog::TextColor(AixLog::Color::FG_, AixLog::Color::BG_) + +// https://stackoverflow.com/questions/3046889/optional-parameters-with-c-macros +#define AIXLOG_INTERNAL__VAR_PARM(PARAM1_, PARAM2_, FUNC_, ...) FUNC_ +#define AIXLOG_INTERNAL__LOG_MACRO_CHOOSER(...) AIXLOG_INTERNAL__VAR_PARM(__VA_ARGS__, AIXLOG_INTERNAL__LOG_SEVERITY_TAG, AIXLOG_INTERNAL__LOG_SEVERITY, ) +#define AIXLOG_INTERNAL__COLOR_MACRO_CHOOSER(...) 
AIXLOG_INTERNAL__VAR_PARM(__VA_ARGS__, AIXLOG_INTERNAL__TWO_COLOR, AIXLOG_INTERNAL__ONE_COLOR, ) + +/// External logger macros +// usage: LOG(SEVERITY) or LOG(SEVERITY, TAG) +// e.g.: LOG(NOTICE) or LOG(NOTICE, "my tag") +#ifndef WIN32 +#define LOG(...) AIXLOG_INTERNAL__LOG_MACRO_CHOOSER(__VA_ARGS__)(__VA_ARGS__) << TIMESTAMP << FUNC +#endif + +// usage: COLOR(TEXT_COLOR, BACKGROUND_COLOR) or COLOR(TEXT_COLOR) +// e.g.: COLOR(yellow, blue) or COLOR(red) +#define COLOR(...) AIXLOG_INTERNAL__COLOR_MACRO_CHOOSER(__VA_ARGS__)(__VA_ARGS__) + +#define FUNC AixLog::Function(AIXLOG_INTERNAL__FUNC, __FILE__, __LINE__) +#define TAG AixLog::Tag +#define COND AixLog::Conditional +#define TIMESTAMP AixLog::Timestamp(std::chrono::system_clock::now()) + + +// stijnvdb: sorry! :) LOG(SEV, "tag") was not working for Windows and I couldn't figure out how to fix it for windows without potentially breaking everything +// else... +// https://stackoverflow.com/questions/3046889/optional-parameters-with-c-macros (Jason Deng) +#ifdef WIN32 +#define LOG_2(severity, tag) AIXLOG_INTERNAL__LOG_SEVERITY_TAG(severity, tag) +#define LOG_1(severity) AIXLOG_INTERNAL__LOG_SEVERITY(severity) +#define LOG_0() LOG_1(0) + +#define FUNC_CHOOSER(_f1, _f2, _f3, ...) _f3 +#define FUNC_RECOMPOSER(argsWithParentheses) FUNC_CHOOSER argsWithParentheses +#define CHOOSE_FROM_ARG_COUNT(...) FUNC_RECOMPOSER((__VA_ARGS__, LOG_2, LOG_1, FUNC_, ...)) +#define MACRO_CHOOSER(...) CHOOSE_FROM_ARG_COUNT(__VA_ARGS__()) +#define LOG(...) MACRO_CHOOSER(__VA_ARGS__)(__VA_ARGS__) << TIMESTAMP << FUNC +#endif + +/** + * @brief + * Severity of the log message + */ +enum SEVERITY +{ + TRACE = 0, + DEBUG = 1, + INFO = 2, + NOTICE = 3, + WARNING = 4, + ERROR = 5, + FATAL = 6 +}; + +namespace AixLog +{ + +/** + * @brief + * Severity of the log message + * + * Mandatory parameter for the LOG macro + */ +enum class Severity : std::int8_t +{ + // Mapping table from AixLog to other loggers. Boost is just for information. 
+ // https://chromium.googlesource.com/chromium/mini_chromium/+/master/base/logging.cc + // + // Aixlog Boost Syslog Android macOS EventLog Syslog Desc + // + // trace trace DEBUG VERBOSE DEBUG INFORMATION + // debug debug DEBUG DEBUG DEBUG INFORMATION debug-level message + // info info INFO INFO INFO SUCCESS informational message + // notice NOTICE INFO INFO SUCCESS normal, but significant, condition + // warning warning WARNING WARN DEFAULT WARNING warning conditions + // error error ERROR ERROR ERROR ERROR error conditions + // fatal fatal CRIT FATAL FAULT ERROR critical conditions + // ALERT action must be taken immediately + // EMERG system is unusable + + trace = SEVERITY::TRACE, + debug = SEVERITY::DEBUG, + info = SEVERITY::INFO, + notice = SEVERITY::NOTICE, + warning = SEVERITY::WARNING, + error = SEVERITY::ERROR, + fatal = SEVERITY::FATAL +}; + + +static Severity to_severity(std::string severity, Severity def = Severity::info) +{ + std::transform(severity.begin(), severity.end(), severity.begin(), [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (severity == "trace") + return Severity::trace; + else if (severity == "debug") + return Severity::debug; + else if (severity == "info") + return Severity::info; + else if (severity == "notice") + return Severity::notice; + else if (severity == "warning") + return Severity::warning; + else if (severity == "error") + return Severity::error; + else if (severity == "fatal") + return Severity::fatal; + else + return def; +} + + +static std::string to_string(Severity logSeverity) +{ + switch (logSeverity) + { + case Severity::trace: + return "Trace"; + case Severity::debug: + return "Debug"; + case Severity::info: + return "Info"; + case Severity::notice: + return "Notice"; + case Severity::warning: + return "Warn"; + case Severity::error: + return "Error"; + case Severity::fatal: + return "Fatal"; + default: + std::stringstream ss; + ss << static_cast(logSeverity); + return ss.str(); + } +} + +/** + * @brief + * Color constants used for console colors + */ +enum class Color +{ + none = 0, + NONE = 0, + black = 1, + BLACK = 1, + red = 2, + RED = 2, + green = 3, + GREEN = 3, + yellow = 4, + YELLOW = 4, + blue = 5, + BLUE = 5, + magenta = 6, + MAGENTA = 6, + cyan = 7, + CYAN = 7, + white = 8, + WHITE = 8 +}; + +/** + * @brief + * Encapsulation of foreground and background color + */ +struct TextColor +{ + TextColor(Color foreground = Color::none, Color background = Color::none) : foreground(foreground), background(background) + { + } + + Color foreground; + Color background; +}; + +/** + * @brief + * For Conditional logging of a log line + */ +struct Conditional +{ + using EvalFunc = std::function; + + Conditional() : func_([](void) { return true; }) + { + } + + Conditional(const EvalFunc& func) : func_(func) + { + } + + Conditional(bool value) : func_([value](void) { return value; }) + { + } + + virtual ~Conditional() = default; + + virtual bool is_true() const + { + return func_(); + } + +protected: + EvalFunc func_; +}; + +/** + * @brief + * Timestamp of a log line + * + * to_string will convert the time stamp into a string, using the strftime syntax + */ +struct Timestamp +{ + using time_point_sys_clock = std::chrono::time_point; + + Timestamp(std::nullptr_t) : is_null_(true) + { + } + + Timestamp() : Timestamp(nullptr) + { + } + + Timestamp(const time_point_sys_clock& time_point) : time_point(time_point), is_null_(false) + { + } + + Timestamp(time_point_sys_clock&& time_point) : time_point(std::move(time_point)), 
is_null_(false) + { + } + + virtual ~Timestamp() = default; + + explicit operator bool() const + { + return !is_null_; + } + + /// strftime format + proprietary "#ms" for milliseconds + std::string to_string(const std::string& format = "%Y-%m-%d %H-%M-%S.#ms") const + { + std::time_t now_c = std::chrono::system_clock::to_time_t(time_point); + struct ::tm now_tm = localtime_xp(now_c); + char buffer[256]; + strftime(buffer, sizeof buffer, format.c_str(), &now_tm); + std::string result(buffer); + size_t pos = result.find("#ms"); + if (pos != std::string::npos) + { + int ms_part = std::chrono::time_point_cast(time_point).time_since_epoch().count() % 1000; + char ms_str[4]; + if (snprintf(ms_str, 4, "%03d", ms_part) >= 0) + result.replace(pos, 3, ms_str); + } + return result; + } + + time_point_sys_clock time_point; + +private: + bool is_null_; + + inline std::tm localtime_xp(std::time_t timer) const + { + std::tm bt; +#if defined(__unix__) + localtime_r(&timer, &bt); +#elif defined(_MSC_VER) + localtime_s(&bt, &timer); +#else + static std::mutex mtx; + std::lock_guard lock(mtx); + bt = *std::localtime(&timer); +#endif + return bt; + } +}; + +/** + * @brief + * Tag (string) for log line + */ +struct Tag +{ + Tag(std::nullptr_t) : text(""), is_null_(true) + { + } + + Tag() : Tag(nullptr) + { + } + + Tag(const char* text) : text(text), is_null_(false) + { + } + + Tag(const std::string& text) : text(text), is_null_(false) + { + } + + Tag(std::string&& text) : text(std::move(text)), is_null_(false) + { + } + + virtual ~Tag() = default; + + explicit operator bool() const + { + return !is_null_; + } + + bool operator<(const Tag& other) const + { + return (text < other.text); + } + + std::string text; + +private: + bool is_null_; +}; + +/** + * @brief + * Capture function, file and line number of the log line + */ +struct Function +{ + Function(const std::string& name, const std::string& file, size_t line) : name(name), file(file), line(line), is_null_(false) + { + } + + Function(std::string&& name, std::string&& file, size_t line) : name(std::move(name)), file(std::move(file)), line(line), is_null_(false) + { + } + + Function(std::nullptr_t) : name(""), file(""), line(0), is_null_(true) + { + } + + Function() : Function(nullptr) + { + } + + virtual ~Function() = default; + + explicit operator bool() const + { + return !is_null_; + } + + std::string name; + std::string file; + size_t line; + +private: + bool is_null_; +}; + +/** + * @brief + * Collection of a log line's meta data + */ +struct Metadata +{ + Metadata() : severity(Severity::trace), tag(nullptr), function(nullptr), timestamp(nullptr) + { + } + + Severity severity; + Tag tag; + Function function; + Timestamp timestamp; +}; + + +class Filter +{ +public: + Filter() + { + } + + Filter(Severity severity) + { + add_filter(severity); + } + + bool match(const Metadata& metadata) const + { + if (tag_filter_.empty()) + return true; + + auto iter = tag_filter_.find(metadata.tag); + if (iter != tag_filter_.end()) + return (metadata.severity >= iter->second); + + iter = tag_filter_.find("*"); + if (iter != tag_filter_.end()) + return (metadata.severity >= iter->second); + + return false; + } + + void add_filter(const Tag& tag, Severity severity) + { + tag_filter_[tag] = severity; + } + + void add_filter(Severity severity) + { + tag_filter_["*"] = severity; + } + + void add_filter(const std::string& filter) + { + auto pos = filter.find(":"); + if (pos != std::string::npos) + add_filter(filter.substr(0, pos), to_severity(filter.substr(pos + 1))); + else 
+ add_filter(to_severity(filter)); + } + +private: + std::map tag_filter_; +}; + + +/** + * @brief + * Abstract log sink + * + * All log sinks must inherit from this Sink + */ +struct Sink +{ + Sink(const Filter& filter) : filter(filter) + { + } + + virtual ~Sink() = default; + + virtual void log(const Metadata& metadata, const std::string& message) = 0; + + Filter filter; +}; + +/// ostream operators << for the meta data structs +static std::ostream& operator<<(std::ostream& os, const Severity& log_severity); +static std::ostream& operator<<(std::ostream& os, const Timestamp& timestamp); +static std::ostream& operator<<(std::ostream& os, const Tag& tag); +static std::ostream& operator<<(std::ostream& os, const Function& function); +static std::ostream& operator<<(std::ostream& os, const Conditional& conditional); +static std::ostream& operator<<(std::ostream& os, const Color& color); +static std::ostream& operator<<(std::ostream& os, const TextColor& text_color); + +using log_sink_ptr = std::shared_ptr; + +/** + * @brief + * Main Logger class with "Log::init" + * + * Don't use it directly, but call once "Log::init" with your log sink instances. + * The Log class will simply redirect clog to itself (as a streambuf) and + * forward whatever went to clog to the log sink instances + */ +class Log : public std::basic_streambuf> +{ +public: + static Log& instance() + { + static Log instance_; + return instance_; + } + + /// Without "init" every LOG(X) will simply go to clog + static void init(const std::vector log_sinks = {}) + { + Log::instance().log_sinks_.clear(); + + for (const auto& sink : log_sinks) + Log::instance().add_logsink(sink); + } + + template + static std::shared_ptr init(Ts&&... params) + { + std::shared_ptr sink = Log::instance().add_logsink(std::forward(params)...); + init({sink}); + return sink; + } + + template + std::shared_ptr add_logsink(Ts&&... 
params) + { + std::lock_guard lock(mutex_); + static_assert(std::is_base_of::type>::value, "type T must be a Sink"); + std::shared_ptr sink = std::make_shared(std::forward(params)...); + log_sinks_.push_back(sink); + return sink; + } + + void add_logsink(const log_sink_ptr& sink) + { + std::lock_guard lock(mutex_); + log_sinks_.push_back(sink); + } + + void remove_logsink(const log_sink_ptr& sink) + { + std::lock_guard lock(mutex_); + log_sinks_.erase(std::remove(log_sinks_.begin(), log_sinks_.end(), sink), log_sinks_.end()); + } + +protected: + Log() noexcept : last_buffer_(nullptr), do_log_(true) + { + std::clog.rdbuf(this); + std::clog << Severity() << Tag() << Function() << Conditional() << AixLog::Color::NONE << std::flush; + } + + virtual ~Log() + { + sync(); + } + + int sync() override + { + std::lock_guard lock(mutex_); + if (!get_stream().str().empty()) + { + if (do_log_) + { + for (const auto& sink : log_sinks_) + { + if (sink->filter.match(metadata_)) + sink->log(metadata_, get_stream().str()); + } + } + get_stream().str(""); + get_stream().clear(); + } + + return 0; + } + + int overflow(int c) override + { + std::lock_guard lock(mutex_); + if (c != EOF) + { + if (c == '\n') + sync(); + else if (do_log_) + get_stream() << static_cast(c); + } + else + { + sync(); + } + return c; + } + +private: + friend std::ostream& operator<<(std::ostream& os, const Severity& log_severity); + friend std::ostream& operator<<(std::ostream& os, const Timestamp& timestamp); + friend std::ostream& operator<<(std::ostream& os, const Tag& tag); + friend std::ostream& operator<<(std::ostream& os, const Function& function); + friend std::ostream& operator<<(std::ostream& os, const Conditional& conditional); + + std::stringstream& get_stream() + { + auto id = std::this_thread::get_id(); + if ((last_buffer_ == nullptr) || (last_id_ != id)) + { + last_id_ = id; + last_buffer_ = &(buffer_[id]); + } + return *last_buffer_; + } + + /// one buffer per thread to avoid mixed log lines + std::map buffer_; + /// the last thread id + std::thread::id last_id_; + /// the last buffer + std::stringstream* last_buffer_ = nullptr; + Metadata metadata_; + bool do_log_; + std::vector log_sinks_; + std::recursive_mutex mutex_; +}; + +/** + * @brief + * Null log sink + * + * Discards all log messages + */ +struct SinkNull : public Sink +{ + SinkNull() : Sink(Filter()) + { + } + + void log(const Metadata& /*metadata*/, const std::string& /*message*/) override + { + } +}; + + +/** + * @brief + * Abstract log sink with support for formatting log message + * + * "format" in the c'tor defines a log pattern. + * For every log message, these placeholders will be substituded: + * - strftime syntax is used to format the logging time stamp (%Y, %m, %d, ...) + * - #ms: milliseconds part of the logging time stamp with leading zeros + * - #severity: log severity + * - #tag_func: the log tag. 
If empty, the function + * - #tag: the log tag + * - #function: the function + * - #message: the log message + */ +struct SinkFormat : public Sink +{ + SinkFormat(const Filter& filter, const std::string& format) : Sink(filter), format_(format) + { + } + + virtual void set_format(const std::string& format) + { + format_ = format; + } + + void log(const Metadata& metadata, const std::string& message) override = 0; + +protected: + virtual void do_log(std::ostream& stream, const Metadata& metadata, const std::string& message) const + { + std::string result = format_; + if (metadata.timestamp) + result = metadata.timestamp.to_string(result); + + size_t pos = result.find("#severity"); + if (pos != std::string::npos) + result.replace(pos, 9, to_string(metadata.severity)); + + pos = result.find("#color_severity"); + if (pos != std::string::npos) + { + std::stringstream ss; + ss << TextColor(Color::RED) << to_string(metadata.severity) << TextColor(Color::NONE); + result.replace(pos, 15, ss.str()); + } + + pos = result.find("#tag_func"); + if (pos != std::string::npos) + result.replace(pos, 9, metadata.tag ? metadata.tag.text : (metadata.function ? metadata.function.name : "log")); + + pos = result.find("#tag"); + if (pos != std::string::npos) + result.replace(pos, 4, metadata.tag ? metadata.tag.text : ""); + + pos = result.find("#function"); + if (pos != std::string::npos) + result.replace(pos, 9, metadata.function ? metadata.function.name : ""); + + pos = result.find("#message"); + if (pos != std::string::npos) + { + result.replace(pos, 8, message); + stream << result << std::endl; + } + else + { + if (result.empty() || (result.back() == ' ')) + stream << result << message << std::endl; + else + stream << result << " " << message << std::endl; + } + } + + std::string format_; +}; + +/** + * @brief + * Formatted logging to cout + */ +struct SinkCout : public SinkFormat +{ + SinkCout(const Filter& filter, const std::string& format = "%Y-%m-%d %H-%M-%S.#ms [#severity] (#tag_func)") : SinkFormat(filter, format) + { + } + + void log(const Metadata& metadata, const std::string& message) override + { + do_log(std::cout, metadata, message); + } +}; + +/** + * @brief + * Formatted logging to cerr + */ +struct SinkCerr : public SinkFormat +{ + SinkCerr(const Filter& filter, const std::string& format = "%Y-%m-%d %H-%M-%S.#ms [#severity] (#tag_func)") : SinkFormat(filter, format) + { + } + + void log(const Metadata& metadata, const std::string& message) override + { + do_log(std::cerr, metadata, message); + } +}; + +/** + * @brief + * Formatted logging to file + */ +struct SinkFile : public SinkFormat +{ + SinkFile(const Filter& filter, const std::string& filename, const std::string& format = "%Y-%m-%d %H-%M-%S.#ms [#severity] (#tag_func)") + : SinkFormat(filter, format) + { + ofs.open(filename.c_str(), std::ofstream::out | std::ofstream::trunc); + } + + ~SinkFile() override + { + ofs.close(); + } + + void log(const Metadata& metadata, const std::string& message) override + { + do_log(ofs, metadata, message); + } + +protected: + mutable std::ofstream ofs; +}; + +#ifdef _WIN32 +/** + * @brief + * Windows: Logging to OutputDebugString + * + * Not tested due to unavailability of Windows + */ +struct SinkOutputDebugString : public Sink +{ + SinkOutputDebugString(const Filter& filter) : Sink(filter) + { + } + + void log(const Metadata& metadata, const std::string& message) override + { +#ifdef UNICODE + std::wstring wide = std::wstring(message.begin(), message.end()); + OutputDebugString(wide.c_str()); +#else + 
OutputDebugString(message.c_str()); +#endif + } +}; +#endif + +#ifdef HAS_APPLE_UNIFIED_LOG_ +/** + * @brief + * macOS: Logging to Apples system logger + */ +struct SinkUnifiedLogging : public Sink +{ + SinkUnifiedLogging(const Filter& filter) : Sink(filter) + { + } + + os_log_type_t get_os_log_type(Severity severity) const + { + // https://developer.apple.com/documentation/os/os_log_type_t?language=objc + switch (severity) + { + case Severity::trace: + case Severity::debug: + return OS_LOG_TYPE_DEBUG; + case Severity::info: + case Severity::notice: + return OS_LOG_TYPE_INFO; + case Severity::warning: + return OS_LOG_TYPE_DEFAULT; + case Severity::error: + return OS_LOG_TYPE_ERROR; + case Severity::fatal: + return OS_LOG_TYPE_FAULT; + default: + return OS_LOG_TYPE_DEFAULT; + } + } + + void log(const Metadata& metadata, const std::string& message) override + { + os_log_with_type(OS_LOG_DEFAULT, get_os_log_type(metadata.severity), "%{public}s", message.c_str()); + } +}; +#endif + +#ifdef HAS_SYSLOG_ +/** + * @brief + * UNIX: Logging to syslog + */ +struct SinkSyslog : public Sink +{ + SinkSyslog(const char* ident, const Filter& filter) : Sink(filter) + { + openlog(ident, LOG_PID, LOG_USER); + } + + ~SinkSyslog() override + { + closelog(); + } + + int get_syslog_priority(Severity severity) const + { + // http://unix.superglobalmegacorp.com/Net2/newsrc/sys/syslog.h.html + switch (severity) + { + case Severity::trace: + case Severity::debug: + return LOG_DEBUG; + case Severity::info: + return LOG_INFO; + case Severity::notice: + return LOG_NOTICE; + case Severity::warning: + return LOG_WARNING; + case Severity::error: + return LOG_ERR; + case Severity::fatal: + return LOG_CRIT; + default: + return LOG_INFO; + } + } + + void log(const Metadata& metadata, const std::string& message) override + { + syslog(get_syslog_priority(metadata.severity), "%s", message.c_str()); + } +}; +#endif + +#ifdef __ANDROID__ +/** + * @brief + * Android: Logging to android log + * + * Use logcat to read the logs + */ +struct SinkAndroid : public Sink +{ + SinkAndroid(const std::string& ident, const Filter& filter) : Sink(filter), ident_(ident) + { + } + + android_LogPriority get_android_prio(Severity severity) const + { + // https://developer.android.com/ndk/reference/log_8h.html + switch (severity) + { + case Severity::trace: + return ANDROID_LOG_VERBOSE; + case Severity::debug: + return ANDROID_LOG_DEBUG; + case Severity::info: + case Severity::notice: + return ANDROID_LOG_INFO; + case Severity::warning: + return ANDROID_LOG_WARN; + case Severity::error: + return ANDROID_LOG_ERROR; + case Severity::fatal: + return ANDROID_LOG_FATAL; + default: + return ANDROID_LOG_UNKNOWN; + } + } + + void log(const Metadata& metadata, const std::string& message) override + { + std::string tag = metadata.tag ? metadata.tag.text : (metadata.function ? metadata.function.name : ""); + std::string log_tag; + if (!ident_.empty() && !tag.empty()) + log_tag = ident_ + "." 
+ tag; + else if (!ident_.empty()) + log_tag = ident_; + else if (!tag.empty()) + log_tag = tag; + else + log_tag = "log"; + + __android_log_write(get_android_prio(metadata.severity), log_tag.c_str(), message.c_str()); + } + +protected: + std::string ident_; +}; +#endif + +#ifdef _WIN32 +/** + * @brief + * Windows: Logging to event logger + * + * Not tested due to unavailability of Windows + */ +struct SinkEventLog : public Sink +{ + SinkEventLog(const std::string& ident, const Filter& filter) : Sink(filter) + { +#ifdef UNICODE + std::wstring wide = std::wstring(ident.begin(), ident.end()); // stijnvdb: RegisterEventSource expands to RegisterEventSourceW which takes wchar_t + event_log = RegisterEventSource(NULL, wide.c_str()); +#else + event_log = RegisterEventSource(NULL, ident.c_str()); +#endif + } + + WORD get_type(Severity severity) const + { + // https://msdn.microsoft.com/de-de/library/windows/desktop/aa363679(v=vs.85).aspx + switch (severity) + { + case Severity::trace: + case Severity::debug: + return EVENTLOG_INFORMATION_TYPE; + case Severity::info: + case Severity::notice: + return EVENTLOG_SUCCESS; + case Severity::warning: + return EVENTLOG_WARNING_TYPE; + case Severity::error: + case Severity::fatal: + return EVENTLOG_ERROR_TYPE; + default: + return EVENTLOG_INFORMATION_TYPE; + } + } + + void log(const Metadata& metadata, const std::string& message) override + { +#ifdef UNICODE + std::wstring wide = std::wstring(message.begin(), message.end()); + // We need this temp variable because we cannot take address of rValue + const auto* c_str = wide.c_str(); + ReportEvent(event_log, get_type(metadata.severity), 0, 0, NULL, 1, 0, &c_str, NULL); +#else + const auto* c_str = message.c_str(); + ReportEvent(event_log, get_type(metadata.severity), 0, 0, NULL, 1, 0, &c_str, NULL); +#endif + } + +protected: + HANDLE event_log; +}; +#endif + +/** + * @brief + * Log to the system's native sys logger + * + * - Android: Android log + * - macOS: unified log + * - Windows: event log + * - Unix: syslog + */ +struct SinkNative : public Sink +{ + SinkNative(const std::string& ident, const Filter& filter) : Sink(filter), log_sink_(nullptr), ident_(ident) + { +#ifdef __ANDROID__ + log_sink_ = std::make_shared(ident_, filter); +#elif HAS_APPLE_UNIFIED_LOG_ + log_sink_ = std::make_shared(filter); +#elif _WIN32 + log_sink_ = std::make_shared(ident, filter); +#elif HAS_SYSLOG_ + log_sink_ = std::make_shared(ident_.c_str(), filter); +#else + /// will not throw or something. Use "get_logger()" to check for success + log_sink_ = nullptr; +#endif + } + + virtual log_sink_ptr get_logger() + { + return log_sink_; + } + + void log(const Metadata& metadata, const std::string& message) override + { + if (log_sink_ != nullptr) + log_sink_->log(metadata, message); + } + +protected: + log_sink_ptr log_sink_; + std::string ident_; +}; + +/** + * @brief + * Forward log messages to a callback function + * + * Pass the callback function to the c'tor. 
+ * This can be any function that matches the signature of "callback_fun" + * Might also be a lambda function + */ +struct SinkCallback : public Sink +{ + using callback_fun = std::function<void(const Metadata& metadata, const std::string& message)>; + + SinkCallback(const Filter& filter, callback_fun callback) : Sink(filter), callback_(callback) + { + } + + void log(const Metadata& metadata, const std::string& message) override + { + if (callback_) + callback_(metadata, message); + } + +private: + callback_fun callback_; +}; + +/** + * @brief + * ostream << operator for "Severity" + * + * Severity must be the first thing that is logged into clog, since it will reset the logger's metadata. + */ +static std::ostream& operator<<(std::ostream& os, const Severity& log_severity) +{ + Log* log = dynamic_cast<Log*>(os.rdbuf()); + if (log != nullptr) + { + std::lock_guard<std::recursive_mutex> lock(log->mutex_); + if (log->metadata_.severity != log_severity) + { + log->sync(); + log->metadata_.severity = log_severity; + log->metadata_.timestamp = nullptr; + log->metadata_.tag = nullptr; + log->metadata_.function = nullptr; + log->do_log_ = true; + } + } + else + { + os << to_string(log_severity); + } + return os; +} + +static std::ostream& operator<<(std::ostream& os, const Timestamp& timestamp) +{ + Log* log = dynamic_cast<Log*>(os.rdbuf()); + if (log != nullptr) + { + std::lock_guard<std::recursive_mutex> lock(log->mutex_); + log->metadata_.timestamp = timestamp; + } + else if (timestamp) + { + os << timestamp.to_string(); + } + return os; +} + +static std::ostream& operator<<(std::ostream& os, const Tag& tag) +{ + Log* log = dynamic_cast<Log*>(os.rdbuf()); + if (log != nullptr) + { + std::lock_guard<std::recursive_mutex> lock(log->mutex_); + log->metadata_.tag = tag; + } + else if (tag) + { + os << tag.text; + } + return os; +} + +static std::ostream& operator<<(std::ostream& os, const Function& function) +{ + Log* log = dynamic_cast<Log*>(os.rdbuf()); + if (log != nullptr) + { + std::lock_guard<std::recursive_mutex> lock(log->mutex_); + log->metadata_.function = function; + } + else if (function) + { + os << function.name; + } + return os; +} + +static std::ostream& operator<<(std::ostream& os, const Conditional& conditional) +{ + Log* log = dynamic_cast<Log*>(os.rdbuf()); + if (log != nullptr) + { + std::lock_guard<std::recursive_mutex> lock(log->mutex_); + log->do_log_ = conditional.is_true(); + } + return os; +} + +static std::ostream& operator<<(std::ostream& os, const TextColor& text_color) +{ + os << "\033["; + if ((text_color.foreground == Color::none) && (text_color.background == Color::none)) + os << "0"; // reset colors if no params + + if (text_color.foreground != Color::none) + { + os << 29 + static_cast<int>(text_color.foreground); + if (text_color.background != Color::none) + os << ";"; + } + if (text_color.background != Color::none) + os << 39 + static_cast<int>(text_color.background); + os << "m"; + + return os; +} + +static std::ostream& operator<<(std::ostream& os, const Color& color) +{ + os << TextColor(color); + return os; +} + +} // namespace AixLog + +#ifdef _WIN32 +// We restore the ERROR Windows macro +#pragma pop_macro("ERROR") +#pragma pop_macro("DEBUG") +#endif + +#endif // AIX_LOG_HPP diff --git a/lib/nncase/v1/include/nncase/runtime/allocator.h b/lib/nncase/v1/include/nncase/runtime/allocator.h new file mode 100644 index 0000000..99141db --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/allocator.h @@ -0,0 +1,34 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include + +BEGIN_NS_NNCASE_RUNTIME + +class NNCASE_API allocation_state +{ +public: + virtual ~allocation_state(); +}; + +class NNCASE_API host_allocator +{ +public: + virtual ~host_allocator(); + virtual gsl::span allocate(allocation_state &state, size_t bytes) = 0; +}; + +END_NS_NNCASE_RUNTIME diff --git a/lib/nncase/v1/include/nncase/runtime/bfloat16.h b/lib/nncase/v1/include/nncase/runtime/bfloat16.h new file mode 100644 index 0000000..5859542 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/bfloat16.h @@ -0,0 +1,353 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace nncase +{ +struct half +{ + uint16_t value; +}; + +struct from_raw_t +{ + explicit from_raw_t() = default; +}; + +NNCASE_INLINE_VAR constexpr from_raw_t from_raw {}; + +struct bfloat16 +{ +private: + union fp32 + { + uint32_t u32; + float f32; + + uint16_t u16() const noexcept + { + constexpr size_t index = NNCASE_LITTLE_ENDIAN ? 1 : 0; + return reinterpret_cast<const uint16_t *>(&u32)[index]; + } + + uint16_t &u16() noexcept + { + constexpr size_t index = NNCASE_LITTLE_ENDIAN ? 1 : 0; + return reinterpret_cast<uint16_t *>(&u32)[index]; + } + }; + + // A value that represents "zero". + static constexpr uint16_t ZERO_VALUE = 0; + + // A value that represents "not a number". + static constexpr uint16_t NAN_VALUE = 0x7FC0; + +public: + bfloat16() noexcept = default; + + explicit bfloat16(float v) noexcept + : value_(truncate_to_bfloat16(v).value_) { } + + template <class T, class = std::enable_if_t<std::is_integral<T>::value || std::is_floating_point<T>::value>> + explicit bfloat16(const T &val) noexcept + : bfloat16(static_cast<float>(val)) { } + + constexpr bfloat16(from_raw_t, uint16_t value) noexcept + : value_(value) { } + + operator float() const noexcept + { + fp32 result; + result.u32 = 0; + result.u16() = value_; + return result.f32; + } + + const uint16_t &raw() const noexcept { return value_; } + uint16_t &raw() noexcept { return value_; } + + static constexpr bfloat16 from_raw(uint16_t v) noexcept + { + return bfloat16(nncase::from_raw, v); + } + + static bfloat16 truncate_to_bfloat16(const float v) noexcept + { + bfloat16 output; + + if (!std::isnan(v)) + { + fp32 f; + f.f32 = v; + output.value_ = f.u16(); + } + else + { + output.value_ = NAN_VALUE; + } + + return output; + } + + // Converts a floating point to bfloat16, with round-nearest-to-even as rounding + // method. 
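+ // For example (illustrative values, worked out from the bit arithmetic below): v = 1.00390625f + // has the bit pattern 0x3F808000, so the discarded low half is exactly 0x8000 and the kept lsb + // is 0 (0x3F80); the bias 0x7FFF does not carry, and the result stays at 0x3F80 (1.0). For + // v = 1.01171875f (bits 0x3F818000) the kept lsb is 1, the bias becomes 0x8000, the addition + // carries, and the result is 0x3F82 (1.015625): halfway cases round to the even mantissa.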
+ static bfloat16 round_to_bfloat16(float v) + { + uint32_t input; + fp32 f; + f.f32 = v; + input = f.u32; + bfloat16 output; + + if (!std::isnan(v)) + { + // Least significant bit of resulting bfloat. + uint32_t lsb = (input >> 16) & 1; + uint32_t rounding_bias = 0x7fff + lsb; + input += rounding_bias; + output.value_ = static_cast(input >> 16); + } + else + { + // If the value is a NaN, squash it to a qNaN with msb of fraction set, + // this makes sure after truncation we don't end up with an inf. + // + // qNaN magic: All exponent bits set + most significant bit of fraction + // set. + output.value_ = NAN_VALUE; + } + + return output; + } + + static constexpr bfloat16 epsilon() noexcept + { + // 0x1.0p-7 + return from_raw(0x3c00); + } + + static constexpr bfloat16 highest() noexcept + { + // 0x1.FEp127 + return from_raw(0x7F7F); + } + + static constexpr bfloat16 min() noexcept + { + // 0x1p-126 + return from_raw(0x0080); + } + + static constexpr bfloat16 lowest() noexcept + { + // -0x1.FEp127 + return from_raw(0xFF7F); + } + + static constexpr bfloat16 nan() noexcept + { + return from_raw(NAN_VALUE); + } + + static constexpr bfloat16 infinity() noexcept + { + return from_raw(0x7f80); + } + + constexpr bool zero() const noexcept { return (value_ & 0x7FFF) == ZERO_VALUE; } + +private: + uint16_t value_; +}; + +#define DEFINE_BF16_BINARY_BF16RET(x) \ + inline bfloat16 operator x(bfloat16 a, bfloat16 b) noexcept \ + { \ + return bfloat16::round_to_bfloat16(float(a) x float(b)); \ + } + +#define DEFINE_BF16_BINARY_BOOLRET(x) \ + inline bool operator x(bfloat16 a, bfloat16 b) noexcept \ + { \ + return float(a) x float(b); \ + } + +DEFINE_BF16_BINARY_BF16RET(+) +DEFINE_BF16_BINARY_BF16RET(-) +DEFINE_BF16_BINARY_BF16RET(*) +DEFINE_BF16_BINARY_BF16RET(/) +DEFINE_BF16_BINARY_BOOLRET(<) +DEFINE_BF16_BINARY_BOOLRET(<=) +DEFINE_BF16_BINARY_BOOLRET(>=) +DEFINE_BF16_BINARY_BOOLRET(>) + +#define DEFINE_BF16_BINARY_SELF_MOD(x, op) \ + inline bfloat16 &operator x(bfloat16 &a, bfloat16 b) noexcept \ + { \ + a = a op b; \ + return a; \ + } + +DEFINE_BF16_BINARY_SELF_MOD(+=, +) +DEFINE_BF16_BINARY_SELF_MOD(-=, -) +DEFINE_BF16_BINARY_SELF_MOD(*=, *) +DEFINE_BF16_BINARY_SELF_MOD(/=, /) + +inline bfloat16 operator-(bfloat16 a) noexcept +{ + return bfloat16::round_to_bfloat16(-float(a)); +} + +inline bool operator==(const bfloat16 &lhs, const bfloat16 &rhs) noexcept +{ + return lhs.raw() == rhs.raw(); +} + +inline bool operator!=(const bfloat16 &lhs, const bfloat16 &rhs) noexcept +{ + return lhs.raw() != rhs.raw(); +} +} + +namespace std +{ +template <> +struct hash +{ + size_t operator()(const nncase::bfloat16 &v) const + { + return hash()(static_cast(v)); + } +}; + +template <> +struct numeric_limits +{ + static constexpr float_denorm_style has_denorm = denorm_present; + static constexpr bool has_infinity = true; + static constexpr bool has_quiet_NaN = true; + static constexpr bool has_signaling_NaN = true; + static constexpr bool is_bounded = true; + static constexpr bool is_iec559 = true; + static constexpr bool is_signed = true; + static constexpr bool is_specialized = true; + static constexpr float_round_style round_style = round_to_nearest; + static constexpr int radix = FLT_RADIX; + + [[nodiscard]] static constexpr nncase::bfloat16(min)() noexcept + { + return nncase::bfloat16::min(); + } + + [[nodiscard]] static constexpr nncase::bfloat16(max)() noexcept + { + return nncase::bfloat16::highest(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 lowest() noexcept + { + return 
nncase::bfloat16::lowest(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 epsilon() noexcept + { + return nncase::bfloat16::epsilon(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 round_error() noexcept + { + // 0.5 + return nncase::bfloat16::from_raw(0x3f00); + } + + [[nodiscard]] static constexpr nncase::bfloat16 denorm_min() noexcept + { + return nncase::bfloat16::min(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 infinity() noexcept + { + return nncase::bfloat16::infinity(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 quiet_NaN() noexcept + { + return nncase::bfloat16::nan(); + } + + [[nodiscard]] static constexpr nncase::bfloat16 signaling_NaN() noexcept + { + return nncase::bfloat16::nan(); + } + + static constexpr int digits = 8; + static constexpr int max_exponent = FLT_MAX_EXP; + static constexpr int min_exponent = FLT_MIN_EXP; +}; + +using nncase::bfloat16; +inline bool isinf(const bfloat16 &a) { return std::isinf(float(a)); } +inline bool isnan(const bfloat16 &a) { return std::isnan(float(a)); } +inline bool isfinite(const bfloat16 &a) { return std::isfinite(float(a)); } +inline bfloat16 abs(const bfloat16 &a) { return bfloat16::round_to_bfloat16(fabsf(float(a))); } +inline bfloat16 exp(const bfloat16 &a) { return bfloat16::round_to_bfloat16(expf(float(a))); } +inline bfloat16 log(const bfloat16 &a) { return bfloat16::round_to_bfloat16(logf(float(a))); } +inline bfloat16 log10(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(log10f(float(a))); +} +inline bfloat16 sqrt(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(sqrtf(float(a))); +} +inline bfloat16 pow(const bfloat16 &a, const bfloat16 &b) +{ + return bfloat16::round_to_bfloat16(powf(float(a), float(b))); +} +inline bfloat16 sin(const bfloat16 &a) { return bfloat16::round_to_bfloat16(sinf(float(a))); } +inline bfloat16 cos(const bfloat16 &a) { return bfloat16::round_to_bfloat16(cosf(float(a))); } +inline bfloat16 tan(const bfloat16 &a) { return bfloat16::round_to_bfloat16(tanf(float(a))); } +inline bfloat16 tanh(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(tanhf(float(a))); +} +inline bfloat16 floor(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(floorf(float(a))); +} +inline bfloat16 ceil(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(ceilf(float(a))); +} +inline bfloat16 round(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(roundf(float(a))); +} +inline bfloat16 nearbyint(const bfloat16 &a) +{ + return bfloat16::round_to_bfloat16(nearbyintf(float(a))); +} +inline long lrint(const bfloat16 &a) +{ + return lrintf(float(a)); +} +} // namespace std diff --git a/lib/nncase/v1/include/nncase/runtime/bitio.h b/lib/nncase/v1/include/nncase/runtime/bitio.h new file mode 100644 index 0000000..290b9a6 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/bitio.h @@ -0,0 +1,167 @@ +/* Copyright 2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once +#include "datatypes.h" +#include +#include +#include +#include +#include + +namespace nncase::runtime +{ +class bitreader +{ +public: + bitreader(std::span data) + : data_(data), buffer_(0), avail_(0) { } + + void read(uint8_t *dest, size_t bits) + { + while (bits) + { + auto to_read = std::min(bits, size_t(8)); + *dest++ = read_bits_le8(to_read); + bits -= to_read; + } + } + + template + T read() + { + T ret {}; + read(reinterpret_cast(&ret), Bits); + return ret; + } + +private: + uint8_t read_bits_le8(size_t bits) + { + assert(bits <= 8); + + fill_buffer_le8(bits); + uint8_t ret = buffer_ & ((size_t(1) << bits) - 1); + buffer_ >>= bits; + avail_ -= bits; + return ret; + } + + void fill_buffer_le8(size_t bits) + { + if (avail_ < bits) + { + auto max_read_bytes = std::min(data_.size() * 8, sizeof(buffer_) * 8 - avail_) / 8; + assert(max_read_bytes != 0); + + uint64_t tmp = 0; + std::memcpy(&tmp, data_.data(), max_read_bytes); + data_ = data_.subspan(max_read_bytes); + buffer_ = buffer_ | (tmp << avail_); + avail_ += max_read_bytes * 8; + } + } + +private: + std::span data_; + uint64_t buffer_; + size_t avail_; +}; + +class bitwriter +{ +public: + bitwriter(std::span data, size_t bitoffset = 0) + : data_(data), buffer_(0), avail_(sizeof(buffer_) * 8) + { + if (bitoffset) + { + data_ = data_.subspan(bitoffset / 8); + bitoffset %= 8; + buffer_ = data_.front() & ((size_t(1) << bitoffset) - 1); + avail_ -= bitoffset; + } + } + + ~bitwriter() { flush(); } + + void write(const uint8_t *src, size_t bits) + { + while (bits) + { + auto to_write = std::min(bits, size_t(8)); + write_bits_le8(*src++, to_write); + bits -= to_write; + } + } + + template + void write(T value) + { + write(reinterpret_cast(&value), Bits); + } + + void flush() + { + auto write_bytes = (buffer_written_bits() + 7) / 8; + if (write_bytes) + { + assert(data_.size() >= write_bytes); + + std::memcpy(data_.data(), &buffer_, write_bytes); + data_ = data_.subspan(write_bytes); + buffer_ = 0; + avail_ = sizeof(buffer_) * 8; + } + } + +private: + void write_bits_le8(uint8_t value, size_t bits) + { + assert(bits <= 8); + + reserve_buffer_8(); + size_t new_value = value & ((size_t(1) << bits) - 1); + buffer_ = buffer_ | (new_value << buffer_written_bits()); + avail_ -= bits; + } + + void reserve_buffer_8() + { + if (avail_ < 8) + { + auto write_bytes = buffer_written_bits() / 8; + assert(data_.size() >= write_bytes); + + std::memcpy(data_.data(), &buffer_, write_bytes); + data_ = data_.subspan(write_bytes); + if (write_bytes == sizeof(buffer_)) + buffer_ = 0; + else + buffer_ >>= write_bytes * 8; + avail_ += write_bytes * 8; + } + } + + size_t buffer_written_bits() const noexcept + { + return sizeof(buffer_) * 8 - avail_; + } + +private: + std::span data_; + uint64_t buffer_; + size_t avail_; +}; +} diff --git a/lib/nncase/v1/include/nncase/runtime/compiler_defs.h b/lib/nncase/v1/include/nncase/runtime/compiler_defs.h new file mode 100644 index 0000000..af41d97 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/compiler_defs.h @@ -0,0 +1,107 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include + +#if defined(_MSC_VER) +#ifdef NNCASE_DLL +#define NNCASE_API __declspec(dllexport) +#elif defined(NNCASE_SHARED_LIBS) +#define NNCASE_API __declspec(dllimport) +#else +#define NNCASE_API +#endif +#else +#define NNCASE_API +#endif + +#if defined(_MSC_VER) +#define NNCASE_UNREACHABLE() __assume(0) +#else +#define NNCASE_UNREACHABLE() __builtin_unreachable() +#endif + +#if gsl_CPP17_OR_GREATER +#define NNCASE_INLINE_VAR inline +#define NNCASE_UNUSED [[maybe_unused]] +namespace nncase +{ +template +using invoke_result_t = std::invoke_result_t; +} +#else +#define NNCASE_INLINE_VAR +#if defined(_MSC_VER) +#define NNCASE_UNUSED +#else +#define NNCASE_UNUSED __attribute__((unused)) +#endif +namespace nncase +{ +template +using invoke_result_t = std::result_of_t; +} +#endif + +#define NNCASE_LITTLE_ENDIAN 1 + +#define NNCASE_HAVE_STD_BYTE gsl_CPP17_OR_GREATER +#define NNCASE_NODISCARD gsl_NODISCARD +#define NNCASE_NORETURN gsl_NORETURN + +#define BEGIN_NS_NNCASE_RUNTIME \ + namespace nncase \ + { \ + namespace runtime \ + { +#define END_NS_NNCASE_RUNTIME \ + } \ + } + +#define BEGIN_NS_NNCASE_RT_STACKVM \ + namespace nncase \ + { \ + namespace runtime \ + { \ + namespace stackvm \ + { +#define END_NS_NNCASE_RT_STACKVM \ + } \ + } \ + } + +#define BEGIN_NS_NNCASE_KERNELS \ + namespace nncase \ + { \ + namespace kernels \ + { + +#define END_NS_NNCASE_KERNELS \ + } \ + } + +#ifndef DEFINE_ENUM_BITMASK_OPERATORS +#define DEFINE_ENUM_BITMASK_OPERATORS(ENUMTYPE) gsl_DEFINE_ENUM_BITMASK_OPERATORS(ENUMTYPE) +#endif + +namespace nncase +{ +struct default_init_t +{ +}; + +NNCASE_INLINE_VAR constexpr default_init_t default_init {}; +} diff --git a/lib/nncase/v1/include/nncase/runtime/datatypes.def b/lib/nncase/v1/include/nncase/runtime/datatypes.def new file mode 100644 index 0000000..e962df9 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/datatypes.def @@ -0,0 +1,12 @@ +DEFINE_DATATYPE(int8, int8_t, i8, 0x00) +DEFINE_DATATYPE(int16, int16_t, i16, 0x01) +DEFINE_DATATYPE(int32, int32_t, i32, 0x02) +DEFINE_DATATYPE(int64, int64_t, i64, 0x03) +DEFINE_DATATYPE(uint8, uint8_t, u8, 0x04) +DEFINE_DATATYPE(uint16, uint16_t, u16, 0x05) +DEFINE_DATATYPE(uint32, uint32_t, u32, 0x06) +DEFINE_DATATYPE(uint64, uint64_t, u64, 0x07) +DEFINE_DATATYPE(float16, half, f16, 0x08) +DEFINE_DATATYPE(float32, float, f32, 0x09) +DEFINE_DATATYPE(float64, double, f64, 0x0A) +DEFINE_DATATYPE(bfloat16, bfloat16, bf16, 0x0B) diff --git a/lib/nncase/v1/include/nncase/runtime/datatypes.h b/lib/nncase/v1/include/nncase/runtime/datatypes.h new file mode 100644 index 0000000..56b19d7 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/datatypes.h @@ -0,0 +1,436 @@ +/* Copyright 2019-2020 Canaan Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include "bfloat16.h" +#include "compiler_defs.h" +#include "small_vector.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nncase +{ +typedef enum _datatype : uint8_t +{ +#define DEFINE_DATATYPE(id, t, name, value) dt_##id = value, +#include "datatypes.def" +#undef DEFINE_DATATYPE +} datatype_t; + +namespace detail +{ + template + struct datatype_to_cpp_type + { + }; + + template + struct cpp_type_to_datatype + { + }; + +#if NNCASE_HAVE_STD_BYTE + template <> + struct cpp_type_to_datatype + { + static constexpr datatype_t type = dt_uint8; + }; +#endif + +#define DEFINE_DATATYPE(id, t, name, value) \ + template <> \ + struct datatype_to_cpp_type \ + { \ + using type = t; \ + }; \ + template <> \ + struct cpp_type_to_datatype \ + { \ + static constexpr datatype_t type = dt_##id; \ + }; +#include "datatypes.def" +#undef DEFINE_DATATYPE + + inline constexpr size_t datatype_bytes(datatype_t type) + { + switch (type) + { +#define DEFINE_DATATYPE(id, t, name, value) \ + case (dt_##id): \ + return sizeof(t); +#include "datatypes.def" +#undef DEFINE_DATATYPE + default: + return -1; + } + } + +} + +template +constexpr datatype_t to_datatype() noexcept +{ + return detail::cpp_type_to_datatype::type; +} + +template +using to_cpp_type_t = typename detail::datatype_to_cpp_type::type; + +struct padding +{ + int32_t before; + int32_t after; + + int32_t sum() const noexcept { return before + after; } + + static padding zero() noexcept { return {}; } +}; + +template +struct value_range +{ + T min; + T max; + + static constexpr value_range full() noexcept + { + if (std::is_floating_point::value || std::is_same::value) + return { -std::numeric_limits::infinity(), std::numeric_limits::infinity() }; + else + return { std::numeric_limits::lowest(), std::numeric_limits::max() }; + } + + static constexpr value_range nonnegative() noexcept + { + return { 0, std::numeric_limits::max() }; + } + + constexpr T length() const noexcept { return max - min; } +}; + +typedef enum _reduce_op +{ + reduce_mean, + reduce_min, + reduce_max, + reduce_sum +} reduce_op_t; + +typedef enum _binary_op +{ + binary_add, + binary_sub, + binary_mul, + binary_div, + binary_min, + binary_max, + binary_pow, + binary_floor_div, + binary_floor_mod, + binary_bitwise_and, + binary_bitwise_or, + binary_bitwise_xor, + binary_logical_and, + binary_logical_or, + binary_logical_xor +} binary_op_t; + +inline std::string binary_op_to_string(binary_op_t op) +{ + switch (op) + { + case binary_add: + return "binary_add"; + case binary_sub: + return "binary_sub"; + case binary_mul: + return "binary_mul"; + case binary_div: + return "binary_div"; + case binary_min: + return "binary_min"; + case binary_max: + return "binary_max"; + case binary_pow: + return "binary_pow"; + case binary_floor_div: + return "binary_floor_div"; + case binary_floor_mod: + return "binary_floor_mod"; + case binary_bitwise_and: + return "binary_bitwise_and"; + case binary_bitwise_or: + return "binary_bitwise_or"; + case binary_bitwise_xor: + return 
"binary_bitwise_xor"; + case binary_logical_and: + return "binary_logical_and"; + case binary_logical_or: + return "binary_logical_or"; + case binary_logical_xor: + return "binary_logical_xor"; + } + return "unknown"; +} + +typedef enum _unary_op +{ + unary_abs, + unary_ceil, + unary_cos, + unary_exp, + unary_floor, + unary_log, + unary_neg, + unary_round, + unary_rsqrt, + unary_sin, + unary_sqrt, + unary_square, + unary_tanh, + unary_bitwise_not, + unary_logical_not +} unary_op_t; + +inline std::string unary_op_to_string(unary_op_t op) +{ + switch (op) + { + case unary_abs: + return "unary_abs"; + case unary_ceil: + return "unary_ceil"; + case unary_cos: + return "unary_cos"; + case unary_exp: + return "unary_exp"; + case unary_floor: + return "unary_floor"; + case unary_log: + return "unary_log"; + case unary_neg: + return "unary_neg"; + case unary_round: + return "unary_round"; + case unary_rsqrt: + return "unary_rsqrt"; + case unary_sin: + return "unary_sin"; + case unary_sqrt: + return "unary_sqrt"; + case unary_square: + return "unary_square"; + case unary_tanh: + return "unary_tanh"; + case unary_bitwise_not: + return "unary_bitwise_not"; + case unary_logical_not: + return "unary_logical_not"; + } + return "unknown"; +} + +typedef enum _image_resize_mode +{ + image_resize_bilinear, + image_resize_nearest_neighbor +} image_resize_mode_t; + +typedef enum _pad_mode +{ + pad_constant, + pad_reflect, + pad_symmetric, + pad_edge +} pad_mode_t; + +typedef struct _quant_param +{ + int32_t zero_point; + float scale; + + template + constexpr value_range range() const noexcept + { + return { + (std::numeric_limits::lowest() - zero_point) * scale, (std::numeric_limits::max() - zero_point) * scale + }; + } +} quant_param_t; + +inline bool operator==(const quant_param_t &lhs, const quant_param_t &rhs) noexcept +{ + return lhs.zero_point == rhs.zero_point && lhs.scale == rhs.scale; +} + +inline bool almost_equal(const quant_param_t &lhs, const quant_param_t &rhs) noexcept +{ + return lhs.zero_point == rhs.zero_point + && fabs(lhs.scale - rhs.scale) <= std::numeric_limits::epsilon(); +} + +struct fixed_mul +{ + float mul; + int8_t shift; + + int32_t rounded_mul() const noexcept { return (int32_t)lrintf(mul); } +}; + +using memory_location_t = uint8_t; +NNCASE_INLINE_VAR constexpr memory_location_t mem_input = 0; +NNCASE_INLINE_VAR constexpr memory_location_t mem_output = 1; +NNCASE_INLINE_VAR constexpr memory_location_t mem_rdata = 2; +NNCASE_INLINE_VAR constexpr memory_location_t mem_data = 3; + +using runtime_shape_t = itlib::small_vector; +using runtime_axis_t = itlib::small_vector; +using runtime_paddings_t = itlib::small_vector; + +struct scalar +{ + datatype_t type; + std::aligned_storage_t<8> storage; + + scalar() = default; + + scalar(int8_t value) noexcept + { + type = dt_int8; + as() = value; + } + + scalar(int16_t value) noexcept + { + type = dt_int16; + as() = value; + } + + scalar(int32_t value) noexcept + { + type = dt_int32; + as() = value; + } + + scalar(uint8_t value) noexcept + { + type = dt_uint8; + as() = value; + } + + scalar(uint16_t value) noexcept + { + type = dt_uint16; + as() = value; + } + + scalar(uint32_t value) noexcept + { + type = dt_uint32; + as() = value; + } + + scalar(bfloat16 value) noexcept + { + type = dt_bfloat16; + as() = value; + } + + scalar(float value) noexcept + { + type = dt_float32; + as() = value; + } + + template + T &as() noexcept { return *reinterpret_cast(&storage); } + + template + const T &as() const noexcept { return 
*reinterpret_cast(&storage); } +}; + +struct memory_range +{ + memory_location_t memory_location; + datatype_t datatype; + uint16_t reserved0; + uint32_t start; + uint32_t size; +}; + +NNCASE_INLINE_VAR constexpr size_t MAX_MODULE_TYPE_LENGTH = 16; + +typedef std::array module_type_t; + +template +constexpr module_type_t +to_module_type(const char (&a)[N], std::index_sequence) +{ + return { { a[Is]... } }; +} + +template +constexpr module_type_t to_module_type(const char (&a)[N]) +{ + return to_module_type(a, std::make_index_sequence()); +} + +inline padding operator+(const padding &lhs, const padding &rhs) noexcept +{ + return { lhs.before + rhs.before, lhs.after + rhs.after }; +} + +inline bool operator==(const padding &lhs, const padding &rhs) noexcept +{ + return lhs.before == rhs.before && lhs.after == rhs.after; +} + +inline bool operator!=(const padding &lhs, const padding &rhs) noexcept +{ + return lhs.before != rhs.before || lhs.after != rhs.after; +} + +template +bool operator==(const value_range &lhs, const value_range &rhs) noexcept +{ + return lhs.min == rhs.min && lhs.max == rhs.max; +} + +template +bool operator!=(const value_range &lhs, const value_range &rhs) noexcept +{ + return lhs.min != rhs.min || lhs.max != rhs.max; +} + +inline bool operator==(const scalar &lhs, const scalar &rhs) noexcept +{ + auto valid_bytes = detail::datatype_bytes(lhs.type); + return lhs.type == rhs.type && !memcmp(&lhs.storage, &rhs.storage, valid_bytes); +} + +inline bool operator!=(const scalar &lhs, const scalar &rhs) noexcept +{ + auto valid_bytes = detail::datatype_bytes(lhs.type); + return lhs.type != rhs.type || memcmp(&lhs.storage, &rhs.storage, valid_bytes); +} +} diff --git a/lib/nncase/v1/include/nncase/runtime/dbg.h b/lib/nncase/v1/include/nncase/runtime/dbg.h new file mode 100644 index 0000000..83f15a6 --- /dev/null +++ b/lib/nncase/v1/include/nncase/runtime/dbg.h @@ -0,0 +1,1038 @@ +/***************************************************************************** + + dbg(...) macro + +License (MIT): + + Copyright (c) 2019 David Peter + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ +*****************************************************************************/ + +#ifndef DBG_MACRO_DBG_H +#define DBG_MACRO_DBG_H + +#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) +#define DBG_MACRO_UNIX +#elif defined(_MSC_VER) +#define DBG_MACRO_WINDOWS +#endif + +#include "result.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DBG_MACRO_UNIX +#include +#endif + +#if __cplusplus >= 201703L +#define DBG_MACRO_CXX_STANDARD 17 +#elif __cplusplus >= 201402L +#define DBG_MACRO_CXX_STANDARD 14 +#else +#define DBG_MACRO_CXX_STANDARD 11 +#endif + +#if DBG_MACRO_CXX_STANDARD >= 17 +#include +#include +#endif + +namespace dbg +{ + +#ifdef DBG_MACRO_UNIX +inline bool isColorizedOutputEnabled() +{ + return isatty(fileno(stderr)); +} +#else +inline bool isColorizedOutputEnabled() +{ + return true; +} +#endif + +struct time +{ +}; + +namespace pretty_function +{ + + // Compiler-agnostic version of __PRETTY_FUNCTION__ and constants to + // extract the template argument in `type_name_impl` + +#if defined(__clang__) +#define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__ + static constexpr size_t PREFIX_LENGTH = sizeof("const char *dbg::type_name_impl() [T = ") - 1; + static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1; +#elif defined(__GNUC__) && !defined(__clang__) +#define DBG_MACRO_PRETTY_FUNCTION __PRETTY_FUNCTION__ + static constexpr size_t PREFIX_LENGTH = sizeof("const char* dbg::type_name_impl() [with T = ") - 1; + static constexpr size_t SUFFIX_LENGTH = sizeof("]") - 1; +#elif defined(_MSC_VER) +#define DBG_MACRO_PRETTY_FUNCTION __FUNCSIG__ + static constexpr size_t PREFIX_LENGTH = sizeof("const char *__cdecl dbg::type_name_impl<") - 1; + static constexpr size_t SUFFIX_LENGTH = sizeof(">(void)") - 1; +#else +#error "This compiler is currently not supported by dbg_macro." 
+#endif + +} // namespace pretty_function + +// Formatting helpers + +template +struct print_formatted +{ + static_assert(std::is_integral::value, + "Only integral types are supported."); + + print_formatted(T value, int numeric_base) + : inner(value), base(numeric_base) { } + + operator T() const { return inner; } + + const char *prefix() const + { + switch (base) + { + case 8: + return "0o"; + case 16: + return "0x"; + case 2: + return "0b"; + default: + return ""; + } + } + + T inner; + int base; +}; + +template +print_formatted hex(T value) +{ + return print_formatted { value, 16 }; +} + +template +print_formatted oct(T value) +{ + return print_formatted { value, 8 }; +} + +template +print_formatted bin(T value) +{ + return print_formatted { value, 2 }; +} + +// Implementation of 'type_name()' + +template +const char *type_name_impl() +{ + return DBG_MACRO_PRETTY_FUNCTION; +} + +template +struct type_tag +{ +}; + +template +std::string get_type_name(type_tag) +{ + namespace pf = pretty_function; + + std::string type = type_name_impl(); + return type.substr(pf::PREFIX_LENGTH, + type.size() - pf::PREFIX_LENGTH - pf::SUFFIX_LENGTH); +} + +template +std::string type_name() +{ + if (std::is_volatile::value) + { + if (std::is_pointer::value) + { + return type_name::type>() + " volatile"; + } + else + { + return "volatile " + type_name::type>(); + } + } + if (std::is_const::value) + { + if (std::is_pointer::value) + { + return type_name::type>() + " const"; + } + else + { + return "const " + type_name::type>(); + } + } + if (std::is_pointer::value) + { + return type_name::type>() + "*"; + } + if (std::is_lvalue_reference::value) + { + return type_name::type>() + "&"; + } + if (std::is_rvalue_reference::value) + { + return type_name::type>() + "&&"; + } + return get_type_name(type_tag {}); +} + +inline std::string get_type_name(type_tag) +{ + return "short"; +} + +inline std::string get_type_name(type_tag) +{ + return "unsigned short"; +} + +inline std::string get_type_name(type_tag) +{ + return "long"; +} + +inline std::string get_type_name(type_tag) +{ + return "unsigned long"; +} + +inline std::string get_type_name(type_tag) +{ + return "std::string"; +} + +template +std::string get_type_name(type_tag>>) +{ + return "std::vector<" + type_name() + ">"; +} + +template +std::string get_type_name(type_tag>) +{ + return "std::pair<" + type_name() + ", " + type_name() + ">"; +} + +template +std::string type_list_to_string() +{ + std::string result; + auto unused = { (result += type_name() + ", ", 0)..., 0 }; + static_cast(unused); + +#if DBG_MACRO_CXX_STANDARD >= 17 + if constexpr (sizeof...(T) > 0) + { +#else + if (sizeof...(T) > 0) + { +#endif + result.pop_back(); + result.pop_back(); + } + return result; +} + +template +std::string get_type_name(type_tag>) +{ + return "std::tuple<" + type_list_to_string() + ">"; +} + +template +inline std::string get_type_name(type_tag>) +{ + return type_name(); +} + +// Implementation of 'is_detected' to specialize for container-like types + +namespace detail_detector +{ + + struct nonesuch + { + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const &) = delete; + void operator=(nonesuch const &) = delete; + }; + + template + using void_t = void; + + template + class Op, + class... Args> + struct detector + { + using value_t = std::false_type; + using type = Default; + }; + + template class Op, class... 
Args> + struct detector<Default, void_t<Op<Args...>>, Op, Args...> + { + using value_t = std::true_type; + using type = Op<Args...>; + }; + +} // namespace detail_detector + +template