kendryte-standalone-sdk/lib/nncase/v0/nncase_v0.cpp

185 lines
5.3 KiB
C++

/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <nncase_v0.h>
#include <kernels/k210/k210_kernels.h>
#include <runtime/target_interpreter.h>
#include <stdio.h>
#include <cstring>
#include <utils.h>
using namespace nncase;
using namespace nncase::runtime;
#define NNCASE_DEBUG 0
namespace
{
void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata)
{
if (is_memory_cache((uintptr_t)src))
{
std::copy_n(src, input_size, dest);
src -= 0x40000000;
}
dmac_set_irq(dma_ch, callback, userdata, 1);
dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT,
DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8);
dmac_wait_done(dma_ch);
}
}
class nncase_context
{
public:
int load_kmodel(const uint8_t *buffer)
{
int ret = interpreter_.try_load_model(buffer) ? 0 : -1;
uint32_t size = interpreter_.model_size(buffer);
uint8_t *buffer_iomem = (uint8_t *)((uintptr_t)buffer - IOMEM);
const uint8_t *buffer_cache = buffer;
memcpy(buffer_iomem, buffer_cache, size);
for (int i = 0; i < size; i++)
{
if (buffer_iomem[i] != buffer_cache[i])
{
printf("flush model fail:%d %x %x \n", i, buffer_iomem[i], buffer_cache[i]);
while (1)
;
}
}
return ret;
}
int get_output(uint32_t index, uint8_t **data, size_t *size)
{
if (index >= interpreter_.outputs_size())
return -1;
auto mem = interpreter_.memory_at<uint8_t>(interpreter_.output_at(index));
*data = mem.data();
*size = mem.size();
return 0;
}
int run_kmodel(const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata)
{
done_callback_ = done_callback;
userdata_ = userdata;
interpreter_.dma_ch(dma_ch);
auto input = interpreter_.input_at(0);
auto mem = interpreter_.memory_at<uint8_t>(input);
if (input.memory_type == mem_main)
{
std::copy(src, src + mem.size(), mem.begin());
interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
return 0;
}
else if (input.memory_type == mem_k210_kpu)
{
auto shape = interpreter_.input_shape_at(0);
kernels::k210::kpu_upload(src, mem.data(), shape);
on_upload_done();
return 0;
}
return -1;
}
private:
void on_done()
{
#if NNCASE_DEBUG
printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6);
#endif
if (done_callback_)
done_callback_(userdata_);
}
void on_upload_done()
{
interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
}
static void done_thunk(void *userdata)
{
reinterpret_cast<nncase_context *>(userdata)->on_done();
}
static void on_error_thunk(const char *err, void *userdata)
{
#if NNCASE_DEBUG
printf("Fatal: %s\n", err);
#endif
}
static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata)
{
#if NNCASE_DEBUG
printf("%s: %fms\n", node_opcode_names(op).data(), duration.count() / 1e6);
#endif
}
static int upload_done_thunk(void *userdata)
{
reinterpret_cast<nncase_context *>(userdata)->on_upload_done();
return 0;
}
private:
interpreter_t interpreter_;
kpu_done_callback_t done_callback_;
void *userdata_;
};
int nncase_v0_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
{
auto nnctx = new (std::nothrow) nncase_context();
if (ctx)
{
ctx->is_nncase = 1;
ctx->nncase_ctx = nnctx;
ctx->nncase_version = 0;
return nnctx->load_kmodel(buffer);
}
else
{
return -1;
}
}
int nncase_v0_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size)
{
auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
return nnctx->get_output(index, data, size);
}
void nncase_v0_model_free(kpu_model_context_t *ctx)
{
auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
delete nnctx;
ctx->nncase_ctx = nullptr;
}
int nncase_v0_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata)
{
auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
return nnctx->run_kmodel(src, dma_ch, done_callback, userdata);
}