Merge develop:Rollback
parent
0a1e683246
commit
e5068a1461
|
@ -680,7 +680,6 @@ typedef struct
|
|||
const uint8_t *volatile current_body;
|
||||
dmac_channel_number_t dma_ch;
|
||||
kpu_done_callback_t done_callback;
|
||||
volatile uint8_t load_first;
|
||||
void *userdata;
|
||||
};
|
||||
|
||||
|
|
|
@ -1006,8 +1006,12 @@ static void kpu_quantize(const kpu_model_quantize_layer_argument_t *arg, kpu_mod
|
|||
size_t count = arg->count;
|
||||
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
|
||||
|
||||
kpu_model_quant_param_t q = arg->quant_param;
|
||||
|
||||
kpu_model_quant_param_t q;
|
||||
#if FIX_CACHE
|
||||
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
|
||||
#else
|
||||
q = arg->quant_param;
|
||||
#endif
|
||||
float scale = 1.f / q.scale;
|
||||
|
||||
uint8_t *dest = (uint8_t *)(ctx->main_buffer + arg->mem_out_address);
|
||||
|
@ -1028,8 +1032,12 @@ static void kpu_kmodel_dequantize(const kpu_model_dequantize_layer_argument_t *a
|
|||
const uint8_t *src = (const uint8_t *)(ctx->main_buffer + arg->main_mem_in_address);
|
||||
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
|
||||
size_t oc, count = arg->count;
|
||||
kpu_model_quant_param_t q = arg->quant_param;
|
||||
|
||||
kpu_model_quant_param_t q;
|
||||
#if FIX_CACHE
|
||||
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
|
||||
#else
|
||||
q = arg->quant_param;
|
||||
#endif
|
||||
for(oc = 0; oc < count; oc++)
|
||||
dest[oc] = *src++ * q.scale + q.bias;
|
||||
}
|
||||
|
@ -1134,7 +1142,10 @@ static void kpu_kmodel_fully_connected(const kpu_model_fully_connected_layer_arg
|
|||
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
|
||||
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
|
||||
uint32_t in_channels = arg->in_channels, out_channels = arg->out_channels, ic, oc;
|
||||
const float *weights = arg->weights, *bias = arg->weights + in_channels * out_channels;
|
||||
float *weights = (float *)malloc(in_channels * out_channels * sizeof(float));
|
||||
float *bias = (float *)malloc(out_channels * sizeof(float));
|
||||
memcpy(weights, arg->weights, out_channels * in_channels * sizeof(float));
|
||||
memcpy(bias, arg->weights + in_channels * out_channels, out_channels * sizeof(float));
|
||||
|
||||
if(in_channels % 8 == 0)
|
||||
{
|
||||
|
@ -1181,7 +1192,8 @@ static void kpu_kmodel_fully_connected(const kpu_model_fully_connected_layer_arg
|
|||
dest[oc] = sum + bias[oc];
|
||||
}
|
||||
}
|
||||
|
||||
free(weights);
|
||||
free(bias);
|
||||
kpu_float_activation(dest, out_channels, arg->act);
|
||||
}
|
||||
|
||||
|
@ -1265,10 +1277,9 @@ static void kpu_logistic(const kpu_model_logistic_layer_argument_t *arg, kpu_mod
|
|||
static void kpu_conv(const kpu_model_conv_layer_argument_t *arg, kpu_model_context_t *ctx)
|
||||
{
|
||||
volatile kpu_layer_argument_t layer = *(const volatile kpu_layer_argument_t *)(ctx->model_buffer + arg->layer_offset);
|
||||
|
||||
layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)(ctx->model_buffer + arg->weights_offset) - 0x40000000;
|
||||
layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uintptr_t)(ctx->model_buffer + arg->bn_offset) - 0x40000000;
|
||||
layer.kernel_calc_type_cfg.data.active_addr = (uintptr_t)(ctx->model_buffer + arg->act_offset) - 0x40000000;
|
||||
layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)(ctx->model_buffer + arg->weights_offset);
|
||||
layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uintptr_t)(ctx->model_buffer + arg->bn_offset);
|
||||
layer.kernel_calc_type_cfg.data.active_addr = (uintptr_t)(ctx->model_buffer + arg->act_offset);
|
||||
|
||||
if(arg->flags & KLF_MAIN_MEM_OUT)
|
||||
{
|
||||
|
@ -1360,14 +1371,14 @@ static void kpu_upload(const kpu_model_upload_layer_argument_t *arg, kpu_model_c
|
|||
|
||||
int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
|
||||
{
|
||||
#if FIX_CACHE
|
||||
configASSERT(!is_memory_cache((uintptr_t)buffer));
|
||||
#endif
|
||||
uintptr_t base_addr = (uintptr_t)buffer;
|
||||
const kpu_kmodel_header_t *header = (const kpu_kmodel_header_t *)buffer;
|
||||
|
||||
configASSERT(is_memory_cache((uintptr_t)buffer))
|
||||
|
||||
if(header->version == 3 && header->arch == 0)
|
||||
{
|
||||
ctx->load_first = 1;
|
||||
ctx->is_nncase = 0;
|
||||
ctx->model_buffer = buffer;
|
||||
ctx->output_count = header->output_count;
|
||||
|
@ -1482,7 +1493,6 @@ static int kpu_kmodel_done(kpu_model_context_t *ctx)
|
|||
.calc_done_int = 1,
|
||||
.layer_cfg_almost_empty_int = 1,
|
||||
.layer_cfg_almost_full_int = 1};
|
||||
ctx->load_first = 0;
|
||||
#if KPU_DEBUG
|
||||
uint32_t cnt_layer_id = ctx->current_layer - 1;
|
||||
uint64_t time = sysctl_get_time_us();
|
||||
|
@ -1582,13 +1592,6 @@ static int ai_step(void *userdata)
|
|||
kpu_logistic((const kpu_model_logistic_layer_argument_t *)layer_body, ctx);
|
||||
break;
|
||||
case KL_K210_CONV:
|
||||
if(ctx->load_first)
|
||||
{
|
||||
for(int i=0; i<cnt_layer_header->body_size; i++)
|
||||
{
|
||||
*((uint8_t *)((uintptr_t)layer_body-0x40000000)+i) = *((uint8_t *)layer_body+i);
|
||||
}
|
||||
}
|
||||
kpu_conv((const kpu_model_conv_layer_argument_t *)layer_body, ctx);
|
||||
return 0;
|
||||
case KL_K210_ADD_PADDING:
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -24,6 +24,7 @@
|
|||
#define INCBIN_PREFIX
|
||||
#include "incbin.h"
|
||||
#include "utils.h"
|
||||
#include "iomem.h"
|
||||
|
||||
#define PLL0_OUTPUT_FREQ 800000000UL
|
||||
#define PLL1_OUTPUT_FREQ 400000000UL
|
||||
|
@ -194,7 +195,7 @@ int main(void)
|
|||
w25qxx_init(3, 0);
|
||||
w25qxx_enable_quad_mode();
|
||||
#if LOAD_KMODEL_FROM_FLASH
|
||||
model_data = (uint8_t *)malloc(KMODEL_SIZE);
|
||||
model_data = (uint8_t *)iomem_malloc(KMODEL_SIZE);
|
||||
w25qxx_read_data(0xA00000, model_data, KMODEL_SIZE, W25QXX_QUAD_FAST);
|
||||
#endif
|
||||
/* LCD init */
|
||||
|
|
Loading…
Reference in New Issue