Merge develop:Rollback

fix_cache
xiangbingj 2019-10-26 16:47:07 +08:00
parent 0a1e683246
commit e5068a1461
5 changed files with 26 additions and 23 deletions

View File

@ -680,7 +680,6 @@ typedef struct
const uint8_t *volatile current_body;
dmac_channel_number_t dma_ch;
kpu_done_callback_t done_callback;
volatile uint8_t load_first;
void *userdata;
};

View File

@ -1006,8 +1006,12 @@ static void kpu_quantize(const kpu_model_quantize_layer_argument_t *arg, kpu_mod
size_t count = arg->count;
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
kpu_model_quant_param_t q = arg->quant_param;
kpu_model_quant_param_t q;
#if FIX_CACHE
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
#else
q = arg->quant_param;
#endif
float scale = 1.f / q.scale;
uint8_t *dest = (uint8_t *)(ctx->main_buffer + arg->mem_out_address);
@ -1028,8 +1032,12 @@ static void kpu_kmodel_dequantize(const kpu_model_dequantize_layer_argument_t *a
const uint8_t *src = (const uint8_t *)(ctx->main_buffer + arg->main_mem_in_address);
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
size_t oc, count = arg->count;
kpu_model_quant_param_t q = arg->quant_param;
kpu_model_quant_param_t q;
#if FIX_CACHE
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
#else
q = arg->quant_param;
#endif
for(oc = 0; oc < count; oc++)
dest[oc] = *src++ * q.scale + q.bias;
}
@ -1134,7 +1142,10 @@ static void kpu_kmodel_fully_connected(const kpu_model_fully_connected_layer_arg
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
uint32_t in_channels = arg->in_channels, out_channels = arg->out_channels, ic, oc;
const float *weights = arg->weights, *bias = arg->weights + in_channels * out_channels;
float *weights = (float *)malloc(in_channels * out_channels * sizeof(float));
float *bias = (float *)malloc(out_channels * sizeof(float));
memcpy(weights, arg->weights, out_channels * in_channels * sizeof(float));
memcpy(bias, arg->weights + in_channels * out_channels, out_channels * sizeof(float));
if(in_channels % 8 == 0)
{
@ -1181,7 +1192,8 @@ static void kpu_kmodel_fully_connected(const kpu_model_fully_connected_layer_arg
dest[oc] = sum + bias[oc];
}
}
free(weights);
free(bias);
kpu_float_activation(dest, out_channels, arg->act);
}
@ -1265,10 +1277,9 @@ static void kpu_logistic(const kpu_model_logistic_layer_argument_t *arg, kpu_mod
static void kpu_conv(const kpu_model_conv_layer_argument_t *arg, kpu_model_context_t *ctx)
{
volatile kpu_layer_argument_t layer = *(const volatile kpu_layer_argument_t *)(ctx->model_buffer + arg->layer_offset);
layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)(ctx->model_buffer + arg->weights_offset) - 0x40000000;
layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uintptr_t)(ctx->model_buffer + arg->bn_offset) - 0x40000000;
layer.kernel_calc_type_cfg.data.active_addr = (uintptr_t)(ctx->model_buffer + arg->act_offset) - 0x40000000;
layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)(ctx->model_buffer + arg->weights_offset);
layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uintptr_t)(ctx->model_buffer + arg->bn_offset);
layer.kernel_calc_type_cfg.data.active_addr = (uintptr_t)(ctx->model_buffer + arg->act_offset);
if(arg->flags & KLF_MAIN_MEM_OUT)
{
@ -1360,14 +1371,14 @@ static void kpu_upload(const kpu_model_upload_layer_argument_t *arg, kpu_model_c
int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
{
#if FIX_CACHE
configASSERT(!is_memory_cache((uintptr_t)buffer));
#endif
uintptr_t base_addr = (uintptr_t)buffer;
const kpu_kmodel_header_t *header = (const kpu_kmodel_header_t *)buffer;
configASSERT(is_memory_cache((uintptr_t)buffer))
if(header->version == 3 && header->arch == 0)
{
ctx->load_first = 1;
ctx->is_nncase = 0;
ctx->model_buffer = buffer;
ctx->output_count = header->output_count;
@ -1482,7 +1493,6 @@ static int kpu_kmodel_done(kpu_model_context_t *ctx)
.calc_done_int = 1,
.layer_cfg_almost_empty_int = 1,
.layer_cfg_almost_full_int = 1};
ctx->load_first = 0;
#if KPU_DEBUG
uint32_t cnt_layer_id = ctx->current_layer - 1;
uint64_t time = sysctl_get_time_us();
@ -1582,13 +1592,6 @@ static int ai_step(void *userdata)
kpu_logistic((const kpu_model_logistic_layer_argument_t *)layer_body, ctx);
break;
case KL_K210_CONV:
if(ctx->load_first)
{
for(int i=0; i<cnt_layer_header->body_size; i++)
{
*((uint8_t *)((uintptr_t)layer_body-0x40000000)+i) = *((uint8_t *)layer_body+i);
}
}
kpu_conv((const kpu_model_conv_layer_argument_t *)layer_body, ctx);
return 0;
case KL_K210_ADD_PADDING:

BIN
src/face_detect/kfpkg/face_detect.bin Normal file → Executable file

Binary file not shown.

View File

@ -24,6 +24,7 @@
#define INCBIN_PREFIX
#include "incbin.h"
#include "utils.h"
#include "iomem.h"
#define PLL0_OUTPUT_FREQ 800000000UL
#define PLL1_OUTPUT_FREQ 400000000UL
@ -194,7 +195,7 @@ int main(void)
w25qxx_init(3, 0);
w25qxx_enable_quad_mode();
#if LOAD_KMODEL_FROM_FLASH
model_data = (uint8_t *)malloc(KMODEL_SIZE);
model_data = (uint8_t *)iomem_malloc(KMODEL_SIZE);
w25qxx_read_data(0xA00000, model_data, KMODEL_SIZE, W25QXX_QUAD_FAST);
#endif
/* LCD init */