Fix cache

pull/107/head
xiangbingj 2019-10-24 13:30:18 +08:00
parent c01c5a83f6
commit 4f73860ea2
12 changed files with 450 additions and 16 deletions

View File

@ -97,6 +97,8 @@ static const char *TAG = "SYSCALL";
/* Bounds of the heap region; defined outside this file (NOTE(review): presumably by the linker script - confirm). */
extern char _heap_start[];
extern char _heap_end[];
/* Current program break; sys_brk() stores every accepted break position here. */
char *_heap_cur = &_heap_start[0];
/* High-water mark of the break: sys_brk() only ever moves it upwards. */
char *_heap_line = &_heap_start[0];
/*
 * Low-water mark of the iomem pool, kept in the address range 0x40000000
 * below the heap addresses. It starts at the top of the heap (shifted down by
 * 0x40000000), is lowered by iomem_malloc() and raised again by the iomem
 * free path. sys_brk() logs a warning when the shifted _heap_line passes it.
 * NOTE(review): the 0x40000000 shift presumably selects the non-cached alias
 * of the same SRAM - confirm against the SoC memory map.
 */
char *_ioheap_line = &_heap_end[0]-0x40000000;
sys_putchar_t sys_putchar;
sys_getchar_t sys_getchar;
@ -184,6 +186,14 @@ static size_t sys_brk(size_t pos)
res = -ENOMEM;
} else
{
if((uintptr_t)pos > (uintptr_t)_heap_line)
{
_heap_line = (char *)(uintptr_t)pos;
if((uintptr_t)_heap_line-0x40000000 > (uintptr_t)_ioheap_line)
{
LOGE(TAG, "WARNING: cache heap line > iomem heap line!\r\n");
}
}
/* Adjust brk pointer. */
_heap_cur = (char *)(uintptr_t)pos;
/* Return current address. */

View File

@ -15,18 +15,27 @@
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "dmac.h"
#include "fpioa.h"
#include "plic.h"
#include "stdlib.h"
#include "sysctl.h"
#include "utils.h"
#include "iomem.h"
volatile dmac_t *const dmac = (dmac_t *)DMAC_BASE_ADDR;
/* Per-channel bookkeeping for the DMA controller driver. */
typedef struct _dmac_context
{
/* Channel this context belongs to; read back by the DMA interrupt handler. */
dmac_channel_number_t dmac_channel;
#if FIX_CACHE
/* Caller's original destination when it lies in cached memory; the received
 * data is copied back to it once the transfer has finished. NULL when no
 * copy-back is pending. */
uint8_t *dest_buffer;
/* iomem copy of a cached source buffer, handed to the DMA engine instead of
 * the original; released with iomem_free() when the transfer has finished. */
uint8_t *src_malloc;
/* iomem buffer the DMA engine writes to in place of dest_buffer. */
uint8_t *dest_malloc;
/* Number of bytes to copy from dest_malloc back to dest_buffer. */
size_t buf_len;
#endif
/* Optional user callback invoked from the DMA interrupt handler. */
plic_irq_callback_t callback;
/* Opaque argument passed to callback. */
void *ctx;
} dmac_context_t;
@ -353,6 +362,40 @@ int dmac_set_channel_param(dmac_channel_number_t channel_num,
dmac_ch_ctl_u_t ctl;
dmac_ch_cfg_u_t cfg_u;
#if FIX_CACHE
uint8_t *src_io = (uint8_t *)src;
uint8_t *dest_io = (uint8_t *)dest;
if(is_memory_cache((uintptr_t)src))
{
if(src_inc == DMAC_ADDR_NOCHANGE)
{
src_io = (uint8_t *)iomem_malloc(1<<dmac_trans_width);
memcpy(src_io, src, 1<<dmac_trans_width);
}
else
{
src_io = (uint8_t *)iomem_malloc(blockSize * (1<<dmac_trans_width));
memcpy(src_io, src, blockSize * (1<<dmac_trans_width));
}
dmac_context[channel_num].src_malloc = src_io;
}
if(is_memory_cache((uintptr_t)dest))
{
if(dest_inc == DMAC_ADDR_NOCHANGE)
{
dest_io = (uint8_t *)iomem_malloc(1<<dmac_trans_width);
dmac_context[channel_num].buf_len = 1<<dmac_trans_width;
}
else
{
dest_io = (uint8_t *)iomem_malloc(blockSize * (1<<dmac_trans_width));
dmac_context[channel_num].buf_len = blockSize * (1<<dmac_trans_width);
}
dmac_context[channel_num].dest_malloc = dest_io;
dmac_context[channel_num].dest_buffer = dest;
}
#endif
int mem_type_src = is_memory((uintptr_t)src), mem_type_dest = is_memory((uintptr_t)dest);
dmac_transfer_flow_t flow_control;
if(mem_type_src == 0 && mem_type_dest == 0)
@ -381,8 +424,13 @@ int dmac_set_channel_param(dmac_channel_number_t channel_num,
writeq(cfg_u.data, &dmac->channel[channel_num].cfg);
#if FIX_CACHE
dmac->channel[channel_num].sar = (uint64_t)src_io;
dmac->channel[channel_num].dar = (uint64_t)dest_io;
#else
dmac->channel[channel_num].sar = (uint64_t)src;
dmac->channel[channel_num].dar = (uint64_t)dest;
#endif
ctl.data = readq(&dmac->channel[channel_num].ctl);
ctl.ch_ctl.sms = DMAC_MASTER1;
@ -736,6 +784,22 @@ int dmac_is_done(dmac_channel_number_t channel_num)
/*
 * Wait for a DMA channel to finish and, when FIX_CACHE is enabled, complete
 * the bounce-buffer handling for that channel.
 *
 * channel_num: channel whose transfer is awaited.
 *
 * NOTE(review): dmac_wait_idle() is defined elsewhere; presumably it blocks
 * until the channel is idle - confirm.
 */
void dmac_wait_done(dmac_channel_number_t channel_num)
{
dmac_wait_idle(channel_num);
#if FIX_CACHE
/* A pending copy-back is signalled by a non-NULL dest_buffer: move the data
 * from the iomem bounce buffer to the caller's buffer, then release the
 * bounce buffer and clear the bookkeeping so it is not processed twice. */
if(dmac_context[channel_num].dest_buffer)
{
memcpy(dmac_context[channel_num].dest_buffer, dmac_context[channel_num].dest_malloc, dmac_context[channel_num].buf_len);
iomem_free(dmac_context[channel_num].dest_malloc);
dmac_context[channel_num].dest_malloc = NULL;
dmac_context[channel_num].dest_buffer = NULL;
dmac_context[channel_num].buf_len = 0;
}
/* Release the iomem copy of a cached source buffer, if one was made. */
if(dmac_context[channel_num].src_malloc)
{
iomem_free(dmac_context[channel_num].src_malloc);
dmac_context[channel_num].src_malloc = NULL;
}
#endif
}
int dmac_is_idle(dmac_channel_number_t channel_num)
@ -771,6 +835,22 @@ static int dmac_irq_callback(void *ctx)
dmac_context_t *v_dmac_context = (dmac_context_t *)(ctx);
dmac_channel_number_t v_dmac_channel = v_dmac_context->dmac_channel;
dmac_channel_interrupt_clear(v_dmac_channel);
#if FIX_CACHE
if(v_dmac_context->dest_buffer)
{
memcpy(v_dmac_context->dest_buffer, v_dmac_context->dest_malloc, v_dmac_context->buf_len);
iomem_free(v_dmac_context->dest_malloc);
v_dmac_context->dest_malloc = NULL;
v_dmac_context->dest_buffer = NULL;
v_dmac_context->buf_len = 0;
}
if(v_dmac_context->src_malloc)
{
iomem_free(v_dmac_context->src_malloc);
v_dmac_context->src_malloc = NULL;
}
#endif
if(v_dmac_context->callback != NULL)
v_dmac_context->callback(v_dmac_context->ctx);

View File

@ -201,6 +201,11 @@ void dvp_set_image_size(uint32_t width, uint32_t height)
void dvp_set_ai_addr(uint32_t r_addr, uint32_t g_addr, uint32_t b_addr)
{
#if FIX_CACHE
configASSERT(!is_memory_cache((uintptr_t)r_addr));
configASSERT(!is_memory_cache((uintptr_t)g_addr));
configASSERT(!is_memory_cache((uintptr_t)b_addr));
#endif
dvp->r_addr = r_addr;
dvp->g_addr = g_addr;
dvp->b_addr = b_addr;
@ -208,6 +213,9 @@ void dvp_set_ai_addr(uint32_t r_addr, uint32_t g_addr, uint32_t b_addr)
/*
 * Program the DVP display (RGB) output address register.
 *
 * addr: address written to dvp->rgb_addr. With FIX_CACHE enabled it must not
 *       lie in the range reported by is_memory_cache(); this is enforced with
 *       configASSERT.
 */
void dvp_set_display_addr(uint32_t addr)
{
#if FIX_CACHE
configASSERT(!is_memory_cache((uintptr_t)addr));
#endif
dvp->rgb_addr = addr;
}

View File

@ -21,6 +21,7 @@
#include "string.h"
#include "sysctl.h"
#include "utils.h"
#include "iomem.h"
typedef struct _i2c_slave_instance
{
@ -167,7 +168,11 @@ void i2c_send_data_dma(dmac_channel_number_t dma_channel_num, i2c_device_number_
configASSERT(i2c_num < I2C_MAX_NUM);
volatile i2c_t *i2c_adapter = i2c[i2c_num];
i2c_adapter->clr_tx_abrt = i2c_adapter->clr_tx_abrt;
#if FIX_CACHE
uint32_t *buf = iomem_malloc(send_buf_len * sizeof(uint32_t));
#else
uint32_t *buf = malloc(send_buf_len * sizeof(uint32_t));
#endif
int i;
for(i = 0; i < send_buf_len; i++)
{
@ -179,7 +184,11 @@ void i2c_send_data_dma(dmac_channel_number_t dma_channel_num, i2c_device_number_
DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, send_buf_len);
dmac_wait_done(dma_channel_num);
#if FIX_CACHE
iomem_free((void *)buf);
#else
free((void *)buf);
#endif
while((i2c_adapter->status & I2C_STATUS_ACTIVITY) || !(i2c_adapter->status & I2C_STATUS_TFE))
{
@ -233,8 +242,11 @@ void i2c_recv_data_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_
configASSERT(i2c_num < I2C_MAX_NUM);
volatile i2c_t *i2c_adapter = i2c[i2c_num];
#if FIX_CACHE
uint32_t *write_cmd = iomem_malloc(sizeof(uint32_t) * (send_buf_len + receive_buf_len));
#else
uint32_t *write_cmd = malloc(sizeof(uint32_t) * (send_buf_len + receive_buf_len));
#endif
size_t i;
for(i = 0; i < send_buf_len; i++)
write_cmd[i] = *send_buf++;
@ -257,8 +269,11 @@ void i2c_recv_data_dma(dmac_channel_number_t dma_send_channel_num, dmac_channel_
{
receive_buf[i] = (uint8_t)write_cmd[i];
}
free(write_cmd);
#if FIX_CACHE
iomem_free(write_cmd);
#else
free(write_cmd);
#endif
}
static int i2c_dma_irq(void *ctx)

View File

@ -0,0 +1,8 @@
#ifndef _IOMEM_MALLOC_H
#define _IOMEM_MALLOC_H

/* The prototypes below use uint32_t, so the header brings in its own
 * dependency instead of relying on the includer's include order. */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief       Release a buffer obtained from iomem_malloc().
 *
 * @param[in]   paddr       Pointer returned by iomem_malloc(); NULL is ignored.
 */
void iomem_free(void *paddr);

/**
 * @brief       Allocate a buffer from the iomem pool.
 *
 * The pool is managed in blocks of 256 bytes, so the request is rounded up to
 * a whole number of blocks.
 *
 * @param[in]   size        Requested size in bytes.
 *
 * @return      Pointer to the buffer, or NULL when size is 0 or no suitable
 *              run of free blocks exists (a message is printed in that case).
 */
void *iomem_malloc(uint32_t size);

/**
 * @brief       Report the gap between the cached-heap high-water mark and the
 *              iomem low-water mark.
 *
 * @return      Gap in bytes.
 */
uint32_t iomem_unused(void);

#ifdef __cplusplus
}
#endif

#endif

View File

@ -33,6 +33,8 @@ extern "C" {
#define KENDRYTE_MIN(a, b) ((a) > (b) ? (b) : (a))
#define KENDRYTE_MAX(a, b) ((a) > (b) ? (a) : (b))
#define FIX_CACHE 1
#ifdef __ASSEMBLY__
#define KENDRYTE_CAST(type, ptr) ptr
#else /* __ASSEMBLY__ */
@ -340,6 +342,7 @@ uint32_t get_bit(volatile uint32_t *bits, uint32_t mask, size_t offset);
*/
uint32_t get_gpio_bit(volatile uint32_t *bits, size_t offset);
/**
 * @brief       Test whether an address lies in the cached memory window
 *
 * The window is the 6 MiB range starting at 0x80000000.
 *
 * @param[in]   address     Address to test
 *
 * @return      1 when the address is inside the window, 0 otherwise
 */
uint32_t is_memory_cache(uintptr_t address);
#ifdef __cplusplus
}
#endif /* __cplusplus */

169
lib/drivers/iomem.c Normal file
View File

@ -0,0 +1,169 @@
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "iomem.h"
#include "printf.h"
#include "atomic.h"
#define IOMEM_BLOCK_SIZE 256
/* State of the iomem block allocator. */
typedef struct _iomem_malloc_t
{
/* Lazy initialiser; called on first use while memrdy is still 0. */
void (*init)();
/* Returns the gap between the two heap watermarks, in bytes. */
uint32_t (*unused)();
/* Lowest address of the pool. */
uint8_t *membase;
/* Pool size in bytes. */
uint32_t memsize;
/* Number of IOMEM_BLOCK_SIZE blocks in the pool, i.e. usable memmap entries. */
uint32_t memtblsize;
/* Block map: the first and last entry of an allocated run hold the run
 * length in blocks; all other entries are 0. */
uint16_t *memmap;
/* Non-zero once init has completed. */
uint8_t memrdy;
} iomem_malloc_t;
static void iomem_init();
static uint32_t k_unused();
extern char *_ioheap_line;
extern char *_heap_line;
extern char _heap_start[];
extern char *_heap_cur;
iomem_malloc_t malloc_cortol =
{
iomem_init,
k_unused,
NULL,
0,
0,
NULL,
0
};
/*
 * Fill a memory region with a byte value, one byte at a time.
 *
 * s:   start of the region.
 * c:   value stored into every byte.
 * num: number of bytes to write; 0 writes nothing.
 */
static void iomem_set(void *s, uint8_t c, uint32_t num)
{
    uint8_t *bytes = (uint8_t *)s;
    uint32_t i;

    for(i = 0; i < num; i++)
    {
        bytes[i] = c;
    }
}
/*
 * Set up the iomem pool on first use.
 *
 * The pool spans from the cached-heap high-water mark (shifted down by
 * 0x40000000) up to _ioheap_line. The block map is taken from the regular
 * heap. If that allocation fails the pool is left marked as not ready, so
 * callers can report an allocation failure instead of writing through a NULL
 * map; initialisation is retried on the next request.
 */
static void iomem_init()
{
    uint16_t *map;

    /* First pass: only used to size the block map. */
    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line - 0x40000000);
    malloc_cortol.memsize = (uint32_t)(uintptr_t)_ioheap_line - (uint32_t)(uintptr_t)malloc_cortol.membase;
    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;

    map = (uint16_t *)malloc(malloc_cortol.memtblsize * sizeof(uint16_t));
    if(map == NULL)
    {
        /* No map, no pool: keep memrdy at 0. */
        return;
    }
    malloc_cortol.memmap = map;
    mb();

    /* Second pass: recompute the geometry after the map allocation.
     * NOTE(review): presumably the malloc() above can raise _heap_line via
     * sys_brk(), shrinking the pool - confirm. The map was sized from the
     * first pass, so it is never smaller than required. */
    malloc_cortol.membase = (uint8_t *)((uintptr_t)_heap_line - 0x40000000);
    malloc_cortol.memsize = (uint32_t)(uintptr_t)_ioheap_line - (uint32_t)(uintptr_t)malloc_cortol.membase;
    malloc_cortol.memtblsize = malloc_cortol.memsize / IOMEM_BLOCK_SIZE;

    iomem_set(malloc_cortol.memmap, 0, malloc_cortol.memtblsize * 2);
    iomem_set(malloc_cortol.membase, 0, malloc_cortol.memsize);
    malloc_cortol.memrdy = 1;
}

/*
 * Return the distance in bytes between the iomem low-water mark (shifted up
 * by 0x40000000) and the cached-heap high-water mark.
 */
static uint32_t k_unused()
{
    uint32_t unused = 0;

    unused = (uintptr_t)_ioheap_line + 0x40000000 - (uintptr_t)_heap_line;
    return unused;
}

/*
 * Reserve a run of blocks large enough for `size` bytes.
 *
 * The map is scanned from the highest block downwards. The first and last map
 * entry of the chosen run are set to the run length.
 *
 * Returns the byte offset of the run from membase, or 0XFFFFFFFF when size is
 * 0, the pool could not be initialised, or no sufficiently long free run
 * exists.
 */
static uint32_t k_malloc(uint32_t size)
{
    signed long offset = 0;
    uint32_t xmemb;
    uint32_t kmemb = 0;

    if(!malloc_cortol.memrdy)
        malloc_cortol.init();
    /* init leaves memrdy at 0 when the block map could not be allocated. */
    if(!malloc_cortol.memrdy)
        return 0XFFFFFFFF;
    if(size == 0)
        return 0XFFFFFFFF;

    xmemb = size / IOMEM_BLOCK_SIZE;
    if(size % IOMEM_BLOCK_SIZE)
        xmemb++;
    /* Run lengths are stored in 16-bit map entries; a longer run would be
     * truncated and corrupt the map. */
    if(xmemb > UINT16_MAX)
        return 0XFFFFFFFF;

    /* Convert to signed before subtracting: with an empty table the unsigned
     * expression memtblsize - 1 would wrap to a huge positive start index. */
    for(offset = (signed long)malloc_cortol.memtblsize - 1; offset >= 0; offset--)
    {
        if(!malloc_cortol.memmap[offset])
        {
            kmemb++;
        }
        else
        {
            /* Hit the end marker of an allocated run: jump to its first
             * block; the loop decrement then steps below it. */
            offset = offset - malloc_cortol.memmap[offset] + 1;
            kmemb = 0;
        }
        if(kmemb == xmemb)
        {
            malloc_cortol.memmap[offset] = xmemb;
            malloc_cortol.memmap[offset + xmemb - 1] = xmemb;
            return (offset * IOMEM_BLOCK_SIZE);
        }
    }
    return 0XFFFFFFFF;
}
/*
 * Release the run of blocks that starts at byte offset `offset` from membase.
 *
 * Returns 0 on success, 1 when the pool was not yet initialised (it is
 * initialised as a side effect and nothing is freed), and 2 when the offset
 * does not identify the start of a live allocation. That covers offsets
 * outside the pool, blocks that are already free (double free) and runs whose
 * end marker does not match.
 */
static uint8_t k_free(uint32_t offset)
{
    uint32_t index;
    uint32_t nmemb;
    uint32_t last;

    if(!malloc_cortol.memrdy)
    {
        malloc_cortol.init();
        return 1;
    }
    if(offset >= malloc_cortol.memsize)
        return 2;

    /* memsize need not be a multiple of the block size, so the block index
     * is checked against the table as well. */
    index = offset / IOMEM_BLOCK_SIZE;
    if(index >= malloc_cortol.memtblsize)
        return 2;

    /* A zero length means the block is not the start of an allocation.
     * Going on would clear memmap[index - 1], which belongs to a neighbour
     * or lies outside the table. */
    nmemb = malloc_cortol.memmap[index];
    if(nmemb == 0)
        return 2;

    /* Both markers of a live run hold the same length. */
    last = index + nmemb - 1;
    if(last >= malloc_cortol.memtblsize || malloc_cortol.memmap[last] != nmemb)
        return 2;

    malloc_cortol.memmap[index] = 0;
    malloc_cortol.memmap[last] = 0;

    /* When the freed run sits exactly at the low-water mark, move the mark
     * up past it. */
    if((uintptr_t)_ioheap_line == (uintptr_t)malloc_cortol.membase + offset)
    {
        _ioheap_line = (char *)((uintptr_t)_ioheap_line + nmemb * IOMEM_BLOCK_SIZE);
    }
    return 0;
}
/*
 * Public release entry point: translate the pointer into a byte offset from
 * the pool base and hand it to k_free(). NULL is ignored; the status returned
 * by k_free() is discarded.
 */
void iomem_free(void *paddr)
{
    if(paddr != NULL)
    {
        uint32_t offset = (uintptr_t)paddr - (uintptr_t)malloc_cortol.membase;

        k_free(offset);
    }
}
/*
 * Public allocation entry point.
 *
 * Asks k_malloc() for a run of blocks. On failure a message is printed and
 * NULL is returned. On success the iomem low-water mark is lowered when the
 * new buffer lies below it, with a warning printed if the mark has dropped
 * under the shifted cached-heap high-water mark.
 */
void *iomem_malloc(uint32_t size)
{
    const uint32_t offset = k_malloc(size);
    uintptr_t addr;

    if(offset == 0XFFFFFFFF)
    {
        printk("IOMEM malloc OUT of MEMORY!\r\n");
        return NULL;
    }

    addr = (uintptr_t)malloc_cortol.membase + offset;
    if(addr < (uintptr_t)_ioheap_line)
    {
        _ioheap_line = (char *)addr;
        if((uintptr_t)_ioheap_line < (uintptr_t)_heap_line - 0x40000000)
        {
            printk("WARNING: iomem heap line < cache heap line!\r\n");
        }
    }
    return (void *)addr;
}
/* Public wrapper: forwards to the allocator's `unused` hook and returns its result. */
uint32_t iomem_unused()
{
    uint32_t gap = malloc_cortol.unused();

    return gap;
}

View File

@ -11,6 +11,7 @@
#include "kpu.h"
#include "printf.h"
#include "nncase.h"
#include "utils.h"
#define LAYER_BURST_SIZE 12
@ -1004,8 +1005,13 @@ static void kpu_quantize(const kpu_model_quantize_layer_argument_t *arg, kpu_mod
{
size_t count = arg->count;
const float *src = (const float *)(ctx->main_buffer + arg->main_mem_in_address);
;
const kpu_model_quant_param_t q = arg->quant_param;
kpu_model_quant_param_t q;
#if FIX_CACHE
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
#else
q = arg->quant_param;
#endif
float scale = 1.f / q.scale;
uint8_t *dest = (uint8_t *)(ctx->main_buffer + arg->mem_out_address);
@ -1026,8 +1032,12 @@ static void kpu_kmodel_dequantize(const kpu_model_dequantize_layer_argument_t *a
const uint8_t *src = (const uint8_t *)(ctx->main_buffer + arg->main_mem_in_address);
float *dest = (float *)(ctx->main_buffer + arg->main_mem_out_address);
size_t oc, count = arg->count;
const kpu_model_quant_param_t q = arg->quant_param;
kpu_model_quant_param_t q;
#if FIX_CACHE
memcpy(&q, &arg->quant_param, sizeof(kpu_model_quant_param_t));
#else
q = arg->quant_param;
#endif
for(oc = 0; oc < count; oc++)
dest[oc] = *src++ * q.scale + q.bias;
}
@ -1357,6 +1367,9 @@ static void kpu_upload(const kpu_model_upload_layer_argument_t *arg, kpu_model_c
int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
{
#if FIX_CACHE
configASSERT(!is_memory_cache((uintptr_t)buffer));
#endif
uintptr_t base_addr = (uintptr_t)buffer;
const kpu_kmodel_header_t *header = (const kpu_kmodel_header_t *)buffer;

View File

@ -22,6 +22,7 @@
#include "spi.h"
#include "sysctl.h"
#include "utils.h"
#include "iomem.h"
volatile spi_t *const spi[4] =
{
@ -411,7 +412,11 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
switch(spi_transfer_width)
{
case SPI_TRANS_SHORT:
buf = malloc((tx_len) * sizeof(uint32_t));
#if FIX_CACHE
buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
#else
buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
#endif
for(i = 0; i < tx_len; i++)
buf[i] = ((uint16_t *)tx_buff)[i];
break;
@ -420,7 +425,12 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
break;
case SPI_TRANS_CHAR:
default:
buf = malloc((tx_len) * sizeof(uint32_t));
#if FIX_CACHE
buf = (uint32_t *)iomem_malloc((tx_len) * sizeof(uint32_t));
#else
buf = (uint32_t *)malloc((tx_len) * sizeof(uint32_t));
#endif
for(i = 0; i < tx_len; i++)
buf[i] = ((uint8_t *)tx_buff)[i];
break;
@ -429,13 +439,19 @@ void spi_send_data_normal_dma(dmac_channel_number_t channel_num, spi_device_num_
spi_handle->ssienr = 0x01;
sysctl_dma_select((sysctl_dma_channel_t)channel_num, SYSCTL_DMA_SELECT_SSI0_TX_REQ + spi_num * 2);
dmac_set_single_mode(channel_num, buf, (void *)(&spi_handle->dr[0]), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
DMAC_MSIZE_4, DMAC_TRANS_WIDTH_32, tx_len);
spi_handle->ser = 1U << chip_select;
dmac_wait_done(channel_num);
if(spi_transfer_width != SPI_TRANS_INT)
{
#if FIX_CACHE
iomem_free((void *)buf);
#else
free((void *)buf);
#endif
}
while((spi_handle->sr & 0x05) != 0x04)
;
spi_handle->ser = 0x00;
@ -472,8 +488,13 @@ void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
size_t v_max_len = v_tx_len > v_rx_len ? v_tx_len : v_rx_len;
#if FIX_CACHE
uint32_t *v_tx_buf = iomem_malloc(v_max_len * 4);
uint32_t *v_rx_buf = iomem_malloc(v_max_len * 4);
#else
uint32_t *v_tx_buf = malloc(v_max_len * 4);
uint32_t *v_rx_buf = malloc(v_max_len * 4);
#endif
uint32_t i = 0;
switch(frame_width)
{
@ -552,8 +573,13 @@ void spi_dup_send_receive_data_dma(dmac_channel_number_t dma_send_channel_num,
rx_buf[i] = v_rx_buf[i];
break;
}
#if FIX_CACHE
iomem_free(v_tx_buf);
iomem_free(v_rx_buf);
#else
free(v_tx_buf);
free(v_rx_buf);
#endif
}
void spi_receive_data_standard(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint8_t *cmd_buff,
@ -721,7 +747,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
switch(frame_width)
{
case SPI_TRANS_INT:
#if FIX_CACHE
write_cmd = iomem_malloc(cmd_len + rx_len);
#else
write_cmd = malloc(cmd_len + rx_len);
#endif
for(i = 0; i < cmd_len / 4; i++)
write_cmd[i] = ((uint32_t *)cmd_buff)[i];
read_buf = &write_cmd[i];
@ -729,7 +759,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
v_cmd_len = cmd_len / 4;
break;
case SPI_TRANS_SHORT:
#if FIX_CACHE
write_cmd = iomem_malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
#else
write_cmd = malloc((cmd_len + rx_len) / 2 * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len / 2; i++)
write_cmd[i] = ((uint16_t *)cmd_buff)[i];
read_buf = &write_cmd[i];
@ -737,7 +771,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
v_cmd_len = cmd_len / 2;
break;
default:
#if FIX_CACHE
write_cmd = iomem_malloc((cmd_len + rx_len) * sizeof(uint32_t));
#else
write_cmd = malloc((cmd_len + rx_len) * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len; i++)
write_cmd[i] = cmd_buff[i];
read_buf = &write_cmd[i];
@ -763,8 +801,11 @@ void spi_receive_data_standard_dma(dmac_channel_number_t dma_send_channel_num,
rx_buff[i] = read_buf[i];
break;
}
#if FIX_CACHE
iomem_free(write_cmd);
#else
free(write_cmd);
#endif
}
void spi_receive_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
@ -887,14 +928,22 @@ void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
v_recv_len = rx_len / 4;
break;
case SPI_TRANS_SHORT:
#if FIX_CACHE
write_cmd = iomem_malloc(cmd_len + rx_len / 2 * sizeof(uint32_t));
#else
write_cmd = malloc(cmd_len + rx_len / 2 * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len; i++)
write_cmd[i] = cmd_buff[i];
read_buf = &write_cmd[i];
v_recv_len = rx_len / 2;
break;
default:
#if FIX_CACHE
write_cmd = iomem_malloc(cmd_len + rx_len * sizeof(uint32_t));
#else
write_cmd = malloc(cmd_len + rx_len * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len; i++)
write_cmd[i] = cmd_buff[i];
read_buf = &write_cmd[i];
@ -921,7 +970,13 @@ void spi_receive_data_multiple_dma(dmac_channel_number_t dma_send_channel_num,
}
if(frame_width != SPI_TRANS_INT)
{
#if FIX_CACHE
iomem_free(write_cmd);
#else
free(write_cmd);
#endif
}
}
void spi_send_data_multiple(spi_device_num_t spi_num, spi_chip_select_t chip_select, const uint32_t *cmd_buff,
@ -979,7 +1034,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
switch(frame_width)
{
case SPI_TRANS_INT:
#if FIX_CACHE
buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len);
#else
buf = malloc(cmd_len * sizeof(uint32_t) + tx_len);
#endif
for(i = 0; i < cmd_len; i++)
buf[i] = cmd_buff[i];
for(i = 0; i < tx_len / 4; i++)
@ -987,7 +1046,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
v_send_len = cmd_len + tx_len / 4;
break;
case SPI_TRANS_SHORT:
#if FIX_CACHE
buf = iomem_malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
#else
buf = malloc(cmd_len * sizeof(uint32_t) + tx_len / 2 * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len; i++)
buf[i] = cmd_buff[i];
for(i = 0; i < tx_len / 2; i++)
@ -995,7 +1058,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
v_send_len = cmd_len + tx_len / 2;
break;
default:
#if FIX_CACHE
buf = iomem_malloc((cmd_len + tx_len) * sizeof(uint32_t));
#else
buf = malloc((cmd_len + tx_len) * sizeof(uint32_t));
#endif
for(i = 0; i < cmd_len; i++)
buf[i] = cmd_buff[i];
for(i = 0; i < tx_len; i++)
@ -1006,7 +1073,11 @@ void spi_send_data_multiple_dma(dmac_channel_number_t channel_num, spi_device_nu
spi_send_data_normal_dma(channel_num, spi_num, chip_select, buf, v_send_len, SPI_TRANS_INT);
#if FIX_CACHE
iomem_free((void *)buf);
#else
free((void *)buf);
#endif
}
void spi_fill_data_dma(dmac_channel_number_t channel_num, spi_device_num_t spi_num, spi_chip_select_t chip_select,

View File

@ -20,6 +20,7 @@
#include "sysctl.h"
#include "uart.h"
#include "utils.h"
#include "iomem.h"
#define __UART_BRATE_CONST 16
@ -158,12 +159,18 @@ static int uart_dma_callback(void *ctx)
size_t v_buf_len = v_uart_dma_instance->buf_len;
uint8_t *v_buffer = v_uart_dma_instance->buffer;
uint32_t *v_recv_buffer = v_uart_dma_instance->malloc_buffer;
for(size_t i = 0; i < v_buf_len; i++)
{
v_buffer[i] = v_recv_buffer[i];
}
}
#if FIX_CACHE
iomem_free(v_uart_dma_instance->malloc_buffer);
#else
free(v_uart_dma_instance->malloc_buffer);
#endif
v_uart_dma_instance->malloc_buffer = NULL;
if(v_uart_dma_instance->uart_int_instance.callback)
v_uart_dma_instance->uart_int_instance.callback(v_uart_dma_instance->uart_int_instance.ctx);
return 0;
@ -184,27 +191,42 @@ int uart_receive_data(uart_device_number_t channel, char *buffer, size_t buf_len
void uart_receive_data_dma(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel, uint8_t *buffer, size_t buf_len)
{
uint32_t *v_recv_buf = malloc(buf_len * sizeof(uint32_t));
#if FIX_CACHE
uint32_t *v_recv_buf = (uint32_t *)iomem_malloc(buf_len * sizeof(uint32_t));
#else
uint32_t *v_recv_buf = (uint32_t *)malloc(buf_len * sizeof(uint32_t));
#endif
configASSERT(v_recv_buf != NULL);
sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_RX_REQ + uart_channel * 2);
dmac_set_single_mode(dmac_channel, (void *)(&uart[uart_channel]->RBR), v_recv_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
dmac_wait_done(dmac_channel);
for(uint32_t i = 0; i < buf_len; i++)
{
buffer[i] = (uint8_t)(v_recv_buf[i] & 0xff);
}
#if FIX_CACHE
iomem_free(v_recv_buf);
#else
free(v_recv_buf);
#endif
}
void uart_receive_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel,
uint8_t *buffer, size_t buf_len, plic_irq_callback_t uart_callback,
void *ctx, uint32_t priority)
{
uint32_t *v_recv_buf = malloc(buf_len * sizeof(uint32_t));
#if FIX_CACHE
uint32_t *v_recv_buf = (uint32_t *)iomem_malloc(buf_len * sizeof(uint32_t));
#else
uint32_t *v_recv_buf = (uint32_t *)malloc(buf_len * sizeof(uint32_t));
#endif
configASSERT(v_recv_buf != NULL);
uart_recv_dma_instance[uart_channel].dmac_channel = dmac_channel;
uart_recv_dma_instance[uart_channel].uart_num = uart_channel;
uart_recv_dma_instance[uart_channel].malloc_buffer = v_recv_buf;
@ -217,7 +239,7 @@ void uart_receive_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_n
dmac_irq_register(dmac_channel, uart_dma_callback, &uart_recv_dma_instance[uart_channel], priority);
sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_RX_REQ + uart_channel * 2);
dmac_set_single_mode(dmac_channel, (void *)(&uart[uart_channel]->RBR), v_recv_buf, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
}
int uart_send_data(uart_device_number_t channel, const char *buffer, size_t buf_len)
@ -233,22 +255,37 @@ int uart_send_data(uart_device_number_t channel, const char *buffer, size_t buf_
void uart_send_data_dma(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel, const uint8_t *buffer, size_t buf_len)
{
#if FIX_CACHE
uint32_t *v_send_buf = iomem_malloc(buf_len * sizeof(uint32_t));
#else
uint32_t *v_send_buf = malloc(buf_len * sizeof(uint32_t));
#endif
configASSERT(v_send_buf != NULL);
for(uint32_t i = 0; i < buf_len; i++)
v_send_buf[i] = buffer[i];
sysctl_dma_select((sysctl_dma_channel_t)dmac_channel, SYSCTL_DMA_SELECT_UART1_TX_REQ + uart_channel * 2);
dmac_set_single_mode(dmac_channel, v_send_buf, (void *)(&uart[uart_channel]->THR), DMAC_ADDR_INCREMENT, DMAC_ADDR_NOCHANGE,
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
DMAC_MSIZE_1, DMAC_TRANS_WIDTH_32, buf_len);
dmac_wait_done(dmac_channel);
#if FIX_CACHE
iomem_free((void *)v_send_buf);
#else
free((void *)v_send_buf);
#endif
}
void uart_send_data_dma_irq(uart_device_number_t uart_channel, dmac_channel_number_t dmac_channel,
const uint8_t *buffer, size_t buf_len, plic_irq_callback_t uart_callback,
void *ctx, uint32_t priority)
{
#if FIX_CACHE
uint32_t *v_send_buf = iomem_malloc(buf_len * sizeof(uint32_t));
#else
uint32_t *v_send_buf = malloc(buf_len * sizeof(uint32_t));
#endif
configASSERT(v_send_buf != NULL);
uart_send_dma_instance[uart_channel] = (uart_dma_instance_t){

View File

@ -41,3 +41,11 @@ uint32_t get_gpio_bit(volatile uint32_t *bits, size_t offset)
{
return get_bit(bits, 1, offset);
}
/*
 * Report whether `address` falls inside the cached memory window, i.e. the
 * MEM_CACHE_LEN bytes starting at 0x80000000 (end exclusive).
 * Returns 1 when inside, 0 otherwise.
 */
uint32_t is_memory_cache(uintptr_t address)
{
#define MEM_CACHE_LEN (6 * 1024 * 1024)
    const uintptr_t window_begin = 0x80000000;
    const uintptr_t window_end = window_begin + MEM_CACHE_LEN;

    if(address < window_begin)
    {
        return 0;
    }
    if(address >= window_end)
    {
        return 0;
    }
    return 1;
}

View File

@ -17,6 +17,8 @@
#include <cmath>
#include <runtime/runtime_op_utility.h>
#include <xtl/xspan.hpp>
#include <cstring>
#include <utils.h>
namespace nncase
{
@ -139,11 +141,18 @@ namespace kernels
inline void matmul(const float *input_a, const float *input_b, float *output, const float *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, const value_range<float> &fused_activation)
{
#if FIX_CACHE
float *cache_mem = new float[b_cols];
memcpy(cache_mem, bias, b_cols*sizeof(float));
#else
const float *cache_mem =bias;
#endif
for (size_t oy = 0; oy < a_rows; oy++)
{
for (size_t ox = 0; ox < b_cols; ox++)
{
float value = bias[ox];
float value = cache_mem[ox];
for (size_t i = 0; i < a_cols; i++)
{
const auto a = input_a[oy * a_cols + i];
@ -154,6 +163,9 @@ namespace kernels
output[oy * b_cols + ox] = details::apply_activation(value, fused_activation);
}
}
#if FIX_CACHE
delete []cache_mem;
#endif
}
template <class T>