Merge pull request #90 from kendryte/feature/kmodelv4

Update nncase runtime
pull/91/head
zzxcanaan 2019-08-05 13:48:54 +08:00 committed by GitHub
commit 1a79aea3a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 1129 additions and 251 deletions

View File

@ -91,6 +91,16 @@ SECTIONS
. = ALIGN(8);
/* Exception handling */
.eh_frame :
{
KEEP (*(.eh_frame)) *(.eh_frame.*)
. = ALIGN(8);
} >ram AT>ram :ram_ro
.gnu_extab : { *(.gnu_extab) } >ram AT>ram :ram_ro
.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } >ram AT>ram :ram_ro
.exception_ranges : { *(.exception_ranges .exception_ranges*) } >ram AT>ram :ram_ro
/* Init array and fini array */
.preinit_array :
{

View File

@ -1,5 +1,22 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "target_config.h"
#include <array>
#include <cmath>
#include <limits>
#include <optional>
#include <stdint.h>
@ -26,13 +43,19 @@ struct value_range
{
T min;
T max;
// Builds the range spanning every finite value of T, i.e.
// [numeric_limits<T>::lowest(), numeric_limits<T>::max()].
static constexpr value_range<T> full() noexcept
{
    using limits = std::numeric_limits<T>;
    return { limits::lowest(), limits::max() };
}
};
typedef enum _reduce_op
{
reduce_mean,
reduce_min,
reduce_max
reduce_max,
reduce_sum
} reduce_op_t;
typedef enum _binary_op
@ -40,9 +63,31 @@ typedef enum _binary_op
binary_add,
binary_sub,
binary_mul,
binary_div
binary_div,
binary_min,
binary_max
} binary_op_t;
// Selector for a unary operation. The numeric values are spelled out so
// that inserting or reordering an enumerator cannot silently renumber the
// others; they equal the implicit 0-based values of the declaration order.
typedef enum _unary_op
{
    unary_abs = 0,
    unary_ceil = 1,
    unary_cos = 2,
    unary_exp = 3,
    unary_floor = 4,
    unary_log = 5,
    unary_neg = 6,
    unary_rsqrt = 7,
    unary_sin = 8,
    unary_square = 9
} unary_op_t;
// Selector for the image resize mode. Values are written explicitly and
// match the implicit 0-based numbering of the declaration order.
typedef enum _image_resize_mode
{
    image_resize_bilinear = 0,
    image_resize_nearest_neighbor = 1
} image_resize_mode_t;
typedef struct _quant_param
{
int32_t zero_point;
@ -54,10 +99,17 @@ inline bool operator==(const quant_param_t &lhs, const quant_param_t &rhs) noexc
return lhs.zero_point == rhs.zero_point && lhs.scale == rhs.scale;
}
// Tolerant comparison of two quantization parameters: the zero points must
// match exactly, while the scales may differ by at most
// std::numeric_limits<float>::epsilon() (an absolute tolerance).
inline bool almost_equal(const quant_param_t &lhs, const quant_param_t &rhs) noexcept
{
    if (lhs.zero_point != rhs.zero_point)
        return false;

    const auto scale_delta = std::abs(lhs.scale - rhs.scale);
    return scale_delta <= std::numeric_limits<float>::epsilon();
}
// A multiplier paired with a shift amount.
struct fixed_mul
{
    float mul;
    int8_t shift;

    // Returns `mul` rounded to the nearest integer; roundf() rounds halfway
    // cases away from zero.
    int32_t rounded_mul() const noexcept
    {
        const float nearest = roundf(mul);
        return static_cast<int32_t>(nearest);
    }
};
typedef enum _memory_type
@ -94,4 +146,14 @@ struct memory_range
uint32_t start;
uint32_t size;
};
// Two paddings are equal when both their `before` and `after` amounts match.
inline bool operator==(const padding &lhs, const padding &rhs) noexcept
{
    const bool same_before = lhs.before == rhs.before;
    const bool same_after = lhs.after == rhs.after;
    return same_before && same_after;
}
// Two paddings differ when either their `before` or their `after` amount differs.
inline bool operator!=(const padding &lhs, const padding &rhs) noexcept
{
    return !(lhs.before == rhs.before && lhs.after == rhs.after);
}
}

View File

@ -0,0 +1,36 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <fstream>
#include <vector>
namespace nncase
{
// Reads the whole file at `filename` into memory.
//
// filename: path of the file to read; it is opened in binary mode.
// returns:  the file's bytes in order (an empty vector for an empty file).
// throws:   std::runtime_error if the file cannot be opened, its size cannot
//           be determined, or its contents cannot be read completely.
inline std::vector<uint8_t> read_file(const std::filesystem::path &filename)
{
    std::ifstream infile(filename, std::ios::binary | std::ios::in);
    // A failed open sets failbit, not badbit, so bad() would not notice it.
    if (!infile.is_open())
        throw std::runtime_error("Cannot open file: " + filename.string());

    infile.seekg(0, std::ios::end);
    // tellg() reports failure as -1; converting that straight to size_t would
    // request an enormous buffer instead of reporting the error.
    const std::streamoff length = infile.tellg();
    if (length < 0)
        throw std::runtime_error("Cannot open file: " + filename.string());
    infile.seekg(0, std::ios::beg);

    std::vector<uint8_t> data(static_cast<size_t>(length));
    if (!data.empty() && !infile.read(reinterpret_cast<char *>(data.data()), length))
        throw std::runtime_error("Cannot read file: " + filename.string());

    // The stream closes itself when it goes out of scope.
    return data;
}
}

View File

@ -1,6 +1,20 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../utils.h"
#include <runtime_op_utility.h>
#include <runtime/runtime_op_utility.h>
namespace nncase
{

View File

@ -1,7 +1,21 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../utils.h"
#include <runtime_op_utility.h>
#include <targets/k210/k210_runtime_op_utility.h>
#include <runtime/k210/k210_runtime_op_utility.h>
#include <runtime/runtime_op_utility.h>
namespace nncase
{
@ -9,16 +23,39 @@ namespace kernels
{
namespace k210
{
namespace details
{
    // Trait mapping a pooling element type T to the type used for partial
    // results while pooling. Only the specializations below are defined;
    // any other T is a compile error.
    template <class T>
    struct pool_partial_type;

    // uint8_t elements use a 32-bit unsigned partial result.
    template <>
    struct pool_partial_type<uint8_t>
    {
        typedef uint32_t type;
    };

    // float elements use float for the partial result as well.
    template <>
    struct pool_partial_type<float>
    {
        typedef float type;
    };

    // Shorthand for pool_partial_type<T>::type.
    template <class T>
    using pool_partial_type_t = typename pool_partial_type<T>::type;
}
inline void kpu_upload(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape)
{
using namespace runtime::k210;
if (in_shape[3] % 64 == 0)
{
std::copy(src, src + kernels::details::compute_size(in_shape), dest);
}
else
{
auto layout = targets::k210::get_kpu_row_layout(in_shape[3]);
auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
auto layout = get_kpu_row_layout(in_shape[3]);
auto fmap_size = get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
for (int32_t batch = 0; batch < in_shape[0]; batch++)
{
@ -37,18 +74,18 @@ namespace kernels
}
}
#if NNCASE_TARGET_K210_SIMULATOR
inline void kpu_download(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape)
{
using namespace runtime::k210;
if (in_shape[3] % 64 == 0)
{
std::copy(src, src + kernels::details::compute_size(in_shape), dest);
}
else
{
auto layout = targets::k210::get_kpu_row_layout(in_shape[3]);
auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
auto layout = get_kpu_row_layout(in_shape[3]);
auto fmap_size = get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
for (int32_t batch = 0; batch < in_shape[0]; batch++)
{
@ -69,7 +106,7 @@ namespace kernels
template <bool IsDepthwise, int32_t FilterSize>
void kpu_conv2d(const uint8_t *input, int64_t *workspace, uint8_t *output, const uint8_t *weights, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, uint8_t pad_value, int32_t arg_x,
int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const targets::k210::kpu_batchnorm_segment *batchnorm, const targets::k210::kpu_activation_table_t &activation)
int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const runtime::k210::kpu_batchnorm_segment *batchnorm, const runtime::k210::kpu_activation_table_t &activation)
{
const auto channel_size = size_t(in_h) * in_w;
// conv
@ -142,19 +179,21 @@ namespace kernels
for (size_t i = 0; i < channel_size; i++)
{
auto value = (*src_it++ * bn.mul >> bn.shift) + bn.add;
auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const targets::k210::kpu_activation_segment &seg) {
auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const runtime::k210::kpu_activation_segment &seg) {
return value > seg.start_x;
});
value = runtime::carry_shift((value - seg.start_x) * seg.mul, seg.shift);
value = runtime::carry_shift<int64_t, true>((value - seg.start_x) * seg.mul, seg.shift);
*out_it++ = (uint8_t)std::clamp(value, int64_t(0), int64_t(255));
}
}
}
}
inline void kpu_pool2d(const uint8_t *input, uint8_t *output, int32_t in_h, int32_t in_w, int32_t in_channels, targets::k210::kpu_pool_type_t pool_type)
template <class T>
inline void kpu_pool2d(const T *input, T *output, int32_t in_h, int32_t in_w, int32_t in_channels, runtime::k210::kpu_pool_type_t pool_type)
{
using namespace targets::k210;
using namespace runtime::k210;
using partial_t = details::pool_partial_type_t<T>;
const auto filter = get_kpu_filter_size(pool_type);
const auto stride = get_kpu_filter_stride(pool_type);
@ -171,7 +210,7 @@ namespace kernels
{
const int32_t in_y_origin = oy * stride;
const int32_t in_x_origin = ox * stride;
int32_t value = 0;
partial_t value = 0;
switch (pool_type)
{
@ -187,16 +226,17 @@ namespace kernels
case kpu_pool_max_2_s1:
case kpu_pool_max_4_s4:
{
value = std::numeric_limits<T>::lowest();
for (int32_t ky = 0; ky < filter; ky++)
{
for (int32_t kx = 0; kx < filter; kx++)
{
const int32_t in_y = in_y_origin + ky;
const int32_t in_x = in_x_origin + kx;
int32_t in_v;
partial_t in_v;
if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w)
in_v = 0;
in_v = std::numeric_limits<T>::lowest();
else
in_v = in_c_p[in_y * in_w + in_x];
@ -216,7 +256,7 @@ namespace kernels
{
const int32_t in_y = std::clamp(in_y_origin + ky, 0, in_h - 1);
const int32_t in_x = std::clamp(in_x_origin + kx, 0, in_w - 1);
const int32_t in_v = in_c_p[in_y * in_w + in_x];
const T in_v = in_c_p[in_y * in_w + in_x];
value += in_v;
}
@ -232,7 +272,7 @@ namespace kernels
auto k_off = get_kpu_select_pool_offset(pool_type);
const int32_t in_y = in_y_origin + k_off[0];
const int32_t in_x = in_x_origin + k_off[1];
int32_t in_v;
partial_t in_v;
if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w)
in_v = 0;
@ -244,13 +284,68 @@ namespace kernels
}
}
*output++ = (uint8_t)value;
*output++ = (T)value;
}
}
}
}
#endif
// Float convolution with stride 1 whose output has the same height and width
// as its input.
//
// IsDepthwise: when true, every output channel forms its own group and reads
//              exactly one input channel; otherwise there is a single group
//              that reads all in_channels.
// FilterSize:  filter height and width; a size of 1 uses no border, any other
//              size uses a border of 1 on each side.
//
// input:   channel planes of in_h * in_w floats, one plane per input channel.
// output:  written sequentially, one in_h * in_w plane per output channel.
// weights: FilterSize * FilterSize floats per (output channel, input channel)
//          pair, ordered by group, output channel, input channel, ky, kx.
// bias:    one float per output channel; it seeds each accumulated sum.
// fused_activation: range passed to kernels::details::apply_activation for
//          every finished output value.
//
// Filter taps that fall outside the input are skipped, so they contribute 0.
template <bool IsDepthwise, int32_t FilterSize>
void fake_kpu_conv2d(const float *input, float *output, const float *weights, const float *bias, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, const value_range<float> &fused_activation)
{
    const auto pad = FilterSize == 1 ? 0 : 1;
    const auto groups = IsDepthwise ? out_channels : 1;
    const auto g_ic = IsDepthwise ? 1 : in_channels / groups;
    const auto g_oc = IsDepthwise ? 1 : out_channels;
    const auto filter_area = static_cast<size_t>(FilterSize) * FilterSize;

    for (int32_t og = 0; og < groups; og++)
    {
        const auto *w_group_p = weights + static_cast<size_t>(og) * g_oc * g_ic * filter_area;

        for (int32_t oc = 0; oc < g_oc; oc++)
        {
            const auto *w_oc_p = w_group_p + static_cast<size_t>(oc) * g_ic * filter_area;

            for (int32_t oy = 0; oy < in_h; oy++)
            {
                // The vertical filter window depends on the row only, so it
                // is computed once per row rather than once per pixel.
                const int32_t in_y_origin = oy - pad;
                const int32_t filter_y_start = std::max(0, -in_y_origin);
                const int32_t filter_y_end = std::min(FilterSize, in_h - in_y_origin);

                for (int32_t ox = 0; ox < in_w; ox++)
                {
                    const int32_t in_x_origin = ox - pad;
                    const int32_t filter_x_start = std::max(0, -in_x_origin);
                    const int32_t filter_x_end = std::min(FilterSize, in_w - in_x_origin);
                    float value = bias[og * g_oc + oc];

                    // Accumulation order (ic, ky, kx) is kept as-is so the
                    // floating-point result is unchanged.
                    for (int32_t ic = 0; ic < g_ic; ic++)
                    {
                        const auto *in_c_p = input + (static_cast<size_t>(og) * g_ic + ic) * in_h * in_w;
                        const auto *w_ic_p = w_oc_p + static_cast<size_t>(ic) * filter_area;

                        for (int32_t ky = filter_y_start; ky < filter_y_end; ky++)
                        {
                            const int32_t in_y = in_y_origin + ky;

                            for (int32_t kx = filter_x_start; kx < filter_x_end; kx++)
                            {
                                const int32_t in_x = in_x_origin + kx;
                                const auto in_v = in_c_p[in_y * in_w + in_x];
                                const auto w = w_ic_p[ky * FilterSize + kx];
                                value += in_v * w;
                            }
                        }
                    }

                    *output++ = kernels::details::apply_activation(value, fused_activation);
                }
            }
        }
    }
}
}
}
}

View File

@ -1,7 +1,21 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../utils.h"
#include <cmath>
#include <runtime_op_utility.h>
#include <runtime/runtime_op_utility.h>
#include <xtl/xspan.hpp>
namespace nncase
@ -82,7 +96,7 @@ namespace kernels
const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h);
const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w);
float value = bias[oc];
float value = bias[og * g_oc + oc];
for (int32_t ic = 0; ic < g_ic; ic++)
{
@ -172,7 +186,7 @@ namespace kernels
if (d0 < paddings[0].before || d0 >= out_shape[0] - paddings[0].after
|| d1 < paddings[1].before || d1 >= out_shape[1] - paddings[1].after
|| d2 < paddings[2].before || d2 >= out_shape[2] - paddings[2].after
|| d3 < paddings[3].before || d1 >= out_shape[3] - paddings[3].after)
|| d3 < paddings[3].before || d3 >= out_shape[3] - paddings[3].after)
*output++ = pad_value;
else
*output++ = in2[d3_origin + d3];

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <cassert>

View File

@ -1,4 +1,4 @@
/* Copyright 2018 Canaan Inc.
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <iostream>
#include <xtl/xspan.hpp>
@ -36,12 +50,18 @@ namespace runtime
stream_.seekp(pos);
}
void align_position(size_t alignment)
std::streamoff align_position(size_t alignment)
{
auto pos = position();
auto rem = pos % alignment;
if (rem != 0)
position(pos + std::streamoff(alignment - rem));
{
auto off = std::streamoff(alignment - rem);
position(pos + off);
return off;
}
return 0;
}
private:

View File

@ -1,9 +1,23 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../node_body.h"
namespace nncase
{
namespace targets
namespace runtime
{
namespace cpu
{
@ -25,7 +39,7 @@ namespace targets
xtl::span<const float> weights;
xtl::span<const float> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);
@ -62,7 +76,7 @@ namespace targets
xtl::span<const float> weights;
xtl::span<const float> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);
@ -121,7 +135,7 @@ namespace targets
xtl::span<const uint8_t> weights;
xtl::span<const int32_t> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);
@ -166,7 +180,7 @@ namespace targets
xtl::span<const uint8_t> weights;
xtl::span<const int32_t> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);

View File

@ -0,0 +1,31 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <runtime/interpreter.h>
namespace nncase
{
namespace runtime
{
namespace cpu
{
// Interpreter type for the `cpu` namespace. It declares no members of its
// own and behaves exactly like runtime::interpreter_base.
class interpreter : public runtime::interpreter_base
{
public:
// Inherit all of interpreter_base's constructors unchanged.
using interpreter_base::interpreter_base;
};
}
}
}

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "model.h"
#include <chrono>

View File

@ -1,20 +1,34 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "k210_sim_types.h"
#include <runtime/interpreter.h>
namespace nncase
{
namespace targets
namespace runtime
{
namespace k210
{
struct k210_interpreter_context
{
runtime::interpreter_base *interpreter;
runtime::interpreter_step_t step;
interpreter_base *interpreter;
interpreter_step_t step;
};
class interpreter : public runtime::interpreter_base
class interpreter : public interpreter_base
{
public:
using interpreter_base::memory_at;

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../node_body.h"
#include "k210_runtime_op_utility.h"
@ -5,7 +19,7 @@
namespace nncase
{
namespace targets
namespace runtime
{
namespace k210
{
@ -26,7 +40,7 @@ namespace targets
const kpu_activate_table_t *activation;
xtl::span<const uint8_t> weights;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(main_mem_output);
reader.read(batches);
@ -52,6 +66,27 @@ namespace targets
layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)weights.data();
#endif
}
// Writes this node body to `writer`.
//
// Output order: main_mem_output, batches, reserved0, a slot of
// sizeof(layer) bytes, then batch_norm, *activation and weights, each
// preceded by an alignment step (8, 256 and 128 respectively).
// `layer` itself is written last, back into the reserved slot, because
// three of its fields are only filled in while the trailing data is emitted.
// Not const: it modifies `layer`.
void serialize(binary_writer &writer)
{
writer.write(main_mem_output);
writer.write(batches);
writer.write(reserved0);
// Remember where `layer` belongs and skip past its slot for now.
auto layer_pos = writer.position();
writer.position(layer_pos + std::streamoff(sizeof(layer)));
// Each address field below receives align_position()'s return value
// rather than an absolute position.
// NOTE(review): presumably this is the padding byte count that the
// matching deserialize() skips before each section - confirm.
layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uint32_t)writer.align_position(8);
writer.write_array(batch_norm);
layer.kernel_calc_type_cfg.data.active_addr = (uint32_t)writer.align_position(256);
writer.write(*activation);
layer.kernel_load_cfg.data.para_start_addr = (uint32_t)writer.align_position(128);
writer.write_array(weights);
// Go back, fill the reserved slot with the now-complete `layer`, then
// restore the position to the end of the written data.
auto end_pos = writer.position();
writer.position(layer_pos);
writer.write(layer);
writer.position(end_pos);
}
};
}
}

View File

@ -1,9 +1,23 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "k210_sim_types.h"
namespace nncase
{
namespace targets
namespace runtime
{
namespace k210
{
@ -49,11 +63,24 @@ namespace targets
case kpu_filter_3x3:
return 3;
default:
return 0;
NNCASE_THROW(std::runtime_error, "Invalid kpu filter");
}
}
inline int get_kpu_rows(int32_t width, int32_t height, int32_t channels)
// Returns the padding that goes with a KPU filter type: 0 for
// kpu_filter_1x1 and 1 for kpu_filter_3x3. Any other value is reported
// through NNCASE_THROW as a std::runtime_error.
inline int32_t get_kpu_padding(kpu_filter_type_t filter)
{
    if (filter == kpu_filter_1x1)
        return 0;
    if (filter == kpu_filter_3x3)
        return 1;

    NNCASE_THROW(std::runtime_error, "Invalid kpu filter");
}
inline int32_t get_kpu_rows(int32_t width, int32_t height, int32_t channels)
{
auto layout = get_kpu_row_layout(width);
auto one_line_channels = std::min(channels, layout.groups);
@ -62,12 +89,15 @@ namespace targets
return size;
}
inline int get_kpu_bytes(int32_t width, int32_t height, int32_t channels)
inline int32_t get_kpu_bytes(int32_t width, int32_t height, int32_t channels)
{
return get_kpu_rows(width, height, channels) * 64;
}
#if NNCASE_TARGET_K210_SIMULATOR
// Shape overload: computes get_kpu_bytes(shape[3], shape[2], shape[1]) and
// multiplies it by shape[0].
inline int32_t get_kpu_bytes(const runtime_shape_t &shape)
{
    const auto bytes_per_item = get_kpu_bytes(shape[3], shape[2], shape[1]);
    return bytes_per_item * shape[0];
}
inline int32_t get_kpu_filter_size(kpu_pool_type_t filter)
{
@ -86,6 +116,8 @@ namespace targets
case kpu_pool_mean_4_s4:
case kpu_pool_left_top_4_s4:
return 4;
default:
NNCASE_THROW(std::runtime_error, "Invalid kpu filter");
}
}
@ -107,6 +139,8 @@ namespace targets
case kpu_pool_mean_4_s4:
case kpu_pool_left_top_4_s4:
return 4;
default:
NNCASE_THROW(std::runtime_error, "Invalid kpu pool type");
}
}
@ -125,10 +159,10 @@ namespace targets
return { 0, 1 };
case kpu_pool_left_top_4_s4:
return { 0, 0 };
default:
NNCASE_THROW(std::runtime_error, "Invalid kpu pool type");
}
}
#endif
}
}
}

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <array>
#include <cstdint>
@ -11,7 +25,7 @@
namespace nncase
{
namespace targets
namespace runtime
{
namespace k210
{

View File

@ -1,5 +1,19 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "target_config.h"
#include "target_interpreter.h"
#include <datatypes.h>
#include <runtime/runtime_op.h>
#include <xtl/xspan.hpp>

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../datatypes.h"
#include "runtime_op.h"
@ -9,7 +23,7 @@ namespace runtime
enum model_target : uint32_t
{
MODEL_TARGET_CPU = 0,
MODEL_TARGET_K210 = 1,
MODEL_TARGET_K210 = 1
};
struct model_header

View File

@ -1,9 +1,23 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../node_body.h"
namespace nncase
{
namespace targets
namespace runtime
{
namespace neutral
{
@ -28,7 +42,7 @@ namespace targets
xtl::span<const memory_range> inputs;
xtl::span<const int32_t> dims;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(output);
reader.read(inner_size);
@ -38,7 +52,7 @@ namespace targets
reader.read_span(dims, inputs_count);
}
void serialize(runtime::binary_writer &writer) const
void serialize(binary_writer &writer) const
{
writer.write(output);
writer.write(inner_size);
@ -68,7 +82,7 @@ namespace targets
xtl::span<const float> weights;
xtl::span<const float> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);
@ -88,7 +102,7 @@ namespace targets
reader.read_span(bias, out_channels);
}
void serialize(runtime::binary_writer &writer) const
void serialize(binary_writer &writer) const
{
writer.write(input);
writer.write(output);
@ -127,7 +141,7 @@ namespace targets
value_range<float> fused_activation;
xtl::span<const float> bias;
void deserialize(runtime::span_reader &reader)
void deserialize(span_reader &reader)
{
reader.read(input_a);
reader.read(input_b);
@ -139,7 +153,7 @@ namespace targets
reader.read_span(bias, b_cols);
}
void serialize(runtime::binary_writer &writer) const
void serialize(binary_writer &writer) const
{
writer.write(input_a);
writer.write(input_b);
@ -202,23 +216,14 @@ namespace targets
value_range<float> fused_activation;
};
struct resize_bilinear_options : public simple_node_body<resize_bilinear_options>
{
memory_range input;
memory_range output;
runtime_shape_t in_shape;
int32_t out_h;
int32_t out_w;
bool align_corners;
};
struct resize_nearest_neighbor_options : public simple_node_body<resize_nearest_neighbor_options>
// Options body for the resize_image runtime op.
// Serialization comes from simple_node_body, which reads and writes the whole
// object in one call. NOTE(review): that presumably makes the member order
// and types part of the stored format - confirm before rearranging fields.
struct resize_image_options : public simple_node_body<resize_image_options>
{
// Memory ranges of the source and destination tensors.
memory_range input;
memory_range output;
// Shape of the input tensor.
runtime_shape_t in_shape;
// NOTE(review): presumably the requested output height and width - confirm.
int32_t out_h;
int32_t out_w;
// Which image_resize_mode_t to apply (bilinear or nearest neighbor).
image_resize_mode_t mode;
bool align_corners;
};
@ -253,6 +258,13 @@ namespace targets
int32_t new_axis_mask;
int32_t shrink_axis_mask;
};
// Options body for the unary runtime op.
// Serialization comes from simple_node_body, which reads and writes the whole
// object in one call. NOTE(review): that presumably makes the member order
// and types part of the stored format - confirm before rearranging fields.
struct unary_options : public simple_node_body<unary_options>
{
// Memory ranges of the source and destination tensors.
memory_range input;
memory_range output;
// Which unary_op_t to apply.
unary_op_t unary_op;
};
}
}
}

View File

@ -0,0 +1,38 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../datatypes.h"
#include "binary_writer.h"
#include "span_reader.h"
namespace nncase
{
namespace runtime
{
// Base class that gives a node-body type T its deserialize/serialize pair.
// T derives from simple_node_body<T>; both functions cast `this` to T and
// pass the whole object to the reader or writer in a single call.
template <class T>
struct simple_node_body
{
    // Fills the derived object from `reader`.
    void deserialize(span_reader &reader)
    {
        T &self = static_cast<T &>(*this);
        reader.read(self);
    }

    // Writes the derived object to `writer`.
    void serialize(binary_writer &writer) const
    {
        const T &self = static_cast<const T &>(*this);
        writer.write(self);
    }
};
}
}

View File

@ -1,32 +1,32 @@
BEGINE_DEFINE_TARGET(neutral)
DEFINE_RUNTIME_OP(neutral, binary, Binary, 0)
DEFINE_RUNTIME_OP(neutral, concat, Concat, 1)
DEFINE_RUNTIME_OP(neutral, conv2d, Conv2D, 2)
DEFINE_RUNTIME_OP(neutral, dequantize, Dequantize, 3)
DEFINE_RUNTIME_OP(neutral, matmul, MatMul, 4)
DEFINE_RUNTIME_OP(neutral, pad, Pad, 5)
DEFINE_RUNTIME_OP(neutral, quantize, Quantize, 6)
DEFINE_RUNTIME_OP(neutral, reduce, Reduce, 7)
DEFINE_RUNTIME_OP(neutral, reduce_window2d, ReduceWindow2D, 8)
DEFINE_RUNTIME_OP(neutral, memory_copy, MemoryCopy, 9)
DEFINE_RUNTIME_OP(neutral, resize_bilinear, ResizeBilinear, 10)
DEFINE_RUNTIME_OP(neutral, resize_nearest_neighbor, ResizeNearestNeighbor, 11)
DEFINE_RUNTIME_OP(neutral, softmax, Softmax, 12)
DEFINE_RUNTIME_OP(neutral, transpose, Transpose, 13)
DEFINE_RUNTIME_OP(neutral, strided_slice, StridedSlice, 14)
DEFINE_NEUTRAL_RUNTIME_OP(binary, Binary, 0x0)
DEFINE_NEUTRAL_RUNTIME_OP(concat, Concat, 0x1)
DEFINE_NEUTRAL_RUNTIME_OP(conv2d, Conv2D, 0x2)
DEFINE_NEUTRAL_RUNTIME_OP(dequantize, Dequantize, 0x3)
DEFINE_NEUTRAL_RUNTIME_OP(matmul, MatMul, 0x4)
DEFINE_NEUTRAL_RUNTIME_OP(pad, Pad, 0x5)
DEFINE_NEUTRAL_RUNTIME_OP(quantize, Quantize, 0x6)
DEFINE_NEUTRAL_RUNTIME_OP(reduce, Reduce, 0x7)
DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d, ReduceWindow2D, 0x8)
DEFINE_NEUTRAL_RUNTIME_OP(memory_copy, MemoryCopy, 0x9)
DEFINE_NEUTRAL_RUNTIME_OP(resize_image, ResizeImage, 0x0A)
DEFINE_NEUTRAL_RUNTIME_OP(softmax, Softmax, 0x0B)
DEFINE_NEUTRAL_RUNTIME_OP(transpose, Transpose, 0x0C)
DEFINE_NEUTRAL_RUNTIME_OP(strided_slice, StridedSlice, 0x0D)
DEFINE_NEUTRAL_RUNTIME_OP(unary, Unary, 0x0E)
END_DEFINE_TARGET()
// CPU
BEGINE_DEFINE_TARGET(cpu)
DEFINE_RUNTIME_OP(cpu, cpu_conv2d, CPU_CPUConv2D, 1001)
DEFINE_RUNTIME_OP(cpu, cpu_depthwise_conv2d, CPU_CPUDepthwiseConv2D, 1002)
DEFINE_RUNTIME_OP(cpu, cpu_reduce_window2d, CPU_CPUReduceWindow2D, 1003)
DEFINE_RUNTIME_OP(cpu, cpu_quantized_conv2d, CPU_CPUQuantizedConv2D, 1004)
DEFINE_RUNTIME_OP(cpu, cpu_quantized_depthwise_conv2d, CPU_CPUQuantizedDepthwiseConv2D, 1005)
DEFINE_RUNTIME_OP(cpu, cpu_conv2d, CPUConv2D, 0x1001)
DEFINE_RUNTIME_OP(cpu, cpu_depthwise_conv2d, CPUDepthwiseConv2D, 0x1002)
DEFINE_RUNTIME_OP(cpu, cpu_reduce_window2d, CPUReduceWindow2D, 0x1003)
DEFINE_RUNTIME_OP(cpu, cpu_quantized_conv2d, CPUQuantizedConv2D, 0x1004)
DEFINE_RUNTIME_OP(cpu, cpu_quantized_depthwise_conv2d, CPUQuantizedDepthwiseConv2D, 0x1005)
END_DEFINE_TARGET()
// K210
BEGINE_DEFINE_TARGET(k210)
DEFINE_RUNTIME_OP(k210, kpu_upload, K210_KPUUpload, 2001)
DEFINE_RUNTIME_OP(k210, kpu_conv2d, K210_KPUConv2D, 2002)
DEFINE_RUNTIME_OP(k210, kpu_upload, KPUUpload, 0x2001)
DEFINE_RUNTIME_OP(k210, kpu_conv2d, KPUConv2D, 0x2002)
END_DEFINE_TARGET()

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../datatypes.h"
#include <string_view>
@ -7,7 +21,8 @@ namespace nncase
namespace runtime
{
#define BEGINE_DEFINE_TARGET(...)
#define DEFINE_RUNTIME_OP(target, id, name, value) rop_##id = value,
#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) rop_##id = value,
#define DEFINE_RUNTIME_OP(target, id, name, value) rop_##target##_##id = value,
#define END_DEFINE_TARGET()
enum runtime_opcode : uint32_t
@ -15,10 +30,14 @@ namespace runtime
#include "runtime_op.def"
};
#undef DEFINE_NEUTRAL_RUNTIME_OP
#undef DEFINE_RUNTIME_OP
#define DEFINE_RUNTIME_OP(target, id, name, value) \
#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) \
case rop_##id: \
return #name;
#define DEFINE_RUNTIME_OP(target, id, name, value) \
case rop_##target##_##id: \
return #name;
constexpr std::string_view node_opcode_names(runtime_opcode opcode)
{
@ -31,6 +50,7 @@ namespace runtime
}
#undef BEGINE_DEFINE_TARGET
#undef DEFINE_NEUTRAL_RUNTIME_OP
#undef DEFINE_RUNTIME_OP
#undef END_DEFINE_TARGET
}

View File

@ -0,0 +1,146 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../datatypes.h"
namespace nncase
{
namespace runtime
{
// Returns the size in bytes of one element of `type`:
// 4 for dt_float32, 1 for dt_uint8.
//
// Any other datatype is reported through NNCASE_THROW(std::runtime_error).
// When NNCASE_THROW does not actually leave the function (it expands to an
// assert in no-exception builds, and assert is a no-op under NDEBUG), 0 is
// returned instead of an uninitialized value.
inline size_t get_bytes(datatype_t type)
{
    switch (type)
    {
    case dt_float32:
        return 4;
    case dt_uint8:
        return 1;
    default:
        NNCASE_THROW(std::runtime_error, "Not supported data type");
    }

    // Only reached when NNCASE_THROW did not throw.
    return 0;
}
// Counts how many consecutive zero bits `value` has, starting at bit
// `Bits - 1` and walking down towards bit 0. Returns `Bits` when none of the
// low `Bits` bits is set.
template <int32_t Bits, class T>
uint8_t count_leading_zeros(T value)
{
    // Walk down from the top bit until the first set bit (or past bit 0).
    int32_t bit = Bits - 1;
    while (bit >= 0 && (value & (1ULL << bit)) == 0)
        --bit;

    // Every position skipped above was a zero.
    return static_cast<uint8_t>(Bits - 1 - bit);
}
// Returns a value of type T whose lowest `shift` bits are set,
// i.e. (1 << shift) - 1.
template <class T = uint64_t>
inline T bit_mask(uint8_t shift)
{
    const auto one_past_mask = T(1) << shift;
    return static_cast<T>(one_past_mask - 1);
}
// Right-shifts `value` by `shift` bits, rounding to the nearest integer
// instead of truncating. A `shift` of 0 returns `value` unchanged.
//
//   Banker == false : a fraction of exactly 0.5 rounds towards +infinity
//                     (2.5 -> 3, -1.5 -> -1).
//   Banker == true  : a fraction of exactly 0.5 rounds to the even neighbour
//                     (1.5 -> 2, 2.5 -> 2, -0.5 -> 0, -1.5 -> -2).
//
// `>>` on a negative value is relied upon to be an arithmetic (flooring)
// shift. Under that model the discarded low bits are always a non-negative
// fraction, so "round up" means adding 1 for both signs; subtracting 1 for
// negative inputs would move the result two steps away from the true value.
template <class T, bool Banker = false>
T carry_shift(T value, uint8_t shift)
{
    if (shift == 0)
        return value;

    if constexpr (Banker)
    {
        // value == integral * 2^shift + fractional, 0 <= fractional < 2^shift.
        // `half` is computed in 64 bits so it cannot overflow `int` when
        // shift > 31 (T is int64_t when called from mul_and_carry_shift).
        const uint64_t half = uint64_t(1) << (shift - 1);
        const uint64_t fractional = static_cast<uint64_t>(value) & ((half << 1) - 1);
        const T integral = value >> shift;

        // frac < 0.5: keep the floor.
        if (fractional < half)
            return integral;
        // frac > 0.5: next integer up.
        if (fractional > half)
            return static_cast<T>(integral + 1);
        // frac == 0.5: pick whichever neighbour is even.
        return (integral & 1) ? static_cast<T>(integral + 1) : integral;
    }
    else
    {
        // Keep one fractional bit; it is the "0.5" bit of the floored value.
        value >>= shift - 1;
        const T carry = value & 0x1;
        return static_cast<T>((value >> 1) + carry);
    }
}
// Multiplies two 32-bit values in 64-bit precision, applies carry_shift with
// the requested rounding mode to the product, and narrows the result back to
// 32 bits.
template <bool Banker = false>
inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, uint8_t shift)
{
    const int64_t product = static_cast<int64_t>(value) * mul;
    return static_cast<int32_t>(carry_shift<int64_t, Banker>(product, shift));
}
// Compile-time mapping from a C++ element type to its datatype_t tag.
// The primary template has no `type` member, so to_datatype<T>::type (and
// to_datatype_v<T>) only compiles for the specializations below.
template <typename T>
struct to_datatype
{
};

// float -> dt_float32
template <>
struct to_datatype<float>
{
    static constexpr datatype_t type { dt_float32 };
};

// uint8_t -> dt_uint8
template <>
struct to_datatype<uint8_t>
{
    static constexpr datatype_t type { dt_uint8 };
};

// Shorthand for to_datatype<T>::type.
template <typename T>
inline constexpr datatype_t to_datatype_v { to_datatype<T>::type };
}
}

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <xtl/xspan.hpp>

View File

@ -1,15 +0,0 @@
#pragma once
// Joins three tokens with '/' to form a relative include path.
#define NNCASE_CONCAT_3(a, b, c) a/b/c
// Builds the angle-bracket include path <targets/<target>/<name>>.
#define NNCASE_TARGET_HEADER_(target, name) <NNCASE_CONCAT_3(targets, target, name)>
// Same as above with the target taken from the NNCASE_TARGET macro; the extra
// level of indirection lets NNCASE_TARGET expand before it is pasted into the path.
#define NNCASE_TARGET_HEADER(name) NNCASE_TARGET_HEADER_(NNCASE_TARGET, name)
// Pulls in the interpreter header of the target selected by NNCASE_TARGET.
#include NNCASE_TARGET_HEADER(interpreter.h)
namespace nncase
{
namespace runtime
{
// Alias for the interpreter class of the target selected by NNCASE_TARGET.
using interpreter_t = nncase::targets::NNCASE_TARGET::interpreter;
}
}

View File

@ -0,0 +1,28 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "../target_config.h"
// clang-format off
#include NNCASE_TARGET_HEADER(runtime,interpreter.h)
// clang-format on
namespace nncase
{
namespace runtime
{
// Alias for the interpreter class of the target selected at build time:
// NNCASE_TARGET is a macro that expands to the target's namespace name, so
// this resolves to nncase::runtime::<target>::interpreter.
using interpreter_t = nncase::runtime::NNCASE_TARGET::interpreter;
}
}

View File

@ -1,70 +0,0 @@
#pragma once
#include <cassert>
#include <datatypes.h>
namespace nncase
{
namespace runtime
{
// Returns the size in bytes of one element of `type`:
// 4 for dt_float32, 1 for dt_uint8.
//
// Any other datatype trips an assert. When asserts are compiled out (NDEBUG),
// 0 is returned instead of an uninitialized value.
inline size_t get_bytes(datatype_t type)
{
    switch (type)
    {
    case dt_float32:
        return 4;
    case dt_uint8:
        return 1;
    default:
        assert(!"Not supported data type");
    }

    // Only reached when the assert above is disabled.
    return 0;
}
// Counts how many consecutive zero bits `value` has, starting at bit
// `Bits - 1` and walking down towards bit 0. Returns `Bits` when none of the
// low `Bits` bits is set.
template <int32_t Bits, class T>
uint8_t count_leading_zeros(T value)
{
    // Walk down from the top bit until the first set bit (or past bit 0).
    int32_t bit = Bits - 1;
    while (bit >= 0 && (value & (1ULL << bit)) == 0)
        --bit;

    // Every position skipped above was a zero.
    return static_cast<uint8_t>(Bits - 1 - bit);
}
// Right-shifts `value` by `shift` bits, rounding to the nearest integer
// instead of truncating; a fraction of exactly 0.5 rounds towards +infinity
// (2.5 -> 3, -1.5 -> -1). A `shift` of 0 returns `value` unchanged.
//
// `>>` on a negative value is relied upon to be an arithmetic (flooring)
// shift, so the last discarded bit is a non-negative "0.5" carry that must be
// added for both signs; subtracting it for negative inputs would move the
// result two steps away from the true value.
template <class T>
T carry_shift(T value, uint8_t shift)
{
    if (shift == 0)
        return value;

    // Keep one fractional bit, then fold it into the integer part.
    value >>= shift - 1;
    const T carry = value & 0x1;
    return static_cast<T>((value >> 1) + carry);
}
// Multiplies two 32-bit values in 64-bit precision, applies carry_shift to
// the product, and narrows the result back to 32 bits.
inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, uint8_t shift)
{
    const int64_t product = static_cast<int64_t>(value) * mul;
    return static_cast<int32_t>(carry_shift(product, shift));
}
}
}

View File

@ -0,0 +1,32 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cassert>
// clang-format off
// Turns its argument into a string literal.
#define NNCASE_STRINGFY(x) #x
// Join path components with '/'. clang-format is switched off around these
// so it does not insert spaces around the '/' inside the generated path.
#define NNCASE_CONCAT_2(a, b) a/b
#define NNCASE_CONCAT_3(a, b, c) NNCASE_CONCAT_2(NNCASE_CONCAT_2(a, b), c)
// clang-format on
// Builds the angle-bracket include path <prefix/target/name>.
#define NNCASE_TARGET_HEADER_(prefix, target, name) <NNCASE_CONCAT_3(prefix, target, name)>
// Same as above with the target taken from the NNCASE_TARGET macro; the extra
// level of indirection lets NNCASE_TARGET expand before it is pasted into the path.
#define NNCASE_TARGET_HEADER(prefix, name) NNCASE_TARGET_HEADER_(prefix, NNCASE_TARGET, name)
// NNCASE_THROW(exception, args...):
//  - default build: throws `exception(args...)`.
//  - NNCASE_NO_EXCEPTIONS defined: expands to an assert whose text is the
//    exception type name; the constructor arguments are discarded. Because it
//    is a plain assert, it compiles to nothing under NDEBUG and execution
//    continues after the macro.
#ifndef NNCASE_NO_EXCEPTIONS
#include <stdexcept>
#define NNCASE_THROW(exception, ...) throw exception(__VA_ARGS__)
#else
#define NNCASE_THROW(exception, ...) assert(0 && #exception)
#endif

View File

@ -1,17 +0,0 @@
#pragma once
#include <runtime/interpreter.h>
namespace nncase
{
namespace targets
{
namespace cpu
{
// Interpreter for the cpu target. It adds no members of its own and simply
// inherits the constructors of runtime::interpreter_base.
class interpreter : public runtime::interpreter_base
{
public:
using interpreter_base::interpreter_base;
};
}
}
}

View File

@ -1,24 +0,0 @@
#pragma once
#include "../runtime/binary_writer.h"
#include "../runtime/span_reader.h"
#include <datatypes.h>
namespace nncase
{
namespace targets
{
// CRTP mix-in: a node-body struct `T` that derives from simple_node_body<T>
// gets deserialize()/serialize() that hand the whole derived object to the
// reader / writer.
template <class T>
struct simple_node_body
{
    // Fills the derived object by passing it to reader.read().
    void deserialize(runtime::span_reader &reader)
    {
        T &self = static_cast<T &>(*this);
        reader.read(self);
    }

    // Emits the derived object by passing it to writer.write().
    void serialize(runtime::binary_writer &writer) const
    {
        const T &self = static_cast<const T &>(*this);
        writer.write(self);
    }
};
}
}

View File

@ -0,0 +1,37 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <ir/quantizer.h>
#include <memory>
#include <scheduler/memory_allocator.h>
#include <transforms/transform.h>
#include <unordered_map>
#include <vector>
namespace nncase
{
class target
{
public:
virtual void fill_allocators(std::unordered_map<memory_type_t, scheduler::memory_allocator *> &allocators, std::vector<std::unique_ptr<scheduler::memory_allocator>> &allocator_holders) = 0;
virtual void registry_codegen_ops() = 0;
virtual void registry_evaluator_ops() = 0;
virtual void add_default_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_optimize1_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_optimize2_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_checkpoint_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_transforms(ir::quantizer& quantizer, const quant_param_t& input_quant_param, std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
};
}

View File

@ -1,4 +1,4 @@
/* Copyright 2018 Canaan Inc.
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,12 +13,26 @@
* limitations under the License.
*/
#include <nncase.h>
#include <runtime/target_config.h>
#include <kernels/k210/k210_kernels.h>
#include <runtime/target_interpreter.h>
#include <stdio.h>
using namespace nncase;
using namespace nncase::runtime;
#define NNCASE_DEBUG 0
namespace
{
// Starts a DMA copy of `input_size` bytes from `src` to `dest` on channel
// `dma_ch` and returns without waiting for it; `callback(userdata)` is
// registered as the channel's interrupt handler before the transfer is set up.
void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata)
{
// Register the IRQ handler first so it is in place before the transfer starts.
dmac_set_irq(dma_ch, callback, userdata, 1);
// Both addresses increment; the transfer width is 64 bits, so the length
// passed is input_size / 8. Integer division: a remainder of input_size
// modulo 8 would not be counted.
// NOTE(review): presumably callers guarantee a multiple of 8 - confirm.
dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT,
DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8);
// NOTE(review): the reason for this 1 us delay is not evident here - confirm.
usleep(1);
}
}
class nncase_context
{
public:
@ -46,20 +60,46 @@ public:
auto input = interpreter_.input_at(0);
auto mem = interpreter_.memory_at<uint8_t>(input);
std::copy(src, src + mem.size(), mem.begin());
interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
return 0;
if (input.memory_type == mem_main)
{
std::copy(src, src + mem.size(), mem.begin());
interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
return 0;
}
else if (input.memory_type == mem_k210_kpu)
{
auto shape = interpreter_.input_shape_at(0);
if (shape[3] % 64 == 0)
{
kpu_upload_dma(dma_ch, src, mem.data(), mem.size(), upload_done_thunk, this);
}
else
{
kernels::k210::kpu_upload(src, mem.data(), shape);
}
return 0;
}
return -1;
}
private:
// Runs when the interpreter reports completion (reached through done_thunk):
// optionally prints the total duration in debug builds, then forwards to the
// user's done callback if one was set.
void on_done()
{
#if NNCASE_DEBUG
    printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6);
#endif

    // Nothing to notify when no callback was registered.
    if (!done_callback_)
        return;

    done_callback_(userdata_);
}
// Invoked (via upload_done_thunk) once the DMA upload of the input has
// finished; starts the interpreter with this object's completion, error and
// profiling thunks and `this` as their userdata.
void on_upload_done()
{
interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
}
static void done_thunk(void *userdata)
{
reinterpret_cast<nncase_context *>(userdata)->on_done();
@ -67,12 +107,22 @@ private:
// Error callback handed to interpreter_.run(). Prints the message only when
// NNCASE_DEBUG is non-zero; otherwise the error is ignored. `userdata` is
// not used.
static void on_error_thunk(const char *err, void *userdata)
{
#if NNCASE_DEBUG
printf("Fatal: %s\n", err);
#endif
}
// Per-node profiling callback handed to interpreter_.run(). When NNCASE_DEBUG
// is non-zero it prints the opcode name and the node's duration in
// milliseconds; otherwise it does nothing. `userdata` is not used.
static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata)
{
#if NNCASE_DEBUG
    // A string_view's data() is not guaranteed to be NUL-terminated, so print
    // with an explicit length instead of relying on "%s".
    const auto name = node_opcode_names(op);
    printf("%.*s: %fms\n", static_cast<int>(name.size()), name.data(), duration.count() / 1e6);
#endif
}
// C-style callback adapter: `userdata` is the nncase_context that started the
// upload; forwards to its on_upload_done() and returns 0.
static int upload_done_thunk(void *userdata)
{
    auto *self = static_cast<nncase_context *>(userdata);
    self->on_upload_done();
    return 0;
}
private:

View File

@ -1,13 +1,27 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <kernels/cpu/cpu_kernels.h>
#include <runtime/kernel_registry.h>
#include <targets/cpu/cpu_ops_body.h>
#include <runtime/cpu/cpu_ops_body.h>
using namespace nncase;
using namespace nncase::runtime;
namespace nncase
{
namespace targets
namespace runtime
{
namespace cpu
{

View File

@ -1,3 +1,17 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cassert>
#include <iostream>
#include <runtime/interpreter.h>

View File

@ -1,8 +1,22 @@
#include <targets/k210/interpreter.h>
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <runtime/k210/interpreter.h>
using namespace nncase;
using namespace nncase::runtime;
using namespace nncase::targets::k210;
using namespace nncase::runtime::k210;
interpreter::interpreter()
#if NNCASE_TARGET_K210_SIMULATOR

View File

@ -1,6 +1,20 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <kernels/k210/k210_kernels.h>
#include <runtime/k210/k210_ops_body.h>
#include <runtime/kernel_registry.h>
#include <targets/k210/k210_ops_body.h>
#if !NNCASE_TARGET_K210_SIMULATOR
#include <dmac.h>
#include <sysctl.h>
@ -8,7 +22,7 @@
using namespace nncase;
using namespace nncase::runtime;
using namespace nncase::targets::k210;
using namespace nncase::runtime::k210;
namespace
{
@ -37,6 +51,7 @@ void kpu_conv2d_normal(kpu_layer_argument_t &layer, plic_irq_callback_t callback
plic_irq_register(IRQN_AI_INTERRUPT, callback, userdata);
plic_irq_enable(IRQN_AI_INTERRUPT);
kpu_send_layer(layer);
usleep(1);
}
void kpu_conv2d_output(kpu_layer_argument_t &layer, dmac_channel_number_t dma_ch, uint8_t *dest, plic_irq_callback_t callback, void *userdata)
@ -60,12 +75,27 @@ int kpu_plic_thunk(void *userdata)
(ctx.interpreter->*ctx.step)();
return 0;
}
// Starts a DMA copy of `input_size` bytes from `src` to `dest` on channel
// `dma_ch` and returns without waiting for it; `callback(userdata)` is
// registered as the channel's interrupt handler before the transfer is set up.
void kpu_upload_dma(dmac_channel_number_t dma_ch, const uint8_t *src, uint8_t *dest, size_t input_size, plic_irq_callback_t callback, void *userdata)
{
// Register the IRQ handler first so it is in place before the transfer starts.
dmac_set_irq(dma_ch, callback, userdata, 1);
// Both addresses increment; the transfer width is 64 bits, so the length
// passed is input_size / 8. Integer division: a remainder of input_size
// modulo 8 would not be counted.
// NOTE(review): presumably callers guarantee a multiple of 8 - confirm.
dmac_set_single_mode(dma_ch, (void *)src, (void *)dest, DMAC_ADDR_INCREMENT, DMAC_ADDR_INCREMENT,
DMAC_MSIZE_16, DMAC_TRANS_WIDTH_64, input_size / 8);
// NOTE(review): the reason for this 1 us delay is not evident here - confirm.
usleep(1);
}
// Interrupt callback for the upload DMA: `userdata` is the
// k210_interpreter_context prepared by the caller; invokes the stored `step`
// member function on the stored interpreter and returns 0.
int kpu_dma_plic_thunk(void *userdata)
{
    auto *ctx = static_cast<k210_interpreter_context *>(userdata);
    auto step = ctx->step;
    (ctx->interpreter->*step)();
    return 0;
}
#endif
}
namespace nncase
{
namespace targets
namespace runtime
{
namespace k210
{
@ -73,6 +103,16 @@ namespace targets
{
auto input = interpreter.memory_at<uint8_t>(options.input);
auto output = interpreter.memory_at<uint8_t>(options.output);
#if !NNCASE_TARGET_K210_SIMULATOR
if (options.in_shape[3] % 64 == 0)
{
auto &ctx = interpreter.context();
ctx.interpreter = &interpreter;
ctx.step = step;
kpu_upload_dma(interpreter.dma_ch(), input.data(), output.data(), input.size(), kpu_dma_plic_thunk, &ctx);
return kcr_async;
}
#endif
kernels::k210::kpu_upload(input.data(), output.data(), options.in_shape);
return kcr_done;
}

View File

@ -1,20 +1,36 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <runtime/cpu/cpu_ops_body.h>
#include <runtime/k210/k210_ops_body.h>
#include <runtime/kernel_registry.h>
#include <runtime/neutral/neutral_ops_body.h>
#include <runtime/span_reader.h>
#include <targets/cpu/cpu_ops_body.h>
#include <targets/k210/k210_ops_body.h>
#include <targets/neutral/neutral_ops_body.h>
using namespace nncase;
using namespace nncase::runtime;
namespace nncase
{
namespace targets
namespace runtime
{
#define BEGINE_DEFINE_TARGET(target) \
namespace target \
{
#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) \
kernel_call_result id(id##_options &, interpreter_t &, interpreter_step_t);
#define DEFINE_RUNTIME_OP(target, id, name, value) \
kernel_call_result id(id##_options &, interpreter_t &, interpreter_step_t);
@ -23,6 +39,7 @@ namespace targets
#include <runtime/runtime_op.def>
#undef BEGINE_DEFINE_TARGET
#undef DEFINE_NEUTRAL_RUNTIME_OP
#undef DEFINE_RUNTIME_OP
#undef END_DEFINE_TARGET
}
@ -35,18 +52,26 @@ kernel_call_result runtime::call_kernel(runtime_opcode opcode, xtl::span<const u
switch (opcode)
{
#define BEGINE_DEFINE_TARGET(...)
#define DEFINE_NEUTRAL_RUNTIME_OP(id, name, value) \
case rop_##id: \
{ \
nncase::runtime::neutral::id##_options options; \
options.deserialize(reader); \
return nncase::runtime::neutral::id(options, interpreter, step); \
}
#define DEFINE_RUNTIME_OP(target, id, name, value) \
case rop_##id: \
case rop_##target##_##id: \
{ \
nncase::targets::target::id##_options options; \
nncase::runtime::target::id##_options options; \
options.deserialize(reader); \
return nncase::targets::target::id(options, interpreter, step); \
return nncase::runtime::target::id(options, interpreter, step); \
}
#define END_DEFINE_TARGET()
#include <runtime/runtime_op.def>
#undef BEGINE_DEFINE_TARGET
#undef DEFINE_NEUTRAL_RUNTIME_OP
#undef DEFINE_RUNTIME_OP
#undef END_DEFINE_TARGET
default:

View File

@ -1,6 +1,20 @@
/* Copyright 2019 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <kernels/neutral/neutral_kernels.h>
#include <runtime/kernel_registry.h>
#include <targets/neutral/neutral_ops_body.h>
#include <runtime/neutral/neutral_ops_body.h>
using namespace nncase;
using namespace nncase::runtime;
@ -23,7 +37,7 @@ using namespace nncase::runtime;
namespace nncase
{
namespace targets
namespace runtime
{
namespace neutral
{
@ -51,6 +65,12 @@ namespace targets
case binary_div:
binary([](auto a, auto b) { return a / b; });
return kcr_done;
case binary_min:
binary([](auto a, auto b) { return std::min(a, b); });
return kcr_done;
case binary_max:
binary([](auto a, auto b) { return std::max(a, b); });
return kcr_done;
default:
return kcr_error;
}
@ -146,6 +166,9 @@ namespace targets
case reduce_max:
reduce([](auto a, auto b) { return std::max(a, b); });
return kcr_done;
case reduce_sum:
reduce([](auto a, auto b) { return a + b; });
return kcr_done;
default:
return kcr_error;
}
@ -172,31 +195,33 @@ namespace targets
case reduce_max:
reduce([](auto a, auto b) { return std::max(a, b); }, [](auto v, auto k) { return v; });
return kcr_done;
case reduce_sum:
reduce([](auto a, auto b) { return a + b; }, [](auto v, auto k) { return v; });
return kcr_done;
default:
return kcr_error;
}
}
kernel_call_result resize_bilinear(resize_bilinear_options &options, interpreter_t &interpreter, interpreter_step_t step)
kernel_call_result resize_image(resize_image_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
auto input = interpreter.memory_at<float>(options.input);
auto output = interpreter.memory_at<float>(options.output);
kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners);
return kcr_done;
}
kernel_call_result resize_nearest_neighbor(resize_nearest_neighbor_options &options, interpreter_t &interpreter, runtime::interpreter_step_t step)
{
auto input = interpreter.memory_at<uint8_t>(options.input);
auto output = interpreter.memory_at<uint8_t>(options.output);
if (options.mode == image_resize_bilinear)
{
kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners);
return kcr_done;
}
else
{
#define RESIZE_NN_KERNEL(T) \
kernels::neutral::resize_nearest_neighbor(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.out_h, options.out_w);
ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL);
return kcr_done;
ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL);
return kcr_done;
#undef RESIZE_NN_KERNEL
}
}
kernel_call_result softmax(softmax_options &options, interpreter_t &interpreter, interpreter_step_t step)
@ -233,6 +258,52 @@ namespace targets
return kcr_done;
#undef STRIDED_SLICE_KERNEL
}
// Applies the element-wise operation selected by options.unary_op to the
// float buffer at options.input, writing the results to options.output.
// Returns kcr_done on success and kcr_error for an unrecognized unary_op.
// `step` is not used.
kernel_call_result unary(unary_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
    auto input = interpreter.memory_at<float>(options.input);
    auto output = interpreter.memory_at<float>(options.output);

    // Runs the kernel over all input.size() elements with the given functor.
    auto apply = [&](auto unary_op) {
        kernels::neutral::unary(input.data(), output.data(), input.size(), unary_op);
    };

    switch (options.unary_op)
    {
    case unary_abs:
        // Float variant, like every other op here, so the value is not
        // round-tripped through double.
        apply([](auto a) { return fabsf(a); });
        return kcr_done;
    case unary_ceil:
        apply([](auto a) { return ceilf(a); });
        return kcr_done;
    case unary_cos:
        apply([](auto a) { return cosf(a); });
        return kcr_done;
    case unary_exp:
        apply([](auto a) { return expf(a); });
        return kcr_done;
    case unary_floor:
        apply([](auto a) { return floorf(a); });
        return kcr_done;
    case unary_log:
        apply([](auto a) { return logf(a); });
        return kcr_done;
    case unary_neg:
        apply([](auto a) { return -a; });
        return kcr_done;
    case unary_rsqrt:
        apply([](auto a) { return 1.f / sqrtf(a); });
        return kcr_done;
    case unary_sin:
        apply([](auto a) { return sinf(a); });
        return kcr_done;
    case unary_square:
        apply([](auto a) { return a * a; });
        return kcr_done;
    default:
        return kcr_error;
    }
}
}
}
}