[nncase] Upload runtime

pull/107/head
sunnycase 2019-11-21 12:42:05 +08:00
parent d60910fc00
commit a3ac928968
7 changed files with 379 additions and 22 deletions

View File

@ -27,6 +27,8 @@ inline std::vector<uint8_t> read_file(const std::filesystem::path &filename)
infile.seekg(0, std::ios::end);
size_t length = infile.tellg();
if (!length)
throw std::runtime_error("Invalid file: " + filename.string());
infile.seekg(0, std::ios::beg);
std::vector<uint8_t> data(length);
infile.read(reinterpret_cast<char *>(data.data()), length);

View File

@ -48,6 +48,35 @@ namespace kernels
}
}
template <class TOp>
void quantized_binary(const uint8_t *input_a, const uint8_t *input_b, uint8_t *output, const runtime_shape_t &in_a_shape,
const runtime_shape_t &in_b_shape, const runtime_shape_t &out_shape, int32_t input_a_offset, int32_t input_a_mul, int32_t input_a_shift,
int32_t input_b_offset, int32_t input_b_mul, int32_t input_b_shift, int32_t output_mul, int32_t output_shift, int32_t output_offset, TOp &&op)
{
for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
{
for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
{
for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
{
for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
{
runtime_shape_t in_off = { d0, d1, d2, d3 };
const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
auto a = (int32_t)input_a[offset(in_a_shape, in_a_off)];
auto b = (int32_t)input_b[offset(in_b_shape, in_b_off)];
a = runtime::mul_and_carry_shift(a + input_a_offset, input_a_mul, input_a_shift);
b = runtime::mul_and_carry_shift(b + input_b_offset, input_b_mul, input_b_shift);
auto output_val = runtime::mul_and_carry_shift(op(a, b), output_mul, output_shift);
output[offset(out_shape, in_off)] = (uint8_t)std::clamp(output_val + output_offset, 0, 255);
}
}
}
}
}
template <class TRange, class TPtrGetter = details::default_ptr_getter<uint8_t, TRange>>
inline void concat(xtl::span<TRange> inputs, uint8_t *output, xtl::span<const int32_t> concat_dims, size_t inner_size, size_t outer_size, TPtrGetter getter = {})
{
@ -125,6 +154,71 @@ namespace kernels
}
}
// Quantized (uint8) grouped 2D convolution.
//
// Indexing treats in_shape as { batch, channels, height, width }: in_shape[0] drives the batch loop,
// in_shape[1] is split evenly across `groups`, in_shape[2] pairs with the *_h parameters and
// in_shape[3] with the *_w parameters.
// Weights are addressed as [group][out channel in group][in channel in group][filter_h][filter_w].
// bias is indexed by the absolute output channel (og * g_oc + oc).
// Output elements are written sequentially in loop order: batch, group, output channel, oy, ox.
//
// Each accumulator starts from the bias, sums (input + input_offset) * (weight + filter_offset) over the
// in-bounds part of the window, is rescaled with mul_and_carry_shift(value, output_mul, output_shift),
// biased by output_offset and saturated to [0, 255].
inline void quantized_conv2d(const uint8_t *input, uint8_t *output, const uint8_t *weights, const int32_t *bias, int32_t input_offset, int32_t filter_offset,
int32_t output_mul, int32_t output_shift, int32_t output_offset, const runtime_shape_t &in_shape, int32_t groups, int32_t out_channels,
int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
const padding &padding_h, const padding &padding_w)
{
const auto out_h = details::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h);
const auto out_w = details::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w);
// Per-group input and output channel counts (integer division; any remainder is dropped).
const auto g_ic = in_shape[1] / groups;
const auto g_oc = out_channels / groups;
for (int32_t batch = 0; batch < in_shape[0]; batch++)
{
// Start of this batch item in the input.
const uint8_t *in_batch_p = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
for (int32_t og = 0; og < groups; og++)
{
// Start of this group's input channels and of this group's filters.
const uint8_t *in_group_p = in_batch_p + (size_t)og * g_ic * in_shape[2] * in_shape[3];
const uint8_t *w_group_p = weights + (size_t)og * g_oc * g_ic * filter_h * filter_w;
for (int32_t oc = 0; oc < g_oc; oc++)
{
const uint8_t *w_oc_p = w_group_p + (size_t)oc * g_ic * filter_h * filter_w;
for (int32_t oy = 0; oy < out_h; oy++)
{
for (int32_t ox = 0; ox < out_w; ox++)
{
// Top-left input coordinate of the window; negative when the window starts inside the leading padding.
const int32_t in_y_origin = (oy * stride_h) - padding_h.before;
const int32_t in_x_origin = (ox * stride_w) - padding_w.before;
// Clip the filter taps to those whose input coordinate lies inside [0, in_shape[2]) x [0, in_shape[3]).
// Taps that fall on padding are skipped and therefore add nothing to the accumulator.
const int32_t filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h);
const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w);
// The accumulator is seeded with the bias of the absolute output channel.
int32_t value = bias[og * g_oc + oc];
for (int32_t ic = 0; ic < g_ic; ic++)
{
const uint8_t *in_c_p = in_group_p + (size_t)ic * in_shape[2] * in_shape[3];
const uint8_t *w_ic_p = w_oc_p + (size_t)ic * filter_h * filter_w;
for (int32_t ky = filter_y_start; ky < filter_y_end; ky++)
{
for (int32_t kx = filter_x_start; kx < filter_x_end; kx++)
{
const int32_t in_y = in_y_origin + dilation_h * ky;
const int32_t in_x = in_x_origin + dilation_w * kx;
// Offsets are added to the raw uint8 values before multiplying.
const int32_t in_v = (int32_t)in_c_p[in_y * in_shape[3] + in_x] + input_offset;
const int32_t w = (int32_t)w_ic_p[ky * filter_w + kx] + filter_offset;
value += in_v * w;
}
}
}
// Requantize the accumulator and saturate to the uint8 range.
auto output_val = static_cast<int32_t>(runtime::mul_and_carry_shift(value, output_mul, output_shift));
output_val += output_offset;
*output++ = (uint8_t)std::clamp(output_val, 0, 255);
}
}
}
}
}
}
template <class TQ>
void dequantize(const TQ *input, float *output, size_t count, const quant_param_t &param)
{
@ -156,6 +250,28 @@ namespace kernels
}
}
// Quantized (uint8) matrix multiply: output[a_rows x b_cols] = requantize(bias + A * B).
//
// input_a is read as a row-major a_rows x a_cols matrix and input_b as a row-major a_cols x b_cols
// matrix. bias holds one int32 value per output column. Each accumulator starts from bias[ox], sums
// (a + input_a_offset) * (b + input_b_offset) over the shared dimension, is rescaled with
// mul_and_carry_shift(value, output_mul, output_shift), biased by output_offset and saturated to [0, 255].
//
// Non-positive dimensions result in the corresponding loop not executing.
inline void quantized_matmul(const uint8_t *input_a, const uint8_t *input_b, uint8_t *output, const int32_t *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, int32_t input_a_offset, int32_t input_b_offset,
    int32_t output_mul, int32_t output_shift, int32_t output_offset)
{
    // The loop counters are signed like the dimensions they are compared against. With unsigned
    // counters a negative dimension would be converted to a huge bound and run far past the buffers.
    for (int32_t oy = 0; oy < a_rows; oy++)
    {
        for (int32_t ox = 0; ox < b_cols; ox++)
        {
            int32_t value = bias[ox];
            for (int32_t i = 0; i < a_cols; i++)
            {
                // Element indices are computed in size_t so large matrices do not overflow int32.
                const auto a = static_cast<int32_t>(input_a[static_cast<size_t>(oy) * a_cols + i]) + input_a_offset;
                const auto b = static_cast<int32_t>(input_b[static_cast<size_t>(i) * b_cols + ox]) + input_b_offset;
                value += a * b;
            }

            // Requantize the accumulator and saturate to the uint8 range.
            auto output_val = static_cast<int32_t>(runtime::mul_and_carry_shift(value, output_mul, output_shift));
            output_val += output_offset;
            output[static_cast<size_t>(oy) * b_cols + ox] = static_cast<uint8_t>(std::clamp(output_val, 0, 255));
        }
    }
}
template <class T>
void pad(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_paddings_t &paddings, T pad_value)
{
@ -313,7 +429,8 @@ namespace kernels
}
}
inline void resize_bilinear(const float *input, float *output, const runtime_shape_t &in_shape, int32_t out_h, int32_t out_w, bool align_corners)
template <class T>
inline void resize_bilinear(const T *input, T *output, const runtime_shape_t &in_shape, int32_t out_h, int32_t out_w, bool align_corners)
{
auto height_scale = (float)in_shape[2] / out_h;
auto width_scale = (float)in_shape[3] / out_w;
@ -353,7 +470,7 @@ namespace kernels
auto a2 = (1 - (in_y - in_y0)) * (in_x - in_x0);
auto a3 = (in_y - in_y0) * (in_x - in_x0);
output[destIdx++] = v0 * a0 + v1 * a1 + v2 * a2 + v3 * a3;
output[destIdx++] = T(v0 * a0 + v1 * a1 + v2 * a2 + v3 * a3);
}
}
}

View File

@ -33,6 +33,26 @@ namespace runtime
value_range<float> fused_activation;
};
// Parameters of a quantized element-wise binary node.
// NOTE(review): (de)serialization comes from simple_node_body, which is not visible here — presumably
// it depends on the member order below, so do not reorder fields without checking it.
struct quantized_binary_options : public simple_node_body<quantized_binary_options>
{
// Memory ranges of the two operands and of the result.
memory_range input_a;
memory_range input_b;
memory_range output;
// Which binary operation to apply.
binary_op_t binary_op;
// Shapes of the two operands and of the result.
runtime_shape_t in_a_shape;
runtime_shape_t in_b_shape;
runtime_shape_t out_shape;
// Requantization parameters for operand A: offset added to the raw value, then multiplier and shift.
int32_t input_a_offset;
int32_t input_a_mul;
int32_t input_a_shift;
// Requantization parameters for operand B, same meaning as for A.
int32_t input_b_offset;
int32_t input_b_mul;
int32_t input_b_shift;
// Output requantization: multiplier and shift applied to the op result, offset added afterwards.
int32_t output_offset;
int32_t output_mul;
int32_t output_shift;
};
struct concat_options
{
memory_range output;
@ -123,6 +143,78 @@ namespace runtime
}
};
// Parameters of a quantized 2D convolution node, with explicit (de)serialization.
// deserialize() reads the fields in exactly the order serialize() writes them; keep both in sync.
struct quantized_conv2d_options
{
// Memory ranges of the input and output tensors.
memory_range input;
memory_range output;
// Shape of the input tensor; in_shape[1] is used below as the input channel count.
runtime_shape_t in_shape;
int32_t groups;
int32_t out_channels;
padding padding_h;
padding padding_w;
int32_t filter_h;
int32_t filter_w;
int32_t stride_h;
int32_t stride_w;
int32_t dilation_h;
int32_t dilation_w;
// Quantization parameters: offsets for input and filter values, and the output multiplier, shift and offset.
int32_t input_offset;
int32_t filter_offset;
int32_t output_mul;
int32_t output_shift;
int32_t output_offset;
// One bias value per output channel.
xtl::span<const int32_t> bias;
// out_channels * (in_shape[1] / groups) * filter_h * filter_w filter values.
xtl::span<const uint8_t> weights;
// Reads every scalar field, then the bias and weights spans whose lengths are derived from
// the fields already read (out_channels, in_shape, groups, filter_h, filter_w).
// NOTE(review): read_span presumably yields views into the reader's buffer rather than copies — confirm lifetime.
void deserialize(span_reader &reader)
{
reader.read(input);
reader.read(output);
reader.read(in_shape);
reader.read(groups);
reader.read(out_channels);
reader.read(padding_h);
reader.read(padding_w);
reader.read(filter_h);
reader.read(filter_w);
reader.read(stride_h);
reader.read(stride_w);
reader.read(dilation_h);
reader.read(dilation_w);
reader.read(input_offset);
reader.read(filter_offset);
reader.read(output_mul);
reader.read(output_shift);
reader.read(output_offset);
reader.read_span(bias, out_channels);
reader.read_span(weights, (size_t)out_channels * in_shape[1] / groups * filter_h * filter_w);
}
// Writes every field in declaration order, followed by the bias and weights arrays.
void serialize(binary_writer &writer) const
{
writer.write(input);
writer.write(output);
writer.write(in_shape);
writer.write(groups);
writer.write(out_channels);
writer.write(padding_h);
writer.write(padding_w);
writer.write(filter_h);
writer.write(filter_w);
writer.write(stride_h);
writer.write(stride_w);
writer.write(dilation_h);
writer.write(dilation_w);
writer.write(input_offset);
writer.write(filter_offset);
writer.write(output_mul);
writer.write(output_shift);
writer.write(output_offset);
writer.write_array(bias);
writer.write_array(weights);
}
};
struct dequantize_options : public simple_node_body<dequantize_options>
{
memory_range input;
@ -166,6 +258,54 @@ namespace runtime
}
};
// Parameters of a quantized matrix-multiply node, with explicit (de)serialization.
// deserialize() reads the fields in exactly the order serialize() writes them; keep both in sync.
struct quantized_matmul_options
{
// Memory ranges of the two operand matrices and of the result.
memory_range input_a;
memory_range input_b;
memory_range output;
// Matrix dimensions: A has a_rows and a_cols, B has b_cols.
int32_t a_rows;
int32_t a_cols;
int32_t b_cols;
// Quantization parameters: offsets for the operand values, and the output multiplier, shift and offset.
int32_t input_a_offset;
int32_t input_b_offset;
int32_t output_mul;
int32_t output_shift;
int32_t output_offset;
// b_cols bias values (see the read_span length in deserialize).
xtl::span<const int32_t> bias;
// Reads every scalar field, then a bias span of b_cols elements.
// NOTE(review): read_span presumably yields a view into the reader's buffer rather than a copy — confirm lifetime.
void deserialize(span_reader &reader)
{
reader.read(input_a);
reader.read(input_b);
reader.read(output);
reader.read(a_rows);
reader.read(a_cols);
reader.read(b_cols);
reader.read(input_a_offset);
reader.read(input_b_offset);
reader.read(output_mul);
reader.read(output_shift);
reader.read(output_offset);
reader.read_span(bias, b_cols);
}
// Writes every field in declaration order, followed by the bias array.
void serialize(binary_writer &writer) const
{
writer.write(input_a);
writer.write(input_b);
writer.write(output);
writer.write(a_rows);
writer.write(a_cols);
writer.write(b_cols);
writer.write(input_a_offset);
writer.write(input_b_offset);
writer.write(output_mul);
writer.write(output_shift);
writer.write(output_offset);
writer.write_array(bias);
}
};
struct memory_copy_options : public simple_node_body<memory_copy_options>
{
memory_range input;

View File

@ -1,19 +1,22 @@
// Neutral
// Each entry is DEFINE_NEUTRAL_RUNTIME_OP(<id>, <name>, <value>); values ascend from 0x0 to 0x11.
// NOTE(review): presumably <id> is the C++ identifier, <name> the display name and <value> the opcode — confirm against the macro definition.
// NOTE(review): entries 0x0-0x0E appear twice in this view, which looks like old/new diff residue — confirm the real file lists each op once.
BEGINE_DEFINE_TARGET(neutral)
DEFINE_NEUTRAL_RUNTIME_OP(binary, Binary, 0x0)
DEFINE_NEUTRAL_RUNTIME_OP(concat, Concat, 0x1)
DEFINE_NEUTRAL_RUNTIME_OP(conv2d, Conv2D, 0x2)
DEFINE_NEUTRAL_RUNTIME_OP(dequantize, Dequantize, 0x3)
DEFINE_NEUTRAL_RUNTIME_OP(matmul, MatMul, 0x4)
DEFINE_NEUTRAL_RUNTIME_OP(pad, Pad, 0x5)
DEFINE_NEUTRAL_RUNTIME_OP(quantize, Quantize, 0x6)
DEFINE_NEUTRAL_RUNTIME_OP(reduce, Reduce, 0x7)
DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d, ReduceWindow2D, 0x8)
DEFINE_NEUTRAL_RUNTIME_OP(memory_copy, MemoryCopy, 0x9)
DEFINE_NEUTRAL_RUNTIME_OP(resize_image, ResizeImage, 0x0A)
DEFINE_NEUTRAL_RUNTIME_OP(softmax, Softmax, 0x0B)
DEFINE_NEUTRAL_RUNTIME_OP(transpose, Transpose, 0x0C)
DEFINE_NEUTRAL_RUNTIME_OP(strided_slice, StridedSlice, 0x0D)
DEFINE_NEUTRAL_RUNTIME_OP(unary, Unary, 0x0E)
DEFINE_NEUTRAL_RUNTIME_OP(binary, Binary, 0x0)
DEFINE_NEUTRAL_RUNTIME_OP(concat, Concat, 0x1)
DEFINE_NEUTRAL_RUNTIME_OP(conv2d, Conv2D, 0x2)
DEFINE_NEUTRAL_RUNTIME_OP(dequantize, Dequantize, 0x3)
DEFINE_NEUTRAL_RUNTIME_OP(matmul, MatMul, 0x4)
DEFINE_NEUTRAL_RUNTIME_OP(pad, Pad, 0x5)
DEFINE_NEUTRAL_RUNTIME_OP(quantize, Quantize, 0x6)
DEFINE_NEUTRAL_RUNTIME_OP(reduce, Reduce, 0x7)
DEFINE_NEUTRAL_RUNTIME_OP(reduce_window2d, ReduceWindow2D, 0x8)
DEFINE_NEUTRAL_RUNTIME_OP(memory_copy, MemoryCopy, 0x9)
DEFINE_NEUTRAL_RUNTIME_OP(resize_image, ResizeImage, 0x0A)
DEFINE_NEUTRAL_RUNTIME_OP(softmax, Softmax, 0x0B)
DEFINE_NEUTRAL_RUNTIME_OP(transpose, Transpose, 0x0C)
DEFINE_NEUTRAL_RUNTIME_OP(strided_slice, StridedSlice, 0x0D)
DEFINE_NEUTRAL_RUNTIME_OP(unary, Unary, 0x0E)
// Quantized operators, appended after the existing values.
DEFINE_NEUTRAL_RUNTIME_OP(quantized_conv2d, QuantizedConv2D, 0x0F)
DEFINE_NEUTRAL_RUNTIME_OP(quantized_matmul, QuantizedMatMul, 0x10)
DEFINE_NEUTRAL_RUNTIME_OP(quantized_binary, QuantizedBinary, 0x11)
END_DEFINE_TARGET()
// CPU

View File

@ -123,6 +123,14 @@ namespace runtime
return (int32_t)carry_shift<int64_t, Banker>((int64_t)value * mul, shift);
}
// Saturates `value` to the range of a signed two's-complement integer that is `Bits` wide,
// i.e. [-2^(Bits-1), 2^(Bits-1) - 1].
//
// Bits must be in [1, 32]; anything else would need a shift count outside [0, 31], which is
// undefined behaviour, so it is rejected at compile time.
template <uint8_t Bits>
inline int32_t clamp(int32_t value)
{
    static_assert(Bits >= 1 && Bits <= 32, "clamp<Bits>: Bits must be in the range [1, 32]");

    // The upper bound comes from shifting a non-negative value, which is fully defined.
    constexpr int32_t max = std::numeric_limits<int32_t>::max() >> (32 - Bits);
    // The lower bound is derived from the upper one, avoiding a right shift of a negative value
    // (implementation-defined before C++20).
    constexpr int32_t min = -max - 1;
    return std::clamp(value, min, max);
}
template <class T>
struct to_datatype
{

View File

@ -18,13 +18,22 @@
#include <scheduler/memory_allocator.h>
#include <transforms/transform.h>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace nncase
{
// Options carried by a compilation target.
struct target_options
{
// NOTE(review): presumably names the data type expected for the model input — confirm against the code that reads it.
std::string input_type;
};
class target
{
public:
target(const target_options &options)
: options_(options) {}
virtual void fill_allocators(std::unordered_map<memory_type_t, scheduler::memory_allocator *> &allocators, std::vector<std::unique_ptr<scheduler::memory_allocator>> &allocator_holders) = 0;
virtual void registry_codegen_ops() = 0;
virtual void registry_evaluator_ops() = 0;
@ -32,6 +41,10 @@ public:
virtual void add_optimize1_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_optimize2_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_checkpoint_transforms(std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_transforms(ir::quantizer& quantizer, const quant_param_t& input_quant_param, std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_transforms(ir::quantizer &quantizer, std::vector<std::unique_ptr<transforms::transform>> &transforms) = 0;
virtual void add_quantization_broadcast(std::unordered_set<ir::node_opcode> &opcodes) = 0;
protected:
target_options options_;
};
}

View File

@ -35,6 +35,19 @@ using namespace nncase::runtime;
return kcr_error; \
}
// Dispatches KERNEL on a runtime datatype that is either float or quantized uint8:
// dt_float32 expands KERNEL(float), dt_uint8 expands KERNEL(uint8_t).
// Any other datatype makes the enclosing function return kcr_error, so this macro may only be
// used inside a function returning kernel_call_result.
#define FP_OR_Q_IMPL(type, KERNEL) \
switch (type) \
{ \
case dt_float32: \
KERNEL(float); \
break; \
case dt_uint8: \
KERNEL(uint8_t); \
break; \
default: \
return kcr_error; \
}
namespace nncase
{
namespace runtime
@ -76,6 +89,43 @@ namespace runtime
}
}
// Interpreter entry point for the quantized element-wise binary node.
// Resolves the operand and result buffers, then runs the neutral kernel with the functor that
// matches options.binary_op. Returns kcr_done on success and kcr_error for an unsupported op.
kernel_call_result quantized_binary(quantized_binary_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
    auto lhs_mem = interpreter.memory_at<uint8_t>(options.input_a);
    auto rhs_mem = interpreter.memory_at<uint8_t>(options.input_b);
    auto out_mem = interpreter.memory_at<uint8_t>(options.output);

    // Everything except the element-wise functor is identical for every operation.
    auto run_kernel = [&](auto op) {
        kernels::neutral::quantized_binary(lhs_mem.data(), rhs_mem.data(), out_mem.data(), options.in_a_shape, options.in_b_shape, options.out_shape,
            options.input_a_offset, options.input_a_mul, options.input_a_shift, options.input_b_offset, options.input_b_mul, options.input_b_shift,
            options.output_mul, options.output_shift, options.output_offset, op);
    };

    switch (options.binary_op)
    {
    case binary_add:
        run_kernel([](auto lhs, auto rhs) { return lhs + rhs; });
        break;
    case binary_sub:
        run_kernel([](auto lhs, auto rhs) { return lhs - rhs; });
        break;
    case binary_mul:
        run_kernel([](auto lhs, auto rhs) { return lhs * rhs; });
        break;
    case binary_div:
        // Adds half the divisor before dividing.
        // NOTE(review): a rescaled divisor of 0 would divide by zero here — confirm callers exclude it.
        run_kernel([](auto lhs, auto rhs) { return (lhs + rhs / 2) / rhs; });
        break;
    case binary_min:
        run_kernel([](auto lhs, auto rhs) { return std::min(lhs, rhs); });
        break;
    case binary_max:
        run_kernel([](auto lhs, auto rhs) { return std::max(lhs, rhs); });
        break;
    default:
        return kcr_error;
    }

    return kcr_done;
}
kernel_call_result concat(concat_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
auto output = interpreter.memory_at<uint8_t>(options.output);
@ -93,6 +143,16 @@ namespace runtime
return kcr_done;
}
// Interpreter entry point for the quantized 2D convolution node.
// Resolves the input and output buffers and forwards every option to the neutral kernel;
// weights and bias come straight from the options. Always returns kcr_done.
kernel_call_result quantized_conv2d(quantized_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
    auto src = interpreter.memory_at<uint8_t>(options.input);
    auto dest = interpreter.memory_at<uint8_t>(options.output);

    kernels::neutral::quantized_conv2d(
        src.data(), dest.data(), options.weights.data(), options.bias.data(),
        options.input_offset, options.filter_offset,
        options.output_mul, options.output_shift, options.output_offset,
        options.in_shape, options.groups, options.out_channels,
        options.filter_h, options.filter_w,
        options.stride_h, options.stride_w,
        options.dilation_h, options.dilation_w,
        options.padding_h, options.padding_w);

    return kcr_done;
}
kernel_call_result dequantize(dequantize_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
auto input = interpreter.memory_at<uint8_t>(options.input);
@ -111,6 +171,16 @@ namespace runtime
return kcr_done;
}
// Interpreter entry point for the quantized matrix-multiply node.
// Resolves the operand and result buffers and forwards every option to the neutral kernel;
// bias comes straight from the options. Always returns kcr_done.
kernel_call_result quantized_matmul(quantized_matmul_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
    auto lhs_mem = interpreter.memory_at<uint8_t>(options.input_a);
    auto rhs_mem = interpreter.memory_at<uint8_t>(options.input_b);
    auto out_mem = interpreter.memory_at<uint8_t>(options.output);

    kernels::neutral::quantized_matmul(
        lhs_mem.data(), rhs_mem.data(), out_mem.data(), options.bias.data(),
        options.a_rows, options.a_cols, options.b_cols,
        options.input_a_offset, options.input_b_offset,
        options.output_mul, options.output_shift, options.output_offset);

    return kcr_done;
}
kernel_call_result memory_copy(memory_copy_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
auto input = interpreter.memory_at<float>(options.input);
@ -205,20 +275,24 @@ namespace runtime
kernel_call_result resize_image(resize_image_options &options, interpreter_t &interpreter, interpreter_step_t step)
{
auto input = interpreter.memory_at<float>(options.input);
auto output = interpreter.memory_at<float>(options.output);
auto input = interpreter.memory_at<uint8_t>(options.input);
auto output = interpreter.memory_at<uint8_t>(options.output);
if (options.mode == image_resize_bilinear)
{
kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners);
#define RESIZE_BL_KERNEL(T) \
kernels::neutral::resize_bilinear(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.out_h, options.out_w, options.align_corners);
FP_OR_Q_IMPL(options.input.datatype, RESIZE_BL_KERNEL);
return kcr_done;
#undef RESIZE_BL_KERNEL
}
else
{
#define RESIZE_NN_KERNEL(T) \
kernels::neutral::resize_nearest_neighbor(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.out_h, options.out_w);
ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL);
FP_OR_Q_IMPL(options.input.datatype, RESIZE_NN_KERNEL);
return kcr_done;
#undef RESIZE_NN_KERNEL
}