kendryte-standalone-sdk/lib/nncase/v0/include/quantize.h

110 lines
3.3 KiB
C++

/* Copyright 2019-2020 Canaan Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "datatypes.h"
#include <cassert>
#include <cmath>
#include <limits>
namespace nncase
{
namespace quant
{
/**
 * Computes the smallest and largest finite value in [begin, end).
 *
 * Non-finite elements (NaN, +/-infinity) are skipped.
 *
 * @param begin Iterator to the first element; elements must be float.
 * @param end   Iterator one past the last element.
 * @return { min, max } over the finite elements. If the range holds no
 *         finite element, the result is the inverted sentinel pair
 *         { numeric_limits<float>::max(), numeric_limits<float>::lowest() }.
 */
template <class TIt>
value_range<float> get_range(TIt begin, TIt end)
{
    float min = std::numeric_limits<float>::max();
    // lowest(), not min(): numeric_limits<float>::min() is the smallest
    // positive normal value, so seeding with it reports a tiny positive
    // maximum whenever every input is negative.
    float max = std::numeric_limits<float>::lowest();

    for (; begin != end; ++begin)
    {
        const auto value = *begin;
        // isfinite() accepts exactly the normal, subnormal and zero classes.
        if (std::isfinite(value))
        {
            min = std::min(min, value);
            max = std::max(max, value);
        }
    }

    return { min, max };
}
/**
 * Adjusts a value range before quantization parameters are derived from it.
 *
 * Steps, in order:
 *  1. min is raised to -1e3 if below it; max is lowered to 1e3 if above it.
 *  2. The span (max - min) is replaced by 0.1 when it is exactly zero, or by
 *     0.01 when it is smaller than 0.01 (including negative spans), and max
 *     is recomputed as min + span.
 *  3. The range is widened so that it contains zero.
 *
 * @param range Input range (taken by value).
 * @return The adjusted range.
 */
inline value_range<float> fixup_range(value_range<float> range)
{
    constexpr float limit = 1e3f;
    constexpr float zero_span_replacement = 0.1f;
    constexpr float smallest_span = 0.01f;

    // One-sided clamps: only the lower bound of min and the upper bound of
    // max are enforced here.
    if (range.min < -limit)
    {
        range.min = -limit;
    }
    if (range.max > limit)
    {
        range.max = limit;
    }

    // Enforce a minimum width.
    auto span = range.max - range.min;
    if (span == 0)
    {
        span = zero_span_replacement;
    }
    else if (span < smallest_span)
    {
        span = smallest_span;
    }
    range.max = range.min + span;

    // Make sure zero lies inside the range.
    if (range.max < 0)
    {
        range.max = 0;
    }
    if (range.min > 0)
    {
        range.min = 0;
    }

    return range;
}
/**
 * Derives quantization parameters for mapping a float range onto `bits`-bit
 * codes.
 *
 * The range is first passed through fixup_range(). Then:
 *   scale = (2^bits - 1) / (max - min)
 *   bias  = round(-min * scale)
 *
 * @param range Observed value range.
 * @param bits  Number of bits of the quantized representation.
 * @return { bias as int32_t, scale }.
 */
inline quant_param_t get_quant_param(value_range<float> range, int32_t bits)
{
    const auto fixed = fixup_range(range);
    const auto span = fixed.max - fixed.min;

    // Largest code representable with `bits` bits.
    const auto max_code = (1LL << bits) - 1;
    const auto scale = max_code / span;
    const auto bias = std::round(-fixed.min * scale);

    // Debug-build check that the rounded bias is not negative.
    assert(bias >= 0);
    return { static_cast<int32_t>(bias), scale };
}
/**
 * Expresses `value` as a multiplier and a right shift such that
 *   value == mul * 2^(-shift)
 * (checked by assertion to within float epsilon).
 *
 * @param value     The factor to represent.
 * @param max_bits  Bit budget for the multiplier; one bit is subtracted when
 *                  is_signed is true.
 * @param max_shift Largest shift allowed.
 * @param is_signed Whether one bit of max_bits is set aside (presumably for
 *                  the sign - confirm against callers).
 * @return { mul, shift as int8_t }. A zero value yields { 0, 0 }.
 *
 * Debug-build assertions verify the precondition on value/is_signed, that
 * |mul| < 2^bits, and that 0 <= shift <= max_shift.
 */
inline fixed_mul get_fixed_mul(float value, int32_t max_bits, uint8_t max_shift, bool is_signed)
{
    assert(!is_signed || value >= 0);

    const auto bits = is_signed ? max_bits - 1 : max_bits;

    // Defaults double as the result for value == 0.
    float multiplier = 0;
    int32_t rshift = 0;

    if (value != 0)
    {
        // value == multiplier * 2^exponent, with |multiplier| in [0.5, 1).
        int exponent;
        multiplier = std::frexp(value, &exponent);

        if (std::abs(value) > 1)
        {
            // Large magnitude: the exponent already consumes part of the bit
            // budget, so the shift is limited by what is left.
            rshift = std::min(static_cast<int32_t>(max_shift), bits - exponent);
            multiplier = multiplier * std::pow(2.f, rshift + exponent);
        }
        else
        {
            // Magnitude <= 1: scale the mantissa up first, then fold the
            // (non-positive) exponent into the shift.
            rshift = std::min(max_shift + exponent, bits);
            multiplier = multiplier * std::pow(2.f, rshift);
            rshift -= exponent;
        }
    }

    assert(std::abs(multiplier) < std::pow(2, bits));
    assert(rshift >= 0 && rshift <= max_shift);
    assert(std::abs(value - multiplier * std::pow(2, -rshift)) <= std::numeric_limits<float>::epsilon());
    return { multiplier, static_cast<int8_t>(rshift) };
}
}
}