From d75488b0e88137ea969c29aa74c2860dbb402950 Mon Sep 17 00:00:00 2001 From: Jorge Rodriguez Date: Thu, 31 Jul 2014 19:00:48 -0700 Subject: [PATCH] Do the multiply inline, it should be a tad faster and not corrupt our data. --- stb_resample.h | 63 ++++++++++++++++++++++++++----------------------- tests/resample_test.cpp | 4 ++-- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/stb_resample.h b/stb_resample.h index b3cc92c..b8347f5 100644 --- a/stb_resample.h +++ b/stb_resample.h @@ -1003,76 +1003,81 @@ static float* stbr__get_ring_buffer_scanline(int get_scanline, float* ring_buffe static stbr_inline void stbr__encode_scanline(void* output_buffer, int output_texel_index, float* encode_buffer, int encode_texel_index, int channels, int premul_alpha_channel, int decode) { int n; + float divide_alpha = 1; if (premul_alpha_channel) - { - for (n = 0; n < channels; n++) - { - if (n == premul_alpha_channel) - continue; - - encode_buffer[encode_texel_index + n] /= encode_buffer[encode_texel_index + premul_alpha_channel]; - } - } + divide_alpha = encode_buffer[encode_texel_index + premul_alpha_channel]; switch (decode) { case STBR__DECODE(STBR_TYPE_UINT8, STBR_COLORSPACE_LINEAR): for (n = 0; n < channels; n++) - ((unsigned char*)output_buffer)[output_texel_index + n] = (unsigned char)(stbr__saturate(encode_buffer[encode_texel_index + n]) * 255); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned char*)output_buffer)[output_texel_index + n] = (unsigned char)(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel) * 255); + } break; case STBR__DECODE(STBR_TYPE_UINT8, STBR_COLORSPACE_SRGB): for (n = 0; n < channels; n++) - ((unsigned char*)output_buffer)[output_texel_index + n] = stbr__linear_uchar_to_srgb_uchar[(unsigned char)(stbr__saturate(encode_buffer[encode_texel_index + n]) * 255)]; + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned char*)output_buffer)[output_texel_index + n] = stbr__linear_uchar_to_srgb_uchar[(unsigned char)(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel) * 255)]; + } break; case STBR__DECODE(STBR_TYPE_UINT16, STBR_COLORSPACE_LINEAR): for (n = 0; n < channels; n++) - ((unsigned short*)output_buffer)[output_texel_index + n] = (unsigned short)(stbr__saturate(encode_buffer[encode_texel_index + n]) * 65535); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned short*)output_buffer)[output_texel_index + n] = (unsigned short)(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel) * 65535); + } break; case STBR__DECODE(STBR_TYPE_UINT16, STBR_COLORSPACE_SRGB): for (n = 0; n < channels; n++) - ((unsigned short*)output_buffer)[output_texel_index + n] = (unsigned short)(stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n])) * 65535); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned short*)output_buffer)[output_texel_index + n] = (unsigned short)(stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel)) * 65535); + } break; case STBR__DECODE(STBR_TYPE_UINT32, STBR_COLORSPACE_LINEAR): for (n = 0; n < channels; n++) - ((unsigned int*)output_buffer)[output_texel_index + n] = (unsigned int)(((double)stbr__saturate(encode_buffer[encode_texel_index + n])) * 4294967295); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned int*)output_buffer)[output_texel_index + n] = (unsigned int)(((double)stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel)) * 4294967295); + } break; case STBR__DECODE(STBR_TYPE_UINT32, STBR_COLORSPACE_SRGB): for (n = 0; n < channels; n++) - ((unsigned int*)output_buffer)[output_texel_index + n] = (unsigned int)(((double)stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n]))) * 4294967295); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((unsigned int*)output_buffer)[output_texel_index + n] = (unsigned int)(((double)stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel))) * 4294967295); + } break; case STBR__DECODE(STBR_TYPE_FLOAT, STBR_COLORSPACE_LINEAR): for (n = 0; n < channels; n++) - ((float*)output_buffer)[output_texel_index + n] = stbr__saturate(encode_buffer[encode_texel_index + n]); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((float*)output_buffer)[output_texel_index + n] = stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel); + } break; case STBR__DECODE(STBR_TYPE_FLOAT, STBR_COLORSPACE_SRGB): for (n = 0; n < channels; n++) - ((float*)output_buffer)[output_texel_index + n] = stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n])); + { + float divide_alpha_channel = (n == premul_alpha_channel) ? 1 : divide_alpha; + ((float*)output_buffer)[output_texel_index + n] = stbr__linear_to_srgb(stbr__saturate(encode_buffer[encode_texel_index + n] / divide_alpha_channel)); + } break; default: STBR_UNIMPLEMENTED("Unknown type/colorspace/channels combination."); break; } - - // Put it back the way it was in case this is a ring buffer. - if (premul_alpha_channel) - { - for (n = 0; n < channels; n++) - { - if (n == premul_alpha_channel) - continue; - - encode_buffer[encode_texel_index + n] *= encode_buffer[encode_texel_index + premul_alpha_channel]; - } - } } static void stbr__resample_vertical_upsample(stbr__info* stbr_info, int n, int in_first_scanline, int in_last_scanline, float in_center_of_out) diff --git a/tests/resample_test.cpp b/tests/resample_test.cpp index e7b5ec2..98e7166 100644 --- a/tests/resample_test.cpp +++ b/tests/resample_test.cpp @@ -295,8 +295,8 @@ void test_premul(const char* file) stbi_write_png("test-output/premul-original.png", w, h, n, input_data, 0); - int new_w = (int)(w * .5); - int new_h = (int)(h * .5); + int new_w = (int)(w * .1); + int new_h = (int)(h * .1); unsigned char* output_data = (unsigned char*)malloc(new_w * new_h * n * sizeof(unsigned char));