Put progressive JPEG AC decode logic back the way I wrote it originally (I changed it to match jpgd when I was trying to figure out why it didn't work);

add STBI__ prefixes to internal SCAN_ enum;
strip unused function arguments for progressive funcs;
tweak release notes;
(I forgot to git commit frequently, so all of the above are bundled into this one commit instead of each being in its own commit)
pull/59/head
Sean Barrett 2014-12-23 05:11:36 -08:00
parent 16d9ed7211
commit 5b53d20c68
2 changed files with 68 additions and 81 deletions

View File

@ -1,5 +1,4 @@
/* stb_image - v1.49 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
when you control the images you're loading
no warranty implied; use at your own risk
Do this:
@ -14,7 +13,7 @@
Primarily of interest to game developers and other people who can
avoid problematic images and only need the trivial interface
JPEG baseline (no JPEG progressive)
JPEG baseline & progressive (no arithmetic)
PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
TGA (not sure what subset, if a subset)
@ -37,14 +36,14 @@
- Progressive JPEG is now supported.
- PPM and PGM binary formats are now supported.
- PPM and PGM binary formats are now supported, thanks to Ken Miller.
- x86 platforms now make use of SSE2 SIMD instructions if available.
This release is 2x faster on our test JPEGs, mostly due to SIMD.
- x86 platforms now make use of SSE2 SIMD instructions for
JPEG decoding, and ARM platforms use NEON SIMD. This release is
2x faster on our test JPEGs on x86 (except progressive JPEGs,
which see much less speedup), mostly due to the addition of SIMD.
This work was done by Fabian "ryg" Giesen.
- ARM platforms now make use of NEON SIMD instructions if available.
- Compilation of SIMD code can be suppressed with
#define STBI_NO_SIMD
It should not be necessary to disable it unless you have issues
@ -57,7 +56,7 @@
- The old STBI_SIMD system which allowed installing a user-defined
IDCT etc. has been removed. If you need this, don't upgrade. My
assumption is that almost nobody was doing this, and those who
were will find the next bullet item more satisfactory anyway.
were will find the built-in SIMD more satisfactory anyway.
- RGB values computed for JPEG images are slightly different from
previous versions of stb_image. (This is due to using less
@ -87,7 +86,7 @@
Latest revision history:
2.00 (2014-12-25) optimize JPG, incl. x86 & NEON SIMD
2.00 (2014-12-25) optimize JPEG, incl. x86 & NEON SIMD
progressive JPEG
PGM/PPM support
STBI_MALLOC,STBI_REALLOC,STBI_FREE
@ -903,9 +902,9 @@ STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
enum
{
SCAN_load=0,
SCAN_type,
SCAN_header
STBI__SCAN_load=0,
STBI__SCAN_type,
STBI__SCAN_header
};
static void stbi__refill_buffer(stbi__context *s)
@ -1454,7 +1453,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman
return 1;
}
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
int diff,dc;
int t;
@ -1470,18 +1469,18 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
dc = j->img_comp[b].dc_pred + diff;
j->img_comp[b].dc_pred = dc;
data[0] = (short) ((dc << j->succ_low));
data[0] = (short) (dc << j->succ_low);
} else {
// refinement scan for DC coefficient
if (stbi__jpeg_get_bit(j))
data[0] += 1 << j->succ_low;
data[0] += (short) (1 << j->succ_low);
}
return 1;
}
// @OPTIMIZE: store non-zigzagged during the decode passes,
// and only de-zigzag when dequantizing
static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
{
int k;
if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
@ -1501,7 +1500,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
r = fac[c];
if (0 && r) { // fast-AC path
if (r) { // fast-AC path
k += (r >> 4) & 15; // run
s = r & 15; // combined length
j->code_buffer <<= s;
@ -1532,24 +1531,36 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
} else {
// refinement scan for these AC coefficients
int bit = 1 << j->succ_low;
k = j->spec_start;
short bit = (short) (1 << j->succ_low);
if (j->eob_run == 0) {
if (j->eob_run) {
--j->eob_run;
for (k = j->spec_start; k <= j->spec_end; ++k) {
short *p = &data[stbi__jpeg_dezigzag[k]];
if (*p != 0)
if (stbi__jpeg_get_bit(j))
if ((*p & bit)==0)
if (*p > 0)
*p += bit;
else
*p -= bit;
}
} else {
k = j->spec_start;
do {
int r,s;
int rs = stbi__jpeg_huff_decode(j, hac);
int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
s = rs & 15;
r = rs >> 4;
if (s == 0) {
if (r < 15) {
j->eob_run = (1 << r);
j->eob_run = (1 << r) - 1;
if (r)
j->eob_run += stbi__jpeg_get_bits(j, r);
break; // fall through to j->eob_run != 0 case below, which continues k
}
r = 16; // r=15 is the code for 16 0s
r = 64; // force end of block
} else
r = 16; // r=15 is the code for 16 0s
} else {
if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
// sign bit
@ -1563,47 +1574,26 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
while (k <= j->spec_end) {
short *p = &data[stbi__jpeg_dezigzag[k]];
if (*p != 0) {
if (stbi__jpeg_get_bit(j)) {
if ((*p & bit) == 0)
if (stbi__jpeg_get_bit(j))
if ((*p & bit)==0)
if (*p > 0)
*p += bit;
else
*p -= bit;
}
++k;
} else {
if (r == 0)
if (r == 0) {
if (s)
data[stbi__jpeg_dezigzag[k++]] = s;
break;
}
--r;
++k;
}
}
if (s && k <= j->spec_end) {
data[stbi__jpeg_dezigzag[k++]] = s;
}
} while (k <= j->spec_end);
}
// catch case where a previous block had an eob run OR this block
// has an eob_run (in the previous if)
if (j->eob_run) {
--j->eob_run;
for (; k <= j->spec_end; ++k) {
short *p = &data[stbi__jpeg_dezigzag[k]];
if (*p != 0)
// if we already have a history for this, get a bit of it
if (stbi__jpeg_get_bit(j)) {
if ((*p & bit) == 0) // not sure about this, it's in Rich's code, it would mean some bits get sent more than once
if (*p > 0)
*p += bit;
else
*p -= bit;
}
}
}
}
return 1;
}
@ -1964,7 +1954,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
}
}
return 1;
} else { // interleaved!
} else { // interleaved
int i,j,k,x,y;
STBI_SIMD_ALIGN(short, data[64]);
for (j=0; j < z->img_mcu_y; ++j) {
@ -1988,8 +1978,6 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
// so now count down the restart interval
if (--z->todo <= 0) {
if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
// if it's NOT a restart, then just bail, so we get corrupt data
// rather than no data
if (!STBI__RESTART(z->marker)) return 1;
stbi__jpeg_reset(z);
}
@ -2009,27 +1997,25 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
int h = (z->img_comp[n].y+7) >> 3;
for (j=0; j < h; ++j) {
for (i=0; i < w; ++i) {
int ha = z->img_comp[n].ha;
short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
if (z->spec_start == 0) {
if (!stbi__jpeg_decode_block_prog_dc(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
return 0;
} else {
if (!stbi__jpeg_decode_block_prog_ac(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
int ha = z->img_comp[n].ha;
if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
return 0;
}
// every data block is an MCU, so count down the restart interval
if (--z->todo <= 0) {
if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
// if it's NOT a restart, then just bail, so we get corrupt data
// rather than no data
if (!STBI__RESTART(z->marker)) return 1;
stbi__jpeg_reset(z);
}
}
}
return 1;
} else { // interleaved!
} else { // interleaved
int i,j,k,x,y;
for (j=0; j < z->img_mcu_y; ++j) {
for (i=0; i < z->img_mcu_x; ++i) {
@ -2044,7 +2030,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
int y2 = (j*z->img_comp[n].v + y);
int ha = z->img_comp[n].ha;
short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
if (!stbi__jpeg_decode_block_prog_dc(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
return 0;
}
}
@ -2053,8 +2039,6 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
// so now count down the restart interval
if (--z->todo <= 0) {
if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
// if it's NOT a restart, then just bail, so we get corrupt data
// rather than no data
if (!STBI__RESTART(z->marker)) return 1;
stbi__jpeg_reset(z);
}
@ -2091,7 +2075,6 @@ static void stbi__jpeg_finish(stbi__jpeg *z)
}
}
static int stbi__process_marker(stbi__jpeg *z, int m)
{
int L;
@ -2224,7 +2207,7 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
}
if (scan != SCAN_load) return 1;
if (scan != STBI__SCAN_load) return 1;
if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
@ -2292,7 +2275,7 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
z->marker = STBI__MARKER_none; // initialize cached marker to empty
m = stbi__get_marker(z);
if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
if (scan == SCAN_type) return 1;
if (scan == STBI__SCAN_type) return 1;
m = stbi__get_marker(z);
while (!stbi__SOF(m)) {
if (!stbi__process_marker(z,m)) return 0;
@ -2313,7 +2296,7 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
int m;
j->restart_interval = 0;
if (!stbi__decode_jpeg_header(j, SCAN_load)) return 0;
if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
m = stbi__get_marker(j);
while (!stbi__EOI(m)) {
if (stbi__SOS(m)) {
@ -2822,14 +2805,14 @@ static int stbi__jpeg_test(stbi__context *s)
stbi__jpeg j;
j.s = s;
stbi__setup_jpeg(&j);
r = stbi__decode_jpeg_header(&j, SCAN_type);
r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
stbi__rewind(s);
return r;
}
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
if (!stbi__decode_jpeg_header(j, SCAN_header)) {
if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
stbi__rewind( j->s );
return 0;
}
@ -3728,7 +3711,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!stbi__check_png_header(s)) return 0;
if (scan == SCAN_type) return 1;
if (scan == STBI__SCAN_type) return 1;
for (;;) {
stbi__pngchunk c = stbi__get_chunk_header(s);
@ -3754,7 +3737,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (!pal_img_n) {
s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
if (scan == SCAN_header) return 1;
if (scan == STBI__SCAN_header) return 1;
} else {
// if paletted, then pal_n is our final components, and
// img_n is # components to decompress/filter.
@ -3783,7 +3766,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
if (pal_img_n) {
if (scan == SCAN_header) { s->img_n = 4; return 1; }
if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
pal_img_n = 4;
@ -3802,7 +3785,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
case STBI__PNG_TYPE('I','D','A','T'): {
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; }
if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
if (ioff + c.length > idata_limit) {
stbi_uc *p;
if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
@ -3819,7 +3802,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
case STBI__PNG_TYPE('I','E','N','D'): {
stbi__uint32 raw_len;
if (first) return stbi__err("first not IHDR", "Corrupt PNG");
if (scan != SCAN_load) return 1;
if (scan != STBI__SCAN_load) return 1;
if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
// initial guess for decoded data size to avoid unnecessary reallocs
raw_len = s->img_x * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
@ -3873,7 +3856,7 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
{
unsigned char *result=NULL;
if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
if (stbi__parse_png_file(p, SCAN_load, req_comp)) {
if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
result = p->out;
p->out = NULL;
if (req_comp && req_comp != p->s->img_out_n) {
@ -3909,7 +3892,7 @@ static int stbi__png_test(stbi__context *s)
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
if (!stbi__parse_png_file(p, SCAN_header, 0)) {
if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
stbi__rewind( p->s );
return 0;
}

View File

@ -19,18 +19,22 @@ void test_ycbcr(void)
STBI_SIMD_ALIGN(unsigned char, out2[256][4]);
int i,j,k;
int count = 0, bigcount=0;
int count = 0, bigcount=0, total=0;
for (i=0; i < 256; ++i) {
for (j=0; j < 256; ++j) {
for (k=0; k < 256; ++k) {
y[k] = k;
y [k] = k;
cb[k] = j;
cr[k] = i;
}
stbi__YCbCr_to_RGB_row(out1[0], y, cb, cr, 256, 4);
stbi__YCbCr_to_RGB_sse2(out2[0], y, cb, cr, 256, 4);
for (k=0; k < 256; ++k) {
// inaccurate proxy for values outside of RGB cube
if (out1[k][0] == 0 || out1[k][1] == 0 || out1[k][2] == 0 || out1[k][0] == 255 || out1[k][1] == 255 || out1[k][2] == 255)
continue;
++total;
if (out1[k][0] != out2[k][0] || out1[k][1] != out2[k][1] || out1[k][2] != out2[k][2]) {
int dist1 = abs(out1[k][0] - out2[k][0]);
int dist2 = abs(out1[k][1] - out2[k][1]);
@ -41,9 +45,9 @@ void test_ycbcr(void)
}
}
}
printf("So far: %d (%d big)\n", count, bigcount);
printf("So far: %d (%d big) of %d\n", count, bigcount, total);
}
printf("Final: %d (%d big)\n", count, bigcount);
printf("Final: %d (%d big) of %d\n", count, bigcount, total);
}
#endif