1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 /* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
28 when you control the images you're loading
31 Primarily of interest to game developers and other people who can
32 avoid problematic images and only need the trivial interface
34 JPEG baseline (no JPEG progressive, no oddball channel decimations)
37 TGA (not sure what subset, if a subset)
38 PSD (composited view only, no extra channels)
39 HDR (radiance rgbE format)
40 writes BMP,TGA (define STBI_NO_WRITE to remove code)
41 decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
42 supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD)
48 1.18 fix a threading bug (local mutable static)
49 1.17 support interlaced PNG
50 1.16 major bugfix - convert_format converted one too many pixels
51 1.15 initialize some fields for thread safety
52 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
54 1.12 const qualifiers in the API
55 1.11 Support installable IDCT, colorspace conversion routines
56 1.10 Fixes for 64-bit (don't use "unsigned long")
57 optimized upsampling by Fabian "ryg" Giesen
58 1.09 Fix format-conversion for PSD code (bad global variables!)
59 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
60 1.07 attempt to fix C++ warning/errors again
61 1.06 attempt to fix C++ warning/errors again
62 1.05 fix TGA loading to return correct *comp and use good luminance calc
63 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
64 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
65 1.02 support for (subset of) HDR files, float interface for preferred access to them
66 1.01 fix bug: possible bug in handling right-side up bmps... not sure
67 fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
68 1.00 interface to zlib that skips zlib header
69 0.99 correct handling of alpha in palette
70 0.98 TGA loader by lonesock; dynamically add loaders (untested)
71 0.97 jpeg errors on too large a file; also catch another stb_malloc failure
72 0.96 fix detection of invalid v value - particleman@mollyrocket forum
73 0.95 during header scan, seek to markers in case of padding
74 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
75 0.93 handle jpegtran output; verbose errors
76 0.92 read 4,8,16,24,32-bit BMP files of several formats
77 0.91 output 24-bit Windows 3.0 BMP files
78 0.90 fix a few more warnings; bump version number to approach 1.0
79 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
80 0.60 fix compiling as c++
81 0.59 fix warnings: merge Dave Moore's -Wall fixes
82 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
83 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less
85 0.56 fix bug: zlib uncompressed mode len vs. nlen
86 0.55 fix bug: restart_interval not initialized to 0
87 0.54 allow NULL for 'int *comp'
88 0.53 fix bug in png 3->4; speedup png decoding
89 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
90 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
91 on 'test' only check type, not whether we support this variant
94 #include "vogl_stb_image.h"
97 #pragma warning(disable : 4793) // function compiled as native
101 #include <math.h> // ldexp
102 #include <string.h> // strcmp
105 #ifndef STBI_NO_STDIO
// Allocation hook for this file (used by convert_format, ldr_to_hdr and
// hdr_to_ldr, among others): passes the request for `c` bytes to vogl_malloc
// and returns whatever pointer that call yields.
inline void *stb_malloc(size_t c)
{
    void *block = vogl_malloc(c);
    return block;
}
// Resize hook for this file: forwards the block `p` and the requested size
// `c` to vogl_realloc and returns its result unchanged.
inline void *stb_realloc(void *p, size_t c)
{
    void *resized = vogl_realloc(p, c);
    return resized;
}
124 inline void stb_free(void *p)
129 #if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__MINGW64__)
131 #define __forceinline inline
133 #define __forceinline
138 typedef unsigned char uint8;
139 typedef unsigned short uint16;
140 typedef signed short int16;
141 typedef unsigned int uint32;
142 typedef signed int int32;
143 typedef unsigned int uint;
145 // should produce compiler error if size is wrong
146 typedef unsigned char validate_uint32[sizeof(uint32) == 4];
148 #if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
149 #define STBI_NO_WRITE
152 //////////////////////////////////////////////////////////////////////////////
154 // Generic API that works on all image types
157 // this is not threadsafe
158 static const char *failure_reason;
160 const char *stbi_failure_reason(void)
162 return failure_reason;
165 static int e(const char *str)
167 failure_reason = str;
171 #ifdef STBI_NO_FAILURE_STRINGS
173 #elif defined(STBI_FAILURE_USERMSG)
179 #define epf(x, y) ((float *)(e(x, y) ? NULL : NULL))
180 #define epuc(x, y) ((unsigned char *)(e(x, y) ? NULL : NULL))
// Releases a buffer previously returned by one of the stbi load functions
// by handing it to stb_free.
void stbi_image_free(void *retval_from_stbi_load)
{
    void *const buffer = retval_from_stbi_load;
    stb_free(buffer);
}
187 #define MAX_LOADERS 32
188 stbi_loader *loaders[MAX_LOADERS];
189 static int max_loaders = 0;
191 int stbi_register_loader(stbi_loader *loader)
194 for (i = 0; i < MAX_LOADERS; ++i)
197 if (loaders[i] == loader)
200 if (loaders[i] == NULL)
212 static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
213 static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp);
216 #ifndef STBI_NO_STDIO
217 unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
219 FILE *f = fopen(filename, "rb");
220 unsigned char *result;
222 return epuc("can't fopen", "Unable to open file");
223 result = stbi_load_from_file(f, x, y, comp, req_comp);
229 unsigned char *stbi_load_w(wchar_t const *filename, int *x, int *y, int *comp, int req_comp)
231 FILE *f = _wfopen(filename, L"rb");
232 unsigned char *result;
234 return epuc("can't fopen", "Unable to open file");
235 result = stbi_load_from_file(f, x, y, comp, req_comp);
241 unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
244 if (stbi_jpeg_test_file(f))
245 return stbi_jpeg_load_from_file(f, x, y, comp, req_comp);
246 if (stbi_png_test_file(f))
247 return stbi_png_load_from_file(f, x, y, comp, req_comp);
248 if (stbi_bmp_test_file(f))
249 return stbi_bmp_load_from_file(f, x, y, comp, req_comp);
250 if (stbi_psd_test_file(f))
251 return stbi_psd_load_from_file(f, x, y, comp, req_comp);
253 if (stbi_hdr_test_file(f))
255 float *hdr = stbi_hdr_load_from_file(f, x, y, comp, req_comp);
256 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
259 for (i = 0; i < max_loaders; ++i)
260 if (loaders[i]->test_file(f))
261 return loaders[i]->load_from_file(f, x, y, comp, req_comp);
262 // test tga last because it's a crappy test!
263 if (stbi_tga_test_file(f))
264 return stbi_tga_load_from_file(f, x, y, comp, req_comp);
265 return epuc("unknown image type", "Image not of any known type, or corrupt");
269 unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
272 if (stbi_jpeg_test_memory(buffer, len))
273 return stbi_jpeg_load_from_memory(buffer, len, x, y, comp, req_comp);
274 if (stbi_png_test_memory(buffer, len))
275 return stbi_png_load_from_memory(buffer, len, x, y, comp, req_comp);
276 if (stbi_bmp_test_memory(buffer, len))
277 return stbi_bmp_load_from_memory(buffer, len, x, y, comp, req_comp);
278 if (stbi_psd_test_memory(buffer, len))
279 return stbi_psd_load_from_memory(buffer, len, x, y, comp, req_comp);
281 if (stbi_hdr_test_memory(buffer, len))
283 float *hdr = stbi_hdr_load_from_memory(buffer, len, x, y, comp, req_comp);
284 return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
287 for (i = 0; i < max_loaders; ++i)
288 if (loaders[i]->test_memory(buffer, len))
289 return loaders[i]->load_from_memory(buffer, len, x, y, comp, req_comp);
290 // test tga last because it's a crappy test!
291 if (stbi_tga_test_memory(buffer, len))
292 return stbi_tga_load_from_memory(buffer, len, x, y, comp, req_comp);
293 return epuc("unknown image type", "Image not of any known type, or corrupt");
298 #ifndef STBI_NO_STDIO
299 float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
301 FILE *f = fopen(filename, "rb");
304 return epf("can't fopen", "Unable to open file");
305 result = stbi_loadf_from_file(f, x, y, comp, req_comp);
310 float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
314 if (stbi_hdr_test_file(f))
315 return stbi_hdr_load_from_file(f, x, y, comp, req_comp);
317 data = stbi_load_from_file(f, x, y, comp, req_comp);
319 return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
320 return epf("unknown image type", "Image not of any known type, or corrupt");
324 float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
328 if (stbi_hdr_test_memory(buffer, len))
329 return stbi_hdr_load_from_memory(buffer, len, x, y, comp, req_comp);
331 data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
333 return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
334 return epf("unknown image type", "Image not of any known type, or corrupt");
// these is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR is
339 // defined, for API simplicity; if STBI_NO_HDR is defined, it always
342 int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
345 return stbi_hdr_test_memory(buffer, len);
351 #ifndef STBI_NO_STDIO
352 extern int stbi_is_hdr(char const *filename)
354 FILE *f = fopen(filename, "rb");
358 result = stbi_is_hdr_from_file(f);
364 extern int stbi_is_hdr_from_file(FILE *f)
367 return stbi_hdr_test_file(f);
375 // @TODO: get image dimensions & components without fully decoding
376 #ifndef STBI_NO_STDIO
377 extern int stbi_info(char const *filename, int *x, int *y, int *comp);
378 extern int stbi_info_from_file(FILE *f, int *x, int *y, int *comp);
380 extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
383 static float h2l_gamma_i = 1.0f / 2.2f, h2l_scale_i = 1.0f;
384 static float l2h_gamma = 2.2f, l2h_scale = 1.0f;
386 void stbi_hdr_to_ldr_gamma(float gamma)
388 h2l_gamma_i = 1 / gamma;
390 void stbi_hdr_to_ldr_scale(float scale)
392 h2l_scale_i = 1 / scale;
395 void stbi_ldr_to_hdr_gamma(float gamma)
399 void stbi_ldr_to_hdr_scale(float scale)
405 //////////////////////////////////////////////////////////////////////////////
407 // Common code used by all image loaders
420 int img_n, img_out_n;
422 #ifndef STBI_NO_STDIO
425 uint8 *img_buffer, *img_buffer_end;
428 #ifndef STBI_NO_STDIO
429 static void start_file(stbi *s, FILE *f)
435 static void start_mem(stbi *s, uint8 const *buffer, int len)
437 #ifndef STBI_NO_STDIO
440 s->img_buffer = (uint8 *)buffer;
441 s->img_buffer_end = (uint8 *)buffer + len;
444 __forceinline static int get8(stbi *s)
446 #ifndef STBI_NO_STDIO
449 int c = fgetc(s->img_file);
450 return c == EOF ? 0 : c;
453 if (s->img_buffer < s->img_buffer_end)
454 return *s->img_buffer++;
458 __forceinline static int at_eof(stbi *s)
460 #ifndef STBI_NO_STDIO
462 return feof(s->img_file);
464 return s->img_buffer >= s->img_buffer_end;
467 __forceinline static uint8 get8u(stbi *s)
469 return (uint8)get8(s);
472 static void skip(stbi *s, int n)
474 #ifndef STBI_NO_STDIO
476 fseek(s->img_file, n, SEEK_CUR);
482 static int get16(stbi *s)
485 return (z << 8) + get8(s);
488 static uint32 get32(stbi *s)
491 return (z << 16) + get16(s);
494 static int get16le(stbi *s)
497 return z + (get8(s) << 8);
500 static uint32 get32le(stbi *s)
502 uint32 z = get16le(s);
503 return z + (get16le(s) << 16);
506 static void getn(stbi *s, stbi_uc *buffer, int n)
508 #ifndef STBI_NO_STDIO
511 size_t nr = fread(buffer, 1, n, s->img_file);
512 VOGL_NOTE_UNUSED(nr);
516 memcpy(buffer, s->img_buffer, n);
520 //////////////////////////////////////////////////////////////////////////////
522 // generic converter from built-in img_n to req_comp
523 // individual types do this automatically as much as possible (e.g. jpeg
524 // does all cases internally since it needs to colorspace convert anyway,
525 // and it never has alpha, so very few cases ). png can automatically
526 // interleave an alpha=255 channel, but falls back to this for other cases
528 // assume data buffer is malloced, so stb_malloc a new one and free that one
529 // only failure mode is stb_malloc failing
531 static uint8 compute_y(int r, int g, int b)
533 return (uint8)(((r * 77) + (g * 150) + (29 * b)) >> 8);
536 static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
541 if (req_comp == img_n)
543 assert(req_comp >= 1 && req_comp <= 4);
545 good = (unsigned char *)stb_malloc(req_comp * x * y);
549 return epuc("outofmem", "Out of memory");
552 for (j = 0; j < (int)y; ++j)
554 unsigned char *src = data + j * x * img_n;
555 unsigned char *dest = good + j * x * req_comp;
557 #define COMBO(a, b) ((a) * 8 + (b))
560 for (i = x - 1; i >= 0; --i, src += a, dest += b)
561 // convert source image with img_n components to one with req_comp components;
562 // avoid switch per pixel, so use switch per scanline and massive macros
563 switch (COMBO(img_n, req_comp))
565 CASE(1, 2) dest[0] = src[0], dest[1] = 255;
567 CASE(1, 3) dest[0] = dest[1] = dest[2] = src[0];
569 CASE(1, 4) dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255;
571 CASE(2, 1) dest[0] = src[0];
573 CASE(2, 3) dest[0] = dest[1] = dest[2] = src[0];
575 CASE(2, 4) dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1];
577 CASE(3, 4) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 255;
579 CASE(3, 1) dest[0] = compute_y(src[0], src[1], src[2]);
581 CASE(3, 2) dest[0] = compute_y(src[0], src[1], src[2]), dest[1] = 255;
583 CASE(4, 1) dest[0] = compute_y(src[0], src[1], src[2]);
585 CASE(4, 2) dest[0] = compute_y(src[0], src[1], src[2]), dest[1] = src[3];
587 CASE(4, 3) dest[0] = src[0], dest[1] = src[1], dest[2] = src[2];
600 static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
603 float *output = (float *)stb_malloc(x * y * comp * sizeof(float));
607 return epf("outofmem", "Out of memory");
609 // compute number of non-alpha components
614 for (i = 0; i < x * y; ++i)
616 for (k = 0; k < n; ++k)
618 output[i * comp + k] = (float)pow(data[i * comp + k] / 255.0f, l2h_gamma) * l2h_scale;
621 output[i * comp + k] = data[i * comp + k] / 255.0f;
627 #define float2int(x) ((int)(x))
628 static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp)
631 stbi_uc *output = (stbi_uc *)stb_malloc(x * y * comp);
635 return epuc("outofmem", "Out of memory");
637 // compute number of non-alpha components
642 for (i = 0; i < x * y; ++i)
644 for (k = 0; k < n; ++k)
646 float z = (float)pow(data[i * comp + k] * h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
651 output[i * comp + k] = float2int(z);
655 float z = data[i * comp + k] * 255 + 0.5f;
660 output[i * comp + k] = float2int(z);
668 //////////////////////////////////////////////////////////////////////////////
670 // "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
672 // simple implementation
673 // - channel subsampling of at most 2 in each dimension
674 // - doesn't support delayed output of y-dimension
675 // - simple interface (only one output format: 8-bit interleaved RGB)
676 // - doesn't try to recover corrupt jpegs
677 // - doesn't allow partial loading, loading multiple at once
678 // - still fast on x86 (copying globals into locals doesn't help x86)
679 // - allocates lots of intermediate memory (full size of all components)
680 // - non-interleaved case requires this anyway
681 // - allows good upsampling (see next)
683 // - upsampled channels are bilinearly interpolated, even across blocks
684 // - quality integer IDCT derived from IJG's 'slow'
686 // - fast huffman; reasonable integer IDCT
687 // - uses a lot of intermediate memory, could cache poorly
688 // - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
689 // stb_jpeg: 1.34 seconds (MSVC6, default release build)
690 // stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro)
691 // IJL11.dll: 1.08 seconds (compiled by intel)
692 // IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG)
693 // IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro)
695 // huffman decoding acceleration
696 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
700 uint8 fast[1 << FAST_BITS];
701 // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
705 unsigned int maxcode[18];
706 int delta[17]; // old 'firstsymbol' - old 'firstcode'
712 unsigned short dequant2[4][64];
717 uint8 dequant[4][64];
719 // sizes for components, interleaved MCUs
720 int img_h_max, img_v_max;
721 int img_mcu_x, img_mcu_y;
722 int img_mcu_w, img_mcu_h;
724 // definition of jpeg image component
739 uint32 code_buffer; // jpeg entropy-coded buffer
740 int code_bits; // number of valid bits
741 unsigned char marker; // marker seen while filling entropy buffer
742 int nomore; // flag if we saw a marker so must stop
744 int scan_n, order[4];
745 int restart_interval, todo;
748 static int build_huffman(huffman *h, int *count)
750 int i, j, k = 0, code;
751 // build size list for each symbol (from JPEG spec)
752 for (i = 0; i < 16; ++i)
753 for (j = 0; j < count[i]; ++j)
754 h->size[k++] = (uint8)(i + 1);
757 // compute actual symbols (from jpeg spec)
760 for (j = 1; j <= 16; ++j)
762 // compute delta to add to code to compute symbol id
763 h->delta[j] = k - code;
766 while (h->size[k] == j)
767 h->code[k++] = (uint16)(code++);
768 if (code - 1 >= (1 << j))
769 return e("bad code lengths", "Corrupt JPEG");
771 // compute largest code + 1 for this size, preshifted as needed later
772 h->maxcode[j] = code << (16 - j);
775 h->maxcode[j] = 0xffffffff;
777 // build non-spec acceleration table; 255 is flag for not-accelerated
778 memset(h->fast, 255, 1 << FAST_BITS);
779 for (i = 0; i < k; ++i)
784 int c = h->code[i] << (FAST_BITS - s);
785 int m = 1 << (FAST_BITS - s);
786 for (j = 0; j < m; ++j)
788 h->fast[c + j] = (uint8)i;
795 static void grow_buffer_unsafe(jpeg *j)
799 int b = j->nomore ? 0 : get8(&j->s);
805 j->marker = (unsigned char)c;
810 j->code_buffer = (j->code_buffer << 8) | b;
812 } while (j->code_bits <= 24);
816 static uint32 bmask[17] = { 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535 };
818 // decode a jpeg huffman value from the bitstream
819 __forceinline static int decode(jpeg *j, huffman *h)
824 if (j->code_bits < 16)
825 grow_buffer_unsafe(j);
827 // look at the top FAST_BITS and determine what symbol ID it is,
828 // if the code is <= FAST_BITS
829 c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS) - 1);
833 if (h->size[k] > j->code_bits)
835 j->code_bits -= h->size[k];
839 // naive test is to shift the code_buffer down so k bits are
840 // valid, then test against maxcode. To speed this up, we've
841 // preshifted maxcode left so that it has (16-k) 0s at the
842 // end; in other words, regardless of the number of bits, it
843 // wants to be compared against something shifted to have 16;
844 // that way we don't need to shift inside the loop.
845 if (j->code_bits < 16)
846 temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff;
848 temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff;
849 for (k = FAST_BITS + 1;; ++k)
850 if (temp < h->maxcode[k])
854 // error! code not found
859 if (k > j->code_bits)
862 // convert the huffman code to the symbol id
863 c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k];
864 assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
866 // convert the id to a symbol
871 // combined JPEG 'receive' and JPEG 'extend', since baseline
872 // always extends everything it receives.
873 __forceinline static int extend_receive(jpeg *j, int n)
875 unsigned int m = 1 << (n - 1);
877 if (j->code_bits < n)
878 grow_buffer_unsafe(j);
879 k = (j->code_buffer >> (j->code_bits - n)) & bmask[n];
881 // the following test is probably a random branch that won't
882 // predict well. I tried to table accelerate it but failed.
883 // maybe it's compiling as a conditional move?
885 return (-1 << n) + k + 1;
890 // given a value that's at position X in the zigzag stream,
891 // where does it appear in the 8x8 matrix coded as row-major?
892 static uint8 dezigzag[64 + 15] =
894 0, 1, 8, 16, 9, 2, 3, 10,
895 17, 24, 32, 25, 18, 11, 4, 5,
896 12, 19, 26, 33, 40, 48, 41, 34,
897 27, 20, 13, 6, 7, 14, 21, 28,
898 35, 42, 49, 56, 57, 50, 43, 36,
899 29, 22, 15, 23, 30, 37, 44, 51,
900 58, 59, 52, 45, 38, 31, 39, 46,
901 53, 60, 61, 54, 47, 55, 62, 63,
902 // let corrupt input sample past end
903 63, 63, 63, 63, 63, 63, 63, 63,
904 63, 63, 63, 63, 63, 63, 63
907 // decode one 64-entry block--
908 static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
911 int t = decode(j, hdc);
913 return e("bad huffman code", "Corrupt JPEG");
915 // 0 all the ac values now so we can do it 32-bits at a time
916 memset(data, 0, 64 * sizeof(data[0]));
918 diff = t ? extend_receive(j, t) : 0;
919 dc = j->img_comp[b].dc_pred + diff;
920 j->img_comp[b].dc_pred = dc;
923 // decode AC components, see JPEG spec
928 int rs = decode(j, hac);
930 return e("bad huffman code", "Corrupt JPEG");
942 // decode into unzigzag'd location
943 data[dezigzag[k++]] = (short)extend_receive(j, s);
949 // take a -128..127 value and clamp it and convert to 0..255
950 __forceinline static uint8 clamp(int x)
953 // trick to use a single test to catch both cases
954 if ((unsigned int)x > 255)
964 #define f2f(x) (int)(((x) * 4096 + 0.5))
965 #define fsh(x) ((x) << 12)
967 // derived from jidctint -- DCT_ISLOW
968 #define IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7) \
969 int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \
972 p1 = (p2 + p3) * f2f(0.5411961f); \
973 t2 = p1 + p3 * f2f(-1.847759065f); \
974 t3 = p1 + p2 * f2f(0.765366865f); \
991 p5 = (p3 + p4) * f2f(1.175875602f); \
992 t0 = t0 * f2f(0.298631336f); \
993 t1 = t1 * f2f(2.053119869f); \
994 t2 = t2 * f2f(3.072711026f); \
995 t3 = t3 * f2f(1.501321110f); \
996 p1 = p5 + p1 * f2f(-0.899976223f); \
997 p2 = p5 + p2 * f2f(-2.562915447f); \
998 p3 = p3 * f2f(-1.961570560f); \
999 p4 = p4 * f2f(-0.390180644f); \
1006 // .344 seconds on 3*anemones.jpg
1007 static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
1009 int i, val[64], *v = val;
1010 uint8 *o, *dq = dequantize;
1014 for (i = 0; i < 8; ++i, ++d, ++dq, ++v)
1016 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1017 if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0)
1019 // no shortcut 0 seconds
1020 // (1|2|3|4|5|6|7)==0 0 seconds
1021 // all separate -0.047 seconds
1022 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
1023 int dcterm = d[0] * dq[0] << 2;
1024 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1028 IDCT_1D(d[0] * dq[0], d[8] * dq[8], d[16] * dq[16], d[24] * dq[24],
1029 d[32] * dq[32], d[40] * dq[40], d[48] * dq[48], d[56] * dq[56])
1030 // constants scaled things up by 1<<12; let's bring them back
1031 // down, but keep 2 extra bits of precision
1036 v[0] = (x0 + t3) >> 10;
1037 v[56] = (x0 - t3) >> 10;
1038 v[8] = (x1 + t2) >> 10;
1039 v[48] = (x1 - t2) >> 10;
1040 v[16] = (x2 + t1) >> 10;
1041 v[40] = (x2 - t1) >> 10;
1042 v[24] = (x3 + t0) >> 10;
1043 v[32] = (x3 - t0) >> 10;
1047 for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride)
1049 // no fast case since the first 1D IDCT spread components out
1050 IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
1051 // constants scaled things up by 1<<12, plus we had 1<<2 from first
1052 // loop, plus horizontal and vertical each scale by sqrt(8) so together
1053 // we've got an extra 1<<3, so 1<<17 total we need to remove.
1058 o[0] = clamp((x0 + t3) >> 17);
1059 o[7] = clamp((x0 - t3) >> 17);
1060 o[1] = clamp((x1 + t2) >> 17);
1061 o[6] = clamp((x1 - t2) >> 17);
1062 o[2] = clamp((x2 + t1) >> 17);
1063 o[5] = clamp((x2 - t1) >> 17);
1064 o[3] = clamp((x3 + t0) >> 17);
1065 o[4] = clamp((x3 - t0) >> 17);
1069 static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize)
1071 int i, val[64], *v = val;
1073 unsigned short *dq = dequantize;
1077 for (i = 0; i < 8; ++i, ++d, ++dq, ++v)
1079 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1080 if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0)
1082 // no shortcut 0 seconds
1083 // (1|2|3|4|5|6|7)==0 0 seconds
1084 // all separate -0.047 seconds
1085 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
1086 int dcterm = d[0] * dq[0] << 2;
1087 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1091 IDCT_1D(d[0] * dq[0], d[8] * dq[8], d[16] * dq[16], d[24] * dq[24],
1092 d[32] * dq[32], d[40] * dq[40], d[48] * dq[48], d[56] * dq[56])
1093 // constants scaled things up by 1<<12; let's bring them back
1094 // down, but keep 2 extra bits of precision
1099 v[0] = (x0 + t3) >> 10;
1100 v[56] = (x0 - t3) >> 10;
1101 v[8] = (x1 + t2) >> 10;
1102 v[48] = (x1 - t2) >> 10;
1103 v[16] = (x2 + t1) >> 10;
1104 v[40] = (x2 - t1) >> 10;
1105 v[24] = (x3 + t0) >> 10;
1106 v[32] = (x3 - t0) >> 10;
1110 for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride)
1112 // no fast case since the first 1D IDCT spread components out
1113 IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
1114 // constants scaled things up by 1<<12, plus we had 1<<2 from first
1115 // loop, plus horizontal and vertical each scale by sqrt(8) so together
1116 // we've got an extra 1<<3, so 1<<17 total we need to remove.
1121 o[0] = clamp((x0 + t3) >> 17);
1122 o[7] = clamp((x0 - t3) >> 17);
1123 o[1] = clamp((x1 + t2) >> 17);
1124 o[6] = clamp((x1 - t2) >> 17);
1125 o[2] = clamp((x2 + t1) >> 17);
1126 o[5] = clamp((x2 - t1) >> 17);
1127 o[3] = clamp((x3 + t0) >> 17);
1128 o[4] = clamp((x3 - t0) >> 17);
1131 static stbi_idct_8x8 stbi_idct_installed = idct_block;
1133 extern void stbi_install_idct(stbi_idct_8x8 func)
1135 stbi_idct_installed = func;
1139 #define MARKER_none 0xff
1140 // if there's a pending marker from the entropy stream, return that
1141 // otherwise, fetch from the stream and get a marker. if there's no
1142 // marker, return 0xff, which is never a valid marker value
1143 static uint8 get_marker(jpeg *j)
1146 if (j->marker != MARKER_none)
1149 j->marker = MARKER_none;
1160 // in each scan, we'll have scan_n components, and the order
1161 // of the components is specified by order[]
1162 #define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
1164 // after a restart interval, reset the entropy decoder and
1165 // the dc prediction
1166 static void reset(jpeg *j)
1171 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1172 j->marker = MARKER_none;
1173 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
// no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
1175 // since we don't even allow 1<<30 pixels
1178 static int parse_entropy_coded_data(jpeg *z)
1185 __declspec(align(16))
1188 int n = z->order[0];
1189 // non-interleaved data, we just need to process one block at a time,
1190 // in trivial scanline order
1191 // number of blocks to do just depends on how many actual "pixels" this
1192 // component has, independent of interleaved MCU blocking and such
1193 int w = (z->img_comp[n].x + 7) >> 3;
1194 int h = (z->img_comp[n].y + 7) >> 3;
1195 for (j = 0; j < h; ++j)
1197 for (i = 0; i < w; ++i)
1199 if (!decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + z->img_comp[n].ha, n))
1202 stbi_idct_installed(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1204 idct_block(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1206 // every data block is an MCU, so countdown the restart interval
1209 if (z->code_bits < 24)
1210 grow_buffer_unsafe(z);
1211 // if it's NOT a restart, then just bail, so we get corrupt data
1212 // rather than no data
1213 if (!RESTART(z->marker))
1220 else // interleaved!
1224 for (j = 0; j < z->img_mcu_y; ++j)
1226 for (i = 0; i < z->img_mcu_x; ++i)
1228 // scan an interleaved mcu... process scan_n components in order
1229 for (k = 0; k < z->scan_n; ++k)
1231 int n = z->order[k];
1232 // scan out an mcu's worth of this component; that's just determined
1233 // by the basic H and V specified for the component
1234 for (y = 0; y < z->img_comp[n].v; ++y)
1236 for (x = 0; x < z->img_comp[n].h; ++x)
1238 int x2 = (i * z->img_comp[n].h + x) * 8;
1239 int y2 = (j * z->img_comp[n].v + y) * 8;
1240 if (!decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + z->img_comp[n].ha, n))
1243 stbi_idct_installed(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
1245 idct_block(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
1250 // after all interleaved components, that's an interleaved MCU,
1251 // so now count down the restart interval
1254 if (z->code_bits < 24)
1255 grow_buffer_unsafe(z);
1256 // if it's NOT a restart, then just bail, so we get corrupt data
1257 // rather than no data
1258 if (!RESTART(z->marker))
1268 static int process_marker(jpeg *z, int m)
1273 case MARKER_none: // no marker found
1274 return e("expected marker", "Corrupt JPEG");
1276 case 0xC2: // SOF - progressive
1277 return e("progressive jpeg", "JPEG format not supported (progressive)");
1279 case 0xDD: // DRI - specify restart interval
1280 if (get16(&z->s) != 4)
1281 return e("bad DRI len", "Corrupt JPEG");
1282 z->restart_interval = get16(&z->s);
1285 case 0xDB: // DQT - define quantization table
1286 L = get16(&z->s) - 2;
1289 int q = get8(&z->s);
1293 return e("bad DQT type", "Corrupt JPEG");
1295 return e("bad DQT table", "Corrupt JPEG");
1296 for (i = 0; i < 64; ++i)
1297 z->dequant[t][dezigzag[i]] = get8u(&z->s);
1299 for (i = 0; i < 64; ++i)
1300 z->dequant2[t][i] = z->dequant[t][i];
1306 case 0xC4: // DHT - define huffman table
1307 L = get16(&z->s) - 2;
1311 int sizes[16], i, m = 0;
1312 int q = get8(&z->s);
1315 if (tc > 1 || th > 3)
1316 return e("bad DHT header", "Corrupt JPEG");
1317 for (i = 0; i < 16; ++i)
1319 sizes[i] = get8(&z->s);
1325 if (!build_huffman(z->huff_dc + th, sizes))
1327 v = z->huff_dc[th].values;
1331 if (!build_huffman(z->huff_ac + th, sizes))
1333 v = z->huff_ac[th].values;
1335 for (i = 0; i < m; ++i)
1336 v[i] = get8u(&z->s);
1341 // check for comment block or APP blocks
1342 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE)
1344 skip(&z->s, get16(&z->s) - 2);
// Parses a start-of-scan header from z->s: the segment length, the number of
// components in the scan, and for each one its id plus DC/AC Huffman table
// selectors, followed by three trailing bytes. Malformed fields are reported
// through e().
1351 static int process_scan_header(jpeg *z)
1354 int Ls = get16(&z->s);
1355 z->scan_n = get8(&z->s);
1356 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s.img_n)
1357 return e("bad SOS component count", "Corrupt JPEG");
// declared length must agree with the component count
1358 if (Ls != 6 + 2 * z->scan_n)
1359 return e("bad SOS len", "Corrupt JPEG");
1360 for (i = 0; i < z->scan_n; ++i)
1362 int id = get8(&z->s), which;
1363 int q = get8(&z->s);
// look up the frame component whose id matches this scan entry
1364 for (which = 0; which < z->s.img_n; ++which)
1365 if (z->img_comp[which].id == id)
// which == img_n means no component matched
1367 if (which == z->s.img_n)
// high nibble of q selects the DC table, low nibble the AC table
1369 z->img_comp[which].hd = q >> 4;
1370 if (z->img_comp[which].hd > 3)
1371 return e("bad DC huff", "Corrupt JPEG");
1372 z->img_comp[which].ha = q & 15;
1373 if (z->img_comp[which].ha > 3)
1374 return e("bad AC huff", "Corrupt JPEG");
// remember the decode order of components within this scan
1375 z->order[i] = which;
// trailing bytes: first and last must be zero, the middle one is ignored
1377 if (get8(&z->s) != 0)
1378 return e("bad SOS", "Corrupt JPEG");
1379 get8(&z->s); // should be 63, but might be 0
1380 if (get8(&z->s) != 0)
1381 return e("bad SOS", "Corrupt JPEG");
// Parses the frame header: image size, component count and, per component,
// id, sampling factors (h, v) and quantization table index (tq). When
// scan == SCAN_load it additionally derives the MCU geometry and allocates
// one decode buffer per component. Errors are reported through e().
// (`s` is presumably the stbi stream embedded in z; its declaration is not
// visible here.)
1386 static int process_frame_header(jpeg *z, int scan)
1389 int Lf, p, i, q, h_max = 1, v_max = 1, c;
1392 return e("bad SOF len", "Corrupt JPEG"); // JPEG
1395 return e("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
1396 s->img_y = get16(s);
1398 return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
1399 s->img_x = get16(s);
1401 return e("0 width", "Corrupt JPEG"); // JPEG requires
// only 1-component and 3-component images are accepted
1403 if (c != 3 && c != 1)
1404 return e("bad component count", "Corrupt JPEG"); // JFIF requires
// clear buffer pointers up front so later cleanup can test them
1406 for (i = 0; i < c; ++i)
1408 z->img_comp[i].data = NULL;
1409 z->img_comp[i].linebuf = NULL;
// the declared length must match the component count
1412 if (Lf != 8 + 3 * s->img_n)
1413 return e("bad SOF len", "Corrupt JPEG");
1415 for (i = 0; i < s->img_n; ++i)
1417 z->img_comp[i].id = get8(s);
1418 if (z->img_comp[i].id != i + 1) // JFIF requires
1419 if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files!
1420 return e("bad component ID", "Corrupt JPEG");
// sampling factors: high nibble = horizontal, low nibble = vertical, each 1..4
1422 z->img_comp[i].h = (q >> 4);
1423 if (!z->img_comp[i].h || z->img_comp[i].h > 4)
1424 return e("bad H", "Corrupt JPEG");
1425 z->img_comp[i].v = q & 15;
1426 if (!z->img_comp[i].v || z->img_comp[i].v > 4)
1427 return e("bad V", "Corrupt JPEG");
1428 z->img_comp[i].tq = get8(s);
1429 if (z->img_comp[i].tq > 3)
1430 return e("bad TQ", "Corrupt JPEG");
// everything below is only needed when actually loading pixels
1433 if (scan != SCAN_load)
// reject images whose x * y * n would exceed 1 << 30
1436 if ((1 << 30) / s->img_x / s->img_n < s->img_y)
1437 return e("too large", "Image too large to decode");
// find the largest sampling factor in each direction
1439 for (i = 0; i < s->img_n; ++i)
1441 if (z->img_comp[i].h > h_max)
1442 h_max = z->img_comp[i].h;
1443 if (z->img_comp[i].v > v_max)
1444 v_max = z->img_comp[i].v;
1447 // compute interleaved mcu info
1448 z->img_h_max = h_max;
1449 z->img_v_max = v_max;
1450 z->img_mcu_w = h_max * 8;
1451 z->img_mcu_h = v_max * 8;
// number of MCUs across and down, rounding up
1452 z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
1453 z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
1455 for (i = 0; i < s->img_n; ++i)
1457 // number of effective pixels (e.g. for non-interleaved MCU)
1458 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
1459 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
1460 // to simplify generation, we'll allocate enough memory to decode
1461 // the bogus oversized data from using interleaved MCUs and their
1462 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
1463 // discard the extra data until colorspace conversion
1464 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
1465 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
// +15 leaves room to round the pointer up to a 16-byte boundary below
1466 z->img_comp[i].raw_data = stb_malloc(z->img_comp[i].w2 * z->img_comp[i].h2 + 15);
1467 if (z->img_comp[i].raw_data == NULL)
// allocation failed: release the buffers of the components already set up
1469 for (--i; i >= 0; --i)
1471 stb_free(z->img_comp[i].raw_data);
1472 z->img_comp[i].data = NULL;
1474 return e("outofmem", "Out of memory");
1476 // align blocks for installable-idct using mmx/sse
1477 z->img_comp[i].data = (uint8 *)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
1478 z->img_comp[i].linebuf = NULL;
// Predicates over a marker code value (presumably the same kind of value
// process_marker() switches on). SOF accepts two codes, 0xc0 and 0xc1.
1484 // use comparisons since in some cases we handle more than one case (e.g. SOF)
1485 #define DNL(x) ((x) == 0xdc)
1486 #define SOI(x) ((x) == 0xd8)
1487 #define EOI(x) ((x) == 0xd9)
1488 #define SOF(x) ((x) == 0xc0 || (x) == 0xc1)
1489 #define SOS(x) ((x) == 0xda)
// Reads the start of a JPEG stream up to and including the frame header.
// Fails with "no SOI" / "no SOF" through e() when those markers are missing.
// scan == SCAN_type is special-cased right after the SOI check; otherwise
// intermediate markers go to process_marker() and the frame header to
// process_frame_header().
1491 static int decode_jpeg_header(jpeg *z, int scan)
1494 z->marker = MARKER_none; // initialize cached marker to empty
1497 return e("no SOI", "Corrupt JPEG");
1498 if (scan == SCAN_type)
1503 if (!process_marker(z, m))
1506 while (m == MARKER_none)
1508 // some files have extra padding after their blocks, so ok, we'll scan
1510 return e("no SOF", "Corrupt JPEG");
1514 if (!process_frame_header(z, scan))
// Decodes a whole JPEG: resets the restart interval, parses the header in
// SCAN_load mode, then handles scan headers, entropy-coded data and any
// other markers via the helpers called below. A failing helper aborts the
// decode (the return statements are not visible in this fragment).
1519 static int decode_jpeg_image(jpeg *j)
1522 j->restart_interval = 0;
1523 if (!decode_jpeg_header(j, SCAN_load))
1530 if (!process_scan_header(j))
1532 if (!parse_entropy_coded_data(j))
1537 if (!process_marker(j, m))
1545 // static jfif-centered resampling (across block boundaries)
// Signature shared by every resample_row_* function below: an output row
// buffer and two input rows (the remaining parameters, per those functions,
// are `int w, int hs`).
1547 typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
// divide by 4 (right shift) and narrow to uint8; callers add the rounding
// bias themselves
1550 #define div4(x) ((uint8)((x) >> 2))
// Resampler for components that need no expansion; every parameter except
// in_near is marked unused.
1552 static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1554 VOGL_NOTE_UNUSED(out), VOGL_NOTE_UNUSED(in_far), VOGL_NOTE_UNUSED(w), VOGL_NOTE_UNUSED(hs);
// Vertical 2x upsample of one row of w samples: each output sample blends
// the nearer input row with weight 3 and the farther one with weight 1.
1558 static uint8 *resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1560 VOGL_NOTE_UNUSED(hs);
1561 // need to generate two samples vertically for every one in input
1563 for (i = 0; i < w; ++i)
// (3*near + far + 2) >> 2 : weighted average with round-to-nearest
1564 out[i] = div4(3 * in_near[i] + in_far[i] + 2);
// Horizontal 2x upsample of one row: each input sample yields two outputs,
// each a 3:1 blend of that sample with its left or right neighbour. Only
// in_near is read.
1568 static uint8 *resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1570 VOGL_NOTE_UNUSED(hs), VOGL_NOTE_UNUSED(in_far);
1571 // need to generate two samples horizontally for every one in input
1573 uint8 *input = in_near;
1576 // if only one sample, can't do any interpolation
1577 out[0] = out[1] = input[0];
// second output of the first sample blends toward input[1]
1582 out[1] = div4(input[0] * 3 + input[1] + 2);
// interior samples have a neighbour on both sides
1583 for (i = 1; i < w - 1; ++i)
1585 int n = 3 * input[i] + 2;
1586 out[i * 2 + 0] = div4(n + input[i - 1]);
1587 out[i * 2 + 1] = div4(n + input[i + 1]);
// last sample: blend toward the left only, then copy the edge value
1589 out[i * 2 + 0] = div4(input[w - 2] * 3 + input[w - 1] + 2);
1590 out[i * 2 + 1] = input[w - 1];
// divide by 16 (right shift) and narrow to uint8; callers add the rounding
// bias themselves
1594 #define div16(x) ((uint8)((x) >> 4))
// Combined 2x horizontal and vertical upsample of one output row. Each
// column is first blended vertically (3*near + far); adjacent column sums
// are then blended 3:1 horizontally.
1596 static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1598 VOGL_NOTE_UNUSED(hs);
1599 // need to generate 2x2 samples for every one in input
// single-column case: vertical blend only
1603 out[0] = out[1] = div4(3 * in_near[0] + in_far[0] + 2);
// t1 = vertical blend of the current column, scaled by 4
1607 t1 = 3 * in_near[0] + in_far[0];
1608 out[0] = div4(t1 + 2);
1609 for (i = 1; i < w; ++i)
1612 t1 = 3 * in_near[i] + in_far[i];
// t0 (presumably the previous column's t1; assignment not visible) and t1
// are each scaled by 4, so the 3:1 blend is scaled by 16
1613 out[i * 2 - 1] = div16(3 * t0 + t1 + 8);
1614 out[i * 2] = div16(3 * t1 + t0 + 8);
// final output column has no right neighbour
1616 out[w * 2 - 1] = div4(t1 + 2);
// Fallback horizontal upsample by an arbitrary factor hs: every input
// sample is repeated hs times in the output. in_far is not used.
1620 static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1622 VOGL_NOTE_UNUSED(in_far);
1623 // resample with nearest-neighbor
1625 for (i = 0; i < w; ++i)
1626 for (j = 0; j < hs; ++j)
1627 out[i * hs + j] = in_near[i];
// convert a floating-point constant to 16.16 fixed point, rounding
1631 #define float2fixed(x) ((int)((x) * 65536 + 0.5))
// Converts `count` YCbCr samples (separate y / pcb / pcr rows) to RGB using
// 16.16 fixed-point arithmetic. `step` is presumably the number of bytes per
// output pixel (callers pass the output component count).
1633 // 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro)
1634 // VC6 without processor=Pro is generating multiple LEAs per multiply!
1635 static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
1638 for (i = 0; i < count; ++i)
1640 int y_fixed = (y[i] << 16) + 32768; // rounding
// chroma is stored with a +128 bias; recentre around zero
1642 int cr = pcr[i] - 128;
1643 int cb = pcb[i] - 128;
1644 r = y_fixed + cr * float2fixed(1.40200f);
1645 g = y_fixed - cr * float2fixed(0.71414f) - cb * float2fixed(0.34414f);
1646 b = y_fixed + cb * float2fixed(1.77200f);
// the unsigned compare catches both negative values and values above 255
1650 if ((unsigned)r > 255)
1657 if ((unsigned)g > 255)
1664 if ((unsigned)b > 255)
// Currently installed row converter; defaults to the C implementation above.
1680 static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;
// Replaces the installed YCbCr-to-RGB row converter with `func`.
// NOTE(review): func is stored without a NULL check.
1682 void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
1684 stbi_YCbCr_installed = func;
1688 // clean up the temporary component buffers
// For each of the j->s.img_n components, frees the decode buffer and the
// line buffer if present and clears the pointers.
1689 static void cleanup_jpeg(jpeg *j)
1692 for (i = 0; i < j->s.img_n; ++i)
// data is the aligned view into raw_data, so raw_data is what is freed
1694 if (j->img_comp[i].data)
1696 stb_free(j->img_comp[i].raw_data);
1697 j->img_comp[i].data = NULL;
1699 if (j->img_comp[i].linebuf)
1701 stb_free(j->img_comp[i].linebuf);
1702 j->img_comp[i].linebuf = NULL;
// Per-component resampling state used by load_jpeg_image().
// row-expansion function chosen from the component's hs/vs factors
1709 resample_row_func resample;
// the two source rows currently handed to the resample function
1710 uint8 *line0, *line1;
1711 int hs, vs; // expansion factor in each axis
1712 int w_lores; // horizontal pixels pre-expansion
1713 int ystep; // how far through vertical expansion we are
1714 int ypos; // which pre-expansion row we're on
// Decodes the JPEG in z and returns a newly allocated pixel buffer with
// req_comp channels per pixel (or the image's own channel count when
// req_comp is 0). Width and height are written to *out_x / *out_y and the
// original component count to *comp. Failures are reported through epuc().
1717 static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1720 // validate req_comp
1721 if (req_comp < 0 || req_comp > 4)
1722 return epuc("bad req_comp", "Internal error");
1725 // load a jpeg image from whichever source
1726 if (!decode_jpeg_image(z))
1732 // determine actual number of components to generate
1733 n = req_comp ? req_comp : z->s.img_n;
// a 3-component image requested with fewer than 3 channels is special-cased
// (the assignment is not visible here); otherwise all components are decoded
1735 if (z->s.img_n == 3 && n < 3)
1738 decode_n = z->s.img_n;
1740 // resample and color-convert
1747 stbi_resample res_comp[4];
// set up one resampler per decoded component
1749 for (k = 0; k < decode_n; ++k)
1751 stbi_resample *r = &res_comp[k];
1753 // allocate line buffer big enough for upsampling off the edges
1754 // with upsample factor of 4
1755 z->img_comp[k].linebuf = (uint8 *)stb_malloc(z->s.img_x + 3);
1756 if (!z->img_comp[k].linebuf)
1759 return epuc("outofmem", "Out of memory");
// expansion factors relative to the most finely sampled component
1762 r->hs = z->img_h_max / z->img_comp[k].h;
1763 r->vs = z->img_v_max / z->img_comp[k].v;
1764 r->ystep = r->vs >> 1;
1765 r->w_lores = (z->s.img_x + r->hs - 1) / r->hs;
// both source rows start at the first row of the component
1767 r->line0 = r->line1 = z->img_comp[k].data;
// pick a specialised resampler for 1x/2x factors, else the generic one
1769 if (r->hs == 1 && r->vs == 1)
1770 r->resample = resample_row_1;
1771 else if (r->hs == 1 && r->vs == 2)
1772 r->resample = resample_row_v_2;
1773 else if (r->hs == 2 && r->vs == 1)
1774 r->resample = resample_row_h_2;
1775 else if (r->hs == 2 && r->vs == 2)
1776 r->resample = resample_row_hv_2;
1778 r->resample = resample_row_generic;
1781 // can't error after this so, this is safe
1782 output = (uint8 *)stb_malloc(n * z->s.img_x * z->s.img_y + 1);
1786 return epuc("outofmem", "Out of memory");
1789 // now go ahead and resample
1790 for (j = 0; j < z->s.img_y; ++j)
// start of output row j
1792 uint8 *out = output + n * z->s.img_x * j;
1793 for (k = 0; k < decode_n; ++k)
1795 stbi_resample *r = &res_comp[k];
// y_bot decides which of line0/line1 is passed as the nearer row
1796 int y_bot = r->ystep >= (r->vs >> 1);
1797 coutput[k] = r->resample(z->img_comp[k].linebuf,
1798 y_bot ? r->line1 : r->line0,
1799 y_bot ? r->line0 : r->line1,
// after vs output rows, move on to the next source row
1801 if (++r->ystep >= r->vs)
1804 r->line0 = r->line1;
// do not advance line1 past the component's last row
1805 if (++r->ypos < z->img_comp[k].y)
1806 r->line1 += z->img_comp[k].w2;
1811 uint8 *y = coutput[0];
1812 if (z->s.img_n == 3)
// installed converter vs built-in converter (presumably selected by a
// preprocessor switch whose lines are not visible here)
1815 stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
1817 YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n);
// single-component source: replicate the sample into the colour channels
1821 for (i = 0; i < z->s.img_x; ++i)
1823 out[0] = out[1] = out[2] = y[i];
1824 out[3] = 255; // not used if n==3
1830 uint8 *y = coutput[0];
1832 for (i = 0; i < z->s.img_x; ++i)
// two-channel output: sample followed by an opaque 255
1835 for (i = 0; i < z->s.img_x; ++i)
1836 *out++ = y[i], *out++ = 255;
1840 *out_x = z->s.img_x;
1841 *out_y = z->s.img_y;
1843 *comp = z->s.img_n; // report original components, not output
1848 #ifndef STBI_NO_STDIO
// Decodes a JPEG from an already-open FILE; see load_jpeg_image() for the
// meaning of x, y, comp and req_comp.
1849 unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1852 start_file(&j.s, f);
1853 return load_jpeg_image(&j, x, y, comp, req_comp);
// Opens `filename` in binary mode and decodes it through
// stbi_jpeg_load_from_file().
1856 unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1858 unsigned char *data;
1859 FILE *f = fopen(filename, "rb");
1862 data = stbi_jpeg_load_from_file(f, x, y, comp, req_comp);
// Decodes a JPEG held in the `len`-byte memory block `buffer`.
1868 unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1871 start_mem(&j.s, buffer, len);
1872 return load_jpeg_image(&j, x, y, comp, req_comp);
1875 #ifndef STBI_NO_STDIO
// Checks whether `f` looks like a JPEG by running the header decoder in
// SCAN_type mode, then seeks the file back to offset n (presumably the
// position saved on entry; that line is not visible here).
1876 int stbi_jpeg_test_file(FILE *f)
1881 start_file(&j.s, f);
1882 r = decode_jpeg_header(&j, SCAN_type);
1883 fseek(f, n, SEEK_SET);
// Memory-buffer counterpart of stbi_jpeg_test_file(): returns the result of
// decode_jpeg_header() in SCAN_type mode.
1888 int stbi_jpeg_test_memory(stbi_uc const *buffer, int len)
1891 start_mem(&j.s, buffer, len);
1892 return decode_jpeg_header(&j, SCAN_type);
// Declarations only: no definitions of these info functions appear in this
// part of the file.
1896 #ifndef STBI_NO_STDIO
1897 extern int stbi_jpeg_info(char const *filename, int *x, int *y, int *comp);
1898 extern int stbi_jpeg_info_from_file(FILE *f, int *x, int *y, int *comp);
1900 extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
1902 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
1903 // simple implementation
1904 // - all input must be provided in an upfront buffer
1905 // - all output is written to a single output buffer (can stb_malloc/stb_realloc)
1909 // fast-way is faster to check than jpeg huffman, but slow way is slower
1910 #define ZFAST_BITS 9 // accelerate all cases in default tables
1911 #define ZFAST_MASK ((1 << ZFAST_BITS) - 1)
1913 // zlib-style huffman encoding
1914 // (jpegs packs from left, zlib from right, so can't share code)
// lookup indexed by the low ZFAST_BITS bits of the bit buffer; set to
// all-ones bytes by zbuild_huffman() before being filled in
1917 uint16 fast[1 << ZFAST_BITS];
// first code value assigned to each code length
1918 uint16 firstcode[16];
// index of the first symbol of each code length
1920 uint16 firstsymbol[16];
// Reverses the order of the low 16 bits of n by swapping progressively
// larger groups: single bits, bit pairs, nibbles, then bytes.
1925 __forceinline static int bitreverse16(int n)
1927 n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
1928 n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
1929 n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
1930 n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
// Reverses the low `bits` bits of v (bits is expected to be at most 16,
// since the result is shifted right by 16 - bits).
1934 __forceinline static int bit_reverse(int v, int bits)
1937 // to bit reverse n bits, reverse 16 and shift
1938 // e.g. 11 bits, bit reverse and shift away 5
1939 return bitreverse16(v) >> (16 - bits);
1942 static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
1945 int code, next_code[16], sizes[17];
1947 // DEFLATE spec for generating codes
1948 memset(sizes, 0, sizeof(sizes));
1949 memset(z->fast, 255, sizeof(z->fast));
1950 for (i = 0; i < num; ++i)
1951 ++sizes[sizelist[i]];
1953 for (i = 1; i < 16; ++i)
1954 assert(sizes[i] <= (1 << i));
1956 for (i = 1; i < 16; ++i)
1958 next_code[i] = code;
1959 z->firstcode[i] = (uint16)code;
1960 z->firstsymbol[i] = (uint16)k;
1961 code = (code + sizes[i]);
1963 if (code - 1 >= (1 << i))
1964 return e("bad codelengths", "Corrupt JPEG");
1965 z->maxcode[i] = code << (16 - i); // preshift for inner loop
1969 z->maxcode[16] = 0x10000; // sentinel
1970 for (i = 0; i < num; ++i)
1972 int s = sizelist[i];
1975 int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
1976 z->size[c] = (uint8)s;
1977 z->value[c] = (uint16)i;
1978 if (s <= ZFAST_BITS)
1980 int k = bit_reverse(next_code[s], s);
1981 while (k < (1 << ZFAST_BITS))
1983 z->fast[k] = (uint16)c;
1993 // zlib-from-memory implementation for PNG reading
1994 // because PNG allows splitting the zlib stream arbitrarily,
1995 // and it's annoying structurally to have PNG call ZLIB call PNG,
1996 // we require PNG read all the IDATs and combine them into a single
// read cursor into the compressed input and its one-past-the-end bound
2001 uint8 *zbuffer, *zbuffer_end;
// Huffman tables for literal/length symbols and for distance symbols
2010 zhuffman z_length, z_distance;
// Returns the next input byte and advances the cursor; the end-of-buffer
// case is handled separately (its return value is not visible here).
2013 __forceinline static int zget8(zbuf *z)
2015 if (z->zbuffer >= z->zbuffer_end)
2017 return *z->zbuffer++;
// Tops up the bit buffer one input byte at a time, placing each new byte
// above the bits already held, while num_bits is at most 24.
2020 static void fill_bits(zbuf *z)
// sanity check: no stray bits above the num_bits currently held
2024 assert(z->code_buffer < (1U << z->num_bits));
2025 z->code_buffer |= zget8(z) << z->num_bits;
2027 } while (z->num_bits <= 24);
// Extracts the next n bits (taken from the low end of the bit buffer) and
// removes them from the buffer.
2030 __forceinline static unsigned int zreceive(zbuf *z, int n)
// fewer than n bits available: refill first (call not visible here)
2033 if (z->num_bits < n)
2035 k = z->code_buffer & ((1 << n) - 1);
2036 z->code_buffer >>= n;
// Decodes one Huffman symbol from bit buffer `a` using tables `z`. Tries the
// fast table first (indexed by the low ZFAST_BITS bits); otherwise searches
// code lengths above ZFAST_BITS. Returns -1 for an invalid code.
2041 __forceinline static int zhuffman_decode(zbuf *a, zhuffman *z)
2044 if (a->num_bits < 16)
2046 b = z->fast[a->code_buffer & ZFAST_MASK];
// consume the s bits of the matched code
2050 a->code_buffer >>= s;
2055 // not resolved by fast table, so compute it the slow way
2056 // use jpeg approach, which requires MSbits at top
2057 k = bit_reverse(a->code_buffer, 16);
// find the shortest length whose preshifted maxcode exceeds k
2058 for (s = ZFAST_BITS + 1;; ++s)
2059 if (k < z->maxcode[s])
2062 return -1; // invalid code!
2063 // code size is s, so:
2064 b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
2065 assert(z->size[b] == s);
2066 a->code_buffer >>= s;
// Grows the output buffer of `z` so that n more bytes fit. Fails through
// e() when the buffer is not expandable or reallocation fails.
2071 static int expand(zbuf *z, int n) // need to make room for n bytes
2075 if (!z->z_expandable)
2076 return e("output buffer limit", "Corrupt PNG");
// current fill level and capacity, as offsets from the buffer start
2077 cur = (int)(z->zout - z->zout_start);
2078 limit = (int)(z->zout_end - z->zout_start);
// limit is enlarged until cur + n fits (the growth step is not visible here)
2079 while (cur + n > limit)
// the result goes into q first, so zout_start stays valid if realloc fails
2081 q = (char *)stb_realloc(z->zout_start, limit);
2083 return e("outofmem", "Out of memory");
2086 z->zout_end = q + limit;
// Base match length for each length symbol (indexed as in
// parse_huffman_block); the last two entries are zero.
2090 static int length_base[31] =
2092 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
2093 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
2094 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
// Number of extra bits read after each length symbol.
2097 static int length_extra[31] =
2098 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 };
// Base distance for each distance symbol; the last two entries are zero.
2100 static int dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
2101 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 };
// Number of extra bits read after each distance symbol. Only 30 initializers
// are given for 32 slots, so the last two are implicitly zero.
2103 static int dist_extra[32] =
2104 { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 };
// Decodes one Huffman-coded block into the output buffer of `a`: literal
// symbols are appended directly; length symbols are followed by a distance
// symbol and copy `len` bytes from `dist` bytes back in the output.
2106 static int parse_huffman_block(zbuf *a)
2110 int z = zhuffman_decode(a, &a->z_length);
2114 return e("bad huffman code", "Corrupt PNG"); // error in huffman codes
// literal byte: grow the output first when it is full
2115 if (a->zout >= a->zout_end)
2118 *a->zout++ = (char)z;
// length symbol: base length plus any extra bits
2127 len = length_base[z];
2128 if (length_extra[z])
2129 len += zreceive(a, length_extra[z]);
2130 z = zhuffman_decode(a, &a->z_distance);
2132 return e("bad huffman code", "Corrupt PNG");
2133 dist = dist_base[z];
2135 dist += zreceive(a, dist_extra[z]);
// a back-reference may not reach before the start of the output
2136 if (a->zout - a->zout_start < dist)
2137 return e("bad dist", "Corrupt PNG");
2138 if (a->zout + len > a->zout_end)
2139 if (!expand(a, len))
// source of the copy, dist bytes behind the write cursor
2141 p = (uint8 *)(a->zout - dist);
// Reads the code-length description of a dynamic-Huffman block and builds
// a->z_length and a->z_distance from it. The code lengths are themselves
// Huffman-coded with a temporary table (z_codelength).
2148 static int compute_huffman_codes(zbuf *a)
// order in which the code-length code lengths are stored in the stream
2150 static uint8 length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
2151 zhuffman z_codelength;
2152 uint8 lencodes[286 + 32 + 137]; //padding for maximum single op
2153 uint8 codelength_sizes[19];
// counts of literal/length codes, distance codes and code-length codes
2156 int hlit = zreceive(a, 5) + 257;
2157 int hdist = zreceive(a, 5) + 1;
2158 int hclen = zreceive(a, 4) + 4;
2160 memset(codelength_sizes, 0, sizeof(codelength_sizes));
2161 for (i = 0; i < hclen; ++i)
2163 int s = zreceive(a, 3);
2164 codelength_sizes[length_dezigzag[i]] = (uint8)s;
2166 if (!zbuild_huffman(&z_codelength, codelength_sizes, 19))
2170 while (n < hlit + hdist)
2172 int c = zhuffman_decode(a, &z_codelength);
// NOTE(review): zhuffman_decode() can return -1 on corrupt input, and this
// assert is the only visible guard; confirm behaviour in release builds
2173 assert(c >= 0 && c < 19);
// plain code length
2175 lencodes[n++] = (uint8)c;
// repeat the previous length 3..6 times
// NOTE(review): reads lencodes[n - 1]; confirm n cannot be 0 here
2178 c = zreceive(a, 2) + 3;
2179 memset(lencodes + n, lencodes[n - 1], c);
// run of 3..10 zero lengths
2184 c = zreceive(a, 3) + 3;
2185 memset(lencodes + n, 0, c);
// run of 11..138 zero lengths
2191 c = zreceive(a, 7) + 11;
2192 memset(lencodes + n, 0, c);
// a run must not overshoot the declared number of lengths
2196 if (n != hlit + hdist)
2197 return e("bad codelengths", "Corrupt PNG");
// first hlit lengths describe literal/length codes, the rest distance codes
2198 if (!zbuild_huffman(&a->z_length, lencodes, hlit))
2200 if (!zbuild_huffman(&a->z_distance, lencodes + hlit, hdist))
// Copies a stored (uncompressed) block to the output. Discards bits up to
// the next byte boundary, reads the 4-byte header (LEN and its one's
// complement NLEN, both little-endian), validates it, then copies LEN bytes.
2205 static int parse_uncompressed_block(zbuf *a)
2209 if (a->num_bits & 7)
2210 zreceive(a, a->num_bits & 7); // discard
2211 // drain the bit-packed data into header
2213 while (a->num_bits > 0)
2215 header[k++] = (uint8)(a->code_buffer & 255); // wtf this warns?
2216 a->code_buffer >>= 8;
2219 assert(a->num_bits == 0);
2220 // now fill header the normal way
2222 header[k++] = (uint8)zget8(a);
2223 len = header[1] * 256 + header[0];
2224 nlen = header[3] * 256 + header[2];
// NLEN must be the bitwise complement of LEN
2225 if (nlen != (len ^ 0xffff))
2226 return e("zlib corrupt", "Corrupt PNG");
// the input must actually contain len more bytes
2227 if (a->zbuffer + len > a->zbuffer_end)
2228 return e("read past buffer", "Corrupt PNG");
2229 if (a->zout + len > a->zout_end)
2230 if (!expand(a, len))
2232 memcpy(a->zout, a->zbuffer, len);
// Validates the two-byte zlib stream header (cmf, flg): the pair must be a
// multiple of 31, and the visible error paths reject a preset dictionary and
// any compression method other than DEFLATE.
2238 static int parse_zlib_header(zbuf *a)
2242 /* int cinfo = cmf >> 4; */
2244 if ((cmf * 256 + flg) % 31 != 0)
2245 return e("bad zlib header", "Corrupt PNG"); // zlib spec
2247 return e("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
2249 return e("bad compression", "Corrupt PNG"); // DEFLATE required for png
2250 // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
2254 // @TODO: should statically initialize these for optimal thread safety
// Code lengths of the fixed Huffman tables, filled in by init_defaults().
2255 static uint8 default_length[288], default_distance[32];
// Fills default_length with 8/9/7/8-bit lengths over the four symbol ranges
// below, and default_distance with 5 for all 32 entries.
2256 static void init_defaults(void)
2258 int i; // use <= to match clearly with spec
2259 for (i = 0; i <= 143; ++i)
2260 default_length[i] = 8;
2261 for (; i <= 255; ++i)
2262 default_length[i] = 9;
2263 for (; i <= 279; ++i)
2264 default_length[i] = 7;
2265 for (; i <= 287; ++i)
2266 default_length[i] = 8;
2268 for (i = 0; i <= 31; ++i)
2269 default_distance[i] = 5;
2272 int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead
// Decodes a complete DEFLATE stream from `a`, optionally preceded by a zlib
// header (parse_header != 0). Each block starts with a 1-bit final flag and
// a 2-bit type selecting stored, fixed-Huffman or dynamic-Huffman decoding.
2273 static int parse_zlib(zbuf *a, int parse_header)
2277 if (!parse_zlib_header(a))
2283 final = zreceive(a, 1);
2284 type = zreceive(a, 2);
2287 if (!parse_uncompressed_block(a))
2298 // use fixed code lengths
// default_distance[31] is nonzero once init_defaults() has run, so this
// detects the first use
2299 if (!default_distance[31])
2301 if (!zbuild_huffman(&a->z_length, default_length, 288))
2303 if (!zbuild_huffman(&a->z_distance, default_distance, 32))
2308 if (!compute_huffman_codes(a))
2311 if (!parse_huffman_block(a))
// partial-decode hack: special case once more than 64 KiB has been produced
2314 if (stbi_png_partial && a->zout - a->zout_start > 65536)
// Points `a` at the output buffer obuf (olen bytes), records whether it may
// be grown (exp), and runs parse_zlib().
2320 static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
2322 a->zout_start = obuf;
2324 a->zout_end = obuf + olen;
2325 a->z_expandable = exp;
2327 return parse_zlib(a, parse_header);
// Inflates a zlib stream (with header) into a growable buffer that starts at
// initial_size bytes. On success stores the decoded size in *outlen and
// returns the buffer (presumably to be released by the caller with
// stb_free); on failure the buffer is freed.
2330 char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
2333 char *p = (char *)stb_malloc(initial_size);
2336 a.zbuffer = (uint8 *)buffer;
2337 a.zbuffer_end = (uint8 *)buffer + len;
2338 if (do_zlib(&a, p, initial_size, 1, 1))
2341 *outlen = (int)(a.zout - a.zout_start);
// return zout_start rather than p: the buffer may have been reallocated
2342 return a.zout_start;
2346 stb_free(a.zout_start);
// Convenience wrapper: same as stbi_zlib_decode_malloc_guesssize() with an
// initial output size of 16384 bytes.
2351 char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
2353 return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
// Inflates a zlib stream (with header) into the caller's fixed-size buffer
// obuffer (olen bytes, never grown). Returns the number of bytes written on
// success.
2356 int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
2359 a.zbuffer = (uint8 *)ibuffer;
2360 a.zbuffer_end = (uint8 *)ibuffer + ilen;
2361 if (do_zlib(&a, obuffer, olen, 0, 1))
2362 return (int)(a.zout - a.zout_start);
// Like stbi_zlib_decode_malloc(), but the input is a raw DEFLATE stream with
// no zlib header (parse_header = 0). The buffer starts at 16384 bytes.
2367 char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
2370 char *p = (char *)stb_malloc(16384);
2373 a.zbuffer = (uint8 *)buffer;
2374 a.zbuffer_end = (uint8 *)buffer + len;
2375 if (do_zlib(&a, p, 16384, 1, 0))
2378 *outlen = (int)(a.zout - a.zout_start);
2379 return a.zout_start;
2383 stb_free(a.zout_start);
// Like stbi_zlib_decode_buffer(), but the input is a raw DEFLATE stream with
// no zlib header.
2388 int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2391 a.zbuffer = (uint8 *)ibuffer;
2392 a.zbuffer_end = (uint8 *)ibuffer + ilen;
2393 if (do_zlib(&a, obuffer, olen, 0, 0))
2394 return (int)(a.zout - a.zout_start);
2399 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
2400 // simple implementation
2401 // - only 8-bit samples
2402 // - no CRC checking
2403 // - allocates lots of intermediate memory
2404 // - avoids problem of streaming data between subsystems
2405 // - avoids explicit window management
2407 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
// packs four characters into one 32-bit value, first character in the top
// byte, so chunk types can be used as case labels
2415 #define PNG_TYPE(a, b, c, d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
// Reads a chunk header from s; the 32-bit length comes first (the read of
// the type field is not visible in this fragment).
2417 static chunk get_chunk_header(stbi *s)
2420 c.length = get32(s);
// Consumes 8 bytes from s and compares them with the PNG signature; any
// mismatch is reported as "Not a PNG" through e().
2425 static int check_png_header(stbi *s)
2427 static uint8 png_sig[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
2429 for (i = 0; i < 8; ++i)
2430 if (get8(s) != png_sig[i])
2431 return e("bad png sig", "Not a PNG");
// idata: concatenated IDAT payload; expanded: inflated data; out: decoded
// pixels (as used by parse_png_file and create_png_image_raw)
2438 uint8 *idata, *expanded, *out;
// Filter to use on the first row in place of each filter type, indexed by
// the filter byte: variants that do not read the (non-existent) prior row.
2452 static uint8 first_row_filter[5] =
2454 F_none, F_sub, F_none, F_avg_first, F_paeth_first
// Paeth predictor used by PNG filter type 4.
//
// a = sample to the left, b = sample above, c = sample above-left.
// Computes the linear estimate p = a + b - c and returns whichever of
// a, b, c is closest to it; ties are broken in the order a, b, c.
static int paeth(int a, int b, int c)
{
    int p = a + b - c;
    int pa = abs(p - a);
    int pb = abs(p - b);
    int pc = abs(p - c);
    if (pa <= pb && pa <= pc)
        return a;
    if (pb <= pc)
        return b;
    return c;
}
2470 // create the png data from post-deflated data
// Un-filters `raw` (raw_len bytes of inflated scanlines, each preceded by a
// filter-type byte) into a newly allocated a->out of x * y pixels with out_n
// bytes per pixel. out_n is either the image's own component count or one
// more; in the latter case the extra byte of each pixel is set to 255.
2471 static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
2474 uint32 i, j, stride = x * out_n;
2476 int img_n = s->img_n; // copy it into a local for later
2477 assert(out_n == s->img_n || out_n == s->img_n + 1);
2478 if (stbi_png_partial)
2480 a->out = (uint8 *)stb_malloc(x * y * out_n);
2482 return e("outofmem", "Out of memory");
// size checks are skipped in partial-decode mode
2483 if (!stbi_png_partial)
// full image: the data must match exactly; sub-image: at least enough
2485 if ((s->img_x == x) && (s->img_y == y))
2487 if (raw_len != (img_n * x + 1) * y)
2488 return e("not enough pixels", "Corrupt PNG");
2492 if (raw_len < (img_n * x + 1) * y)
2493 return e("not enough pixels", "Corrupt PNG");
2496 for (j = 0; j < y; ++j)
2498 uint8 *cur = a->out + stride * j;
2499 uint8 *prior = cur - stride;
// each scanline begins with its filter-type byte
2500 int filter = *raw++;
2502 return e("invalid filter", "Corrupt PNG");
2503 // if first row, use special filter that doesn't sample previous row
2505 filter = first_row_filter[filter];
2506 // handle first pixel explicitly
2507 for (k = 0; k < img_n; ++k)
2518 cur[k] = raw[k] + prior[k];
2521 cur[k] = raw[k] + (prior[k] >> 1);
2524 cur[k] = (uint8)(raw[k] + paeth(0, prior[k], 0));
// remaining pixels of the row, input and output having the same pixel size:
// every pointer advances by img_n
2539 // this is a little gross, so that we don't switch per-pixel or per-component
2544 for (i = x - 1; i >= 1; --i, raw += img_n, cur += img_n, prior += img_n) \
2545 for (k = 0; k < img_n; ++k)
2548 CASE(F_none) cur[k] = raw[k];
2550 CASE(F_sub) cur[k] = raw[k] + cur[k - img_n];
2552 CASE(F_up) cur[k] = raw[k] + prior[k];
2554 CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k - img_n]) >> 1);
2556 CASE(F_paeth) cur[k] = (uint8)(raw[k] + paeth(cur[k - img_n], prior[k], prior[k - img_n]));
2558 CASE(F_avg_first) cur[k] = raw[k] + (cur[k - img_n] >> 1);
2560 CASE(F_paeth_first) cur[k] = (uint8)(raw[k] + paeth(cur[k - img_n], 0, 0));
// output has one extra byte per pixel: raw advances by img_n, cur/prior by
// out_n, and the extra byte is set to 255 after each pixel
2567 assert(img_n + 1 == out_n);
2570 for (i = x - 1; i >= 1; --i, cur[img_n] = 255, raw += img_n, cur += out_n, prior += out_n) \
2571 for (k = 0; k < img_n; ++k)
2574 CASE(F_none) cur[k] = raw[k];
2576 CASE(F_sub) cur[k] = raw[k] + cur[k - out_n];
2578 CASE(F_up) cur[k] = raw[k] + prior[k];
2580 CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k - out_n]) >> 1);
2582 CASE(F_paeth) cur[k] = (uint8)(raw[k] + paeth(cur[k - out_n], prior[k], prior[k - out_n]));
2584 CASE(F_avg_first) cur[k] = raw[k] + (cur[k - out_n] >> 1);
2586 CASE(F_paeth_first) cur[k] = (uint8)(raw[k] + paeth(cur[k - out_n], 0, 0));
2595 static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
2601 return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y);
2602 save = stbi_png_partial;
2603 stbi_png_partial = 0;
2606 final = (uint8 *)stb_malloc(a->s.img_x * a->s.img_y * out_n);
2607 for (p = 0; p < 7; ++p)
2609 int xorig[] = { 0, 4, 0, 2, 0, 1, 0 };
2610 int yorig[] = { 0, 0, 4, 0, 2, 0, 1 };
2611 int xspc[] = { 8, 8, 4, 4, 2, 2, 1 };
2612 int yspc[] = { 8, 8, 8, 4, 4, 2, 2 };
2614 // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
2615 x = (a->s.img_x - xorig[p] + xspc[p] - 1) / xspc[p];
2616 y = (a->s.img_y - yorig[p] + yspc[p] - 1) / yspc[p];
2619 if (!create_png_image_raw(a, raw, raw_len, out_n, x, y))
2624 for (j = 0; j < y; ++j)
2625 for (i = 0; i < x; ++i)
2626 memcpy(final + (j * yspc[p] + yorig[p]) * a->s.img_x * out_n + (i * xspc[p] + xorig[p]) * out_n,
2627 a->out + (j * x + i) * out_n, out_n);
2629 raw += (x * out_n + 1) * y;
2630 raw_len -= (x * out_n + 1) * y;
2635 stbi_png_partial = save;
// Applies colour-key transparency to the decoded image: a pixel whose colour
// equals tc gets alpha 0. out_n must be 2 (grey + alpha) or 4 (RGB + alpha).
2639 static int compute_transparency(png *z, uint8 tc[3], int out_n)
2642 uint32 i, pixel_count = s->img_x * s->img_y;
2645 // compute color-based transparency, assuming we've
2646 // already got 255 as the alpha value in the output
2647 assert(out_n == 2 || out_n == 4);
// grey + alpha: compare the single grey sample
2651 for (i = 0; i < pixel_count; ++i)
2653 p[1] = (p[0] == tc[0] ? 0 : 255);
// RGB + alpha: all three colour samples must match the key
2659 for (i = 0; i < pixel_count; ++i)
2661 if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
// Replaces the palette-index image in a->out with a newly allocated image of
// pal_img_n bytes per pixel, looking each index up in `palette` (4 bytes per
// entry). `len` is not used.
2669 static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
2671 VOGL_NOTE_UNUSED(len);
2672 uint32 i, pixel_count = a->s.img_x * a->s.img_y;
2673 uint8 *p, *temp_out, *orig = a->out;
2675 p = (uint8 *)stb_malloc(pixel_count * pal_img_n);
2677 return e("outofmem", "Out of memory");
2679 // between here and stb_free(out) below, exiting would leak
// 3-byte output: the 4th byte of each palette entry is dropped
// NOTE(review): orig[i] is used as an index without a visible bound check
// against the number of palette entries
2684 for (i = 0; i < pixel_count; ++i)
2686 int n = orig[i] * 4;
2688 p[1] = palette[n + 1];
2689 p[2] = palette[n + 2];
// 4-byte output: the 4th byte of each palette entry is copied too
2695 for (i = 0; i < pixel_count; ++i)
2697 int n = orig[i] * 4;
2699 p[1] = palette[n + 1];
2700 p[2] = palette[n + 2];
2701 p[3] = palette[n + 3];
// Walks the chunks of a PNG stream. Depending on `scan` it stops after the
// signature check (SCAN_type), after the header information is known
// (SCAN_header), or decodes the full image (SCAN_load), in which case the
// IDAT payload is accumulated, inflated and un-filtered at IEND. req_comp is
// the caller's requested component count. Errors are reported through e().
2710 static int parse_png_file(png *z, int scan, int req_comp)
// palette holds 256 entries of 4 bytes each
2712 uint8 palette[1024], pal_img_n = 0;
2713 uint8 has_trans = 0, tc[3];
2714 uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
2715 int first = 1, k, interlace = 0;
2718 if (!check_png_header(s))
2721 if (scan == SCAN_type)
2726 chunk c = get_chunk_header(s);
2727 if (first && c.type != PNG_TYPE('I', 'H', 'D', 'R'))
2728 return e("first not IHDR", "Corrupt PNG");
2731 case PNG_TYPE('I', 'H', 'D', 'R')
2734 int depth, color, comp, filter;
2736 return e("multiple IHDR", "Corrupt PNG");
2738 return e("bad IHDR len", "Corrupt PNG");
// each dimension is limited to 1 << 24
2739 s->img_x = get32(s);
2740 if (s->img_x > (1 << 24))
2741 return e("too large", "Very large image (corrupt?)");
2742 s->img_y = get32(s);
2743 if (s->img_y > (1 << 24))
2744 return e("too large", "Very large image (corrupt?)");
2747 return e("8bit only", "PNG not supported: 8-bit only");
2750 return e("bad ctype", "Corrupt PNG");
2754 return e("bad ctype", "Corrupt PNG");
2757 return e("bad comp method", "Corrupt PNG");
2760 return e("bad filter method", "Corrupt PNG");
2761 interlace = get8(s);
2763 return e("bad interlace method", "Corrupt PNG");
2764 if (!s->img_x || !s->img_y)
2765 return e("0-pixel image", "Corrupt PNG");
// colour-type bit 1 set -> 3 colour samples, else 1; bit 2 adds alpha
2768 s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
2769 if ((1 << 30) / s->img_x / s->img_n < s->img_y)
2770 return e("too large", "Image too large to decode");
2771 if (scan == SCAN_header)
2776 // if paletted, then pal_n is our final components, and
2777 // img_n is # components to decompress/filter.
2779 if ((1 << 30) / s->img_x / 4 < s->img_y)
2780 return e("too large", "Corrupt PNG");
2781 // if SCAN_header, have to scan to see if we have a tRNS
2786 case PNG_TYPE('P', 'L', 'T', 'E')
// at most 256 entries of 3 bytes, and the length must be a multiple of 3
2789 if (c.length > 256 * 3)
2790 return e("invalid PLTE", "Corrupt PNG");
2791 pal_len = c.length / 3;
2792 if (pal_len * 3 != c.length)
2793 return e("invalid PLTE", "Corrupt PNG");
// stored as 4 bytes per entry; the 4th byte defaults to 255
2794 for (i = 0; i < pal_len; ++i)
2796 palette[i * 4 + 0] = get8u(s);
2797 palette[i * 4 + 1] = get8u(s);
2798 palette[i * 4 + 2] = get8u(s);
2799 palette[i * 4 + 3] = 255;
2804 case PNG_TYPE('t', 'R', 'N', 'S')
2808 return e("tRNS after IDAT", "Corrupt PNG");
2811 if (scan == SCAN_header)
2817 return e("tRNS before PLTE", "Corrupt PNG");
2818 if (c.length > pal_len)
2819 return e("bad tRNS len", "Corrupt PNG");
// palette image: tRNS overrides the 4th byte of the leading entries
2821 for (i = 0; i < c.length; ++i)
2822 palette[i * 4 + 3] = get8u(s);
// non-palette image: an even component count already includes alpha
2826 if (!(s->img_n & 1))
2827 return e("tRNS with alpha", "Corrupt PNG");
2828 if (c.length != (uint32)s->img_n * 2)
2829 return e("bad tRNS len", "Corrupt PNG");
// colour key: one 16-bit value per component, truncated to 8 bits
2831 for (k = 0; k < s->img_n; ++k)
2832 tc[k] = (uint8)get16(s); // non 8-bit images will be larger
2837 case PNG_TYPE('I', 'D', 'A', 'T')
2840 if (pal_img_n && !pal_len)
2841 return e("no PLTE", "Corrupt PNG");
2842 if (scan == SCAN_header)
2844 s->img_n = pal_img_n;
// grow the accumulation buffer until this chunk's payload fits
2847 if (ioff + c.length > idata_limit)
2850 if (idata_limit == 0)
2851 idata_limit = c.length > 4096 ? c.length : 4096;
2852 while (ioff + c.length > idata_limit)
// the result goes into p first, so z->idata stays valid if realloc fails
2854 p = (uint8 *)stb_realloc(z->idata, idata_limit);
2856 return e("outofmem", "Out of memory");
2859 #ifndef STBI_NO_STDIO
2862 if (fread(z->idata + ioff, 1, c.length, s->img_file) != c.length)
2863 return e("outofdata", "Corrupt PNG");
// NOTE(review): no visible check that c.length bytes remain in the memory
// buffer before this copy -- confirm against the surrounding code
2868 memcpy(z->idata + ioff, s->img_buffer, c.length);
2869 s->img_buffer += c.length;
2875 case PNG_TYPE('I', 'E', 'N', 'D')
2879 if (scan != SCAN_load)
2881 if (z->idata == NULL)
2882 return e("no IDAT", "Corrupt PNG");
// inflate everything accumulated from the IDAT chunks
2883 z->expanded = (uint8 *)stbi_zlib_decode_malloc((char *)z->idata, ioff, (int *)&raw_len);
2884 if (z->expanded == NULL)
2885 return 0; // zlib should set error
// add an extra output byte per pixel when the caller asked for exactly one
// more component (non-palette, not 3) or when a colour key is present
2888 if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
2889 s->img_out_n = s->img_n + 1;
2891 s->img_out_n = s->img_n;
2892 if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace))
2895 if (!compute_transparency(z, tc, s->img_out_n))
2899 // pal_img_n == 3 or 4
2900 s->img_n = pal_img_n; // record the actual colors we had
2901 s->img_out_n = pal_img_n;
2903 s->img_out_n = req_comp;
2904 if (!expand_palette(z, palette, pal_len, s->img_out_n))
2907 stb_free(z->expanded);
2913 // if critical, fail
// bit 29 of the packed type is bit 5 of the first type character
2914 if ((c.type & (1 << 29)) == 0)
2916 #ifndef STBI_NO_FAILURE_STRINGS
// static buffer: the XXXX placeholder is overwritten with the chunk type
2918 static char invalid_chunk[] = "XXXX chunk not known";
2919 invalid_chunk[0] = (uint8)(c.type >> 24);
2920 invalid_chunk[1] = (uint8)(c.type >> 16);
2921 invalid_chunk[2] = (uint8)(c.type >> 8);
2922 invalid_chunk[3] = (uint8)(c.type >> 0);
2924 return e(invalid_chunk, "PNG not supported: unknown chunk type");
2929 // end of chunk, read and skip CRC
// Decodes a PNG by running parse_png_file in SCAN_load mode and works on the
// pixel buffer held in `result`.
// NOTE(review): only part of the body is visible in this listing (the embedded
// line numbers skip), so the notes below cover the visible statements only.
2934 static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
2936 unsigned char *result = NULL;
// req_comp must lie in 0..4; anything else is reported as an internal error.
2940 if (req_comp < 0 || req_comp > 4)
2941 return epuc("bad req_comp", "Internal error");
2942 if (parse_png_file(p, SCAN_load, req_comp))
// A nonzero req_comp that differs from the decoded channel count is satisfied
// by converting the buffer, after which img_out_n reflects the new count.
2946 if (req_comp && req_comp != p->s.img_out_n)
2948 result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y);
2949 p->s.img_out_n = req_comp;
// Release the buffer tracked in p->expanded.
2960 stb_free(p->expanded);
2968 #ifndef STBI_NO_STDIO
// Loads a PNG from an already-open FILE: binds f to the reader state with
// start_file, then returns whatever do_png produces for (x, y, comp, req_comp).
2969 unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
2972 start_file(&p.s, f);
2973 return do_png(&p, x, y, comp, req_comp);
// Loads a PNG by filename: opens the file in binary read mode and decodes it
// through stbi_png_load_from_file.
// NOTE(review): the fopen failure check and the fclose are not visible in this
// listing -- confirm they exist in the full source.
2976 unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp)
2978 unsigned char *data;
2979 FILE *f = fopen(filename, "rb");
2982 data = stbi_png_load_from_file(f, x, y, comp, req_comp);
// Loads a PNG from a memory buffer of `len` bytes: binds the buffer to the
// reader state with start_mem, then returns do_png's result.
2988 unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
2991 start_mem(&p.s, buffer, len);
2992 return do_png(&p, x, y, comp, req_comp);
2995 #ifndef STBI_NO_STDIO
// Probes an open FILE for PNG content by running parse_png_file in SCAN_type
// mode, then seeks the stream to offset n.
// NOTE(review): n is presumably the position saved on entry; its assignment is
// not visible in this listing.
2996 int stbi_png_test_file(FILE *f)
3001 start_file(&p.s, f);
3002 r = parse_png_file(&p, SCAN_type, STBI_default);
3003 fseek(f, n, SEEK_SET);
// Probes a memory buffer for PNG content: returns the result of
// parse_png_file run in SCAN_type mode over buffer/len.
3008 int stbi_png_test_memory(stbi_uc const *buffer, int len)
3011 start_mem(&p.s, buffer, len);
3012 return parse_png_file(&p, SCAN_type, STBI_default);
3015 // TODO: load header from png
3016 #ifndef STBI_NO_STDIO
// Queries PNG header information by filename: opens the file in binary read
// mode and runs parse_png_file in SCAN_header mode.
// NOTE(review): how x, y and comp are filled in, and the file close, are not
// visible in this listing.
3017 int stbi_png_info(char const *filename, int *x, int *y, int *comp)
3020 FILE *f = fopen(filename, "rb");
3023 start_file(&p.s, f);
3024 if (parse_png_file(&p, SCAN_header, 0))
3039 extern int stbi_png_info_from_file(FILE *f, int *x, int *y, int *comp);
3041 extern int stbi_png_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
3043 // Microsoft/Windows BMP image
// Checks whether the stream looks like a BMP: skips the file-header fields
// listed below and accepts only the header sizes 12, 40, 56 and 108.
// NOTE(review): the signature check and the read of `sz` are not visible in
// this listing.
3045 static int bmp_test(stbi *s)
3052 get32le(s); // discard filesize
3053 get16le(s); // discard reserved
3054 get16le(s); // discard reserved
3055 get32le(s); // discard data offset
3057 if (sz == 12 || sz == 40 || sz == 56 || sz == 108)
3062 #ifndef STBI_NO_STDIO
// Probes an open FILE for BMP content. The stream position is captured with
// ftell on entry and restored with fseek afterwards.
// NOTE(review): the actual test call between the two is not visible here.
3063 int stbi_bmp_test_file(FILE *f)
3066 int r, n = (int)ftell(f);
3069 fseek(f, n, SEEK_SET);
// Probes a memory buffer for BMP content: binds buffer/len to a reader with
// start_mem and returns bmp_test's verdict.
3074 int stbi_bmp_test_memory(stbi_uc const *buffer, int len)
3077 start_mem(&s, buffer, len);
3078 return bmp_test(&s);
3081 // returns 0..31 for the highest set bit
3082 static int high_bit(unsigned int z)
// Counts the set bits of `a` by summing progressively wider bit fields in
// parallel: 1-bit pairs, then 2-bit groups, then nibbles, then bytes and
// half-words.
// NOTE(review): the return statement is not visible in this listing; after the
// last step the total sits in the low byte of `a`.
3100 static int bitcount(unsigned int a)
3102 a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2
3103 a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4
3104 a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
3105 a = (a + (a >> 8)); // max 16 per 8 bits
3106 a = (a + (a >> 16)); // max 32 per 8 bits
3110 static int shiftsigned(int v, int shift, int bits)
// Decodes a BMP image from reader `s` into a freshly allocated pixel buffer.
// Visible behaviour: accepts header sizes 12, 40, 56 and 108; rejects 1-bit
// and RLE-compressed files; decodes paletted data through `pal`, and 16/24/32
// bit data either directly (byte-swapped BGR[A]) or through the channel masks
// mr/mg/mb/ma; optionally flips the rows and converts to req_comp channels.
// NOTE(review): this listing omits parts of the body (the embedded line
// numbers skip); the comments describe the visible statements only.
3130 static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3133 unsigned int mr = 0, mg = 0, mb = 0, ma = 0, fake_a = 0;
3135 stbi_uc pal[256][4];
3136 int psize = 0, i, j, compress = 0, width;
3137 int bpp, flip_vertically, pad, target, offset, hsz;
3138 if (get8(s) != 'B' || get8(s) != 'M')
3139 return epuc("not BMP", "Corrupt BMP");
3140 get32le(s); // discard filesize
3141 get16le(s); // discard reserved
3142 get16le(s); // discard reserved
3143 offset = get32le(s);
3145 if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108)
3146 return epuc("unknown BMP", "BMP type not supported: unknown");
3147 failure_reason = "bad BMP";
// Two dimension encodings are visible: 16-bit fields and 32-bit fields.
3150 s->img_x = get16le(s);
3151 s->img_y = get16le(s);
3155 s->img_x = get32le(s);
3156 s->img_y = get32le(s);
3158 if (get16le(s) != 1)
3162 return epuc("monochrome", "BMP type not supported: 1-bit");
// A positive stored height requests the vertical flip applied at the end.
3163 flip_vertically = ((int)s->img_y) > 0;
3164 s->img_y = abs((int)s->img_y);
3168 psize = (offset - 14 - 24) / 3;
3172 compress = get32le(s);
3173 if (compress == 1 || compress == 2)
3174 return epuc("BMP RLE", "BMP type not supported: RLE");
3175 get32le(s); // discard sizeof
3176 get32le(s); // discard hres
3177 get32le(s); // discard vres
3178 get32le(s); // discard colorsused
3179 get32le(s); // discard max important
3180 if (hsz == 40 || hsz == 56)
3189 if (bpp == 16 || bpp == 32)
3200 fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
3209 else if (compress == 3)
3214 // not documented, but generated by photoshop and handled by mspaint
3215 if (mr == mg && mg == mb)
3232 get32le(s); // discard color space
3233 for (i = 0; i < 12; ++i)
3234 get32le(s); // discard color space parameters
3237 psize = (offset - 14 - hsz) >> 2;
3239 s->img_n = ma ? 4 : 3;
3240 if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
3243 target = s->img_n; // if they want monochrome, we'll post-convert
3244 out = (stbi_uc *)stb_malloc(target * s->img_x * s->img_y);
3246 return epuc("outofmem", "Out of memory");
3250 if (psize == 0 || psize > 256)
3253 return epuc("invalid", "Corrupt BMP");
// Palette entries are stored blue-first; store them as R,G,B.
3255 for (i = 0; i < psize; ++i)
3257 pal[i][2] = get8(s);
3258 pal[i][1] = get8(s);
3259 pal[i][0] = get8(s);
3264 skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
3266 width = (s->img_x + 1) >> 1;
3272 return epuc("bad bpp", "Corrupt BMP");
3275 for (j = 0; j < (int)s->img_y; ++j)
3277 for (i = 0; i < (int)s->img_x; i += 2)
3279 int v = get8(s), v2 = 0;
3285 out[z++] = pal[v][0];
3286 out[z++] = pal[v][1];
3287 out[z++] = pal[v][2];
3290 if (i + 1 == (int)s->img_x)
3292 v = (bpp == 8) ? get8(s) : v2;
3293 out[z++] = pal[v][0];
3294 out[z++] = pal[v][1];
3295 out[z++] = pal[v][2];
3304 int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
3307 skip(s, offset - 14 - hsz);
3309 width = 3 * s->img_x;
3311 width = 2 * s->img_x;
3312 else /* bpp = 32 and pad = 0 */
// Fast path for plain little-endian BGRA: the red mask must be 0x00ff0000
// (bits 16..23). The previous test compared it with the alpha mask value
// 0xff000000, so the standard layout never took the fast path.
3321 if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
3326 if (!mr || !mg || !mb)
3327 return epuc("bad masks", "Corrupt BMP");
3328 // right shift amt to put high bit in position #7
// Each channel's bit count must come from its own mask; using the red mask for
// all four gave wrong scaling whenever channel widths differ (e.g. 5-6-5).
3329 rshift = high_bit(mr) - 7;
3330 rcount = bitcount(mr);
3331 gshift = high_bit(mg) - 7;
3332 gcount = bitcount(mg);
3333 bshift = high_bit(mb) - 7;
3334 bcount = bitcount(mb);
3335 ashift = high_bit(ma) - 7;
3336 acount = bitcount(ma);
3338 for (j = 0; j < (int)s->img_y; ++j)
3342 for (i = 0; i < (int)s->img_x; ++i)
// Direct path: bytes arrive blue, green, red and are stored as R,G,B.
3345 out[z + 2] = get8(s);
3346 out[z + 1] = get8(s);
3347 out[z + 0] = get8(s);
3349 a = (easy == 2 ? get8(s) : 255);
3356 for (i = 0; i < (int)s->img_x; ++i)
// Masked path: isolate each channel with its mask and rescale it.
3358 uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
3360 out[z++] = shiftsigned(v & mr, rshift, rcount);
3361 out[z++] = shiftsigned(v & mg, gshift, gcount);
3362 out[z++] = shiftsigned(v & mb, bshift, bcount);
3363 a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
// Swap row j with its mirror row to flip the image vertically.
3371 if (flip_vertically)
3374 for (j = 0; j<(int)s->img_y>> 1; ++j)
3376 stbi_uc *p1 = out + j * s->img_x * target;
3377 stbi_uc *p2 = out + (s->img_y - 1 - j) * s->img_x * target;
3378 for (i = 0; i < (int)s->img_x * target; ++i)
3380 t = p1[i], p1[i] = p2[i], p2[i] = t;
3385 if (req_comp && req_comp != target)
3387 out = convert_format(out, target, req_comp, s->img_x, s->img_y);
3389 return out; // convert_format frees input on failure
3399 #ifndef STBI_NO_STDIO
// Loads a BMP by filename: opens the file in binary read mode and decodes it
// through stbi_bmp_load_from_file.
// NOTE(review): the fopen failure check and the fclose are not visible here.
3400 stbi_uc *stbi_bmp_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3403 FILE *f = fopen(filename, "rb");
3406 data = stbi_bmp_load_from_file(f, x, y, comp, req_comp);
// Loads a BMP from an open FILE and returns bmp_load's result.
// NOTE(review): the setup of the local reader `s` is not visible here.
3411 stbi_uc *stbi_bmp_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3415 return bmp_load(&s, x, y, comp, req_comp);
// Loads a BMP from a memory buffer of `len` bytes: binds the buffer to a
// reader with start_mem and returns bmp_load's result.
3419 stbi_uc *stbi_bmp_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3422 start_mem(&s, buffer, len);
3423 return bmp_load(&s, x, y, comp, req_comp);
3426 // Targa Truevision - TGA
3427 // by Jonathan Dummer
// Heuristic check that the stream holds a TGA image: walks the header fields
// in order, returning 0 as soon as one is out of range and 1 if all pass.
// NOTE(review): several of the conditions guarding the `return 0` lines are
// not visible in this listing.
3429 static int tga_test(stbi *s)
3432 get8u(s); // discard Offset
3433 sz = get8u(s); // color type
3435 return 0; // only RGB or indexed allowed
3436 sz = get8u(s); // image type
3437 if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11))
3438 return 0; // only RGB or grey allowed, +/- RLE
3439 get16(s); // discard palette start
3440 get16(s); // discard palette length
3441 get8(s); // discard bits per palette color entry
3442 get16(s); // discard x origin
3443 get16(s); // discard y origin
3445 return 0; // test width
3447 return 0; // test height
3448 sz = get8(s); // bits per pixel
3449 if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32))
3450 return 0; // only RGB or RGBA or grey allowed
3451 return 1; // seems to have passed everything
3454 #ifndef STBI_NO_STDIO
// Probes an open FILE for TGA content. The stream position is captured with
// ftell on entry and restored with fseek afterwards.
// NOTE(review): the actual test call between the two is not visible here.
3455 int stbi_tga_test_file(FILE *f)
3458 int r, n = (int)ftell(f);
3461 fseek(f, n, SEEK_SET);
// Probes a memory buffer for TGA content: binds buffer/len to a reader with
// start_mem and returns tga_test's verdict.
3466 int stbi_tga_test_memory(stbi_uc const *buffer, int len)
3469 start_mem(&s, buffer, len);
3470 return tga_test(&s);
// Decodes a TGA image from reader `s` into a newly allocated buffer of
// tga_width * tga_height * req_comp bytes. Each source pixel (raw or looked up
// in the palette, optionally RLE-compressed) is first expanded to RGBA in
// trans_data and then written in the requested component layout.
// NOTE(review): this listing omits parts of the body (the embedded line
// numbers skip); the comments describe the visible statements only. No NULL
// check is visible after either stb_malloc call -- confirm in the full source.
3473 static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3475 // read in the TGA header stuff
3476 int tga_offset = get8u(s);
3477 int tga_indexed = get8u(s);
3478 int tga_image_type = get8u(s);
3480 int tga_palette_start = get16le(s);
3481 int tga_palette_len = get16le(s);
3482 int tga_palette_bits = get8u(s);
3483 int tga_x_origin = get16le(s);
3484 int tga_y_origin = get16le(s);
3485 int tga_width = get16le(s);
3486 int tga_height = get16le(s);
3487 int tga_bits_per_pixel = get8u(s);
3488 int tga_inverted = get8u(s);
3490 unsigned char *tga_data;
3491 unsigned char *tga_palette = NULL;
3493 unsigned char raw_data[4];
3494 unsigned char trans_data[4] = { 0, 0, 0, 0 };
3496 int RLE_repeating = 0;
3497 int read_next_pixel = 1;
3498 // do a tiny bit of processing
// Image types 8 and above are reduced by 8 to their base type.
3499 if (tga_image_type >= 8)
3501 tga_image_type -= 8;
3504 /* int tga_alpha_bits = tga_inverted & 15; */
// Bit 5 of the descriptor byte decides whether rows get flipped at the end.
3505 tga_inverted = 1 - ((tga_inverted >> 5) & 1);
3508 if ( //(tga_indexed) ||
3509 (tga_width < 1) || (tga_height < 1) ||
3510 (tga_image_type < 1) || (tga_image_type > 3) ||
3511 ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
3512 (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)))
3517 // If I'm paletted, then I'll use the number of bits from the palette
3520 tga_bits_per_pixel = tga_palette_bits;
3526 if ((req_comp < 1) || (req_comp > 4))
3528 // just use whatever the file was
3529 req_comp = tga_bits_per_pixel / 8;
3534 // force a new number of components
3535 *comp = tga_bits_per_pixel / 8;
3537 tga_data = (unsigned char *)stb_malloc(tga_width * tga_height * req_comp);
3539 // skip to the data's starting position (offset usually = 0)
3540 skip(s, tga_offset);
3541 // do I need to load a palette?
3544 // any data to skip? (offset usually = 0)
3545 skip(s, tga_palette_start);
3547 tga_palette = (unsigned char *)stb_malloc(tga_palette_len * tga_palette_bits / 8);
3548 getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8);
3551 for (i = 0; i < tga_width * tga_height; ++i)
3553 // if I'm in RLE mode, do I need to get a RLE chunk?
3558 // yep, get the next byte as a RLE command
// Low 7 bits hold (count - 1); the top bit selects a repeated pixel.
3559 int RLE_cmd = get8u(s);
3560 RLE_count = 1 + (RLE_cmd & 127);
3561 RLE_repeating = RLE_cmd >> 7;
3562 read_next_pixel = 1;
3564 else if (!RLE_repeating)
3566 read_next_pixel = 1;
3571 read_next_pixel = 1;
3573 // OK, if I need to read a pixel, do it now
3574 if (read_next_pixel)
3576 // load however much data we did have
3579 // read in 1 byte, then perform the lookup
3580 int pal_idx = get8u(s);
3581 if (pal_idx >= tga_palette_len)
3586 pal_idx *= tga_bits_per_pixel / 8;
3587 for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
3589 raw_data[j] = tga_palette[pal_idx + j];
3594 // read in the data raw
3595 for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
3597 raw_data[j] = get8u(s);
3600 // convert raw to the intermediate format
3601 switch (tga_bits_per_pixel)
3605 trans_data[0] = raw_data[0];
3606 trans_data[1] = raw_data[0];
3607 trans_data[2] = raw_data[0];
3608 trans_data[3] = 255;
3611 // Luminous,Alpha => RGBA
3612 trans_data[0] = raw_data[0];
3613 trans_data[1] = raw_data[0];
3614 trans_data[2] = raw_data[0];
3615 trans_data[3] = raw_data[1];
// Three-byte source pixel: byte order is reversed into trans_data, alpha opaque.
3619 trans_data[0] = raw_data[2];
3620 trans_data[1] = raw_data[1];
3621 trans_data[2] = raw_data[0];
3622 trans_data[3] = 255;
// Four-byte source pixel: first three bytes reversed, fourth kept as alpha.
3626 trans_data[0] = raw_data[2];
3627 trans_data[1] = raw_data[1];
3628 trans_data[2] = raw_data[0];
3629 trans_data[3] = raw_data[3];
3632 // clear the reading flag for the next pixel
3633 read_next_pixel = 0;
3634 } // end of reading a pixel
3635 // convert to final format
3639 // RGBA => Luminance
3640 tga_data[i * req_comp + 0] = compute_y(trans_data[0], trans_data[1], trans_data[2]);
3643 // RGBA => Luminance,Alpha
3644 tga_data[i * req_comp + 0] = compute_y(trans_data[0], trans_data[1], trans_data[2]);
3645 tga_data[i * req_comp + 1] = trans_data[3];
3649 tga_data[i * req_comp + 0] = trans_data[0];
3650 tga_data[i * req_comp + 1] = trans_data[1];
3651 tga_data[i * req_comp + 2] = trans_data[2];
3655 tga_data[i * req_comp + 0] = trans_data[0];
3656 tga_data[i * req_comp + 1] = trans_data[1];
3657 tga_data[i * req_comp + 2] = trans_data[2];
3658 tga_data[i * req_comp + 3] = trans_data[3];
3661 // in case we're in RLE mode, keep counting down
3664 // do I need to invert the image?
// Swap row j with row (tga_height - 1 - j), one byte at a time.
3667 for (j = 0; j * 2 < tga_height; ++j)
3669 int index1 = j * tga_width * req_comp;
3670 int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3671 for (i = tga_width * req_comp; i > 0; --i)
3673 unsigned char temp = tga_data[index1];
3674 tga_data[index1] = tga_data[index2];
3675 tga_data[index2] = temp;
3681 // clear my palette, if I had one
3682 if (tga_palette != NULL)
3684 stb_free(tga_palette);
3686 // the things I do to get rid of an error message, and yet keep
3687 // Microsoft's C compilers happy... [8^(
3688 tga_palette_start = tga_palette_len = tga_palette_bits =
3689 tga_x_origin = tga_y_origin = 0;
3694 #ifndef STBI_NO_STDIO
// Loads a TGA by filename: opens the file in binary read mode and decodes it
// through stbi_tga_load_from_file.
// NOTE(review): the fopen failure check and the fclose are not visible here.
3695 stbi_uc *stbi_tga_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3698 FILE *f = fopen(filename, "rb");
3701 data = stbi_tga_load_from_file(f, x, y, comp, req_comp);
// Loads a TGA from an open FILE and returns tga_load's result.
// NOTE(review): the setup of the local reader `s` is not visible here.
3706 stbi_uc *stbi_tga_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3710 return tga_load(&s, x, y, comp, req_comp);
// Loads a TGA from a memory buffer of `len` bytes: binds the buffer to a
// reader with start_mem and returns tga_load's result.
3714 stbi_uc *stbi_tga_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3717 start_mem(&s, buffer, len);
3718 return tga_load(&s, x, y, comp, req_comp);
3721 // *************************************************************************************************
3722 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB
// Checks for the PSD signature: the first 32-bit value read with get32 must be
// 0x38425053 (the ASCII bytes "8BPS").
// NOTE(review): the return statements are not visible in this listing.
3724 static int psd_test(stbi *s)
3726 if (get32(s) != 0x38425053)
3732 #ifndef STBI_NO_STDIO
// Probes an open FILE for PSD content. The reader is zeroed before use, and
// the stream position captured with ftell is restored with fseek afterwards.
// NOTE(review): the actual test call between the two is not visible here.
3733 int stbi_psd_test_file(FILE *f)
3736 int r, n = (int)ftell(f);
3737 memset(&s, 0, sizeof(s));
3740 fseek(f, n, SEEK_SET);
// Probes a memory buffer for PSD content: binds buffer/len to a reader with
// start_mem and returns psd_test's verdict.
3745 int stbi_psd_test_memory(stbi_uc const *buffer, int len)
3748 start_mem(&s, buffer, len);
3749 return psd_test(&s);
// Decodes the composited image of a PSD file from reader `s` into a newly
// allocated 4-bytes-per-pixel buffer, then converts to req_comp channels when
// req_comp is nonzero and not 4. Visible validation: "8BPS" signature, channel
// count 0..16, and compression 0 (raw) or 1 (RLE). Channels the file does not
// contain are filled with a default: 255 for channel 3, otherwise 0.
// NOTE(review): this listing omits parts of the body (the embedded line
// numbers skip); the comments describe the visible statements only.
3752 static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3755 int channelCount, compression;
3756 int channel, i, count, len;
3761 if (get32(s) != 0x38425053) // "8BPS"
3762 return epuc("not PSD", "Corrupt PSD image");
3764 // Check file type version.
3766 return epuc("wrong version", "Unsupported version of PSD image");
3768 // Skip 6 reserved bytes.
3771 // Read the number of channels (R, G, B, A, etc).
3772 channelCount = get16(s);
3773 if (channelCount < 0 || channelCount > 16)
3774 return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3776 // Read the rows and columns of the image.
3780 // Make sure the depth is 8 bits.
3782 return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3784 // Make sure the color mode is RGB.
3785 // Valid options are:
3795 return epuc("wrong color format", "PSD is not in RGB color format");
3797 // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
3800 // Skip the image resources. (resolution, pen tool paths, etc)
3803 // Skip the reserved data.
3806 // Find out if the data is compressed.
3808 // 0: no compression
3809 // 1: RLE compressed
3810 compression = get16(s);
3811 if (compression > 1)
3812 return epuc("bad compression", "PSD has an unknown compression format");
3814 // Create the destination image.
3815 out = (stbi_uc *)stb_malloc(4 * w * h);
3817 return epuc("outofmem", "Out of memory");
3820 // Initialize the data to zero.
3821 //memset( out, 0, pixelCount * 4 );
3823 // Finally, the image data.
3826 // RLE as used by .PSD and .TIFF
3827 // Loop until you get the number of unpacked bytes you are expecting:
3828 // Read the next source byte into n.
3829 // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3830 // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3831 // Else if n is 128, noop.
3834 // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
3835 // which we're going to just skip.
3836 skip(s, h * channelCount * 2);
3838 // Read the RLE data by channel.
3839 for (channel = 0; channel < 4; channel++)
3844 if (channel >= channelCount)
3846 // Fill this channel with default data.
3847 for (i = 0; i < pixelCount; i++)
3848 *p = (channel == 3 ? 255 : 0), p += 4;
3852 // Read the RLE data.
3854 while (count < pixelCount)
3863 // Copy next len+1 bytes literally.
3876 // Next -len+1 bytes in the dest are replicated from next source byte.
3877 // (Interpret len as a negative 8-bit int.)
3895 // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
3896 // where each channel consists of an 8-bit value for each pixel in the image.
3898 // Read the data by channel.
3899 for (channel = 0; channel < 4; channel++)
// Channels are numbered from 0, so channel == channelCount is already absent
// from the file. The test must be >= (matching the RLE path above); with > the
// first missing channel was read from the stream instead of being defaulted.
3904 if (channel >= channelCount)
3906 // Fill this channel with default data.
3907 for (i = 0; i < pixelCount; i++)
3908 *p = channel == 3 ? 255 : 0, p += 4;
3914 for (i = 0; i < pixelCount; i++)
3915 *p = get8(s), p += 4;
3920 if (req_comp && req_comp != 4)
3922 out = convert_format(out, 4, req_comp, w, h);
3924 return out; // convert_format frees input on failure
3928 *comp = channelCount;
3935 #ifndef STBI_NO_STDIO
// Loads a PSD by filename: opens the file in binary read mode and decodes it
// through stbi_psd_load_from_file.
// NOTE(review): the fopen failure check and the fclose are not visible here.
3936 stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp)
3939 FILE *f = fopen(filename, "rb");
3942 data = stbi_psd_load_from_file(f, x, y, comp, req_comp);
// Loads a PSD from an open FILE and returns psd_load's result.
// NOTE(review): the setup of the local reader `s` is not visible here.
3947 stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
3951 return psd_load(&s, x, y, comp, req_comp);
// Loads a PSD from a memory buffer of `len` bytes: binds the buffer to a
// reader with start_mem and returns psd_load's result.
3955 stbi_uc *stbi_psd_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
3958 start_mem(&s, buffer, len);
3959 return psd_load(&s, x, y, comp, req_comp);
3962 // *************************************************************************************************
3963 // Radiance RGBE HDR loader
3964 // originally by Nicolas Schulz
// Checks for the Radiance signature by comparing the stream byte-for-byte
// against "#?RADIANCE\n".
// NOTE(review): the return statements are not visible in this listing.
3966 static int hdr_test(stbi *s)
3968 const char *signature = "#?RADIANCE\n";
3970 for (i = 0; signature[i]; ++i)
3971 if (get8(s) != signature[i])
// Probes a memory buffer for Radiance HDR content: binds buffer/len to a
// reader with start_mem and returns hdr_test's verdict.
3976 int stbi_hdr_test_memory(stbi_uc const *buffer, int len)
3979 start_mem(&s, buffer, len);
3980 return hdr_test(&s);
3983 #ifndef STBI_NO_STDIO
// Probes an open FILE for Radiance HDR content. The reader is zeroed before
// use, and the stream position captured with ftell is restored with fseek.
// NOTE(review): the actual test call between the two is not visible here.
3984 int stbi_hdr_test_file(FILE *f)
3987 int r, n = (int)ftell(f);
3988 memset(&s, 0, sizeof(s));
3991 fseek(f, n, SEEK_SET);
3996 #define HDR_BUFLEN 1024
// Reads characters from `z` into `buffer` until a newline or end of input.
// When the token reaches HDR_BUFLEN - 1 characters, the rest of the line is
// consumed and discarded.
// NOTE(review): the per-character store, the terminator write and the return
// value are not visible in this listing.
3997 static char *hdr_gettoken(stbi *z, char *buffer)
4005 while (!at_eof(z) && c != '\n')
4008 if (len == HDR_BUFLEN - 1)
4010 // flush to end of line
4011 while (!at_eof(z) && get8(z) != '\n')
// Converts one 4-byte RGBE pixel (`input`) into req_comp floats in `output`.
// input[3] is the shared exponent: the scale factor is 2^(input[3] - 136).
// The visible branches either average the three mantissas into output[0] or
// scale them individually; other visible lines write constant 1 or 0 values.
// NOTE(review): the conditions selecting between these branches are not
// visible in this listing.
4022 static void hdr_convert(float *output, stbi_uc *input, int req_comp)
4028 f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
4030 output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
4033 output[0] = input[0] * f1;
4034 output[1] = input[1] * f1;
4035 output[2] = input[2] * f1;
4047 output[3] = 1; /* fallthrough */
4049 output[0] = output[1] = output[2] = 0;
4052 output[1] = 1; /* fallthrough */
// Decodes a Radiance RGBE image from reader `s` into a newly allocated float
// buffer of height * width * req_comp elements. Visible steps: verify the
// "#?RADIANCE" signature, require the "FORMAT=32-bit_rle_rgbe" header line,
// parse a "-Y <height> +X <width>" resolution line, then read the pixels
// either flat or as RLE-encoded scanlines, converting each with hdr_convert.
// NOTE(review): this listing omits parts of the body (the embedded line
// numbers skip); the comments describe the visible statements only. No NULL
// check is visible after either stb_malloc call -- confirm in the full source.
4060 static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
4062 char buffer[HDR_BUFLEN];
4069 unsigned char count, value;
4070 int i, j, k, c1, c2, z;
4073 if (strcmp(hdr_gettoken(s, buffer), "#?RADIANCE") != 0)
4074 return epf("not HDR", "Corrupt HDR image");
4079 token = hdr_gettoken(s, buffer);
4082 if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
4087 return epf("unsupported format", "Unsupported HDR format");
4089 // Parse width and height
4090 // can't use sscanf() if we're not using stdio!
4091 token = hdr_gettoken(s, buffer);
4092 if (strncmp(token, "-Y ", 3))
4093 return epf("unsupported data layout", "Unsupported HDR format");
4095 height = (int)strtol(token, &token, 10);
4096 while (*token == ' ')
4098 if (strncmp(token, "+X ", 3))
4099 return epf("unsupported data layout", "Unsupported HDR format");
4101 width = (int)strtol(token, NULL, 10);
4111 hdr_data = (float *)stb_malloc(height * width * req_comp * sizeof(float));
4114 // image data is stored as some number of scanlines
// Widths outside 8..32767 take the flat (non-RLE) reading path below.
4115 if (width < 8 || width >= 32768)
4118 for (j = 0; j < height; ++j)
4120 for (i = 0; i < width; ++i)
4125 hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
4131 // Read RLE-encoded data
4134 for (j = 0; j < height; ++j)
4139 if (c1 != 2 || c2 != 2 || (len & 0x80))
4141 // not run-length encoded, so we have to actually use THIS data as a decoded
4142 // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
4143 stbi_uc rgbe[4] = {(stbi_uc)c1, (stbi_uc)c2, (stbi_uc)len, (stbi_uc)get8(s) };
4144 hdr_convert(hdr_data, rgbe, req_comp);
4148 goto main_decode_loop; // jumping into the flat loop is ugly, but the format forces it
4156 return epf("invalid decoded scanline length", "corrupt HDR");
4158 if (scanline == NULL)
4159 scanline = (stbi_uc *)stb_malloc(width * 4);
// Each of the 4 components is stored as its own run-length-coded plane.
4161 for (k = 0; k < 4; ++k)
4172 for (z = 0; z < count; ++z)
4173 scanline[i++ * 4 + k] = value;
4178 for (z = 0; z < count; ++z)
4179 scanline[i++ * 4 + k] = get8(s);
4183 for (i = 0; i < width; ++i)
4184 hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
4192 #ifndef STBI_NO_STDIO
// Loads a Radiance HDR image from an open FILE and returns hdr_load's result.
// NOTE(review): the setup of the local reader `s` is not visible here.
4193 float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
4197 return hdr_load(&s, x, y, comp, req_comp);
// Loads a Radiance HDR image from a memory buffer of `len` bytes: binds the
// buffer to a reader with start_mem and returns hdr_load's result.
4201 float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
4204 start_mem(&s, buffer, len);
4205 return hdr_load(&s, x, y, comp, req_comp);
4208 #endif // STBI_NO_HDR
4210 /////////////////////// write image ///////////////////////
4212 #ifndef STBI_NO_WRITE
// Writes a single byte to f with fwrite.
// NOTE(review): `z` is presumably x narrowed to one byte; its declaration is
// not visible in this listing. The fwrite result is not checked.
4214 static void write8(FILE *f, int x)
4217 fwrite(&z, 1, 1, f);
// Writes a sequence of integers taken from the va_list `v` under the control
// of `fmt`. Three value widths are visible: uint8, int16 and int32, each
// fetched as an int.
// NOTE(review): the format dispatch and the byte output are not visible here.
4220 static void writefv(FILE *f, char *fmt, va_list v)
4230 uint8 x = va_arg(v, int);
4236 int16 x = va_arg(v, int);
4243 int32 x = va_arg(v, int);
4258 static void writef(FILE *f, char *fmt, ...)
// Writes an x-by-y image of `comp` components per pixel to f, one row at a
// time, stepping j by vdir. rgb_dir selects the channel order (index 1 -/+
// rgb_dir). write_alpha < 0 writes only the last component, and > 0 appends
// it after the colour bytes. A four-component pixel is blended against the
// magenta background `bg` using its alpha. Each row is followed by
// scanline_pad bytes taken from `zero`.
// NOTE(review): the switch on comp and the row-direction setup are only
// partly visible in this listing.
4266 static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int write_alpha, int scanline_pad)
4268 uint8 bg[3] = { 255, 0, 255 }, px[3];
4273 j_end = -1, j = y - 1;
4277 for (; j != j_end; j += vdir)
4279 for (i = 0; i < x; ++i)
4281 uint8 *d = (uint8 *)data + (j * x + i) * comp;
4282 if (write_alpha < 0)
4283 fwrite(&d[comp - 1], 1, 1, f);
// Grey value replicated into three bytes.
4288 writef(f, (char *)"111", d[0], d[0], d[0]);
// Composite against bg: px = bg + (d - bg) * alpha / 255.
4293 for (k = 0; k < 3; ++k)
4294 px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
4295 writef(f, (char *)"111", px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
4300 writef(f, (char *)"111", d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
4303 if (write_alpha > 0)
4304 fwrite(&d[comp - 1], 1, 1, f);
4306 fwrite(&zero, scanline_pad, 1, f);
// Creates `filename` in binary write mode and emits the pixel data through
// write_pixels.
// NOTE(review): the header write driven by fmt/varargs, the fopen failure
// check, the fclose and the return value are not visible in this listing.
4310 static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...)
4312 FILE *f = fopen(filename, "wb");
4319 write_pixels(f, rgb_dir, vdir, x, y, comp, data, alpha, pad);
// Wide-character-filename variant of outfile: opens the file with _wfopen in
// binary write mode and emits the pixel data through write_pixels.
// NOTE(review): the header write driven by fmt/varargs, the open failure
// check, the fclose and the return value are not visible in this listing.
4326 static int outfile_w(wchar_t const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...)
4328 FILE *f = _wfopen(filename, L"wb");
4335 write_pixels(f, rgb_dir, vdir, x, y, comp, data, alpha, pad);
// Writes `data` as a 24-bit BMP via outfile, with rgb_dir and vdir both -1
// and no alpha output. `pad` rounds each 3-byte-per-pixel row up to a
// multiple of 4 bytes; the header arguments give a 14-byte file header and a
// 40-byte bitmap header.
// NOTE(review): the format string argument is not visible in this listing.
4342 int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
4344 int pad = (-x * 3) & 3;
4345 return outfile(filename, -1, -1, x, y, comp, data, 0, pad,
4348 'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0, 14 + 40, // file header
4349 40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header
// Wide-character-filename variant of the BMP writer: same padding and header
// values, routed through outfile_w.
// NOTE(review): the format string argument is not visible in this listing.
4353 int stbi_write_bmp_w(wchar_t const *filename, int x, int y, int comp, const void *data)
4355 int pad = (-x * 3) & 3;
4356 return outfile_w(filename, -1, -1, x, y, comp, data, 0, pad,
4359 'B', 'M', 14 + 40 + (x * 3 + pad) * y, 0, 0, 14 + 40, // file header
4360 40, x, y, 1, 24, 0, 0, 0, 0, 0, 0); // bitmap header
// Writes `data` as a TGA via outfile. An even component count (2 or 4) is
// treated as having alpha, which raises the pixel depth from 24 to 32 bits and
// sets the descriptor byte to 8. The header is emitted with image type 2.
4364 int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
4366 int has_alpha = !(comp & 1);
4367 return outfile(filename, -1, -1, x, y, comp, data, has_alpha, 0,
4368 (char *)"111 221 2222 11", 0, 0, 2, 0, 0, 0, 0, 0, x, y, 24 + 8 * has_alpha, 8 * has_alpha);
// Wide-character-filename variant of the TGA writer: same alpha detection and
// header values, routed through outfile_w.
4372 int stbi_write_tga_w(wchar_t const *filename, int x, int y, int comp, const void *data)
4374 int has_alpha = !(comp & 1);
4375 return outfile_w(filename, -1, -1, x, y, comp, data, has_alpha, 0,
4376 (char *)"111 221 2222 11", 0, 0, 2, 0, 0, 0, 0, 0, x, y, 24 + 8 * has_alpha, 8 * has_alpha);
4380 // any other image formats that do interleaved rgb data?
4381 // PNG: requires adler32,crc32 -- significant amount of code
4382 // PSD: no, channels output separately
4383 // TIFF: no, stripwise-interleaved... i think
4385 #endif // STBI_NO_WRITE