1 /**************************************************************************
3 * Copyright 2013-2014 RAD Game Tools and Valve Software
4 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 **************************************************************************/
27 // jpge.cpp - C++ class for JPEG compression.
28 // Public domain, Rich Geldreich <richgel99@gmail.com>
29 // v1.01, Dec. 18, 2010 - Initial release
30 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
31 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
32 // Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
33 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
34 // Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
35 // Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
37 #include "vogl_jpge.h"
// Function-like max/min helpers. Because these are macros, the selected argument
// is evaluated twice, so callers should not pass expressions with side effects.
43 #define JPGE_MAX(a, b) (((a) > (b)) ? (a) : (b))
44 #define JPGE_MIN(a, b) (((a) < (b)) ? (a) : (b))
// Allocation hook used by the encoder: forwards the request for nSize bytes to
// vogl_malloc() and returns its result unchanged.
49 static inline void *jpge_malloc(size_t nSize)
51 return vogl_malloc(nSize);
// Deallocation hook paired with jpge_malloc(); p is the pointer to release.
// NOTE(review): presumably forwards to the vogl free routine matching vogl_malloc() - confirm.
53 static inline void jpge_free(void *p)
58 // Various JPEG enums and tables.
// Number of entries in s_ac_chroma_val[] and the symbol count passed to
// optimize_huffman_table() for the chroma AC table.
74 AC_CHROMA_CODES = 256,
// Capacity of the symbol work arrays in optimize_huffman_table().
75 MAX_HUFF_SYMBOLS = 257,
// Largest code length tracked while building optimized tables (before the
// lengths are limited by huffman_enforce_max_code_size()).
76 MAX_HUFF_CODESIZE = 32
// Zig-zag scan order: entry i is the index into the 8x8 sample array of the i-th
// coefficient written to the coefficient array (see load_quantized_coefficients()).
79 static uint8 s_zag[64] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 };
// Base quantization tables that compute_quant_table() scales by the quality
// setting. jpg_open() uses the first for table 0 and, unless
// m_no_chroma_discrim_flag is set, the second for table 1.
80 static int16 s_std_lum_quant[64] = { 16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, 100, 120, 92, 101, 103, 99 };
81 static int16 s_std_croma_quant[64] = { 17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99 };
// Default luminance Huffman tables, copied into m_huff_bits/m_huff_val by
// jpg_open() when two-pass (optimized) coding is not used.
// In each *_bits array, entry l (1..16) is the number of codes of length l;
// entry 0 is unused by the loops in emit_dht()/compute_huffman_table().
// The matching *_val array lists the symbols in order of increasing code length.
82 static uint8 s_dc_lum_bits[17] = { 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
83 static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
84 static uint8 s_ac_lum_bits[17] = { 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
// AC symbols are (zero run length << 4) | coefficient bit count, matching the
// index computed in code_coefficients_pass_two().
85 static uint8 s_ac_lum_val[AC_LUM_CODES] =
87 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
88 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
89 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
90 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
91 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
// Default chrominance Huffman tables, laid out like the luminance ones:
// *_bits[l] is the number of codes of length l (1..16) and *_val lists the
// symbols in order of increasing code length. Copied by jpg_open() when
// two-pass (optimized) coding is not used.
94 static uint8 s_dc_chroma_bits[17] = { 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
95 static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
96 static uint8 s_ac_chroma_bits[17] = { 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
97 static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
99 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
100 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
101 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
102 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
103 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
107 // Low-level helper functions.
// Zero-fills obj in place with memset(). Only suitable for objects for which an
// all-zero byte pattern is a valid value; in this file it is applied to plain
// integer arrays (num_codes, m_huff_count, m_huff_bits[...]).
109 inline void clear_obj(T &obj)
111 memset(&obj, 0, sizeof(obj));
// Fixed-point RGB -> Y/Cb/Cr weights scaled by 65536: the conversion routines
// add 32768 (one half) and shift right by 16. The three Y weights sum to
// exactly 65536, so the luma result always fits in 0..255.
114 const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
// Converts i to a uint8 for storage as a chroma sample.
115 static inline uint8 clamp(int i)
// A single unsigned comparison detects both out-of-range cases: negative values
// wrap to large unsigned numbers, so they also compare greater than 255.
// NOTE(review): presumably the branch saturates i to 0 or 255 before the cast - confirm.
117 if (static_cast<uint>(i) > 255U)
124 return static_cast<uint8>(i);
// Converts num_pixels packed 3-byte R,G,B pixels at pSrc into packed 3-byte
// Y,Cb,Cr pixels at pDst.
// The loop counts num_pixels down to zero, so it must not be negative.
127 static void RGB_to_YCC(uint8 *pDst, const uint8 *pSrc, int num_pixels)
129 for (; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
131 const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
// Luma needs no clamp: the weights sum to 65536, so the rounded result is 0..255.
132 pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
// Chroma is centred on 128 and passed through clamp().
133 pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
134 pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
// Converts num_pixels packed 3-byte R,G,B pixels at pSrc into one luma byte per
// pixel at pDst, using the same fixed-point weights as RGB_to_YCC().
// The loop counts num_pixels down to zero, so it must not be negative.
138 static void RGB_to_Y(uint8 *pDst, const uint8 *pSrc, int num_pixels)
140 for (; num_pixels; pDst++, pSrc += 3, num_pixels--)
141 pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
// Converts num_pixels packed 4-byte pixels at pSrc into packed 3-byte Y,Cb,Cr
// pixels at pDst. Only the first three bytes of each source pixel are read;
// the fourth byte is skipped.
// The loop counts num_pixels down to zero, so it must not be negative.
144 static void RGBA_to_YCC(uint8 *pDst, const uint8 *pSrc, int num_pixels)
146 for (; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
148 const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
// Luma needs no clamp: the weights sum to 65536, so the rounded result is 0..255.
149 pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
// Chroma is centred on 128 and passed through clamp().
150 pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
151 pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
// Converts num_pixels packed 4-byte pixels at pSrc into one luma byte per pixel
// at pDst. Only the first three bytes of each source pixel are read.
// The loop counts num_pixels down to zero, so it must not be negative.
155 static void RGBA_to_Y(uint8 *pDst, const uint8 *pSrc, int num_pixels)
157 for (; num_pixels; pDst++, pSrc += 4, num_pixels--)
158 pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
// Expands num_pixels single-byte source pixels at pSrc into 3-byte pixels at
// pDst (the destination advances 3 bytes per 1 source byte).
// NOTE(review): presumably the source byte becomes Y and both chroma bytes are set to the neutral value 128 - confirm.
161 static void Y_to_YCC(uint8 *pDst, const uint8 *pSrc, int num_pixels)
163 for (; num_pixels; pDst += 3, pSrc++, num_pixels--)
171 // Forward DCT - DCT derived from jfdctint.
// Divides x by 2^n with rounding to nearest: adds half of 2^n, then shifts right.
177 #define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
// Fixed-point multiply. Note that var is truncated to int16 before the
// multiplication, so the operand must fit in 16 bits.
178 #define DCT_MUL(var, c) (static_cast<int16>(var) * static_cast<int32>(c))
// One 8-point forward DCT pass, performed in place on the lvalues s0..s7.
// The macro declares locals (t0..t7, t10..t13, u1..u4, z5), so it can be
// expanded at most once per scope. Outputs are left scaled; callers apply
// DCT_DESCALE() or a shift afterwards (see DCT2D()).
179 #define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
180 int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
181 int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
182 int32 u1 = DCT_MUL(t12 + t13, 4433); \
183 s2 = u1 + DCT_MUL(t13, 6270); \
184 s6 = u1 + DCT_MUL(t12, -15137); \
186 int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
187 int32 z5 = DCT_MUL(u3 + u4, 9633); \
188 t4 = DCT_MUL(t4, 2446); \
189 t5 = DCT_MUL(t5, 16819); \
190 t6 = DCT_MUL(t6, 25172); \
191 t7 = DCT_MUL(t7, 12299); \
192 u1 = DCT_MUL(u1, -7373); \
193 u2 = DCT_MUL(u2, -20995); \
194 u3 = DCT_MUL(u3, -16069); \
195 u4 = DCT_MUL(u4, -3196); \
205 static void DCT2D(int32 *p)
// In-place 2D forward DCT of the 64-entry block at p, done as two separable
// passes of DCT1D: first along each row of 8, then along each column.
// Pass 1 (rows): q advances by 8 per iteration. Outputs 0 and 4 are scaled up
// by ROW_BITS; the others carry CONST_BITS of scaling from DCT_MUL and are
// descaled to the same ROW_BITS of extra precision.
// NOTE(review): s0/s4 may be negative; left-shifting a negative signed value is
// undefined behaviour before C++20 - consider multiplying instead.
208 for (c = 7; c >= 0; c--, q += 8)
210 int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
211 DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
212 q[0] = s0 << ROW_BITS;
213 q[1] = DCT_DESCALE(s1, CONST_BITS - ROW_BITS);
214 q[2] = DCT_DESCALE(s2, CONST_BITS - ROW_BITS);
215 q[3] = DCT_DESCALE(s3, CONST_BITS - ROW_BITS);
216 q[4] = s4 << ROW_BITS;
217 q[5] = DCT_DESCALE(s5, CONST_BITS - ROW_BITS);
218 q[6] = DCT_DESCALE(s6, CONST_BITS - ROW_BITS);
219 q[7] = DCT_DESCALE(s7, CONST_BITS - ROW_BITS);
// Pass 2 (columns): q advances by 1 and elements are 8 apart. The final descale
// removes the ROW_BITS added in pass 1 plus 3 more bits (a division by 8), and
// additionally CONST_BITS for the outputs produced through DCT_MUL.
221 for (q = p, c = 7; c >= 0; c--, q++)
223 int32 s0 = q[0 * 8], s1 = q[1 * 8], s2 = q[2 * 8], s3 = q[3 * 8], s4 = q[4 * 8], s5 = q[5 * 8], s6 = q[6 * 8], s7 = q[7 * 8];
224 DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
225 q[0 * 8] = DCT_DESCALE(s0, ROW_BITS + 3);
226 q[1 * 8] = DCT_DESCALE(s1, CONST_BITS + ROW_BITS + 3);
227 q[2 * 8] = DCT_DESCALE(s2, CONST_BITS + ROW_BITS + 3);
228 q[3 * 8] = DCT_DESCALE(s3, CONST_BITS + ROW_BITS + 3);
229 q[4 * 8] = DCT_DESCALE(s4, ROW_BITS + 3);
230 q[5 * 8] = DCT_DESCALE(s5, CONST_BITS + ROW_BITS + 3);
231 q[6 * 8] = DCT_DESCALE(s6, CONST_BITS + ROW_BITS + 3);
232 q[7 * 8] = DCT_DESCALE(s7, CONST_BITS + ROW_BITS + 3);
// m_key: sort key. optimize_huffman_table() stores the symbol's frequency here;
// calculate_minimum_redundancy() later overwrites it with the code length.
// m_sym_index: symbol number plus one (0 is reserved for the dummy symbol).
238 uint m_key, m_sym_index;
241 // Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
// pSyms0 holds the num_syms input entries; pSyms1 is scratch space of the same
// size. The two buffers are swapped after every pass, so the sorted data may
// end up in either one.
242 static inline sym_freq *radix_sort_syms(uint num_syms, sym_freq *pSyms0, sym_freq *pSyms1)
244 const uint cMaxPasses = 4;
// hist is laid out as cMaxPasses consecutive 256-entry histograms, one per byte
// of m_key, all filled in a single sweep over the input.
245 uint32 hist[256 * cMaxPasses];
247 for (uint i = 0; i < num_syms; i++)
249 uint freq = pSyms0[i].m_key;
251 hist[256 + ((freq >> 8) & 0xFF)]++;
252 hist[256 * 2 + ((freq >> 16) & 0xFF)]++;
253 hist[256 * 3 + ((freq >> 24) & 0xFF)]++;
255 sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1;
256 uint total_passes = cMaxPasses;
// If every key has a zero in the most significant remaining byte (bucket 0 of
// that histogram holds all num_syms entries), that pass would not reorder
// anything. NOTE(review): presumably the loop body drops such passes - confirm.
257 while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256]))
// Least-significant-byte-first passes; each pass is a stable counting sort.
259 for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
261 const uint32 *pHist = &hist[pass << 8];
// Turn bucket counts into starting offsets (exclusive prefix sum).
262 uint offsets[256], cur_ofs = 0;
263 for (uint i = 0; i < 256; i++)
265 offsets[i] = cur_ofs;
// Scatter each entry to the next free slot of its bucket.
268 for (uint i = 0; i < num_syms; i++)
269 pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
// Swap the roles of the two buffers for the next pass.
270 sym_freq *t = pCur_syms;
271 pCur_syms = pNew_syms;
277 // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
// Works entirely in place on A[0..n-1].m_key. The caller in this file passes
// the array after radix_sort_syms(), i.e. with m_key holding frequencies in
// ascending order, and afterwards reads m_key as the symbol's code length.
278 static void calculate_minimum_redundancy(sym_freq *A, int n)
280 int root, leaf, next, avbl, used, dpth;
// Phase 1: build the tree. Each step combines the two smallest remaining
// weights, taken either from the unmerged leaves (index 'leaf') or from the
// already-built internal nodes (index 'root'). A[next] receives the combined
// weight; a consumed internal node has its m_key replaced by its parent index.
288 A[0].m_key += A[1].m_key;
291 for (next = 1; next < n - 1; next++)
293 if (leaf >= n || A[root].m_key < A[leaf].m_key)
295 A[next].m_key = A[root].m_key;
296 A[root++].m_key = next;
299 A[next].m_key = A[leaf++].m_key;
300 if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key))
302 A[next].m_key += A[root].m_key;
303 A[root++].m_key = next;
306 A[next].m_key += A[leaf++].m_key;
// Phase 2: convert parent indices into depths (depth of parent + 1).
309 for (next = n - 3; next >= 0; next--)
310 A[next].m_key = A[A[next].m_key].m_key + 1;
// Phase 3: walk the depths, counting internal nodes at each depth and
// assigning the current depth to leaves from the end of the array backwards.
317 while (root >= 0 && (int)A[root].m_key == dpth)
324 A[next--].m_key = dpth;
333 // Limits canonical Huffman code table's max code size to max_code_size.
// pNum_codes[i] is the number of codes of length i and is adjusted in place.
// code_list_len is the number of codes; nothing needs adjusting for 0 or 1.
334 static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
336 if (code_list_len <= 1)
// Move every code longer than the limit into the longest allowed bucket.
339 for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++)
340 pNum_codes[max_code_size] += pNum_codes[i];
// total is the sum over code lengths of count * 2^(max_code_size - length);
// a complete prefix code has total == 2^max_code_size.
343 for (int i = max_code_size; i > 0; i--)
344 total += (((uint32)pNum_codes[i]) << (max_code_size - i));
// Repair loop: each iteration removes one code from the longest bucket and
// adds two codes at length i + 1 for some shorter length i.
// NOTE(review): presumably i is the longest shorter length with a code, whose count and total are decremented - confirm.
346 while (total != (1UL << max_code_size))
348 pNum_codes[max_code_size]--;
349 for (int i = max_code_size - 1; i > 0; i--)
354 pNum_codes[i + 1] += 2;
362 // Generates an optimized Huffman table.
// table_num selects which m_huff_count/m_huff_bits/m_huff_val row to use and
// table_len is the number of symbols in that table. The symbol frequencies in
// m_huff_count[table_num] are turned into code-length counts (m_huff_bits) and
// a symbol list ordered by code length (m_huff_val).
363 void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
365 sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
367 syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's
368 int num_used_syms = 1;
369 const uint32 *pSym_count = &m_huff_count[table_num][0];
// Real symbols are stored with index i + 1 because index 0 is the dummy.
370 for (int i = 0; i < table_len; i++)
373 syms0[num_used_syms].m_key = pSym_count[i];
374 syms0[num_used_syms++].m_sym_index = i + 1;
// Sort by frequency, then replace each m_key with that symbol's code length.
376 sym_freq *pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
377 calculate_minimum_redundancy(pSyms, num_used_syms);
379 // Count the # of symbols of each code size.
380 int num_codes[1 + MAX_HUFF_CODESIZE];
381 clear_obj(num_codes);
382 for (int i = 0; i < num_used_syms; i++)
383 num_codes[pSyms[i].m_key]++;
385 const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
386 huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
388 // Compute m_huff_bits array, which contains the # of symbols per code size.
389 clear_obj(m_huff_bits[table_num]);
390 for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
391 m_huff_bits[table_num][i] = static_cast<uint8>(num_codes[i]);
393 // Remove the dummy symbol added above, which must be in largest bucket.
394 for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
396 if (m_huff_bits[table_num][i])
398 m_huff_bits[table_num][i]--;
403 // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
// pSyms is walked from its last entry down to index 1 (index 0 is skipped), and
// the + 1 applied to m_sym_index above is undone here.
404 for (int i = num_used_syms - 1; i >= 1; i--)
405 m_huff_val[table_num][num_used_syms - 1 - i] = static_cast<uint8>(pSyms[i].m_sym_index - 1);
408 // JPEG marker generation.
// Writes one byte to the output stream and records failure in
// m_all_stream_writes_succeeded. Because of the short-circuit &&, once any
// write has failed no further writes are attempted.
409 void jpeg_encoder::emit_byte(uint8 i)
411 m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i);
// Writes the low 16 bits of i in big-endian order (high byte first).
414 void jpeg_encoder::emit_word(uint i)
416 emit_byte(uint8(i >> 8));
417 emit_byte(uint8(i & 0xFF));
// Writes a two-byte marker: the 0xFF prefix followed by the marker code.
420 void jpeg_encoder::emit_marker(int marker)
422 emit_byte(uint8(0xFF));
423 emit_byte(uint8(marker));
// Writes the JFIF application header segment.
427 void jpeg_encoder::emit_jfif_app0()
// Segment length: the sum below is 16 bytes, including the length word itself.
430 emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1);
434 emit_byte(0x46); /* Identifier: ASCII "JFIF" */
436 emit_byte(1); /* Major version */
437 emit_byte(1); /* Minor version */
438 emit_byte(0); /* Density unit */
441 emit_byte(0); /* No thumbnail image */
445 // Emit quantization tables
// Two tables are written for 3-component images, otherwise one.
446 void jpeg_encoder::emit_dqt()
448 for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
// Segment length: 64 table bytes + 1 table-id byte + 2 length bytes.
451 emit_word(64 + 1 + 2);
452 emit_byte(static_cast<uint8>(i));
// Entries are written in stored order, each narrowed to one byte
// (compute_quant_table() limits them to 1..255).
453 for (int j = 0; j < 64; j++)
454 emit_byte(static_cast<uint8>(m_quantization_tables[i][j]));
458 // Emit start of frame marker
459 void jpeg_encoder::emit_sof()
461 emit_marker(M_SOF0); /* baseline */
// Segment length: 3 bytes per component plus 8 bytes of fixed fields.
462 emit_word(3 * m_num_components + 2 + 5 + 1);
463 emit_byte(8); /* precision */
// Image height is written before width; both as 16-bit words.
464 emit_word(m_image_y);
465 emit_word(m_image_x);
466 emit_byte(m_num_components);
467 for (int i = 0; i < m_num_components; i++)
469 emit_byte(static_cast<uint8>(i + 1)); /* component ID */
470 emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */
// Component 0 uses quantization table 0; all others use table 1.
471 emit_byte(i > 0); /* quant. table num */
475 // Emit Huffman table.
// bits[1..16] are the per-length code counts, val the symbol list, index the
// table slot, and ac_flag selects the AC (true) or DC (false) table class.
476 void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag)
// NOTE(review): presumably this loop sums bits[1..16] into length, the number of symbols - confirm.
481 for (int i = 1; i <= 16; i++)
// Segment length: symbols + 2 length bytes + 1 table-id byte + 16 count bytes.
484 emit_word(length + 2 + 1 + 16);
// Table-id byte: slot in the low nibble, table class (1 = AC) in the high nibble.
485 emit_byte(static_cast<uint8>(index + (ac_flag << 4)));
// NOTE(review): presumably these loops write the 16 counts and then the length symbols - confirm.
487 for (int i = 1; i <= 16; i++)
490 for (int i = 0; i < length; i++)
494 // Emit all Huffman tables.
// Row layout of m_huff_bits/m_huff_val as used here: 0 = DC slot 0, 1 = DC
// slot 1, 2 = AC slot 0, 3 = AC slot 1. The slot-1 tables are written only
// for 3-component images.
495 void jpeg_encoder::emit_dhts()
497 emit_dht(m_huff_bits[0 + 0], m_huff_val[0 + 0], 0, false);
498 emit_dht(m_huff_bits[2 + 0], m_huff_val[2 + 0], 0, true);
499 if (m_num_components == 3)
501 emit_dht(m_huff_bits[0 + 1], m_huff_val[0 + 1], 1, false);
502 emit_dht(m_huff_bits[2 + 1], m_huff_val[2 + 1], 1, true);
506 // emit start of scan
507 void jpeg_encoder::emit_sos()
// Segment length: 2 bytes per component plus 6 bytes of fixed fields.
510 emit_word(2 * m_num_components + 2 + 1 + 3);
511 emit_byte(m_num_components);
512 for (int i = 0; i < m_num_components; i++)
// Component ID, matching the IDs written by emit_sof().
514 emit_byte(static_cast<uint8>(i + 1));
// Table selector byte: DC table slot in the high nibble, AC slot in the low.
// NOTE(review): presumably slot 0 is chosen for component 0 and slot 1 for the others - confirm.
516 emit_byte((0 << 4) + 0);
518 emit_byte((1 << 4) + 1);
520 emit_byte(0); /* spectral selection */
525 // Emit all markers at beginning of image file.
// NOTE(review): presumably calls the emit_* helpers above in file order - confirm.
526 void jpeg_encoder::emit_markers()
536 // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
// codes and code_sizes are output arrays of 256 entries each, indexed by symbol
// value; symbols absent from val are left with code 0 and size 0.
537 void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val)
539 int i, l, last_p, si;
540 uint8 huff_size[257];
// Expand the per-length counts into one code length per symbol, in val order.
// (The (char) cast is stored into a uint8 array; l never exceeds 16 here.)
545 for (l = 1; l <= 16; l++)
546 for (i = 1; i <= bits[l]; i++)
547 huff_size[p++] = (char)l;
550 last_p = p; // write sentinel
// Consecutive symbols of the same length receive consecutive code values.
558 while (huff_size[p] == si)
559 huff_code[p++] = code++;
// Scatter the per-position codes/sizes into tables indexed by symbol value.
564 memset(codes, 0, sizeof(codes[0]) * 256);
565 memset(code_sizes, 0, sizeof(code_sizes[0]) * 256);
566 for (p = 0; p < last_p; p++)
568 codes[val[p]] = huff_code[p];
569 code_sizes[val[p]] = huff_size[p];
573 // Quantization table generation.
// Fills the 64-entry table pDst by scaling the base table pSrc according to
// m_params.m_quality.
574 void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc)
// Scale factor q in percent: 5000 / quality below 50, and 200 - 2 * quality
// for the other case.
// NOTE(review): a quality of 0 would divide by zero; presumably params::check() rejects it - confirm.
577 if (m_params.m_quality < 50)
578 q = 5000 / m_params.m_quality;
580 q = 200 - m_params.m_quality * 2;
581 for (int i = 0; i < 64; i++)
// Apply the percentage with rounding, then limit the entry to 1..255.
584 j = (j * q + 50L) / 100L;
585 *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
589 // Higher-level methods.
// Prepares encoder state for the start of a pass.
590 void jpeg_encoder::first_pass_init()
// Reset the DC predictors of the (up to) three components to zero.
594 memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0]));
// Builds the code/code-size lookup tables from m_huff_bits/m_huff_val for the
// pass that writes the bitstream. Rows 0 and 2 are always built; rows 1 and 3
// only when there is more than one component.
// NOTE(review): the return value is used by callers as a success flag; presumably it reflects stream write status - confirm.
599 bool jpeg_encoder::second_pass_init()
601 compute_huffman_table(&m_huff_codes[0 + 0][0], &m_huff_code_sizes[0 + 0][0], m_huff_bits[0 + 0], m_huff_val[0 + 0]);
602 compute_huffman_table(&m_huff_codes[2 + 0][0], &m_huff_code_sizes[2 + 0][0], m_huff_bits[2 + 0], m_huff_val[2 + 0]);
603 if (m_num_components > 1)
605 compute_huffman_table(&m_huff_codes[0 + 1][0], &m_huff_code_sizes[0 + 1][0], m_huff_bits[0 + 1], m_huff_val[0 + 1]);
606 compute_huffman_table(&m_huff_codes[2 + 1][0], &m_huff_code_sizes[2 + 1][0], m_huff_bits[2 + 1], m_huff_val[2 + 1]);
// Configures the encoder for an image of the given size and source channel
// count: sampling factors, derived dimensions, the MCU line buffer,
// quantization tables and Huffman tables. Returns false on failure.
614 bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels)
// Default to 3 components; the subsampling mode below may reduce it to 1 and
// sets the per-component horizontal/vertical sampling factors.
616 m_num_components = 3;
617 switch (m_params.m_subsampling)
// Single-component case.
621 m_num_components = 1;
622 m_comp_h_samp[0] = 1;
623 m_comp_v_samp[0] = 1;
// All components sampled 1x1.
630 m_comp_h_samp[0] = 1;
631 m_comp_v_samp[0] = 1;
632 m_comp_h_samp[1] = 1;
633 m_comp_v_samp[1] = 1;
634 m_comp_h_samp[2] = 1;
635 m_comp_v_samp[2] = 1;
// Component 0 sampled 2x1, the other two 1x1.
642 m_comp_h_samp[0] = 2;
643 m_comp_v_samp[0] = 1;
644 m_comp_h_samp[1] = 1;
645 m_comp_v_samp[1] = 1;
646 m_comp_h_samp[2] = 1;
647 m_comp_v_samp[2] = 1;
// Component 0 sampled 2x2, the other two 1x1.
654 m_comp_h_samp[0] = 2;
655 m_comp_v_samp[0] = 2;
656 m_comp_h_samp[1] = 1;
657 m_comp_v_samp[1] = 1;
658 m_comp_h_samp[2] = 1;
659 m_comp_v_samp[2] = 1;
// Source layout: bytes per pixel and bytes per source line.
667 m_image_bpp = src_channels;
668 m_image_bpl = m_image_x * src_channels;
// Round the dimensions up to a whole number of MCUs. The mask form is only
// correct when m_mcu_x / m_mcu_y are powers of two.
669 m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1));
670 m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1));
// Bytes per converted line: unpadded (xlt) and padded to the MCU width (mcu).
671 m_image_bpl_xlt = m_image_x * m_num_components;
672 m_image_bpl_mcu = m_image_x_mcu * m_num_components;
673 m_mcus_per_row = m_image_x_mcu / m_mcu_x;
// One allocation holds all m_mcu_y converted lines; m_mcu_lines[i] point into it.
// NOTE(review): the size is computed in int; very wide images could overflow - confirm limits.
675 if ((m_mcu_lines[0] = static_cast<uint8 *>(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL)
677 for (int i = 1; i < m_mcu_y; i++)
678 m_mcu_lines[i] = m_mcu_lines[i - 1] + m_image_bpl_mcu;
// Table 0 always derives from the luminance base table; table 1 uses the
// chrominance base table unless m_no_chroma_discrim_flag is set.
680 compute_quant_table(m_quantization_tables[0], s_std_lum_quant);
681 compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant);
// Start with an empty output buffer.
683 m_out_buf_left = JPGE_OUT_BUF_SIZE;
684 m_pOut_buf = m_out_buf;
// Two-pass mode gathers symbol statistics first, so the counters are cleared.
686 if (m_params.m_two_pass_flag)
688 clear_obj(m_huff_count);
// Otherwise install the default tables (17 count bytes plus the symbol lists).
693 memcpy(m_huff_bits[0 + 0], s_dc_lum_bits, 17);
694 memcpy(m_huff_val[0 + 0], s_dc_lum_val, DC_LUM_CODES);
695 memcpy(m_huff_bits[2 + 0], s_ac_lum_bits, 17);
696 memcpy(m_huff_val[2 + 0], s_ac_lum_val, AC_LUM_CODES);
697 memcpy(m_huff_bits[0 + 1], s_dc_chroma_bits, 17);
698 memcpy(m_huff_val[0 + 1], s_dc_chroma_val, DC_CHROMA_CODES);
699 memcpy(m_huff_bits[2 + 1], s_ac_chroma_bits, 17);
700 memcpy(m_huff_val[2 + 1], s_ac_chroma_val, AC_CHROMA_CODES);
701 if (!second_pass_init())
702 return false; // in effect, skip over the first pass
704 return m_all_stream_writes_succeeded;
// Loads one 8x8 block of single-component samples into m_sample_array,
// subtracting 128 from each sample. Rows come from m_mcu_lines[0..7].
// NOTE(review): x is added to the line pointer directly; callers pass the MCU index, so presumably x is scaled by 8 on a line not shown - confirm.
707 void jpeg_encoder::load_block_8_8_grey(int x)
710 sample_array_t *pDst = m_sample_array;
712 for (int i = 0; i < 8; i++, pDst += 8)
714 pSrc = m_mcu_lines[i] + x;
715 pDst[0] = pSrc[0] - 128;
716 pDst[1] = pSrc[1] - 128;
717 pDst[2] = pSrc[2] - 128;
718 pDst[3] = pSrc[3] - 128;
719 pDst[4] = pSrc[4] - 128;
720 pDst[5] = pSrc[5] - 128;
721 pDst[6] = pSrc[6] - 128;
722 pDst[7] = pSrc[7] - 128;
// Loads one 8x8 block of component c from 3-byte interleaved pixels into
// m_sample_array, subtracting 128 from each sample. x is the block column in
// units of 8 pixels.
// NOTE(review): callers pass y as 0 or 1, but the row index below is y + i; presumably y is scaled by 8 on a line not shown - confirm.
726 void jpeg_encoder::load_block_8_8(int x, int y, int c)
729 sample_array_t *pDst = m_sample_array;
// Byte offset of the block's first sample: 8 pixels of 3 bytes per block column,
// plus the component's position within the pixel.
730 x = (x * (8 * 3)) + c;
732 for (int i = 0; i < 8; i++, pDst += 8)
734 pSrc = m_mcu_lines[y + i] + x;
735 pDst[0] = pSrc[0 * 3] - 128;
736 pDst[1] = pSrc[1 * 3] - 128;
737 pDst[2] = pSrc[2 * 3] - 128;
738 pDst[3] = pSrc[3 * 3] - 128;
739 pDst[4] = pSrc[4 * 3] - 128;
740 pDst[5] = pSrc[5 * 3] - 128;
741 pDst[6] = pSrc[6 * 3] - 128;
742 pDst[7] = pSrc[7 * 3] - 128;
// Loads one 8x8 block of component c by averaging each 2x2 group of a 16x16
// pixel area (3-byte interleaved pixels), subtracting 128 from each result.
// x is the MCU column in units of 16 pixels.
746 void jpeg_encoder::load_block_16_8(int x, int c)
748 uint8 *pSrc1, *pSrc2;
749 sample_array_t *pDst = m_sample_array;
750 x = (x * (16 * 3)) + c;
// Two source lines are consumed per output row.
752 for (int i = 0; i < 16; i += 2, pDst += 8)
754 pSrc1 = m_mcu_lines[i + 0] + x;
755 pSrc2 = m_mcu_lines[i + 1] + x;
// Sum of four samples plus a rounding term, divided by 4. The rounding terms
// a and b alternate across the row.
// NOTE(review): a and b are defined on lines not shown here - confirm their values.
756 pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3] + pSrc2[0 * 3] + pSrc2[1 * 3] + a) >> 2) - 128;
757 pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3] + pSrc2[2 * 3] + pSrc2[3 * 3] + b) >> 2) - 128;
758 pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3] + pSrc2[4 * 3] + pSrc2[5 * 3] + a) >> 2) - 128;
759 pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3] + pSrc2[6 * 3] + pSrc2[7 * 3] + b) >> 2) - 128;
760 pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3] + pSrc2[8 * 3] + pSrc2[9 * 3] + a) >> 2) - 128;
761 pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128;
762 pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128;
763 pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128;
// Loads one 8x8 block of component c by averaging horizontal pairs of a 16x8
// pixel area (3-byte interleaved pixels), subtracting 128 from each result.
// x is the MCU column in units of 16 pixels. No rounding term is added before
// the shift, so the average truncates.
770 void jpeg_encoder::load_block_16_8_8(int x, int c)
773 sample_array_t *pDst = m_sample_array;
774 x = (x * (16 * 3)) + c;
775 for (int i = 0; i < 8; i++, pDst += 8)
777 pSrc1 = m_mcu_lines[i + 0] + x;
778 pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3]) >> 1) - 128;
779 pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3]) >> 1) - 128;
780 pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3]) >> 1) - 128;
781 pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3]) >> 1) - 128;
782 pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3]) >> 1) - 128;
783 pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128;
784 pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128;
785 pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128;
// Quantizes the 64 transformed samples into m_coefficient_array, reading the
// samples in zig-zag order. Component 0 uses quantization table 0; any other
// component uses table 1.
789 void jpeg_encoder::load_quantized_coefficients(int component_num)
791 int32 *q = m_quantization_tables[component_num > 0];
792 int16 *pDst = m_coefficient_array;
793 for (int i = 0; i < 64; i++)
795 sample_array_t j = m_sample_array[s_zag[i]];
// Negative sample: take the magnitude, add half the divisor for rounding,
// divide, and negate the quotient.
// NOTE(review): when the biased magnitude is below the divisor the result is presumably stored as 0 without dividing - confirm.
798 if ((j = -j + (*q >> 1)) < *q)
801 *pDst++ = static_cast<int16>(-(j / *q));
// Non-negative sample: same rounding division without the negation.
805 if ((j = j + (*q >> 1)) < *q)
808 *pDst++ = static_cast<int16>((j / *q));
// Writes any buffered output bytes to the stream and resets the buffer to
// empty. Nothing is written when the buffer is already empty; a failed write
// (or any earlier failure) leaves m_all_stream_writes_succeeded false.
814 void jpeg_encoder::flush_output_buffer()
816 if (m_out_buf_left != JPGE_OUT_BUF_SIZE)
817 m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left);
818 m_pOut_buf = m_out_buf;
819 m_out_buf_left = JPGE_OUT_BUF_SIZE;
// Appends the low len bits of bits to the output bitstream, most significant
// bit first.
822 void jpeg_encoder::put_bits(uint bits, uint len)
// m_bit_buffer is used as a 24-bit window filled from the top: the new bits
// are placed directly below the m_bits_in bits already pending. The shift
// amount 24 - (m_bits_in + len) must not go negative.
824 m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len)));
// Emit whole bytes from the top of the window (bits 16..23).
// NOTE(review): presumably a zero byte is stuffed after each 0xFF and the window is shifted left by 8 per byte - confirm.
// JPGE_PUT_BYTE stores one byte and flushes the output buffer when it fills.
825 while (m_bits_in >= 8)
828 #define JPGE_PUT_BYTE(c) \
830 *m_pOut_buf++ = (c); \
831 if (--m_out_buf_left == 0) \
832 flush_output_buffer(); \
834 JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF));
// Statistics pass: counts the Huffman symbols that coding the current block of
// m_coefficient_array would produce, without writing any bits.
842 void jpeg_encoder::code_coefficients_pass_one(int component_num)
844 if (component_num >= 3)
845 return; // just to shut up static analysis
846 int i, run_len, nbits, temp1;
847 int16 *src = m_coefficient_array;
// Component 0 updates count rows 0 (DC) and 2 (AC); others use rows 1 and 3.
848 uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0];
// DC is coded as the difference from the previous block of the same component.
850 temp1 = src[0] - m_last_dc_val[component_num];
851 m_last_dc_val[component_num] = src[0];
// AC coefficients: track the run of zeros preceding each non-zero value.
863 for (run_len = 0, i = 1; i < 64; i++)
865 if ((temp1 = m_coefficient_array[i]) == 0)
// NOTE(review): presumably runs of 16 or more are split off as 0xF0 symbols, as in pass two - confirm.
869 while (run_len >= 16)
// Symbol = (zero run length << 4) | number of bits in the coefficient.
879 ac_count[(run_len << 4) + nbits]++;
// Output pass: Huffman-codes the current block of m_coefficient_array and
// writes it with put_bits().
887 void jpeg_encoder::code_coefficients_pass_two(int component_num)
889 int i, j, run_len, nbits, temp1, temp2;
890 int16 *pSrc = m_coefficient_array;
892 uint8 *code_sizes[2];
// Index 0 of codes/code_sizes is the DC table, index 1 the AC table.
// Component 0 uses table rows 0 and 2; other components use rows 1 and 3.
894 if (component_num == 0)
896 codes[0] = m_huff_codes[0 + 0];
897 codes[1] = m_huff_codes[2 + 0];
898 code_sizes[0] = m_huff_code_sizes[0 + 0];
899 code_sizes[1] = m_huff_code_sizes[2 + 0];
903 codes[0] = m_huff_codes[0 + 1];
904 codes[1] = m_huff_codes[2 + 1];
905 code_sizes[0] = m_huff_code_sizes[0 + 1];
906 code_sizes[1] = m_huff_code_sizes[2 + 1];
// DC is coded as the difference from the previous block of the same component.
909 temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num];
910 m_last_dc_val[component_num] = pSrc[0];
// DC: Huffman code for the bit count, then the low nbits of the value.
925 put_bits(codes[0][nbits], code_sizes[0][nbits]);
927 put_bits(temp2 & ((1 << nbits) - 1), nbits);
// AC coefficients: track the run of zeros preceding each non-zero value.
929 for (run_len = 0, i = 1; i < 64; i++)
931 if ((temp1 = m_coefficient_array[i]) == 0)
// Runs of 16 or more zeros are emitted as symbol 0xF0.
935 while (run_len >= 16)
937 put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
940 if ((temp2 = temp1) < 0)
// Symbol = (zero run length << 4) | bit count, followed by the value bits.
948 j = (run_len << 4) + nbits;
949 put_bits(codes[1][j], code_sizes[1][j]);
950 put_bits(temp2 & ((1 << nbits) - 1), nbits);
// AC symbol 0.
// NOTE(review): presumably emitted only when the block ends in a run of zeros (end-of-block) - confirm.
955 put_bits(codes[1][0], code_sizes[1][0]);
// Processes the block currently in m_sample_array: forward DCT, quantization,
// then either statistics gathering or bitstream output.
// NOTE(review): presumably the choice between the two is made on m_pass_num - confirm.
958 void jpeg_encoder::code_block(int component_num)
960 DCT2D(m_sample_array);
961 load_quantized_coefficients(component_num);
963 code_coefficients_pass_one(component_num);
965 code_coefficients_pass_two(component_num);
// Encodes one full row of MCUs from the buffered lines. The block loading
// pattern depends on the component count and component 0's sampling factors.
// NOTE(review): presumably each load_* call is followed by code_block() for that component - confirm.
968 void jpeg_encoder::process_mcu_row()
// Single component: one 8x8 block per MCU.
970 if (m_num_components == 1)
972 for (int i = 0; i < m_mcus_per_row; i++)
974 load_block_8_8_grey(i);
// 1x1 sampling: one block per component per MCU.
978 else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
980 for (int i = 0; i < m_mcus_per_row; i++)
982 load_block_8_8(i, 0, 0);
984 load_block_8_8(i, 0, 1);
986 load_block_8_8(i, 0, 2);
// 2x1 sampling: two component-0 blocks side by side, then one horizontally
// averaged block for each of components 1 and 2.
990 else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
992 for (int i = 0; i < m_mcus_per_row; i++)
994 load_block_8_8(i * 2 + 0, 0, 0);
996 load_block_8_8(i * 2 + 1, 0, 0);
998 load_block_16_8_8(i, 1);
1000 load_block_16_8_8(i, 2);
// 2x2 sampling: four component-0 blocks, then one 2x2-averaged block for each
// of components 1 and 2.
1004 else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
1006 for (int i = 0; i < m_mcus_per_row; i++)
1008 load_block_8_8(i * 2 + 0, 0, 0);
1010 load_block_8_8(i * 2 + 1, 0, 0);
1012 load_block_8_8(i * 2 + 0, 1, 0);
1014 load_block_8_8(i * 2 + 1, 1, 0);
1016 load_block_16_8(i, 1);
1018 load_block_16_8(i, 2);
// Finishes the statistics pass: builds optimized Huffman tables from the
// gathered counts (rows 1 and 3 only when there is more than one component)
// and then prepares the output pass. Returns second_pass_init()'s result.
1024 bool jpeg_encoder::terminate_pass_one()
1026 optimize_huffman_table(0 + 0, DC_LUM_CODES);
1027 optimize_huffman_table(2 + 0, AC_LUM_CODES);
1028 if (m_num_components > 1)
1030 optimize_huffman_table(0 + 1, DC_CHROMA_CODES);
1031 optimize_huffman_table(2 + 1, AC_CHROMA_CODES);
1033 return second_pass_init();
// Finishes the output pass and flushes buffered bytes to the stream.
// NOTE(review): presumably also pads the final partial byte and writes the end-of-image marker - confirm.
1036 bool jpeg_encoder::terminate_pass_two()
1039 flush_output_buffer();
1041 m_pass_num++; // purposely bump up m_pass_num, for debugging
// Handles the end of the input scanlines for the current pass: completes a
// partially filled MCU row, then finishes pass one or pass two.
1045 bool jpeg_encoder::process_end_of_image()
1049 if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
// Fill the unused lines of the MCU row with copies of the last loaded line.
// NOTE(review): indexing m_mcu_y_ofs - 1 requires m_mcu_y_ofs >= 1; presumably guarded by a check not shown here - confirm.
1051 for (int i = m_mcu_y_ofs; i < m_mcu_y; i++)
1052 memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu);
1058 if (m_pass_num == 1)
1059 return terminate_pass_one();
1061 return terminate_pass_two();
// Converts one source scanline into the encoder's internal format, stores it
// in the next free MCU line and pads it on the right to the MCU width.
1064 void jpeg_encoder::load_mcu(const void *pSrc)
1066 const uint8 *Psrc = reinterpret_cast<const uint8 *>(pSrc);
1068 uint8 *pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
// Single-component output: 4- and 3-byte sources are reduced to luma; any
// other source is copied byte for byte.
1070 if (m_num_components == 1)
1072 if (m_image_bpp == 4)
1073 RGBA_to_Y(pDst, Psrc, m_image_x);
1074 else if (m_image_bpp == 3)
1075 RGB_to_Y(pDst, Psrc, m_image_x);
1077 memcpy(pDst, Psrc, m_image_x);
// Three-component output: convert to Y/Cb/Cr; sources that are neither 4 nor
// 3 bytes per pixel go through Y_to_YCC().
1081 if (m_image_bpp == 4)
1082 RGBA_to_YCC(pDst, Psrc, m_image_x);
1083 else if (m_image_bpp == 3)
1084 RGB_to_YCC(pDst, Psrc, m_image_x);
1086 Y_to_YCC(pDst, Psrc, m_image_x);
1089 // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
1090 if (m_num_components == 1)
1091 memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x);
// Three-component case: the last converted pixel is the padding value.
// NOTE(review): presumably the loop writes y, cb, cr for each padding pixel - confirm.
1094 const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2];
1095 uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt;
1096 for (int i = m_image_x; i < m_image_x_mcu; i++)
// A full set of m_mcu_y lines has been collected.
// NOTE(review): presumably the MCU row is then encoded and the line counter reset - confirm.
1104 if (++m_mcu_y_ofs == m_mcu_y)
// Resets the encoder to an idle state: no MCU line buffer and no recorded
// stream write failure.
1111 void jpeg_encoder::clear()
1113 m_mcu_lines[0] = NULL;
1115 m_all_stream_writes_succeeded = true;
// Constructs an encoder; call init() before use.
// NOTE(review): presumably calls clear() to establish the idle state - confirm.
1118 jpeg_encoder::jpeg_encoder()
// Destroys the encoder.
// NOTE(review): presumably calls deinit() to release the MCU line buffer - confirm.
1123 jpeg_encoder::~jpeg_encoder()
// Starts encoding a width x height image with src_channels bytes per source
// pixel, writing to pStream using comp_params. Returns jpg_open()'s result.
1128 bool jpeg_encoder::init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params)
// Reject a null stream, non-positive dimensions, channel counts other than
// 1, 3 or 4, and parameters that fail params::check().
1131 if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check()))
1133 m_pStream = pStream;
1134 m_params = comp_params;
1135 return jpg_open(width, height, src_channels);
// Releases encoder resources. m_mcu_lines[0] is the start of the single
// allocation that holds every MCU line (see jpg_open()).
1138 void jpeg_encoder::deinit()
1140 jpge_free(m_mcu_lines[0]);
// Feeds one scanline to the encoder. The wrappers in this file call it once
// per image row and then once more with NULL at the end of each pass.
// Returns false on failure, otherwise the accumulated stream write status.
1144 bool jpeg_encoder::process_scanline(const void *pScanline)
// Only valid while a pass (1 or 2) is in progress.
1146 if ((m_pass_num < 1) || (m_pass_num > 2))
// Skip all work once a stream write has failed.
1148 if (m_all_stream_writes_succeeded)
// NOTE(review): presumably a NULL pScanline selects end-of-image handling and anything else is loaded as a row - confirm.
1152 if (!process_end_of_image())
1157 load_mcu(pScanline);
1160 return m_all_stream_writes_succeeded;
1163 // Higher level wrappers/examples (optional).
// output_stream implementation that writes to a C stdio FILE.
1166 class cfile_stream : public output_stream
// Copy constructor and copy assignment are declared without a body here.
// NOTE(review): presumably private and undefined to make the class non-copyable - confirm.
1168 cfile_stream(const cfile_stream &);
1169 cfile_stream &operator=(const cfile_stream &);
1176 : m_pFile(NULL), m_bStatus(false)
1180 virtual ~cfile_stream()
// Opens pFilename for binary writing; m_bStatus records whether that worked.
// NOTE(review): two alternative open calls appear below, presumably selected by a platform conditional; the fopen_s return value is not checked, only the resulting pointer.
1185 bool open(const char *pFilename)
1189 fopen_s(&m_pFile, pFilename, "wb");
1191 m_pFile = fopen(pFilename, "wb");
1193 m_bStatus = (m_pFile != NULL);
1201 if (fclose(m_pFile) == EOF)
// Writes len bytes as a single fwrite element; the status is sticky, so once
// it is false no further writes are attempted.
// NOTE(review): with len == 0 fwrite returns 0, which would be recorded as a failure; flush_output_buffer() never calls put_buf with an empty buffer.
1210 virtual bool put_buf(const void *pBuf, int len)
1212 m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1);
// Returns the current file position (0 when no file is open).
// NOTE(review): ftell returns -1 on error, which converts to a very large unsigned value here.
1216 unsigned long int get_size() const
1218 return m_pFile ? ftell(m_pFile) : 0;
1222 // Writes JPEG image to file.
// pImage_data holds height rows of width * num_channels bytes each, stored
// contiguously. Every pass reported by get_total_passes() re-reads the whole
// image. Returns the result of closing the file on the success path.
1223 bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params)
1225 cfile_stream dst_stream;
1226 if (!dst_stream.open(pFilename))
1229 jpge::jpeg_encoder dst_image;
1230 if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
1233 for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
1235 for (int i = 0; i < height; i++)
// NOTE(review): the row offset is computed in int and can overflow for images larger than 2 GiB - consider size_t arithmetic.
1237 const uint8 *pBuf = pImage_data + i * width * num_channels;
1238 if (!dst_image.process_scanline(pBuf))
// A NULL scanline is passed after the last row of each pass.
1241 if (!dst_image.process_scanline(NULL))
1247 return dst_stream.close();
// output_stream implementation that writes into a caller-supplied, fixed-size
// memory buffer.
1250 class memory_stream : public output_stream
// Copy constructor and copy assignment are declared without a body here.
// NOTE(review): presumably private and undefined to make the class non-copyable - confirm.
1252 memory_stream(const memory_stream &);
1253 memory_stream &operator=(const memory_stream &);
// m_buf_size is the buffer capacity; m_buf_ofs the number of bytes written.
1256 uint m_buf_size, m_buf_ofs;
1259 memory_stream(void *pBuf, uint buf_size)
1260 : m_pBuf(static_cast<uint8 *>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0)
1264 virtual ~memory_stream()
// Appends len bytes if they fit in the remaining space. The unsigned
// comparison also catches a negative len, which converts to a very large value.
// NOTE(review): presumably returns false without writing when the data does not fit - confirm.
1268 virtual bool put_buf(const void *pBuf, int len)
1270 uint buf_remaining = m_buf_size - m_buf_ofs;
1271 if ((uint)len > buf_remaining)
1273 memcpy(m_pBuf + m_buf_ofs, pBuf, len);
1278 uint get_size() const
// Compresses an image into the caller's buffer pDstBuf. On entry buf_size is
// the buffer capacity in bytes; it is overwritten with the stream's size once
// encoding has finished. pImage_data holds height rows of
// width * num_channels bytes each, stored contiguously.
1284 bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params)
// NOTE(review): a negative buf_size passes this check and is then converted to a huge unsigned capacity by memory_stream - consider rejecting buf_size < 1.
1286 if ((!pDstBuf) || (!buf_size))
1289 memory_stream dst_stream(pDstBuf, buf_size);
1293 jpge::jpeg_encoder dst_image;
1294 if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
1297 for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
1299 for (int i = 0; i < height; i++)
// NOTE(review): the row offset is computed in int and can overflow for images larger than 2 GiB - consider size_t arithmetic.
1301 const uint8 *pScanline = pImage_data + i * width * num_channels;
1302 if (!dst_image.process_scanline(pScanline))
// A NULL scanline is passed after the last row of each pass.
1305 if (!dst_image.process_scanline(NULL))
1311 buf_size = dst_stream.get_size();