/xbmc/visualizations/XBMCProjectM/libprojectM/stb_image_aug.c

http://github.com/xbmc/xbmc · C · 3163 lines · 2541 code · 280 blank · 342 comment · 819 complexity · 0a67e6320b93c9e60a9dfaff421fd43a MD5 · raw file

Large files are truncated click here to view the full file

  1. /* stbi-1.03 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c
  2. when you control the images you're loading
  3. QUICK NOTES:
  4. Primarily of interest to game developers and other people who can
  5. avoid problematic images and only need the trivial interface
  6. JPEG baseline (no JPEG progressive, no oddball channel decimations)
  7. PNG non-interlaced
  8. BMP non-1bpp, non-RLE
  9. TGA (not sure what subset, if a subset)
  10. HDR (radiance rgbE format)
  11. writes BMP,TGA (define STBI_NO_WRITE to remove code)
  12. decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code)
  13. TODO:
  14. stbi_info_*
  15. PSD loader
  16. history:
  17. 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
  18. 1.02 support for (subset of) HDR files, float interface for preferred access to them
  19. 1.01 fix bug: possible bug in handling right-side up bmps... not sure
  20. fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
  21. 1.00 interface to zlib that skips zlib header
  22. 0.99 correct handling of alpha in palette
  23. 0.98 TGA loader by lonesock; dynamically add loaders (untested)
  24. 0.97 jpeg errors on too large a file; also catch another malloc failure
  25. 0.96 fix detection of invalid v value - particleman@mollyrocket forum
  26. 0.95 during header scan, seek to markers in case of padding
  27. 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
  28. 0.93 handle jpegtran output; verbose errors
  29. 0.92 read 4,8,16,24,32-bit BMP files of several formats
  30. 0.91 output 24-bit Windows 3.0 BMP files
  31. 0.90 fix a few more warnings; bump version number to approach 1.0
  32. 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
  33. 0.60 fix compiling as c++
  34. 0.59 fix warnings: merge Dave Moore's -Wall fixes
  35. 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
  36. 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less
  37. than 16 available
  38. 0.56 fix bug: zlib uncompressed mode len vs. nlen
  39. 0.55 fix bug: restart_interval not initialized to 0
  40. 0.54 allow NULL for 'int *comp'
  41. 0.53 fix bug in png 3->4; speedup png decoding
  42. 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
  43. 0.51 obey req_comp requests, 1-component jpegs return as 1-component,
  44. on 'test' only check type, not whether we support this variant
  45. */
  46. #include "stb_image_aug.h"
  47. #ifndef STBI_NO_STDIO
  48. #include <stdio.h>
  49. #endif
  50. #include <stdlib.h>
  51. #include <memory.h>
  52. #include <assert.h>
  53. #include <stdarg.h>
  54. #ifndef _MSC_VER
  55. #define __forceinline
  56. #endif
  57. // implementation:
  58. typedef unsigned char uint8;
  59. typedef unsigned short uint16;
  60. typedef signed short int16;
  61. typedef unsigned int uint32;
  62. typedef signed int int32;
  63. typedef unsigned int uint;
  64. // should produce compiler error if size is wrong
  65. typedef unsigned char validate_uint32[sizeof(uint32)==4];
  66. #if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
  67. #define STBI_NO_WRITE
  68. #endif
  69. #ifndef STBI_NO_DDS
  70. #include "stbi_DDS_aug.h"
  71. #endif
  72. // I (JLD) want full messages for SOIL
  73. #define STBI_FAILURE_USERMSG 1
  74. //////////////////////////////////////////////////////////////////////////////
  75. //
  76. // Generic API that works on all image types
  77. //
  78. static char *failure_reason;
  79. char *stbi_failure_reason(void)
  80. {
  81. return failure_reason;
  82. }
  83. static int e(char *str)
  84. {
  85. failure_reason = str;
  86. return 0;
  87. }
  88. #ifdef STBI_NO_FAILURE_STRINGS
  89. #define e(x,y) 0
  90. #elif defined(STBI_FAILURE_USERMSG)
  91. #define e(x,y) e(y)
  92. #else
  93. #define e(x,y) e(x)
  94. #endif
  95. #define ep(x,y) (e(x,y)?NULL:NULL)
  96. void stbi_image_free(unsigned char *retval_from_stbi_load)
  97. {
  98. free(retval_from_stbi_load);
  99. }
  100. #define MAX_LOADERS 32
  101. stbi_loader *loaders[MAX_LOADERS];
  102. static int max_loaders = 0;
  103. int stbi_register_loader(stbi_loader *loader)
  104. {
  105. int i;
  106. for (i=0; i < MAX_LOADERS; ++i) {
  107. // already present?
  108. if (loaders[i] == loader)
  109. return 1;
  110. // end of the list?
  111. if (loaders[i] == NULL) {
  112. loaders[i] = loader;
  113. max_loaders = i+1;
  114. return 1;
  115. }
  116. }
  117. // no room for it
  118. return 0;
  119. }
  120. #ifndef STBI_NO_HDR
  121. static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
  122. static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp);
  123. #endif
  124. #ifndef STBI_NO_STDIO
  125. unsigned char *stbi_load(char *filename, int *x, int *y, int *comp, int req_comp)
  126. {
  127. FILE *f = fopen(filename, "rb");
  128. unsigned char *result;
  129. if (!f) return ep("can't fopen", "Unable to open file");
  130. result = stbi_load_from_file(f,x,y,comp,req_comp);
  131. fclose(f);
  132. return result;
  133. }
  134. unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
  135. {
  136. int i;
  137. if (stbi_jpeg_test_file(f))
  138. return stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
  139. if (stbi_png_test_file(f))
  140. return stbi_png_load_from_file(f,x,y,comp,req_comp);
  141. if (stbi_bmp_test_file(f))
  142. return stbi_bmp_load_from_file(f,x,y,comp,req_comp);
  143. #ifndef STBI_NO_DDS
  144. if (stbi_dds_test_file(f))
  145. return stbi_dds_load_from_file(f,x,y,comp,req_comp);
  146. #endif
  147. #ifndef STBI_NO_HDR
  148. if (stbi_hdr_test_file(f)) {
  149. float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp);
  150. return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
  151. }
  152. #endif
  153. for (i=0; i < max_loaders; ++i)
  154. if (loaders[i]->test_file(f))
  155. return loaders[i]->load_from_file(f,x,y,comp,req_comp);
  156. // test tga last because it's a crappy test!
  157. if (stbi_tga_test_file(f))
  158. return stbi_tga_load_from_file(f,x,y,comp,req_comp);
  159. return ep("unknown image type", "Image not of any known type, or corrupt");
  160. }
  161. #endif
  162. unsigned char *stbi_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
  163. {
  164. int i;
  165. if (stbi_jpeg_test_memory(buffer,len))
  166. return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp);
  167. if (stbi_png_test_memory(buffer,len))
  168. return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp);
  169. if (stbi_bmp_test_memory(buffer,len))
  170. return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp);
  171. #ifndef STBI_NO_DDS
  172. if (stbi_dds_test_memory(buffer,len))
  173. return stbi_dds_load_from_memory(buffer,len,x,y,comp,req_comp);
  174. #endif
  175. #ifndef STBI_NO_HDR
  176. if (stbi_hdr_test_memory(buffer, len)) {
  177. float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
  178. return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
  179. }
  180. #endif
  181. for (i=0; i < max_loaders; ++i)
  182. if (loaders[i]->test_memory(buffer,len))
  183. return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp);
  184. // test tga last because it's a crappy test!
  185. if (stbi_tga_test_memory(buffer,len))
  186. return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp);
  187. return ep("unknown image type", "Image not of any known type, or corrupt");
  188. }
  189. #ifndef STBI_NO_HDR
  190. #ifndef STBI_NO_STDIO
  191. float *stbi_loadf(char *filename, int *x, int *y, int *comp, int req_comp)
  192. {
  193. FILE *f = fopen(filename, "rb");
  194. float *result;
  195. if (!f) return ep("can't fopen", "Unable to open file");
  196. result = stbi_loadf_from_file(f,x,y,comp,req_comp);
  197. fclose(f);
  198. return result;
  199. }
  200. float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
  201. {
  202. unsigned char *data;
  203. #ifndef STBI_NO_HDR
  204. if (stbi_hdr_test_file(f))
  205. return stbi_hdr_load_from_file(f,x,y,comp,req_comp);
  206. #endif
  207. data = stbi_load_from_file(f, x, y, comp, req_comp);
  208. if (data)
  209. return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
  210. return ep("unknown image type", "Image not of any known type, or corrupt");
  211. }
  212. #endif
  213. float *stbi_loadf_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
  214. {
  215. stbi_uc *data;
  216. #ifndef STBI_NO_HDR
  217. if (stbi_hdr_test_memory(buffer, len))
  218. return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp);
  219. #endif
  220. data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp);
  221. if (data)
  222. return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
  223. return ep("unknown image type", "Image not of any known type, or corrupt");
  224. }
  225. #endif
// The is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR is
// defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
  229. extern int stbi_is_hdr_from_memory(stbi_uc *buffer, int len)
  230. {
  231. #ifndef STBI_NO_HDR
  232. return stbi_hdr_test_memory(buffer, len);
  233. #else
  234. return 0;
  235. #endif
  236. }
  237. #ifndef STBI_NO_STDIO
  238. extern int stbi_is_hdr (char *filename)
  239. {
  240. FILE *f = fopen(filename, "rb");
  241. int result=0;
  242. if (f) {
  243. result = stbi_is_hdr_from_file(f);
  244. fclose(f);
  245. }
  246. return result;
  247. }
  248. extern int stbi_is_hdr_from_file(FILE *f)
  249. {
  250. #ifndef STBI_NO_HDR
  251. return stbi_hdr_test_file(f);
  252. #else
  253. return 0;
  254. #endif
  255. }
  256. #endif
  257. // @TODO: get image dimensions & components without fully decoding
  258. #ifndef STBI_NO_STDIO
  259. extern int stbi_info (char *filename, int *x, int *y, int *comp);
  260. extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
  261. #endif
  262. extern int stbi_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp);
  263. #ifndef STBI_NO_HDR
  264. static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f;
  265. static float l2h_gamma=2.2f, l2h_scale=1.0f;
  266. void stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; }
  267. void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; }
  268. void stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; }
  269. void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; }
  270. #endif
  271. //////////////////////////////////////////////////////////////////////////////
  272. //
  273. // Common code used by all image loaders
  274. //
  275. // image width, height, # components
  276. static uint32 img_x, img_y;
  277. static int img_n, img_out_n;
  278. enum
  279. {
  280. SCAN_load=0,
  281. SCAN_type,
  282. SCAN_header,
  283. };
  284. // An API for reading either from memory or file.
  285. #ifndef STBI_NO_STDIO
  286. static FILE *img_file;
  287. #endif
  288. static uint8 *img_buffer, *img_buffer_end;
  289. #ifndef STBI_NO_STDIO
  290. static void start_file(FILE *f)
  291. {
  292. img_file = f;
  293. }
  294. #endif
  295. static void start_mem(uint8 *buffer, int len)
  296. {
  297. #ifndef STBI_NO_STDIO
  298. img_file = NULL;
  299. #endif
  300. img_buffer = buffer;
  301. img_buffer_end = buffer+len;
  302. }
  303. static int get8(void)
  304. {
  305. #ifndef STBI_NO_STDIO
  306. if (img_file) {
  307. int c = fgetc(img_file);
  308. return c == EOF ? 0 : c;
  309. }
  310. #endif
  311. if (img_buffer < img_buffer_end)
  312. return *img_buffer++;
  313. return 0;
  314. }
  315. static int at_eof(void)
  316. {
  317. #ifndef STBI_NO_STDIO
  318. if (img_file)
  319. return feof(img_file);
  320. #endif
  321. return img_buffer >= img_buffer_end;
  322. }
  323. static uint8 get8u(void)
  324. {
  325. return (uint8) get8();
  326. }
  327. static void skip(int n)
  328. {
  329. #ifndef STBI_NO_STDIO
  330. if (img_file)
  331. fseek(img_file, n, SEEK_CUR);
  332. else
  333. #endif
  334. img_buffer += n;
  335. }
  336. static int get16(void)
  337. {
  338. int z = get8();
  339. return (z << 8) + get8();
  340. }
  341. static uint32 get32(void)
  342. {
  343. uint32 z = get16();
  344. return (z << 16) + get16();
  345. }
  346. static int get16le(void)
  347. {
  348. int z = get8();
  349. return z + (get8() << 8);
  350. }
  351. static uint32 get32le(void)
  352. {
  353. uint32 z = get16le();
  354. return z + (get16le() << 16);
  355. }
  356. static void getn(stbi_uc *buffer, int n)
  357. {
  358. #ifndef STBI_NO_STDIO
  359. if (img_file) {
  360. fread(buffer, 1, n, img_file);
  361. return;
  362. }
  363. #endif
  364. memcpy(buffer, img_buffer, n);
  365. img_buffer += n;
  366. }
  367. //////////////////////////////////////////////////////////////////////////////
  368. //
  369. // generic converter from built-in img_n to req_comp
  370. // individual types do this automatically as much as possible (e.g. jpeg
  371. // does all cases internally since it needs to colorspace convert anyway,
  372. // and it never has alpha, so very few cases ). png can automatically
  373. // interleave an alpha=255 channel, but falls back to this for other cases
  374. //
  375. // assume data buffer is malloced, so malloc a new one and free that one
  376. // only failure mode is malloc failing
  377. static uint8 compute_y(int r, int g, int b)
  378. {
  379. return (uint8) (((r*77) + (g*150) + (29*b)) >> 8);
  380. }
  381. static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp)
  382. {
  383. uint i,j;
  384. unsigned char *good;
  385. if (req_comp == img_n) return data;
  386. assert(req_comp >= 1 && req_comp <= 4);
  387. good = (unsigned char *) malloc(req_comp * img_x * img_y);
  388. if (good == NULL) {
  389. free(data);
  390. return ep("outofmem", "Out of memory");
  391. }
  392. for (j=0; j < img_y; ++j) {
  393. unsigned char *src = data + j * img_x * img_n ;
  394. unsigned char *dest = good + j * img_x * req_comp;
  395. #define COMBO(a,b) ((a)*8+(b))
  396. #define CASE(a,b) case COMBO(a,b): for(i=0; i < img_x; ++i, src += a, dest += b)
  397. // convert source image with img_n components to one with req_comp components;
  398. // avoid switch per pixel, so use switch per scanline and massive macros
  399. switch(COMBO(img_n, req_comp)) {
  400. CASE(1,2) dest[0]=src[0], dest[1]=255; break;
  401. CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
  402. CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
  403. CASE(2,1) dest[0]=src[0]; break;
  404. CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
  405. CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
  406. CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
  407. CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
  408. CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
  409. CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break;
  410. CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
  411. CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
  412. default: assert(0);
  413. }
  414. #undef CASE
  415. }
  416. free(data);
  417. img_out_n = req_comp;
  418. return good;
  419. }
  420. #ifndef STBI_NO_HDR
  421. static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
  422. {
  423. int i,k,n;
  424. float *output = (float *) malloc(x * y * comp * sizeof(float));
  425. if (output == NULL) { free(data); return ep("outofmem", "Out of memory"); }
  426. // compute number of non-alpha components
  427. if (comp & 1) n = comp; else n = comp-1;
  428. for (i=0; i < x*y; ++i) {
  429. for (k=0; k < n; ++k) {
  430. output[i*comp + k] = (float) pow(data[i*comp+k]/255.0, l2h_gamma) * l2h_scale;
  431. }
  432. if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
  433. }
  434. free(data);
  435. return output;
  436. }
  437. #define float2int(x) ((int) (x))
  438. static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp)
  439. {
  440. int i,k,n;
  441. stbi_uc *output = (stbi_uc *) malloc(x * y * comp);
  442. if (output == NULL) { free(data); return ep("outofmem", "Out of memory"); }
  443. // compute number of non-alpha components
  444. if (comp & 1) n = comp; else n = comp-1;
  445. for (i=0; i < x*y; ++i) {
  446. for (k=0; k < n; ++k) {
  447. float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
  448. if (z < 0) z = 0;
  449. if (z > 255) z = 255;
  450. output[i*comp + k] = float2int(z);
  451. }
  452. if (k < comp) {
  453. float z = data[i*comp+k] * 255 + 0.5f;
  454. if (z < 0) z = 0;
  455. if (z > 255) z = 255;
  456. output[i*comp + k] = float2int(z);
  457. }
  458. }
  459. free(data);
  460. return output;
  461. }
  462. #endif
  463. //////////////////////////////////////////////////////////////////////////////
  464. //
  465. // "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
  466. //
  467. // simple implementation
  468. // - channel subsampling of at most 2 in each dimension
  469. // - doesn't support delayed output of y-dimension
  470. // - simple interface (only one output format: 8-bit interleaved RGB)
  471. // - doesn't try to recover corrupt jpegs
  472. // - doesn't allow partial loading, loading multiple at once
  473. // - still fast on x86 (copying globals into locals doesn't help x86)
  474. // - allocates lots of intermediate memory (full size of all components)
  475. // - non-interleaved case requires this anyway
  476. // - allows good upsampling (see next)
  477. // high-quality
  478. // - upsampled channels are bilinearly interpolated, even across blocks
  479. // - quality integer IDCT derived from IJG's 'slow'
  480. // performance
  481. // - fast huffman; reasonable integer IDCT
  482. // - uses a lot of intermediate memory, could cache poorly
  483. // - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
  484. // stb_jpeg: 1.34 seconds (MSVC6, default release build)
  485. // stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro)
  486. // IJL11.dll: 1.08 seconds (compiled by intel)
  487. // IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG)
  488. // IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro)
  489. int stbi_jpeg_dc_only;
  490. // huffman decoding acceleration
  491. #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
  492. typedef struct
  493. {
  494. uint8 fast[1 << FAST_BITS];
  495. // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
  496. uint16 code[256];
  497. uint8 values[256];
  498. uint8 size[257];
  499. unsigned int maxcode[18];
  500. int delta[17]; // old 'firstsymbol' - old 'firstcode'
  501. } huffman;
  502. static huffman huff_dc[4]; // baseline is 2 tables, extended is 4
  503. static huffman huff_ac[4];
  504. static uint8 dequant[4][64];
  505. static int build_huffman(huffman *h, int *count)
  506. {
  507. int i,j,k=0,code;
  508. // build size list for each symbol (from JPEG spec)
  509. for (i=0; i < 16; ++i)
  510. for (j=0; j < count[i]; ++j)
  511. h->size[k++] = (uint8) (i+1);
  512. h->size[k] = 0;
  513. // compute actual symbols (from jpeg spec)
  514. code = 0;
  515. k = 0;
  516. for(j=1; j <= 16; ++j) {
  517. // compute delta to add to code to compute symbol id
  518. h->delta[j] = k - code;
  519. if (h->size[k] == j) {
  520. while (h->size[k] == j)
  521. h->code[k++] = (uint16) (code++);
  522. if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG");
  523. }
  524. // compute largest code + 1 for this size, preshifted as needed later
  525. h->maxcode[j] = code << (16-j);
  526. code <<= 1;
  527. }
  528. h->maxcode[j] = 0xffffffff;
  529. // build non-spec acceleration table; 255 is flag for not-accelerated
  530. memset(h->fast, 255, 1 << FAST_BITS);
  531. for (i=0; i < k; ++i) {
  532. int s = h->size[i];
  533. if (s <= FAST_BITS) {
  534. int c = h->code[i] << (FAST_BITS-s);
  535. int m = 1 << (FAST_BITS-s);
  536. for (j=0; j < m; ++j) {
  537. h->fast[c+j] = (uint8) i;
  538. }
  539. }
  540. }
  541. return 1;
  542. }
  543. // sizes for components, interleaved MCUs
  544. static int img_h_max, img_v_max;
  545. static int img_mcu_x, img_mcu_y;
  546. static int img_mcu_w, img_mcu_h;
  547. // definition of jpeg image component
  548. static struct
  549. {
  550. int id;
  551. int h,v;
  552. int tq;
  553. int hd,ha;
  554. int dc_pred;
  555. int x,y,w2,h2;
  556. uint8 *data;
  557. } img_comp[4];
  558. static unsigned long code_buffer; // jpeg entropy-coded buffer
  559. static int code_bits; // number of valid bits
  560. static unsigned char marker; // marker seen while filling entropy buffer
  561. static int nomore; // flag if we saw a marker so must stop
  562. static void grow_buffer_unsafe(void)
  563. {
  564. do {
  565. int b = nomore ? 0 : get8();
  566. if (b == 0xff) {
  567. int c = get8();
  568. if (c != 0) {
  569. marker = (unsigned char) c;
  570. nomore = 1;
  571. return;
  572. }
  573. }
  574. code_buffer = (code_buffer << 8) | b;
  575. code_bits += 8;
  576. } while (code_bits <= 24);
  577. }
  578. // (1 << n) - 1
  579. static unsigned long bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
  580. // decode a jpeg huffman value from the bitstream
  581. __forceinline static int decode(huffman *h)
  582. {
  583. unsigned int temp;
  584. int c,k;
  585. if (code_bits < 16) grow_buffer_unsafe();
  586. // look at the top FAST_BITS and determine what symbol ID it is,
  587. // if the code is <= FAST_BITS
  588. c = (code_buffer >> (code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1);
  589. k = h->fast[c];
  590. if (k < 255) {
  591. if (h->size[k] > code_bits)
  592. return -1;
  593. code_bits -= h->size[k];
  594. return h->values[k];
  595. }
  596. // naive test is to shift the code_buffer down so k bits are
  597. // valid, then test against maxcode. To speed this up, we've
  598. // preshifted maxcode left so that it has (16-k) 0s at the
  599. // end; in other words, regardless of the number of bits, it
  600. // wants to be compared against something shifted to have 16;
  601. // that way we don't need to shift inside the loop.
  602. if (code_bits < 16)
  603. temp = (code_buffer << (16 - code_bits)) & 0xffff;
  604. else
  605. temp = (code_buffer >> (code_bits - 16)) & 0xffff;
  606. for (k=FAST_BITS+1 ; ; ++k)
  607. if (temp < h->maxcode[k])
  608. break;
  609. if (k == 17) {
  610. // error! code not found
  611. code_bits -= 16;
  612. return -1;
  613. }
  614. if (k > code_bits)
  615. return -1;
  616. // convert the huffman code to the symbol id
  617. c = ((code_buffer >> (code_bits - k)) & bmask[k]) + h->delta[k];
  618. assert((((code_buffer) >> (code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
  619. // convert the id to a symbol
  620. code_bits -= k;
  621. return h->values[c];
  622. }
  623. // combined JPEG 'receive' and JPEG 'extend', since baseline
  624. // always extends everything it receives.
  625. __forceinline static int extend_receive(int n)
  626. {
  627. unsigned int m = 1 << (n-1);
  628. unsigned int k;
  629. if (code_bits < n) grow_buffer_unsafe();
  630. k = (code_buffer >> (code_bits - n)) & bmask[n];
  631. code_bits -= n;
  632. // the following test is probably a random branch that won't
  633. // predict well. I tried to table accelerate it but failed.
  634. // maybe it's compiling as a conditional move?
  635. if (k < m)
  636. return (-1 << n) + k + 1;
  637. else
  638. return k;
  639. }
  640. // given a value that's at position X in the zigzag stream,
  641. // where does it appear in the 8x8 matrix coded as row-major?
  642. static uint8 dezigzag[64+15] =
  643. {
  644. 0, 1, 8, 16, 9, 2, 3, 10,
  645. 17, 24, 32, 25, 18, 11, 4, 5,
  646. 12, 19, 26, 33, 40, 48, 41, 34,
  647. 27, 20, 13, 6, 7, 14, 21, 28,
  648. 35, 42, 49, 56, 57, 50, 43, 36,
  649. 29, 22, 15, 23, 30, 37, 44, 51,
  650. 58, 59, 52, 45, 38, 31, 39, 46,
  651. 53, 60, 61, 54, 47, 55, 62, 63,
  652. // let corrupt input sample past end
  653. 63, 63, 63, 63, 63, 63, 63, 63,
  654. 63, 63, 63, 63, 63, 63, 63
  655. };
  656. // decode one 64-entry block--
  657. static int decode_block(short data[64], huffman *hdc, huffman *hac, int b)
  658. {
  659. int diff,dc,k;
  660. int t = decode(hdc);
  661. if (t < 0) return e("bad huffman code","Corrupt JPEG");
  662. // 0 all the ac values now so we can do it 32-bits at a time
  663. memset(data,0,64*sizeof(data[0]));
  664. diff = t ? extend_receive(t) : 0;
  665. dc = img_comp[b].dc_pred + diff;
  666. img_comp[b].dc_pred = dc;
  667. data[0] = (short) dc;
  668. // decode AC components, see JPEG spec
  669. k = 1;
  670. do {
  671. int r,s;
  672. int rs = decode(hac);
  673. if (rs < 0) return e("bad huffman code","Corrupt JPEG");
  674. s = rs & 15;
  675. r = rs >> 4;
  676. if (s == 0) {
  677. if (rs != 0xf0) break; // end block
  678. k += 16;
  679. } else {
  680. k += r;
  681. // decode into unzigzag'd location
  682. data[dezigzag[k++]] = (short) extend_receive(s);
  683. }
  684. } while (k < 64);
  685. return 1;
  686. }
  687. // take a -128..127 value and clamp it and convert to 0..255
  688. __forceinline static uint8 clamp(int x)
  689. {
  690. x += 128;
  691. // trick to use a single test to catch both cases
  692. if ((unsigned int) x > 255) {
  693. if (x < 0) return 0;
  694. if (x > 255) return 255;
  695. }
  696. return (uint8) x;
  697. }
  698. #define f2f(x) (int) (((x) * 4096 + 0.5))
  699. #define fsh(x) ((x) << 12)
  700. // derived from jidctint -- DCT_ISLOW
  701. #define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
  702. int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
  703. p2 = s2; \
  704. p3 = s6; \
  705. p1 = (p2+p3) * f2f(0.5411961f); \
  706. t2 = p1 + p3*f2f(-1.847759065f); \
  707. t3 = p1 + p2*f2f( 0.765366865f); \
  708. p2 = s0; \
  709. p3 = s4; \
  710. t0 = fsh(p2+p3); \
  711. t1 = fsh(p2-p3); \
  712. x0 = t0+t3; \
  713. x3 = t0-t3; \
  714. x1 = t1+t2; \
  715. x2 = t1-t2; \
  716. t0 = s7; \
  717. t1 = s5; \
  718. t2 = s3; \
  719. t3 = s1; \
  720. p3 = t0+t2; \
  721. p4 = t1+t3; \
  722. p1 = t0+t3; \
  723. p2 = t1+t2; \
  724. p5 = (p3+p4)*f2f( 1.175875602f); \
  725. t0 = t0*f2f( 0.298631336f); \
  726. t1 = t1*f2f( 2.053119869f); \
  727. t2 = t2*f2f( 3.072711026f); \
  728. t3 = t3*f2f( 1.501321110f); \
  729. p1 = p5 + p1*f2f(-0.899976223f); \
  730. p2 = p5 + p2*f2f(-2.562915447f); \
  731. p3 = p3*f2f(-1.961570560f); \
  732. p4 = p4*f2f(-0.390180644f); \
  733. t3 += p1+p4; \
  734. t2 += p2+p3; \
  735. t1 += p2+p4; \
  736. t0 += p1+p3;
  737. // .344 seconds on 3*anemones.jpg
  738. static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize)
  739. {
  740. int i,val[64],*v=val;
  741. uint8 *o,*dq = dequantize;
  742. short *d = data;
  743. if (stbi_jpeg_dc_only) {
  744. // ok, I don't really know why this is right, but it seems to be:
  745. int z = 128 + ((d[0] * dq[0]) >> 3);
  746. for (i=0; i < 8; ++i) {
  747. out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = z;
  748. out += out_stride;
  749. }
  750. return;
  751. }
  752. // columns
  753. for (i=0; i < 8; ++i,++d,++dq, ++v) {
  754. // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
  755. if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
  756. && d[40]==0 && d[48]==0 && d[56]==0) {
  757. // no shortcut 0 seconds
  758. // (1|2|3|4|5|6|7)==0 0 seconds
  759. // all separate -0.047 seconds
  760. // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
  761. int dcterm = d[0] * dq[0] << 2;
  762. v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
  763. } else {
  764. IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24],
  765. d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56])
  766. // constants scaled things up by 1<<12; let's bring them back
  767. // down, but keep 2 extra bits of precision
  768. x0 += 512; x1 += 512; x2 += 512; x3 += 512;
  769. v[ 0] = (x0+t3) >> 10;
  770. v[56] = (x0-t3) >> 10;
  771. v[ 8] = (x1+t2) >> 10;
  772. v[48] = (x1-t2) >> 10;
  773. v[16] = (x2+t1) >> 10;
  774. v[40] = (x2-t1) >> 10;
  775. v[24] = (x3+t0) >> 10;
  776. v[32] = (x3-t0) >> 10;
  777. }
  778. }
  779. for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
  780. // no fast case since the first 1D IDCT spread components out
  781. IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
  782. // constants scaled things up by 1<<12, plus we had 1<<2 from first
  783. // loop, plus horizontal and vertical each scale by sqrt(8) so together
  784. // we've got an extra 1<<3, so 1<<17 total we need to remove.
  785. x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536;
  786. o[0] = clamp((x0+t3) >> 17);
  787. o[7] = clamp((x0-t3) >> 17);
  788. o[1] = clamp((x1+t2) >> 17);
  789. o[6] = clamp((x1-t2) >> 17);
  790. o[2] = clamp((x2+t1) >> 17);
  791. o[5] = clamp((x2-t1) >> 17);
  792. o[3] = clamp((x3+t0) >> 17);
  793. o[4] = clamp((x3-t0) >> 17);
  794. }
  795. }
  796. #define MARKER_none 0xff
  797. // if there's a pending marker from the entropy stream, return that
  798. // otherwise, fetch from the stream and get a marker. if there's no
  799. // marker, return 0xff, which is never a valid marker value
  800. static uint8 get_marker(void)
  801. {
  802. uint8 x;
  803. if (marker != MARKER_none) { x = marker; marker = MARKER_none; return x; }
  804. x = get8u();
  805. if (x != 0xff) return MARKER_none;
  806. while (x == 0xff)
  807. x = get8u();
  808. return x;
  809. }
  810. // in each scan, we'll have scan_n components, and the order
  811. // of the components is specified by order[]
  812. static int scan_n, order[4];
  813. static int restart_interval, todo;
  814. #define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
  815. // after a restart interval, reset the entropy decoder and
  816. // the dc prediction
  817. static void reset(void)
  818. {
  819. code_bits = 0;
  820. code_buffer = 0;
  821. nomore = 0;
  822. img_comp[0].dc_pred = img_comp[1].dc_pred = img_comp[2].dc_pred = 0;
  823. marker = MARKER_none;
  824. todo = restart_interval ? restart_interval : 0x7fffffff;
  825. // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
  826. // since we don't even allow 1<<30 pixels
  827. }
  828. static int parse_entropy_coded_data(void)
  829. {
  830. reset();
  831. if (scan_n == 1) {
  832. int i,j;
  833. short data[64];
  834. int n = order[0];
  835. // non-interleaved data, we just need to process one block at a time,
  836. // in trivial scanline order
  837. // number of blocks to do just depends on how many actual "pixels" this
  838. // component has, independent of interleaved MCU blocking and such
  839. int w = (img_comp[n].x+7) >> 3;
  840. int h = (img_comp[n].y+7) >> 3;
  841. for (j=0; j < h; ++j) {
  842. for (i=0; i < w; ++i) {
  843. if (!decode_block(data, huff_dc+img_comp[n].hd, huff_ac+img_comp[n].ha, n)) return 0;
  844. idct_block(img_comp[n].data+img_comp[n].w2*j*8+i*8, img_comp[n].w2, data, dequant[img_comp[n].tq]);
  845. // every data block is an MCU, so countdown the restart interval
  846. if (--todo <= 0) {
  847. if (code_bits < 24) grow_buffer_unsafe();
  848. // if it's NOT a restart, then just bail, so we get corrupt data
  849. // rather than no data
  850. if (!RESTART(marker)) return 1;
  851. reset();
  852. }
  853. }
  854. }
  855. } else { // interleaved!
  856. int i,j,k,x,y;
  857. short data[64];
  858. for (j=0; j < img_mcu_y; ++j) {
  859. for (i=0; i < img_mcu_x; ++i) {
  860. // scan an interleaved mcu... process scan_n components in order
  861. for (k=0; k < scan_n; ++k) {
  862. int n = order[k];
  863. // scan out an mcu's worth of this component; that's just determined
  864. // by the basic H and V specified for the component
  865. for (y=0; y < img_comp[n].v; ++y) {
  866. for (x=0; x < img_comp[n].h; ++x) {
  867. int x2 = (i*img_comp[n].h + x)*8;
  868. int y2 = (j*img_comp[n].v + y)*8;
  869. if (!decode_block(data, huff_dc+img_comp[n].hd, huff_ac+img_comp[n].ha, n)) return 0;
  870. idct_block(img_comp[n].data+img_comp[n].w2*y2+x2, img_comp[n].w2, data, dequant[img_comp[n].tq]);
  871. }
  872. }
  873. }
  874. // after all interleaved components, that's an interleaved MCU,
  875. // so now count down the restart interval
  876. if (--todo <= 0) {
  877. if (code_bits < 24) grow_buffer_unsafe();
  878. // if it's NOT a restart, then just bail, so we get corrupt data
  879. // rather than no data
  880. if (!RESTART(marker)) return 1;
  881. reset();
  882. }
  883. }
  884. }
  885. }
  886. return 1;
  887. }
  888. static int process_marker(int m)
  889. {
  890. int L;
  891. switch (m) {
  892. case MARKER_none: // no marker found
  893. return e("expected marker","Corrupt JPEG");
  894. case 0xC2: // SOF - progressive
  895. return e("progressive jpeg","JPEG format not supported (progressive)");
  896. case 0xDD: // DRI - specify restart interval
  897. if (get16() != 4) return e("bad DRI len","Corrupt JPEG");
  898. restart_interval = get16();
  899. return 1;
  900. case 0xDB: // DQT - define quantization table
  901. L = get16()-2;
  902. while (L > 0) {
  903. int z = get8();
  904. int p = z >> 4;
  905. int t = z & 15,i;
  906. if (p != 0) return e("bad DQT type","Corrupt JPEG");
  907. if (t > 3) return e("bad DQT table","Corrupt JPEG");
  908. for (i=0; i < 64; ++i)
  909. dequant[t][dezigzag[i]] = get8u();
  910. L -= 65;
  911. }
  912. return L==0;
  913. case 0xC4: // DHT - define huffman table
  914. L = get16()-2;
  915. while (L > 0) {
  916. uint8 *v;
  917. int sizes[16],i,m=0;
  918. int z = get8();
  919. int tc = z >> 4;
  920. int th = z & 15;
  921. if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG");
  922. for (i=0; i < 16; ++i) {
  923. sizes[i] = get8();
  924. m += sizes[i];
  925. }
  926. L -= 17;
  927. if (tc == 0) {
  928. if (!build_huffman(huff_dc+th, sizes)) return 0;
  929. v = huff_dc[th].values;
  930. } else {
  931. if (!build_huffman(huff_ac+th, sizes)) return 0;
  932. v = huff_ac[th].values;
  933. }
  934. for (i=0; i < m; ++i)
  935. v[i] = get8u();
  936. L -= m;
  937. }
  938. return L==0;
  939. }
  940. // check for comment block or APP blocks
  941. if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
  942. skip(get16()-2);
  943. return 1;
  944. }
  945. return 0;
  946. }
  947. // after we see SOS
  948. static int process_scan_header(void)
  949. {
  950. int i;
  951. int Ls = get16();
  952. scan_n = get8();
  953. if (scan_n < 1 || scan_n > 4 || scan_n > (int) img_n) return e("bad SOS component count","Corrupt JPEG");
  954. if (Ls != 6+2*scan_n) return e("bad SOS len","Corrupt JPEG");
  955. for (i=0; i < scan_n; ++i) {
  956. int id = get8(), which;
  957. int z = get8();
  958. for (which = 0; which < img_n; ++which)
  959. if (img_comp[which].id == id)
  960. break;
  961. if (which == img_n) return 0;
  962. img_comp[which].hd = z >> 4; if (img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG");
  963. img_comp[which].ha = z & 15; if (img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG");
  964. order[i] = which;
  965. }
  966. if (get8() != 0) return e("bad SOS","Corrupt JPEG");
  967. get8(); // should be 63, but might be 0
  968. if (get8() != 0) return e("bad SOS","Corrupt JPEG");
  969. return 1;
  970. }
  971. static int process_frame_header(int scan)
  972. {
  973. int Lf,p,i,z, h_max=1,v_max=1;
  974. Lf = get16(); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG
  975. p = get8(); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
  976. img_y = get16(); if (img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
  977. img_x = get16(); if (img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires
  978. img_n = get8();
  979. if (img_n != 3 && img_n != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires
  980. if (Lf != 8+3*img_n) return e("bad SOF len","Corrupt JPEG");
  981. for (i=0; i < img_n; ++i) {
  982. img_comp[i].id = get8();
  983. if (img_comp[i].id != i+1) // JFIF requires
  984. if (img_comp[i].id != i) // jpegtran outputs non-JFIF-compliant files!
  985. return e("bad component ID","Corrupt JPEG");
  986. z = get8();
  987. img_comp[i].h = (z >> 4); if (!img_comp[i].h || img_comp[i].h > 4) return e("bad H","Corrupt JPEG");
  988. img_comp[i].v = z & 15; if (!img_comp[i].v || img_comp[i].v > 4) return e("bad V","Corrupt JPEG");
  989. img_comp[i].tq = get8(); if (img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG");
  990. }
  991. if (scan != SCAN_load) return 1;
  992. if ((1 << 30) / img_x / img_n < img_y) return e("too large", "Image too large to decode");
  993. for (i=0; i < img_n; ++i) {
  994. if (img_comp[i].h > h_max) h_max = img_comp[i].h;
  995. if (img_comp[i].v > v_max) v_max = img_comp[i].v;
  996. }
  997. // compute interleaved mcu info
  998. img_h_max = h_max;
  999. img_v_max = v_max;
  1000. img_mcu_w = h_max * 8;
  1001. img_mcu_h = v_max * 8;
  1002. img_mcu_x = (img_x + img_mcu_w-1) / img_mcu_w;
  1003. img_mcu_y = (img_y + img_mcu_h-1) / img_mcu_h;
  1004. for (i=0; i < img_n; ++i) {
  1005. // number of effective pixels (e.g. for non-interleaved MCU)
  1006. img_comp[i].x = (img_x * img_comp[i].h + h_max-1) / h_max;
  1007. img_comp[i].y = (img_y * img_comp[i].v + v_max-1) / v_max;
  1008. // to simplify generation, we'll allocate enough memory to decode
  1009. // the bogus oversized data from using interleaved MCUs and their
  1010. // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
  1011. // discard the extra data until colorspace conversion
  1012. img_comp[i].w2 = img_mcu_x * img_comp[i].h * 8;
  1013. img_comp[i].h2 = img_mcu_y * img_comp[i].v * 8;
  1014. img_comp[i].data = (uint8 *) malloc(img_comp[i].w2 * img_comp[i].h2);
  1015. if (img_comp[i].data == NULL) {
  1016. for(--i; i >= 0; --i)
  1017. free(img_comp[i].data);
  1018. return e("outofmem", "Out of memory");
  1019. }
  1020. }
  1021. return 1;
  1022. }
  1023. // use comparisons since in some cases we handle more than one case (e.g. SOF)
  1024. #define DNL(x) ((x) == 0xdc)
  1025. #define SOI(x) ((x) == 0xd8)
  1026. #define EOI(x) ((x) == 0xd9)
  1027. #define SOF(x) ((x) == 0xc0 || (x) == 0xc1)
  1028. #define SOS(x) ((x) == 0xda)
  1029. static int decode_jpeg_header(int scan)
  1030. {
  1031. int m;
  1032. marker = MARKER_none; // initialize cached marker to empty
  1033. m = get_marker();
  1034. if (!SOI(m)) return e("no SOI","Corrupt JPEG");
  1035. if (scan == SCAN_type) return 1;
  1036. m = get_marker();
  1037. while (!SOF(m)) {
  1038. if (!process_marker(m)) return 0;
  1039. m = get_marker();
  1040. while (m == MARKER_none) {
  1041. // some files have extra padding after their blocks, so ok, we'll scan
  1042. if (at_eof()) return e("no SOF", "Corrupt JPEG");
  1043. m = get_marker();
  1044. }
  1045. }
  1046. if (!process_frame_header(scan)) return 0;
  1047. return 1;
  1048. }
  1049. static int decode_jpeg_image(void)
  1050. {
  1051. int m;
  1052. restart_interval = 0;
  1053. if (!decode_jpeg_header(SCAN_load)) return 0;
  1054. m = get_marker();
  1055. while (!EOI(m)) {
  1056. if (SOS(m)) {
  1057. if (!process_scan_header()) return 0;
  1058. if (!parse_entropy_coded_data()) return 0;
  1059. } else {
  1060. if (!process_marker(m)) return 0;
  1061. }
  1062. m = get_marker();
  1063. }
  1064. return 1;
  1065. }
  1066. // static jfif-centered resampling with cross-block smoothing
  1067. // here by cross-block smoothing what I mean is that the resampling
  1068. // is bilerp and crosses blocks; I dunno what IJG means
  1069. #define div4(x) ((uint8) ((x) >> 2))
  1070. static void resample_v_2(uint8 *out1, uint8 *input, int w, int h, int s)
  1071. {
  1072. // need to generate two samples vertically for every one in input
  1073. uint8 *above;
  1074. uint8 *below;
  1075. uint8 *source;
  1076. uint8 *out2;
  1077. int i,j;
  1078. source = input;
  1079. out2 = out1+w;
  1080. for (j=0; j < h; ++j) {
  1081. above = source;
  1082. source = input + j*s;
  1083. below = source + s; if (j == h-1) below = source;
  1084. for (i=0; i < w; ++i) {
  1085. int n = source[i]*3;
  1086. out1[i] = div4(above[i] + n);
  1087. out2[i] = div4(below[i] + n);
  1088. }
  1089. out1 += w*2;
  1090. out2 += w*2;
  1091. }
  1092. }
  1093. static void resample_h_2(uint8 *out, uint8 *input, int w, int h, int s)
  1094. {
  1095. // need to generate two samples horizontally for every one in input
  1096. int i,j;
  1097. if (w == 1) {
  1098. for (j=0; j < h; ++j)
  1099. out[j*2+0] = out[j*2+1] = input[j*s];
  1100. return;
  1101. }
  1102. for (j=0; j < h; ++j) {
  1103. out[0] = input[0];
  1104. out[1] = div4(input[0]*3 + input[1]);
  1105. for (i=1; i < w-1; ++i) {
  1106. int n = input[i]*3;
  1107. out[i*2-2] = div4(input[i-1] + n);
  1108. out[i*2-1] = div4(input[i+1] + n);
  1109. }
  1110. out[w*2-2] = div4(input[w-2]*3 + input[w-1]);
  1111. out[w*2-1] = input[w-1];
  1112. out += w*2;
  1113. input += s;
  1114. }
  1115. }
  1116. // .172 seconds on 3*anemones.jpg
  1117. static void resample_hv_2(uint8 *out, uint8 *input, int w, int h, int s)
  1118. {
  1119. // need to generate 2x2 samples for every one in input
  1120. int i,j;
  1121. int os = w*2;
  1122. // generate edge samples... @TODO lerp them!
  1123. for (i=0; i < w; ++i) {
  1124. out[i*2+0] = out[i*2+1] = input[i];
  1125. out[i*2+(2*h-1)*os+0] = out[i*2+(2*h-1)*os+1] = input[i+(h-1)*w];
  1126. }
  1127. for (j=0; j < h; ++j) {
  1128. out[j*os*2+0] = out[j*os*2+os+0] = input[j*w];
  1129. out[j*os*2+os-1] = out[j*os*2+os+os-1] = input[j*w+i-1];
  1130. }
  1131. // now generate interior samples; i & j point to top left of input
  1132. for (j=0; j < h-1; ++j) {
  1133. uint8 *in1 = input+j*s;
  1134. uint8 *in2 = in1 + s;
  1135. uint8 *out1 = out + (j*2+1)*os + 1;
  1136. uint8 *out2 = out1 + os;
  1137. for (i=0; i < w-1; ++i) {
  1138. int p00 = in1[0], p01=in1[1], p10=in2[0], p11=in2[1];
  1139. int p00_3 = p00*3, p01_3 = p01*3, p10_3 = p10*3, p11_3 = p11*3;
  1140. #define div16(x) ((uint8) ((x) >> 4))
  1141. out1[0] = div16(p00*9 + p01_3 + p10_3 + p11);
  1142. out1[1] = div16(p01*9 + p00_3 + p01_3 + p10);
  1143. out2[0] = div16(p10*9 + p11_3 + p00_3 + p01);
  1144. out2[1] = div16(p11*9 + p10_3 + p01_3 + p00);
  1145. out1 += 2;
  1146. out2 += 2;
  1147. ++in1;
  1148. ++in2;
  1149. }
  1150. }
  1151. }
  1152. #define float2fixed(x) ((int) ((x) * 65536 + 0.5))
  1153. // 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro)
  1154. // VC6 without processor=Pro is generating multiple LEAs per multiply!
  1155. static void YCbCr_to_RGB_row(uint8 *out, uint8 *y, uint8 *pcb, uint8 *pcr, int count, int step)
  1156. {
  1157. int i;
  1158. for (i=0; i < count; ++i) {
  1159. int y_fixed = (y[i] << 16) + 32768; // rounding
  1160. int r,g,b;
  1161. int cr = pcr[i] - 128;
  1162. int cb = pcb[i] - 128;
  1163. r = y_fixed + cr*float2fixed(1.40200f);
  1164. g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
  1165. b = y_fixed + cb*float2fixed(1.77200f);
  1166. r >>= 16;
  1167. g >>= 16;
  1168. b >>= 16;
  1169. if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
  1170. if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
  1171. if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
  1172. out[0] = (uint8)r;
  1173. out[1] = (uint8)g;
  1174. out[2] = (uint8)b;
  1175. if (step == 4) out[3] = 255;
  1176. out += step;
  1177. }
  1178. }
  1179. // clean up the temporary component buffers
  1180. static void cleanup_jpeg(void)
  1181. {
  1182. int i;
  1183. for (i=0; i < img_n; ++i) {
  1184. if (img_comp[i].data) {
  1185. free(img_comp[i].data);
  1186. img_comp[i].data = NULL;
  1187. }
  1188. }
  1189. }
  1190. static uint8 *load_jpeg_image(int *out_x, int *out_y, int *comp, int req_comp)
  1191. {
  1192. int i, n;
  1193. // validate req_comp
  1194. if (req_comp < 0 || req_comp > 4) return ep("bad req_comp", "Internal error");
  1195. // load a jpeg image from whichever source
  1196. if (!decode_jpeg_image()) { cleanup_jpeg(); return NULL; }
  1197. // determine actual number of components to generate
  1198. n = req_comp ? req_comp : img_n;
  1199. // resample components to full size... memory wasteful, but this
  1200. // lets us bilerp across blocks while upsampling
  1201. for (i=0; i < img_n; ++i) {
  1202. // if we're outputting fewer than 3 components, we're grey not RGB;
  1203. // in that case, don't bother upsampling Cb or Cr
  1204. if (n < 3 && i) continue;
  1205. // check if the component scale is less than max; if so it needs upsampling
  1206. if (img_comp[i].h != img_h_max || img_comp[i].v != img_v_max) {
  1207. int stride = img_x;
  1208. // allocate final size; make sure it's big enough for upsampling off
  1209. // the edges with upsample up to 4x4 (although we only support 2x2
  1210. // currently)
  1211. uint8 *new_data = (uint8 *) malloc((img_x+3)*(img_y+3));
  1212. if (new_data == NULL) {
  1213. cleanup_jpeg();
  1214. return ep("outofmem", "Out of memory (image too large?)");
  1215. }
  1216. if (img_comp[i].h*2 == img_h_max && img_comp[i].v*2 == img_v_max) {
  1217. int tx = (img_x+1)>>1;
  1218. resample_hv_2(new_data, img_comp[i].data, tx,(img_y+1)>>1, img_comp[i].w2);
  1219. stride = tx*2;
  1220. } else if (img_comp[i].h == img_h_max && img_comp[i].v*2 == img_v_max) {
  1221. resample_v_2(new_data, img_comp[i].data, img_x,(img_y+1)>>1, img_comp[i].w2);
  1222. } else if (img_comp[i].h*2 == img_h_max && img_comp[i].v == img_v_max) {
  1223. int tx = (img_x+1)>>1;
  1224. resample_h_2(new_data, img_comp[i].data, tx,img_y, img_comp[i].w2);
  1225. stride = tx*2;
  1226. } else {
  1227. // @TODO resample uncommon sampling pattern with nearest neighbor
  1228. free(new_data);
  1229. cleanup_jpeg();
  1230. return ep("uncommon H or V", "JPEG not supported: atypical downsampling mode");
  1231. }
  1232. img_comp[i].w2 = stride;
  1233. free(img_comp[i].data);
  1234. img_comp[i].data = new_data;
  1235. }
  1236. }
  1237. // now convert components to output image
  1238. {
  1239. uint32 i,j;
  1240. uint8 *output = (uint8 *) malloc(n * img_x * img_y + 1);
  1241. if (n >= 3) { // output STBI_rgb_*
  1242. for (j=0; j < img_y; ++j) {
  1243. uint8 *y = img_comp[0].data + j*img_comp[0].w2;
  1244. uint8 *out = output + n * img_x * j;
  1245. if (img_n == 3) {
  1246. uint8 *cb = img_comp[1].data + j*img_comp[1].w2;
  1247. uint8 *cr = img_comp[2].data + j*img_comp[2].w2;
  1248. YCbCr_to_RGB_row(out, y, cb, cr, img_x, n);
  1249. } else {
  1250. for (i=0; i < img_x; ++i) {
  1251. out[0] = out[1] = out[2] = y[i];
  1252. out[3] = 255; // not used if n == 3
  1253. out += n;
  1254. }
  1255. }
  1256. }
  1257. } else { // output STBI_grey_*
  1258. for (j=0; j < img_y; ++j) {
  1259. uint8 *y = img_comp[0].data + j*img_comp[0].w2;
  1260. uint8 *out = output + n * img_x * j;
  1261. if (n == 1)
  1262. for (i=0; i < img_x; ++i) *out++ = *y++;
  1263. else
  1264. for (i=0; i < img_x; ++i) *out++ = *y++, *out++ = 255;
  1265. }
  1266. }
  1267. cleanup_jpeg();
  1268. *out_x = img_x;
  1269. *out_y = img_y;
  1270. if (comp) *comp = n; // Changed JLD: report output components
  1271. //if (comp) *comp = img_n; // report original components, not output
  1272. return output;
  1273. }
  1274. }
  1275. #ifndef STBI_NO_STDIO
  1276. unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
  1277. {
  1278. start_file(f);
  1279. return load_jpeg_image(x,y,comp,req_comp);
  1280. }
  1281. unsigned char *stbi_jpeg_load(char *filename, int *x, int *y, int *comp, int req_comp)
  1282. {
  1283. unsigned char *data;
  1284. FILE *f = fopen(filename, "rb");
  1285. if (!f) return NULL;
  1286. data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp);
  1287. fclose(f);
  1288. return data;
  1289. }
  1290. #endif
  1291. unsigned char *stbi_jpeg_load_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp, int req_comp)
  1292. {
  1293. start_mem(buffer,len);
  1294. return load_jpeg_image(x,y,comp,req_comp);
  1295. }
  1296. #ifndef STBI_NO_STDIO
  1297. int stbi_jpeg_test_file(FILE *f)
  1298. {
  1299. int n,r;
  1300. n = ftell(f);
  1301. start_file(f);
  1302. r = decode_jpeg_header(SCAN_type);
  1303. fseek(f,n,SEEK_SET);
  1304. return r;
  1305. }
  1306. #endif
  1307. int stbi_jpeg_test_memory(unsigned char *buffer, int len)
  1308. {
  1309. start_mem(buffer,len);
  1310. return decode_jpeg_header(SCAN_type);
  1311. }
  1312. // @TODO:
  1313. #ifndef STBI_NO_STDIO
  1314. extern int stbi_jpeg_info (char *filename, int *x, int *y, int *comp);
  1315. extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp);
  1316. #endif
  1317. extern int stbi_jpeg_info_from_memory(stbi_uc *buffer, int len, int *x, int *y, int *comp);
  1318. // public domain zlib decode v0.2 Sean Barrett 2006-11-18
  1319. // simple implementation
  1320. // - all input must be provided in an upfront buffer
  1321. // - all output is written to a single output buffer (can malloc/realloc)
  1322. // performance
  1323. // - fast huffman
  1324. // fast-way is faster to check than jpeg huffman, but slow way is slower
  1325. #define ZFAST_BITS 9 // accelerate all cases in default tables
  1326. #define ZFAST_MASK ((1 << ZFAST_BITS) - 1)
  1327. // zlib-style huffman encoding
  1328. // (jpegs packs from left, zlib from right, so can't share code)
  1329. typedef struct
  1330. {
  1331. uint16 fast[1 << ZFAST_BITS];
  1332. uint16 firstcode[16];
  1333. int maxcode[17];
  1334. uint16 firstsymbol[16];
  1335. uint8 size[288];
  1336. uint16 value[288];
  1337. } zhuffman;
  1338. __forceinline static int bitreverse16(int n)
  1339. {
  1340. n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
  1341. n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
  1342. n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
  1343. n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
  1344. return n;
  1345. }
  1346. __forceinline static int bit_reverse(int v, int bits)
  1347. {
  1348. assert(bits <= 16);
  1349. // to bit reverse n bits, reverse 16 and shift
  1350. // e.g. 11 bits, bit reverse and shift away 5
  1351. return bitreverse16(v) >> (16-bits);
  1352. }
  1353. static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
  1354. {
  1355. int i,k=0;
  1356. int code, next_code[16], sizes[17];
  1357. // DEFLATE spec for generating codes
  1358. memset(sizes, 0, sizeof(sizes));
  1359. memset(z->fast, 255, sizeof(z->fast));
  1360. for (i=0; i < num; ++i)
  1361. ++sizes[sizelist[i]];
  1362. sizes[0] = 0;
  1363. for (i=1; i < 16; ++i)
  1364. assert(sizes[i] <= (1 << i));
  1365. code = 0;
  1366. for (i=1; i < 16; ++i) {
  1367. next_code[i] = code;
  1368. z->firstcode[i] = (uint16) code;
  1369. z->firstsymbol[i] = (uint16) k;
  1370. code = (code + sizes[i]);
  1371. if (sizes[i])
  1372. if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG");
  1373. z->maxcode[i] = code << (16-i); // preshift for inner loop
  1374. code <<= 1;
  1375. k += sizes[i];
  1376. }
  1377. z->maxcode[16] = 0x10000; // sentinel
  1378. for (i=0; i < num; ++i) {
  1379. int s = sizelist[i];
  1380. if (s) {
  1381. int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
  1382. z->size[c] = (uint8)s;
  1383. z->value[c] = (uint16)i;
  1384. if (s <= ZFAST_BITS) {
  1385. int k = bit_reverse(next_code[s],s);
  1386. while (k < (1 << ZFAST_BITS)) {
  1387. z->fast[k] = (uint16) c;
  1388. k += (1 << s);
  1389. }
  1390. }
  1391. ++next_code[s];
  1392. }
  1393. }
  1394. return 1;
  1395. }
  1396. // zlib-from-memory implementation for PNG reading
  1397. // because PNG allows splitting the zlib stream arbitrarily,
  1398. // and it's annoying structurally to have PNG call ZLIB call PNG,
  1399. // we require PNG read all the IDATs and combine them into a single
  1400. // memory buffer
  1401. static uint8 *zbuffer, *zbuffer_end;