The Independent JPEG Group's JPEG software v4
diff --git a/jdhuff.c b/jdhuff.c
index 8071fa2..cdc9bd7 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -7,7 +7,7 @@
  *
  * This file contains Huffman entropy decoding routines.
  * These routines are invoked via the methods entropy_decode
- * and entropy_decoder_init/term.
+ * and entropy_decode_init/term.
  */
 
 #include "jinclude.h"
@@ -19,7 +19,7 @@
 
 static INT32 get_buffer;	/* current bit-extraction buffer */
 static int bits_left;		/* # of unused bits in it */
-
+static boolean printed_eod;	/* flag to suppress multiple end-of-data msgs */
 
 LOCAL void
 fix_huff_tbl (HUFF_TBL * htbl)
@@ -75,116 +75,137 @@
 }
 
 
-/* Extract the next N bits from the input stream (N <= 15) */
+/*
+ * Code for extracting the next N bits from the input stream.
+ * (N never exceeds 15 for JPEG data.)
+ * This needs to go as fast as possible!
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros get_bits() and get_bit().  When there aren't enough bits,
+ * fill_bit_buffer is called; it will attempt to fill get_buffer to the
+ * "high water mark", then extract the desired number of bits.  The idea,
+ * of course, is to minimize the function-call overhead cost of entering
+ * fill_bit_buffer.
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * relatively slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  25	/* max value for 32-bit get_buffer */
+#endif
+
+static const int bmask[16] =	/* bmask[n] is mask for n rightmost bits */
+  { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF,
+    0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF };
+
 
 LOCAL int
-get_bits (int nbits)
+fill_bit_buffer (int nbits)
+/* Load up the bit buffer and do get_bits(nbits) */
 {
-  int result;
-  
-  while (nbits > bits_left) {
-    int c = JGETC(dcinfo);
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  while (bits_left < MIN_GET_BITS) {
+    register int c = JGETC(dcinfo);
     
-    get_buffer <<= 8;
-    get_buffer |= c;
-    bits_left += 8;
     /* If it's 0xFF, check and discard stuffed zero byte */
-    if (c == 0xff) {
-      c = JGETC(dcinfo);  /* Byte stuffing */
-      if (c != 0)
-	ERREXIT1(dcinfo->emethods,
-		 "Unexpected marker 0x%02x in compressed data", c);
+    if (c == 0xFF) {
+      int c2 = JGETC(dcinfo);
+      if (c2 != 0) {
+	/* Oops, it's actually a marker indicating end of compressed data. */
+	/* Better put it back for use later */
+	JUNGETC(c2,dcinfo);
+	JUNGETC(c,dcinfo);
+	/* There should be enough bits still left in the data segment; */
+	/* if so, just break out of the while loop. */
+	if (bits_left >= nbits)
+	  break;
+	/* Uh-oh.  Report corrupted data to user and stuff zeroes into
+	 * the data stream, so we can produce some kind of image.
+	 * Note that this will be repeated for each byte demanded for the
+	 * rest of the segment; this is a bit slow but not unreasonably so.
+	 * The main thing is to avoid getting a zillion warnings, hence:
+	 */
+	if (! printed_eod) {
+	  WARNMS(dcinfo->emethods, "Corrupt JPEG data: premature end of data segment");
+	  printed_eod = TRUE;
+	}
+	c = 0;			/* insert a zero byte into bit buffer */
+      }
     }
+
+    /* OK, load c into get_buffer */
+    get_buffer = (get_buffer << 8) | c;
+    bits_left += 8;
   }
-  
+
+  /* Having filled get_buffer, extract desired bits (this simplifies macros) */
   bits_left -= nbits;
-  result = ((int) (get_buffer >> bits_left)) & ((1 << nbits) - 1);
-  return result;
+  return ((int) (get_buffer >> bits_left)) & bmask[nbits];
 }
 
-/* Macro to make things go at some speed! */
 
-#define get_bit()	(bits_left ? \
-			 ((int) (get_buffer >> (--bits_left))) & 1 : \
-			 get_bits(1))
+/* Macros to make things go at some speed! */
+/* NB: parameter to get_bits should be simple variable, not expression */
+
+#define get_bits(nbits) \
+	(bits_left >= (nbits) ? \
+	 ((int) (get_buffer >> (bits_left -= (nbits)))) & bmask[nbits] : \
+	 fill_bit_buffer(nbits))
+
+#define get_bit() \
+	(bits_left ? \
+	 ((int) (get_buffer >> (--bits_left))) & 1 : \
+	 fill_bit_buffer(1))
 
 
 /* Figure F.16: extract next coded symbol from input stream */
   
+INLINE
 LOCAL int
 huff_DECODE (HUFF_TBL * htbl)
 {
-  int l, p;
-  INT32 code;
+  register int l;
+  register INT32 code;
   
   code = get_bit();
   l = 1;
   while (code > htbl->maxcode[l]) {
-    code = (code << 1) + get_bit();
+    code = (code << 1) | get_bit();
     l++;
   }
 
   /* With garbage input we may reach the sentinel value l = 17. */
 
   if (l > 16) {
-    ERREXIT(dcinfo->emethods, "Corrupted data in JPEG file");
+    WARNMS(dcinfo->emethods, "Corrupt JPEG data: bad Huffman code");
+    return 0;			/* fake a zero as the safest result */
   }
 
-  p = (int) (htbl->valptr[l] + (code - htbl->mincode[l]));
-  
-  return (int) htbl->huffval[p];
+  return htbl->huffval[ htbl->valptr[l] + ((int) (code - htbl->mincode[l])) ];
 }
 
 
 /* Figure F.12: extend sign bit */
 
-/* NB: on some compilers this will only work for s > 0 */
+#define huff_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
 
-#define huff_EXTEND(x, s)	((x) < (1 << ((s)-1)) ? \
-				 (x) + (-1 << (s)) + 1 : \
-				 (x))
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
 
-
-/* Decode a single block's worth of coefficients */
-/* Note that only the difference is returned for the DC coefficient */
-
-LOCAL void
-decode_one_block (JBLOCK block, HUFF_TBL *dctbl, HUFF_TBL *actbl)
-{
-  int s, k, r, n;
-
-  /* zero out the coefficient block */
-
-  MEMZERO((void *) block, SIZEOF(JBLOCK));
-  
-  /* Section F.2.2.1: decode the DC coefficient difference */
-
-  s = huff_DECODE(dctbl);
-  if (s) {
-    r = get_bits(s);
-    s = huff_EXTEND(r, s);
-  }
-  block[0] = s;
-
-  /* Section F.2.2.2: decode the AC coefficients */
-  
-  for (k = 1; k < DCTSIZE2; k++) {
-    r = huff_DECODE(actbl);
-    
-    s = r & 15;
-    n = r >> 4;
-    
-    if (s) {
-      k += n;
-      r = get_bits(s);
-      block[k] = huff_EXTEND(r, s);
-    } else {
-      if (n != 15)
-	break;
-      k += 15;
-    }
-  }
-}
+static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
+  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
+    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
+    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
+    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
 
 
 /*
@@ -201,6 +222,7 @@
   /* Initialize static variables */
   dcinfo = cinfo;
   bits_left = 0;
+  printed_eod = FALSE;
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
@@ -232,30 +254,33 @@
   int c, nbytes;
   short ci;
 
-  /* Throw away any partial unread byte */
+  /* Throw away any unused bits remaining in bit buffer */
+  nbytes = bits_left / 8;	/* count any full bytes loaded into buffer */
   bits_left = 0;
+  printed_eod = FALSE;		/* next segment can get another warning */
 
   /* Scan for next JPEG marker */
-  nbytes = 0;
   do {
     do {			/* skip any non-FF bytes */
       nbytes++;
       c = JGETC(cinfo);
     } while (c != 0xFF);
     do {			/* skip any duplicate FFs */
-      nbytes++;
+      /* we don't increment nbytes here since extra FFs are legal */
       c = JGETC(cinfo);
     } while (c == 0xFF);
   } while (c == 0);		/* repeat if it was a stuffed FF/00 */
 
-  if (c != (RST0 + cinfo->next_restart_num))
-    ERREXIT2(cinfo->emethods, "Found 0x%02x marker instead of RST%d",
-	     c, cinfo->next_restart_num);
+  if (nbytes != 1)
+    WARNMS2(cinfo->emethods,
+	    "Corrupt JPEG data: %d extraneous bytes before marker 0x%02x",
+	    nbytes-1, c);
 
-  if (nbytes != 2)
-    TRACEMS2(cinfo->emethods, 1, "Skipped %d bytes before RST%d",
-	     nbytes-2, cinfo->next_restart_num);
-  else
+  if (c != (RST0 + cinfo->next_restart_num)) {
+    /* Uh-oh, the restart markers have been messed up too. */
+    /* Let the file-format module try to figure out how to resync. */
+    (*cinfo->methods->resync_to_restart) (cinfo, c);
+  } else
     TRACEMS1(cinfo->emethods, 2, "RST%d", cinfo->next_restart_num);
 
   /* Re-initialize DC predictions to 0 */
@@ -264,19 +289,49 @@
 
   /* Update restart state */
   cinfo->restarts_to_go = cinfo->restart_interval;
-  cinfo->next_restart_num++;
-  cinfo->next_restart_num &= 7;
+  cinfo->next_restart_num = (cinfo->next_restart_num + 1) & 7;
 }
 
 
+/* ZAG[i] is the natural-order position of the i'th element of zigzag order.
+ * If the incoming data is corrupted, huff_decode_mcu could attempt to
+ * reference values beyond the end of the array.  To avoid a wild store,
+ * we put some extra zeroes after the real entries.
+ */
+
+static const short ZAG[DCTSIZE2+16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+  0,  0,  0,  0,  0,  0,  0,  0, /* extra entries in case k>63 below */
+  0,  0,  0,  0,  0,  0,  0,  0
+};
+
+
 /*
  * Decode and return one MCU's worth of Huffman-compressed coefficients.
+ * This routine also handles quantization descaling and zigzag reordering
+ * of coefficient values.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
  */
 
 METHODDEF void
-huff_decode (decompress_info_ptr cinfo, JBLOCK *MCU_data)
+huff_decode_mcu (decompress_info_ptr cinfo, JBLOCKROW *MCU_data)
 {
+  register int s, k, r;
   short blkn, ci;
+  register JBLOCKROW block;
+  register QUANT_TBL_PTR quanttbl;
+  HUFF_TBL *dctbl;
+  HUFF_TBL *actbl;
   jpeg_component_info * compptr;
 
   /* Account for restart interval, process restart marker if needed */
@@ -286,15 +341,51 @@
     cinfo->restarts_to_go--;
   }
 
+  /* Outer loop handles each block in the MCU */
+
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
     ci = cinfo->MCU_membership[blkn];
     compptr = cinfo->cur_comp_info[ci];
-    decode_one_block(MCU_data[blkn],
-		     cinfo->dc_huff_tbl_ptrs[compptr->dc_tbl_no],
-		     cinfo->ac_huff_tbl_ptrs[compptr->ac_tbl_no]);
+    quanttbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
+    actbl = cinfo->ac_huff_tbl_ptrs[compptr->ac_tbl_no];
+    dctbl = cinfo->dc_huff_tbl_ptrs[compptr->dc_tbl_no];
+
+    /* Decode a single block's worth of coefficients */
+
+    /* Section F.2.2.1: decode the DC coefficient difference */
+    s = huff_DECODE(dctbl);
+    if (s) {
+      r = get_bits(s);
+      s = huff_EXTEND(r, s);
+    }
+
     /* Convert DC difference to actual value, update last_dc_val */
-    MCU_data[blkn][0] += cinfo->last_dc_val[ci];
-    cinfo->last_dc_val[ci] = MCU_data[blkn][0];
+    s += cinfo->last_dc_val[ci];
+    cinfo->last_dc_val[ci] = (JCOEF) s;
+    /* Descale and output the DC coefficient (assumes ZAG[0] = 0) */
+    (*block)[0] = (JCOEF) (((JCOEF) s) * quanttbl[0]);
+    
+    /* Section F.2.2.2: decode the AC coefficients */
+    /* Since zero values are skipped, output area must be zeroed beforehand */
+    for (k = 1; k < DCTSIZE2; k++) {
+      r = huff_DECODE(actbl);
+      
+      s = r & 15;
+      r = r >> 4;
+      
+      if (s) {
+	k += r;
+	r = get_bits(s);
+	s = huff_EXTEND(r, s);
+	/* Descale coefficient and output in natural (dezigzagged) order */
+	(*block)[ZAG[k]] = (JCOEF) (((JCOEF) s) * quanttbl[k]);
+      } else {
+	if (r != 15)
+	  break;
+	k += 15;
+      }
+    }
   }
 }
 
@@ -318,8 +409,8 @@
 jseldhuffman (decompress_info_ptr cinfo)
 {
   if (! cinfo->arith_code) {
-    cinfo->methods->entropy_decoder_init = huff_decoder_init;
-    cinfo->methods->entropy_decode = huff_decode;
-    cinfo->methods->entropy_decoder_term = huff_decoder_term;
+    cinfo->methods->entropy_decode_init = huff_decoder_init;
+    cinfo->methods->entropy_decode = huff_decode_mcu;
+    cinfo->methods->entropy_decode_term = huff_decoder_term;
   }
 }