Updates libjpeg-turbo to 1.1.90
This change updates our copy of libjpeg-turbo to 1.1.90 (r677), which supports ARM NEON.

BUG=none
TEST=none
Review URL: http://codereview.chromium.org/7554002

git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/libjpeg_turbo@95196 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
diff --git a/jdhuff.c b/jdhuff.c
index 3325e0a..c2b38a9 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -2,6 +2,7 @@
  * jdhuff.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 2009-2011, D. R. Commander.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -14,25 +15,11 @@
  * storage only upon successful completion of an MCU.
  */
 
-/* Modifications:
- * Copyright (C)2007 Sun Microsystems, Inc.
- * Copyright (C)2009-2010 D. R. Commander
- *
- * This library is free software and may be redistributed and/or modified under
- * the terms of the wxWindows Library License, Version 3.1 or (at your option)
- * any later version.  The full license is in the LICENSE.txt file included
- * with this distribution.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * wxWindows Library License for more details.
- */
-
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"             /* Declarations shared with jdphuff.c */
+#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+#include "jpegcomp.h"
 
 
 /*
@@ -56,10 +43,10 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-        ((dest).last_dc_val[0] = (src).last_dc_val[0], \
-         (dest).last_dc_val[1] = (src).last_dc_val[1], \
-         (dest).last_dc_val[2] = (src).last_dc_val[2], \
-         (dest).last_dc_val[3] = (src).last_dc_val[3])
+	((dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -70,11 +57,11 @@
   /* These fields are loaded into local variables at start of each MCU.
    * In case of suspension, we exit WITHOUT updating them.
    */
-  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
-  savable_state saved;          /* Other state at start of MCU */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
@@ -119,9 +106,9 @@
     /* Compute derived values for Huffman tables */
     /* We may do this more than once for a table, but it's not expensive */
     jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
-                            & entropy->dc_derived_tbls[dctbl]);
+			    & entropy->dc_derived_tbls[dctbl]);
     jpeg_make_d_derived_tbl(cinfo, FALSE, actbl,
-                            & entropy->ac_derived_tbls[actbl]);
+			    & entropy->ac_derived_tbls[actbl]);
     /* Initialize DC predictions to 0 */
     entropy->saved.last_dc_val[ci] = 0;
   }
@@ -137,7 +124,7 @@
     if (compptr->component_needed) {
       entropy->dc_needed[blkn] = TRUE;
       /* we don't need the ACs if producing a 1/8th-size image */
-      entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
+      entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1);
     } else {
       entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
     }
@@ -162,7 +149,7 @@
 
 GLOBAL(void)
 jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-                         d_derived_tbl ** pdtbl)
+			 d_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *htbl;
   d_derived_tbl *dtbl;
@@ -188,26 +175,26 @@
   if (*pdtbl == NULL)
     *pdtbl = (d_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                  SIZEOF(d_derived_tbl));
+				  SIZEOF(d_derived_tbl));
   dtbl = *pdtbl;
-  dtbl->pub = htbl;             /* fill in back link */
-
+  dtbl->pub = htbl;		/* fill in back link */
+  
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
   for (l = 1; l <= 16; l++) {
     i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)   /* protect against table overrun */
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (i--)
       huffsize[p++] = (char) l;
   }
   huffsize[p] = 0;
   numsymbols = p;
-
+  
   /* Figure C.2: generate the codes themselves */
   /* We also validate that the counts represent a legal Huffman code tree. */
-
+  
   code = 0;
   si = huffsize[0];
   p = 0;
@@ -237,7 +224,7 @@
       p += htbl->bits[l];
       dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
     } else {
-      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
     }
   }
   dtbl->valoffset[17] = 0;
@@ -260,8 +247,8 @@
       /* Generate left-justified code followed by all possible bit sequences */
       lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
       for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
-        lookbits++;
+	dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+	lookbits++;
       }
     }
   }
@@ -276,7 +263,7 @@
     for (i = 0; i < numsymbols; i++) {
       int sym = htbl->huffval[i];
       if (sym < 0 || sym > 15)
-        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     }
   }
 }
@@ -298,7 +285,7 @@
  */
 
 #ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15        /* minimum allowable value */
+#define MIN_GET_BITS  15	/* minimum allowable value */
 #else
 #define MIN_GET_BITS  (BIT_BUF_SIZE-7)
 #endif
@@ -306,8 +293,8 @@
 
 GLOBAL(boolean)
 jpeg_fill_bit_buffer (bitread_working_state * state,
-                      register bit_buf_type get_buffer, register int bits_left,
-                      int nbits)
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
 /* Load up the bit buffer to a depth of at least nbits */
 {
   /* Copy heavily used state fields into locals (hopefully registers) */
@@ -319,54 +306,54 @@
   /* (It is assumed that no request will be for more than that many bits.) */
   /* We fail to do so only if we hit a marker or are forced to suspend. */
 
-  if (cinfo->unread_marker == 0) {      /* cannot advance past a marker */
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
     while (bits_left < MIN_GET_BITS) {
       register int c;
 
       /* Attempt to read a byte */
       if (bytes_in_buffer == 0) {
-        if (! (*cinfo->src->fill_input_buffer) (cinfo))
-          return FALSE;
-        next_input_byte = cinfo->src->next_input_byte;
-        bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
       }
       bytes_in_buffer--;
       c = GETJOCTET(*next_input_byte++);
 
       /* If it's 0xFF, check and discard stuffed zero byte */
       if (c == 0xFF) {
-        /* Loop here to discard any padding FF's on terminating marker,
-         * so that we can save a valid unread_marker value.  NOTE: we will
-         * accept multiple FF's followed by a 0 as meaning a single FF data
-         * byte.  This data pattern is not valid according to the standard.
-         */
-        do {
-          if (bytes_in_buffer == 0) {
-            if (! (*cinfo->src->fill_input_buffer) (cinfo))
-              return FALSE;
-            next_input_byte = cinfo->src->next_input_byte;
-            bytes_in_buffer = cinfo->src->bytes_in_buffer;
-          }
-          bytes_in_buffer--;
-          c = GETJOCTET(*next_input_byte++);
-        } while (c == 0xFF);
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
 
-        if (c == 0) {
-          /* Found FF/00, which represents an FF data byte */
-          c = 0xFF;
-        } else {
-          /* Oops, it's actually a marker indicating end of compressed data.
-           * Save the marker code for later use.
-           * Fine point: it might appear that we should save the marker into
-           * bitread working state, not straight into permanent state.  But
-           * once we have hit a marker, we cannot need to suspend within the
-           * current MCU, because we will read no more bytes from the data
-           * source.  So it is OK to update permanent state right away.
-           */
-          cinfo->unread_marker = c;
-          /* See if we need to insert some fake zero bits. */
-          goto no_more_bytes;
-        }
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
       }
 
       /* OK, load c into get_buffer */
@@ -386,8 +373,8 @@
        * appears per data segment.
        */
       if (! cinfo->entropy->insufficient_data) {
-        WARNMS(cinfo, JWRN_HIT_MARKER);
-        cinfo->entropy->insufficient_data = TRUE;
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	cinfo->entropy->insufficient_data = TRUE;
       }
       /* Fill the buffer with zero bits */
       get_buffer <<= MIN_GET_BITS - bits_left;
@@ -405,6 +392,50 @@
 }
 
 
+/* Macro version of the above: appends one input byte to get_buffer.  It
+   performs much better but does not handle markers; on FF/xx it only records
+   the marker, so blocks with markers must be handed to the slower routines. */
+
+#define GET_BYTE \
+{ \
+  register int c0, c1; \
+  c0 = GETJOCTET(*buffer++); \
+  c1 = GETJOCTET(*buffer); \
+  /* Pre-execute most common case: c0 is an ordinary data byte */ \
+  get_buffer = (get_buffer << 8) | c0; \
+  bits_left += 8; \
+  if (c0 == 0xFF) { \
+    /* Pre-execute case of FF/00, which represents an FF data byte */ \
+    buffer++; \
+    if (c1 != 0) { \
+      /* Oops, it's actually a marker indicating end of compressed data. */ \
+      cinfo->unread_marker = c1; \
+      /* Back out pre-execution: rewind to the FF, zero the 8 bits just loaded */ \
+      buffer -= 2; \
+      get_buffer &= ~0xFF; \
+    } \
+  } \
+}
+
+#if __WORDSIZE == 64 || defined(_WIN64)
+
+/* Pre-fetch 48 bits (6 bytes), because the holding register is 64-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
+  }
+
+#else
+
+/* Pre-fetch 16 bits (2 bytes), because the holding register is 32-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE \
+  }
+
+#endif
+
+
 /*
  * Out-of-line code for Huffman code decoding.
  * See jdhuff.h for info about usage.
@@ -412,8 +443,8 @@
 
 GLOBAL(int)
 jpeg_huff_decode (bitread_working_state * state,
-                  register bit_buf_type get_buffer, register int bits_left,
-                  d_derived_tbl * htbl, int min_bits)
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
 {
   register int l = min_bits;
   register INT32 code;
@@ -442,7 +473,7 @@
 
   if (l > 16) {
     WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;                   /* fake a zero as the safest result */
+    return 0;			/* fake a zero as the safest result */
   }
 
   return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
@@ -562,7 +593,7 @@
 
         r = s >> 4;
         s &= 15;
-
+      
         if (s) {
           k += r;
           CHECK_BIT_BUFFER(br_state, s, return FALSE);
@@ -610,60 +641,6 @@
 }
 
 
-/***************************************************************/
-
-#define ADD_BYTE  {                                     \
-  int val0 = *(buffer++);                               \
-  int val1 = *(buffer);                                 \
-                                                        \
-  bits_left += 8;                                       \
-  get_buffer = (get_buffer << 8) | (val0);              \
-  if (val0 == 0xFF) {                                   \
-    buffer++;                                           \
-    if (val1 != 0) {                                    \
-      buffer   -= 2;                                    \
-      get_buffer      &= ~0xFF;                         \
-    }                                                   \
-  }                                                     \
-}
-
-/***************************************************************/
-
-#if __WORDSIZE == 64 || defined(_WIN64)
-
-#define ENSURE_SHORT \
-  if (bits_left < 16) { \
-    ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE \
-  }
-
-#else
-
-#define ENSURE_SHORT  if (bits_left < 16) { ADD_BYTE ADD_BYTE }
-
-#endif
-
-/***************************************************************/
-
-#define HUFF_DECODE_FAST(symbol, size, htbl) { \
-  ENSURE_SHORT \
-  symbol = PEEK_BITS(HUFF_LOOKAHEAD); \
-  symbol = htbl->lookup[symbol]; \
-  size = symbol >> 8; \
-  bits_left -= size; \
-  symbol = symbol & ((1 << HUFF_LOOKAHEAD) - 1); \
-  if (size == HUFF_LOOKAHEAD + 1) { \
-    symbol = (get_buffer >> bits_left) & ((1 << (size)) - 1); \
-    while (symbol > htbl->maxcode[size]) { \
-      symbol <<= 1; \
-      symbol |= GET_BITS(1); \
-      size++; \
-    } \
-    symbol = htbl->pub->huffval[ (int) (symbol + htbl->valoffset[size]) & 0xFF ]; \
-  } \
-}
-
-/***************************************************************/
-
 LOCAL(boolean)
 decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
@@ -687,7 +664,7 @@
 
     HUFF_DECODE_FAST(s, l, dctbl);
     if (s) {
-      ENSURE_SHORT
+      FILL_BIT_BUFFER_FAST
       r = GET_BITS(s);
       s = HUFF_EXTEND(r, s);
     }
@@ -705,10 +682,10 @@
         HUFF_DECODE_FAST(s, l, actbl);
         r = s >> 4;
         s &= 15;
-
+      
         if (s) {
           k += r;
-          ENSURE_SHORT
+          FILL_BIT_BUFFER_FAST
           r = GET_BITS(s);
           s = HUFF_EXTEND(r, s);
           (*block)[jpeg_natural_order[k]] = (JCOEF) s;
@@ -727,7 +704,7 @@
 
         if (s) {
           k += r;
-          ENSURE_SHORT
+          FILL_BIT_BUFFER_FAST
           DROP_BITS(s);
         } else {
           if (r != 15) break;
@@ -737,6 +714,11 @@
     }
   }
 
+  if (cinfo->unread_marker != 0) {
+    cinfo->unread_marker = 0;
+    return FALSE;
+  }
+
   br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
   br_state.next_input_byte = buffer;
   BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
@@ -760,7 +742,7 @@
  * this module, since we'll just re-assign them on the next call.)
  */
 
-#define BUFSIZE (DCTSIZE2 * 2u)
+#define BUFSIZE (DCTSIZE2 * 2)
 
 METHODDEF(boolean)
 decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
@@ -772,11 +754,12 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-        return FALSE;
+	return FALSE;
     usefast = 0;
   }
 
-  if (cinfo->src->bytes_in_buffer < BUFSIZE * cinfo->blocks_in_MCU)
+  if (cinfo->src->bytes_in_buffer < BUFSIZE * cinfo->blocks_in_MCU
+    || cinfo->unread_marker != 0)
     usefast = 0;
 
   /* If we've run out of data, just leave the MCU set to zeroes.
@@ -785,9 +768,10 @@
   if (! entropy->pub.insufficient_data) {
 
     if (usefast) {
-      if (!decode_mcu_fast(cinfo, MCU_data)) return FALSE;
+      if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow;
     }
     else {
+      use_slow:
       if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
     }
 
@@ -812,7 +796,7 @@
 
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                SIZEOF(huff_entropy_decoder));
+				SIZEOF(huff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_huff_decoder;
   entropy->pub.decode_mcu = decode_mcu;