Replace INT32 with a new internal datatype (JLONG)

These days, INT32 is a commonly-defined datatype in system headers.  We
cannot eliminate the definition of that datatype from jmorecfg.h, since
the INT32 typedef has technically been part of the libjpeg API since
version 5 (1994).  However, using INT32 internally is risky, because the
inclusion of a particular header (Xmd.h, for instance) could change the
definition of INT32 from long to int on 64-bit platforms and thus change
the internal behavior of libjpeg-turbo in unexpected ways.  For
instance, failing to correctly set __INT32_IS_ACTUALLY_LONG to match the
INT32 typedef (perhaps as a result of including the wrong version of
jpeglib.h) could cause libjpeg-turbo to produce incorrect results.

The library has always been built in environments in which INT32 is
effectively long (on Windows, long is always 32-bit, so effectively it's
the same as int), so it makes sense to replace internal uses of INT32
with an explicitly long datatype (JLONG).  This ensures that
libjpeg-turbo will always behave consistently, regardless of the headers
included at compile time.

Addresses a concern expressed in #26.
diff --git a/djpeg.c b/djpeg.c
index 8b516a6..93567c6 100644
--- a/djpeg.c
+++ b/djpeg.c
@@ -431,7 +431,7 @@
 print_text_marker (j_decompress_ptr cinfo)
 {
   boolean traceit = (cinfo->err->trace_level >= 1);
-  INT32 length;
+  long length;
   unsigned int ch;
   unsigned int lastch = 0;
 
diff --git a/jaricom.c b/jaricom.c
index 1c807d1..3bb557f 100644
--- a/jaricom.c
+++ b/jaricom.c
@@ -1,8 +1,10 @@
 /*
  * jaricom.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Developed 1997-2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,7 +21,7 @@
 #include "jpeglib.h"
 
 /* The following #define specifies the packing of the four components
- * into the compact INT32 representation.
+ * into the compact JLONG representation.
  * Note that this formula must match the actual arithmetic encoder
  * and decoder implementation.  The implementation has to be changed
  * if this formula is changed.
@@ -27,9 +29,9 @@
  * implementation (jbig_tab.c).
  */
 
-#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b)
+#define V(i,a,b,c,d) (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b)
 
-const INT32 jpeg_aritab[113+1] = {
+const JLONG jpeg_aritab[113+1] = {
 /*
  * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
  */
diff --git a/jcarith.c b/jcarith.c
index a66da3e..b960a34 100644
--- a/jcarith.c
+++ b/jcarith.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Developed 1997-2009 by Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -26,10 +26,10 @@
 typedef struct {
   struct jpeg_entropy_encoder pub; /* public fields */
 
-  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
-  INT32 a;               /* A register, normalized size of coding interval */
-  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
-  INT32 zc;          /* counter for pending 0x00 output values which might *
+  JLONG c; /* C register, base of coding interval, layout as in sec. D.1.3 */
+  JLONG a;               /* A register, normalized size of coding interval */
+  JLONG sc;        /* counter for stacked 0xFF values which might overflow */
+  JLONG zc;          /* counter for pending 0x00 output values which might *
                           * be discarded at the end ("Pacman" termination) */
   int ct;  /* bit shift counter, determines when next byte will be written */
   int buffer;                /* buffer for most recent output byte != 0xFF */
@@ -98,8 +98,8 @@
 #define CALCULATE_SPECTRAL_CONDITIONING
  */
 
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
+ * We assume that int right shift is unsigned if JLONG right shift is,
  * which should be safe.
  */
 
@@ -136,7 +136,7 @@
 finish_pass (j_compress_ptr cinfo)
 {
   arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
-  INT32 temp;
+  JLONG temp;
 
   /* Section D.1.8: Termination of encoding */
 
@@ -223,7 +223,7 @@
 {
   register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
   register unsigned char nl, nm;
-  register INT32 qe, temp;
+  register JLONG qe, temp;
   register int sv;
 
   /* Fetch values from our compact representation of Table D.2:
diff --git a/jccolext.c b/jccolext.c
index 77a218d..479b320 100644
--- a/jccolext.c
+++ b/jccolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2012, D. R. Commander.
+ * Copyright (C) 2009-2012, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -35,7 +35,7 @@
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JLONG * ctab = cconvert->rgb_ycc_tab;
   register JSAMPROW inptr;
   register JSAMPROW outptr0, outptr1, outptr2;
   register JDIMENSION col;
@@ -92,7 +92,7 @@
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JLONG * ctab = cconvert->rgb_ycc_tab;
   register JSAMPROW inptr;
   register JSAMPROW outptr;
   register JDIMENSION col;
diff --git a/jccolor.c b/jccolor.c
index 27af306..12d68bc 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -26,7 +26,7 @@
   struct jpeg_color_converter pub; /* public fields */
 
   /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;          /* => table for RGB to YCbCr conversion */
+  JLONG * rgb_ycc_tab;          /* => table for RGB to YCbCr conversion */
 } my_color_converter;
 
 typedef my_color_converter * my_cconvert_ptr;
@@ -63,9 +63,9 @@
  */
 
 #define SCALEBITS       16      /* speediest right-shift on some machines */
-#define CBCR_OFFSET     ((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define CBCR_OFFSET     ((JLONG) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF        ((JLONG) 1 << (SCALEBITS-1))
+#define FIX(x)          ((JLONG) ((x) * (1L<<SCALEBITS) + 0.5))
 
 /* We allocate one big table and divide it up into eight parts, instead of
  * doing eight alloc_small requests.  This lets us use a single table base
@@ -198,13 +198,13 @@
 rgb_ycc_start (j_compress_ptr cinfo)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_ycc_tab;
-  INT32 i;
+  JLONG * rgb_ycc_tab;
+  JLONG i;
 
   /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
+  cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (TABLE_SIZE * sizeof(INT32)));
+                                (TABLE_SIZE * sizeof(JLONG)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
     rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
@@ -382,7 +382,7 @@
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JLONG * ctab = cconvert->rgb_ycc_tab;
   register JSAMPROW inptr;
   register JSAMPROW outptr0, outptr1, outptr2, outptr3;
   register JDIMENSION col;
diff --git a/jcdctmgr.c b/jcdctmgr.c
index 73f3ea4..cef4b5e 100644
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -302,15 +302,15 @@
         for (i = 0; i < DCTSIZE2; i++) {
 #if BITS_IN_JSAMPLE == 8
           if(!compute_reciprocal(
-            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-                                  (INT32) aanscales[i]),
+            DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
+                                  (JLONG) aanscales[i]),
                     CONST_BITS-3), &dtbl[i])
             && fdct->quantize == jsimd_quantize)
             fdct->quantize = quantize;
 #else
            dtbl[i] = (DCTELEM)
-             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-                                   (INT32) aanscales[i]),
+             DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
+                                   (JLONG) aanscales[i]),
                      CONST_BITS-3);
 #endif
         }
diff --git a/jchuff.c b/jchuff.c
index 2ca80b2..e02fd6a 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -269,7 +269,7 @@
     /* code is now 1 more than the last code used for codelength si; but
      * it must still fit in si bits, since no code is allowed to be all ones.
      */
-    if (((INT32) code) >= (((INT32) 1) << si))
+    if (((JLONG) code) >= (((JLONG) 1) << si))
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     code <<= 1;
     si++;
@@ -389,7 +389,7 @@
 }
 
 #define EMIT_CODE(code, size) { \
-  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  temp2 &= (((JLONG) 1)<<nbits) - 1; \
   CHECKBUF31() \
   PUT_BITS(code, size) \
   PUT_BITS(temp2, nbits) \
@@ -403,7 +403,7 @@
 }
 
 #define EMIT_CODE(code, size) { \
-  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  temp2 &= (((JLONG) 1)<<nbits) - 1; \
   PUT_BITS(code, size) \
   CHECKBUF15() \
   PUT_BITS(temp2, nbits) \
@@ -521,7 +521,7 @@
   EMIT_BITS(code, size)
 
   /* Mask off any extra bits in code */
-  temp2 &= (((INT32) 1)<<nbits) - 1;
+  temp2 &= (((JLONG) 1)<<nbits) - 1;
 
   /* Emit that number of bits of the value, if positive, */
   /* or the complement of its magnitude, if negative. */
diff --git a/jcphuff.c b/jcphuff.c
index a75285c..3b29e02 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -35,7 +35,7 @@
    */
   JOCTET * next_output_byte;    /* => next byte to write in buffer */
   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
-  INT32 put_buffer;             /* current bit-accumulation buffer */
+  JLONG put_buffer;             /* current bit-accumulation buffer */
   int put_bits;                 /* # of bits now in it */
   j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
 
@@ -72,8 +72,8 @@
 
 #define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
 
-/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
- * We assume that int right shift is unsigned if INT32 right shift is,
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
+ * We assume that int right shift is unsigned if JLONG right shift is,
  * which should be safe.
  */
 
@@ -231,7 +231,7 @@
 /* Emit some bits, unless we are in gather mode */
 {
   /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
+  register JLONG put_buffer = (JLONG) code;
   register int put_bits = entropy->put_bits;
 
   /* if size is 0, caller used an invalid Huffman table entry */
@@ -241,7 +241,7 @@
   if (entropy->gather_statistics)
     return;                     /* do nothing if we're only getting stats */
 
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  put_buffer &= (((JLONG) 1)<<size) - 1; /* mask off any extra bits in code */
 
   put_bits += size;             /* new number of bits in buffer */
 
diff --git a/jcsample.c b/jcsample.c
index f6846e5..7accfd1 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -6,6 +6,7 @@
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * Copyright (C) 2014, MIPS Technologies, Inc., California
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -150,7 +151,7 @@
   JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
   JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
   JSAMPROW inptr, outptr;
-  INT32 outvalue;
+  JLONG outvalue;
 
   h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
   v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
@@ -173,7 +174,7 @@
       for (v = 0; v < v_expand; v++) {
         inptr = input_data[inrow+v] + outcol_h;
         for (h = 0; h < h_expand; h++) {
-          outvalue += (INT32) GETJSAMPLE(*inptr++);
+          outvalue += (JLONG) GETJSAMPLE(*inptr++);
         }
       }
       *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
@@ -302,7 +303,7 @@
   JDIMENSION colctr;
   JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
   register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
+  JLONG membersum, neighsum, memberscale, neighscale;
 
   /* Expand input data enough to let all the output samples be generated
    * by the standard loop.  Special-casing padded output would be more
@@ -402,7 +403,7 @@
   JDIMENSION colctr;
   JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
   register JSAMPROW inptr, above_ptr, below_ptr, outptr;
-  INT32 membersum, neighsum, memberscale, neighscale;
+  JLONG membersum, neighsum, memberscale, neighscale;
   int colsum, lastcolsum, nextcolsum;
 
   /* Expand input data enough to let all the output samples be generated
diff --git a/jdarith.c b/jdarith.c
index f92b034..a9a6ec6 100644
--- a/jdarith.c
+++ b/jdarith.c
@@ -26,8 +26,8 @@
 typedef struct {
   struct jpeg_entropy_decoder pub; /* public fields */
 
-  INT32 c;       /* C register, base of coding interval + input bit buffer */
-  INT32 a;               /* A register, normalized size of coding interval */
+  JLONG c;       /* C register, base of coding interval + input bit buffer */
+  JLONG a;               /* A register, normalized size of coding interval */
   int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
                                                          /* init: ct = -16 */
                                                          /* run: ct = 0..7 */
@@ -110,7 +110,7 @@
 {
   register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
   register unsigned char nl, nm;
-  register INT32 qe, temp;
+  register JLONG qe, temp;
   register int sv, data;
 
   /* Renormalization & data input per section D.2.6 */
diff --git a/jdcoefct.c b/jdcoefct.c
index 79c662e..baf6bc8 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -411,7 +411,7 @@
   JCOEF * workspace;
   int *coef_bits;
   JQUANT_TBL *quanttbl;
-  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  JLONG Q00,Q01,Q02,Q10,Q11,Q20, num;
   int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
   int Al, pred;
 
diff --git a/jdcol565.c b/jdcol565.c
index 2641abe..944ce83 100644
--- a/jdcol565.c
+++ b/jdcol565.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014-2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -31,12 +31,12 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  register JLONG * Crgtab = cconvert->Cr_g_tab;
+  register JLONG * Cbgtab = cconvert->Cb_g_tab;
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int r, g, b;
     inptr0 = input_buf[0][input_row];
     inptr1 = input_buf[1][input_row];
@@ -110,13 +110,13 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  register JLONG * Crgtab = cconvert->Cr_g_tab;
+  register JLONG * Cbgtab = cconvert->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int r, g, b;
 
     inptr0 = input_buf[0][input_row];
@@ -193,7 +193,7 @@
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int r, g, b;
 
     inptr0 = input_buf[0][input_row];
@@ -246,11 +246,11 @@
   register JDIMENSION col;
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   JDIMENSION num_cols = cinfo->output_width;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int r, g, b;
 
     inptr0 = input_buf[0][input_row];
@@ -305,7 +305,7 @@
   JDIMENSION num_cols = cinfo->output_width;
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int g;
 
     inptr = input_buf[0][input_row++];
@@ -344,10 +344,10 @@
   register JDIMENSION col;
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   JDIMENSION num_cols = cinfo->output_width;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
 
   while (--num_rows >= 0) {
-    INT32 rgb;
+    JLONG rgb;
     unsigned int g;
 
     inptr = input_buf[0][input_row++];
diff --git a/jdcolext.c b/jdcolext.c
index e2ebcdd..59b676c 100644
--- a/jdcolext.c
+++ b/jdcolext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, D. R. Commander.
+ * Copyright (C) 2009, 2011, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -42,8 +42,8 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  register JLONG * Crgtab = cconvert->Cr_g_tab;
+  register JLONG * Cbgtab = cconvert->Cb_g_tab;
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
diff --git a/jdcolor.c b/jdcolor.c
index ac96b23..6a5d819 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -29,11 +29,11 @@
   /* Private state for YCC->RGB conversion */
   int * Cr_r_tab;               /* => table for Cr to R conversion */
   int * Cb_b_tab;               /* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
+  JLONG * Cr_g_tab;             /* => table for Cr to G conversion */
+  JLONG * Cb_g_tab;             /* => table for Cb to G conversion */
 
   /* Private state for RGB->Y conversion */
-  INT32 * rgb_y_tab;            /* => table for RGB to Y conversion */
+  JLONG * rgb_y_tab;            /* => table for RGB to Y conversion */
 } my_color_deconverter;
 
 typedef my_color_deconverter * my_cconvert_ptr;
@@ -74,8 +74,8 @@
  */
 
 #define SCALEBITS       16      /* speediest right-shift on some machines */
-#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define ONE_HALF        ((JLONG) 1 << (SCALEBITS-1))
+#define FIX(x)          ((JLONG) ((x) * (1L<<SCALEBITS) + 0.5))
 
 /* We allocate one big table for RGB->Y conversion and divide it up into
  * three parts, instead of doing three alloc_small requests.  This lets us
@@ -212,7 +212,7 @@
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   int i;
-  INT32 x;
+  JLONG x;
   SHIFT_TEMPS
 
   cconvert->Cr_r_tab = (int *)
@@ -221,12 +221,12 @@
   cconvert->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                 (MAXJSAMPLE+1) * sizeof(int));
-  cconvert->Cr_g_tab = (INT32 *)
+  cconvert->Cr_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE+1) * sizeof(INT32));
-  cconvert->Cb_g_tab = (INT32 *)
+                                (MAXJSAMPLE+1) * sizeof(JLONG));
+  cconvert->Cb_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE+1) * sizeof(INT32));
+                                (MAXJSAMPLE+1) * sizeof(JLONG));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
@@ -303,13 +303,13 @@
 build_rgb_y_table (j_decompress_ptr cinfo)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  INT32 * rgb_y_tab;
-  INT32 i;
+  JLONG * rgb_y_tab;
+  JLONG i;
 
   /* Allocate and fill in the conversion tables. */
-  cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
+  cconvert->rgb_y_tab = rgb_y_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (TABLE_SIZE * sizeof(INT32)));
+                                (TABLE_SIZE * sizeof(JLONG)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
     rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
@@ -330,7 +330,7 @@
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_y_tab;
+  register JLONG * ctab = cconvert->rgb_y_tab;
   register JSAMPROW outptr;
   register JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
@@ -546,8 +546,8 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  register JLONG * Crgtab = cconvert->Cr_g_tab;
+  register JLONG * Cbgtab = cconvert->Cb_g_tab;
   SHIFT_TEMPS
 
   while (--num_rows >= 0) {
@@ -605,7 +605,7 @@
 
 #define DITHER_MASK       0x3
 #define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
-static const INT32 dither_matrix[4] = {
+static const JLONG dither_matrix[4] = {
   0x0008020A,
   0x0C040E06,
   0x030B0109,
diff --git a/jdct.h b/jdct.h
index b770f2c..f1c7cdf 100644
--- a/jdct.h
+++ b/jdct.h
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,7 +19,7 @@
 /*
  * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
  * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
- * for 8-bit samples, INT32 for 12-bit samples.  (NOTE: Floating-point DCT
+ * for 8-bit samples, JLONG for 12-bit samples.  (NOTE: Floating-point DCT
  * implementations use an array of type FAST_FLOAT, instead.)
  * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
  * The DCT outputs are returned scaled up by a factor of 8; they therefore
@@ -41,7 +41,7 @@
 typedef unsigned int UDCTELEM2;
 #endif
 #else
-typedef INT32 DCTELEM;          /* must have 32 bits */
+typedef JLONG DCTELEM;          /* must have 32 bits */
 typedef unsigned long long UDCTELEM2;
 #endif
 
@@ -68,7 +68,7 @@
 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
 #define IFAST_SCALE_BITS  2     /* fractional bits in scale factors */
 #else
-typedef INT32 IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
+typedef JLONG IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
 #define IFAST_SCALE_BITS  13    /* fractional bits in scale factors */
 #endif
 typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
@@ -154,13 +154,13 @@
  * Macros for handling fixed-point arithmetic; these are used by many
  * but not all of the DCT/IDCT modules.
  *
- * All values are expected to be of type INT32.
+ * All values are expected to be of type JLONG.
  * Fractional constants are scaled left by CONST_BITS bits.
  * CONST_BITS is defined within each module using these macros,
  * and may differ from one module to the next.
  */
 
-#define ONE     ((INT32) 1)
+#define ONE     ((JLONG) 1)
 #define CONST_SCALE (ONE << CONST_BITS)
 
 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
@@ -168,16 +168,16 @@
  * thus causing a lot of useless floating-point operations at run time.
  */
 
-#define FIX(x)  ((INT32) ((x) * CONST_SCALE + 0.5))
+#define FIX(x)  ((JLONG) ((x) * CONST_SCALE + 0.5))
 
-/* Descale and correctly round an INT32 value that's scaled by N bits.
+/* Descale and correctly round a JLONG value that's scaled by N bits.
  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
  * the fudge factor is correct for either sign of X.
  */
 
 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
  * This macro is used only when the two inputs will actually be no more than
  * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
  * full 32x32 multiply.  This provides a useful speedup on many machines.
@@ -190,7 +190,7 @@
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
 #endif
 #ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((JLONG) (const)))
 #endif
 
 #ifndef MULTIPLY16C16           /* default definition */
diff --git a/jddctmgr.c b/jddctmgr.c
index de7266f..5bab017 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -6,7 +6,7 @@
  * Modified 2002-2010 by Guido Vollbeding.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2015, D. R. Commander.
  * Copyright (C) 2013, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -280,8 +280,8 @@
 
         for (i = 0; i < DCTSIZE2; i++) {
           ifmtbl[i] = (IFAST_MULT_TYPE)
-            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-                                  (INT32) aanscales[i]),
+            DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
+                                  (JLONG) aanscales[i]),
                     CONST_BITS-IFAST_SCALE_BITS);
         }
       }
diff --git a/jdhuff.c b/jdhuff.c
index ea0ece9..e3a3f0a 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -210,7 +210,7 @@
     /* code is now 1 more than the last code used for codelength si; but
      * it must still fit in si bits, since no code is allowed to be all ones.
      */
-    if (((INT32) code) >= (((INT32) 1) << si))
+    if (((JLONG) code) >= (((JLONG) 1) << si))
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     code <<= 1;
     si++;
@@ -224,7 +224,7 @@
       /* valoffset[l] = huffval[] index of 1st symbol of code length l,
        * minus the minimum code of length l
        */
-      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      dtbl->valoffset[l] = (JLONG) p - (JLONG) huffcode[p];
       p += htbl->bits[l];
       dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
     } else {
@@ -451,7 +451,7 @@
                   d_derived_tbl * htbl, int min_bits)
 {
   register int l = min_bits;
-  register INT32 code;
+  register JLONG code;
 
   /* HUFF_DECODE has determined that the code is at least min_bits */
   /* bits long, so fetch that many bits in one swoop. */
diff --git a/jdhuff.h b/jdhuff.h
index 3cc9340..46d1916 100644
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2010-2011, D. R. Commander.
+ * Copyright (C) 2010-2011, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -20,9 +20,9 @@
 
 typedef struct {
   /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];            /* largest code of length k (-1 if none) */
+  JLONG maxcode[18];            /* largest code of length k (-1 if none) */
   /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[18];          /* huffval[] offset for codes of length k */
+  JLONG valoffset[18];          /* huffval[] offset for codes of length k */
   /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
    * the smallest code of length k; so given a code of length k, the
    * corresponding symbol is huffval[code + valoffset[k]]
@@ -79,7 +79,7 @@
 
 #else
 
-typedef INT32 bit_buf_type;     /* type of bit-extraction buffer */
+typedef JLONG bit_buf_type;     /* type of bit-extraction buffer */
 #define BIT_BUF_SIZE  32                /* size of buffer in bits */
 
 #endif
diff --git a/jdmarker.c b/jdmarker.c
index 383ba10..4f59d87 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2012, D. R. Commander.
+ * Copyright (C) 2012, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -154,7 +154,7 @@
                   V = GETJOCTET(*next_input_byte++); )
 
 /* As above, but read two bytes interpreted as an unsigned 16-bit integer.
- * V should be declared unsigned int or perhaps INT32.
+ * V should be declared unsigned int or perhaps JLONG.
  */
 #define INPUT_2BYTES(cinfo,V,action)  \
         MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
@@ -240,7 +240,7 @@
 get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
 /* Process a SOFn marker */
 {
-  INT32 length;
+  JLONG length;
   int c, ci;
   jpeg_component_info * compptr;
   INPUT_VARS(cinfo);
@@ -304,7 +304,7 @@
 get_sos (j_decompress_ptr cinfo)
 /* Process a SOS marker */
 {
-  INT32 length;
+  JLONG length;
   int i, ci, n, c, cc, pi;
   jpeg_component_info * compptr;
   INPUT_VARS(cinfo);
@@ -387,7 +387,7 @@
 get_dac (j_decompress_ptr cinfo)
 /* Process a DAC marker */
 {
-  INT32 length;
+  JLONG length;
   int index, val;
   INPUT_VARS(cinfo);
 
@@ -433,7 +433,7 @@
 get_dht (j_decompress_ptr cinfo)
 /* Process a DHT marker */
 {
-  INT32 length;
+  JLONG length;
   UINT8 bits[17];
   UINT8 huffval[256];
   int i, index, count;
@@ -467,7 +467,7 @@
     /* Here we just do minimal validation of the counts to avoid walking
      * off the end of our table space.  jdhuff.c will check more carefully.
      */
-    if (count > 256 || ((INT32) count) > length)
+    if (count > 256 || ((JLONG) count) > length)
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
 
     for (i = 0; i < count; i++)
@@ -507,7 +507,7 @@
 get_dqt (j_decompress_ptr cinfo)
 /* Process a DQT marker */
 {
-  INT32 length;
+  JLONG length;
   int n, i, prec;
   unsigned int tmp;
   JQUANT_TBL *quant_ptr;
@@ -565,7 +565,7 @@
 get_dri (j_decompress_ptr cinfo)
 /* Process a DRI marker */
 {
-  INT32 length;
+  JLONG length;
   unsigned int tmp;
   INPUT_VARS(cinfo);
 
@@ -599,13 +599,13 @@
 
 LOCAL(void)
 examine_app0 (j_decompress_ptr cinfo, JOCTET * data,
-              unsigned int datalen, INT32 remaining)
+              unsigned int datalen, JLONG remaining)
 /* Examine first few bytes from an APP0.
  * Take appropriate action if it is a JFIF marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
  */
 {
-  INT32 totallen = (INT32) datalen + remaining;
+  JLONG totallen = (JLONG) datalen + remaining;
 
   if (datalen >= APP0_DATA_LEN &&
       GETJOCTET(data[0]) == 0x4A &&
@@ -639,7 +639,7 @@
                GETJOCTET(data[12]), GETJOCTET(data[13]));
     totallen -= APP0_DATA_LEN;
     if (totallen !=
-        ((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+        ((JLONG)GETJOCTET(data[12]) * (JLONG)GETJOCTET(data[13]) * (JLONG) 3))
       TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
   } else if (datalen >= 6 &&
       GETJOCTET(data[0]) == 0x4A &&
@@ -675,7 +675,7 @@
 
 LOCAL(void)
 examine_app14 (j_decompress_ptr cinfo, JOCTET * data,
-               unsigned int datalen, INT32 remaining)
+               unsigned int datalen, JLONG remaining)
 /* Examine first few bytes from an APP14.
  * Take appropriate action if it is an Adobe marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
@@ -708,7 +708,7 @@
 get_interesting_appn (j_decompress_ptr cinfo)
 /* Process an APP0 or APP14 marker without saving it */
 {
-  INT32 length;
+  JLONG length;
   JOCTET b[APPN_DATA_LEN];
   unsigned int i, numtoread;
   INPUT_VARS(cinfo);
@@ -760,7 +760,7 @@
   jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
   unsigned int bytes_read, data_length;
   JOCTET * data;
-  INT32 length = 0;
+  JLONG length = 0;
   INPUT_VARS(cinfo);
 
   if (cur_marker == NULL) {
@@ -862,7 +862,7 @@
 skip_variable (j_decompress_ptr cinfo)
 /* Skip over an unknown or uninteresting variable-length marker */
 {
-  INT32 length;
+  JLONG length;
   INPUT_VARS(cinfo);
 
   INPUT_2BYTES(cinfo, length, return FALSE);
diff --git a/jdmerge.c b/jdmerge.c
index 22d7623..9b2fdf8 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2011, 2014 D. R. Commander.
+ * Copyright (C) 2009, 2011, 2014-2015, D. R. Commander.
  * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -58,8 +58,8 @@
   /* Private state for YCC->RGB conversion */
   int * Cr_r_tab;               /* => table for Cr to R conversion */
   int * Cb_b_tab;               /* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
+  JLONG * Cr_g_tab;             /* => table for Cr to G conversion */
+  JLONG * Cb_g_tab;             /* => table for Cb to G conversion */
 
   /* For 2:1 vertical sampling, we produce two output rows at a time.
    * We need a "spare" row buffer to hold the second output row if the
@@ -76,8 +76,8 @@
 typedef my_upsampler * my_upsample_ptr;
 
 #define SCALEBITS       16      /* speediest right-shift on some machines */
-#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define ONE_HALF        ((JLONG) 1 << (SCALEBITS-1))
+#define FIX(x)          ((JLONG) ((x) * (1L<<SCALEBITS) + 0.5))
 
 
 /* Include inline routines for colorspace extensions */
@@ -191,7 +191,7 @@
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   int i;
-  INT32 x;
+  JLONG x;
   SHIFT_TEMPS
 
   upsample->Cr_r_tab = (int *)
@@ -200,12 +200,12 @@
   upsample->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                 (MAXJSAMPLE+1) * sizeof(int));
-  upsample->Cr_g_tab = (INT32 *)
+  upsample->Cr_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE+1) * sizeof(INT32));
-  upsample->Cb_g_tab = (INT32 *)
+                                (MAXJSAMPLE+1) * sizeof(JLONG));
+  upsample->Cb_g_tab = (JLONG *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                (MAXJSAMPLE+1) * sizeof(INT32));
+                                (MAXJSAMPLE+1) * sizeof(JLONG));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
@@ -457,7 +457,7 @@
 
 #define DITHER_MASK       0x3
 #define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
-static const INT32 dither_matrix[4] = {
+static const JLONG dither_matrix[4] = {
   0x0008020A,
   0x0C040E06,
   0x030B0109,
diff --git a/jdmrg565.c b/jdmrg565.c
index 43c7377..09f0d27 100644
--- a/jdmrg565.c
+++ b/jdmrg565.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014-2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -30,10 +30,10 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
   unsigned int r, g, b;
-  INT32 rgb;
+  JLONG rgb;
   SHIFT_TEMPS
 
   inptr0 = input_buf[0][in_row_group_ctr];
@@ -101,11 +101,11 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
   unsigned int r, g, b;
-  INT32 rgb;
+  JLONG rgb;
   SHIFT_TEMPS
 
   inptr0 = input_buf[0][in_row_group_ctr];
@@ -175,10 +175,10 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
   unsigned int r, g, b;
-  INT32 rgb;
+  JLONG rgb;
   SHIFT_TEMPS
 
   inptr00 = input_buf[0][in_row_group_ctr * 2];
@@ -271,12 +271,12 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
-  INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK];
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  JLONG d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK];
   unsigned int r, g, b;
-  INT32 rgb;
+  JLONG rgb;
   SHIFT_TEMPS
 
   inptr00 = input_buf[0][in_row_group_ctr*2];
diff --git a/jdmrgext.c b/jdmrgext.c
index 3792c4f..9d7d2af 100644
--- a/jdmrgext.c
+++ b/jdmrgext.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2011, D. R. Commander.
+ * Copyright (C) 2011, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -36,8 +36,8 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
   SHIFT_TEMPS
 
   inptr0 = input_buf[0][in_row_group_ctr];
@@ -109,8 +109,8 @@
   register JSAMPLE * range_limit = cinfo->sample_range_limit;
   int * Crrtab = upsample->Cr_r_tab;
   int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
+  JLONG * Crgtab = upsample->Cr_g_tab;
+  JLONG * Cbgtab = upsample->Cb_g_tab;
   SHIFT_TEMPS
 
   inptr00 = input_buf[0][in_row_group_ctr*2];
diff --git a/jdsample.c b/jdsample.c
index e1870fc..276feae 100644
--- a/jdsample.c
+++ b/jdsample.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2015, D. R. Commander.
  * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
@@ -318,7 +318,7 @@
 #if BITS_IN_JSAMPLE == 8
   register int thiscolsum, lastcolsum, nextcolsum;
 #else
-  register INT32 thiscolsum, lastcolsum, nextcolsum;
+  register JLONG thiscolsum, lastcolsum, nextcolsum;
 #endif
   register JDIMENSION colctr;
   int inrow, outrow, v;
diff --git a/jfdctfst.c b/jfdctfst.c
index 497f120..5ba6f13 100644
--- a/jfdctfst.c
+++ b/jfdctfst.c
@@ -1,8 +1,10 @@
 /*
  * jfdctfst.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -77,10 +79,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)         /* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)         /* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)         /* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)         /* FIX(1.306562965) */
+#define FIX_0_382683433  ((JLONG)   98)         /* FIX(0.382683433) */
+#define FIX_0_541196100  ((JLONG)  139)         /* FIX(0.541196100) */
+#define FIX_0_707106781  ((JLONG)  181)         /* FIX(0.707106781) */
+#define FIX_1_306562965  ((JLONG)  334)         /* FIX(1.306562965) */
 #else
 #define FIX_0_382683433  FIX(0.382683433)
 #define FIX_0_541196100  FIX(0.541196100)
@@ -100,7 +102,7 @@
 #endif
 
 
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+/* Multiply a DCTELEM variable by a JLONG constant, and immediately
  * descale to yield a DCTELEM result.
  */
 
diff --git a/jfdctint.c b/jfdctint.c
index b0b89bd..adfb6e3 100644
--- a/jfdctint.c
+++ b/jfdctint.c
@@ -70,7 +70,7 @@
  * they are represented to better-than-integral precision.  These outputs
  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
  * with the recommended scaling.  (For 12-bit sample data, the intermediate
- * array is INT32 anyway.)
+ * array is JLONG anyway.)
  *
  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
@@ -93,18 +93,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
+#define FIX_0_298631336  ((JLONG)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((JLONG)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((JLONG)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((JLONG)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((JLONG)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((JLONG)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((JLONG)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((JLONG)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((JLONG)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((JLONG)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((JLONG)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((JLONG)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -121,7 +121,7 @@
 #endif
 
 
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
  * For 8-bit samples with the recommended scaling, all the variable
  * and constant values involved are no more than 16 bits wide, so a
  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
@@ -142,9 +142,9 @@
 GLOBAL(void)
 jpeg_fdct_islow (DCTELEM * data)
 {
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3, z4, z5;
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  JLONG tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3, z4, z5;
   DCTELEM *dataptr;
   int ctr;
   SHIFT_TEMPS
diff --git a/jidctfst.c b/jidctfst.c
index 9ca3d17..98205eb 100644
--- a/jidctfst.c
+++ b/jidctfst.c
@@ -1,8 +1,10 @@
 /*
  * jidctfst.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -90,10 +92,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)         /* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)         /* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)         /* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)         /* FIX(2.613125930) */
+#define FIX_1_082392200  ((JLONG)  277)         /* FIX(1.082392200) */
+#define FIX_1_414213562  ((JLONG)  362)         /* FIX(1.414213562) */
+#define FIX_1_847759065  ((JLONG)  473)         /* FIX(1.847759065) */
+#define FIX_2_613125930  ((JLONG)  669)         /* FIX(2.613125930) */
 #else
 #define FIX_1_082392200  FIX(1.082392200)
 #define FIX_1_414213562  FIX(1.414213562)
@@ -113,7 +115,7 @@
 #endif
 
 
-/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+/* Multiply a DCTELEM variable by a JLONG constant, and immediately
  * descale to yield a DCTELEM result.
  */
 
@@ -123,7 +125,7 @@
 /* Dequantize a coefficient by multiplying it by the multiplier-table
  * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
  * multiplication will do.  For 12-bit data, the multiplier table is
- * declared INT32, so a 32-bit multiply will be used.
+ * declared JLONG, so a 32-bit multiply will be used.
  */
 
 #if BITS_IN_JSAMPLE == 8
@@ -135,7 +137,7 @@
 
 
 /* Like DESCALE, but applies to a DCTELEM and produces an int.
- * We assume that int right shift is unsigned if INT32 right shift is.
+ * We assume that int right shift is unsigned if JLONG right shift is.
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
diff --git a/jidctint.c b/jidctint.c
index ebd3242..66d6cb3 100644
--- a/jidctint.c
+++ b/jidctint.c
@@ -92,7 +92,7 @@
  * they are represented to better-than-integral precision.  These outputs
  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
  * with the recommended scaling.  (To scale up 12-bit sample data further, an
- * intermediate INT32 array would be needed.)
+ * intermediate JLONG array would be needed.)
  *
  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
@@ -115,18 +115,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
+#define FIX_0_298631336  ((JLONG)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((JLONG)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((JLONG)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((JLONG)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((JLONG)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((JLONG)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((JLONG)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((JLONG)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((JLONG)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((JLONG)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((JLONG)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((JLONG)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -143,7 +143,7 @@
 #endif
 
 
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
  * For 8-bit samples with the recommended scaling, all the variable
  * and constant values involved are no more than 16 bits wide, so a
  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
@@ -174,9 +174,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp3;
-  INT32 tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3, z4, z5;
+  JLONG tmp0, tmp1, tmp2, tmp3;
+  JLONG tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3, z4, z5;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -314,7 +314,7 @@
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+      JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3)
                                   & RANGE_MASK];
 
       outptr[0] = dcval;
@@ -334,15 +334,15 @@
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
 
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
+    z2 = (JLONG) wsptr[2];
+    z3 = (JLONG) wsptr[6];
 
     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
 
-    tmp0 = LEFT_SHIFT((INT32) wsptr[0] + (INT32) wsptr[4], CONST_BITS);
-    tmp1 = LEFT_SHIFT((INT32) wsptr[0] - (INT32) wsptr[4], CONST_BITS);
+    tmp0 = LEFT_SHIFT((JLONG) wsptr[0] + (JLONG) wsptr[4], CONST_BITS);
+    tmp1 = LEFT_SHIFT((JLONG) wsptr[0] - (JLONG) wsptr[4], CONST_BITS);
 
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
@@ -353,10 +353,10 @@
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
 
-    tmp0 = (INT32) wsptr[7];
-    tmp1 = (INT32) wsptr[5];
-    tmp2 = (INT32) wsptr[3];
-    tmp3 = (INT32) wsptr[1];
+    tmp0 = (JLONG) wsptr[7];
+    tmp1 = (JLONG) wsptr[5];
+    tmp2 = (JLONG) wsptr[3];
+    tmp3 = (JLONG) wsptr[1];
 
     z1 = tmp0 + tmp3;
     z2 = tmp1 + tmp2;
@@ -428,8 +428,8 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
-  INT32 z1, z2, z3;
+  JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -502,12 +502,12 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp13 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
 
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
+    z1 = (JLONG) wsptr[2];
+    z2 = (JLONG) wsptr[4];
+    z3 = (JLONG) wsptr[6];
 
     tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
     tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
@@ -521,9 +521,9 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
 
     tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
     tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
@@ -577,8 +577,8 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
+  JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -638,22 +638,22 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
-    tmp2 = (INT32) wsptr[4];
+    tmp2 = (JLONG) wsptr[4];
     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
     tmp1 = tmp0 + tmp10;
     tmp11 = tmp0 - tmp10 - tmp10;
-    tmp10 = (INT32) wsptr[2];
+    tmp10 = (JLONG) wsptr[2];
     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
     tmp10 = tmp1 + tmp0;
     tmp12 = tmp1 - tmp0;
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
     tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
     tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
@@ -698,8 +698,8 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
-  INT32 z1, z2, z3;
+  JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
+  JLONG z1, z2, z3;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -757,10 +757,10 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp12 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
-    tmp0 = (INT32) wsptr[2];
-    tmp1 = (INT32) wsptr[4];
+    tmp0 = (JLONG) wsptr[2];
+    tmp1 = (JLONG) wsptr[4];
     z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
     z3 = tmp12 + z2;
@@ -770,8 +770,8 @@
 
     /* Odd part */
 
-    z2 = (INT32) wsptr[1];
-    z3 = (INT32) wsptr[3];
+    z2 = (JLONG) wsptr[1];
+    z3 = (JLONG) wsptr[3];
 
     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
     tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
@@ -813,7 +813,7 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp2, tmp10, tmp12;
+  JLONG tmp0, tmp2, tmp10, tmp12;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -861,16 +861,16 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
-    tmp2 = (INT32) wsptr[2];
+    tmp2 = (JLONG) wsptr[2];
     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
     tmp10 = tmp0 + tmp12;
     tmp2 = tmp0 - tmp12 - tmp12;
 
     /* Odd part */
 
-    tmp12 = (INT32) wsptr[1];
+    tmp12 = (JLONG) wsptr[1];
     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
 
     /* Final output stage */
@@ -903,8 +903,8 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -986,12 +986,12 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
 
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
+    z1 = (JLONG) wsptr[2];
+    z2 = (JLONG) wsptr[4];
+    z3 = (JLONG) wsptr[6];
 
     tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
     tmp1 = tmp0 + tmp3;
@@ -1011,10 +1011,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
 
     z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
 
@@ -1074,9 +1074,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
-  INT32 z1, z2, z3, z4, z5;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
+  JLONG z1, z2, z3, z4, z5;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -1169,9 +1169,9 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = LEFT_SHIFT(z3, CONST_BITS);
-    z4 = (INT32) wsptr[4];
+    z4 = (JLONG) wsptr[4];
     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
     tmp10 = z3 + z1;
@@ -1179,8 +1179,8 @@
 
     tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1);         /* c0 = (c4-c8)*2 */
 
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[6];
+    z2 = (JLONG) wsptr[2];
+    z3 = (JLONG) wsptr[6];
 
     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
@@ -1193,11 +1193,11 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
     z3 = LEFT_SHIFT(z3, CONST_BITS);
-    z4 = (INT32) wsptr[7];
+    z4 = (JLONG) wsptr[7];
 
     tmp11 = z2 + z4;
     tmp13 = z2 - z4;
@@ -1269,9 +1269,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -1362,12 +1362,12 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp10 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
 
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[4];
-    z3 = (INT32) wsptr[6];
+    z1 = (JLONG) wsptr[2];
+    z2 = (JLONG) wsptr[4];
+    z3 = (JLONG) wsptr[6];
 
     tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
     tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
@@ -1387,10 +1387,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
 
     tmp11 = z1 + z2;
     tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
@@ -1463,9 +1463,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -1566,19 +1566,19 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = LEFT_SHIFT(z3, CONST_BITS);
 
-    z4 = (INT32) wsptr[4];
+    z4 = (JLONG) wsptr[4];
     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
 
     tmp10 = z3 + z4;
     tmp11 = z3 - z4;
 
-    z1 = (INT32) wsptr[2];
+    z1 = (JLONG) wsptr[2];
     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
     z1 = LEFT_SHIFT(z1, CONST_BITS);
-    z2 = (INT32) wsptr[6];
+    z2 = (JLONG) wsptr[6];
     z2 = LEFT_SHIFT(z2, CONST_BITS);
 
     tmp12 = z1 - z2;
@@ -1598,10 +1598,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
 
     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
@@ -1679,9 +1679,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -1787,12 +1787,12 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     z1 = LEFT_SHIFT(z1, CONST_BITS);
 
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
+    z2 = (JLONG) wsptr[2];
+    z3 = (JLONG) wsptr[4];
+    z4 = (JLONG) wsptr[6];
 
     tmp10 = z3 + z4;
     tmp11 = z3 - z4;
@@ -1819,10 +1819,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
 
     tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
     tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
@@ -1907,9 +1907,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -2014,9 +2014,9 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     z1 = LEFT_SHIFT(z1, CONST_BITS);
-    z4 = (INT32) wsptr[4];
+    z4 = (JLONG) wsptr[4];
     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
@@ -2027,8 +2027,8 @@
 
     tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1);    /* c0 = (c4+c12-c8)*2 */
 
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
+    z1 = (JLONG) wsptr[2];
+    z2 = (JLONG) wsptr[6];
 
     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
 
@@ -2046,10 +2046,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
     z4 = LEFT_SHIFT(z4, CONST_BITS);
 
     tmp14 = z1 + z3;
@@ -2133,9 +2133,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -2246,12 +2246,12 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     z1 = LEFT_SHIFT(z1, CONST_BITS);
 
-    z2 = (INT32) wsptr[2];
-    z3 = (INT32) wsptr[4];
-    z4 = (INT32) wsptr[6];
+    z2 = (JLONG) wsptr[2];
+    z3 = (JLONG) wsptr[4];
+    z4 = (JLONG) wsptr[6];
 
     tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
     tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
@@ -2286,11 +2286,11 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z4 = (INT32) wsptr[5];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z4 = (JLONG) wsptr[5];
     z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
-    z4 = (INT32) wsptr[7];
+    z4 = (JLONG) wsptr[7];
 
     tmp13 = z2 - z4;
     tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
@@ -2375,9 +2375,9 @@
                  JCOEFPTR coef_block,
                  JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
-  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -2497,10 +2497,10 @@
     /* Even part */
 
     /* Add fudge factor here for final descale. */
-    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 = (JLONG) wsptr[0] + (ONE << (PASS1_BITS+2));
     tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
 
-    z1 = (INT32) wsptr[4];
+    z1 = (JLONG) wsptr[4];
     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
 
@@ -2509,8 +2509,8 @@
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
 
-    z1 = (INT32) wsptr[2];
-    z2 = (INT32) wsptr[6];
+    z1 = (JLONG) wsptr[2];
+    z2 = (JLONG) wsptr[6];
     z3 = z1 - z2;
     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
@@ -2531,10 +2531,10 @@
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[1];
-    z2 = (INT32) wsptr[3];
-    z3 = (INT32) wsptr[5];
-    z4 = (INT32) wsptr[7];
+    z1 = (JLONG) wsptr[1];
+    z2 = (JLONG) wsptr[3];
+    z3 = (JLONG) wsptr[5];
+    z4 = (JLONG) wsptr[7];
 
     tmp11 = z1 + z3;
 
diff --git a/jidctred.c b/jidctred.c
index 1b55aa8..2d2d624 100644
--- a/jidctred.c
+++ b/jidctred.c
@@ -58,20 +58,20 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_211164243  ((INT32)  1730)        /* FIX(0.211164243) */
-#define FIX_0_509795579  ((INT32)  4176)        /* FIX(0.509795579) */
-#define FIX_0_601344887  ((INT32)  4926)        /* FIX(0.601344887) */
-#define FIX_0_720959822  ((INT32)  5906)        /* FIX(0.720959822) */
-#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
-#define FIX_0_850430095  ((INT32)  6967)        /* FIX(0.850430095) */
-#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
-#define FIX_1_061594337  ((INT32)  8697)        /* FIX(1.061594337) */
-#define FIX_1_272758580  ((INT32)  10426)       /* FIX(1.272758580) */
-#define FIX_1_451774981  ((INT32)  11893)       /* FIX(1.451774981) */
-#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
-#define FIX_2_172734803  ((INT32)  17799)       /* FIX(2.172734803) */
-#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
-#define FIX_3_624509785  ((INT32)  29692)       /* FIX(3.624509785) */
+#define FIX_0_211164243  ((JLONG)  1730)        /* FIX(0.211164243) */
+#define FIX_0_509795579  ((JLONG)  4176)        /* FIX(0.509795579) */
+#define FIX_0_601344887  ((JLONG)  4926)        /* FIX(0.601344887) */
+#define FIX_0_720959822  ((JLONG)  5906)        /* FIX(0.720959822) */
+#define FIX_0_765366865  ((JLONG)  6270)        /* FIX(0.765366865) */
+#define FIX_0_850430095  ((JLONG)  6967)        /* FIX(0.850430095) */
+#define FIX_0_899976223  ((JLONG)  7373)        /* FIX(0.899976223) */
+#define FIX_1_061594337  ((JLONG)  8697)        /* FIX(1.061594337) */
+#define FIX_1_272758580  ((JLONG)  10426)       /* FIX(1.272758580) */
+#define FIX_1_451774981  ((JLONG)  11893)       /* FIX(1.451774981) */
+#define FIX_1_847759065  ((JLONG)  15137)       /* FIX(1.847759065) */
+#define FIX_2_172734803  ((JLONG)  17799)       /* FIX(2.172734803) */
+#define FIX_2_562915447  ((JLONG)  20995)       /* FIX(2.562915447) */
+#define FIX_3_624509785  ((JLONG)  29692)       /* FIX(3.624509785) */
 #else
 #define FIX_0_211164243  FIX(0.211164243)
 #define FIX_0_509795579  FIX(0.509795579)
@@ -90,7 +90,7 @@
 #endif
 
 
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
  * For 8-bit samples with the recommended scaling, all the variable
  * and constant values involved are no more than 16 bits wide, so a
  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
@@ -122,8 +122,8 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp2, tmp10, tmp12;
-  INT32 z1, z2, z3, z4;
+  JLONG tmp0, tmp2, tmp10, tmp12;
+  JLONG z1, z2, z3, z4;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -206,7 +206,7 @@
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
         wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+      JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3)
                                   & RANGE_MASK];
 
       outptr[0] = dcval;
@@ -221,20 +221,20 @@
 
     /* Even part */
 
-    tmp0 = LEFT_SHIFT((INT32) wsptr[0], CONST_BITS+1);
+    tmp0 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+1);
 
-    tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
-         + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+    tmp2 = MULTIPLY((JLONG) wsptr[2], FIX_1_847759065)
+         + MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865);
 
     tmp10 = tmp0 + tmp2;
     tmp12 = tmp0 - tmp2;
 
     /* Odd part */
 
-    z1 = (INT32) wsptr[7];
-    z2 = (INT32) wsptr[5];
-    z3 = (INT32) wsptr[3];
-    z4 = (INT32) wsptr[1];
+    z1 = (JLONG) wsptr[7];
+    z2 = (JLONG) wsptr[5];
+    z3 = (JLONG) wsptr[3];
+    z4 = (JLONG) wsptr[1];
 
     tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
          + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
@@ -276,7 +276,7 @@
                JCOEFPTR coef_block,
                JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp10, z1;
+  JLONG tmp0, tmp10, z1;
   JCOEFPTR inptr;
   ISLOW_MULT_TYPE * quantptr;
   int * wsptr;
@@ -339,7 +339,7 @@
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
-      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+      JSAMPLE dcval = range_limit[(int) DESCALE((JLONG) wsptr[0], PASS1_BITS+3)
                                   & RANGE_MASK];
 
       outptr[0] = dcval;
@@ -352,14 +352,14 @@
 
     /* Even part */
 
-    tmp10 = LEFT_SHIFT((INT32) wsptr[0], CONST_BITS+2);
+    tmp10 = LEFT_SHIFT((JLONG) wsptr[0], CONST_BITS+2);
 
     /* Odd part */
 
-    tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
-         + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
-         + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
-         + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+    tmp0 = MULTIPLY((JLONG) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
+         + MULTIPLY((JLONG) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+         + MULTIPLY((JLONG) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+         + MULTIPLY((JLONG) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
 
     /* Final output stage */
 
@@ -395,7 +395,7 @@
    */
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
-  dcval = (int) DESCALE((INT32) dcval, 3);
+  dcval = (int) DESCALE((JLONG) dcval, 3);
 
   output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
 }
diff --git a/jmorecfg.h b/jmorecfg.h
index 806d6a4..1d96786 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -147,13 +147,35 @@
 typedef short INT16;
 #endif
 
-/* INT32 must hold at least signed 32-bit values. */
+/* INT32 must hold at least signed 32-bit values.
+ *
+ * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.)  Integers were
+ * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to
+ * long.  It also wasn't common (or at least as common) in 1994 for INT32 to be
+ * defined by platform headers.  Since then, however, INT32 has been defined in
+ * several other common places:
+ *
+ * Xmd.h (X11 header) typedefs INT32 to int on 64-bit platforms and long on
+ * 32-bit platforms (i.e. always a 32-bit signed type.)
+ *
+ * basetsd.h (Win32 header) typedefs INT32 to int (always a 32-bit signed type
+ * on modern platforms.)
+ *
+ * qglobal.h (Qt header) typedefs INT32 to int (always a 32-bit signed type on
+ * modern platforms.)
+ *
+ * This is a recipe for conflict, since "long" and "int" aren't always
+ * compatible types.  Since the definition of INT32 has technically been part
+ * of the libjpeg API for more than 20 years, we can't remove it, but we do not
+ * use it internally any longer.  We instead define a separate type (JLONG)
+ * for internal use, which ensures that internal behavior will always be the
+ * same regardless of any external headers that may be included.
+ */
 
 #ifndef XMD_H                   /* X11/xmd.h correctly defines INT32 */
 #ifndef _BASETSD_H_		/* Microsoft defines it in basetsd.h */
 #ifndef _BASETSD_H		/* MinGW is slightly different */
 #ifndef QGLOBAL_H		/* Qt defines it in qglobal.h */
-#define __INT32_IS_ACTUALLY_LONG
 typedef long INT32;
 #endif
 #endif
diff --git a/jpegint.h b/jpegint.h
index 10d3623..1530e0c 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -43,16 +43,16 @@
 #define DSTATE_STOPPING 210     /* looking for EOI in jpeg_finish_decompress */
 
 
+/* JLONG must hold at least signed 32-bit values. */
+typedef long JLONG;
+
+
 /*
  * Left shift macro that handles a negative operand without causing any
  * sanitizer warnings
  */
 
-#ifdef __INT32_IS_ACTUALLY_LONG
-#define LEFT_SHIFT(a, b) ((INT32)((unsigned long)(a) << (b)))
-#else
-#define LEFT_SHIFT(a, b) ((INT32)((unsigned int)(a) << (b)))
-#endif
+#define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b)))
 
 
 /* Declarations for compression modules */
@@ -276,16 +276,16 @@
  * shift" instructions that shift in copies of the sign bit.  But some
  * C compilers implement >> with an unsigned shift.  For these machines you
  * must define RIGHT_SHIFT_IS_UNSIGNED.
- * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
+ * RIGHT_SHIFT provides a proper signed right shift of a JLONG quantity.
  * It is only applied with constant shift counts.  SHIFT_TEMPS must be
  * included in the variables of any routine using RIGHT_SHIFT.
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS     INT32 shift_temp;
+#define SHIFT_TEMPS     JLONG shift_temp;
 #define RIGHT_SHIFT(x,shft)  \
         ((shift_temp = (x)) < 0 ? \
-         (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+         (shift_temp >> (shft)) | ((~((JLONG) 0)) << (32-(shft))) : \
          (shift_temp >> (shft)))
 #else
 #define SHIFT_TEMPS
@@ -348,7 +348,7 @@
 extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
 
 /* Arithmetic coding probability estimation tables in jaricom.c */
-extern const INT32 jpeg_aritab[];
+extern const JLONG jpeg_aritab[];
 
 /* Suppress undefined-structure complaints if necessary. */
 
diff --git a/jquant1.c b/jquant1.c
index 4bd0973..0082c2f 100644
--- a/jquant1.c
+++ b/jquant1.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, D. R. Commander
+ * Copyright (C) 2009, 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -128,8 +128,8 @@
 typedef INT16 FSERROR;          /* 16 bits should be enough */
 typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;          /* may need more than 16 bits */
-typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
+typedef JLONG FSERROR;          /* may need more than 16 bits */
+typedef JLONG LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
 typedef FSERROR *FSERRPTR;  /* pointer to error array */
@@ -254,7 +254,7 @@
    * (Forcing the upper and lower values to the limits ensures that
    * dithering can't produce a color outside the selected gamut.)
    */
-  return (int) (((INT32) j * MAXJSAMPLE + maxj/2) / maxj);
+  return (int) (((JLONG) j * MAXJSAMPLE + maxj/2) / maxj);
 }
 
 
@@ -264,7 +264,7 @@
 /* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
 {
   /* Breakpoints are halfway between values returned by output_value */
-  return (int) (((INT32) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
+  return (int) (((JLONG) (2*j + 1) * MAXJSAMPLE + maxj) / (2*maxj));
 }
 
 
@@ -400,7 +400,7 @@
 {
   ODITHER_MATRIX_PTR odither;
   int j,k;
-  INT32 num,den;
+  JLONG num,den;
 
   odither = (ODITHER_MATRIX_PTR)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
@@ -410,10 +410,10 @@
    * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
    * On 16-bit-int machine, be careful to avoid overflow.
    */
-  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  den = 2 * ODITHER_CELLS * ((JLONG) (ncolors - 1));
   for (j = 0; j < ODITHER_SIZE; j++) {
     for (k = 0; k < ODITHER_SIZE; k++) {
-      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
+      num = ((JLONG) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
             * MAXJSAMPLE;
       /* Ensure round towards zero despite C's lack of consistency
        * about rounding negative values in integer division...
diff --git a/jquant2.c b/jquant2.c
index aa56aa4..fec4e4e 100644
--- a/jquant2.c
+++ b/jquant2.c
@@ -4,7 +4,7 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
- * Copyright (C) 2009, 2014, D. R. Commander.
+ * Copyright (C) 2009, 2014-2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -160,8 +160,8 @@
 typedef INT16 FSERROR;          /* 16 bits should be enough */
 typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;          /* may need more than 16 bits */
-typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
+typedef JLONG FSERROR;          /* may need more than 16 bits */
+typedef JLONG LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
 typedef FSERROR *FSERRPTR;      /* pointer to error array */
@@ -240,7 +240,7 @@
   int c1min, c1max;
   int c2min, c2max;
   /* The volume (actually 2-norm) of the box */
-  INT32 volume;
+  JLONG volume;
   /* The number of nonzero histogram cells within this box */
   long colorcount;
 } box;
@@ -275,7 +275,7 @@
 {
   register boxptr boxp;
   register int i;
-  register INT32 maxv = 0;
+  register JLONG maxv = 0;
   boxptr which = NULL;
 
   for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
@@ -298,7 +298,7 @@
   histptr histp;
   int c0,c1,c2;
   int c0min,c0max,c1min,c1max,c2min,c2max;
-  INT32 dist0,dist1,dist2;
+  JLONG dist0,dist1,dist2;
   long ccount;
 
   c0min = boxp->c0min;  c0max = boxp->c0max;
@@ -572,7 +572,7 @@
  * distance from every colormap entry to every histogram cell.  Unfortunately,
  * it needs a work array to hold the best-distance-so-far for each histogram
  * cell (because the inner loop has to be over cells, not colormap entries).
- * The work array elements have to be INT32s, so the work array would need
+ * The work array elements have to be JLONGs, so the work array would need
  * 256Kb at our recommended precision.  This is not feasible in DOS machines.
  *
  * To get around these problems, we apply Thomas' method to compute the
@@ -638,8 +638,8 @@
   int maxc0, maxc1, maxc2;
   int centerc0, centerc1, centerc2;
   int i, x, ncolors;
-  INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];  /* min distance to colormap entry i */
+  JLONG minmaxdist, min_dist, max_dist, tdist;
+  JLONG mindist[MAXNUMCOLORS];  /* min distance to colormap entry i */
 
   /* Compute true coordinates of update box's upper corner and center.
    * Actually we compute the coordinates of the center of the upper-corner
@@ -763,15 +763,15 @@
 {
   int ic0, ic1, ic2;
   int i, icolor;
-  register INT32 * bptr;        /* pointer into bestdist[] array */
+  register JLONG * bptr;        /* pointer into bestdist[] array */
   JSAMPLE * cptr;               /* pointer into bestcolor[] array */
-  INT32 dist0, dist1;           /* initial distance values */
-  register INT32 dist2;         /* current distance in inner loop */
-  INT32 xx0, xx1;               /* distance increments */
-  register INT32 xx2;
-  INT32 inc0, inc1, inc2;       /* initial values for increments */
+  JLONG dist0, dist1;           /* initial distance values */
+  register JLONG dist2;         /* current distance in inner loop */
+  JLONG xx0, xx1;               /* distance increments */
+  register JLONG xx2;
+  JLONG inc0, inc1, inc2;       /* initial values for increments */
   /* This array holds the distance to the nearest-so-far color for each cell */
-  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+  JLONG bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
   /* Initialize best-distance for each cell of the update box */
   bptr = bestdist;
diff --git a/rdppm.c b/rdppm.c
index 3d49318..c2556d0 100644
--- a/rdppm.c
+++ b/rdppm.c
@@ -4,8 +4,8 @@
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
  * Modified 2009 by Bill Allombert, Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code and
- * information relevant to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -410,14 +410,14 @@
 
   /* Compute the rescaling array if required. */
   if (need_rescale) {
-    INT32 val, half_maxval;
+    long val, half_maxval;
 
     /* On 16-bit-int machines we have to be careful of maxval = 65535 */
     source->rescale = (JSAMPLE *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                   (size_t) (((long) maxval + 1L) * sizeof(JSAMPLE)));
     half_maxval = maxval / 2;
-    for (val = 0; val <= (INT32) maxval; val++) {
+    for (val = 0; val <= (long) maxval; val++) {
       /* The multiplication here must be done in 32 bits to avoid overflow */
       source->rescale[val] = (JSAMPLE) ((val*MAXJSAMPLE + half_maxval)/maxval);
     }
diff --git a/simd/jsimd_arm64_neon.S b/simd/jsimd_arm64_neon.S
index 2186f24..37f5a1b 100644
--- a/simd/jsimd_arm64_neon.S
+++ b/simd/jsimd_arm64_neon.S
@@ -6,7 +6,7 @@
  * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
  * Copyright (C) 2013-2014, Linaro Limited
  * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
- * Copyright (C) 2014, D. R. Commander.  All rights reserved.
+ * Copyright (C) 2014-2015, D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -134,8 +134,8 @@
 #define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7)   \
 {                                                                             \
     DCTELEM row0, row1, row2, row3, row4, row5, row6, row7;                   \
-    INT32   q1, q2, q3, q4, q5, q6, q7;                                       \
-    INT32   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
+    JLONG   q1, q2, q3, q4, q5, q6, q7;                                       \
+    JLONG   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
                                                                               \
     /* 1-D iDCT input data */                                                 \
     row0 = xrow0;                                                             \
@@ -156,7 +156,7 @@
     q2 = MULTIPLY(row2, FIX_0_541196100) +                                    \
          MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065);                   \
     q4 = q6;                                                                  \
-    q3 = ((INT32) row0 - (INT32) row4) << 13;                                 \
+    q3 = ((JLONG) row0 - (JLONG) row4) << 13;                                 \
     q6 += MULTIPLY(row5, -FIX_2_562915447) +                                  \
           MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447);                  \
     /* now we can use q1 (reloadable constants have been used up) */          \
@@ -183,7 +183,7 @@
     /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */        \
     tmp11_minus_tmp2 = q1;                                                    \
                                                                               \
-    q1 = ((INT32) row0 + (INT32) row4) << 13;                                 \
+    q1 = ((JLONG) row0 + (JLONG) row4) << 13;                                 \
     q2 = q1 + q6;                                                             \
     q1 = q1 - q6;                                                             \
                                                                               \
diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S
index 4d9685b..c83e1c7 100644
--- a/simd/jsimd_arm_neon.S
+++ b/simd/jsimd_arm_neon.S
@@ -6,6 +6,7 @@
  * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
  * Copyright (C) 2014 Siarhei Siamashka.  All Rights Reserved.
  * Copyright (C) 2014 Linaro Limited.  All Rights Reserved.
+ * Copyright (C) 2015 D. R. Commander.  All Rights Reserved.
  *
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
@@ -105,8 +106,8 @@
 #define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7)   \
 {                                                                             \
     DCTELEM row0, row1, row2, row3, row4, row5, row6, row7;                   \
-    INT32   q1, q2, q3, q4, q5, q6, q7;                                       \
-    INT32   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
+    JLONG   q1, q2, q3, q4, q5, q6, q7;                                       \
+    JLONG   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
                                                                               \
     /* 1-D iDCT input data */                                                 \
     row0 = xrow0;                                                             \
@@ -127,7 +128,7 @@
     q2 = MULTIPLY(row2, FIX_0_541196100) +                                    \
          MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065);                   \
     q4 = q6;                                                                  \
-    q3 = ((INT32) row0 - (INT32) row4) << 13;                                 \
+    q3 = ((JLONG) row0 - (JLONG) row4) << 13;                                 \
     q6 += MULTIPLY(row5, -FIX_2_562915447) +                                  \
           MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447);                  \
     /* now we can use q1 (reloadable constants have been used up) */          \
@@ -154,7 +155,7 @@
     /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */        \
     tmp11_minus_tmp2 = q1;                                                    \
                                                                               \
-    q1 = ((INT32) row0 + (INT32) row4) << 13;                                 \
+    q1 = ((JLONG) row0 + (JLONG) row4) << 13;                                 \
     q2 = q1 + q6;                                                             \
     q1 = q1 - q6;                                                             \
                                                                               \
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
index bd55c76..0eed1ce 100644
--- a/simd/jsimd_mips_dspr2.S
+++ b/simd/jsimd_mips_dspr2.S
@@ -5,6 +5,7 @@
  * All rights reserved.
  * Authors:  Teodora Novkovic (teodora.novkovic@imgtec.com)
  *           Darko Laus       (darko.laus@imgtec.com)
+ * Copyright (C) 2015, D. R. Commander.  All Rights Reserved.
  * This software is provided 'as-is', without any express or implied
  * warranty.  In no event will the authors be held liable for any damages
  * arising from the use of this software.
@@ -1992,14 +1993,14 @@
     move      v0, sp
     addiu     v1, zero, 8
 4:
-    lw        t0, 8(v0)        // z2 = (INT32) wsptr[2]
-    lw        t1, 24(v0)       // z3 = (INT32) wsptr[6]
-    lw        t2, 0(v0)        // (INT32) wsptr[0]
-    lw        t3, 16(v0)       // (INT32) wsptr[4]
-    lw        s4, 4(v0)        // (INT32) wsptr[1]
-    lw        s5, 12(v0)       // (INT32) wsptr[3]
-    lw        s6, 20(v0)       // (INT32) wsptr[5]
-    lw        s7, 28(v0)       // (INT32) wsptr[7]
+    lw        t0, 8(v0)        // z2 = (JLONG) wsptr[2]
+    lw        t1, 24(v0)       // z3 = (JLONG) wsptr[6]
+    lw        t2, 0(v0)        // (JLONG) wsptr[0]
+    lw        t3, 16(v0)       // (JLONG) wsptr[4]
+    lw        s4, 4(v0)        // (JLONG) wsptr[1]
+    lw        s5, 12(v0)       // (JLONG) wsptr[3]
+    lw        s6, 20(v0)       // (JLONG) wsptr[5]
+    lw        s7, 28(v0)       // (JLONG) wsptr[7]
     or        s4, s4, t0
     or        s4, s4, t1
     or        s4, s4, t3
@@ -2025,8 +2026,8 @@
     mul       t1, t1, t8       // MULTIPLY(z3, FIX_1_847759065)
     addiu     t8, zero, 6270   // FIX_0_765366865
     mul       t0, t0, t8       // MULTIPLY(z2, FIX_0_765366865)
-    addu      t4, t2, t3       // (INT32) wsptr[0] + (INT32) wsptr[4]
-    subu      t2, t2, t3       // (INT32) wsptr[0] - (INT32) wsptr[4]
+    addu      t4, t2, t3       // (JLONG) wsptr[0] + (JLONG) wsptr[4]
+    subu      t2, t2, t3       // (JLONG) wsptr[0] - (JLONG) wsptr[4]
     sll       t4, t4, 13       // tmp0 = ((wsptr[0] + wsptr[4]) << CONST_BITS
     sll       t2, t2, 13       // tmp1 = ((wsptr[0] - wsptr[4]) << CONST_BITS
     subu      t1, t5, t1       // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065)
@@ -2035,10 +2036,10 @@
     addu      t5, t5, t0       // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865)
     subu      t1, t4, t5       // tmp13 = tmp0 - tmp3
     addu      t0, t4, t5       // tmp10 = tmp0 + tmp3
-    lw        t4, 28(v0)       // tmp0 = (INT32) wsptr[7]
-    lw        t6, 12(v0)       // tmp2 = (INT32) wsptr[3]
-    lw        t5, 20(v0)       // tmp1 = (INT32) wsptr[5]
-    lw        t7, 4(v0)        // tmp3 = (INT32) wsptr[1]
+    lw        t4, 28(v0)       // tmp0 = (JLONG) wsptr[7]
+    lw        t6, 12(v0)       // tmp2 = (JLONG) wsptr[3]
+    lw        t5, 20(v0)       // tmp1 = (JLONG) wsptr[5]
+    lw        t7, 4(v0)        // tmp3 = (JLONG) wsptr[1]
     addu      s0, t4, t6       // z3 = tmp0 + tmp2
     addiu     t8, zero, 9633   // FIX_1_175875602
     addu      s1, t5, t7       // z4 = tmp1 + tmp3
@@ -3269,9 +3270,9 @@
     lw        s6, 8(t1)         // wsptr[2]
     li        s5, 6270
     lw        s7, 24(t1)        // wsptr[6]
-    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    mul       s4, s4, s6        // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065)
     lw        t2, 0(t1)         // wsptr[0]
-    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+    mul       s5, s5, s7        // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865)
     lh        t5, 28(t1)        // wsptr[7]
     lh        t6, 20(t1)        // wsptr[5]
     lh        t7, 12(t1)        // wsptr[3]
@@ -3284,7 +3285,7 @@
     mult      $ac1, zero, zero
     dpa.w.ph  $ac1, t5, s2
     dpa.w.ph  $ac1, t7, s3
-    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    sll       t2, t2, 14        // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1)
     mflo      s6, $ac0
     // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865)
     subu      s4, s4, s5
@@ -3323,9 +3324,9 @@
     lw        s6, 40(t1)        // wsptr[2]
     li        s5, 6270
     lw        s7, 56(t1)        // wsptr[6]
-    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    mul       s4, s4, s6        // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065)
     lw        t2, 32(t1)        // wsptr[0]
-    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+    mul       s5, s5, s7        // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865)
     lh        t5, 60(t1)        // wsptr[7]
     lh        t6, 52(t1)        // wsptr[5]
     lh        t7, 44(t1)        // wsptr[3]
@@ -3338,7 +3339,7 @@
     mult      $ac1, zero, zero
     dpa.w.ph  $ac1, t5, s2
     dpa.w.ph  $ac1, t7, s3
-    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    sll       t2, t2, 14        // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1)
     mflo      s6, $ac0
     // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865)
     subu      s4, s4, s5
@@ -3377,9 +3378,9 @@
     lw        s6, 72(t1)        // wsptr[2]
     li        s5, 6270
     lw        s7, 88(t1)        // wsptr[6]
-    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    mul       s4, s4, s6        // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065)
     lw        t2, 64(t1)        // wsptr[0]
-    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+    mul       s5, s5, s7        // MULTIPLY((JLONG) wsptr[6], - FIX_0_765366865)
     lh        t5, 92(t1)        // wsptr[7]
     lh        t6, 84(t1)        // wsptr[5]
     lh        t7, 76(t1)        // wsptr[3]
@@ -3392,7 +3393,7 @@
     mult      $ac1, zero, zero
     dpa.w.ph  $ac1, t5, s2
     dpa.w.ph  $ac1, t7, s3
-    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    sll       t2, t2, 14        // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1)
     mflo      s6, $ac0
     // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865)
     subu      s4, s4, s5
@@ -3430,9 +3431,9 @@
     lw        s6, 104(t1)       // wsptr[2]
     li        s5, 6270
     lw        s7, 120(t1)       // wsptr[6]
-    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    mul       s4, s4, s6        // MULTIPLY((JLONG) wsptr[2], FIX_1_847759065)
     lw        t2, 96(t1)        // wsptr[0]
-    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], -FIX_0_765366865)
+    mul       s5, s5, s7        // MULTIPLY((JLONG) wsptr[6], -FIX_0_765366865)
     lh        t5, 124(t1)       // wsptr[7]
     lh        t6, 116(t1)       // wsptr[5]
     lh        t7, 108(t1)       // wsptr[3]
@@ -3445,7 +3446,7 @@
     mult      $ac1, zero, zero
     dpa.w.ph  $ac1, t5, s2
     dpa.w.ph  $ac1, t7, s3
-    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    sll       t2, t2, 14        // tmp0 = ((JLONG) wsptr[0]) << (CONST_BITS+1)
     mflo      s6, $ac0
     // MULTIPLY(wsptr[2], FIX_1_847759065 + MULTIPLY(wsptr[6], -FIX_0_765366865)
     subu      s4, s4, s5
diff --git a/wrbmp.c b/wrbmp.c
index 06630d8..5e307de 100644
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -5,7 +5,7 @@
  * Copyright (C) 1994-1996, Thomas G. Lane.
  * libjpeg-turbo Modifications:
  * Copyright (C) 2013, Linaro Limited.
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014-2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -200,7 +200,7 @@
          array[offset+1] = (char) (((value) >> 8) & 0xFF), \
          array[offset+2] = (char) (((value) >> 16) & 0xFF), \
          array[offset+3] = (char) (((value) >> 24) & 0xFF))
-  INT32 headersize, bfSize;
+  long headersize, bfSize;
   int bits_per_pixel, cmap_entries;
 
   /* Compute colormap size and total file size */
@@ -224,7 +224,7 @@
   }
   /* File size */
   headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */
-  bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
+  bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height;
 
   /* Set unused fields of header to 0 */
   MEMZERO(bmpfileheader, sizeof(bmpfileheader));
@@ -246,8 +246,8 @@
   /* we leave biCompression = 0, for none */
   /* we leave biSizeImage = 0; this is correct for uncompressed data */
   if (cinfo->density_unit == 2) { /* if have density in dots/cm, then */
-    PUT_4B(bmpinfoheader, 24, (INT32) (cinfo->X_density*100)); /* XPels/M */
-    PUT_4B(bmpinfoheader, 28, (INT32) (cinfo->Y_density*100)); /* XPels/M */
+    PUT_4B(bmpinfoheader, 24, (long) (cinfo->X_density*100)); /* XPels/M */
+    PUT_4B(bmpinfoheader, 28, (long) (cinfo->Y_density*100)); /* YPels/M */
   }
   PUT_2B(bmpinfoheader, 32, cmap_entries); /* biClrUsed */
   /* we leave biClrImportant = 0 */
@@ -268,7 +268,7 @@
 {
   char bmpfileheader[14];
   char bmpcoreheader[12];
-  INT32 headersize, bfSize;
+  long headersize, bfSize;
   int bits_per_pixel, cmap_entries;
 
   /* Compute colormap size and total file size */
@@ -292,7 +292,7 @@
   }
   /* File size */
   headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */
-  bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
+  bfSize = headersize + (long) dest->row_width * (long) cinfo->output_height;
 
   /* Set unused fields of header to 0 */
   MEMZERO(bmpfileheader, sizeof(bmpfileheader));
diff --git a/wrgif.c b/wrgif.c
index d3d8d51..53592de 100644
--- a/wrgif.c
+++ b/wrgif.c
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -55,7 +55,7 @@
   /* State for packing variable-width codes into a bitstream */
   int n_bits;                   /* current number of bits/code */
   int maxcode;                  /* maximum code, given n_bits */
-  INT32 cur_accum;              /* holds bits not yet output */
+  long cur_accum;               /* holds bits not yet output */
   int cur_bits;                 /* # of bits in cur_accum */
 
   /* State for GIF code assignment */
@@ -109,7 +109,7 @@
 /* Emit a code of n_bits bits */
 /* Uses cur_accum and cur_bits to reblock into 8-bit bytes */
 {
-  dinfo->cur_accum |= ((INT32) code) << dinfo->cur_bits;
+  dinfo->cur_accum |= ((long) code) << dinfo->cur_bits;
   dinfo->cur_bits += dinfo->n_bits;
 
   while (dinfo->cur_bits >= 8) {