Replace INT32 with a new internal datatype (JLONG)

These days, INT32 is a commonly-defined datatype in system headers.  We
cannot eliminate the definition of that datatype from jmorecfg.h, since
the INT32 typedef has technically been part of the libjpeg API since
version 5 (1994).  However, using INT32 internally is risky, because the
inclusion of a particular header (Xmd.h, for instance) could change the
definition of INT32 from long to int on 64-bit platforms and thus change
the internal behavior of libjpeg-turbo in unexpected ways.  For instance,
failing to correctly set __INT32_IS_ACTUALLY_LONG to match the INT32
typedef (perhaps as a result of including the wrong version of
jpeglib.h) could cause libjpeg-turbo to produce incorrect results.

The library has always been built in environments in which INT32 is
effectively long (on Windows, long is always 32-bit, so effectively it's
the same as int), so it makes sense to replace internal uses of INT32
with an explicitly long datatype (JLONG).  This ensures that
libjpeg-turbo will always behave consistently, regardless of the headers
included at compile time.

Addresses a concern expressed in #26.
diff --git a/jdct.h b/jdct.h
index b770f2c..f1c7cdf 100644
--- a/jdct.h
+++ b/jdct.h
@@ -3,8 +3,8 @@
  *
  * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README.ijg
  * file.
  *
@@ -19,7 +19,7 @@
 /*
  * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
  * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
- * for 8-bit samples, INT32 for 12-bit samples.  (NOTE: Floating-point DCT
+ * for 8-bit samples, JLONG for 12-bit samples.  (NOTE: Floating-point DCT
  * implementations use an array of type FAST_FLOAT, instead.)
  * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
  * The DCT outputs are returned scaled up by a factor of 8; they therefore
@@ -41,7 +41,7 @@
 typedef unsigned int UDCTELEM2;
 #endif
 #else
-typedef INT32 DCTELEM;          /* must have 32 bits */
+typedef JLONG DCTELEM;          /* must have 32 bits */
 typedef unsigned long long UDCTELEM2;
 #endif
 
@@ -68,7 +68,7 @@
 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
 #define IFAST_SCALE_BITS  2     /* fractional bits in scale factors */
 #else
-typedef INT32 IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
+typedef JLONG IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
 #define IFAST_SCALE_BITS  13    /* fractional bits in scale factors */
 #endif
 typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
@@ -154,13 +154,13 @@
  * Macros for handling fixed-point arithmetic; these are used by many
  * but not all of the DCT/IDCT modules.
  *
- * All values are expected to be of type INT32.
+ * All values are expected to be of type JLONG.
  * Fractional constants are scaled left by CONST_BITS bits.
  * CONST_BITS is defined within each module using these macros,
  * and may differ from one module to the next.
  */
 
-#define ONE     ((INT32) 1)
+#define ONE     ((JLONG) 1)
 #define CONST_SCALE (ONE << CONST_BITS)
 
 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
@@ -168,16 +168,16 @@
  * thus causing a lot of useless floating-point operations at run time.
  */
 
-#define FIX(x)  ((INT32) ((x) * CONST_SCALE + 0.5))
+#define FIX(x)  ((JLONG) ((x) * CONST_SCALE + 0.5))
 
-/* Descale and correctly round an INT32 value that's scaled by N bits.
+/* Descale and correctly round a JLONG value that's scaled by N bits.
  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
  * the fudge factor is correct for either sign of X.
  */
 
 #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
 
-/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
  * This macro is used only when the two inputs will actually be no more than
  * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
  * full 32x32 multiply.  This provides a useful speedup on many machines.
@@ -190,7 +190,7 @@
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
 #endif
 #ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
-#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((JLONG) (const)))
 #endif
 
 #ifndef MULTIPLY16C16           /* default definition */