The Independent JPEG Group's JPEG software v4
diff --git a/jquant2.c b/jquant2.c
index cf3eab1..c5c4110 100644
--- a/jquant2.c
+++ b/jquant2.c
@@ -913,16 +913,26 @@
 	      JSAMPIMAGE image_data, JSAMPARRAY output_workspace)
 /* This version performs Floyd-Steinberg dithering */
 {
-  register FSERROR val;
-  register FSERRPTR thisrowerr, nextrowerr;
+#ifdef EIGHT_BIT_SAMPLES
+  register int c0, c1, c2;
+  int two_val;
+#else
   register FSERROR c0, c1, c2;
-  register int pixcode;
+  FSERROR two_val;
+#endif
+  register FSERRPTR thisrowerr, nextrowerr;
   JSAMPROW ptr0, ptr1, ptr2, outptr;
   histptr cachep;
+  register int pixcode;
   int dir;
-  long col;
   int row;
+  long col;
   long width = cinfo->image_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  JSAMPROW colormap0 = my_colormap[0];
+  JSAMPROW colormap1 = my_colormap[1];
+  JSAMPROW colormap2 = my_colormap[2];
+  SHIFT_TEMPS
 
   /* Convert data to colormap indexes, which we save in output_workspace */
   for (row = 0; row < num_rows; row++) {
@@ -950,33 +960,21 @@
     /* need only initialize this one entry in nextrowerr */
     nextrowerr[0] = nextrowerr[1] = nextrowerr[2] = 0;
     for (col = width; col > 0; col--) {
-      /* Get this pixel's value and add accumulated errors */
-      /* The errors are in units of 1/16th pixel value */
-      val = (GETJSAMPLE(*ptr0) << 4) + thisrowerr[0];
-      if (val <= 0) val = 0;	/* must watch for range overflow! */
-      else {
-	val += 8;		/* divide by 16 with proper rounding */
-	val >>= 4;
-	if (val > MAXJSAMPLE) val = MAXJSAMPLE;
-      }
-      c0 = val;
-      val = (GETJSAMPLE(*ptr1) << 4) + thisrowerr[1];
-      if (val <= 0) val = 0;	/* must watch for range overflow! */
-      else {
-	val += 8;		/* divide by 16 with proper rounding */
-	val >>= 4;
-	if (val > MAXJSAMPLE) val = MAXJSAMPLE;
-      }
-      c1 = val;
-      val = (GETJSAMPLE(*ptr2) << 4) + thisrowerr[2];
-      if (val <= 0) val = 0;	/* must watch for range overflow! */
-      else {
-	val += 8;		/* divide by 16 with proper rounding */
-	val >>= 4;
-	if (val > MAXJSAMPLE) val = MAXJSAMPLE;
-      }
-      c2 = val;
-      /* Index into the cache with adjusted value */
+      /* For each component, get accumulated error and round to integer;
+       * form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.  Max error is +- MAXJSAMPLE.
+       */
+      c0 = RIGHT_SHIFT(thisrowerr[0] + 8, 4);
+      c1 = RIGHT_SHIFT(thisrowerr[1] + 8, 4);
+      c2 = RIGHT_SHIFT(thisrowerr[2] + 8, 4);
+      c0 += GETJSAMPLE(*ptr0);
+      c1 += GETJSAMPLE(*ptr1);
+      c2 += GETJSAMPLE(*ptr2);
+      c0 = GETJSAMPLE(range_limit[c0]);
+      c1 = GETJSAMPLE(range_limit[c1]);
+      c2 = GETJSAMPLE(range_limit[c2]);
+      /* Index into the cache with adjusted pixel value */
       cachep = & histogram[c0 >> Y_SHIFT][c1 >> C_SHIFT][c2 >> C_SHIFT];
       /* If we have not seen this color before, find nearest colormap */
       /* entry and update the cache */
@@ -986,34 +984,34 @@
       pixcode = *cachep - 1;
       *outptr = (JSAMPLE) pixcode;
       /* Compute representation error for this pixel */
-      c0 -= (FSERROR) GETJSAMPLE(my_colormap[0][pixcode]);
-      c1 -= (FSERROR) GETJSAMPLE(my_colormap[1][pixcode]);
-      c2 -= (FSERROR) GETJSAMPLE(my_colormap[2][pixcode]);
+      c0 -= GETJSAMPLE(colormap0[pixcode]);
+      c1 -= GETJSAMPLE(colormap1[pixcode]);
+      c2 -= GETJSAMPLE(colormap2[pixcode]);
       /* Propagate error to adjacent pixels */
       /* Remember that nextrowerr entries are in reverse order! */
-      val = c0 * 2;
+      two_val = c0 * 2;
       nextrowerr[0-3]  = c0;	/* not +=, since not initialized yet */
-      c0 += val;		/* form error * 3 */
+      c0 += two_val;		/* form error * 3 */
       nextrowerr[0+3] += c0;
-      c0 += val;		/* form error * 5 */
+      c0 += two_val;		/* form error * 5 */
       nextrowerr[0  ] += c0;
-      c0 += val;		/* form error * 7 */
+      c0 += two_val;		/* form error * 7 */
       thisrowerr[0+3] += c0;
-      val = c1 * 2;
+      two_val = c1 * 2;
       nextrowerr[1-3]  = c1;	/* not +=, since not initialized yet */
-      c1 += val;		/* form error * 3 */
+      c1 += two_val;		/* form error * 3 */
       nextrowerr[1+3] += c1;
-      c1 += val;		/* form error * 5 */
+      c1 += two_val;		/* form error * 5 */
       nextrowerr[1  ] += c1;
-      c1 += val;		/* form error * 7 */
+      c1 += two_val;		/* form error * 7 */
       thisrowerr[1+3] += c1;
-      val = c2 * 2;
+      two_val = c2 * 2;
       nextrowerr[2-3]  = c2;	/* not +=, since not initialized yet */
-      c2 += val;		/* form error * 3 */
+      c2 += two_val;		/* form error * 3 */
       nextrowerr[2+3] += c2;
-      c2 += val;		/* form error * 5 */
+      c2 += two_val;		/* form error * 5 */
       nextrowerr[2  ] += c2;
-      c2 += val;		/* form error * 7 */
+      c2 += two_val;		/* form error * 7 */
       thisrowerr[2+3] += c2;
       /* Advance to next column */
       ptr0 += dir;