The Independent JPEG Group's JPEG software v5
diff --git a/jquant2.c b/jquant2.c
index 4f3b191..7984f58 100644
--- a/jquant2.c
+++ b/jquant2.c
@@ -1,16 +1,25 @@
 /*
  * jquant2.c
  *
- * Copyright (C) 1991, 1992, 1993, Thomas G. Lane.
+ * Copyright (C) 1991-1994, Thomas G. Lane.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains 2-pass color quantization (color mapping) routines.
- * These routines are invoked via the methods color_quant_prescan,
- * color_quant_doit, and color_quant_init/term.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
  */
 
+#define JPEG_INTERNALS
 #include "jinclude.h"
+#include "jpeglib.h"
 
 #ifdef QUANT_2PASS_SUPPORTED
 
@@ -23,13 +32,16 @@
  *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
  *
  * In the first pass over the image, we accumulate a histogram showing the
- * usage count of each possible color.  (To keep the histogram to a reasonable
+ * usage count of each possible color.  To keep the histogram to a reasonable
  * size, we reduce the precision of the input; typical practice is to retain
  * 5 or 6 bits per color, so that 8 or 4 different input values are counted
- * in the same histogram cell.)  Next, the color-selection step begins with a
- * box representing the whole color space, and repeatedly splits the "largest"
- * remaining box until we have as many boxes as desired colors.  Then the mean
- * color in each remaining box becomes one of the possible output colors.
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
  * The second pass over the image maps each input pixel to the closest output
  * color (optionally after applying a Floyd-Steinberg dithering correction).
  * This mapping is logically trivial, but making it go fast enough requires
@@ -40,28 +52,57 @@
  * used here have proved out well in experimental comparisons, but better ones
  * may yet be found.
  *
- * The most significant difference between this quantizer and others is that
- * this one is intended to operate in YCbCr colorspace, rather than RGB space
- * as is usually done.  Actually we work in scaled YCbCr colorspace, where
- * Y distances are inflated by a factor of 2 relative to Cb or Cr distances.
- * The empirical evidence is that distances in this space correspond to
- * perceptual color differences more closely than do distances in RGB space;
- * and working in this space is inexpensive within a JPEG decompressor, since
- * the input data is already in YCbCr form.  (We could transform to an even
- * more perceptually linear space such as Lab or Luv, but that is very slow
- * and doesn't yield much better results than scaled YCbCr.)
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
  */
 
-#define Y_SCALE 2		/* scale Y distances up by this much */
+#define R_SCALE 2		/* scale R distances by this much */
+#define G_SCALE 3		/* scale G distances by this much */
+#define B_SCALE 1		/* and B by this much */
 
-#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0		/* component 0 is red (R,G,B order) */
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0		/* component 0 is blue (B,G,R order) */
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1		/* green must be the middle component */
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2		/* component 2 is red (B,G,R order) */
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2		/* component 2 is blue (R,G,B order) */
+#define C2_SCALE B_SCALE
+#endif
 
 
 /*
  * First we have the histogram data structure and routines for creating it.
  *
- * For work in YCbCr space, it is useful to keep more precision for Y than
- * for Cb or Cr.  We recommend keeping 6 bits for Y and 5 bits each for Cb/Cr.
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
  * If you have plenty of memory and cycles, 6 bits all around gives marginally
  * better results; if you are short of memory, 5 bits all around will save
  * some space but degrade the results.
@@ -77,119 +118,165 @@
  * Since the JPEG code is intended to run in small memory model on 80x86
  * machines, we can't just allocate the histogram in one chunk.  Instead
  * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
- * pointer corresponds to a Y value (typically 2^6 = 64 pointers) and
- * each 2-D array has 2^5^2 = 1024 or 2^6^2 = 4096 entries.  Note that
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
  * on 80x86 machines, the pointer row is in near memory but the actual
  * arrays are in far memory (same arrangement as we use for image arrays).
  */
 
-#ifndef HIST_Y_BITS		/* so you can override from Makefile */
-#define HIST_Y_BITS  6		/* bits of precision in Y histogram */
-#endif
-#ifndef HIST_C_BITS		/* so you can override from Makefile */
-#define HIST_C_BITS  5		/* bits of precision in Cb/Cr histogram */
-#endif
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
 
-#define HIST_Y_ELEMS  (1<<HIST_Y_BITS) /* # of elements along histogram axes */
-#define HIST_C_ELEMS  (1<<HIST_C_BITS)
-
-/* These are the amounts to shift an input value to get a histogram index.
- * For a combination 8/12 bit implementation, would need variables here...
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
  */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
 
-#define Y_SHIFT  (BITS_IN_JSAMPLE-HIST_Y_BITS)
-#define C_SHIFT  (BITS_IN_JSAMPLE-HIST_C_BITS)
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
 
 
-typedef UINT16 histcell;	/* histogram cell; MUST be an unsigned type */
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
 
 typedef histcell FAR * histptr;	/* for pointers to histogram cells */
 
-typedef histcell hist1d[HIST_C_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the Y-level pointers */
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
 typedef hist2d * hist3d;	/* type for top-level pointer */
 
-static hist3d histogram;	/* pointer to the histogram */
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
 
 
 /*
  * Prescan some rows of pixels.
  * In this module the prescan simply updates the histogram, which has been
- * initialized to zeroes by color_quant_init.
- * Note: workspace is probably not useful for this routine, but it is passed
- * anyway to allow some code sharing within the pipeline controller.
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
  */
 
 METHODDEF void
-color_quant_prescan (decompress_info_ptr cinfo, int num_rows,
-		     JSAMPIMAGE image_data, JSAMPARRAY workspace)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		  JSAMPARRAY output_buf, int num_rows) /* output_buf unused */
 {
-  register JSAMPROW ptr0, ptr1, ptr2;
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW ptr;	/* walks the 3-sample pixels of one row */
   register histptr histp;
-  register int c0, c1, c2;
+  register hist3d histogram = cquantize->histogram;
   int row;
-  long col;
-  long width = cinfo->image_width;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
 
   for (row = 0; row < num_rows; row++) {
-    ptr0 = image_data[0][row];
-    ptr1 = image_data[1][row];
-    ptr2 = image_data[2][row];
+    ptr = input_buf[row];
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the histogram */
-      c0 = GETJSAMPLE(*ptr0++) >> Y_SHIFT;
-      c1 = GETJSAMPLE(*ptr1++) >> C_SHIFT;
-      c2 = GETJSAMPLE(*ptr2++) >> C_SHIFT;
-      histp = & histogram[c0][c1][c2];
+      histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
+			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
       /* increment, check for overflow and undo increment if so. */
-      /* We assume unsigned representation here! */
-      if (++(*histp) == 0)
+      if (++(*histp) <= 0)	/* <= 0 presumably also traps a signed histcell going negative */
 	(*histp)--;
+      ptr += 3;			/* step to the next pixel (3 samples each) */
     }
   }
 }
 
 
 /*
- * Now we have the really interesting routines: selection of a colormap
+ * Next we have the really interesting routines: selection of a colormap
  * given the completed histogram.
  * These routines work with a list of "boxes", each representing a rectangular
  * subset of the input color space (to histogram precision).
  */
 
 typedef struct {
-	/* The bounds of the box (inclusive); expressed as histogram indexes */
-	int c0min, c0max;
-	int c1min, c1max;
-	int c2min, c2max;
-	/* The number of nonzero histogram cells within this box */
-	long colorcount;
-      } box;
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  INT32 volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
 typedef box * boxptr;
 
-static boxptr boxlist;		/* array with room for desired # of boxes */
-static int numboxes;		/* number of boxes currently in boxlist */
-
-static JSAMPARRAY my_colormap;	/* the finished colormap (in YCbCr space) */
-
 
 LOCAL boxptr
-find_biggest_color_pop (void)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
 /* Find the splittable box with the largest color population */
 /* Returns NULL if no splittable boxes remain */
 {
   register boxptr boxp;
   register int i;
-  register long max = 0;
+  register long maxc = 0;
   boxptr which = NULL;
   
   for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    if (boxp->colorcount > max) {
-      if (boxp->c0max > boxp->c0min || boxp->c1max > boxp->c1min ||
-	  boxp->c2max > boxp->c2min) {
-	which = boxp;
-	max = boxp->colorcount;
-      }
+    if (boxp->colorcount > maxc && boxp->volume > 0) {
+      which = boxp;
+      maxc = boxp->colorcount;
     }
   }
   return which;
@@ -197,33 +284,19 @@
 
 
 LOCAL boxptr
-find_biggest_volume (void)
+find_biggest_volume (boxptr boxlist, int numboxes)
 /* Find the splittable box with the largest (scaled) volume */
 /* Returns NULL if no splittable boxes remain */
 {
   register boxptr boxp;
   register int i;
-  register INT32 max = 0;
-  register INT32 norm, c0,c1,c2;
+  register INT32 maxv = 0;
   boxptr which = NULL;
   
-  /* We use 2-norm rather than real volume here.
-   * Some care is needed since the differences are expressed in
-   * histogram-cell units; if HIST_Y_BITS != HIST_C_BITS, we have to
-   * adjust the scaling to get the proper scaled-YCbCr-space distance.
-   * This code won't work right if HIST_Y_BITS < HIST_C_BITS,
-   * but that shouldn't ever be true.
-   * Note norm > 0 iff box is splittable, so need not check separately.
-   */
-  
   for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
-    c0 = (boxp->c0max - boxp->c0min) * Y_SCALE;
-    c1 = (boxp->c1max - boxp->c1min) << (HIST_Y_BITS-HIST_C_BITS);
-    c2 = (boxp->c2max - boxp->c2min) << (HIST_Y_BITS-HIST_C_BITS);
-    norm = c0*c0 + c1*c1 + c2*c2;
-    if (norm > max) {
+    if (boxp->volume > maxv) {
       which = boxp;
-      max = norm;
+      maxv = boxp->volume;
     }
   }
   return which;
@@ -231,13 +304,16 @@
 
 
 LOCAL void
-update_box (boxptr boxp)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
 /* Shrink the min/max bounds of a box to enclose only nonzero elements, */
-/* and recompute its population */
+/* and recompute its volume and population */
 {
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
   histptr histp;
   int c0,c1,c2;
   int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
   long ccount;
   
   c0min = boxp->c0min;  c0max = boxp->c0max;
@@ -292,7 +368,7 @@
     for (c2 = c2min; c2 <= c2max; c2++)
       for (c0 = c0min; c0 <= c0max; c0++) {
 	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C_ELEMS)
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
 	  if (*histp != 0) {
 	    boxp->c2min = c2min = c2;
 	    goto have_c2min;
@@ -303,13 +379,26 @@
     for (c2 = c2max; c2 >= c2min; c2--)
       for (c0 = c0min; c0 <= c0max; c0++) {
 	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C_ELEMS)
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
 	  if (*histp != 0) {
 	    boxp->c2max = c2max = c2;
 	    goto have_c2max;
 	  }
       }
  have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
   
   /* Now scan remaining volume of box and compute population */
   ccount = 0;
@@ -325,8 +414,9 @@
 }
 
 
-LOCAL void
-median_cut (int desired_colors)
+LOCAL int
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
 /* Repeatedly select and split the largest box until we have enough boxes */
 {
   int n,lb;
@@ -334,12 +424,13 @@
   register boxptr b1,b2;
 
   while (numboxes < desired_colors) {
-    /* Select box to split */
-    /* Current algorithm: by population for first half, then by volume */
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
     if (numboxes*2 <= desired_colors) {
-      b1 = find_biggest_color_pop();
+      b1 = find_biggest_color_pop(boxlist, numboxes);
     } else {
-      b1 = find_biggest_volume();
+      b1 = find_biggest_volume(boxlist, numboxes);
     }
     if (b1 == NULL)		/* no splittable boxes left! */
       break;
@@ -349,14 +440,23 @@
     b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
     /* Choose which axis to split the box on.
      * Current algorithm: longest scaled axis.
-     * See notes in find_biggest_volume about scaling...
+     * See notes in update_box about scaling distances.
      */
-    c0 = (b1->c0max - b1->c0min) * Y_SCALE;
-    c1 = (b1->c1max - b1->c1min) << (HIST_Y_BITS-HIST_C_BITS);
-    c2 = (b1->c2max - b1->c2min) << (HIST_Y_BITS-HIST_C_BITS);
-    cmax = c0; n = 0;
-    if (c1 > cmax) { cmax = c1; n = 1; }
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
     if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
     /* Choose split point along selected axis, and update box bounds.
      * Current algorithm: split at halfway point.
      * (Since the box has been shrunk to minimum volume,
@@ -381,19 +481,22 @@
       break;
     }
     /* Update stats for boxes */
-    update_box(b1);
-    update_box(b2);
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
     numboxes++;
   }
+  return numboxes;
 }
 
 
 LOCAL void
-compute_color (boxptr boxp, int icolor)
-/* Compute representative color for a box, put it in my_colormap[icolor] */
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
 {
   /* Current algorithm: mean weighted by pixels (not colors) */
   /* Note it is important to get the rounding correct! */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
   histptr histp;
   int c0,c1,c2;
   int c0min,c0max,c1min,c1max,c2min,c2max;
@@ -413,73 +516,48 @@
       for (c2 = c2min; c2 <= c2max; c2++) {
 	if ((count = *histp++) != 0) {
 	  total += count;
-	  c0total += ((c0 << Y_SHIFT) + ((1<<Y_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C_SHIFT) + ((1<<C_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C_SHIFT) + ((1<<C_SHIFT)>>1)) * count;
+	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
 	}
       }
     }
   
-  my_colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
-  my_colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
-  my_colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
 }
 
 
 LOCAL void
-remap_colormap (decompress_info_ptr cinfo)
-/* Remap the internal colormap to the output colorspace */
-{
-  /* This requires a little trickery since color_convert expects to
-   * deal with 3-D arrays (a 2-D sample array for each component).
-   * We must promote the colormaps into one-row 3-D arrays.
-   */
-  short ci;
-  JSAMPARRAY input_hack[3];
-  JSAMPARRAY output_hack[10];	/* assume no more than 10 output components */
-
-  for (ci = 0; ci < 3; ci++)
-    input_hack[ci] = &(my_colormap[ci]);
-  for (ci = 0; ci < cinfo->color_out_comps; ci++)
-    output_hack[ci] = &(cinfo->colormap[ci]);
-
-  (*cinfo->methods->color_convert) (cinfo, 1,
-				    (long) cinfo->actual_number_of_colors,
-				    input_hack, output_hack);
-}
-
-
-LOCAL void
-select_colors (decompress_info_ptr cinfo)
+select_colors (j_decompress_ptr cinfo)
 /* Master routine for color selection */
 {
+  boxptr boxlist;		/* array with room for 'desired' boxes */
+  int numboxes;			/* number of boxes currently in boxlist */
   int desired = cinfo->desired_number_of_colors;
   int i;
 
   /* Allocate workspace for box list */
-  boxlist = (boxptr) (*cinfo->emethods->alloc_small) (desired * SIZEOF(box));
+  boxlist = (boxptr) (*cinfo->mem->alloc_small)	/* no free below: presumably reclaimed with the JPOOL_IMAGE pool */
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired * SIZEOF(box));
   /* Initialize one box containing whole space */
   numboxes = 1;
   boxlist[0].c0min = 0;
-  boxlist[0].c0max = MAXJSAMPLE >> Y_SHIFT;
+  boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
   boxlist[0].c1min = 0;
-  boxlist[0].c1max = MAXJSAMPLE >> C_SHIFT;
+  boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
   boxlist[0].c2min = 0;
-  boxlist[0].c2max = MAXJSAMPLE >> C_SHIFT;
+  boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
   /* Shrink it to actually-used volume and set its statistics */
-  update_box(& boxlist[0]);
+  update_box(cinfo, & boxlist[0]);
   /* Perform median-cut to produce final box list */
-  median_cut(desired);
-  /* Compute the representative color for each box, fill my_colormap[] */
+  numboxes = median_cut(cinfo, boxlist, numboxes, desired);
+  /* Compute the representative color for each box, fill colormap */
   for (i = 0; i < numboxes; i++)
-    compute_color(& boxlist[i], i);
+    compute_color(cinfo, & boxlist[i], i);
   cinfo->actual_number_of_colors = numboxes;
-  /* Produce an output colormap in the desired output colorspace */
-  remap_colormap(cinfo);
-  TRACEMS1(cinfo->emethods, 1, "Selected %d colors for quantization",
-	   numboxes);
-  /* Done with the box list */
-  (*cinfo->emethods->free_small) ((void *) boxlist);
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
 }
 
 
@@ -511,17 +589,13 @@
  * cell (because the inner loop has to be over cells, not colormap entries).
  * The work array elements have to be INT32s, so the work array would need
  * 256Kb at our recommended precision.  This is not feasible in DOS machines.
- * Another disadvantage of the brute force approach is that it computes
- * distances to every cell of the cubical histogram.  When working with YCbCr
- * input, only about a quarter of the cube represents realizable colors, so
- * many of the cells will never be used and filling them is wasted effort.
  *
  * To get around these problems, we apply Thomas' method to compute the
  * nearest colors for only the cells within a small subbox of the histogram.
  * The work array need be only as big as the subbox, so the memory usage
- * problem is solved.  A subbox is processed only when some cell in it is
- * referenced by the pass2 routines, so we will never bother with cells far
- * outside the realizable color volume.  An additional advantage of this
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
  * approach is that we can apply Heckbert's locality criterion to quickly
  * eliminate colormap entries that are far away from the subbox; typically
  * three-fourths of the colormap entries are rejected by Heckbert's criterion,
@@ -541,18 +615,18 @@
  */
 
 
-#ifndef BOX_Y_LOG		/* so you can override from Makefile */
-#define BOX_Y_LOG  (HIST_Y_BITS-3) /* log2(hist cells in update box, Y axis) */
-#endif
-#ifndef BOX_C_LOG		/* so you can override from Makefile */
-#define BOX_C_LOG  (HIST_C_BITS-3) /* log2(hist cells in update box, C axes) */
-#endif
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
 
-#define BOX_Y_ELEMS  (1<<BOX_Y_LOG) /* # of hist cells in update box */
-#define BOX_C_ELEMS  (1<<BOX_C_LOG)
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
 
-#define BOX_Y_SHIFT  (Y_SHIFT + BOX_Y_LOG)
-#define BOX_C_SHIFT  (C_SHIFT + BOX_C_LOG)
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
 
 
 /*
@@ -564,7 +638,7 @@
  */
 
 LOCAL int
-find_nearby_colors (decompress_info_ptr cinfo, int minc0, int minc1, int minc2,
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 		    JSAMPLE colorlist[])
 /* Locate the colormap entries close enough to an update box to be candidates
  * for the nearest entry to some cell(s) in the update box.  The update box
@@ -588,11 +662,11 @@
    * Note that since ">>" rounds down, the "center" values may be closer to
    * min than to max; hence comparisons to them must be "<=", not "<".
    */
-  maxc0 = minc0 + ((1 << BOX_Y_SHIFT) - (1 << Y_SHIFT));
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
   centerc0 = (minc0 + maxc0) >> 1;
-  maxc1 = minc1 + ((1 << BOX_C_SHIFT) - (1 << C_SHIFT));
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
   centerc1 = (minc1 + maxc1) >> 1;
-  maxc2 = minc2 + ((1 << BOX_C_SHIFT) - (1 << C_SHIFT));
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
   centerc2 = (minc2 + maxc2) >> 1;
 
   /* For each color in colormap, find:
@@ -602,75 +676,74 @@
    * Both of these can be found by considering only the corners of the box.
    * We save the minimum distance for each color in mindist[];
    * only the smallest maximum distance is of interest.
-   * Note we have to scale Y to get correct distance in scaled space.
    */
   minmaxdist = 0x7FFFFFFFL;
 
   for (i = 0; i < numcolors; i++) {
     /* We compute the squared-c0-distance term, then add in the other two. */
-    x = GETJSAMPLE(my_colormap[0][i]);
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
     if (x < minc0) {
-      tdist = (x - minc0) * Y_SCALE;
+      tdist = (x - minc0) * C0_SCALE;
       min_dist = tdist*tdist;
-      tdist = (x - maxc0) * Y_SCALE;
+      tdist = (x - maxc0) * C0_SCALE;
       max_dist = tdist*tdist;
     } else if (x > maxc0) {
-      tdist = (x - maxc0) * Y_SCALE;
+      tdist = (x - maxc0) * C0_SCALE;
       min_dist = tdist*tdist;
-      tdist = (x - minc0) * Y_SCALE;
+      tdist = (x - minc0) * C0_SCALE;
       max_dist = tdist*tdist;
     } else {
       /* within cell range so no contribution to min_dist */
       min_dist = 0;
       if (x <= centerc0) {
-	tdist = (x - maxc0) * Y_SCALE;
+	tdist = (x - maxc0) * C0_SCALE;
 	max_dist = tdist*tdist;
       } else {
-	tdist = (x - minc0) * Y_SCALE;
+	tdist = (x - minc0) * C0_SCALE;
 	max_dist = tdist*tdist;
       }
     }
 
-    x = GETJSAMPLE(my_colormap[1][i]);
+    x = GETJSAMPLE(cinfo->colormap[1][i]);
     if (x < minc1) {
-      tdist = x - minc1;
+      tdist = (x - minc1) * C1_SCALE;
       min_dist += tdist*tdist;
-      tdist = x - maxc1;
+      tdist = (x - maxc1) * C1_SCALE;
       max_dist += tdist*tdist;
     } else if (x > maxc1) {
-      tdist = x - maxc1;
+      tdist = (x - maxc1) * C1_SCALE;
       min_dist += tdist*tdist;
-      tdist = x - minc1;
+      tdist = (x - minc1) * C1_SCALE;
       max_dist += tdist*tdist;
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc1) {
-	tdist = x - maxc1;
+	tdist = (x - maxc1) * C1_SCALE;
 	max_dist += tdist*tdist;
       } else {
-	tdist = x - minc1;
+	tdist = (x - minc1) * C1_SCALE;
 	max_dist += tdist*tdist;
       }
     }
 
-    x = GETJSAMPLE(my_colormap[2][i]);
+    x = GETJSAMPLE(cinfo->colormap[2][i]);
     if (x < minc2) {
-      tdist = x - minc2;
+      tdist = (x - minc2) * C2_SCALE;
       min_dist += tdist*tdist;
-      tdist = x - maxc2;
+      tdist = (x - maxc2) * C2_SCALE;
       max_dist += tdist*tdist;
     } else if (x > maxc2) {
-      tdist = x - maxc2;
+      tdist = (x - maxc2) * C2_SCALE;
       min_dist += tdist*tdist;
-      tdist = x - minc2;
+      tdist = (x - minc2) * C2_SCALE;
       max_dist += tdist*tdist;
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc2) {
-	tdist = x - maxc2;
+	tdist = (x - maxc2) * C2_SCALE;
 	max_dist += tdist*tdist;
       } else {
-	tdist = x - minc2;
+	tdist = (x - minc2) * C2_SCALE;
 	max_dist += tdist*tdist;
       }
     }
@@ -694,7 +767,7 @@
 
 
 LOCAL void
-find_best_colors (decompress_info_ptr cinfo, int minc0, int minc1, int minc2,
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
 		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
 /* Find the closest colormap entry for each cell in the update box,
  * given the list of candidate colors prepared by find_nearby_colors.
@@ -713,72 +786,74 @@
   register INT32 xx2;
   INT32 inc0, inc1, inc2;	/* initial values for increments */
   /* This array holds the distance to the nearest-so-far color for each cell */
-  INT32 bestdist[BOX_Y_ELEMS * BOX_C_ELEMS * BOX_C_ELEMS];
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
   /* Initialize best-distance for each cell of the update box */
   bptr = bestdist;
-  for (i = BOX_Y_ELEMS*BOX_C_ELEMS*BOX_C_ELEMS-1; i >= 0; i--)
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
     *bptr++ = 0x7FFFFFFFL;
   
   /* For each color selected by find_nearby_colors,
    * compute its distance to the center of each cell in the box.
    * If that's less than best-so-far, update best distance and color number.
-   * Note we have to scale Y to get correct distance in scaled space.
    */
   
   /* Nominal steps between cell centers ("x" in Thomas article) */
-#define STEP_Y  ((1 << Y_SHIFT) * Y_SCALE)
-#define STEP_C  (1 << C_SHIFT)
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
   
   for (i = 0; i < numcolors; i++) {
     icolor = GETJSAMPLE(colorlist[i]);
     /* Compute (square of) distance from minc0/c1/c2 to this color */
-    inc0 = (minc0 - (int) GETJSAMPLE(my_colormap[0][icolor])) * Y_SCALE;
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
     dist0 = inc0*inc0;
-    inc1 = minc1 - (int) GETJSAMPLE(my_colormap[1][icolor]);
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
     dist0 += inc1*inc1;
-    inc2 = minc2 - (int) GETJSAMPLE(my_colormap[2][icolor]);
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
     dist0 += inc2*inc2;
     /* Form the initial difference increments */
-    inc0 = inc0 * (2 * STEP_Y) + STEP_Y * STEP_Y;
-    inc1 = inc1 * (2 * STEP_C) + STEP_C * STEP_C;
-    inc2 = inc2 * (2 * STEP_C) + STEP_C * STEP_C;
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
     /* Now loop over all cells in box, updating distance per Thomas method */
     bptr = bestdist;
     cptr = bestcolor;
     xx0 = inc0;
-    for (ic0 = BOX_Y_ELEMS-1; ic0 >= 0; ic0--) {
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
       dist1 = dist0;
       xx1 = inc1;
-      for (ic1 = BOX_C_ELEMS-1; ic1 >= 0; ic1--) {
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
 	dist2 = dist1;
 	xx2 = inc2;
-	for (ic2 = BOX_C_ELEMS-1; ic2 >= 0; ic2--) {
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
 	  if (dist2 < *bptr) {
 	    *bptr = dist2;
 	    *cptr = (JSAMPLE) icolor;
 	  }
 	  dist2 += xx2;
-	  xx2 += 2 * STEP_C * STEP_C;
+	  xx2 += 2 * STEP_C2 * STEP_C2;
 	  bptr++;
 	  cptr++;
 	}
 	dist1 += xx1;
-	xx1 += 2 * STEP_C * STEP_C;
+	xx1 += 2 * STEP_C1 * STEP_C1;
       }
       dist0 += xx0;
-      xx0 += 2 * STEP_Y * STEP_Y;
+      xx0 += 2 * STEP_C0 * STEP_C0;
     }
   }
 }
 
 
 LOCAL void
-fill_inverse_cmap (decompress_info_ptr cinfo, int c0, int c1, int c2)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
 /* Fill the inverse-colormap entries in the update box that contains */
 /* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
 /* we can fill as many others as we wish.) */
 {
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
   int minc0, minc1, minc2;	/* lower left corner of update box */
   int ic0, ic1, ic2;
   register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
@@ -787,20 +862,20 @@
   JSAMPLE colorlist[MAXNUMCOLORS];
   int numcolors;		/* number of candidate colors */
   /* This array holds the actually closest colormap index for each cell. */
-  JSAMPLE bestcolor[BOX_Y_ELEMS * BOX_C_ELEMS * BOX_C_ELEMS];
+  JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
   /* Convert cell coordinates to update box ID */
-  c0 >>= BOX_Y_LOG;
-  c1 >>= BOX_C_LOG;
-  c2 >>= BOX_C_LOG;
+  c0 >>= BOX_C0_LOG;
+  c1 >>= BOX_C1_LOG;
+  c2 >>= BOX_C2_LOG;
 
   /* Compute true coordinates of update box's origin corner.
    * Actually we compute the coordinates of the center of the corner
    * histogram cell, which are the lower bounds of the volume we care about.
    */
-  minc0 = (c0 << BOX_Y_SHIFT) + ((1 << Y_SHIFT) >> 1);
-  minc1 = (c1 << BOX_C_SHIFT) + ((1 << C_SHIFT) >> 1);
-  minc2 = (c2 << BOX_C_SHIFT) + ((1 << C_SHIFT) >> 1);
+  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
   
   /* Determine which colormap entries are close enough to be candidates
    * for the nearest entry to some cell in the update box.
@@ -812,14 +887,14 @@
 		   bestcolor);
 
   /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_Y_LOG;		/* convert ID back to base cell indexes */
-  c1 <<= BOX_C_LOG;
-  c2 <<= BOX_C_LOG;
+  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c1 <<= BOX_C1_LOG;
+  c2 <<= BOX_C2_LOG;
   cptr = bestcolor;
-  for (ic0 = 0; ic0 < BOX_Y_ELEMS; ic0++) {
-    for (ic1 = 0; ic1 < BOX_C_ELEMS; ic1++) {
+  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
+    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
       cachep = & histogram[c0+ic0][c1+ic1][c2];
-      for (ic2 = 0; ic2 < BOX_C_ELEMS; ic2++) {
+      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
 	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
       }
     }
@@ -828,37 +903,31 @@
 
 
 /*
- * These routines perform second-pass scanning of the image: map each pixel to
- * the proper colormap index, and output the indexes to the output file.
- *
- * output_workspace is a one-component array of pixel dimensions at least
- * as large as the input image strip; it can be used to hold the converted
- * pixels' colormap indexes.
+ * Map some rows of pixels to the output colormapped representation.
  */
 
 METHODDEF void
-pass2_nodither (decompress_info_ptr cinfo, int num_rows,
-		JSAMPIMAGE image_data, JSAMPARRAY output_workspace)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs no dithering */
 {
-  register JSAMPROW ptr0, ptr1, ptr2, outptr;
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register JSAMPROW inptr, outptr;
   register histptr cachep;
   register int c0, c1, c2;
   int row;
-  long col;
-  long width = cinfo->image_width;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
 
-  /* Convert data to colormap indexes, which we save in output_workspace */
   for (row = 0; row < num_rows; row++) {
-    ptr0 = image_data[0][row];
-    ptr1 = image_data[1][row];
-    ptr2 = image_data[2][row];
-    outptr = output_workspace[row];
+    inptr = input_buf[row];
+    outptr = output_buf[row];
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the cache */
-      c0 = GETJSAMPLE(*ptr0++) >> Y_SHIFT;
-      c1 = GETJSAMPLE(*ptr1++) >> C_SHIFT;
-      c2 = GETJSAMPLE(*ptr2++) >> C_SHIFT;
+      c0 = GETJSAMPLE(*inptr++) >> C0_SHIFT;
+      c1 = GETJSAMPLE(*inptr++) >> C1_SHIFT;
+      c2 = GETJSAMPLE(*inptr++) >> C2_SHIFT;
       cachep = & histogram[c0][c1][c2];
       /* If we have not seen this color before, find nearest colormap entry */
       /* and update the cache */
@@ -868,94 +937,52 @@
       *outptr++ = (JSAMPLE) (*cachep - 1);
     }
   }
-  /* Emit converted rows to the output file */
-  (*cinfo->methods->put_pixel_rows) (cinfo, num_rows, &output_workspace);
 }
 
 
-/* Declarations for Floyd-Steinberg dithering.
- *
- * Errors are accumulated into the array fserrors[], at a resolution of
- * 1/16th of a pixel count.  The error at a given pixel is propagated
- * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
- * We work left-to-right on even rows, right-to-left on odd rows.
- *
- * We can get away with a single array (holding one row's worth of errors)
- * by using it to store the current row's errors at pixel columns not yet
- * processed, but the next row's errors at columns already processed.  We
- * need only a few extra variables to hold the errors immediately around the
- * current column.  (If we are lucky, those variables are in registers, but
- * even if not, they're probably cheaper to access than array elements are.)
- *
- * The fserrors[] array has (#columns + 2) entries; the extra entry at
- * each end saves us from special-casing the first and last pixels.
- * Each entry is three values long, one value for each color component.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_medium.
- */
-
-#ifdef EIGHT_BIT_SAMPLES
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
-#else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
-#endif
-
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
-
-static FSERRPTR fserrors;	/* accumulated errors */
-static boolean on_odd_row;	/* flag to remember which row we are on */
-
-
 METHODDEF void
-pass2_dither (decompress_info_ptr cinfo, int num_rows,
-	      JSAMPIMAGE image_data, JSAMPARRAY output_workspace)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs Floyd-Steinberg dithering */
 {
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
   register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
   LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
   LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
   register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW ptr0, ptr1, ptr2;	/* => current input pixel */
+  JSAMPROW inptr;		/* => current input pixel */
   JSAMPROW outptr;		/* => current output pixel */
   histptr cachep;
   int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing errorptr */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
   int row;
-  long col;
-  long width = cinfo->image_width;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
   JSAMPLE *range_limit = cinfo->sample_range_limit;
-  JSAMPROW colormap0 = my_colormap[0];
-  JSAMPROW colormap1 = my_colormap[1];
-  JSAMPROW colormap2 = my_colormap[2];
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
   SHIFT_TEMPS
 
-  /* Convert data to colormap indexes, which we save in output_workspace */
   for (row = 0; row < num_rows; row++) {
-    ptr0 = image_data[0][row];
-    ptr1 = image_data[1][row];
-    ptr2 = image_data[2][row];
-    outptr = output_workspace[row];
-    if (on_odd_row) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
       /* work right to left in this row */
-      ptr0 += width - 1;	/* so point to rightmost pixel */
-      ptr1 += width - 1;
-      ptr2 += width - 1;
-      outptr += width - 1;
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
       dir = -1;
       dir3 = -3;
-      errorptr = fserrors + (width+1)*3; /* point to entry after last column */
-      on_odd_row = FALSE;	/* flip for next time */
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
     } else {
       /* work left to right in this row */
       dir = 1;
       dir3 = 3;
-      errorptr = fserrors;	/* point to entry before first real column */
-      on_odd_row = TRUE;	/* flip for next time */
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
     }
     /* Preset error values: no error propagated to first pixel from left */
     cur0 = cur1 = cur2 = 0;
@@ -975,22 +1002,28 @@
       cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
       cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
       cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
-      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-       * The maximum error is +- MAXJSAMPLE; this sets the required size
-       * of the range_limit array.
+      /* Limit the error using the transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
        */
-      cur0 += GETJSAMPLE(*ptr0);
-      cur1 += GETJSAMPLE(*ptr1);
-      cur2 += GETJSAMPLE(*ptr2);
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
       cur0 = GETJSAMPLE(range_limit[cur0]);
       cur1 = GETJSAMPLE(range_limit[cur1]);
       cur2 = GETJSAMPLE(range_limit[cur2]);
       /* Index into the cache with adjusted pixel value */
-      cachep = & histogram[cur0 >> Y_SHIFT][cur1 >> C_SHIFT][cur2 >> C_SHIFT];
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
       /* If we have not seen this color before, find nearest colormap */
       /* entry and update the cache */
       if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>Y_SHIFT, cur1>>C_SHIFT, cur2>>C_SHIFT);
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
       /* Now emit the colormap index for this cell */
       { register int pixcode = *cachep - 1;
 	*outptr = (JSAMPLE) pixcode;
@@ -1034,9 +1067,7 @@
        * to the next pixel on the current line, and all the errors for the
        * next line have been shifted over.  We are therefore ready to move on.
        */
-      ptr0 += dir;		/* Advance pixel pointers to next column */
-      ptr1 += dir;
-      ptr2 += dir;
+      inptr += dir3;		/* Advance pixel pointers to next column */
       outptr += dir;
       errorptr += dir3;		/* advance errorptr to current column */
     }
@@ -1048,149 +1079,174 @@
     errorptr[1] = (FSERROR) bpreverr1;
     errorptr[2] = (FSERROR) bpreverr2;
   }
-  /* Emit converted rows to the output file */
-  (*cinfo->methods->put_pixel_rows) (cinfo, num_rows, &output_workspace);
 }
 
 
 /*
- * Initialize for two-pass color quantization.
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
  */
 
-METHODDEF void
-color_quant_init (decompress_info_ptr cinfo)
+LOCAL void
+init_error_limit (j_decompress_ptr cinfo)
+/* Allocate and fill in the error_limiter table */
 {
-  int i;
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * table;
+  int in, out;
 
-  /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
-  if (cinfo->desired_number_of_colors < 8)
-    ERREXIT(cinfo->emethods, "Cannot request less than 8 quantized colors");
-  /* Make sure colormap indexes can be represented by JSAMPLEs */
-  if (cinfo->desired_number_of_colors > MAXNUMCOLORS)
-    ERREXIT1(cinfo->emethods, "Cannot request more than %d quantized colors",
-	     MAXNUMCOLORS);
+  table = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = table;
 
-  /* Allocate and zero the histogram */
-  histogram = (hist3d) (*cinfo->emethods->alloc_small)
-				(HIST_Y_ELEMS * SIZEOF(hist2d));
-  for (i = 0; i < HIST_Y_ELEMS; i++) {
-    histogram[i] = (hist2d) (*cinfo->emethods->alloc_medium)
-				(HIST_C_ELEMS*HIST_C_ELEMS * SIZEOF(histcell));
-    jzero_far((void FAR *) histogram[i],
-	      HIST_C_ELEMS*HIST_C_ELEMS * SIZEOF(histcell));
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out; table[-in] = -out;
   }
-
-  /* Allocate storage for the internal and external colormaps. */
-  /* We do this now since it is FAR storage and may affect the memory */
-  /* manager's space calculations. */
-  my_colormap = (*cinfo->emethods->alloc_small_sarray)
-			((long) cinfo->desired_number_of_colors,
-			 (long) 3);
-  cinfo->colormap = (*cinfo->emethods->alloc_small_sarray)
-			((long) cinfo->desired_number_of_colors,
-			 (long) cinfo->color_out_comps);
-
-  /* Allocate Floyd-Steinberg workspace if necessary */
-  /* This isn't needed until pass 2, but again it is FAR storage. */
-  if (cinfo->use_dithering) {
-    size_t arraysize = (size_t) ((cinfo->image_width + 2L) *
-				 (3 * SIZEOF(FSERROR)));
-
-    fserrors = (FSERRPTR) (*cinfo->emethods->alloc_medium) (arraysize);
-    /* Initialize the propagated errors to zero. */
-    jzero_far((void FAR *) fserrors, arraysize);
-    on_odd_row = FALSE;
+  /* Map errors 1:2 (out rises 1 per 2 steps of in) up to +- 3*MAXJSAMPLE/16 */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) {
+    table[in] = out; table[-in] = -out;
   }
-
-  /* Indicate number of passes needed, excluding the prescan pass. */
-  cinfo->total_passes++;	/* I always use one pass */
+  /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
+  for (; in <= MAXJSAMPLE; in++) {
+    table[in] = out; table[-in] = -out;
+  }
+#undef STEPSIZE
 }
 
 
 /*
- * Perform two-pass quantization: rescan the image data and output the
- * converted data via put_color_map and put_pixel_rows.
- * The source_method is a routine that can scan the image data; it can
- * be called as many times as desired.  The processing routine called by
- * source_method has the same interface as color_quantize does in the
- * one-pass case, except it must call put_pixel_rows itself.  (This allows
- * me to use multiple passes in which earlier passes don't output anything.)
+ * Finish up at the end of each pass.
  */
 
 METHODDEF void
-color_quant_doit (decompress_info_ptr cinfo, quantize_caller_ptr source_method)
+finish_pass1 (j_decompress_ptr cinfo)
 {
-  int i;
-
-  /* Select the representative colors */
+  /* Select the representative colors and fill in cinfo->colormap */
   select_colors(cinfo);
-  /* Pass the external colormap to the output module. */
-  /* NB: the output module may continue to use the colormap until shutdown. */
-  (*cinfo->methods->put_color_map) (cinfo, cinfo->actual_number_of_colors,
-				    cinfo->colormap);
-  /* Re-zero the histogram so pass 2 can use it as nearest-color cache */
-  for (i = 0; i < HIST_Y_ELEMS; i++) {
-    jzero_far((void FAR *) histogram[i],
-	      HIST_C_ELEMS*HIST_C_ELEMS * SIZEOF(histcell));
+}
+
+
+METHODDEF void
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF void
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub.color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub.color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub.color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
   }
-  /* Perform pass 2 */
-  if (cinfo->use_dithering)
-    (*source_method) (cinfo, pass2_dither);
-  else
-    (*source_method) (cinfo, pass2_nodither);
+  /* Zero the histogram (pass 1) or the inverse-colormap cache (pass 2) */
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    jzero_far((void FAR *) histogram[i],
+	      HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
 }
 
 
 /*
- * Finish up at the end of the file.
- */
-
-METHODDEF void
-color_quant_term (decompress_info_ptr cinfo)
-{
-  /* no work (we let free_all release the histogram/cache and colormaps) */
-  /* Note that we *mustn't* free the external colormap before free_all, */
-  /* since output module may use it! */
-}
-
-
-/*
- * Map some rows of pixels to the output colormapped representation.
- * Not used in two-pass case.
- */
-
-METHODDEF void
-color_quantize (decompress_info_ptr cinfo, int num_rows,
-		JSAMPIMAGE input_data, JSAMPARRAY output_data)
-{
-  ERREXIT(cinfo->emethods, "Should not get here!");
-}
-
-
-/*
- * The method selection routine for 2-pass color quantization.
+ * Module initialization routine for 2-pass color quantization.
  */
 
 GLOBAL void
-jsel2quantize (decompress_info_ptr cinfo)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
 {
-  if (cinfo->two_pass_quantize) {
-    /* Make sure jdmaster didn't give me a case I can't handle */
-    if (cinfo->num_components != 3 || cinfo->jpeg_color_space != CS_YCbCr)
-      ERREXIT(cinfo->emethods, "2-pass quantization only handles YCbCr input");
-    cinfo->methods->color_quant_init = color_quant_init;
-    cinfo->methods->color_quant_prescan = color_quant_prescan;
-    cinfo->methods->color_quant_doit = color_quant_doit;
-    cinfo->methods->color_quant_term = color_quant_term;
-    cinfo->methods->color_quantize = color_quantize;
-    /* Quantized grayscale output is normally done by jquant1.c (which will do
-     * a much better job of it).  But if the program is configured with only
-     * 2-pass quantization, then I have to do the job.  In this situation,
-     * jseldcolor's clearing of the Cb/Cr component_needed flags is incorrect,
-     * because I will look at those components before conversion.
-     */
-    cinfo->cur_comp_info[1]->component_needed = TRUE;
-    cinfo->cur_comp_info[2]->component_needed = TRUE;
+  my_cquantize_ptr cquantize;
+  int i;
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give him F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Make sure color count is acceptable */
+  i = (cinfo->colormap != NULL) ? cinfo->actual_number_of_colors
+				: cinfo->desired_number_of_colors;
+  /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+  if (i < 8)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+  /* Make sure colormap indexes can be represented by JSAMPLEs */
+  if (i > MAXNUMCOLORS)
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+  /* Allocate the histogram/inverse colormap storage */
+  cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+
+  /* Allocate storage for the completed colormap,
+   * unless it has been supplied by the application.
+   * We do this now since it is FAR storage and may affect
+   * the memory manager's space calculations.
+   */
+  if (cinfo->colormap == NULL) {
+    cinfo->colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) cinfo->desired_number_of_colors, (JDIMENSION) 3);
+  }
+
+  /* Allocate Floyd-Steinberg workspace if necessary. */
+  /* This isn't needed until pass 2, but again it is FAR storage. */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    size_t arraysize = (size_t) ((cinfo->output_width + 2) *
+				 (3 * SIZEOF(FSERROR)));
+
+    cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+    /* Initialize the propagated errors to zero. */
+    jzero_far((void FAR *) cquantize->fserrors, arraysize);
+    cquantize->on_odd_row = FALSE;
+    init_error_limit(cinfo);
   }
 }