Extend the YUVImage class to allow the same buffer to be reused with different metadata; port the TJBench changes that treat YUV encoding/decoding as an intermediate step of the JPEG compression/decompression pipeline rather than as a separate test case; and add YUV encode/decode tests to the Java version of tjbenchtest


git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1184 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/Makefile.am b/Makefile.am
index 0e0527a..0636b28 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -330,6 +330,7 @@
 	sh ./tjbenchtest -yuv
 if WITH_JAVA
 	sh ./tjbenchtest.java
+	sh ./tjbenchtest.java -yuv
 endif
 
 
diff --git a/java/TJBench.java b/java/TJBench.java
index 866a58f..617d312 100644
--- a/java/TJBench.java
+++ b/java/TJBench.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C)2009-2013 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -34,12 +34,8 @@
 
 class TJBench {
 
-  static final int YUVENCODE = 1;
-  static final int YUVDECODE = 2;
-  static final int YUVCOMPRESS = 3;
-
-  static int flags = 0, yuv = 0, quiet = 0, pf = TJ.PF_BGR, yuvpad = 1;
-  static boolean compOnly, decompOnly, doTile;
+  static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvpad = 1, warmup = 1;
+  static boolean compOnly, decompOnly, doTile, doYUV;
 
   static final String[] pixFormatStr = {
     "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY"
@@ -134,17 +130,15 @@
 
 
   /* Decompression test */
-  static void decompTest(byte[] srcBuf, byte[][] jpegBuf, int[] jpegSize,
-                         byte[] dstBuf, int w, int h, int subsamp,
-                         int jpegQual, String fileName, int tilew, int tileh)
-                         throws Exception {
+  static void decomp(byte[] srcBuf, byte[][] jpegBuf, int[] jpegSize,
+                     byte[] dstBuf, int w, int h, int subsamp, int jpegQual,
+                     String fileName, int tilew, int tileh) throws Exception {
     String qualStr = new String(""), sizeStr, tempStr;
     TJDecompressor tjd;
-    double start, elapsed;
-    int ps = TJ.getPixelSize(pf), i;
+    double elapsed, elapsedDecode;
+    int ps = TJ.getPixelSize(pf), i, iter = 0;
     int scaledw = sf.getScaled(w);
     int scaledh = sf.getScaled(h);
-    int yuvSize = TJ.bufSizeYUV(scaledw, yuvpad, scaledh, subsamp), bufsize;
     int pitch = scaledw * ps;
     YUVImage yuvImage = null;
 
@@ -153,40 +147,52 @@
 
     tjd = new TJDecompressor();
 
-    int bufSize = (yuv == YUVDECODE ? yuvSize : pitch * scaledh);
     if (dstBuf == null)
-      dstBuf = new byte[bufSize];
+      dstBuf = new byte[pitch * scaledh];
 
     /* Set the destination buffer to gray so we know whether the decompressor
        attempted to write to it */
     Arrays.fill(dstBuf, (byte)127);
 
-    /* Execute once to preload cache */
-    tjd.setSourceImage(jpegBuf[0], jpegSize[0]);
-    if (yuv == YUVDECODE) {
-      yuvImage = new YUVImage(dstBuf, scaledw, yuvpad, scaledh, subsamp);
-      tjd.decompressToYUV(yuvImage, flags);
+    if (doYUV) {
+      int width = doTile ? tilew : scaledw;
+      int height = doTile ? tileh : scaledh;
+      yuvImage = new YUVImage(width, yuvpad, height, subsamp);
+      Arrays.fill(yuvImage.getBuf(), (byte)127);
     }
-    else
-      tjd.decompress(dstBuf, 0, 0, scaledw, pitch, scaledh, pf, flags);
 
     /* Benchmark */
-    for (i = 0, start = getTime(); (elapsed = getTime() - start) < benchTime;
-         i++) {
+    iter -= warmup;
+    elapsed = elapsedDecode = 0.0;
+    while (true) {
       int tile = 0;
-      if (yuv == YUVDECODE)
-        tjd.decompressToYUV(yuvImage, flags);
-      else {
-        for (int y = 0; y < h; y += tileh) {
-          for (int x = 0; x < w; x += tilew, tile++) {
-            int width = doTile ? Math.min(tilew, w - x) : scaledw;
-            int height = doTile ? Math.min(tileh, h - y) : scaledh;
-            tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
+      double start = getTime();
+      for (int y = 0; y < h; y += tileh) {
+        for (int x = 0; x < w; x += tilew, tile++) {
+          int width = doTile ? Math.min(tilew, w - x) : scaledw;
+          int height = doTile ? Math.min(tileh, h - y) : scaledh;
+          tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
+          if (doYUV) {
+            yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height, subsamp);
+            tjd.decompressToYUV(yuvImage, flags);
+            double startDecode = getTime();
+            tjd.setSourceImage(yuvImage);
             tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags);
-          }
+            if (iter >= 0)
+              elapsedDecode += getTime() - startDecode;
+          } else
+            tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags);
         }
       }
+      iter++;
+      if (iter >= 1) {
+        elapsed += getTime() - start;
+        if (elapsed >= benchTime)
+          break;
+      }
     }
+    if(doYUV)
+      elapsed -= elapsedDecode;
 
     tjd = null;
     for (i = 0; i < jpegBuf.length; i++)
@@ -194,14 +200,27 @@
     jpegBuf = null;  jpegSize = null;
     System.gc();
 
-    if (quiet != 0)
-      System.out.println(
-        sigFig((double)(w * h) / 1000000. * (double)i / elapsed, 4));
-    else {
-      System.out.format("D--> Frame rate:           %f fps\n",
-                        (double)i / elapsed);
-      System.out.format("     Dest. throughput:     %f Megapixels/sec\n",
-                        (double)(w * h) / 1000000. * (double)i / elapsed);
+    if (quiet != 0) {
+      System.out.format("%-6s%s",
+        sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4),
+        quiet == 2 ? "\n" : "  ");
+      if (doYUV)
+        System.out.format("%s\n",
+          sigFig((double)(w * h) / 1000000. * (double)iter / elapsedDecode, 4));
+      else if (quiet != 2)
+        System.out.print("\n");
+    } else {
+      System.out.format("%s --> Frame rate:         %f fps\n",
+                        (doYUV ? "Decomp to YUV":"Decompress   "),
+                        (double)iter / elapsed);
+      System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                        (double)(w * h) / 1000000. * (double)iter / elapsed);
+      if (doYUV) {
+        System.out.format("YUV Decode    --> Frame rate:         %f fps\n",
+                          (double)iter / elapsedDecode);
+        System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                          (double)(w * h) / 1000000. * (double)iter / elapsedDecode);
+      }
     }
 
     if (sf.getNum() != 1 || sf.getDenom() != 1)
@@ -211,132 +230,57 @@
     else
       sizeStr = new String("full");
     if (decompOnly)
-      tempStr = new String(fileName + "_" + sizeStr +
-                           (yuv != 0 ? ".yuv" : ".bmp"));
+      tempStr = new String(fileName + "_" + sizeStr + ".bmp");
     else
       tempStr = new String(fileName + "_" + subName[subsamp] + qualStr +
-                           "_" + sizeStr + (yuv != 0 ? ".yuv" : ".bmp"));
+                           "_" + sizeStr + ".bmp");
 
-    if (yuv == YUVDECODE) {
-      FileOutputStream fos = new FileOutputStream(tempStr);
-      fos.write(dstBuf, 0, yuvSize);
-      fos.close();
-    } else {
-      saveImage(tempStr, dstBuf, scaledw, scaledh, pf);
-      int ndx = tempStr.indexOf('.');
-      tempStr = new String(tempStr.substring(0, ndx) + "-err.bmp");
-      if (srcBuf != null && sf.getNum() == 1 && sf.getDenom() == 1) {
-        if (quiet == 0)
-          System.out.println("Compression error written to " + tempStr + ".");
-        if (subsamp == TJ.SAMP_GRAY) {
-          for (int y = 0, index = 0; y < h; y++, index += pitch) {
-            for (int x = 0, index2 = index; x < w; x++, index2 += ps) {
-              int rindex = index2 + TJ.getRedOffset(pf);
-              int gindex = index2 + TJ.getGreenOffset(pf);
-              int bindex = index2 + TJ.getBlueOffset(pf);
-              int lum = (int)((double)(srcBuf[rindex] & 0xff) * 0.299 +
-                              (double)(srcBuf[gindex] & 0xff) * 0.587 +
-                              (double)(srcBuf[bindex] & 0xff) * 0.114 + 0.5);
-              if (lum > 255) lum = 255;
-              if (lum < 0) lum = 0;
-              dstBuf[rindex] = (byte)Math.abs((dstBuf[rindex] & 0xff) - lum);
-              dstBuf[gindex] = (byte)Math.abs((dstBuf[gindex] & 0xff) - lum);
-              dstBuf[bindex] = (byte)Math.abs((dstBuf[bindex] & 0xff) - lum);
-            }
+    saveImage(tempStr, dstBuf, scaledw, scaledh, pf);
+    int ndx = tempStr.indexOf('.');
+    tempStr = new String(tempStr.substring(0, ndx) + "-err.bmp");
+    if (srcBuf != null && sf.getNum() == 1 && sf.getDenom() == 1) {
+      if (quiet == 0)
+        System.out.println("Compression error written to " + tempStr + ".");
+      if (subsamp == TJ.SAMP_GRAY) {
+        for (int y = 0, index = 0; y < h; y++, index += pitch) {
+          for (int x = 0, index2 = index; x < w; x++, index2 += ps) {
+            int rindex = index2 + TJ.getRedOffset(pf);
+            int gindex = index2 + TJ.getGreenOffset(pf);
+            int bindex = index2 + TJ.getBlueOffset(pf);
+            int lum = (int)((double)(srcBuf[rindex] & 0xff) * 0.299 +
+                            (double)(srcBuf[gindex] & 0xff) * 0.587 +
+                            (double)(srcBuf[bindex] & 0xff) * 0.114 + 0.5);
+            if (lum > 255) lum = 255;
+            if (lum < 0) lum = 0;
+            dstBuf[rindex] = (byte)Math.abs((dstBuf[rindex] & 0xff) - lum);
+            dstBuf[gindex] = (byte)Math.abs((dstBuf[gindex] & 0xff) - lum);
+            dstBuf[bindex] = (byte)Math.abs((dstBuf[bindex] & 0xff) - lum);
           }
-        } else {
-          for (int y = 0; y < h; y++)
-            for (int x = 0; x < w * ps; x++)
-              dstBuf[pitch * y + x] =
-                (byte)Math.abs((dstBuf[pitch * y + x] & 0xff) -
-                               (srcBuf[pitch * y + x] & 0xff));
         }
-        saveImage(tempStr, dstBuf, w, h, pf);
+      } else {
+        for (int y = 0; y < h; y++)
+          for (int x = 0; x < w * ps; x++)
+            dstBuf[pitch * y + x] =
+              (byte)Math.abs((dstBuf[pitch * y + x] & 0xff) -
+                             (srcBuf[pitch * y + x] & 0xff));
       }
+      saveImage(tempStr, dstBuf, w, h, pf);
     }
   }
 
 
-  static void doTestYUV(byte[] srcBuf, int w, int h, int subsamp,
-                        String fileName) throws Exception {
-    TJCompressor tjc;
-    byte[] dstBuf;
-    double start, elapsed;
-    int ps = TJ.getPixelSize(pf), i;
-    int yuvSize = 0;
-    YUVImage yuvImage;
-
-    yuvSize = TJ.bufSizeYUV(w, yuvpad, h, subsamp);
-    dstBuf = new byte[yuvSize];
-
-    if (quiet == 0)
-      System.out.format(">>>>>  %s (%s) <--> YUV %s  <<<<<\n",
-        pixFormatStr[pf],
-        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down",
-        subNameLong[subsamp]);
-
-    if (quiet == 1)
-      System.out.format("%s\t%s\t%s\tN/A\t", pixFormatStr[pf],
-                        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
-                        subNameLong[subsamp]);
-
-    tjc = new TJCompressor(srcBuf, 0, 0, w, 0, h, pf);
-    tjc.setSubsamp(subsamp);
-
-    /* Execute once to preload cache */
-    yuvImage = new YUVImage(dstBuf, w, yuvpad, h, subsamp);
-    tjc.encodeYUV(yuvImage, flags);
-
-    /* Benchmark */
-    for (i = 0, start = getTime();
-         (elapsed = getTime() - start) < benchTime; i++)
-      tjc.encodeYUV(yuvImage, flags);
-
-    if (quiet == 1)
-      System.out.format("%-4d  %-4d\t", w, h);
-    if (quiet != 0) {
-      System.out.format("%s%c%s%c",
-        sigFig((double)(w * h) / 1000000. * (double) i / elapsed, 4),
-        quiet == 2 ? '\n' : '\t',
-        sigFig((double)(w * h * ps) / (double)yuvSize, 4),
-        quiet == 2 ? '\n' : '\t');
-    } else {
-      System.out.format("\n%s size: %d x %d\n", "Image", w, h);
-      System.out.format("C--> Frame rate:           %f fps\n",
-                        (double)i / elapsed);
-      System.out.format("     Output image size:    %d bytes\n", yuvSize);
-      System.out.format("     Compression ratio:    %f:1\n",
-                        (double)(w * h * ps) / (double)yuvSize);
-      System.out.format("     Source throughput:    %f Megapixels/sec\n",
-                        (double)(w * h) / 1000000. * (double)i / elapsed);
-      System.out.format("     Output bit stream:    %f Megabits/sec\n",
-                        (double)yuvSize * 8. / 1000000. * (double)i / elapsed);
-    }
-    String tempStr = fileName + "_" + subName[subsamp] + ".yuv";
-    FileOutputStream fos = new FileOutputStream(tempStr);
-    fos.write(dstBuf, 0, yuvSize);
-    fos.close();
-    if (quiet == 0)
-      System.out.println("Reference image written to " + tempStr);
-  }
-
-
-  static void doTest(byte[] srcBuf, int w, int h, int subsamp, int jpegQual,
-                     String fileName) throws Exception {
+  static void fullTest(byte[] srcBuf, int w, int h, int subsamp, int jpegQual,
+                       String fileName) throws Exception {
     TJCompressor tjc;
     byte[] tmpBuf;
     byte[][] jpegBuf;
     int[] jpegSize;
-    double start, elapsed;
-    int totalJpegSize = 0, tilew, tileh, i;
-    int ps = (yuv == YUVCOMPRESS ? 3 : TJ.getPixelSize(pf));
+    double start, elapsed, elapsedEncode;
+    int totalJpegSize = 0, tilew, tileh, i, iter;
+    int ps = TJ.getPixelSize(pf);
     int ntilesw = 1, ntilesh = 1, pitch = w * ps;
-    String pfStr = (yuv == YUVCOMPRESS ? "YUV" : pixFormatStr[pf]);
-
-    if (yuv == YUVENCODE) {
-      doTestYUV(srcBuf, w, h, subsamp, fileName);
-      return;
-    }
+    String pfStr = pixFormatStr[pf];
+    YUVImage yuvImage = null;
 
     tmpBuf = new byte[pitch * h];
 
@@ -361,62 +305,94 @@
 
       /* Compression test */
       if (quiet == 1)
-        System.out.format("%s\t%s\t%s\t%d\t", pfStr,
+        System.out.format("%-4s (%s)  %-5s    %-3d   ", pfStr,
                           (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
                           subNameLong[subsamp], jpegQual);
-      if (yuv != YUVCOMPRESS)
-        for (i = 0; i < h; i++)
-          System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
-      if (yuv == YUVCOMPRESS)
-        tjc.setSourceImage(new YUVImage(srcBuf, tilew, yuvpad, tileh,
-                                        subsamp));
-      else
-        tjc.setSourceImage(srcBuf, 0, 0, tilew, pitch, tileh, pf);
+      for (i = 0; i < h; i++)
+        System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
       tjc.setJPEGQuality(jpegQual);
       tjc.setSubsamp(subsamp);
 
-      /* Execute once to preload cache */
-      tjc.compress(jpegBuf[0], flags);
+      if (doYUV) {
+        yuvImage = new YUVImage(tilew, yuvpad, tileh, subsamp);
+        Arrays.fill(yuvImage.getBuf(), (byte)127);
+      }
 
       /* Benchmark */
-      for (i = 0, start = getTime();
-           (elapsed = getTime() - start) < benchTime; i++) {
+      iter = -warmup;
+      elapsed = elapsedEncode = 0.0;
+      while (true) {
         int tile = 0;
         totalJpegSize = 0;
+        start = getTime();
         for (int y = 0; y < h; y += tileh) {
           for (int x = 0; x < w; x += tilew, tile++) {
             int width = Math.min(tilew, w - x);
             int height = Math.min(tileh, h - y);
-            if (yuv != YUVCOMPRESS)
-              tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
+            tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
+            if (doYUV) {
+              double startEncode = getTime();
+              yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height,
+                              subsamp);
+              tjc.encodeYUV(yuvImage, flags);
+              if (iter >= 0)
+                elapsedEncode += getTime() - startEncode;
+              tjc.setSourceImage(yuvImage);
+            }
             tjc.compress(jpegBuf[tile], flags);
             jpegSize[tile] = tjc.getCompressedSize();
             totalJpegSize += jpegSize[tile];
           }
         }
+        iter++;
+        if (iter >= 1) {
+          elapsed += getTime() - start;
+          if (elapsed >= benchTime)
+            break;
+        }
       }
+      if (doYUV)
+        elapsed -= elapsedEncode;
 
       if (quiet == 1)
-        System.out.format("%-4d  %-4d\t", tilew, tileh);
+        System.out.format("%-5d  %-5d   ", tilew, tileh);
       if (quiet != 0) {
-        System.out.format("%s%c%s%c",
-          sigFig((double)(w * h) / 1000000. * (double) i / elapsed, 4),
-          quiet == 2 ? '\n' : '\t',
+        if (doYUV)
+          System.out.format("%-6s%s",
+            sigFig((double)(w * h) / 1000000. * (double)iter / elapsedEncode, 4),
+            quiet == 2 ? "\n" : "  ");
+        System.out.format("%-6s%s",
+          sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4),
+          quiet == 2 ? "\n" : "  ");
+        System.out.format("%-6s%s",
           sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
-          quiet == 2 ? '\n' : '\t');
+          quiet == 2 ? "\n" : "  ");
       } else {
         System.out.format("\n%s size: %d x %d\n", doTile ? "Tile" : "Image",
                           tilew, tileh);
-        System.out.format("C--> Frame rate:           %f fps\n",
-                          (double)i / elapsed);
-        System.out.format("     Output image size:    %d bytes\n",
+        if (doYUV) {
+          System.out.format("Encode YUV    --> Frame rate:         %f fps\n",
+                            (double)iter / elapsedEncode);
+          System.out.format("                  Output image size:  %d bytes\n",
+                            yuvImage.getSize());
+          System.out.format("                  Compression ratio:  %f:1\n",
+                            (double)(w * h * ps) / (double)yuvImage.getSize());
+          System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                            (double)(w * h) / 1000000. * (double)iter / elapsedEncode);
+          System.out.format("                  Output bit stream:  %f Megabits/sec\n",
+            (double)yuvImage.getSize() * 8. / 1000000. * (double)iter / elapsedEncode);
+        }
+        System.out.format("%s --> Frame rate:         %f fps\n",
+                          doYUV ? "Comp from YUV" : "Compress     ",
+                          (double)iter / elapsed);
+        System.out.format("                  Output image size:  %d bytes\n",
                           totalJpegSize);
-        System.out.format("     Compression ratio:    %f:1\n",
+        System.out.format("                  Compression ratio:  %f:1\n",
                           (double)(w * h * ps) / (double)totalJpegSize);
-        System.out.format("     Source throughput:    %f Megapixels/sec\n",
-                          (double)(w * h) / 1000000. * (double)i / elapsed);
-        System.out.format("     Output bit stream:    %f Megabits/sec\n",
-          (double)totalJpegSize * 8. / 1000000. * (double)i / elapsed);
+        System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                          (double)(w * h) / 1000000. * (double)iter / elapsed);
+        System.out.format("                  Output bit stream:  %f Megabits/sec\n",
+          (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed);
       }
       if (tilew == w && tileh == h) {
         String tempStr = fileName + "_" + subName[subsamp] + "_" + "Q" +
@@ -430,22 +406,22 @@
 
       /* Decompression test */
       if (!compOnly)
-        decompTest(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
-                   fileName, tilew, tileh);
+        decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
+               fileName, tilew, tileh);
 
       if (tilew == w && tileh == h) break;
     }
   }
 
 
-  static void doDecompTest(String fileName) throws Exception {
+  static void decompTest(String fileName) throws Exception {
     TJTransformer tjt;
-    byte[][] jpegBuf;
+    byte[][] jpegBuf = null;
     byte[] srcBuf;
-    int[] jpegSize;
+    int[] jpegSize = null;
     int totalJpegSize;
     int w = 0, h = 0, subsamp = -1, cs = -1, _w, _h, _tilew, _tileh,
-      _ntilesw, _ntilesh, _subsamp, x, y;
+      _ntilesw, _ntilesh, _subsamp, x, y, iter;
     int ntilesw = 1, ntilesh = 1;
     double start, elapsed;
     int ps = TJ.getPixelSize(pf), tile;
@@ -470,19 +446,20 @@
 
     if (quiet == 1) {
       System.out.println("All performance values in Mpixels/sec\n");
-      System.out.format("Bitmap\tBitmap\tJPEG\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
+      System.out.format("Bitmap     JPEG   JPEG     %s  %s   Xform   Comp    Decomp  ",
                         (doTile ? "Tile " : "Image"),
                         (doTile ? "Tile " : "Image"));
-      System.out.println("Format\tOrder\tCS\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n");
-    } else if (quiet == 0) {
-      if (yuv == YUVDECODE)
-        System.out.format(">>>>>  JPEG %s --> YUV  <<<<<\n",
-          formatName(subsamp, cs));
-      else
-        System.out.format(">>>>>  JPEG %s --> %s (%s)  <<<<<\n",
-          formatName(subsamp, cs), pixFormatStr[pf],
-          (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
-    }
+      if (doYUV)
+        System.out.print("Decode");
+      System.out.print("\n");
+      System.out.print("Format     CS     Subsamp  Width  Height  Perf    Ratio   Perf    ");
+      if (doYUV)
+        System.out.print("Perf");
+      System.out.println("\n");
+    } else if (quiet == 0)
+      System.out.format(">>>>>  JPEG %s --> %s (%s)  <<<<<\n",
+        formatName(subsamp, cs), pixFormatStr[pf],
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
 
     for (int tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ;
          tilew *= 2, tileh *= 2) {
@@ -502,10 +479,10 @@
                             sf.getScaled(_h));
         System.out.println("");
       } else if (quiet == 1) {
-        System.out.format("%s\t%s\t%s\t%s\t", pixFormatStr[pf],
+        System.out.format("%-4s (%s)  %-5s  %-5s    ", pixFormatStr[pf],
                           (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
                           csName[cs], subNameLong[subsamp]);
-        System.out.format("%-4d  %-4d\t", tilew, tileh);
+        System.out.format("%-5d  %-5d   ", tilew, tileh);
       }
 
       _subsamp = subsamp;
@@ -534,6 +511,16 @@
         _ntilesw = (_w + _tilew - 1) / _tilew;
         _ntilesh = (_h + _tileh - 1) / _tileh;
 
+        if (xformOp == TJTransform.OP_TRANSPOSE ||
+            xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90 ||
+            xformOp == TJTransform.OP_ROT270) {
+            if (_subsamp == TJ.SAMP_422)
+              _subsamp = TJ.SAMP_440;
+            else if (_subsamp == TJ.SAMP_440)
+              _subsamp = TJ.SAMP_422;
+        }
+
         TJTransform[] t = new TJTransform[_ntilesw * _ntilesh];
         jpegBuf = new byte[_ntilesw * _ntilesh][TJ.bufSize(_tilew, _tileh, subsamp)];
 
@@ -552,37 +539,45 @@
           }
         }
 
-        start = getTime();
-        tjt.transform(jpegBuf, t, flags);
-        jpegSize = tjt.getTransformedSizes();
-        elapsed = getTime() - start;
-
+        iter = -warmup;
+        elapsed = 0.;
+        while (true) {
+          start = getTime();
+          tjt.transform(jpegBuf, t, flags);
+          jpegSize = tjt.getTransformedSizes();
+          iter++;
+          if (iter >= 1) {
+            elapsed += getTime() - start;
+            if (elapsed >= benchTime)
+              break;
+          }
+        }
         t = null;
 
         for (tile = 0, totalJpegSize = 0; tile < _ntilesw * _ntilesh; tile++)
           totalJpegSize += jpegSize[tile];
 
         if (quiet != 0) {
-          System.out.format("%s%c%s%c",
+          System.out.format("%-6s%s%-6s%s",
             sigFig((double)(w * h) / 1000000. / elapsed, 4),
-            quiet == 2 ? '\n' : '\t',
+            quiet == 2 ? "\n" : "  ",
             sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
-            quiet == 2 ? '\n' : '\t');
+            quiet == 2 ? "\n" : "  ");
         } else if (quiet == 0) {
-          System.out.format("X--> Frame rate:           %f fps\n",
+          System.out.format("Transform     --> Frame rate:         %f fps\n",
                             1.0 / elapsed);
-          System.out.format("     Output image size:    %d bytes\n",
+          System.out.format("                  Output image size:  %d bytes\n",
                             totalJpegSize);
-          System.out.format("     Compression ratio:    %f:1\n",
+          System.out.format("                  Compression ratio:  %f:1\n",
                             (double)(w * h * ps) / (double)totalJpegSize);
-          System.out.format("     Source throughput:    %f Megapixels/sec\n",
+          System.out.format("                  Throughput:         %f Megapixels/sec\n",
                             (double)(w * h) / 1000000. / elapsed);
-          System.out.format("     Output bit stream:    %f Megabits/sec\n",
+          System.out.format("                  Output bit stream:  %f Megabits/sec\n",
                             (double)totalJpegSize * 8. / 1000000. / elapsed);
         }
       } else {
         if (quiet == 1)
-          System.out.print("N/A\tN/A\t");
+          System.out.print("N/A     N/A     ");
         jpegBuf = new byte[1][TJ.bufSize(_tilew, _tileh, subsamp)];
         jpegSize = new int[1];
         jpegSize[0] = srcSize;
@@ -594,8 +589,8 @@
       if (h == tileh)
         _tileh = _h;
       if ((xformOpt & TJTransform.OPT_NOOUTPUT) == 0)
-        decompTest(null, jpegBuf, jpegSize, null, _w, _h, _subsamp, 0,
-                   fileName, _tilew, _tileh);
+        decomp(null, jpegBuf, jpegSize, null, _w, _h, _subsamp, 0,
+               fileName, _tilew, _tileh);
       else if (quiet == 1)
         System.out.println("N/A");
 
@@ -614,7 +609,7 @@
     String className = new TJBench().getClass().getName();
 
     System.out.println("\nUSAGE: java " + className);
-    System.out.println("       <Inputfile (BMP|YUV)> <Quality> [options]\n");
+    System.out.println("       <Inputfile (BMP)> <Quality> [options]\n");
     System.out.println("       java " + className);
     System.out.println("       <Inputfile (JPG)> [options]\n");
     System.out.println("Options:\n");
@@ -623,29 +618,23 @@
     System.out.println("-tile = Test performance of the codec when the image is encoded as separate");
     System.out.println("     tiles of varying sizes.");
     System.out.println("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =");
-    System.out.println("     Test the specified color conversion path in the codec (default: BGR)");
+    System.out.println("     Test the specified color conversion path in the codec (default = BGR)");
     System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in");
     System.out.println("     the underlying codec");
     System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying");
     System.out.println("     codec");
     System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the");
     System.out.println("     underlying codec");
-    System.out.println("-subsamp <s> = if compressing a JPEG image from a YUV planar source image,");
-    System.out.println("     this specifies the level of chrominance subsampling used in the source");
-    System.out.println("     image.  Otherwise, this specifies the level of chrominance subsampling");
-    System.out.println("     to use in the JPEG destination image.  <s> = 444, 422, 440, 420, 411,");
-    System.out.println("     or GRAY");
+    System.out.println("-subsamp <s> = When testing JPEG compression, this option specifies the level");
+    System.out.println("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, 411, or");
+    System.out.println("     GRAY).  The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in");
+    System.out.println("     sequence.");
     System.out.println("-quiet = Output results in tabular rather than verbose format");
-    System.out.println("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG");
-    System.out.println("-yuvdecode = Decode JPEG image to planar YUV rather than RGB");
-    System.out.println("-yuvsize WxH = if compressing a JPEG image from a YUV planar source image, this");
-    System.out.println("     specifies the width and height of the source image.");
-    System.out.println("-yuvpad <p> = if compressing a JPEG image from a YUV planar source image, this");
-    System.out.println("     specifies the number of bytes to which each row of each plane in the");
-    System.out.println("     source image is padded.  If decompressing a JPEG image to a YUV planar");
-    System.out.println("     destination image, this specifies the row padding for each plane of the");
-    System.out.println("     destination image. (default=1)");
-    System.out.println("-scale M/N = scale down the width/height of the decompressed JPEG image by a");
+    System.out.println("-yuv = Test YUV encoding/decoding functions");
+    System.out.println("-yuvpad <p> = If testing YUV encoding/decoding, this specifies the number of");
+    System.out.println("     bytes to which each row of each plane in the intermediate YUV image is");
+    System.out.println("     padded (default = 1)");
+    System.out.println("-scale M/N = Scale down the width/height of the decompressed JPEG image by a");
     System.out.print  ("     factor of M/N (M/N = ");
     for (i = 0; i < nsf; i++) {
       System.out.format("%d/%d", scalingFactors[i].getNum(),
@@ -668,7 +657,9 @@
     System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression");
     System.out.println("     test (can be combined with the other transforms above)");
     System.out.println("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)");
-	System.out.println("-componly = Stop after running compression tests.  Do not test decompression.\n");
+    System.out.println("-warmup <w> = Execute each benchmark <w> times to prime the cache before");
+    System.out.println("     taking performance measurements (default = 1)");
+    System.out.println("-componly = Stop after running compression tests.  Do not test decompression.\n");
     System.out.println("NOTE:  If the quality is specified as a range (e.g. 90-100), a separate");
     System.out.println("test will be performed for all quality values in the range.\n");
     System.exit(1);
@@ -689,25 +680,10 @@
       String tempStr = argv[0].toLowerCase();
       if (tempStr.endsWith(".jpg") || tempStr.endsWith(".jpeg"))
         decompOnly = true;
-      if (tempStr.endsWith(".yuv"))
-        yuv = YUVCOMPRESS;
 
       System.out.println("");
 
-      if (argv.length > minArg) {
-        for (int i = minArg; i < argv.length; i++) {
-          if (argv[i].equalsIgnoreCase("-yuvencode")) {
-            System.out.println("Testing YUV planar encoding\n");
-            yuv = YUVENCODE;  maxQual = minQual = 100;
-          }
-          if (argv[i].equalsIgnoreCase("-yuvdecode")) {
-            System.out.println("Testing YUV planar decoding\n");
-            yuv = YUVDECODE;
-          }
-        }
-      }
-
-      if (!decompOnly && yuv != YUVENCODE) {
+      if (!decompOnly) {
         minArg = 2;
         if (argv.length < minArg)
           usage();
@@ -812,18 +788,9 @@
             else
               usage();
           }
-          if (argv[i].equalsIgnoreCase("-yuvsize") && i < argv.length - 1) {
-            int temp1 = 0, temp2 = 0;
-            Scanner scanner = new Scanner(argv[++i]).useDelimiter("x");
-            try {
-              temp1 = scanner.nextInt();
-              temp2 = scanner.nextInt();
-            } catch(Exception e) {}
-            if (temp1 >= 1 && temp2 >= 1) {
-              w = temp1;
-              h = temp2;
-            } else
-              usage();
+          if (argv[i].equalsIgnoreCase("-yuv")) {
+            System.out.println("Testing YUV planar encoding/decoding\n");
+            doYUV = true;
           }
           if (argv[i].equalsIgnoreCase("-yuvpad") && i < argv.length - 1) {
             int temp = 0;
@@ -850,6 +817,16 @@
           }
           if (argv[i].equalsIgnoreCase("-componly"))
             compOnly = true;
+          if (argv[i].equalsIgnoreCase("-warmup") && i < argv.length - 1) {
+            int temp = -1;
+            try {
+             temp = Integer.parseInt(argv[++i]);
+            } catch (NumberFormatException e) {}
+            if (temp >= 0) {
+              warmup = temp;
+              System.out.format("Warmup runs = %d\n\n", warmup);
+            }
+          }
           if (argv[i].equalsIgnoreCase("-?"))
             usage();
         }
@@ -864,67 +841,60 @@
         doTile = false;
       }
 
-      if (yuv != 0 && doTile) {
-        System.out.println("Disabling tiled compression/decompression tests, because those tests do not");
-        System.out.println("work when YUV encoding, compression, or decoding is enabled.\n");
-        doTile = false;
-      }
-
       if (!decompOnly) {
-        if(yuv == YUVCOMPRESS) {
-          if (w < 1 || h < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP)
-            throw new Exception("YUV image size and/or subsampling not specified");
-          FileInputStream fis = new FileInputStream(argv[0]);
-          int srcSize = (int)fis.getChannel().size();
-          if (srcSize != TJ.bufSizeYUV(w, yuvpad, h, subsamp))
-            throw new Exception("YUV image file is the wrong size");
-          srcBuf = new byte[srcSize];
-          fis.read(srcBuf, 0, srcSize);
-          fis.close();
-		}
-        else {
-          int[] width = new int[1], height = new int[1];
-          srcBuf = loadImage(argv[0], width, height, pf);
-          w = width[0];  h = height[0];
-          int index = -1;
-          if ((index = argv[0].indexOf('.')) >= 0)
-            argv[0] = argv[0].substring(0, index);
-        }
+        int[] width = new int[1], height = new int[1];
+        srcBuf = loadImage(argv[0], width, height, pf);
+        w = width[0];  h = height[0];
+        int index = -1;
+        if ((index = argv[0].indexOf('.')) >= 0)
+          argv[0] = argv[0].substring(0, index);
       }
 
       if (quiet == 1 && !decompOnly) {
         System.out.println("All performance values in Mpixels/sec\n");
-        System.out.format("Bitmap\tBitmap\tJPEG\tJPEG\t%s %s \tComp\tComp\tDecomp\n",
+        System.out.format("Bitmap     JPEG     JPEG  %s  %s   ",
           (doTile ? "Tile " : "Image"), (doTile ? "Tile " : "Image"));
-        System.out.println("Format\tOrder\tSubsamp\tQual\tWidth Height\tPerf \tRatio\tPerf\n");
+        if (doYUV)
+          System.out.print("Encode  ");
+        System.out.print("Comp    Comp    Decomp  ");
+        if (doYUV)
+          System.out.print("Decode");
+        System.out.print("\n");
+        System.out.print("Format     Subsamp  Qual  Width  Height  ");
+        if (doYUV)
+          System.out.print("Perf    ");
+        System.out.print("Perf    Ratio   Perf    ");
+        if (doYUV)
+          System.out.print("Perf");
+        System.out.println("\n");
       }
 
       if (decompOnly) {
-        doDecompTest(argv[0]);
+        decompTest(argv[0]);
         System.out.println("");
         System.exit(retval);
       }
 
       System.gc();
-      if (yuv == YUVCOMPRESS || (subsamp >= 0 && subsamp < TJ.NUMSAMP)) {
+      if (subsamp >= 0 && subsamp < TJ.NUMSAMP) {
         for (int i = maxQual; i >= minQual; i--)
-          doTest(srcBuf, w, h, subsamp, i, argv[0]);
+          fullTest(srcBuf, w, h, subsamp, i, argv[0]);
         System.out.println("");
       } else {
         for (int i = maxQual; i >= minQual; i--)
-          doTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
+          fullTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
         System.out.println("");
         System.gc();
         for (int i = maxQual; i >= minQual; i--)
-          doTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
+          fullTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
         System.out.println("");
         System.gc();
         for (int i = maxQual; i >= minQual; i--)
-          doTest(srcBuf, w, h, TJ.SAMP_422, i, argv[0]);
+          fullTest(srcBuf, w, h, TJ.SAMP_422, i, argv[0]);
         System.out.println("");
         System.gc();
         for (int i = maxQual; i >= minQual; i--)
-          doTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
+          fullTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
         System.out.println("");
       }
 
diff --git a/java/doc/index-all.html b/java/doc/index-all.html
index c8a4f0c..fa92e3c 100644
--- a/java/doc/index-all.html
+++ b/java/doc/index-all.html
@@ -662,6 +662,11 @@
 <dd>
 <div class="block">Grayscale.</div>
 </dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#setBuf(byte[], int, int, int, int)">setBuf(byte[], int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+</dd>
 <dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
 <dd>
 <div class="block"><span class="strong">Deprecated.</span>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html
index 8659aed..13e6160 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html
@@ -243,6 +243,17 @@
 <div class="block">Returns the width of the YUV image.</div>
 </td>
 </tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#setBuf(byte[], int, int, int, int)">setBuf</a></strong>(byte[]&nbsp;yuvImage,
+      int&nbsp;width,
+      int&nbsp;pad,
+      int&nbsp;height,
+      int&nbsp;subsamp)</code>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+</td>
+</tr>
 </table>
 <ul class="blockList">
 <li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
@@ -362,8 +373,8 @@
  buffer.</div>
 <dl><dt><span class="strong">Parameters:</span></dt><dd><code>yuvImage</code> - image buffer that contains or will contain YUV planar
  image data.  See <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>above</code></a> for a description of the image
- format.  You can use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> to determine the appropriate
- size for this buffer.</dd><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - the line padding used in the YUV image buffer.  For
+ format.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> to determine the minimum size for this
+ buffer.</dd><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - the line padding used in the YUV image buffer.  For
  instance, if each line in each plane of the buffer is padded to the
  nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.</dd><dd><code>height</code> - height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling used in the YUV
  image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
@@ -379,6 +390,31 @@
 <!--   -->
 </a>
 <h3>Method Detail</h3>
+<a name="setBuf(byte[], int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setBuf</h4>
+<pre>public&nbsp;void&nbsp;setBuf(byte[]&nbsp;yuvImage,
+          int&nbsp;width,
+          int&nbsp;pad,
+          int&nbsp;height,
+          int&nbsp;subsamp)
+            throws java.lang.Exception</pre>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>yuvImage</code> - image buffer that contains or will contain YUV planar
+ image data.  See <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>above</code></a> for a description of the image
+ format.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> to determine the minimum size for this
+ buffer.</dd><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - the line padding used in the YUV image buffer.  For
+ instance, if each line in each plane of the buffer is padded to the
+ nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.</dd><dd><code>height</code> - height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling used in the YUV
+ image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
 <a name="getWidth()">
 <!--   -->
 </a>
diff --git a/java/org/libjpegturbo/turbojpeg/YUVImage.java b/java/org/libjpegturbo/turbojpeg/YUVImage.java
index 6793593..619b0c3 100644
--- a/java/org/libjpegturbo/turbojpeg/YUVImage.java
+++ b/java/org/libjpegturbo/turbojpeg/YUVImage.java
@@ -74,8 +74,8 @@
    */
   public YUVImage(int width, int pad, int height, int subsamp)
                     throws Exception {
-    setBuffer(new byte[TJ.bufSizeYUV(width, pad, height, subsamp)], width, pad,
-              height, subsamp);
+    setBuf(new byte[TJ.bufSizeYUV(width, pad, height, subsamp)], width, pad,
+           height, subsamp);
   }
 
   /**
@@ -84,8 +84,8 @@
    *
    * @param yuvImage image buffer that contains or will contain YUV planar
    * image data.  See {@link YUVImage above} for a description of the image
-   * format.  You can use {@link TJ#bufSizeYUV} to determine the appropriate
-   * size for this buffer.
+   * format.  Use {@link TJ#bufSizeYUV} to determine the minimum size for this
+   * buffer.
    *
    * @param width width (in pixels) of the YUV image
    *
@@ -100,16 +100,36 @@
    */
   public YUVImage(byte[] yuvImage, int width, int pad, int height,
                   int subsamp) throws Exception {
-    setBuffer(yuvImage, width, pad, height, subsamp);
+    setBuf(yuvImage, width, pad, height, subsamp);
   }
 
-  private void setBuffer(byte[] yuvImage, int width, int pad, int height,
-                         int subsamp) throws Exception {
+  /**
+   * Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+   * instance.
+   *
+   * @param yuvImage image buffer that contains or will contain YUV planar
+   * image data.  See {@link YUVImage above} for a description of the image
+   * format.  Use {@link TJ#bufSizeYUV} to determine the minimum size for this
+   * buffer.
+   *
+   * @param width width (in pixels) of the YUV image
+   *
+   * @param pad the line padding used in the YUV image buffer.  For
+   * instance, if each line in each plane of the buffer is padded to the
+   * nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.
+   *
+   * @param height height (in pixels) of the YUV image
+   *
+   * @param subsamp the level of chrominance subsampling used in the YUV
+   * image (one of {@link TJ#SAMP_444 TJ.SAMP_*})
+   */
+  public void setBuf(byte[] yuvImage, int width, int pad, int height,
+                     int subsamp) throws Exception {
     if (yuvImage == null || width < 1 || pad < 1 || ((pad & (pad - 1)) != 0) ||
         height < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP)
       throw new Exception("Invalid argument in YUVImage()");
-    if (yuvImage.length != TJ.bufSizeYUV(width, pad, height, subsamp))
-      throw new Exception("YUV image buffer is the wrong size");
+    if (yuvImage.length < TJ.bufSizeYUV(width, pad, height, subsamp))
+      throw new Exception("YUV image buffer is not large enough");
     yuvBuf = yuvImage;
     yuvWidth = width;
     yuvPad = pad;
@@ -181,7 +201,7 @@
    public int getSize() throws Exception {
      if (yuvBuf == null)
        throw new Exception(NO_ASSOC_ERROR);
-     return yuvBuf.length;
+     return TJ.bufSizeYUV(yuvWidth, yuvPad, yuvHeight, yuvSubsamp);
    }
 
   protected long handle = 0;
diff --git a/tjbenchtest.in b/tjbenchtest.in
index 1ebd0b8..5e08c9b 100755
--- a/tjbenchtest.in
+++ b/tjbenchtest.in
@@ -159,7 +159,7 @@
 	done
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444; do
-			runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg 
+			runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
 			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
 				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
diff --git a/tjbenchtest.java.in b/tjbenchtest.java.in
index ebff9c8..1869bca 100755
--- a/tjbenchtest.java.in
+++ b/tjbenchtest.java.in
@@ -24,6 +24,9 @@
 OUTDIR=__tjbenchtest_java_output
 EXEDIR=.
 JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs"
+BMPARG=
+NSARG=
+YUVARG=
 
 if [ -d $OUTDIR ]; then
 	rm -rf $OUTDIR
@@ -32,33 +35,58 @@
 
 exec >$EXEDIR/tjbenchtest-java.log
 
+if [ $# -gt 0 ]; then
+	if [ "$1" = "-yuv" ]; then
+		NSARG=-nosmooth
+		YUVARG=-yuv
+
+# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not
+# always produce bitwise-identical results to tjCompress*() if subsampling is
+# enabled.  In both cases, if the image width or height are not evenly
+# divisible by the MCU width/height, then the bottom and/or right edge are
+# expanded.  However, the libjpeg code performs this expansion prior to
+# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is
+# after downsampling.  Thus, the two will agree only if the width/height along
+# each downsampled dimension is an odd number or is evenly divisible by the MCU
+# width/height.  This disagreement basically amounts to a round-off error, but
+# there is no easy way around it, so for now, we just test the only image that
+# works.  (NOTE: nightshot_iso_100 does not suffer from the above issue, but
+# it suffers from an unrelated problem whereby the combination of
+# tjDecompressToYUV*() and tjDecodeYUV*() does not produce bitwise-identical
+# results to tjDecompress*() if decompression scaling is enabled.  This latter
+# phenomenon is not yet fully understood but is also believed to be some sort
+# of round-off error.)
+		IMAGES="vgl_6548_0026a.bmp"
+	fi
+fi
+
 # Standard tests
 for image in $IMAGES; do
 
 	cp $IMGDIR/$image $OUTDIR
 	basename=`basename $image .bmp`
-	$EXEDIR/cjpeg -quality 95 -dct fast -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_fast_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_fast_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_fast_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct int -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_accurate_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_accurate_cjpeg.jpg
-	$EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_accurate_cjpeg.jpg
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp 
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
 	for samp in GRAY 420 422 444; do
-		$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_djpeg.bmp
-		$EXEDIR/djpeg -dct fast -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_djpeg.bmp
-		$EXEDIR/djpeg -dct int -rgb -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_djpeg.bmp
+		runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
 	done
 	for samp in 420 422; do
-		$EXEDIR/djpeg -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp
-		$EXEDIR/djpeg -dct fast -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp
-		$EXEDIR/djpeg -dct int -nosmooth -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp
+		runme $EXEDIR/djpeg -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
 	done
 
 	# Compression
 	for dct in accurate fast; do
-		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -${dct}dct
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG
 		for samp in GRAY 420 422 444; do
 			runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
 		done
@@ -71,7 +99,7 @@
 		fi
 
 		# Tiled compression & decompression
-		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 ${dctarg}
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
 		for samp in GRAY 444; do
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
@@ -79,7 +107,7 @@
 				rm $i
 			done
 		done
-		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
 		for samp in 420 422; do
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
@@ -90,7 +118,7 @@
 
 		# Tiled decompression
 		for samp in GRAY 444; do
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 ${dctarg}
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
 				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
@@ -98,7 +126,7 @@
 			done
 		done
 		for samp in 420 422; do
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -fastupsample ${dctarg}
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
 				runme cmp $i -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp
@@ -111,8 +139,8 @@
 	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
 		scalearg=`echo $scale | sed s@_@/@g`
 		for samp in GRAY 420 422 444; do
-			$EXEDIR/djpeg -rgb -scale ${scalearg} -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01
+			runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
 			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
 			rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
 		done
@@ -120,18 +148,18 @@
 
 	# Transforms
 	for samp in GRAY 420 422 444; do
-		$EXEDIR/jpegtran -flip horizontal -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg
-		$EXEDIR/jpegtran -flip vertical -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg
-		$EXEDIR/jpegtran -transpose -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg
-		$EXEDIR/jpegtran -transverse -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg
-		$EXEDIR/jpegtran -rotate 90 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg
-		$EXEDIR/jpegtran -rotate 180 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg
-		$EXEDIR/jpegtran -rotate 270 -trim $OUTDIR/${basename}_${samp}_Q95.jpg >$OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg
+		runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
 	done
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444; do
-			$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01
+			runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
 				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
@@ -139,8 +167,8 @@
 			done
 		done
 		for samp in 420 422; do
-			$EXEDIR/djpeg -nosmooth -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -fastupsample
+			runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
 				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
@@ -152,7 +180,7 @@
 	# Grayscale transform
 	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
 		for samp in GRAY 444 422 420; do
-			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -grayscale
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG
 			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
 				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
 				runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp
@@ -166,8 +194,8 @@
 		for samp in GRAY 444 422 420; do
 			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
 				scalearg=`echo $scale | sed s@_@/@g`
-				$EXEDIR/djpeg -rgb -scale ${scalearg} -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
-				runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01
+				runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+				runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
 				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
 				rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
 			done