Oops. dumpbuf() was displaying only red components.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@428 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/BUILDING.txt b/BUILDING.txt
index 50e2fb7..0af9fdf 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -30,6 +30,13 @@
-- GCC v4.1 or later recommended for best performance
+-- If building the TurboJPEG/OSS JNI wrapper, jni.h is required. Some systems,
+ such as OS X 10.4 and Solaris 10, have this header pre-installed. On OS X
+ 10.5 and later, the header can be obtained by installing the Java Developer
+ Package, which can be downloaded from http://connect.apple.com. On Linux
+ and other systems, the header can be obtained by installing the GCJ
+ (GCC-Java) development packages or the Oracle Java Development Kit (JDK).
+
==================
Out-of-Tree Builds
@@ -117,6 +124,14 @@
disable encoding or decoding (respectively.)
+TurboJPEG/OSS JNI Wrapper
+-------------------------
+Add --with-jni to the configure command line to incorporate an optional Java
+Native Interface wrapper into the TurboJPEG/OSS dynamic library. This allows
+the dynamic library to be used directly from Java applications. See
+java/README for more details.
+
+
========================
Installing libjpeg-turbo
========================
@@ -295,6 +310,15 @@
-- NASM (http://www.nasm.us/) 0.98 or later (NASM 2.05 or later is required for
a 64-bit build)
+-- If building the TurboJPEG/OSS JNI wrapper, jni.h is required. This header
+ can be obtained by installing the Oracle Java Development Kit (JDK).
+ * If using Visual C++, then add the appropriate Java include directories
+ (Example: c:\Program Files\Java\jdk1.6.0_23\include;c:\Program Files\Java\jdk1.6.0_23\include\win32)
+ to the INCLUDE environment variable prior to building libjpeg-turbo.
+ * If using MinGW, then add the appropriate Java include directories
+ (Example: /c/Program Files/Java/jdk1.6.0_23/include:/c/Program Files/Java/jdk1.6.0_23/include/win32)
+ to the CPATH environment variable prior to building libjpeg-turbo.
+
==================
Out-of-Tree Builds
@@ -434,6 +458,14 @@
disable encoding or decoding (respectively.)
+TurboJPEG/OSS JNI Wrapper
+-------------------------
+Add "-DWITH_JNI=1" to the cmake command line to incorporate an optional Java
+Native Interface wrapper into the TurboJPEG/OSS dynamic library. This allows
+the dynamic library to be used directly from Java applications. See
+java/README for more details.
+
+
========================
Installing libjpeg-turbo
========================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c1d1b9..c7dc27f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@
cmake_minimum_required(VERSION 2.6)
project(libjpeg-turbo C)
-set(VERSION 1.1.0)
+set(VERSION 1.1.90)
if(MINGW OR CYGWIN)
execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
@@ -58,6 +58,16 @@
message(STATUS "Arithmetic decoding support disabled")
endif()
+# The JNI wrapper is opt-in: WITH_JNI is set to 0 here unless it has already
+# been defined (for example with -DWITH_JNI=1 on the cmake command line).
+if(NOT DEFINED WITH_JNI)
+ set(WITH_JNI 0)
+endif()
+
+# Report the resulting setting in the configuration output.
+if(WITH_JNI)
+ message(STATUS "TurboJPEG/OSS JNI wrapper enabled")
+else()
+ message(STATUS "TurboJPEG/OSS JNI wrapper disabled")
+endif()
+
set(JPEG_LIB_VERSION 62)
set(DLL_VERSION ${JPEG_LIB_VERSION})
set(FULLVERSION ${DLL_VERSION}.0.0)
@@ -105,6 +115,14 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR})
+# When the JNI wrapper is requested, verify that the compiler can find jni.h.
+# Configuration stops with a hint about the relevant environment variables if
+# the header is not found.
+if(WITH_JNI)
+ include(CheckIncludeFiles)
+ check_include_files(jni.h HAVE_JNI_H)
+ if(NOT HAVE_JNI_H)
+ message(FATAL_ERROR "Cannot find jni.h. Be sure to add the Java include directories to the INCLUDE environment variable (MSVC) or the CPATH environment variable (GCC).")
+ endif()
+endif()
+
#
# Targets
@@ -155,8 +173,16 @@
add_dependencies(jpeg-static simd)
endif()
-add_library(turbojpeg SHARED turbojpegl.c)
+# Sources of the TurboJPEG/OSS dynamic library.  turbojpeg-jni.c is added only
+# when the JNI wrapper is enabled.
+set(TURBOJPEG_SOURCES turbojpegl.c)
+if(WITH_JNI)
+ set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
+endif()
+
+add_library(turbojpeg SHARED ${TURBOJPEG_SOURCES})
set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
+# NOTE(review): --kill-at is presumably needed so that the JNI entry points are
+# exported from the MinGW DLL without stdcall decoration -- confirm.  It is
+# applied to every MinGW build, not only when WITH_JNI is set.
+if(MINGW)
+ set_target_properties(turbojpeg PROPERTIES LINK_FLAGS -Wl,--kill-at)
+endif()
target_link_libraries(turbojpeg jpeg-static)
set_target_properties(turbojpeg PROPERTIES LINK_INTERFACE_LIBRARIES "")
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 12d1c9e..23e73d8 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,3 +1,11 @@
+1.1.90 (1.2 beta1)
+==================
+
+[1] Added a JNI wrapper for TurboJPEG/OSS. See java/README for more details.
+
+[2] TurboJPEG/OSS can now scale down images during decompression.
+
+
1.1.0
=====
diff --git a/Makefile.am b/Makefile.am
index 1a8e532..312ec41 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -34,12 +34,25 @@
endif
-libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpegl.c turbojpeg.h \
- turbojpeg-mapfile
+libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpegl.c turbojpeg.h
+
+# With the JNI wrapper enabled, turbojpeg-jni.c is compiled into the library
+# using JAVA_CFLAGS, and the turbojpeg-mapfile.jni variant of the map file is
+# selected.  Otherwise the plain turbojpeg-mapfile is used.
+if WITH_JNI
+
+libturbojpeg_la_SOURCES += turbojpeg-jni.c
+libturbojpeg_la_CFLAGS = ${JAVA_CFLAGS}
+TJMAPFILE = turbojpeg-mapfile.jni
+
+else
+
+TJMAPFILE = turbojpeg-mapfile
+
+endif
+
+# The selected map file is listed as a source and, below, passed to the linker
+# when anonymous version scripts are in use.
+libturbojpeg_la_SOURCES += $(TJMAPFILE)
if ANON_VERSION_SCRIPT
-libturbojpeg_la_LDFLAGS += $(ANON_VERSION_SCRIPT_FLAG)$(srcdir)/turbojpeg-mapfile
+libturbojpeg_la_LDFLAGS += $(ANON_VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE)
endif
@@ -113,9 +126,9 @@
TESTFILES= testorig.jpg testorig.ppm testimg.bmp testimgflt.jpg \
testimgfst.jpg testimgint.jpg testimgp.jpg testimgflt.ppm testimgfst.ppm \
testimgint.ppm testimgflt-nosimd.jpg testimgcrop.jpg testimgari.jpg \
- testimgari.ppm testimgfst100.jpg
+ testimgari.ppm testimgfst100.jpg testimggray.jpg
-EXTRA_DIST = win release $(DOCS) $(TESTFILES) CMakeLists.txt \
+EXTRA_DIST = win release java $(DOCS) $(TESTFILES) CMakeLists.txt \
sharedlib/CMakeLists.txt cmakescripts libjpeg.map.in
dist-hook:
@@ -137,6 +150,8 @@
else
cmp $(srcdir)/testimgflt-nosimd.jpg testoutflt.jpg
endif
+ ./cjpeg -dct int -grayscale -outfile testoutgray.jpg $(srcdir)/testorig.ppm
+ cmp $(srcdir)/testimggray.jpg testoutgray.jpg
./djpeg -dct int -fast -ppm -outfile testoutint.ppm $(srcdir)/testorig.jpg
cmp $(srcdir)/testimgint.ppm testoutint.ppm
./djpeg -dct fast -ppm -outfile testoutfst.ppm $(srcdir)/testorig.jpg
diff --git a/configure.ac b/configure.ac
index c520b61..84fca81 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.56])
-AC_INIT([libjpeg-turbo], [1.1.0])
+AC_INIT([libjpeg-turbo], [1.1.90])
BUILD=`date +%Y%m%d`
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
@@ -223,6 +223,48 @@
AM_CONDITIONAL([WITH_ARITH], [test "x$with_arith_dec" != "xno" -o "x$with_arith_enc" != "xno"])
+# JAVA_CFLAGS can be set by the user to tell the compiler where jni.h lives.
+AC_ARG_VAR(JAVA_CFLAGS, [Compiler flags needed to find jni.h (default: -I/System/Library/Frameworks/JavaVM.framework/Headers on OS X, '-I/usr/java/include -I/usr/java/include/solaris' on Solaris, and '-I/usr/java/default/include -I/usr/java/default/include/linux' on Linux)])
+
+AC_MSG_CHECKING([whether to include JNI wrapper in TurboJPEG/OSS])
+AC_ARG_WITH([jni],
+ AC_HELP_STRING([--with-jni],[Include JNI wrapper in the TurboJPEG/OSS library]))
+
+# Both variables are substituted further down whether or not the wrapper is
+# enabled, so give them their disabled values first.
+BUILDJNILIB=0
+RPM_CONFIG_ARGS=
+if test "x$with_jni" = "xyes"; then
+ AC_MSG_RESULT(yes)
+
+ # Per-platform default include flags, used only when JAVA_CFLAGS is empty.
+ # BUILDJNILIB is switched on for darwin only.
+ case $host_os in
+ darwin*)
+ DEFAULT_JAVA_CFLAGS=-I/System/Library/Frameworks/JavaVM.framework/Headers
+ BUILDJNILIB=1
+ ;;
+ solaris*)
+ DEFAULT_JAVA_CFLAGS='-I/usr/java/include -I/usr/java/include/solaris'
+ ;;
+ linux*)
+ DEFAULT_JAVA_CFLAGS='-I/usr/java/default/include -I/usr/java/default/include/linux'
+ ;;
+ esac
+ if test "x$JAVA_CFLAGS" = "x"; then
+ JAVA_CFLAGS=$DEFAULT_JAVA_CFLAGS
+ fi
+
+ # Probe for jni.h with JAVA_CFLAGS temporarily appended to CPPFLAGS, then
+ # restore CPPFLAGS.  A missing header aborts configure.
+ SAVE_CPPFLAGS=${CPPFLAGS}
+ CPPFLAGS="${CPPFLAGS} ${JAVA_CFLAGS}"
+ AC_CHECK_HEADERS([jni.h], [DUMMY=1],
+ [AC_MSG_ERROR([Could not find JNI header file])])
+ CPPFLAGS=${SAVE_CPPFLAGS}
+ AC_SUBST(JAVA_CFLAGS)
+
+ RPM_CONFIG_ARGS=--with-jni
+else
+ AC_MSG_RESULT(no)
+fi
+# WITH_JNI drives the conditional of the same name in Makefile.am.
+AM_CONDITIONAL([WITH_JNI], [test "x$with_jni" = "xyes"])
+AC_SUBST(BUILDJNILIB)
+AC_SUBST(RPM_CONFIG_ARGS)
+
# SIMD is optional
AC_ARG_WITH([simd],
AC_HELP_STRING([--without-simd],[Omit accelerated SIMD routines.]))
diff --git a/java/README b/java/README
new file mode 100644
index 0000000..87d3181
--- /dev/null
+++ b/java/README
@@ -0,0 +1,52 @@
+TurboJPEG/OSS JNI Wrapper
+=========================
+
+TurboJPEG/OSS can optionally be built with a Java Native Interface wrapper,
+which allows the TurboJPEG/OSS dynamic library to be loaded and used directly
+from Java applications. The Java front end for this is defined in several
+classes located under org/libjpegturbo/turbojpeg. The source code for these
+Java classes is licensed under a BSD-style license, so the files can be
+incorporated directly into both open source and proprietary projects without
+restriction.
+
+TJExample.java, which should also be located in the same directory as this
+README file, demonstrates how to use the TurboJPEG/OSS Java front end to
+compress and decompress JPEG images in memory.
+
+ javac TJExample.java
+
+builds .class files for both the front end and example code.
+
+
+Note for OS X users
+-------------------
+
+/usr/lib, the directory under which libturbojpeg.dylib is installed on Mac
+systems, is not part of the normal Java library path. Thus, when running a
+Java application that uses TurboJPEG/OSS on Mac systems, you will need to pass
+an argument of -Djava.library.path=/usr/lib to java.
+
+
+Note for Solaris users
+----------------------
+
+/opt/libjpeg-turbo/lib, the directory under which libturbojpeg.so is installed
+on Solaris systems, is not part of the normal Java library path. Thus, when
+running a Java application that uses TurboJPEG/OSS on Solaris systems, you will
+need to pass an argument of -Djava.library.path=/opt/libjpeg-turbo/lib to java.
+If using a 64-bit data model, then instead pass an argument of
+-Djava.library.path=/opt/libjpeg-turbo/lib/amd64 to use the 64-bit version of
+libturbojpeg.so.
+
+
+Note for MinGW users
+--------------------
+
+When libjpeg-turbo is built with MinGW, the TurboJPEG/OSS dynamic library is
+named libturbojpeg.dll instead of turbojpeg.dll. This is in keeping with the
+convention of MinGW, and it also avoids a filename conflict when the GCC and
+Visual C++ versions of the libjpeg-turbo SDK are installed on the same system.
+However, the TurboJPEG/OSS JNI wrapper will not work on Windows unless the DLL
+is named turbojpeg.dll. You can work around this by renaming the DLL or by
+changing the System.loadLibrary() calls in TJCompressor.java and
+TJDecompressor.java so that they load "libturbojpeg" instead of "turbojpeg".
diff --git a/java/TJExample.java b/java/TJExample.java
new file mode 100644
index 0000000..3362e81
--- /dev/null
+++ b/java/TJExample.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This program demonstrates how to compress and decompress JPEG files using
+ * the TurboJPEG JNI wrapper
+ */
+
+import java.io.*;
+import org.libjpegturbo.turbojpeg.*;
+
+/**
+ * Command-line example for the TurboJPEG/OSS Java front end: reads a JPEG file
+ * into memory, decompresses it (optionally scaled down by 1/N), recompresses
+ * the pixels at quality 95 and writes the result to a second file.
+ */
+public class TJExample {
+
+  public static final String classname=TJExample.class.getName();
+
+  /** Prints the command-line syntax and terminates the JVM with status 1. */
+  private static void usage() {
+    System.out.println("\nUSAGE: java "+classname+" <Input file> <Output file> [options]\n");
+    System.out.println("Options:\n");
+    System.out.println("-scale 1/N = scale the width/height of the output image by a factor of 1/N");
+    System.out.println(" (N = 1, 2, 4, or 8)\n");
+    System.exit(1);
+  }
+
+  /**
+   * Reads the entire contents of a file into memory.
+   *
+   * InputStream.available() is only an estimate and a single read() call may
+   * deliver fewer bytes than requested, so the data is accumulated chunk by
+   * chunk until end of stream.  The stream is closed even if reading fails.
+   *
+   * @param file the file to read
+   * @return the bytes of the file (possibly zero-length)
+   * @throws IOException if the file cannot be opened or read
+   */
+  private static byte [] readFile(File file) throws IOException {
+    FileInputStream fis=new FileInputStream(file);
+    try {
+      ByteArrayOutputStream bos=new ByteArrayOutputStream();
+      byte [] chunk=new byte[65536];
+      int n;
+      while((n=fis.read(chunk))!=-1) bos.write(chunk, 0, n);
+      return bos.toByteArray();
+    } finally {
+      fis.close();
+    }
+  }
+
+  /**
+   * Program entry point.  Any exception raised along the way is printed to
+   * standard output.
+   *
+   * @param argv input file, output file, then an optional "-scale 1/N"
+   */
+  public static void main(String argv[]) {
+
+    try {
+
+      if(argv.length<2) {
+        usage();
+      }
+
+      // "-scale" must be followed by 1/N with N a power of two no larger than
+      // 8; anything else prints the usage text and exits.
+      int scalefactor=1;
+      for(int i=2; i<argv.length; i++) {
+        if(argv[i].equalsIgnoreCase("-scale") && i<argv.length-1) {
+          String [] scalearg=argv[++i].split("/");
+          if(scalearg.length!=2 || Integer.parseInt(scalearg[0])!=1
+            || (scalefactor=Integer.parseInt(scalearg[1]))<1
+            || scalefactor>8 || (scalefactor&(scalefactor-1))!=0)
+            usage();
+        }
+      }
+
+      byte [] inputbuf=readFile(new File(argv[0]));
+      if(inputbuf.length<1) {
+        System.out.println("Input file contains no data");
+        System.exit(1);
+      }
+
+      TJDecompressor tjd=new TJDecompressor(inputbuf);
+      int width=tjd.getWidth();
+      int height=tjd.getHeight();
+      int subsamp=tjd.getSubsamp();
+      System.out.print("Source Image: "+width+" x "+height+" pixels, ");
+      switch(subsamp) {
+        case TJ.SAMP444: System.out.println("4:4:4 subsampling"); break;
+        case TJ.SAMP422: System.out.println("4:2:2 subsampling"); break;
+        case TJ.SAMP420: System.out.println("4:2:0 subsampling"); break;
+        case TJ.GRAYSCALE: System.out.println("Grayscale"); break;
+        default: System.out.println("Unknown subsampling"); break;
+      }
+
+      // Scaled dimensions are rounded up.
+      if(scalefactor!=1) {
+        width=(width+scalefactor-1)/scalefactor;
+        height=(height+scalefactor-1)/scalefactor;
+        System.out.println("Dest. Image: "+width+" x "+height
+          +" pixels");
+      }
+
+      byte [] tmpbuf=tjd.decompress(width, 0, height, TJ.BGR, TJ.BOTTOMUP);
+      tjd.close();
+
+      TJCompressor tjc=new TJCompressor(tmpbuf, width, 0, height, TJ.BGR);
+      byte [] outputbuf=new byte[(int)TJ.bufSize(width, height)];
+      long outputsize=tjc.compress(outputbuf, subsamp, 95, TJ.BOTTOMUP);
+      tjc.close();
+
+      // Only the first outputsize bytes of outputbuf hold JPEG data.
+      FileOutputStream fos=new FileOutputStream(new File(argv[1]));
+      try {
+        fos.write(outputbuf, 0, (int)outputsize);
+      } finally {
+        fos.close();
+      }
+
+    } catch(Exception e) {
+      System.out.println(e);
+    }
+  }
+
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java
new file mode 100644
index 0000000..6bbbd72
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJ.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * Constants and small static helpers shared by the TurboJPEG/OSS Java classes.
+ */
+public final class TJ {
+
+  // Subsampling options
+  public static final int NUMSUBOPT = 4;
+  public static final int SAMP444 = 0;
+  public static final int SAMP422 = 1;
+  public static final int SAMP420 = 2;
+  public static final int GRAYSCALE = 3;
+
+  // Pixel formats
+  public static final int NUMPIXFORMATS = 7;
+  public static final int RGB = 0;
+  public static final int BGR = 1;
+  public static final int RGBX = 2;
+  public static final int BGRX = 3;
+  public static final int XBGR = 4;
+  public static final int XRGB = 5;
+  public static final int YUV = 6;
+
+  // Value returned by getPixelSize() for each pixel format, indexed by the
+  // pixel format constants above
+  public static final int [] pixelSize = {
+    3, 3, 4, 4, 4, 4, 3
+  };
+
+  /**
+   * Looks up the pixelSize entry for a pixel format.
+   *
+   * @param pixelFormat one of the pixel format constants
+   * @return the corresponding entry of pixelSize
+   * @throws Exception if pixelFormat is outside 0..NUMPIXFORMATS-1
+   */
+  public static int getPixelSize(int pixelFormat) throws Exception {
+    checkPixelFormat(pixelFormat);
+    return pixelSize[pixelFormat];
+  }
+
+  // Flags
+  public static final int BOTTOMUP = 2;
+  public static final int FORCEMMX = 8;
+  public static final int FORCESSE = 16;
+  public static final int FORCESSE2 = 32;
+  public static final int FORCESSE3 = 128;
+  public static final int FASTUPSAMPLE = 256;
+
+  // Flag bits used only to build the per-format table below
+  private static final int TJ_BGR = 1;
+  private static final int TJ_ALPHAFIRST = 64;
+  private static final int TJ_YUV = 512;
+
+  // Flag bits implied by each pixel format, indexed like pixelSize
+  private static final int [] flags = {
+    0, TJ_BGR, 0, TJ_BGR, TJ_BGR|TJ_ALPHAFIRST, TJ_ALPHAFIRST, TJ_YUV
+  };
+
+  /**
+   * Looks up the flag bits implied by a pixel format.
+   *
+   * @param pixelFormat one of the pixel format constants
+   * @return the corresponding entry of the flags table
+   * @throws Exception if pixelFormat is outside 0..NUMPIXFORMATS-1
+   */
+  public static int getFlags(int pixelFormat) throws Exception {
+    checkPixelFormat(pixelFormat);
+    return flags[pixelFormat];
+  }
+
+  /** Rejects values that are not valid indices into the per-format tables. */
+  private static void checkPixelFormat(int pixelFormat) throws Exception {
+    if(pixelFormat >= 0 && pixelFormat < NUMPIXFORMATS) return;
+    throw new Exception("Invalid pixel format");
+  }
+
+  // Implemented in the native library
+  public static final native long bufSize(int width, int height)
+    throws Exception;
+
+  // Implemented in the native library
+  public static final native long bufSizeYUV(int width, int height,
+    int subsamp)
+    throws Exception;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
new file mode 100644
index 0000000..38de21f
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * Compresses a bitmap held in a byte array into a JPEG image through the
+ * native TurboJPEG/OSS library.
+ */
+public class TJCompressor {
+
+  /** Creates a compressor with no bitmap attached yet. */
+  public TJCompressor() throws Exception {
+    init();
+  }
+
+  /**
+   * Creates a compressor and attaches a bitmap to it.  The arguments are the
+   * same as for setBitmapBuffer().
+   */
+  public TJCompressor(byte [] buf, int width, int pitch, int height,
+    int pixelFormat) throws Exception {
+    setBitmapBuffer(buf, width, pitch, height, pixelFormat);
+  }
+
+  /**
+   * Attaches the bitmap that subsequent compress() calls will encode.  The
+   * array is referenced, not copied.
+   *
+   * @param buf bitmap data (must not be null)
+   * @param width bitmap width (must be at least 1)
+   * @param pitch 0 to use width * TJ.getPixelSize(pixelFormat), otherwise the
+   *        value to store as the bitmap pitch (must not be negative)
+   * @param height bitmap height (must be at least 1)
+   * @param pixelFormat one of the TJ pixel format constants
+   * @throws Exception if an argument is invalid or native initialization fails
+   */
+  public void setBitmapBuffer(byte [] buf, int width, int pitch, int height,
+    int pixelFormat) throws Exception {
+    if(handle == 0) init();
+    if(buf == null || width < 1 || height < 1 || pitch < 0 || pixelFormat < 0
+      || pixelFormat >= TJ.NUMPIXFORMATS)
+      throw new Exception("Invalid argument in setBitmapBuffer()");
+    bitmapBuf = buf;
+    bitmapWidth = width;
+    if(pitch == 0) bitmapPitch = width * TJ.getPixelSize(pixelFormat);
+    else bitmapPitch = pitch;
+    bitmapHeight = height;
+    bitmapPixelFormat = pixelFormat;
+  }
+
+  /**
+   * Compresses the attached bitmap into dstBuf.
+   *
+   * @param dstBuf receives the JPEG data (must not be null)
+   * @param jpegSubsamp passed through to the native compressor
+   * @param jpegQual passed through to the native compressor
+   * @param flags combined with the flags implied by the bitmap pixel format
+   * @return the JPEG size in bytes
+   * @throws Exception if no bitmap has been attached, dstBuf is null, or the
+   *         native call fails
+   */
+  public long compress(byte [] dstBuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception {
+    // Without this check an unattached compressor would fail further down
+    // with "Invalid pixel format", because bitmapPixelFormat is still -1.
+    if(bitmapBuf == null) throw new Exception("Bitmap buffer not initialized");
+    if(dstBuf == null) throw new Exception("Invalid argument in compress()");
+    return compress(bitmapBuf, bitmapWidth, bitmapPitch, bitmapHeight,
+      TJ.getPixelSize(bitmapPixelFormat), dstBuf, jpegSubsamp, jpegQual,
+      flags | TJ.getFlags(bitmapPixelFormat));
+  }
+
+  /** Calls the native destroy() for this compressor. */
+  public void close() throws Exception {
+    destroy();
+  }
+
+  /** Calls close() when the object is collected, ignoring any failure. */
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } catch(Exception e) {
+      // Best effort only: nothing useful can be done with an error here.
+    }
+    finally {
+      super.finalize();
+    }
+  }
+
+  private native void init() throws Exception;
+
+  private native void destroy() throws Exception;
+
+  // JPEG size in bytes is returned
+  private native long compress(byte [] srcBuf, int width, int pitch,
+    int height, int pixelSize, byte [] dstbuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception;
+
+  static {
+    System.loadLibrary("turbojpeg");
+  }
+
+  // 0 until init() has run (see setBitmapBuffer())
+  private long handle = 0;
+  private byte [] bitmapBuf = null;
+  private int bitmapWidth = 0;
+  private int bitmapHeight = 0;
+  private int bitmapPitch = 0;
+  private int bitmapPixelFormat = -1;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
new file mode 100644
index 0000000..446eb3c
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * Decompresses a JPEG image held in a byte array through the native
+ * TurboJPEG/OSS library.
+ */
+public class TJDecompressor {
+
+  /** Creates a decompressor with no JPEG buffer attached yet. */
+  public TJDecompressor() throws Exception {
+    init();
+  }
+
+  /** Creates a decompressor and attaches a JPEG buffer to it. */
+  public TJDecompressor(byte [] buf) throws Exception {
+    setJPEGBuffer(buf);
+  }
+
+  /**
+   * Attaches the JPEG image to decompress and reads its header.  The array is
+   * referenced, not copied.
+   *
+   * @param buf JPEG data (must not be null)
+   * @throws Exception if buf is null or a native call fails
+   */
+  public void setJPEGBuffer(byte [] buf) throws Exception {
+    if(handle == 0) init();
+    if(buf == null) throw new Exception("Invalid argument in setJPEGBuffer()");
+    jpegBuf = buf;
+    decompressHeader();
+  }
+
+  // The accessors below test header for null first: it stays null until
+  // setJPEGBuffer() has succeeded, and dereferencing it would raise a
+  // NullPointerException instead of the intended error.
+
+  /** Returns the width read from the JPEG header. */
+  public int getWidth() throws Exception {
+    if(header == null || header.width < 1)
+      throw new Exception("JPEG buffer not initialized");
+    return header.width;
+  }
+
+  /** Returns the height read from the JPEG header. */
+  public int getHeight() throws Exception {
+    if(header == null || header.height < 1)
+      throw new Exception("JPEG buffer not initialized");
+    return header.height;
+  }
+
+  /** Returns the subsampling value read from the JPEG header. */
+  public int getSubsamp() throws Exception {
+    if(header == null || header.subsamp < 0)
+      throw new Exception("JPEG buffer not initialized");
+    return header.subsamp;
+  }
+
+  /**
+   * Asks the native library for the scaled width, given the dimensions from
+   * the JPEG header and the requested dimensions.
+   */
+  public int getScaledWidth(int desired_width, int desired_height)
+    throws Exception {
+    if(header == null || header.width < 1 || header.height < 1)
+      throw new Exception("JPEG buffer not initialized");
+    return getScaledWidth(header.width, header.height, desired_width,
+      desired_height);
+  }
+
+  /**
+   * Asks the native library for the scaled height, given the dimensions from
+   * the JPEG header and the requested dimensions.
+   */
+  public int getScaledHeight(int output_width, int output_height)
+    throws Exception {
+    if(header == null || header.width < 1 || header.height < 1)
+      throw new Exception("JPEG buffer not initialized");
+    return getScaledHeight(header.width, header.height, output_width,
+      output_height);
+  }
+
+  /**
+   * Decompresses the attached JPEG image into a caller-supplied buffer.
+   *
+   * @param dstBuf receives the decompressed image (must not be null)
+   * @param width passed through to the native decompressor
+   * @param pitch passed through to the native decompressor
+   * @param height passed through to the native decompressor
+   * @param pixelFormat one of the TJ pixel format constants
+   * @param flags combined with the flags implied by pixelFormat
+   * @throws Exception if no JPEG buffer is attached, dstBuf is null,
+   *         pixelFormat is invalid, or the native call fails
+   */
+  public void decompress(byte [] dstBuf, int width, int pitch,
+    int height, int pixelFormat, int flags) throws Exception {
+    if(jpegBuf == null) throw new Exception("JPEG buffer not initialized");
+    if(dstBuf == null) throw new Exception("Invalid argument in decompress()");
+    decompress(jpegBuf, jpegBuf.length, dstBuf, width, pitch, height,
+      TJ.getPixelSize(pixelFormat), flags | TJ.getFlags(pixelFormat));
+  }
+
+  /**
+   * Decompresses the attached JPEG image into a newly allocated buffer.
+   *
+   * @param width requested width (must not be negative)
+   * @param pitch 0 to use scaled width * pixel size, otherwise the pitch to
+   *        use (must not be negative)
+   * @param height requested height (must not be negative)
+   * @param pixelFormat one of the TJ pixel format constants
+   * @param flags combined with the flags implied by pixelFormat
+   * @return the buffer holding the decompressed image
+   * @throws Exception if an argument is invalid, no JPEG buffer is attached,
+   *         the image does not fit in a Java array, or a native call fails
+   */
+  public byte [] decompress(int width, int pitch, int height,
+    int pixelFormat, int flags) throws Exception {
+    if(width < 0 || height < 0 || pitch < 0 || pixelFormat < 0
+      || pixelFormat >= TJ.NUMPIXFORMATS)
+      throw new Exception("Invalid argument in decompress()");
+    // Checked before any size is computed or memory is allocated.
+    if(jpegBuf == null) throw new Exception("JPEG buffer not initialized");
+    int pixelSize = TJ.getPixelSize(pixelFormat);
+    int scaledWidth = getScaledWidth(width, height);
+    int scaledHeight = getScaledHeight(width, height);
+    if(pitch == 0) pitch = scaledWidth * pixelSize;
+    long bufSize;
+    if(pixelFormat == TJ.YUV)
+      bufSize = TJ.bufSizeYUV(width, height, header.subsamp);
+    // Widen before multiplying so that the product cannot wrap around in int.
+    else bufSize = (long)pitch * scaledHeight;
+    if(bufSize > Integer.MAX_VALUE)
+      throw new Exception("Image is too large");
+    byte [] buf = new byte[(int)bufSize];
+    decompress(jpegBuf, jpegBuf.length, buf, width, pitch, height, pixelSize,
+      flags | TJ.getFlags(pixelFormat));
+    return buf;
+  }
+
+  /** Calls the native destroy() for this decompressor. */
+  public void close() throws Exception {
+    destroy();
+  }
+
+  /** Calls close() when the object is collected, ignoring any failure. */
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } catch(Exception e) {
+      // Best effort only: nothing useful can be done with an error here.
+    }
+    finally {
+      super.finalize();
+    }
+  }
+
+  private native void init() throws Exception;
+
+  private native void destroy() throws Exception;
+
+  private native TJHeaderInfo decompressHeader(byte [] srcBuf, long size)
+    throws Exception;
+
+  /** Reads the header of the attached JPEG buffer into the header field. */
+  private void decompressHeader() throws Exception {
+    header = decompressHeader(jpegBuf, jpegBuf.length);
+  }
+
+  private native void decompress(byte [] srcBuf, long size, byte [] dstBuf,
+    int width, int pitch, int height, int pixelSize, int flags)
+    throws Exception;
+
+  private native int getScaledWidth(int input_width, int input_height,
+    int output_width, int output_height) throws Exception;
+
+  private native int getScaledHeight(int input_width, int input_height,
+    int output_width, int output_height) throws Exception;
+
+  static {
+    System.loadLibrary("turbojpeg");
+  }
+
+  // 0 until init() has run (see setJPEGBuffer())
+  private long handle = 0;
+  private byte [] jpegBuf = null;
+  // null until decompressHeader() has succeeded
+  TJHeaderInfo header = null;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJHeaderInfo.java b/java/org/libjpegturbo/turbojpeg/TJHeaderInfo.java
new file mode 100644
index 0000000..e4ee59f
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJHeaderInfo.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * Holder for the values TJDecompressor obtains from a JPEG header.  Every
+ * field starts at -1, which the TJDecompressor accessors report as
+ * "JPEG buffer not initialized".
+ */
+public class TJHeaderInfo {
+ // Subsampling value; TJExample compares it against the TJ.SAMP* and
+ // TJ.GRAYSCALE constants
+ public int subsamp = -1;
+ // Image width
+ public int width = -1;
+ // Image height
+ public int height = -1;
+};
diff --git a/java/org_libjpegturbo_turbojpeg_TJ.h b/java/org_libjpegturbo_turbojpeg_TJ.h
new file mode 100644
index 0000000..7009251
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJ.h
@@ -0,0 +1,73 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJ */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJ
+#define _Included_org_libjpegturbo_turbojpeg_TJ
+#ifdef __cplusplus
+extern "C" {
+#endif
+#undef org_libjpegturbo_turbojpeg_TJ_NUMSUBOPT
+#define org_libjpegturbo_turbojpeg_TJ_NUMSUBOPT 4L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP444
+#define org_libjpegturbo_turbojpeg_TJ_SAMP444 0L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP422
+#define org_libjpegturbo_turbojpeg_TJ_SAMP422 1L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP420
+#define org_libjpegturbo_turbojpeg_TJ_SAMP420 2L
+#undef org_libjpegturbo_turbojpeg_TJ_GRAYSCALE
+#define org_libjpegturbo_turbojpeg_TJ_GRAYSCALE 3L
+#undef org_libjpegturbo_turbojpeg_TJ_NUMPIXFORMATS
+#define org_libjpegturbo_turbojpeg_TJ_NUMPIXFORMATS 7L
+#undef org_libjpegturbo_turbojpeg_TJ_RGB
+#define org_libjpegturbo_turbojpeg_TJ_RGB 0L
+#undef org_libjpegturbo_turbojpeg_TJ_BGR
+#define org_libjpegturbo_turbojpeg_TJ_BGR 1L
+#undef org_libjpegturbo_turbojpeg_TJ_RGBX
+#define org_libjpegturbo_turbojpeg_TJ_RGBX 2L
+#undef org_libjpegturbo_turbojpeg_TJ_BGRX
+#define org_libjpegturbo_turbojpeg_TJ_BGRX 3L
+#undef org_libjpegturbo_turbojpeg_TJ_XBGR
+#define org_libjpegturbo_turbojpeg_TJ_XBGR 4L
+#undef org_libjpegturbo_turbojpeg_TJ_XRGB
+#define org_libjpegturbo_turbojpeg_TJ_XRGB 5L
+#undef org_libjpegturbo_turbojpeg_TJ_YUV
+#define org_libjpegturbo_turbojpeg_TJ_YUV 6L
+#undef org_libjpegturbo_turbojpeg_TJ_BOTTOMUP
+#define org_libjpegturbo_turbojpeg_TJ_BOTTOMUP 2L
+#undef org_libjpegturbo_turbojpeg_TJ_FORCEMMX
+#define org_libjpegturbo_turbojpeg_TJ_FORCEMMX 8L
+#undef org_libjpegturbo_turbojpeg_TJ_FORCESSE
+#define org_libjpegturbo_turbojpeg_TJ_FORCESSE 16L
+#undef org_libjpegturbo_turbojpeg_TJ_FORCESSE2
+#define org_libjpegturbo_turbojpeg_TJ_FORCESSE2 32L
+#undef org_libjpegturbo_turbojpeg_TJ_FORCESSE3
+#define org_libjpegturbo_turbojpeg_TJ_FORCESSE3 128L
+#undef org_libjpegturbo_turbojpeg_TJ_FASTUPSAMPLE
+#define org_libjpegturbo_turbojpeg_TJ_FASTUPSAMPLE 256L
+#undef org_libjpegturbo_turbojpeg_TJ_TJ_BGR
+#define org_libjpegturbo_turbojpeg_TJ_TJ_BGR 1L
+#undef org_libjpegturbo_turbojpeg_TJ_TJ_ALPHAFIRST
+#define org_libjpegturbo_turbojpeg_TJ_TJ_ALPHAFIRST 64L
+#undef org_libjpegturbo_turbojpeg_TJ_TJ_YUV
+#define org_libjpegturbo_turbojpeg_TJ_TJ_YUV 512L
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJ
+ * Method: bufSize
+ * Signature: (II)J
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
+ (JNIEnv *, jclass, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJ
+ * Method: bufSizeYUV
+ * Signature: (III)J
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV
+ (JNIEnv *, jclass, jint, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJCompressor.h b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
new file mode 100644
index 0000000..090929c
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
@@ -0,0 +1,37 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJCompressor */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJCompressor
+#define _Included_org_libjpegturbo_turbojpeg_TJCompressor
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
+ (JNIEnv *, jobject);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: destroy
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
+ (JNIEnv *, jobject);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: compress
+ * Signature: ([BIIII[BIII)J
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress
+ (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
new file mode 100644
index 0000000..3277bcf
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
@@ -0,0 +1,61 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJDecompressor */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJDecompressor
+#define _Included_org_libjpegturbo_turbojpeg_TJDecompressor
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
+ (JNIEnv *, jobject);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: destroy
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
+ (JNIEnv *, jobject);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: decompressHeader
+ * Signature: ([BJ)Lorg/libjpegturbo/turbojpeg/TJHeaderInfo;
+ */
+JNIEXPORT jobject JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
+ (JNIEnv *, jobject, jbyteArray, jlong);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: decompress
+ * Signature: ([BJ[BIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress
+ (JNIEnv *, jobject, jbyteArray, jlong, jbyteArray, jint, jint, jint, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: getScaledWidth
+ * Signature: (IIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledWidth
+ (JNIEnv *, jobject, jint, jint, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: getScaledHeight
+ * Signature: (IIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledHeight
+ (JNIEnv *, jobject, jint, jint, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/jccolor.c b/jccolor.c
index 5c186d4..5e1c180 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -81,74 +81,6 @@
#define TABLE_SIZE (8*(MAXJSAMPLE+1))
-#if BITS_IN_JSAMPLE == 8
-
-static const unsigned char red_lut[256] = {
- 0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 3 , 3 , 3 , 4 , 4 , 4 , 4 ,
- 5 , 5 , 5 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 8 , 8 , 8 , 9 , 9 , 9 ,
- 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14,
- 14, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19,
- 19, 19, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 24,
- 24, 24, 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28,
- 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33,
- 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 38, 38,
- 38, 39, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 42, 43,
- 43, 43, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 47, 47, 47, 48,
- 48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52,
- 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57,
- 57, 58, 58, 58, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 62, 62,
- 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 67,
- 67, 67, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71,
- 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 76, 76, 76
-};
-
-static const unsigned char green_lut[256] = {
- 0 , 1 , 1 , 2 , 2 , 3 , 4 , 4 , 5 , 5 , 6 , 6 ,
- 7 , 8 , 8 , 9 , 9 , 10 , 11 , 11 , 12 , 12 , 13 , 14 ,
- 14 , 15 , 15 , 16 , 16 , 17 , 18 , 18 , 19 , 19 , 20 , 21 ,
- 21 , 22 , 22 , 23 , 23 , 24 , 25 , 25 , 26 , 26 , 27 , 28 ,
- 28 , 29 , 29 , 30 , 31 , 31 , 32 , 32 , 33 , 33 , 34 , 35 ,
- 35 , 36 , 36 , 37 , 38 , 38 , 39 , 39 , 40 , 41 , 41 , 42 ,
- 42 , 43 , 43 , 44 , 45 , 45 , 46 , 46 , 47 , 48 , 48 , 49 ,
- 49 , 50 , 50 , 51 , 52 , 52 , 53 , 53 , 54 , 55 , 55 , 56 ,
- 56 , 57 , 58 , 58 , 59 , 59 , 60 , 60 , 61 , 62 , 62 , 63 ,
- 63 , 64 , 65 , 65 , 66 , 66 , 67 , 68 , 68 , 69 , 69 , 70 ,
- 70 , 71 , 72 , 72 , 73 , 73 , 74 , 75 , 75 , 76 , 76 , 77 ,
- 77 , 78 , 79 , 79 , 80 , 80 , 81 , 82 , 82 , 83 , 83 , 84 ,
- 85 , 85 , 86 , 86 , 87 , 87 , 88 , 89 , 89 , 90 , 90 , 91 ,
- 92 , 92 , 93 , 93 , 94 , 95 , 95 , 96 , 96 , 97 , 97 , 98 ,
- 99 , 99 , 100, 100, 101, 102, 102, 103, 103, 104, 104, 105,
- 106, 106, 107, 107, 108, 109, 109, 110, 110, 111, 112, 112,
- 113, 113, 114, 114, 115, 116, 116, 117, 117, 118, 119, 119,
- 120, 120, 121, 122, 122, 123, 123, 124, 124, 125, 126, 126,
- 127, 127, 128, 129, 129, 130, 130, 131, 131, 132, 133, 133,
- 134, 134, 135, 136, 136, 137, 137, 138, 139, 139, 140, 140,
- 141, 141, 142, 143, 143, 144, 144, 145, 146, 146, 147, 147,
- 148, 149, 149, 150
-};
-
-static const unsigned char blue_lut[256] = {
- 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 ,
- 2 , 2 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 ,
- 4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 ,
- 5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 7 , 7 ,
- 7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 ,
- 9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
- 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13,
- 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
- 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18,
- 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
- 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
- 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24,
- 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27,
- 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29
-};
-
-#endif
-
-
/*
* Initialize for RGB->YCC colorspace conversion.
*/
@@ -259,36 +191,26 @@
JDIMENSION output_row, int num_rows)
{
my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
- #if BITS_IN_JSAMPLE != 8
+ register int r, g, b;
register INT32 * ctab = cconvert->rgb_ycc_tab;
- #endif
register JSAMPROW inptr;
register JSAMPROW outptr;
- JSAMPLE *maxoutptr;
register JDIMENSION col;
JDIMENSION num_cols = cinfo->image_width;
- int rindex = rgb_red[cinfo->in_color_space];
- int gindex = rgb_green[cinfo->in_color_space];
- int bindex = rgb_blue[cinfo->in_color_space];
- int rgbstride = rgb_pixelsize[cinfo->in_color_space];
while (--num_rows >= 0) {
inptr = *input_buf++;
outptr = output_buf[0][output_row];
- maxoutptr = &outptr[num_cols];
output_row++;
- for (; outptr < maxoutptr; outptr++, inptr += rgbstride) {
+ for (col = 0; col < num_cols; col++) {
+ r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
+ g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
+ b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
+ inptr += rgb_pixelsize[cinfo->in_color_space];
/* Y */
- #if BITS_IN_JSAMPLE == 8
- *outptr = red_lut[inptr[rindex]] + green_lut[inptr[gindex]]
- + blue_lut[inptr[bindex]];
- #else
- *outptr = (JSAMPLE)
- ((ctab[GETJSAMPLE(inptr[rindex])+R_Y_OFF]
- + ctab[GETJSAMPLE(inptr[gindex])+G_Y_OFF]
- + ctab[GETJSAMPLE(inptr[bindex])+B_Y_OFF])
- >> SCALEBITS);
- #endif
+ outptr[col] = (JSAMPLE)
+ ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+ >> SCALEBITS);
}
}
}
@@ -490,8 +412,12 @@
cinfo->in_color_space == JCS_EXT_BGRX ||
cinfo->in_color_space == JCS_EXT_XBGR ||
cinfo->in_color_space == JCS_EXT_XRGB) {
- cconvert->pub.start_pass = rgb_ycc_start;
- cconvert->pub.color_convert = rgb_gray_convert;
+ if (jsimd_can_rgb_gray())
+ cconvert->pub.color_convert = jsimd_rgb_gray_convert;
+ else {
+ cconvert->pub.start_pass = rgb_ycc_start;
+ cconvert->pub.color_convert = rgb_gray_convert;
+ }
} else if (cinfo->in_color_space == JCS_YCbCr)
cconvert->pub.color_convert = grayscale_convert;
else
diff --git a/jpegut.c b/jpegut.c
index 5a864ea..9c41e50 100644
--- a/jpegut.c
+++ b/jpegut.c
@@ -103,8 +103,10 @@
}
}
-void dumpbuf(unsigned char *buf, int w, int h, int ps, int flags)
+void dumpbuf(unsigned char *buf, int w, int h, int ps, int scalefactor,
+ int flags)
{
 int roffset=(flags&TJ_BGR)?2:0, goffset=1, boffset=(flags&TJ_BGR)?0:2, i,
 j;
+ /* Kept after the declarations so that the file still compiles as C89
+ (declarations may not follow statements, e.g. with Visual C++.) */
+ printf("\n");
for(i=0; i<h; i++)
@@ -118,15 +120,17 @@
}
}
-int checkbuf(unsigned char *buf, int w, int h, int ps, int subsamp, int flags)
+int checkbuf(unsigned char *buf, int w, int h, int ps, int subsamp,
+ int scalefactor, int flags)
{
 int roffset=(flags&TJ_BGR)?2:0, goffset=1, boffset=(flags&TJ_BGR)?0:2, i,
 _i, j;
+ /* The 16-row halves and 8x8 checker blocks tested below shrink with the
+ scaling factor. Declared ahead of the first statement so that the file
+ still compiles as C89 (e.g. with Visual C++.) */
+ int halfway=16/scalefactor, blocksize=8/scalefactor;
 if(flags&TJ_ALPHAFIRST) {roffset++; goffset++; boffset++;}
 if(ps==1) roffset=goffset=boffset=0;
if(subsamp==TJ_GRAYSCALE)
{
- for(_i=0; _i<16; _i++)
+ for(_i=0; _i<halfway; _i++)
{
if(flags&TJ_BOTTOMUP) i=h-_i-1; else i=_i;
for(j=0; j<w; j++)
@@ -134,7 +138,7 @@
unsigned char r=buf[(w*i+j)*ps+roffset],
g=buf[(w*i+j)*ps+goffset],
b=buf[(w*i+j)*ps+boffset];
- if(((_i/8)+(j/8))%2==0)
+ if(((_i/blocksize)+(j/blocksize))%2==0)
{
if(r<253 || g<253 || b<253) return 0;
}
@@ -144,7 +148,7 @@
}
}
}
- for(_i=16; _i<h; _i++)
+ for(_i=halfway; _i<h; _i++)
{
if(flags&TJ_BOTTOMUP) i=h-_i-1; else i=_i;
for(j=0; j<w; j++)
@@ -152,7 +156,7 @@
unsigned char r=buf[(w*i+j)*ps+roffset],
g=buf[(w*i+j)*ps+goffset],
b=buf[(w*i+j)*ps+boffset];
- if(((_i/8)+(j/8))%2==0)
+ if(((_i/blocksize)+(j/blocksize))%2==0)
{
if(r>2 || g>2 || b>2) return 0;
}
@@ -165,13 +169,13 @@
}
else
{
- for(_i=0; _i<16; _i++)
+ for(_i=0; _i<halfway; _i++)
{
if(flags&TJ_BOTTOMUP) i=h-_i-1; else i=_i;
for(j=0; j<w; j++)
{
if(buf[(w*i+j)*ps+roffset]<253) return 0;
- if(((_i/8)+(j/8))%2==0)
+ if(((_i/blocksize)+(j/blocksize))%2==0)
{
if(buf[(w*i+j)*ps+goffset]<253) return 0;
if(buf[(w*i+j)*ps+boffset]<253) return 0;
@@ -183,13 +187,13 @@
}
}
}
- for(_i=16; _i<h; _i++)
+ for(_i=halfway; _i<h; _i++)
{
if(flags&TJ_BOTTOMUP) i=h-_i-1; else i=_i;
for(j=0; j<w; j++)
{
if(buf[(w*i+j)*ps+boffset]>2) return 0;
- if(((_i/8)+(j/8))%2==0)
+ if(((_i/blocksize)+(j/blocksize))%2==0)
{
if(buf[(w*i+j)*ps+roffset]>2) return 0;
if(buf[(w*i+j)*ps+goffset]>2) return 0;
@@ -377,7 +381,8 @@
memset(jpegbuf, 0, TJBUFSIZE(w, h));
t=rrtime();
- _catch(tjCompress(hnd, bmpbuf, w, 0, h, ps, jpegbuf, size, subsamp, qual, flags));
+ _catch(tjCompress(hnd, bmpbuf, w, 0, h, ps, jpegbuf, size, subsamp, qual,
+ flags));
t=rrtime()-t;
if(yuv==YUVENCODE)
@@ -399,16 +404,15 @@
if(bmpbuf) free(bmpbuf);
}
-void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize,
- int w, int h, int ps, char *basefilename, int subsamp, int flags)
+void _gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize,
+ int w, int h, int ps, char *basefilename, int subsamp, int flags,
+ int scalefactor)
{
unsigned char *bmpbuf=NULL;
- const char *pixformat; int _w=0, _h=0; double t;
+ const char *pixformat; int _hdrw=0, _hdrh=0, _hdrsubsamp=-1; double t;
+ int scaledw=(w+scalefactor-1)/scalefactor, scaledh=(h+scalefactor-1)/scalefactor;
+ int temp1, temp2;
unsigned long size=0;
- int hsf=_hsf[subsamp], vsf=_vsf[subsamp];
- int pw=PAD(w, hsf), ph=PAD(h, vsf);
- int cw=pw/hsf, ch=ph/vsf;
- int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4);
if(yuv==YUVDECODE) flags|=TJ_YUV;
else if(yuv==YUVENCODE) return;
@@ -427,19 +431,31 @@
if(yuv==YUVDECODE)
printf("JPEG -> YUV %s ... ", _subnames[subsamp]);
else
- printf("JPEG -> %s %s ... ", pixformat,
+ {
+ printf("JPEG -> %s %s ", pixformat,
(flags&TJ_BOTTOMUP)?"Bottom-Up":"Top-Down ");
+ if(scalefactor) printf("1/%d ... ", scalefactor);
+ else printf("... ");
+ }
- _catch(tjDecompressHeader(hnd, jpegbuf, jpegsize, &_w, &_h));
- if(_w!=w || _h!=h)
+ _catch(tjDecompressHeader2(hnd, jpegbuf, jpegsize, &_hdrw, &_hdrh,
+ &_hdrsubsamp));
+ if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp)
{
printf("Incorrect JPEG header\n"); bailout();
}
+ temp1=scaledw; temp2=scaledh;
+ _catch(tjScaledSize(w, h, &temp1, &temp2));
+ if(temp1!=scaledw || temp2!=scaledh)
+ {
+ printf("Scaled size mismatch\n"); bailout();
+ }
+
if(yuv==YUVDECODE)
- size=ypitch*ph + (subsamp==TJ_GRAYSCALE? 0:uvpitch*ch*2);
+ size=TJBUFSIZEYUV(w, h, subsamp);
else
- size=w*h*ps;
+ size=scaledw*scaledh*ps;
if((bmpbuf=(unsigned char *)malloc(size+1))==NULL)
{
printf("ERROR: Could not allocate buffer\n"); bailout();
@@ -447,30 +463,48 @@
memset(bmpbuf, 0, size+1);
t=rrtime();
- _catch(tjDecompress(hnd, jpegbuf, jpegsize, bmpbuf, w, w*ps, h, ps, flags));
+ _catch(tjDecompress(hnd, jpegbuf, jpegsize, bmpbuf, scaledw, 0, scaledh, ps,
+ flags));
t=rrtime()-t;
if(yuv==YUVDECODE)
{
- if(checkbufyuv(bmpbuf, size, pw, ph, subsamp))
+ if(checkbufyuv(bmpbuf, size, w, h, subsamp))
printf("Passed.");
else {printf("FAILED!"); exitstatus=-1;}
}
else
{
- if(checkbuf(bmpbuf, w, h, ps, subsamp, flags)) printf("Passed.");
+ if(checkbuf(bmpbuf, scaledw, scaledh, ps, subsamp, scalefactor, flags))
+ printf("Passed.");
else
{
printf("FAILED!"); exitstatus=-1;
- dumpbuf(bmpbuf, w, h, ps, flags);
+ dumpbuf(bmpbuf, scaledw, scaledh, ps, scalefactor, flags);
}
}
- printf(" %f ms\n\n", t*1000.);
+ printf(" %f ms\n", t*1000.);
finally:
if(bmpbuf) free(bmpbuf);
}
+/*
+ * Run the decompression test on one JPEG image. For 4:4:4 and grayscale
+ * images that are not being decoded to YUV, the test is repeated for the
+ * scaling factors 1/1, 1/2, 1/4, and 1/8; every other case is tested at 1/1
+ * only. A blank line is printed once all of the runs have finished.
+ */
+void gentestbmp(tjhandle hnd, unsigned char *jpegbuf, unsigned long jpegsize,
+ int w, int h, int ps, char *basefilename, int subsamp, int flags)
+{
+ int sf, maxsf=1;
+ if(!yuv && (subsamp==TJ_444 || subsamp==TJ_GRAYSCALE)) maxsf=8;
+ for(sf=1; sf<=maxsf; sf*=2)
+ {
+ _gentestbmp(hnd, jpegbuf, jpegsize, w, h, ps, basefilename, subsamp,
+ flags, sf);
+ }
+ printf("\n");
+}
+
+
void dotest(int w, int h, int ps, int subsamp, char *basefilename)
{
tjhandle hnd=NULL, dhnd=NULL; unsigned char *jpegbuf=NULL;
@@ -548,7 +582,7 @@
bmpbuf[i2*4+1]=pixels[i2%9][1];
bmpbuf[i2*2+2]=pixels[i2%9][0];
}
- _catch(tjCompress(hnd, bmpbuf, i, i*4, j, 4,
+ _catch(tjCompress(hnd, bmpbuf, i, 0, j, 4,
jpgbuf, &size, TJ_444, 100, TJ_BGR));
free(bmpbuf); bmpbuf=NULL; free(jpgbuf); jpgbuf=NULL;
@@ -562,7 +596,7 @@
if(i2%2==0) bmpbuf[i2]=0xFF;
else bmpbuf[i2]=0;
}
- _catch(tjCompress(hnd, bmpbuf, j, j*4, i, 4,
+ _catch(tjCompress(hnd, bmpbuf, j, 0, i, 4,
jpgbuf, &size, TJ_444, 100, TJ_BGR));
free(bmpbuf); bmpbuf=NULL; free(jpgbuf); jpgbuf=NULL;
}
diff --git a/jpgtest.c b/jpgtest.c
index 8451642..773a32e 100644
--- a/jpgtest.c
+++ b/jpgtest.c
@@ -33,7 +33,8 @@
enum {YUVENCODE=1, YUVDECODE};
int forcemmx=0, forcesse=0, forcesse2=0, forcesse3=0, fastupsample=0,
- decomponly=0, yuv=0;
+ decomponly=0, yuv=0, quiet=0, dotile=0, pf=BMP_BGR, bu=0, useppm=0,
+ scalefactor=1;
const int _ps[BMPPIXELFORMATS]={3, 4, 3, 4, 4, 4};
const int _flags[BMPPIXELFORMATS]={0, 0, TJ_BGR, TJ_BGR,
TJ_BGR|TJ_ALPHAFIRST, TJ_ALPHAFIRST};
@@ -64,8 +65,150 @@
printf(format, val);
}
-void dotest(unsigned char *srcbuf, int w, int h, int pf, int bu,
- int jpegsub, int qual, char *filename, int dotile, int useppm, int quiet)
+/*
+ * Decompression test
+ *
+ * Decompresses the JPEG tile(s) in jpegbuf once as a warm-up, then repeatedly
+ * until at least 5 units of rrtime() have elapsed, reports the frame rate and
+ * throughput, and writes the result to disk (a planar YUV file when
+ * yuv==YUVDECODE, otherwise a BMP/PPM image.) If the original image is
+ * available and no scaling is in effect, a per-component absolute-difference
+ * ("-err") image is written as well.
+ *
+ * srcbuf = original uncompressed image, or NULL if there is none (the
+ * error image is skipped in that case)
+ * jpegbuf = array of JPEG tile buffers, ordered row by row
+ * comptilesize = size in bytes of each entry in jpegbuf
+ * rgbbuf = destination buffer, or NULL to have one allocated and freed
+ * by this function
+ * w, h = dimensions of the full (unscaled) image
+ * jpegsub = subsampling of the JPEG data (index into _hsf/_vsf/_subnames)
+ * qual = JPEG quality used in the output file name; values <= 0 leave the
+ * "Q<n>" component out
+ * filename = base name for the output files
+ * tilesizex, tilesizey = tile dimensions (w and h if the image is not tiled)
+ *
+ * The pixel format, row order, scaling factor, and other test options are
+ * taken from the file-scope settings (pf, bu, scalefactor, yuv, quiet, ...)
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int decomptest(unsigned char *srcbuf, unsigned char **jpegbuf,
+ unsigned long *comptilesize, unsigned char *rgbbuf, int w, int h,
+ int jpegsub, int qual, char *filename, int tilesizex, int tilesizey)
+{
+ char tempstr[1024], qualstr[5]="\0";
+ FILE *outfile=NULL; tjhandle hnd=NULL;
+ int flags=(forcemmx?TJ_FORCEMMX:0)|(forcesse?TJ_FORCESSE:0)
+ |(forcesse2?TJ_FORCESSE2:0)|(forcesse3?TJ_FORCESSE3:0)
+ |(fastupsample?TJ_FASTUPSAMPLE:0);
+ int i, j, ITER, rgbbufalloc=0;
+ double start, elapsed;
+ int ps=_ps[pf];
+ int hsf=_hsf[jpegsub], vsf=_vsf[jpegsub];
+ int pw=PAD(w, hsf), ph=PAD(h, vsf);
+ int cw=pw/hsf, ch=ph/vsf;
+ int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4);
+ int yuvsize=ypitch*ph + (jpegsub==TJ_GRAYSCALE? 0:uvpitch*ch*2);
+ // TJ_YUV is not added to flags until further down, so YUV decoding has to
+ // be detected through the yuv setting here (YUV output is never scaled.)
+ int scaledw=(yuv==YUVDECODE)? w : (w+scalefactor-1)/scalefactor;
+ int scaledh=(yuv==YUVDECODE)? h : (h+scalefactor-1)/scalefactor;
+ int pitch=scaledw*ps;
+
+ if(qual>0)
+ {
+ snprintf(qualstr, 5, "Q%d", qual);
+ qualstr[4]=0;
+ }
+
+ flags |= _flags[pf];
+ if(bu) flags |= TJ_BOTTOMUP;
+ if(yuv==YUVDECODE) flags |= TJ_YUV;
+ if((hnd=tjInitDecompress())==NULL)
+ _throwtj("executing tjInitDecompress()");
+
+ if(rgbbuf==NULL)
+ {
+ if((rgbbuf=(unsigned char *)malloc(max(yuvsize, pitch*scaledh))) == NULL)
+ _throwunix("allocating image buffer");
+ rgbbufalloc=1;
+ }
+ // Grey image means decompressor did nothing
+ memset(rgbbuf, 127, max(yuvsize, pitch*scaledh));
+
+ // Untimed warm-up pass
+ if(tjDecompress(hnd, jpegbuf[0], comptilesize[0], rgbbuf, scaledw, pitch,
+ scaledh, ps, flags)==-1)
+ _throwtj("executing tjDecompress()");
+ ITER=0;
+ start=rrtime();
+ do
+ {
+ int tilen=0;
+ for(i=0; i<h; i+=tilesizey)
+ {
+ for(j=0; j<w; j+=tilesizex)
+ {
+ // Tiles are only used when scaling is disabled, in which case each
+ // tile is decompressed at its own size (edge tiles may be smaller.)
+ int tempw=(scalefactor==1)? min(tilesizex, w-j):scaledw;
+ int temph=(scalefactor==1)? min(tilesizey, h-i):scaledh;
+ if(tjDecompress(hnd, jpegbuf[tilen], comptilesize[tilen],
+ &rgbbuf[pitch*i+ps*j], tempw, pitch, temph, ps, flags)==-1)
+ _throwtj("executing tjDecompress()");
+ tilen++;
+ }
+ }
+ ITER++;
+ } while((elapsed=rrtime()-start)<5.);
+ if(tjDestroy(hnd)==-1) _throwtj("executing tjDestroy()");
+ hnd=NULL;
+ if(quiet)
+ {
+ printsigfig((double)(w*h)/1000000.*(double)ITER/elapsed, 4);
+ printf("\n");
+ }
+ else
+ {
+ printf("D--> Frame rate: %f fps\n", (double)ITER/elapsed);
+ printf(" Dest. throughput: %f Megapixels/sec\n",
+ (double)(w*h)/1000000.*(double)ITER/elapsed);
+ }
+ if(yuv==YUVDECODE)
+ {
+ sprintf(tempstr, "%s_%s%s.yuv", filename, _subnames[jpegsub], qualstr);
+ if((outfile=fopen(tempstr, "wb"))==NULL)
+ _throwunix("opening YUV image for output");
+ if(fwrite(rgbbuf, yuvsize, 1, outfile)!=1)
+ _throwunix("writing YUV image");
+ fclose(outfile); outfile=NULL;
+ }
+ else
+ {
+ if(tilesizex==w && tilesizey==h)
+ {
+ if(decomponly)
+ sprintf(tempstr, "%s_full.%s", filename, useppm?"ppm":"bmp");
+ else
+ sprintf(tempstr, "%s_%s%s_full.%s", filename, _subnames[jpegsub],
+ qualstr, useppm?"ppm":"bmp");
+ }
+ else sprintf(tempstr, "%s_%s%s_%dx%d.%s", filename, _subnames[jpegsub],
+ qualstr, tilesizex, tilesizey, useppm?"ppm":"bmp");
+ if(savebmp(tempstr, rgbbuf, scaledw, scaledh, pf, pitch, bu)==-1)
+ _throwbmp("saving bitmap");
+ sprintf(strrchr(tempstr, '.'), "-err.%s", useppm?"ppm":"bmp");
+ // The error image compares pixel for pixel against the source image, so
+ // it can only be produced when the output was not scaled.
+ if(srcbuf && scalefactor==1)
+ {
+ if(!quiet)
+ printf("Computing compression error and saving to %s.\n", tempstr);
+ if(jpegsub==TJ_GRAYSCALE)
+ {
+ for(j=0; j<h; j++)
+ {
+ for(i=0; i<w*ps; i+=ps)
+ {
+ int y=(int)((double)srcbuf[w*ps*j+i+_rindex[pf]]*0.299
+ + (double)srcbuf[w*ps*j+i+_gindex[pf]]*0.587
+ + (double)srcbuf[w*ps*j+i+_bindex[pf]]*0.114 + 0.5);
+ if(y>255) y=255; if(y<0) y=0;
+ rgbbuf[pitch*j+i+_rindex[pf]]=abs(rgbbuf[pitch*j+i+_rindex[pf]]-y);
+ rgbbuf[pitch*j+i+_gindex[pf]]=abs(rgbbuf[pitch*j+i+_gindex[pf]]-y);
+ rgbbuf[pitch*j+i+_bindex[pf]]=abs(rgbbuf[pitch*j+i+_bindex[pf]]-y);
+ }
+ }
+ }
+ else
+ {
+ for(j=0; j<h; j++) for(i=0; i<w*ps; i++)
+ rgbbuf[pitch*j+i]=abs(rgbbuf[pitch*j+i]-srcbuf[w*ps*j+i]);
+ }
+ if(savebmp(tempstr, rgbbuf, w, h, pf, pitch, bu)==-1)
+ _throwbmp("saving bitmap");
+ }
+ }
+
+ if(hnd) {tjDestroy(hnd); hnd=NULL;}
+ if(rgbbuf && rgbbufalloc) {free(rgbbuf); rgbbuf=NULL;}
+ return 0;
+
+ bailout:
+ if(outfile) {fclose(outfile); outfile=NULL;}
+ if(hnd) {tjDestroy(hnd); hnd=NULL;}
+ if(rgbbuf && rgbbufalloc) {free(rgbbuf); rgbbuf=NULL;}
+ return -1;
+}
+
+void dotest(unsigned char *srcbuf, int w, int h, int jpegsub, int qual,
+ char *filename)
{
char tempstr[1024];
FILE *outfile=NULL; tjhandle hnd;
@@ -191,89 +334,9 @@
if(yuv==YUVENCODE) goto bailout;
// Decompression test
- if(yuv==YUVDECODE) flags |= TJ_YUV;
- memset(rgbbuf, 127, max(yuvsize, pitch*h)); // Grey image means decompressor did nothing
- if((hnd=tjInitDecompress())==NULL)
- _throwtj("executing tjInitDecompress()");
- if(tjDecompress(hnd, jpegbuf[0], jpgbufsize, rgbbuf, tilesizex, pitch,
- tilesizey, ps, flags)==-1)
- _throwtj("executing tjDecompress()");
- ITER=0;
- start=rrtime();
- do
- {
- int tilen=0;
- for(i=0; i<h; i+=tilesizey)
- {
- for(j=0; j<w; j+=tilesizex)
- {
- int tempw=min(tilesizex, w-j), temph=min(tilesizey, h-i);
- if(tjDecompress(hnd, jpegbuf[tilen], comptilesize[tilen],
- &rgbbuf[pitch*i+ps*j], tempw, pitch, temph, ps, flags)==-1)
- _throwtj("executing tjDecompress()");
- tilen++;
- }
- }
- ITER++;
- } while((elapsed=rrtime()-start)<5.);
- if(tjDestroy(hnd)==-1) _throwtj("executing tjDestroy()");
- hnd=NULL;
- if(quiet)
- {
- printsigfig((double)(w*h)/1000000.*(double)ITER/elapsed, 4);
- printf("\n");
- }
- else
- {
- printf("D--> Frame rate: %f fps\n", (double)ITER/elapsed);
- printf(" Dest. throughput: %f Megapixels/sec\n",
- (double)(w*h)/1000000.*(double)ITER/elapsed);
- }
- if(yuv==YUVDECODE)
- {
- sprintf(tempstr, "%s_%sQ%d.yuv", filename, _subnames[jpegsub], qual);
- if((outfile=fopen(tempstr, "wb"))==NULL)
- _throwunix("opening YUV image for output");
- if(fwrite(rgbbuf, yuvsize, 1, outfile)!=1)
- _throwunix("writing YUV image");
- fclose(outfile); outfile=NULL;
- }
- else
- {
- if(tilesizex==w && tilesizey==h)
- sprintf(tempstr, "%s_%sQ%d_full.%s", filename, _subnames[jpegsub], qual,
- useppm?"ppm":"bmp");
- else sprintf(tempstr, "%s_%sQ%d_%dx%d.%s", filename, _subnames[jpegsub],
- qual, tilesizex, tilesizey, useppm?"ppm":"bmp");
- if(savebmp(tempstr, rgbbuf, w, h, pf, pitch, bu)==-1)
- _throwbmp("saving bitmap");
- sprintf(strrchr(tempstr, '.'), "-err.%s", useppm?"ppm":"bmp");
- if(!quiet)
- printf("Computing compression error and saving to %s.\n", tempstr);
- if(jpegsub==TJ_GRAYSCALE)
- {
- for(j=0; j<h; j++)
- {
- for(i=0; i<w*ps; i+=ps)
- {
- int y=(int)((double)srcbuf[w*ps*j+i+_rindex[pf]]*0.299
- + (double)srcbuf[w*ps*j+i+_gindex[pf]]*0.587
- + (double)srcbuf[w*ps*j+i+_bindex[pf]]*0.114 + 0.5);
- if(y>255) y=255; if(y<0) y=0;
- rgbbuf[pitch*j+i+_rindex[pf]]=abs(rgbbuf[pitch*j+i+_rindex[pf]]-y);
- rgbbuf[pitch*j+i+_gindex[pf]]=abs(rgbbuf[pitch*j+i+_gindex[pf]]-y);
- rgbbuf[pitch*j+i+_bindex[pf]]=abs(rgbbuf[pitch*j+i+_bindex[pf]]-y);
- }
- }
- }
- else
- {
- for(j=0; j<h; j++) for(i=0; i<w*ps; i++)
- rgbbuf[pitch*j+i]=abs(rgbbuf[pitch*j+i]-srcbuf[w*ps*j+i]);
- }
- if(savebmp(tempstr, rgbbuf, w, h, pf, pitch, bu)==-1)
- _throwbmp("saving bitmap");
- }
+ if(decomptest(srcbuf, jpegbuf, comptilesize, rgbbuf, w, h, jpegsub, qual,
+ filename, tilesizex, tilesizey)==-1)
+ goto bailout;
// Cleanup
if(outfile) {fclose(outfile); outfile=NULL;}
@@ -304,25 +367,15 @@
}
-void dodecomptest(char *filename, int pf, int bu, int useppm,
- int quiet)
+void dodecomptest(char *filename)
{
- char tempstr[1024];
- FILE *file=NULL; tjhandle hnd;
- unsigned char *jpegbuf=NULL, *rgbbuf=NULL;
- double start, elapsed;
- int w, h, ITER;
+ FILE *file=NULL; tjhandle hnd=NULL;
+ unsigned char *jpegbuf=NULL;
+ int w=0, h=0, jpegsub=-1;
unsigned long jpgbufsize=0;
- int flags=(forcemmx?TJ_FORCEMMX:0)|(forcesse?TJ_FORCESSE:0)
- |(forcesse2?TJ_FORCESSE2:0)|(forcesse3?TJ_FORCESSE3:0)
- |(fastupsample?TJ_FASTUPSAMPLE:0);
- int ps=_ps[pf], pitch, jpegsub=-1;
char *temp=NULL;
- int hsf, vsf, pw, ph, cw, ch, ypitch, uvpitch, yuvsize;
- flags |= _flags[pf];
- if(bu) flags |= TJ_BOTTOMUP;
- if(yuv==YUVDECODE) flags |= TJ_YUV;
+ useppm=1;
if((file=fopen(filename, "rb"))==NULL)
_throwunix("opening file");
@@ -342,81 +395,36 @@
if((hnd=tjInitDecompress())==NULL) _throwtj("executing tjInitDecompress()");
if(tjDecompressHeader2(hnd, jpegbuf, jpgbufsize, &w, &h, &jpegsub)==-1)
_throwtj("executing tjDecompressHeader2()");
-
- hsf=_hsf[jpegsub], vsf=_vsf[jpegsub];
- pw=PAD(w, hsf), ph=PAD(h, vsf);
- cw=pw/hsf, ch=ph/vsf;
- ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4);
- yuvsize=ypitch*ph + (jpegsub==TJ_GRAYSCALE? 0:uvpitch*ch*2);
-
- pitch=w*ps;
+ if(tjDestroy(hnd)==-1) _throwtj("executing tjDestroy()");
+ hnd=NULL;
if(quiet==1)
{
printf("\nAll performance values in Mpixels/sec\n\n");
- printf("Bitmap\tBitmap\tImage Size\tDecomp\n"),
- printf("Format\tOrder\t X Y \tPerf\n\n");
- printf("%s\t%s\t%-4d %-4d\t", _pfname[pf], bu?"BU":"TD", w, h);
+ printf("Bitmap\tBitmap\tJPEG\tImage Size\tDecomp\n"),
+ printf("Format\tOrder\tFormat\t X Y \tPerf\n\n");
+ printf("%s\t%s\t%s\t%-4d %-4d\t", _pfname[pf], bu?"BU":"TD",
+ _subnamel[jpegsub], w, h);
}
-
- if((rgbbuf=(unsigned char *)malloc(max(yuvsize, pitch*h)))==NULL)
- _throwunix("allocating image buffer");
-
- if(!quiet)
+ else
{
if(yuv==YUVDECODE)
printf("\n>>>>> JPEG --> YUV %s <<<<<\n", _subnamel[jpegsub]);
else
printf("\n>>>>> JPEG --> %s (%s) <<<<<\n", _pfname[pf],
bu?"Bottom-up":"Top-down");
- printf("\nImage size: %d x %d\n", w, h);
- }
-
- memset(rgbbuf, 127, max(yuvsize, pitch*h)); // Grey image means decompressor did nothing
- if(tjDecompress(hnd, jpegbuf, jpgbufsize, rgbbuf, w, pitch, h, ps, flags)==-1)
- _throwtj("executing tjDecompress()");
- ITER=0;
- start=rrtime();
- do
- {
- if(tjDecompress(hnd, jpegbuf, jpgbufsize, rgbbuf, w, pitch, h, ps, flags)
- ==-1)
- _throwtj("executing tjDecompress()");
- ITER++;
- } while((elapsed=rrtime()-start)<5.);
- if(tjDestroy(hnd)==-1) _throwtj("executing tjDestroy()");
- hnd=NULL;
- if(quiet)
- {
- printsigfig((double)(w*h)/1000000.*(double)ITER/elapsed, 4);
+ printf("\nImage size: %d x %d", w, h);
+ if(scalefactor!=1) printf(" --> %d x %d", (w+scalefactor-1)/scalefactor,
+ (h+scalefactor-1)/scalefactor);
printf("\n");
}
- else
- {
- printf("D--> Frame rate: %f fps\n", (double)ITER/elapsed);
- printf(" Dest. throughput: %f Megapixels/sec\n",
- (double)(w*h)/1000000.*(double)ITER/elapsed);
- }
- sprintf(tempstr, "%s_full.%s", filename, useppm?"ppm":"bmp");
- if(yuv==YUVDECODE)
- {
- sprintf(tempstr, "%s_%s.yuv", filename, _subnames[jpegsub]);
- if((file=fopen(tempstr, "wb"))==NULL)
- _throwunix("opening YUV image for output");
- if(fwrite(rgbbuf, yuvsize, 1, file)!=1)
- _throwunix("writing YUV image");
- fclose(file); file=NULL;
- }
- else
- {
- if(savebmp(tempstr, rgbbuf, w, h, pf, pitch, bu)==-1)
- _throwbmp("saving bitmap");
- }
+
+ decomptest(NULL, &jpegbuf, &jpgbufsize, NULL, w, h, jpegsub, 0, filename, w,
+ h);
bailout:
if(file) {fclose(file); file=NULL;}
if(jpegbuf) {free(jpegbuf); jpegbuf=NULL;}
- if(rgbbuf) {free(rgbbuf); rgbbuf=NULL;}
if(hnd) {tjDestroy(hnd); hnd=NULL;}
return;
}
@@ -424,36 +432,35 @@
void usage(char *progname)
{
- printf("USAGE: %s <Inputfile (BMP|PPM))> <%% Quality>\n", progname);
- printf(" %s <Inputfile (JPG))>\n\n", progname);
- printf(" [-tile]\n");
- printf(" Test performance of the codec when the image is encoded\n");
- printf(" as separate tiles of varying sizes.\n\n");
- printf(" [-forcemmx] [-forcesse] [-forcesse2] [-forcesse3]\n");
- printf(" Force MMX, SSE, SSE2, or SSE3 code paths in the underlying codec\n\n");
- printf(" [-rgb | -bgr | -rgbx | -bgrx | -xbgr | -xrgb]\n");
- printf(" Test the specified color conversion path in the codec (default: BGR)\n\n");
- printf(" [-fastupsample]\n");
- printf(" Use fast, inaccurate upsampling code to perform 4:2:2 and 4:2:0\n");
- printf(" YUV decoding in libjpeg decompressor\n\n");
- printf(" [-quiet]\n");
- printf(" Output in tabular rather than verbose format\n\n");
- printf(" [-yuvencode]\n");
- printf(" Encode RGB input as planar YUV rather than compressing as JPEG\n\n");
- printf(" [-yuvdecode]\n");
- printf(" Decode JPEG image to planar YUV rather than RGB\n\n");
- printf(" NOTE: If the quality is specified as a range, i.e. 90-100, a separate\n");
- printf(" test will be performed for all quality values in the range.\n");
+ // Print the command-line syntax for both modes (compress+decompress of a
+ // BMP/PPM, or decompress-only of a JPG) and terminate with exit status 1.
+ printf("USAGE: %s\n", progname);
+ printf(" <Inputfile (BMP|PPM)> <%% Quality> [options]\n\n");
+ printf(" %s\n", progname);
+ printf(" <Inputfile (JPG)> [options]\n\n");
+ printf("Options:\n\n");
+ printf("-tile = Test performance of the codec when the image is encoded as separate\n");
+ printf(" tiles of varying sizes.\n");
+ printf("-forcemmx, -forcesse, -forcesse2, -forcesse3 =\n");
+ printf(" Force MMX, SSE, SSE2, or SSE3 code paths in the underlying codec\n");
+ printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n");
+ printf(" Test the specified color conversion path in the codec (default: BGR)\n");
+ printf("-fastupsample = Use fast, inaccurate upsampling code to perform 4:2:2 and 4:2:0\n");
+ printf(" YUV decoding in libjpeg decompressor\n");
+ printf("-quiet = Output results in tabular rather than verbose format\n");
+ printf("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG\n");
+ printf("-yuvdecode = Decode JPEG image to planar YUV rather than RGB\n");
+ printf("-scale 1/N = scale down the width/height of the decompressed JPEG image by a\n");
+ printf(" factor of N (N = 1, 2, 4, or 8)\n\n");
+ printf("NOTE: If the quality is specified as a range (e.g. 90-100), a separate\n");
+ printf("test will be performed for all quality values in the range.\n\n");
exit(1);
}
int main(int argc, char *argv[])
{
- unsigned char *bmpbuf=NULL; int w, h, i, useppm=0;
- int qual, dotile=0, quiet=0, hiqual=-1; char *temp;
- int pf=BMP_BGR;
- int bu=0, minarg=2;
+ unsigned char *bmpbuf=NULL; int w, h, i;
+ int qual, hiqual=-1; char *temp;
+ int minarg=2;
if(argc<minarg) usage(argv[0]);
@@ -535,9 +542,24 @@
if(!stricmp(argv[i], "-bottomup")) bu=1;
if(!stricmp(argv[i], "-quiet")) quiet=1;
if(!stricmp(argv[i], "-qq")) quiet=2;
+ if(!stricmp(argv[i], "-scale") && i<argc-1)
+ {
+ int temp1=0, temp2=0;
+ if(sscanf(argv[++i], "%d/%d", &temp1, &temp2)!=2
+ || temp1!=1 || temp2<1 || temp2>8 || (temp2&(temp2-1))!=0)
+ usage(argv[0]);
+ scalefactor=temp2;
+ }
}
}
+ if(scalefactor!=1 && dotile)
+ {
+ printf("Disabling tiled compression/decompression tests, because these tests do not\n");
+ printf("work when scaled decompression is enabled.\n");
+ dotile=0;
+ }
+
if(!decomponly)
{
if(loadbmp(argv[1], &bmpbuf, &w, &h, pf, 1, bu)==-1)
@@ -555,20 +577,20 @@
if(decomponly)
{
- dodecomptest(argv[1], pf, bu, 1, quiet);
+ dodecomptest(argv[1]);
goto bailout;
}
for(i=hiqual; i>=qual; i--)
- dotest(bmpbuf, w, h, pf, bu, TJ_GRAYSCALE, i, argv[1], dotile, useppm, quiet);
+ dotest(bmpbuf, w, h, TJ_GRAYSCALE, i, argv[1]);
if(quiet) printf("\n");
for(i=hiqual; i>=qual; i--)
- dotest(bmpbuf, w, h, pf, bu, TJ_420, i, argv[1], dotile, useppm, quiet);
+ dotest(bmpbuf, w, h, TJ_420, i, argv[1]);
if(quiet) printf("\n");
for(i=hiqual; i>=qual; i--)
- dotest(bmpbuf, w, h, pf, bu, TJ_422, i, argv[1], dotile, useppm, quiet);
+ dotest(bmpbuf, w, h, TJ_422, i, argv[1]);
if(quiet) printf("\n");
for(i=hiqual; i>=qual; i--)
- dotest(bmpbuf, w, h, pf, bu, TJ_444, i, argv[1], dotile, useppm, quiet);
+ dotest(bmpbuf, w, h, TJ_444, i, argv[1]);
bailout:
if(bmpbuf) free(bmpbuf);
diff --git a/jsimd.h b/jsimd.h
index b663791..3fa2c43 100644
--- a/jsimd.h
+++ b/jsimd.h
@@ -2,6 +2,7 @@
* jsimd.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -13,8 +14,10 @@
#ifdef NEED_SHORT_EXTERNAL_NAMES
#define jsimd_can_rgb_ycc jSCanRgbYcc
+#define jsimd_can_rgb_gray jSCanRgbGry
#define jsimd_can_ycc_rgb jSCanYccRgb
#define jsimd_rgb_ycc_convert jSRgbYccConv
+#define jsimd_rgb_gray_convert jSRgbGryConv
#define jsimd_ycc_rgb_convert jSYccRgbConv
#define jsimd_can_h2v2_downsample jSCanH2V2Down
#define jsimd_can_h2v1_downsample jSCanH2V1Down
@@ -35,12 +38,17 @@
#endif /* NEED_SHORT_EXTERNAL_NAMES */
EXTERN(int) jsimd_can_rgb_ycc JPP((void));
+EXTERN(int) jsimd_can_rgb_gray JPP((void));
EXTERN(int) jsimd_can_ycc_rgb JPP((void));
EXTERN(void) jsimd_rgb_ycc_convert
JPP((j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_rgb_gray_convert
+ JPP((j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
EXTERN(void) jsimd_ycc_rgb_convert
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
diff --git a/jsimd_none.c b/jsimd_none.c
index 7ff3074..9787902 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -2,7 +2,7 @@
* jsimd_none.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -25,6 +25,12 @@
}
GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+ return 0;
+}
+
+GLOBAL(int)
jsimd_can_ycc_rgb (void)
{
return 0;
@@ -38,6 +44,13 @@
}
GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows)
+{
+}
+
+GLOBAL(void)
jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows)
diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in
index 48e161d..c21eef8 100644
--- a/release/libjpeg-turbo.spec.in
+++ b/release/libjpeg-turbo.spec.in
@@ -43,7 +43,7 @@
#-->%setup -q
#-->%build
-#-->configure libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ --with-pic
+#-->configure libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ --with-pic @RPM_CONFIG_ARGS@
#-->make DESTDIR=$RPM_BUILD_ROOT libdir=/opt/%{name}/%{__lib} mandir=/opt/%{name}/man
%install
diff --git a/release/makemacpkg.in b/release/makemacpkg.in
index 49931d9..c7bbbcf 100644
--- a/release/makemacpkg.in
+++ b/release/makemacpkg.in
@@ -28,6 +28,7 @@
BUILD=@BUILD@
SRCDIR=@srcdir@
BUILDDIR32=@srcdir@/osxx86
+BUILDJNILIB=@BUILDJNILIB@
if [ $# -gt 0 ]; then
if [ "$1" = "universal" ]; then
UNIVERSAL=1
@@ -122,6 +123,9 @@
ln -fs /usr/include/turbojpeg.h $PKGROOT/opt/$PACKAGE_NAME/include/
ln -fs /usr/lib/libturbojpeg.a $PKGROOT/opt/$PACKAGE_NAME/lib/
+if [ $BUILDJNILIB = 1 ]; then
+ ln -fs libturbojpeg.dylib $PKGROOT/usr/lib/libturbojpeg.jnilib
+fi
if [ ! -h $PKGROOT/opt/$PACKAGE_NAME/lib32 ]; then
ln -fs lib $PKGROOT/opt/$PACKAGE_NAME/lib32
fi
diff --git a/simd/Makefile.am b/simd/Makefile.am
index 81c23af..f19fdf5 100644
--- a/simd/Makefile.am
+++ b/simd/Makefile.am
@@ -11,7 +11,7 @@
libsimd_la_SOURCES = jsimd_x86_64.c \
jsimd.h jsimdcfg.inc.h \
jsimdext.inc jcolsamp.inc jdct.inc \
- jfsseflt-64.asm \
+ jfsseflt-64.asm jcgrass2-64.asm \
jccolss2-64.asm jdcolss2-64.asm \
jcsamss2-64.asm jdsamss2-64.asm jdmerss2-64.asm \
jcqnts2i-64.asm jfss2fst-64.asm jfss2int-64.asm \
@@ -20,6 +20,7 @@
jccolss2-64.lo: jcclrss2-64.asm
jdcolss2-64.lo: jdclrss2-64.asm
+jcgrass2-64.lo: jcgryss2-64.asm
jdmerss2-64.lo: jdmrgss2-64.asm
endif
@@ -29,20 +30,22 @@
jsimd.h jsimdcfg.inc.h \
jsimdext.inc jcolsamp.inc jdct.inc \
jsimdcpu.asm \
- jccolmmx.asm jdcolmmx.asm \
+ jccolmmx.asm jdcolmmx.asm jcgrammx.asm \
jcsammmx.asm jdsammmx.asm jdmermmx.asm \
jcqntmmx.asm jfmmxfst.asm jfmmxint.asm \
jimmxred.asm jimmxint.asm jimmxfst.asm \
jcqnt3dn.asm jf3dnflt.asm ji3dnflt.asm \
jcqntsse.asm jfsseflt.asm jisseflt.asm \
- jccolss2.asm jdcolss2.asm \
+ jccolss2.asm jdcolss2.asm jcgrass2.asm \
jcsamss2.asm jdsamss2.asm jdmerss2.asm \
jcqnts2i.asm jfss2fst.asm jfss2int.asm \
jiss2red.asm jiss2int.asm jiss2fst.asm \
jcqnts2f.asm jiss2flt.asm
jccolmmx.lo: jcclrmmx.asm
+jcgrammx.lo: jcgrymmx.asm
jccolss2.lo: jcclrss2.asm
+jcgrass2.lo: jcgryss2.asm
jdcolmmx.lo: jdclrmmx.asm
jdcolss2.lo: jdclrss2.asm
jdmermmx.lo: jdmrgmmx.asm
diff --git a/simd/jcgrammx.asm b/simd/jcgrammx.asm
new file mode 100644
index 0000000..dd46cc5
--- /dev/null
+++ b/simd/jcgrammx.asm
@@ -0,0 +1,113 @@
+;
+; jcgrammx.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS 16
+
+F_0_114 equ 7471 ; FIX(0.11400)
+F_0_250 equ 16384 ; FIX(0.25000)
+F_0_299 equ 19595 ; FIX(0.29900)
+F_0_587 equ 38470 ; FIX(0.58700)
+F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000); green is split 0.337 + 0.250 so each weight fits a signed word for pmaddwd
+
+; --------------------------------------------------------------------------
+ SECTION SEG_CONST
+
+ alignz 16
+ global EXTN(jconst_rgb_gray_convert_mmx)
+
+EXTN(jconst_rgb_gray_convert_mmx):
+
+PW_F0299_F0337 times 2 dw F_0_299, F_0_337
+PW_F0114_F0250 times 2 dw F_0_114, F_0_250
+PD_ONEHALF times 2 dd (1 << (SCALEBITS-1)) ; rounding term added before the >> SCALEBITS
+
+ alignz 16
+; First pass: RGB_* are not set in this file, so whatever values are already in effect are used.
+; --------------------------------------------------------------------------
+%include "jcgrymmx.asm"
+; extrgb variant: R/G/B at byte offsets 0/1/2, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx
+%include "jcgrymmx.asm"
+; extrgbx variant: R/G/B at byte offsets 0/1/2, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx
+%include "jcgrymmx.asm"
+; extbgr variant: R/G/B at byte offsets 2/1/0, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx
+%include "jcgrymmx.asm"
+; extbgrx variant: R/G/B at byte offsets 2/1/0, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx
+%include "jcgrymmx.asm"
+; extxbgr variant: R/G/B at byte offsets 3/2/1, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 3
+%define RGB_GREEN 2
+%define RGB_BLUE 1
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx
+%include "jcgrymmx.asm"
+; extxrgb variant: R/G/B at byte offsets 1/2/3, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 1
+%define RGB_GREEN 2
+%define RGB_BLUE 3
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx
+%include "jcgrymmx.asm"
diff --git a/simd/jcgrass2-64.asm b/simd/jcgrass2-64.asm
new file mode 100644
index 0000000..9f8a01a
--- /dev/null
+++ b/simd/jcgrass2-64.asm
@@ -0,0 +1,110 @@
+;
+; jcgrass2-64.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS 16
+
+F_0_114 equ 7471 ; FIX(0.11400)
+F_0_250 equ 16384 ; FIX(0.25000)
+F_0_299 equ 19595 ; FIX(0.29900)
+F_0_587 equ 38470 ; FIX(0.58700)
+F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000); green is split 0.337 + 0.250 so each weight fits a signed word for pmaddwd
+
+; --------------------------------------------------------------------------
+ SECTION SEG_CONST
+
+ alignz 16
+ global EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):
+
+PW_F0299_F0337 times 4 dw F_0_299, F_0_337
+PW_F0114_F0250 times 4 dw F_0_114, F_0_250
+PD_ONEHALF times 4 dd (1 << (SCALEBITS-1)) ; rounding term added before the >> SCALEBITS
+
+ alignz 16
+; First pass: RGB_* are not set in this file, so whatever values are already in effect are used.
+; --------------------------------------------------------------------------
+%include "jcgryss2-64.asm"
+; extrgb variant: R/G/B at byte offsets 0/1/2, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryss2-64.asm"
+; extrgbx variant: R/G/B at byte offsets 0/1/2, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryss2-64.asm"
+; extbgr variant: R/G/B at byte offsets 2/1/0, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryss2-64.asm"
+; extbgrx variant: R/G/B at byte offsets 2/1/0, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryss2-64.asm"
+; extxbgr variant: R/G/B at byte offsets 3/2/1, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 3
+%define RGB_GREEN 2
+%define RGB_BLUE 1
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryss2-64.asm"
+; extxrgb variant: R/G/B at byte offsets 1/2/3, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 1
+%define RGB_GREEN 2
+%define RGB_BLUE 3
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryss2-64.asm"
diff --git a/simd/jcgrass2.asm b/simd/jcgrass2.asm
new file mode 100644
index 0000000..f284e0f
--- /dev/null
+++ b/simd/jcgrass2.asm
@@ -0,0 +1,110 @@
+;
+; jcgrass2.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS 16
+
+F_0_114 equ 7471 ; FIX(0.11400)
+F_0_250 equ 16384 ; FIX(0.25000)
+F_0_299 equ 19595 ; FIX(0.29900)
+F_0_587 equ 38470 ; FIX(0.58700)
+F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+ SECTION SEG_CONST
+
+ alignz 16
+ global EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):
+
+PW_F0299_F0337 times 4 dw F_0_299, F_0_337
+PW_F0114_F0250 times 4 dw F_0_114, F_0_250
+PD_ONEHALF times 4 dd (1 << (SCALEBITS-1))
+
+ alignz 16
+; First pass: RGB_* are not set in this file, so whatever values are already in effect are used.
+; --------------------------------------------------------------------------
+%include "jcgryss2.asm"
+; extrgb variant: R/G/B at byte offsets 0/1/2, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryss2.asm"
+; extrgbx variant: R/G/B at byte offsets 0/1/2, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryss2.asm"
+; extbgr variant: R/G/B at byte offsets 2/1/0, 3 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 3
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryss2.asm"
+; extbgrx variant: R/G/B at byte offsets 2/1/0, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 2
+%define RGB_GREEN 1
+%define RGB_BLUE 0
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryss2.asm"
+; extxbgr variant: R/G/B at byte offsets 3/2/1, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 3
+%define RGB_GREEN 2
+%define RGB_BLUE 1
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryss2.asm"
+; extxrgb variant: R/G/B at byte offsets 1/2/3, 4 bytes per pixel
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED 1
+%define RGB_GREEN 2
+%define RGB_BLUE 3
+%define RGB_PIXELSIZE 4
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryss2.asm"
diff --git a/simd/jcgrymmx.asm b/simd/jcgrymmx.asm
new file mode 100644
index 0000000..93d0936
--- /dev/null
+++ b/simd/jcgrymmx.asm
@@ -0,0 +1,359 @@
+;
+; jcgrymmx.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+ SECTION SEG_TEXT
+ BITS 32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width,
+; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+; JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b) (b)+8 ; JDIMENSION img_width
+%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
+%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
+%define output_row(b) (b)+20 ; JDIMENSION output_row
+%define num_rows(b) (b)+24 ; int num_rows
+
+%define original_ebp ebp+0
+%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
+%define WK_NUM 2
+%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
+
+ align 16
+ global EXTN(jsimd_rgb_gray_convert_mmx)
+
+EXTN(jsimd_rgb_gray_convert_mmx):
+ push ebp
+ mov eax,esp ; eax = frame base: saved ebp at [eax], args from [eax+8]
+ sub esp, byte 4 ; room to store the frame base
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
+ mov [esp],eax ; keep the frame base at [aligned ebp]
+ mov ebp,esp ; ebp = aligned ebp
+ lea esp, [wk(0)] ; esp = &wk[0] (WK_NUM mmwords below ebp)
+ pushpic eax ; make a room for GOT address
+ push ebx
+; push ecx ; need not be preserved
+; push edx ; need not be preserved
+ push esi
+ push edi
+
+ get_GOT ebx ; get GOT address
+ movpic POINTER [gotptr], ebx ; save GOT address
+
+ mov ecx, JDIMENSION [img_width(eax)] ; num_cols
+ test ecx,ecx
+ jz near .return ; nothing to do for a zero-width image
+
+ push ecx ; stash num_cols; ecx is reused for output_row
+
+ mov esi, JSAMPIMAGE [output_buf(eax)]
+ mov ecx, JDIMENSION [output_row(eax)]
+ mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] ; edi = output_buf[0]
+ lea edi, [edi+ecx*SIZEOF_JSAMPROW] ; edi = &output_buf[0][output_row]
+
+ pop ecx ; ecx = num_cols again
+
+ mov esi, JSAMPARRAY [input_buf(eax)]
+ mov eax, INT [num_rows(eax)]
+ test eax,eax
+ jle near .return ; num_rows <= 0: nothing to convert
+ alignx 16,7
+.rowloop:
+ pushpic eax
+ push edi
+ push esi
+ push ecx ; col
+
+ mov esi, JSAMPROW [esi] ; inptr
+ mov edi, JSAMPROW [edi] ; outptr0
+ movpic eax, POINTER [gotptr] ; load GOT address (eax)
+
+ cmp ecx, byte SIZEOF_MMWORD
+ jae short .columnloop ; at least SIZEOF_MMWORD columns left
+ alignx 16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+ push eax
+ push edx
+ lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
+ test cl, SIZEOF_BYTE
+ jz short .column_ld2
+ sub ecx, byte SIZEOF_BYTE
+ xor eax,eax
+ mov al, BYTE [esi+ecx]
+.column_ld2:
+ test cl, SIZEOF_WORD
+ jz short .column_ld4
+ sub ecx, byte SIZEOF_WORD
+ xor edx,edx
+ mov dx, WORD [esi+ecx]
+ shl eax, WORD_BIT
+ or eax,edx
+.column_ld4:
+ movd mmA,eax
+ pop edx
+ pop eax
+ test cl, SIZEOF_DWORD
+ jz short .column_ld8
+ sub ecx, byte SIZEOF_DWORD
+ movd mmG, DWORD [esi+ecx]
+ psllq mmA, DWORD_BIT
+ por mmA,mmG
+.column_ld8:
+ test cl, SIZEOF_MMWORD
+ jz short .column_ld16
+ movq mmG,mmA
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+ mov ecx, SIZEOF_MMWORD
+ jmp short .rgb_gray_cnv
+.column_ld16:
+ test cl, 2*SIZEOF_MMWORD
+ mov ecx, SIZEOF_MMWORD
+ jz short .rgb_gray_cnv
+ movq mmF,mmA
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+ jmp short .rgb_gray_cnv
+ alignx 16,7
+
+.columnloop:
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+ movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+ ; mmA=(00 10 20 01 11 21 02 12)
+ ; mmG=(22 03 13 23 04 14 24 05)
+ ; mmF=(15 25 06 16 26 07 17 27)
+
+ movq mmD,mmA
+ psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01)
+ psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --)
+
+ punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05)
+ psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23)
+
+ punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16)
+ punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27)
+
+ movq mmE,mmA
+ psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14)
+ psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --)
+
+ punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
+ psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25)
+
+ punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07)
+ punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27)
+
+ pxor mmH,mmH
+
+ movq mmC,mmA
+ punpcklbw mmA,mmH ; mmA=(00 02 04 06)
+ punpckhbw mmC,mmH ; mmC=(10 12 14 16)
+
+ movq mmB,mmE
+ punpcklbw mmE,mmH ; mmE=(20 22 24 26)
+ punpckhbw mmB,mmH ; mmB=(01 03 05 07)
+
+ movq mmF,mmD
+ punpcklbw mmD,mmH ; mmD=(11 13 15 17)
+ punpckhbw mmF,mmH ; mmF=(21 23 25 27)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+ test cl, SIZEOF_MMWORD/8
+ jz short .column_ld2
+ sub ecx, byte SIZEOF_MMWORD/8
+ movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+ test cl, SIZEOF_MMWORD/4
+ jz short .column_ld4
+ sub ecx, byte SIZEOF_MMWORD/4
+ movq mmF,mmA
+ movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld4:
+ test cl, SIZEOF_MMWORD/2
+ mov ecx, SIZEOF_MMWORD
+ jz short .rgb_gray_cnv
+ movq mmD,mmA
+ movq mmC,mmF
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+ jmp short .rgb_gray_cnv
+ alignx 16,7
+
+.columnloop:
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+ movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+ movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+ ; mmA=(00 10 20 30 01 11 21 31)
+ ; mmF=(02 12 22 32 03 13 23 33)
+ ; mmD=(04 14 24 34 05 15 25 35)
+ ; mmC=(06 16 26 36 07 17 27 37)
+
+ movq mmB,mmA
+ punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32)
+ punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33)
+
+ movq mmG,mmD
+ punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36)
+ punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37)
+
+ movq mmE,mmA
+ punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
+ punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36)
+
+ movq mmH,mmB
+ punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17)
+ punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37)
+
+ pxor mmF,mmF
+
+ movq mmC,mmA
+ punpcklbw mmA,mmF ; mmA=(00 02 04 06)
+ punpckhbw mmC,mmF ; mmC=(10 12 14 16)
+
+ movq mmD,mmB
+ punpcklbw mmB,mmF ; mmB=(01 03 05 07)
+ punpckhbw mmD,mmF ; mmD=(11 13 15 17)
+
+ movq mmG,mmE
+ punpcklbw mmE,mmF ; mmE=(20 22 24 26)
+ punpckhbw mmG,mmF ; mmG=(30 32 34 36)
+
+ punpcklbw mmF,mmH
+ punpckhbw mmH,mmH
+ psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27)
+ psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+ ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+ ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+
+ ; (Original)
+ ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ ;
+ ; (This implementation)
+ ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+ movq mm6,mm1
+ punpcklwd mm1,mm3
+ punpckhwd mm6,mm3
+ pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+ pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movq mm7, mm6 ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movq mm6,mm0
+ punpcklwd mm0,mm2
+ punpckhwd mm6,mm2
+ pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+ pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movq MMWORD [wk(0)], mm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+ movq MMWORD [wk(1)], mm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movq mm0, mm5 ; mm0=BO
+ movq mm6, mm4 ; mm6=BE
+
+ movq mm4,mm0
+ punpcklwd mm0,mm3
+ punpckhwd mm4,mm3
+ pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+ pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+ movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF]
+
+ paddd mm0, mm1
+ paddd mm4, mm7
+ paddd mm0,mm3
+ paddd mm4,mm3
+ psrld mm0,SCALEBITS ; mm0=YOL
+ psrld mm4,SCALEBITS ; mm4=YOH
+ packssdw mm0,mm4 ; mm0=YO
+
+ movq mm4,mm6
+ punpcklwd mm6,mm2
+ punpckhwd mm4,mm2
+ pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+ pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+ movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF]
+
+ paddd mm6, MMWORD [wk(0)]
+ paddd mm4, MMWORD [wk(1)]
+ paddd mm6,mm2
+ paddd mm4,mm2
+ psrld mm6,SCALEBITS ; mm6=YEL
+ psrld mm4,SCALEBITS ; mm4=YEH
+ packssdw mm6,mm4 ; mm6=YE
+
+ psllw mm0,BYTE_BIT ; odd-column Y -> high byte of each word
+ por mm6,mm0 ; mm6=Y
+ movq MMWORD [edi], mm6 ; Save Y
+
+ sub ecx, byte SIZEOF_MMWORD ; columns still to convert
+ add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr
+ add edi, byte SIZEOF_MMWORD ; outptr0
+ cmp ecx, byte SIZEOF_MMWORD
+ jae near .columnloop
+ test ecx,ecx
+ jnz near .column_ld1 ; partial group left at the end of the row
+
+ pop ecx ; col
+ pop esi
+ pop edi
+ poppic eax
+
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
+ add edi, byte SIZEOF_JSAMPROW
+ dec eax ; num_rows
+ jg near .rowloop
+
+ emms ; empty MMX state
+
+.return:
+ pop edi
+ pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+ pop ebx
+ mov esp,ebp ; esp <- aligned ebp
+ pop esp ; esp <- original ebp
+ pop ebp
+ ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 16
diff --git a/simd/jcgryss2-64.asm b/simd/jcgryss2-64.asm
new file mode 100644
index 0000000..3a52ec2
--- /dev/null
+++ b/simd/jcgryss2-64.asm
@@ -0,0 +1,366 @@
+;
+; jcgryss2-64.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+ SECTION SEG_TEXT
+ BITS 64
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+; JDIMENSION output_row, int num_rows);
+;
+
+; r10 = JDIMENSION img_width
+; r11 = JSAMPARRAY input_buf
+; r12 = JSAMPIMAGE output_buf
+; r13 = JDIMENSION output_row
+; r14 = int num_rows
+
+%define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM 2
+
+ align 16
+
+ global EXTN(jsimd_rgb_gray_convert_sse2)
+
+EXTN(jsimd_rgb_gray_convert_sse2):
+ push rbp
+ mov rax,rsp ; rax = frame base (saved rbp is at [rax])
+ sub rsp, byte 4
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
+ mov [rsp],rax ; keep the frame base at [aligned rbp]
+ mov rbp,rsp ; rbp = aligned rbp
+ lea rsp, [wk(0)] ; rsp = &wk[0] (WK_NUM xmmwords below rbp)
+ collect_args
+ push rbx
+
+ mov ecx, r10d ; num_cols: 32-bit arg, upper half of r10 is undefined; zero-extends into rcx
+ test rcx,rcx
+ jz near .return ; nothing to do for a zero-width image
+
+ push rcx ; stash num_cols; rcx is reused for output_row
+
+ mov rsi, r12
+ mov ecx, r13d ; output_row: 32-bit arg, zero-extended into rcx
+ mov rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY] ; rdi = output_buf[0]
+ lea rdi, [rdi+rcx*SIZEOF_JSAMPROW] ; rdi = &output_buf[0][output_row]
+
+ pop rcx ; rcx = num_cols again
+
+ mov rsi, r11
+ mov eax, r14d ; num_rows (zero-extended into rax)
+ test eax,eax ; signed 32-bit test, so a negative num_rows is rejected too
+ jle near .return
+.rowloop:
+ push rdi
+ push rsi
+ push rcx ; col
+
+ mov rsi, JSAMPROW [rsi] ; inptr
+ mov rdi, JSAMPROW [rdi] ; outptr0
+
+ cmp rcx, byte SIZEOF_XMMWORD
+ jae near .columnloop ; at least SIZEOF_XMMWORD columns left
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+ push rax
+ push rdx
+ lea rcx,[rcx+rcx*2] ; imul rcx,RGB_PIXELSIZE
+ test cl, SIZEOF_BYTE
+ jz short .column_ld2
+ sub rcx, byte SIZEOF_BYTE
+ movzx rax, BYTE [rsi+rcx]
+.column_ld2:
+ test cl, SIZEOF_WORD
+ jz short .column_ld4
+ sub rcx, byte SIZEOF_WORD
+ movzx rdx, WORD [rsi+rcx]
+ shl rax, WORD_BIT
+ or rax,rdx
+.column_ld4:
+ movd xmmA,eax
+ pop rdx
+ pop rax
+ test cl, SIZEOF_DWORD
+ jz short .column_ld8
+ sub rcx, byte SIZEOF_DWORD
+ movd xmmF, XMM_DWORD [rsi+rcx]
+ pslldq xmmA, SIZEOF_DWORD
+ por xmmA,xmmF
+.column_ld8:
+ test cl, SIZEOF_MMWORD
+ jz short .column_ld16
+ sub rcx, byte SIZEOF_MMWORD
+ movq xmmB, XMM_MMWORD [rsi+rcx]
+ pslldq xmmA, SIZEOF_MMWORD
+ por xmmA,xmmB
+.column_ld16:
+ test cl, SIZEOF_XMMWORD
+ jz short .column_ld32
+ movdqa xmmF,xmmA
+ movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+ mov rcx, SIZEOF_XMMWORD
+ jmp short .rgb_gray_cnv
+.column_ld32:
+ test cl, 2*SIZEOF_XMMWORD
+ mov rcx, SIZEOF_XMMWORD
+ jz short .rgb_gray_cnv
+ movdqa xmmB,xmmA
+ movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+ jmp short .rgb_gray_cnv
+
+.columnloop:
+ movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+ movdqu xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+ ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+ ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+ ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+ movdqa xmmG,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+ psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+ pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+ punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+ punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+ movdqa xmmD,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+ psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+ pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+ punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+ punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+ movdqa xmmE,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+ psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+ pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+ punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+ punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+ pxor xmmH,xmmH
+
+ movdqa xmmC,xmmA
+ punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
+ punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+ movdqa xmmB,xmmE
+ punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
+ punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+ movdqa xmmF,xmmD
+ punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
+ punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+ test cl, SIZEOF_XMMWORD/16
+ jz short .column_ld2
+ sub rcx, byte SIZEOF_XMMWORD/16
+ movd xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld2:
+ test cl, SIZEOF_XMMWORD/8
+ jz short .column_ld4
+ sub rcx, byte SIZEOF_XMMWORD/8
+ movq xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
+ pslldq xmmA, SIZEOF_MMWORD
+ por xmmA,xmmE
+.column_ld4:
+ test cl, SIZEOF_XMMWORD/4
+ jz short .column_ld8
+ sub rcx, byte SIZEOF_XMMWORD/4
+ movdqa xmmE,xmmA
+ movdqu xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld8:
+ test cl, SIZEOF_XMMWORD/2
+ mov rcx, SIZEOF_XMMWORD
+ jz short .rgb_gray_cnv
+ movdqa xmmF,xmmA
+ movdqa xmmH,xmmE
+ movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+ movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+ jmp short .rgb_gray_cnv
+
+.columnloop:
+ movdqu xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+ movdqu xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+ movdqu xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+ ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+ ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+ ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+ ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+ movdqa xmmD,xmmA
+ punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+ punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+ movdqa xmmC,xmmF
+ punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+ punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+ movdqa xmmB,xmmA
+ punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+ punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+ movdqa xmmG,xmmD
+ punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+ punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+ movdqa xmmE,xmmA
+ punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+ punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+ movdqa xmmH,xmmB
+ punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+ punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+ pxor xmmF,xmmF
+
+ movdqa xmmC,xmmA
+ punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
+ punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+ movdqa xmmD,xmmB
+ punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
+ punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+ movdqa xmmG,xmmE
+ punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
+ punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+ punpcklbw xmmF,xmmH
+ punpckhbw xmmH,xmmH
+ psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+ psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+ ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+ ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+ ; (Original)
+ ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ ;
+ ; (This implementation)
+ ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+ movdqa xmm6,xmm1
+ punpcklwd xmm1,xmm3
+ punpckhwd xmm6,xmm3
+ pmaddwd xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+ pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movdqa xmm6,xmm0
+ punpcklwd xmm0,xmm2
+ punpckhwd xmm6,xmm2
+ pmaddwd xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+ pmaddwd xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+ movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movdqa xmm0, xmm5 ; xmm0=BO
+ movdqa xmm6, xmm4 ; xmm6=BE
+
+ movdqa xmm4,xmm0
+ punpcklwd xmm0,xmm3
+ punpckhwd xmm4,xmm3
+ pmaddwd xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+ pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+ movdqa xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
+
+ paddd xmm0, xmm1
+ paddd xmm4, xmm7
+ paddd xmm0,xmm3
+ paddd xmm4,xmm3
+ psrld xmm0,SCALEBITS ; xmm0=YOL
+ psrld xmm4,SCALEBITS ; xmm4=YOH
+ packssdw xmm0,xmm4 ; xmm0=YO
+
+ movdqa xmm4,xmm6
+ punpcklwd xmm6,xmm2
+ punpckhwd xmm4,xmm2
+ pmaddwd xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+ pmaddwd xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+ movdqa xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
+
+ paddd xmm6, XMMWORD [wk(0)]
+ paddd xmm4, XMMWORD [wk(1)]
+ paddd xmm6,xmm2
+ paddd xmm4,xmm2
+ psrld xmm6,SCALEBITS ; xmm6=YEL
+ psrld xmm4,SCALEBITS ; xmm4=YEH
+ packssdw xmm6,xmm4 ; xmm6=YE
+
+ psllw xmm0,BYTE_BIT ; odd-column Y -> high byte of each word
+ por xmm6,xmm0 ; xmm6=Y
+ movdqa XMMWORD [rdi], xmm6 ; Save Y
+
+ sub rcx, byte SIZEOF_XMMWORD ; columns still to convert
+ add rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
+ add rdi, byte SIZEOF_XMMWORD ; outptr0
+ cmp rcx, byte SIZEOF_XMMWORD
+ jae near .columnloop
+ test rcx,rcx
+ jnz near .column_ld1 ; partial group left at the end of the row
+
+ pop rcx ; col
+ pop rsi
+ pop rdi
+
+ add rsi, byte SIZEOF_JSAMPROW ; input_buf
+ add rdi, byte SIZEOF_JSAMPROW
+ dec rax ; num_rows
+ jg near .rowloop
+
+.return:
+ pop rbx
+ uncollect_args
+ mov rsp,rbp ; rsp <- aligned rbp
+ pop rsp ; rsp <- original rbp
+ pop rbp
+ ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 16
diff --git a/simd/jcgryss2.asm b/simd/jcgryss2.asm
new file mode 100644
index 0000000..6eac030
--- /dev/null
+++ b/simd/jcgryss2.asm
@@ -0,0 +1,385 @@
+;
+; jcgryss2.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+ SECTION SEG_TEXT
+ BITS 32
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+; JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b) (b)+8 ; JDIMENSION img_width
+%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
+%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
+%define output_row(b) (b)+20 ; JDIMENSION output_row
+%define num_rows(b) (b)+24 ; int num_rows
+
+%define original_ebp ebp+0
+%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM 2
+%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
+
+ align 16
+
+ global EXTN(jsimd_rgb_gray_convert_sse2)
+
+; Entry point.  Sets up a 16-byte-aligned stack frame with WK_NUM xmmword
+; work slots, saves the callee-saved registers, and loads the arguments:
+;   ecx = img_width, esi = input_buf, edi = &output_buf[0][output_row],
+;   eax = num_rows.
+; Jumps straight to .return when img_width == 0 or num_rows <= 0.
+EXTN(jsimd_rgb_gray_convert_sse2):
+ push ebp
+ mov eax,esp ; eax = original ebp
+ sub esp, byte 4 ; reserve one dword for the saved frame pointer
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
+ mov [esp],eax ; stored at [aligned ebp+0], i.e. original_ebp
+ mov ebp,esp ; ebp = aligned ebp
+ lea esp, [wk(0)] ; move esp below the wk[] work area
+ pushpic eax ; make a room for GOT address
+ push ebx
+; push ecx ; need not be preserved
+; push edx ; need not be preserved
+ push esi
+ push edi
+
+ get_GOT ebx ; get GOT address
+ movpic POINTER [gotptr], ebx ; save GOT address
+
+ ; The arguments are addressed through eax (the unaligned frame pointer),
+ ; because ebp now points at the aligned frame.
+ mov ecx, JDIMENSION [img_width(eax)]
+ test ecx,ecx
+ jz near .return ; zero-width image: nothing to convert
+
+ push ecx ; keep img_width while ecx is borrowed for output_row
+
+ mov esi, JSAMPIMAGE [output_buf(eax)]
+ mov ecx, JDIMENSION [output_row(eax)]
+ mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY] ; edi = output_buf[0]
+ lea edi, [edi+ecx*SIZEOF_JSAMPROW] ; edi = &output_buf[0][output_row]
+
+ pop ecx ; ecx = img_width again
+
+ mov esi, JSAMPARRAY [input_buf(eax)]
+ mov eax, INT [num_rows(eax)] ; eax is overwritten last; arguments are no longer reachable
+ test eax,eax
+ jle near .return ; num_rows <= 0: nothing to convert
+ alignx 16,7
+.rowloop:
+ pushpic eax
+ push edi
+ push esi
+ push ecx ; col
+
+ mov esi, JSAMPROW [esi] ; inptr
+ mov edi, JSAMPROW [edi] ; outptr0
+ movpic eax, POINTER [gotptr] ; load GOT address (eax)
+
+ cmp ecx, byte SIZEOF_XMMWORD
+ jae near .columnloop
+ alignx 16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+ push eax
+ push edx
+ lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
+ test cl, SIZEOF_BYTE
+ jz short .column_ld2
+ sub ecx, byte SIZEOF_BYTE
+ movzx eax, BYTE [esi+ecx]
+.column_ld2:
+ test cl, SIZEOF_WORD
+ jz short .column_ld4
+ sub ecx, byte SIZEOF_WORD
+ movzx edx, WORD [esi+ecx]
+ shl eax, WORD_BIT
+ or eax,edx
+.column_ld4:
+ movd xmmA,eax
+ pop edx
+ pop eax
+ test cl, SIZEOF_DWORD
+ jz short .column_ld8
+ sub ecx, byte SIZEOF_DWORD
+ movd xmmF, XMM_DWORD [esi+ecx]
+ pslldq xmmA, SIZEOF_DWORD
+ por xmmA,xmmF
+.column_ld8:
+ test cl, SIZEOF_MMWORD
+ jz short .column_ld16
+ sub ecx, byte SIZEOF_MMWORD
+ movq xmmB, XMM_MMWORD [esi+ecx]
+ pslldq xmmA, SIZEOF_MMWORD
+ por xmmA,xmmB
+.column_ld16:
+ test cl, SIZEOF_XMMWORD
+ jz short .column_ld32
+ movdqa xmmF,xmmA
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+ mov ecx, SIZEOF_XMMWORD
+ jmp short .rgb_gray_cnv
+.column_ld32:
+ test cl, 2*SIZEOF_XMMWORD
+ mov ecx, SIZEOF_XMMWORD
+ jz short .rgb_gray_cnv
+ movdqa xmmB,xmmA
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+ jmp short .rgb_gray_cnv
+ alignx 16,7
+
+.columnloop:
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+ movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+ ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+ ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+ ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+ movdqa xmmG,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+ psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+ pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+ punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+ punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+ movdqa xmmD,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+ psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+ pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+ punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+ punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+ movdqa xmmE,xmmA
+ pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+ psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+ punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+ pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+ punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+ punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+ pxor xmmH,xmmH
+
+ movdqa xmmC,xmmA
+ punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
+ punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+ movdqa xmmB,xmmE
+ punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
+ punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+ movdqa xmmF,xmmD
+ punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
+ punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+ test cl, SIZEOF_XMMWORD/16
+ jz short .column_ld2
+ sub ecx, byte SIZEOF_XMMWORD/16
+ movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+ test cl, SIZEOF_XMMWORD/8
+ jz short .column_ld4
+ sub ecx, byte SIZEOF_XMMWORD/8
+ movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+ pslldq xmmA, SIZEOF_MMWORD
+ por xmmA,xmmE
+.column_ld4:
+ test cl, SIZEOF_XMMWORD/4
+ jz short .column_ld8
+ sub ecx, byte SIZEOF_XMMWORD/4
+ movdqa xmmE,xmmA
+ movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld8:
+ test cl, SIZEOF_XMMWORD/2
+ mov ecx, SIZEOF_XMMWORD
+ jz short .rgb_gray_cnv
+ movdqa xmmF,xmmA
+ movdqa xmmH,xmmE
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+ jmp short .rgb_gray_cnv
+ alignx 16,7
+
+.columnloop:
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+ movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+ movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+ ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+ ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+ ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+ ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+ movdqa xmmD,xmmA
+ punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+ punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+ movdqa xmmC,xmmF
+ punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+ punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+ movdqa xmmB,xmmA
+ punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+ punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+ movdqa xmmG,xmmD
+ punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+ punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+ movdqa xmmE,xmmA
+ punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+ punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+ movdqa xmmH,xmmB
+ punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+ punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+ pxor xmmF,xmmF
+
+ movdqa xmmC,xmmA
+ punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
+ punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+ movdqa xmmD,xmmB
+ punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
+ punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+ movdqa xmmG,xmmE
+ punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
+ punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+ punpcklbw xmmF,xmmH
+ punpckhbw xmmH,xmmH
+ psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+ psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+ ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+ ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+ ; (Original)
+ ; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ ;
+ ; (This implementation)
+ ; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+ movdqa xmm6,xmm1
+ punpcklwd xmm1,xmm3
+ punpckhwd xmm6,xmm3
+ pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+ pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movdqa xmm7, xmm6 ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+ movdqa xmm6,xmm0
+ punpcklwd xmm0,xmm2
+ punpckhwd xmm6,xmm2
+ pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+ pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+ movdqa XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+ movdqa xmm0, xmm5 ; xmm0=BO
+ movdqa xmm6, xmm4 ; xmm6=BE
+
+ movdqa xmm4,xmm0
+ punpcklwd xmm0,xmm3
+ punpckhwd xmm4,xmm3
+ pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+ pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+ movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
+
+ paddd xmm0, xmm1
+ paddd xmm4, xmm7
+ paddd xmm0,xmm3
+ paddd xmm4,xmm3
+ psrld xmm0,SCALEBITS ; xmm0=YOL
+ psrld xmm4,SCALEBITS ; xmm4=YOH
+ packssdw xmm0,xmm4 ; xmm0=YO
+
+ movdqa xmm4,xmm6
+ punpcklwd xmm6,xmm2
+ punpckhwd xmm4,xmm2
+ pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+ pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+ movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
+
+ paddd xmm6, XMMWORD [wk(0)]
+ paddd xmm4, XMMWORD [wk(1)]
+ paddd xmm6,xmm2
+ paddd xmm4,xmm2
+ psrld xmm6,SCALEBITS ; xmm6=YEL
+ psrld xmm4,SCALEBITS ; xmm4=YEH
+ packssdw xmm6,xmm4 ; xmm6=YE
+
+ psllw xmm0,BYTE_BIT
+ por xmm6,xmm0 ; xmm6=Y
+ movdqa XMMWORD [edi], xmm6 ; Save Y
+
+ sub ecx, byte SIZEOF_XMMWORD
+ add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
+ add edi, byte SIZEOF_XMMWORD ; outptr0
+ cmp ecx, byte SIZEOF_XMMWORD
+ jae near .columnloop
+ test ecx,ecx
+ jnz near .column_ld1
+
+ pop ecx ; col
+ pop esi
+ pop edi
+ poppic eax
+
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
+ add edi, byte SIZEOF_JSAMPROW
+ dec eax ; num_rows
+ jg near .rowloop
+
+; Common exit path.  Reached after the last row, and directly from the
+; prologue when img_width == 0 or num_rows <= 0.  Registers are restored in
+; the reverse of the order in which the prologue pushed them.
+.return:
+ pop edi
+ pop esi
+; pop edx ; need not be preserved
+; pop ecx ; need not be preserved
+ pop ebx
+ mov esp,ebp ; esp <- aligned ebp
+ pop esp ; esp <- original ebp
+ pop ebp
+ ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+ align 16
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 89ac1b7..7bfdd17 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -2,6 +2,7 @@
* simd/jsimd.h
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -43,6 +44,14 @@
#define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2
#define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2
#define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2
+#define jconst_rgb_gray_convert_sse2 jSCRGBGRYS2
+#define jsimd_rgb_gray_convert_sse2 jSRGBGRYS2
+#define jsimd_extrgb_gray_convert_sse2 jSEXTRGBGRYS2
+#define jsimd_extrgbx_gray_convert_sse2 jSEXTRGBXGRYS2
+#define jsimd_extbgr_gray_convert_sse2 jSEXTBGRGRYS2
+#define jsimd_extbgrx_gray_convert_sse2 jSEXTBGRXGRYS2
+#define jsimd_extxbgr_gray_convert_sse2 jSEXTXBGRGRYS2
+#define jsimd_extxrgb_gray_convert_sse2 jSEXTXRGBGRYS2
#define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2
#define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2
#define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2
@@ -163,6 +172,35 @@
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_rgb_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_gray_convert_mmx
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+
EXTERN(void) jsimd_ycc_rgb_convert_mmx
JPP((JDIMENSION out_width,
JSAMPIMAGE input_buf, JDIMENSION input_row,
@@ -222,6 +260,36 @@
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows));
+extern const int jconst_rgb_gray_convert_sse2[];
+EXTERN(void) jsimd_rgb_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_gray_convert_sse2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+
extern const int jconst_ycc_rgb_convert_sse2[];
EXTERN(void) jsimd_ycc_rgb_convert_sse2
JPP((JDIMENSION out_width,
diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c
index d9bb774..f77c5ef 100644
--- a/simd/jsimd_i386.c
+++ b/simd/jsimd_i386.c
@@ -2,7 +2,7 @@
* jsimd_i386.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -84,6 +84,28 @@
}
GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  /*
+   * Report whether an accelerated RGB-to-grayscale converter can be used.
+   * Returns 1 if the SSE2 or the MMX routine is usable, 0 otherwise.
+   */
+
+  /* Called before simd_support is consulted below. */
+  init_simd();
+
+  /* The SIMD code is written for 8-bit samples, 4-byte JDIMENSION and
+     3- or 4-byte pixels only. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4))
+    return 0;
+
+  /* SSE2 additionally requires its constant table to pass the alignment
+     check. */
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    return 1;
+
+  return (simd_support & JSIMD_MMX) ? 1 : 0;
+}
+
+GLOBAL(int)
jsimd_can_ycc_rgb (void)
{
init_simd();
@@ -155,6 +177,55 @@
}
GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  /*
+   * Convert num_rows rows of RGB input to grayscale using the SIMD routine
+   * that matches cinfo->in_color_space.  SSE2 is preferred; MMX is the
+   * fallback.  Does nothing if neither is usable.
+   */
+
+  /* Start from the generic RGB kernels; the switch below replaces them for
+     the extended pixel layouts. */
+  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int)
+    = jsimd_rgb_gray_convert_sse2;
+  void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int)
+    = jsimd_rgb_gray_convert_mmx;
+
+  switch(cinfo->in_color_space)
+  {
+    case JCS_EXT_RGB:
+      sse2fct=jsimd_extrgb_gray_convert_sse2;
+      mmxfct=jsimd_extrgb_gray_convert_mmx;
+      break;
+    case JCS_EXT_RGBX:
+      sse2fct=jsimd_extrgbx_gray_convert_sse2;
+      mmxfct=jsimd_extrgbx_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2fct=jsimd_extbgr_gray_convert_sse2;
+      mmxfct=jsimd_extbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGRX:
+      sse2fct=jsimd_extbgrx_gray_convert_sse2;
+      mmxfct=jsimd_extbgrx_gray_convert_mmx;
+      break;
+    case JCS_EXT_XBGR:
+      sse2fct=jsimd_extxbgr_gray_convert_sse2;
+      mmxfct=jsimd_extxbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_XRGB:
+      sse2fct=jsimd_extxrgb_gray_convert_sse2;
+      mmxfct=jsimd_extxrgb_gray_convert_mmx;
+      break;
+    default:
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+  {
+    sse2fct(cinfo->image_width, input_buf,
+            output_buf, output_row, num_rows);
+    return;
+  }
+
+  if (simd_support & JSIMD_MMX)
+    mmxfct(cinfo->image_width, input_buf,
+           output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows)
diff --git a/simd/jsimd_x86_64.c b/simd/jsimd_x86_64.c
index 7659249..2951268 100644
--- a/simd/jsimd_x86_64.c
+++ b/simd/jsimd_x86_64.c
@@ -2,7 +2,7 @@
* jsimd_x86_64.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -47,6 +47,23 @@
}
GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  /*
+   * Report whether the SSE2 RGB-to-grayscale converter can be used.
+   * Returns 1 if so, 0 otherwise.
+   */
+
+  /* The SIMD code is written for 8-bit samples, 4-byte JDIMENSION and
+     3- or 4-byte pixels only. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4))
+    return 0;
+
+  /* The SSE2 routine is usable only when its constant table passes the
+     alignment check. */
+  return IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2) ? 1 : 0;
+}
+
+GLOBAL(int)
jsimd_can_ycc_rgb (void)
{
/* The code is optimised for these values only */
@@ -99,6 +116,41 @@
}
GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  /*
+   * Convert num_rows rows of RGB input to grayscale using the SSE2 routine
+   * that matches cinfo->in_color_space.
+   */
+
+  /* Start from the generic RGB kernel; the switch below replaces it for the
+     extended pixel layouts. */
+  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int)
+    = jsimd_rgb_gray_convert_sse2;
+
+  switch(cinfo->in_color_space)
+  {
+    case JCS_EXT_RGB:   sse2fct=jsimd_extrgb_gray_convert_sse2;   break;
+    case JCS_EXT_RGBX:  sse2fct=jsimd_extrgbx_gray_convert_sse2;  break;
+    case JCS_EXT_BGR:   sse2fct=jsimd_extbgr_gray_convert_sse2;   break;
+    case JCS_EXT_BGRX:  sse2fct=jsimd_extbgrx_gray_convert_sse2;  break;
+    case JCS_EXT_XBGR:  sse2fct=jsimd_extxbgr_gray_convert_sse2;  break;
+    case JCS_EXT_XRGB:  sse2fct=jsimd_extxrgb_gray_convert_sse2;  break;
+    default:            break;
+  }
+
+  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows)
diff --git a/testimggray.jpg b/testimggray.jpg
new file mode 100644
index 0000000..95505a2
--- /dev/null
+++ b/testimggray.jpg
Binary files differ
diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c
new file mode 100644
index 0000000..1e0a353
--- /dev/null
+++ b/turbojpeg-jni.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C)2011 D. R. Commander. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#include "turbojpeg.h"
+#include <jni.h>
+#include "java/org_libjpegturbo_turbojpeg_TJCompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJ.h"
+
+/*
+ * Throw a java.lang.Exception with message `msg` in the calling JNI function
+ * and jump to its `bailout` label.  The enclosing function must have `env` in
+ * scope and define a `bailout:` label.  If the exception class cannot be
+ * found, the macro jumps to `bailout` without calling ThrowNew.
+ */
+#define _throw(msg) { \
+ jclass _exccls=(*env)->FindClass(env, "java/lang/Exception"); \
+ if(!_exccls) goto bailout; \
+ (*env)->ThrowNew(env, _exccls, msg); \
+ goto bailout; \
+}
+
+/* Evaluate `f` and jump to the enclosing function's `bailout` label if the
+   result is zero/NULL. */
+#define bailif0(f) {if(!(f)) goto bailout;}
+
+/*
+ * Read the native TurboJPEG handle from the Java object's long field
+ * "handle" into the caller's local variable `handle`.  The enclosing function
+ * must have `env`, `obj` and `handle` in scope and define a `bailout:` label;
+ * the macro jumps there if the class or field lookup fails.
+ *
+ * The jlong is converted through size_t, not long: long is only 32 bits wide
+ * on LLP64 platforms such as 64-bit Windows, so a (long) cast would truncate
+ * a 64-bit pointer.
+ */
+#define gethandle() { \
+ jclass _cls=(*env)->GetObjectClass(env, obj); \
+ jfieldID _fid; \
+ if(!_cls) goto bailout; \
+ bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J")); \
+ handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid); \
+}
+
+/*
+ * JNI binding for TJBUFSIZE(): returns the maximum JPEG buffer size for an
+ * image of the given dimensions.  Throws java.lang.Exception and returns -1
+ * if TJBUFSIZE() reports an error.
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
+ (JNIEnv *env, jclass cls, jint width, jint height)
+{
+  jlong retval=-1;
+  unsigned long size=TJBUFSIZE(width, height);
+
+  /* TJBUFSIZE() returns unsigned long, so its error value is
+     (unsigned long)-1.  Compare in that type: where unsigned long is 32 bits
+     the value widens to 4294967295 as a jlong and never equals -1. */
+  if(size==(unsigned long)-1) _throw(tjGetErrorStr());
+  retval=(jlong)size;
+
+  bailout:
+  return retval;
+}
+
+/*
+ * JNI binding for TJBUFSIZEYUV(): returns the buffer size needed for a YUV
+ * planar image with the given dimensions and chrominance subsampling.
+ * Throws java.lang.Exception and returns -1 if TJBUFSIZEYUV() reports an
+ * error.
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV
+ (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp)
+{
+  jlong retval=-1;
+  unsigned long size=TJBUFSIZEYUV(width, height, subsamp);
+
+  /* TJBUFSIZEYUV() returns unsigned long, so its error value is
+     (unsigned long)-1.  Compare in that type: where unsigned long is 32 bits
+     the value widens to 4294967295 as a jlong and never equals -1. */
+  if(size==(unsigned long)-1) _throw(tjGetErrorStr());
+  retval=(jlong)size;
+
+  bailout:
+  return retval;
+}
+
+/*
+ * Create a TurboJPEG compressor instance and store its handle in the Java
+ * object's long field "handle".  Throws java.lang.Exception if the instance
+ * cannot be created.
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
+ (JNIEnv *env, jobject obj)
+{
+  jclass cls;
+  jfieldID fid;
+  tjhandle handle=NULL;
+  int stored=0;
+
+  if((handle=tjInitCompress())==NULL)
+    _throw(tjGetErrorStr());
+
+  bailif0(cls=(*env)->GetObjectClass(env, obj));
+  bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
+  /* Widen through size_t, not long: long is 32 bits on LLP64 platforms and
+     would truncate a 64-bit pointer. */
+  (*env)->SetLongField(env, obj, fid, (jlong)(size_t)handle);
+  stored=1;
+
+  bailout:
+  /* If the handle never reached the Java object, nothing else can free it. */
+  if(handle && !stored) tjDestroy(handle);
+  return;
+}
+
+/*
+ * Compress the bitmap in `src` into the JPEG buffer `dst` with tjCompress().
+ * Returns the value tjCompress() stored in its size output (initially 0).
+ * Throws java.lang.Exception if tjCompress() fails.
+ */
+JNIEXPORT jlong JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress
+ (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
+ jint height, jint pixelsize, jbyteArray dst, jint jpegsubsamp,
+ jint jpegqual, jint flags)
+{
+  tjhandle handle=0;
+  unsigned long size=0;
+  unsigned char *srcbuf=NULL, *dstbuf=NULL;
+  int rc;
+
+  gethandle();
+
+  bailif0(srcbuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+  bailif0(dstbuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  rc=tjCompress(handle, srcbuf, width, pitch, height, pixelsize, dstbuf,
+    &size, jpegsubsamp, jpegqual, flags);
+
+  /* Release both arrays before any exception is raised, and clear the
+     pointers so the bailout path cannot release them a second time. */
+  (*env)->ReleasePrimitiveArrayCritical(env, dst, dstbuf, 0);
+  dstbuf=NULL;
+  (*env)->ReleasePrimitiveArrayCritical(env, src, srcbuf, 0);
+  srcbuf=NULL;
+
+  if(rc==-1) _throw(tjGetErrorStr());
+
+  bailout:
+  /* Only reached with live pointers when acquiring the second array failed. */
+  if(dstbuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstbuf, 0);
+  if(srcbuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcbuf, 0);
+  return size;
+}
+
+/*
+ * Destroy the TurboJPEG instance whose handle is stored in the Java object.
+ * Throws java.lang.Exception if tjDestroy() fails.
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
+ (JNIEnv *env, jobject obj)
+{
+  tjhandle handle=0;
+
+  gethandle();
+
+  if(tjDestroy(handle)==-1)
+  {
+    _throw(tjGetErrorStr());
+  }
+
+  bailout:
+  return;
+}
+
+/*
+ * Create a TurboJPEG decompressor instance and store its handle in the Java
+ * object's long field "handle".  Throws java.lang.Exception if the instance
+ * cannot be created.
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
+ (JNIEnv *env, jobject obj)
+{
+  jclass cls;
+  jfieldID fid;
+  tjhandle handle=NULL;
+  int stored=0;
+
+  if((handle=tjInitDecompress())==NULL) _throw(tjGetErrorStr());
+
+  bailif0(cls=(*env)->GetObjectClass(env, obj));
+  bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
+  /* Widen through size_t, not long: long is 32 bits on LLP64 platforms and
+     would truncate a 64-bit pointer. */
+  (*env)->SetLongField(env, obj, fid, (jlong)(size_t)handle);
+  stored=1;
+
+  bailout:
+  /* If the handle never reached the Java object, nothing else can free it. */
+  if(handle && !stored) tjDestroy(handle);
+  return;
+}
+
+/*
+ * Return the scaled width that tjScaledSize() computes for the given input
+ * and desired output dimensions.  Throws java.lang.Exception if
+ * tjScaledSize() fails.
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledWidth
+ (JNIEnv *env, jobject obj, jint input_width, jint input_height,
+ jint output_width, jint output_height)
+{
+  /* tjScaledSize() takes int*.  jint is not guaranteed to be int (the Win32
+     JNI headers define it as long), so pass real int variables rather than
+     the addresses of the jint parameters. */
+  int scaled_w=output_width, scaled_h=output_height;
+
+  if(tjScaledSize(input_width, input_height, &scaled_w, &scaled_h)==-1)
+    _throw(tjGetErrorStr());
+
+  bailout:
+  return scaled_w;
+}
+
+/*
+ * Return the scaled height that tjScaledSize() computes for the given input
+ * and desired output dimensions.  Throws java.lang.Exception if
+ * tjScaledSize() fails.
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledHeight
+ (JNIEnv *env, jobject obj, jint input_width, jint input_height,
+ jint output_width, jint output_height)
+{
+  /* tjScaledSize() takes int*.  jint is not guaranteed to be int (the Win32
+     JNI headers define it as long), so pass real int variables rather than
+     the addresses of the jint parameters. */
+  int scaled_w=output_width, scaled_h=output_height;
+
+  if(tjScaledSize(input_width, input_height, &scaled_w, &scaled_h)==-1)
+    _throw(tjGetErrorStr());
+
+  bailout:
+  return scaled_h;
+}
+
+/*
+ * Parse the header of the JPEG image in `src` with tjDecompressHeader2() and
+ * return a new org.libjpegturbo.turbojpeg.TJHeaderInfo object whose
+ * "subsamp", "width" and "height" fields hold the results.  Throws
+ * java.lang.Exception if the header cannot be read.
+ */
+JNIEXPORT jobject JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
+ (JNIEnv *env, jobject obj, jbyteArray src, jlong size)
+{
+  tjhandle handle=0;
+  unsigned char *jpegbuf=NULL;
+  int w=0, h=0, subsamp=-1;
+  int rc;
+  jclass infocls=NULL;
+  jobject info=NULL;
+  jfieldID fid;
+
+  gethandle();
+
+  bailif0(jpegbuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+  rc=tjDecompressHeader2(handle, jpegbuf, (unsigned long)size,
+    &w, &h, &subsamp);
+  /* The array is released on both the success and the failure path, before
+     any exception is raised. */
+  (*env)->ReleasePrimitiveArrayCritical(env, src, jpegbuf, 0);
+  jpegbuf=NULL;
+  if(rc==-1) _throw(tjGetErrorStr());
+
+  bailif0(infocls=(*env)->FindClass(env, "org/libjpegturbo/turbojpeg/TJHeaderInfo"));
+  bailif0(info=(*env)->AllocObject(env, infocls));
+
+  bailif0(fid=(*env)->GetFieldID(env, infocls, "subsamp", "I"));
+  (*env)->SetIntField(env, info, fid, subsamp);
+  bailif0(fid=(*env)->GetFieldID(env, infocls, "width", "I"));
+  (*env)->SetIntField(env, info, fid, w);
+  bailif0(fid=(*env)->GetFieldID(env, infocls, "height", "I"));
+  (*env)->SetIntField(env, info, fid, h);
+
+  bailout:
+  return info;
+}
+
+/*
+ * Decompress the JPEG image in `src` into the bitmap buffer `dst` with
+ * tjDecompress().  Throws java.lang.Exception if tjDecompress() fails.
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress
+ (JNIEnv *env, jobject obj, jbyteArray src, jlong size, jbyteArray dst,
+ jint width, jint pitch, jint height, jint pixelsize, jint flags)
+{
+  tjhandle handle=0;
+  unsigned char *srcbuf=NULL, *dstbuf=NULL;
+  int rc;
+
+  gethandle();
+
+  bailif0(srcbuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+  bailif0(dstbuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+  rc=tjDecompress(handle, srcbuf, (unsigned long)size, dstbuf, width, pitch,
+    height, pixelsize, flags);
+
+  /* Release both arrays before any exception is raised, and clear the
+     pointers so the bailout path cannot release them a second time. */
+  (*env)->ReleasePrimitiveArrayCritical(env, dst, dstbuf, 0);
+  dstbuf=NULL;
+  (*env)->ReleasePrimitiveArrayCritical(env, src, srcbuf, 0);
+  srcbuf=NULL;
+
+  if(rc==-1) _throw(tjGetErrorStr());
+
+  bailout:
+  /* Only reached with live pointers when acquiring the second array failed. */
+  if(dstbuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstbuf, 0);
+  if(srcbuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcbuf, 0);
+  return;
+}
+
+/* Destroy the decompressor instance by forwarding to the compressor's
+   destroy routine. */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
+ (JNIEnv *env, jobject obj)
+{
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj);
+}
diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile
index e54d59c..7fc4a2d 100755
--- a/turbojpeg-mapfile
+++ b/turbojpeg-mapfile
@@ -3,9 +3,11 @@
tjInitCompress;
tjCompress;
TJBUFSIZE;
+ TJBUFSIZEYUV;
tjInitDecompress;
tjDecompressHeader;
tjDecompressHeader2;
+ tjScaledSize;
tjDecompress;
tjDestroy;
tjGetErrorStr;
diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni
new file mode 100755
index 0000000..bf2ce4b
--- /dev/null
+++ b/turbojpeg-mapfile.jni
@@ -0,0 +1,27 @@
+{
+ global:
+ tjInitCompress;
+ tjCompress;
+ TJBUFSIZE;
+ TJBUFSIZEYUV;
+ tjInitDecompress;
+ tjDecompressHeader;
+ tjDecompressHeader2;
+ tjScaledSize;
+ tjDecompress;
+ tjDestroy;
+ tjGetErrorStr;
+ Java_org_libjpegturbo_turbojpeg_TJ_bufSize;
+ Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_init;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_compress;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_init;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledHeight;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_getScaledWidth;
+ local:
+ *;
+};
diff --git a/turbojpeg.h b/turbojpeg.h
index 47e5aa6..5fa91f5 100644
--- a/turbojpeg.h
+++ b/turbojpeg.h
@@ -153,8 +153,31 @@
unsigned char *dstbuf, unsigned long *size,
int jpegsubsamp, int jpegqual, int flags);
+
+/*
+ unsigned long TJBUFSIZE(int width, int height)
+
+ Convenience function which returns the maximum size of the buffer required to
+ hold a JPEG image with the given width and height
+
+ RETURNS: (unsigned long)-1 if arguments are out of bounds
+*/
DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height);
+
+/*
+ unsigned long TJBUFSIZEYUV(int width, int height, int subsamp)
+
+ Convenience function which returns the size of the buffer required to
+ hold a YUV planar image with the given width, height, and level of
+ chrominance subsampling
+
+ RETURNS: (unsigned long)-1 if arguments are out of bounds
+*/
+DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
+ int subsamp);
+
+
/*
tjhandle tjInitDecompress(void)
@@ -176,8 +199,7 @@
[INPUT] j = instance handle previously returned from a call to
tjInitDecompress()
- [INPUT] srcbuf = pointer to a user-allocated buffer containing the JPEG image
- to decompress
+ [INPUT] srcbuf = pointer to a user-allocated buffer containing a JPEG image
[INPUT] size = size of the JPEG image buffer (in bytes)
[OUTPUT] width = width (in pixels) of the JPEG image
[OUTPUT] height = height (in pixels) of the JPEG image
@@ -199,6 +221,26 @@
/*
+ int tjScaledSize(int input_width, int input_height,
+ int *output_width, int *output_height)
+
+ [INPUT] input_width = width (in pixels) of the JPEG image
+ [INPUT] input_height = height (in pixels) of the JPEG image
+ [INPUT/OUTPUT] output_width, output_height = Before calling this function,
+ *output_width and *output_height should be set to the desired dimensions
+ of the output image. Upon returning from this function, they will be set
+ to the dimensions of the largest scaled down image that TurboJPEG can
+ produce without exceeding the desired dimensions. If either *output_width
+ or *output_height is set to 0, then the corresponding dimension will not
+ be considered when determining the scaled image size.
+
+ RETURNS: 0 on success, -1 if arguments are out of bounds
+*/
+DLLEXPORT int DLLCALL tjScaledSize(int input_width, int input_height,
+ int *output_width, int *output_height);
+
+
+/*
int tjDecompress(tjhandle j,
unsigned char *srcbuf, unsigned long size,
unsigned char *dstbuf, int width, int pitch, int height, int pixelsize,
@@ -210,22 +252,38 @@
to decompress
[INPUT] size = size of the JPEG image buffer (in bytes)
[INPUT] dstbuf = pointer to user-allocated image buffer which will receive
- the bitmap image. This buffer should normally be pitch*height
- bytes in size, although this pointer may also be used to decompress into
- a specific region of a larger buffer.
- [INPUT] width = width (in pixels) of the destination image
- [INPUT] pitch = bytes per line of the destination image (width*pixelsize if
- the bitmap is unpadded, else TJPAD(width*pixelsize) if each line of the
- bitmap is padded to the nearest 32-bit boundary, such as is the case for
- Windows bitmaps. You can also be clever and use this parameter to skip
- lines, etc. Setting this parameter to 0 is the equivalent of setting it
- to width*pixelsize.
- [INPUT] height = height (in pixels) of the destination image
+ the bitmap image. This buffer should normally be pitch*scaled_height
+ bytes in size, where scaled_height is determined by calling
+ tjScaledSize() with the height of the desired output image. This pointer
+ may also be used to decompress into a specific region of a
+ larger buffer.
+ [INPUT] width = desired width (in pixels) of the destination image. If this
+ is smaller than the width of the JPEG image being decompressed, then
+ TurboJPEG will use scaling in the JPEG decompressor to generate the
+ largest possible image that will fit within the desired width. If width
+ is set to 0, then only the height will be considered when determining the
+ scaled image size.
+ [INPUT] pitch = bytes per line of the destination image. Normally, this is
+ scaled_width*pixelsize if the bitmap image is unpadded, else
+ TJPAD(scaled_width*pixelsize) if each line of the bitmap is padded to the
+ nearest 32-bit boundary, such as is the case for Windows bitmaps.
+ (NOTE: scaled_width can be determined by calling tjScaledSize().) You can
+ also be clever and use this parameter to skip lines, etc. Setting this
+ parameter to 0 is the equivalent of setting it to scaled_width*pixelsize.
+ [INPUT] height = desired height (in pixels) of the destination image. If
+ this is smaller than the height of the JPEG image being decompressed, then
+ TurboJPEG will use scaling in the JPEG decompressor to generate the
+ largest possible image that will fit within the desired height. If
+ height is set to 0, then only the width will be considered when
+ determining the scaled image size.
[INPUT] pixelsize = size (in bytes) of each pixel in the destination image
RGBX/BGRX/XRGB/XBGR: 4, RGB/BGR: 3, Grayscale: 1
[INPUT] flags = the bitwise OR of one or more of the flags described in the
"Flags" section above.
+ NOTE: The width, pitch, height, and pixelsize parameters are ignored if
+ decompressing to a YUV planar image.
+
RETURNS: 0 on success, -1 on error
*/
DLLEXPORT int DLLCALL tjDecompress(tjhandle j,
diff --git a/turbojpegl.c b/turbojpegl.c
index c1c62f6..93f4b6a 100644
--- a/turbojpegl.c
+++ b/turbojpegl.c
@@ -114,8 +114,33 @@
DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height)
{
- // This allows enough room in case the image doesn't compress
- return ((width+15)&(~15)) * ((height+15)&(~15)) * 6 + 2048;
+  // Returns the size of a buffer large enough to receive a JPEG image
+  // compressed from a width x height source.  Both dimensions must be at
+  // least 1; otherwise _throw() reports the error and control jumps to
+  // bailout.  (_throw() is defined elsewhere in this file -- presumably it
+  // also sets retval to the error value; confirm against the macro.)
+  unsigned long retval=0;
+  if(width<1 || height<1)
+    _throw("Invalid argument in TJBUFSIZE()");
+
+  // This allows for rare corner cases in which a JPEG image can actually be
+  // larger than the uncompressed input (we wouldn't mention it if it hadn't
+  // happened before.)
+  //
+  // Each dimension is rounded up to a multiple of 16.  The arithmetic is done
+  // in unsigned long rather than int, so that large (but valid) dimensions do
+  // not overflow a signed int before the result is widened to the return
+  // type.
+  retval=(((unsigned long)width+15)&(~15UL))
+    * (((unsigned long)height+15)&(~15UL)) * 6 + 2048;
+
+  bailout:
+  return retval;
+}
+
+DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
+  int subsamp)
+{
+  // Returns the size of a buffer able to hold a YUV planar image with the
+  // given dimensions and chrominance subsampling option.  width and height
+  // must be at least 1 and subsamp must be in [0, NUMSUBOPT); otherwise
+  // _throw() reports the error and control jumps to bailout.  (_throw() is
+  // defined elsewhere in this file -- presumably it also sets retval to the
+  // error value; confirm against the macro.)
+  unsigned long retval=0, ystride, cstride;
+  int pw, ph, cw, ch;
+  if(width<1 || height<1 || subsamp<0 || subsamp>=NUMSUBOPT)
+    _throw("Invalid argument in TJBUFSIZEYUV()");
+
+  // Luminance plane dimensions, padded via PAD() to the sampling factors
+  // (PAD() is defined elsewhere; presumably it rounds its first argument up
+  // to a multiple of the second.)
+  pw=PAD(width, hsampfactor[subsamp]);
+  ph=PAD(height, vsampfactor[subsamp]);
+  // Chrominance plane dimensions
+  cw=pw/hsampfactor[subsamp];  ch=ph/vsampfactor[subsamp];
+
+  // Row widths are padded to 4.  They are stored in unsigned long before
+  // being multiplied so that the plane sizes are computed in the width of
+  // the return type instead of overflowing a signed int for large images.
+  ystride=PAD(pw, 4);  cstride=PAD(cw, 4);
+  retval=ystride*ph;
+  // Grayscale images have no chrominance planes; all others have two.
+  if(subsamp!=TJ_GRAYSCALE) retval+=cstride*ch*2;
+
+  bailout:
+  return retval;
}
DLLEXPORT int DLLCALL tjCompress(tjhandle h,
@@ -294,6 +319,7 @@
-(unsigned long)(j->jdms.free_in_buffer);
bailout:
+ if(j->cinfo.global_state>CSTATE_START) jpeg_abort_compress(&j->cinfo);
if(row_pointer) free(row_pointer);
for(i=0; i<MAX_COMPONENTS; i++)
{
@@ -418,6 +444,34 @@
}
+DLLEXPORT int DLLCALL tjScaledSize(int input_width, int input_height,
+  int *output_width, int *output_height)
+{
+  // Computes the dimensions of the largest image, scaled by 1/1, 1/2, 1/4, or
+  // 1/8, that fits within the desired *output_width x *output_height.  A
+  // desired dimension of 0 means "do not constrain this dimension."  On
+  // success, the scaled dimensions are stored in *output_width and
+  // *output_height and 0 is returned.  On failure, the output arguments are
+  // not set to a scaled size and the error is reported through _throw()
+  // (defined elsewhere in this file -- presumably it sets retval to -1, as
+  // the header documents; confirm against the macro.)
+  int i, retval=0, scaledw=0, scaledh=0;
+
+  if(input_width<1 || input_height<1 || output_width==NULL
+    || output_height==NULL || *output_width<0 || *output_height<0)
+    _throw("Invalid argument in tjScaledSize()");
+
+  if(*output_width==0) *output_width=input_width;
+  if(*output_height==0) *output_height=input_height;
+
+  // Try each supported denominator, largest image first.  The search always
+  // runs, so a request larger than the image resolves at i=1 to the
+  // full-size image, which is the largest image the decompressor produces.
+  for(i=1; i<=8; i*=2)
+  {
+    scaledw=(input_width+i-1)/i;
+    scaledh=(input_height+i-1)/i;
+    if(scaledw<=*output_width && scaledh<=*output_height)
+      break;
+  }
+  // If even 1/8 scaling exceeds the request, fail the same way that
+  // tjDecompress() does rather than returning dimensions that are larger
+  // than the caller asked for.
+  if(scaledw>*output_width || scaledh>*output_height)
+    _throw("Could not scale down to desired image dimensions");
+  *output_width=scaledw;  *output_height=scaledh;
+
+  bailout:
+  return retval;
+}
+
+
DLLEXPORT int DLLCALL tjDecompress(tjhandle h,
unsigned char *srcbuf, unsigned long size,
unsigned char *dstbuf, int width, int pitch, int height, int ps,
@@ -427,6 +481,7 @@
int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS],
tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
JSAMPLE *_tmpbuf=NULL; JSAMPROW *tmpbuf[MAX_COMPONENTS];
+ int scale_num=1, scale_denom=1, jpegwidth, jpegheight, scaledw, scaledh;
checkhandle(h);
@@ -436,14 +491,12 @@
}
if(srcbuf==NULL || size<=0
- || dstbuf==NULL || width<=0 || pitch<0 || height<=0)
+ || dstbuf==NULL || width<0 || pitch<0 || height<0)
_throw("Invalid argument in tjDecompress()");
if(ps!=3 && ps!=4 && ps!=1)
_throw("This decompressor can only handle 24-bit and 32-bit RGB or 8-bit grayscale output");
if(!j->initd) _throw("Instance has not been initialized for decompression");
- if(pitch==0) pitch=width*ps;
-
if(flags&TJ_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
else if(flags&TJ_FORCESSE) putenv("JSIMD_FORCESSE=1");
else if(flags&TJ_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
@@ -459,6 +512,24 @@
jpeg_read_header(&j->dinfo, TRUE);
+ jpegwidth=j->dinfo.image_width; jpegheight=j->dinfo.image_height;
+ if(width==0) width=jpegwidth;
+ if(height==0) height=jpegheight;
+ if(width<jpegwidth || height<jpegheight)
+ {
+ for(i=1; i<=8; i*=2)
+ {
+ scaledw=(jpegwidth+i-1)/i;
+ scaledh=(jpegheight+i-1)/i;
+ if(scaledw<=width && scaledh<=height)
+ break;
+ }
+ if(scaledw>width || scaledh>height)
+ _throw("Could not scale down to desired image dimensions");
+ width=scaledw; height=scaledh;
+ scale_denom=i;
+ }
+
if(flags&TJ_YUV)
{
j_decompress_ptr dinfo=&j->dinfo;
@@ -470,10 +541,10 @@
int ih;
iw[i]=compptr->width_in_blocks*DCTSIZE;
ih=compptr->height_in_blocks*DCTSIZE;
- cw[i]=PAD(width, dinfo->max_h_samp_factor)*compptr->h_samp_factor
- /dinfo->max_h_samp_factor;
- ch[i]=PAD(height, dinfo->max_v_samp_factor)*compptr->v_samp_factor
- /dinfo->max_v_samp_factor;
+ cw[i]=PAD(dinfo->image_width, dinfo->max_h_samp_factor)
+ *compptr->h_samp_factor/dinfo->max_h_samp_factor;
+ ch[i]=PAD(dinfo->image_height, dinfo->max_v_samp_factor)
+ *compptr->v_samp_factor/dinfo->max_v_samp_factor;
if(iw[i]!=cw[i] || ih!=ch[i]) usetmpbuf=1;
th[i]=compptr->v_samp_factor*DCTSIZE;
tmpbufsize+=iw[i]*th[i];
@@ -503,16 +574,6 @@
}
}
}
- else
- {
- if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL)
- _throw("Memory allocation failed in tjInitDecompress()");
- for(i=0; i<height; i++)
- {
- if(flags&TJ_BOTTOMUP) row_pointer[i]= &dstbuf[(height-i-1)*pitch];
- else row_pointer[i]= &dstbuf[i*pitch];
- }
- }
if(ps==1) j->dinfo.out_color_space = JCS_GRAYSCALE;
#if JCS_EXTENSIONS==1
@@ -533,6 +594,11 @@
if(flags&TJ_FASTUPSAMPLE) j->dinfo.do_fancy_upsampling=FALSE;
if(flags&TJ_YUV) j->dinfo.raw_data_out=TRUE;
+ else
+ {
+ j->dinfo.scale_num=scale_num;
+ j->dinfo.scale_denom=scale_denom;
+ }
jpeg_start_decompress(&j->dinfo);
if(flags&TJ_YUV)
@@ -567,6 +633,16 @@
}
else
{
+ if(pitch==0) pitch=j->dinfo.output_width*ps;
+ if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)
+ *j->dinfo.output_height))==NULL)
+ _throw("Memory allocation failed in tjInitDecompress()");
+ for(i=0; i<j->dinfo.output_height; i++)
+ {
+ if(flags&TJ_BOTTOMUP)
+ row_pointer[i]= &dstbuf[(j->dinfo.output_height-i-1)*pitch];
+ else row_pointer[i]= &dstbuf[i*pitch];
+ }
while(j->dinfo.output_scanline<j->dinfo.output_height)
{
jpeg_read_scanlines(&j->dinfo, &row_pointer[j->dinfo.output_scanline],
@@ -576,6 +652,7 @@
jpeg_finish_decompress(&j->dinfo);
bailout:
+ if(j->dinfo.global_state>DSTATE_START) jpeg_abort_decompress(&j->dinfo);
for(i=0; i<MAX_COMPONENTS; i++)
{
if(tmpbuf[i]) free(tmpbuf[i]);