12-bit JPEG support


git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1337 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/BUILDING.txt b/BUILDING.txt
new file mode 100644
index 0000000..13835a1
--- /dev/null
+++ b/BUILDING.txt
@@ -0,0 +1,842 @@
+*******************************************************************************
+**     Building on Un*x Platforms (including Cygwin and OS X)
+*******************************************************************************
+
+
+==================
+Build Requirements
+==================
+
+-- autoconf 2.56 or later
+-- automake 1.7 or later
+-- libtool 1.4 or later
+   * If using Xcode 4.3 or later on OS X, autoconf and automake are no longer
+     provided.  The easiest way to obtain them is from MacPorts
+     (http://www.macports.org/).
+
+-- NASM (if building x86 or x86-64 SIMD extensions)
+   * NASM 0.98, or 2.01 or later, is required for a 32-bit build
+   * NASM 2.00 or later is required for a 64-bit build
+   * NASM 2.07 or later is required for a 64-bit build on OS X.  This can be
+     obtained from MacPorts (http://www.macports.org/).
+
+   The binary RPMs released by the NASM project do not work on older Linux
+   systems, such as Red Hat Enterprise Linux 4.  On such systems, you can
+   easily build and install NASM from a source RPM by downloading one of the
+   SRPMs from
+
+   http://www.nasm.us/pub/nasm/releasebuilds
+
+   and executing the following as root:
+
+     ARCH=`uname -m`
+     rpmbuild --rebuild nasm-{version}.src.rpm
+     rpm -Uvh /usr/src/redhat/RPMS/$ARCH/nasm-{version}.$ARCH.rpm
+
+   NOTE: the NASM build will fail if texinfo is not installed.
+
+-- GCC v4.1 or later recommended for best performance
+   * Beginning with Xcode 4, Apple stopped distributing GCC and switched to
+     the LLVM compiler.  Xcode v4.0 through v4.6 provides a GCC front end
+     called LLVM-GCC.  Unfortunately, as of this writing, neither LLVM-GCC nor
+     the LLVM (clang) compiler produces optimal performance with libjpeg-turbo.
+     Building libjpeg-turbo with LLVM-GCC v4.2 results in a 10% performance
+     degradation when compressing using 64-bit code, relative to building
+     libjpeg-turbo with GCC v4.2.  Building libjpeg-turbo with LLVM (clang)
+     results in a 20% performance degradation when compressing using 64-bit
+     code, relative to building libjpeg-turbo with GCC v4.2.  If you are
+     running Snow Leopard or earlier, it is suggested that you continue to use
+     Xcode v3.2.6, which provides GCC v4.2.  If you are using Lion or later, it
+     is suggested that you install Apple GCC v4.2 through MacPorts.
+
+-- If building the TurboJPEG Java wrapper, JDK or OpenJDK 1.5 or later is
+   required.  Some systems, such as Solaris 10 and later and Red Hat Enterprise
+   Linux 5 and later, have this pre-installed.  On OS X 10.5 and later, it will
+   be necessary to install the Java Developer Package, which can be downloaded
+   from http://developer.apple.com/downloads (Apple ID required.)  For systems
+   that do not have a JDK installed, you can obtain the Oracle Java Development
+   Kit from http://www.java.com.
+
+
+==================
+Out-of-Tree Builds
+==================
+
+Binary objects, libraries, and executables are generated in the same directory
+from which configure was executed (the "binary directory"), and this directory
+need not necessarily be the same as the libjpeg-turbo source directory.  You
+can create multiple independent binary directories, in which different versions
+of libjpeg-turbo can be built from the same source tree using different
+compilers or settings.  In the sections below, {build_directory} refers to the
+binary directory, whereas {source_directory} refers to the libjpeg-turbo source
+directory.  For in-tree builds, these directories are the same.
+
+
+======================
+Building libjpeg-turbo
+======================
+
+The following procedure will build libjpeg-turbo on Linux, FreeBSD, Cygwin, and
+Solaris/x86 systems.  (On Solaris, this generates a 32-bit library.  See below
+for 64-bit build instructions.)
+
+  cd {source_directory}
+  autoreconf -fiv
+  cd {build_directory}
+  sh {source_directory}/configure [additional configure flags]
+  make
+
+NOTE: Running autoreconf in the source directory is usually only necessary if
+building libjpeg-turbo from the SVN repository.
+
+This will generate the following files under .libs/:
+
+  libjpeg.a
+      Static link library for the libjpeg API
+
+  libjpeg.so.{version} (Linux, Unix)
+  libjpeg.{version}.dylib (OS X)
+  cygjpeg-{version}.dll (Cygwin)
+      Shared library for the libjpeg API
+
+  By default, {version} is 62.1.0, 7.1.0, or 8.0.2, depending on whether
+  libjpeg v6b (default), v7, or v8 emulation is enabled.  If using Cygwin,
+  {version} is 62, 7, or 8.
+
+  libjpeg.so (Linux, Unix)
+  libjpeg.dylib (OS X)
+      Development symlink for the libjpeg API
+
+  libjpeg.dll.a (Cygwin)
+      Import library for the libjpeg API
+
+  libturbojpeg.a
+      Static link library for the TurboJPEG API
+
+  libturbojpeg.so.0.1.0 (Linux, Unix)
+  libturbojpeg.0.1.0.dylib (OS X)
+  cygturbojpeg-0.dll (Cygwin)
+      Shared library for the TurboJPEG API
+
+  libturbojpeg.so (Linux, Unix)
+  libturbojpeg.dylib (OS X)
+      Development symlink for the TurboJPEG API
+
+  libturbojpeg.dll.a (Cygwin)
+      Import library for the TurboJPEG API
+
+
+libjpeg v7 or v8 API/ABI Emulation
+----------------------------------
+
+Add --with-jpeg7 to the configure command line to build a version of
+libjpeg-turbo that is API/ABI-compatible with libjpeg v7.  Add --with-jpeg8 to
+the configure command to build a version of libjpeg-turbo that is
+API/ABI-compatible with libjpeg v8.  See README-turbo.txt for more information
+on libjpeg v7 and v8 emulation.
+
+
+In-Memory Source/Destination Managers
+-------------------------------------
+
+When using libjpeg v6b or v7 API/ABI emulation, add --without-mem-srcdst to the
+configure command line to build a version of libjpeg-turbo that lacks the
+jpeg_mem_src() and jpeg_mem_dest() functions.  These functions were not part of
+the original libjpeg v6b and v7 APIs, so removing them ensures strict
+conformance with those APIs.  See README-turbo.txt for more information.
+
+
+Arithmetic Coding Support
+-------------------------
+
+Since the patent on arithmetic coding has expired, this functionality has been
+included in this release of libjpeg-turbo.  libjpeg-turbo's implementation is
+based on the implementation in libjpeg v8, but it works when emulating libjpeg
+v7 or v6b as well.  The default is to enable both arithmetic encoding and
+decoding, but those who have philosophical objections to arithmetic coding can
+add --without-arith-enc or --without-arith-dec to the configure command line to
+disable encoding or decoding (respectively.)
+
+
+TurboJPEG Java Wrapper
+----------------------
+Add --with-java to the configure command line to incorporate an optional Java
+Native Interface wrapper into the TurboJPEG shared library and build the Java
+front-end classes to support it.  This allows the TurboJPEG shared library to
+be used directly from Java applications.  See java/README for more details.
+
+You can set the JAVAC, JAR, and JAVA configure variables to specify
+alternate commands for javac, jar, and java (respectively.)  You can also
+set the JAVACFLAGS configure variable to specify arguments that should be
+passed to the Java compiler when building the front-end classes, and JNI_CFLAGS
+to specify arguments that should be passed to the C compiler when building the
+JNI wrapper.  Run 'configure --help' for more details.
+
+
+========================
+Installing libjpeg-turbo
+========================
+
+If you intend to install these libraries and the associated header files, then
+replace 'make' in the instructions above with
+
+  make install prefix={base dir} libdir={library directory}
+
+For example,
+
+  make install prefix=/usr/local libdir=/usr/local/lib64
+
+will install the header files in /usr/local/include and the library files in
+/usr/local/lib64.  If 'prefix' and 'libdir' are not specified, then the default
+is to install the header files in /opt/libjpeg-turbo/include and the library
+files in /opt/libjpeg-turbo/lib32 (32-bit) or /opt/libjpeg-turbo/lib64
+(64-bit.)
+
+NOTE: You can specify a prefix of /usr and a libdir of, for instance,
+/usr/lib64 to overwrite the system's version of libjpeg.  If you do this,
+however, then be sure to BACK UP YOUR SYSTEM'S INSTALLATION OF LIBJPEG before
+overwriting it.  It is recommended that you instead install libjpeg-turbo into
+a non-system directory and manipulate the LD_LIBRARY_PATH or create symlinks
+to force applications to use libjpeg-turbo instead of libjpeg.  See
+README-turbo.txt for more information.
+
+
+=============
+Build Recipes
+=============
+
+
+32-bit Build on 64-bit Linux
+----------------------------
+
+Add
+
+  --host i686-pc-linux-gnu CFLAGS='-O3 -m32' LDFLAGS=-m32
+
+to the configure command line.
+
+
+64-bit Build on 64-bit OS X
+---------------------------
+
+Add
+
+  --host x86_64-apple-darwin NASM=/opt/local/bin/nasm
+
+to the configure command line.  NASM 2.07 or later from MacPorts must be
+installed.
+
+
+32-bit Build on 64-bit OS X
+---------------------------
+
+Add
+
+  --host i686-apple-darwin CFLAGS='-O3 -m32' LDFLAGS=-m32
+
+to the configure command line.
+
+
+64-bit Backward-Compatible Build on 64-bit OS X
+-----------------------------------------------
+
+Add
+
+  --host x86_64-apple-darwin NASM=/opt/local/bin/nasm \
+  CFLAGS='-isysroot /Developer/SDKs/MacOSX10.5.sdk \
+    -mmacosx-version-min=10.5 -O3' \
+    LDFLAGS='-isysroot /Developer/SDKs/MacOSX10.5.sdk \
+    -mmacosx-version-min=10.5'
+
+to the configure command line.  The OS X 10.5 SDK, and NASM 2.07 or later from
+MacPorts, must be installed.
+
+
+32-bit Backward-Compatible Build on OS X
+----------------------------------------
+
+Add
+
+  --host i686-apple-darwin \
+    CFLAGS='-isysroot /Developer/SDKs/MacOSX10.5.sdk \
+    -mmacosx-version-min=10.5 -O3 -m32' \
+    LDFLAGS='-isysroot /Developer/SDKs/MacOSX10.5.sdk \
+    -mmacosx-version-min=10.5 -m32'
+
+to the configure command line.  The OS X 10.5 SDK must be installed.
+
+
+64-bit Library Build on 64-bit Solaris
+--------------------------------------
+
+Add
+
+  --host x86_64-pc-solaris CFLAGS='-O3 -m64' LDFLAGS=-m64
+
+to the configure command line.
+
+
+32-bit Build on 64-bit FreeBSD
+------------------------------
+
+Add
+
+  --host i386-unknown-freebsd CC='gcc -B /usr/lib32' CFLAGS='-O3 -m32' \
+    LDFLAGS='-B/usr/lib32'
+
+to the configure command line.  NASM 2.07 or later from FreeBSD ports must be
+installed.
+
+
+Oracle Solaris Studio
+---------------------
+
+Add
+
+  CC=cc
+
+to the configure command line.  libjpeg-turbo will automatically be built with
+the maximum optimization level (-xO5) unless you override CFLAGS.
+
+To build a 64-bit version of libjpeg-turbo using Oracle Solaris Studio, add
+
+  --host x86_64-pc-solaris CC=cc CFLAGS='-xO5 -m64' LDFLAGS=-m64
+
+to the configure command line.
+
+
+MinGW Build on Cygwin
+---------------------
+
+Use CMake (see recipes below)
+
+
+===========
+ARM Support
+===========
+
+This release of libjpeg-turbo can use ARM NEON SIMD instructions to accelerate
+JPEG compression/decompression by approximately 2-4x on ARMv7 and later
+platforms.  If libjpeg-turbo is configured on an ARM Linux platform, then the
+build system will automatically include the NEON SIMD routines, if they are
+supported.  Build instructions for other ARM-based platforms follow.
+
+
+Building libjpeg-turbo for iOS
+------------------------------
+
+iOS platforms, such as the iPhone and iPad, use ARM processors, some of which
+support NEON instructions.  Additional steps are required in order to build
+libjpeg-turbo for these platforms.
+
+Additional build requirements:
+
+  gas-preprocessor.pl
+  (https://sourceforge.net/p/libjpeg-turbo/code/HEAD/tree/gas-preprocessor)
+  should be installed in your PATH.
+
+Set the following shell variables for simplicity:
+
+  Xcode 4.2 and earlier:
+  IOS_PLATFORMDIR=/Developer/Platforms/iPhoneOS.platform
+  Xcode 4.3 and later:
+  IOS_PLATFORMDIR=/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
+
+  IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk
+
+  Xcode 4.6.x and earlier:
+  IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2
+  Xcode 5.0.x and later:
+  IOS_GCC=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
+
+  ARM v6 only (up to and including iPhone 3G):
+  [NOTE: Requires Xcode 4.4.x or earlier]
+  IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp"
+
+  ARM v7 only (iPhone 3GS-4S, iPad 1st-3rd Generation):
+  Xcode 4.6.x and earlier:
+  IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon"
+  Xcode 5.0.x and later:
+  IOS_CFLAGS="-arch armv7"
+
+  ARM v7s only (iPhone 5, iPad 4th Generation):
+  [NOTE: Requires Xcode 4.5 or later]
+  Xcode 4.6.x and earlier:
+  IOS_CFLAGS="-march=armv7s -mcpu=swift -mtune=swift -mfpu=neon"
+  Xcode 5.0.x and later:
+  IOS_CFLAGS="-arch armv7s"
+
+Follow the procedure under "Building libjpeg-turbo" above, adding
+
+  --host arm-apple-darwin10 --enable-static --disable-shared \
+    CC="$IOS_GCC" LD="$IOS_GCC" \
+    CFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT -O3 $IOS_CFLAGS" \
+    LDFLAGS="-mfloat-abi=softfp -isysroot $IOS_SYSROOT $IOS_CFLAGS"
+
+to the configure command line.  If using Xcode 5.0.x or later, also add
+
+    CCASFLAGS="-no-integrated-as $IOS_CFLAGS"
+
+to the configure command line.
+
+NOTE:  You can also add -miphoneos-version-min={version} to $IOS_CFLAGS above
+in order to support older versions of iOS than the default version supported by
+the SDK.
+
+Once built, lipo can be used to combine the ARM v6, v7, and/or v7s variants
+into a universal library.
+
+NOTE: If you are building libjpeg-turbo from the "official" project tarball,
+then it is highly likely that you will need to run 'autoreconf -fiv' in the
+source tree prior to building ARM v7 or v7s iOS binaries using the techniques
+described above.  Otherwise, you may get a libtool error such as "unable to
+infer tagged configuration."
+
+
+Building libjpeg-turbo for Android
+----------------------------------
+
+Building libjpeg-turbo for Android platforms requires the Android NDK
+(https://developer.android.com/tools/sdk/ndk) and autotools.  The following is
+a general recipe script that can be modified for your specific needs.
+
+  # Set these variables to suit your needs
+  NDK_PATH={full path to the "ndk" directory-- for example, /opt/android/ndk}
+  BUILD_PLATFORM={the platform name for the NDK package you installed--
+    for example, "windows-x86" or "linux-x86_64"}
+  TOOLCHAIN_VERSION={"4.6", "4.8", etc.  This corresponds to a toolchain
+    directory under ${NDK_PATH}/toolchains/.}
+  ANDROID_VERSION={The minimum version of Android to support-- for example,
+    "9", "19", etc.}
+
+  HOST=arm-linux-androideabi
+  TOOLCHAIN=${NDK_PATH}/toolchains/${HOST}-${TOOLCHAIN_VERSION}/prebuilt/${BUILD_PLATFORM}
+  SYSROOT=${NDK_PATH}/platforms/android-${ANDROID_VERSION}/arch-arm
+  ANDROID_INCLUDES="-I${SYSROOT}/usr/include -I${TOOLCHAIN}/include"
+  ANDROID_CFLAGS="-march=armv7-a -mfloat-abi=softfp -fprefetch-loop-arrays \
+    -fstrict-aliasing --sysroot=${SYSROOT}"
+  export CPP=${TOOLCHAIN}/bin/${HOST}-cpp
+  export AR=${TOOLCHAIN}/bin/${HOST}-ar
+  export AS=${TOOLCHAIN}/bin/${HOST}-as
+  export NM=${TOOLCHAIN}/bin/${HOST}-nm
+  export CC=${TOOLCHAIN}/bin/${HOST}-gcc
+  export LD=${TOOLCHAIN}/bin/${HOST}-ld
+  export RANLIB=${TOOLCHAIN}/bin/${HOST}-ranlib
+  export OBJDUMP=${TOOLCHAIN}/bin/${HOST}-objdump
+  export STRIP=${TOOLCHAIN}/bin/${HOST}-strip
+  cd {build_directory}
+  sh {source_directory}/configure --host=${HOST} \
+    CFLAGS="${ANDROID_INCLUDES} ${ANDROID_CFLAGS} -O3" \
+    CPPFLAGS="${ANDROID_INCLUDES} ${ANDROID_CFLAGS}" \
+    LDFLAGS="${ANDROID_CFLAGS}" --with-simd ${1+"$@"}
+  make
+
+
+*******************************************************************************
+**     Building on Windows (Visual C++ or MinGW)
+*******************************************************************************
+
+
+==================
+Build Requirements
+==================
+
+-- CMake (http://www.cmake.org) v2.8.8 or later
+
+-- Microsoft Visual C++ 2005 or later
+
+   If you don't already have Visual C++, then the easiest way to get it is by
+   installing the Windows SDK:
+
+   http://msdn.microsoft.com/en-us/windows/bb980924.aspx
+
+   The Windows SDK includes both 32-bit and 64-bit Visual C++ compilers and
+   everything necessary to build libjpeg-turbo.
+
+   * You can also use Microsoft Visual Studio Express Edition, which is a free
+     download.  (NOTE: versions prior to 2012 can only be used to build 32-bit
+     code.)
+   * If you intend to build libjpeg-turbo from the command line, then add the
+     appropriate compiler and SDK directories to the INCLUDE, LIB, and PATH
+     environment variables.  This is generally accomplished by executing
+     vcvars32.bat or vcvars64.bat and SetEnv.cmd.  vcvars32.bat and
+     vcvars64.bat are part of Visual C++ and are located in the same directory
+     as the compiler.  SetEnv.cmd is part of the Windows SDK.  You can pass
+     optional arguments to SetEnv.cmd to specify a 32-bit or 64-bit build
+     environment.
+
+... OR ...
+
+-- MinGW
+
+   MinGW-builds (http://sourceforge.net/projects/mingwbuilds/) or
+   tdm-gcc (http://tdm-gcc.tdragon.net/) recommended if building on a Windows
+   machine.  Both distributions install a Start Menu link that can be used to
+   launch a command prompt with the appropriate compiler paths automatically
+   set.
+
+-- NASM (http://www.nasm.us/) 0.98 or later (NASM 2.05 or later is required for
+   a 64-bit build)
+
+-- If building the TurboJPEG Java wrapper, JDK 1.5 or later is required.  This
+   can be downloaded from http://www.java.com.
+
+
+==================
+Out-of-Tree Builds
+==================
+
+Binary objects, libraries, and executables are generated in the same directory
+from which cmake was executed (the "binary directory"), and this directory need
+not necessarily be the same as the libjpeg-turbo source directory.  You can
+create multiple independent binary directories, in which different versions of
+libjpeg-turbo can be built from the same source tree using different compilers
+or settings.  In the sections below, {build_directory} refers to the binary
+directory, whereas {source_directory} refers to the libjpeg-turbo source
+directory.  For in-tree builds, these directories are the same.
+
+
+======================
+Building libjpeg-turbo
+======================
+
+
+Visual C++ (Command Line)
+-------------------------
+
+  cd {build_directory}
+  cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release {source_directory}
+  nmake
+
+This will build either a 32-bit or a 64-bit version of libjpeg-turbo, depending
+on which version of cl.exe is in the PATH.
+
+The following files will be generated under {build_directory}:
+
+  jpeg-static.lib
+      Static link library for the libjpeg API
+  sharedlib/jpeg{version}.dll
+      DLL for the libjpeg API
+  sharedlib/jpeg.lib
+      Import library for the libjpeg API
+  turbojpeg-static.lib
+      Static link library for the TurboJPEG API
+  turbojpeg.dll
+      DLL for the TurboJPEG API
+  turbojpeg.lib
+      Import library for the TurboJPEG API
+
+{version} is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or
+v8 emulation is enabled.
+
+
+Visual C++ (IDE)
+----------------
+
+Choose the appropriate CMake generator option for your version of Visual Studio
+(run "cmake" with no arguments for a list of available generators.)  For
+instance:
+
+  cd {build_directory}
+  cmake -G "Visual Studio 10" {source_directory}
+
+NOTE:  Add "Win64" to the generator name (for example, "Visual Studio 10
+Win64") to build a 64-bit version of libjpeg-turbo.  Recent versions of CMake
+no longer document this suffix.  Separate build directories must be used for
+32-bit and 64-bit builds.
+
+You can then open ALL_BUILD.vcproj in Visual Studio and build one of the
+configurations in that project ("Debug", "Release", etc.) to generate a full
+build of libjpeg-turbo.
+
+This will generate the following files under {build_directory}:
+
+  {configuration}/jpeg-static.lib
+      Static link library for the libjpeg API
+  sharedlib/{configuration}/jpeg{version}.dll
+      DLL for the libjpeg API
+  sharedlib/{configuration}/jpeg.lib
+      Import library for the libjpeg API
+  {configuration}/turbojpeg-static.lib
+      Static link library for the TurboJPEG API
+  {configuration}/turbojpeg.dll
+      DLL for the TurboJPEG API
+  {configuration}/turbojpeg.lib
+      Import library for the TurboJPEG API
+
+{configuration} is Debug, Release, RelWithDebInfo, or MinSizeRel, depending on
+the configuration you built in the IDE, and {version} is 62, 7, or 8,
+depending on whether libjpeg v6b (default), v7, or v8 emulation is enabled.
+
+
+MinGW
+-----
+
+NOTE: This assumes that you are building on a Windows machine.  If you are
+cross-compiling on a Linux/Unix machine, then see "Build Recipes" below.
+
+  cd {build_directory}
+  cmake -G "MinGW Makefiles" {source_directory}
+  mingw32-make
+
+This will generate the following files under {build_directory}:
+
+  libjpeg.a
+      Static link library for the libjpeg API
+  sharedlib/libjpeg-{version}.dll
+      DLL for the libjpeg API
+  sharedlib/libjpeg.dll.a
+      Import library for the libjpeg API
+  libturbojpeg.a
+      Static link library for the TurboJPEG API
+  libturbojpeg.dll
+      DLL for the TurboJPEG API
+  libturbojpeg.dll.a
+      Import library for the TurboJPEG API
+
+{version} is 62, 7, or 8, depending on whether libjpeg v6b (default), v7, or
+v8 emulation is enabled.
+
+
+Debug Build
+-----------
+
+Add "-DCMAKE_BUILD_TYPE=Debug" to the cmake command line.  Or, if building with
+NMake, remove "-DCMAKE_BUILD_TYPE=Release" (Debug builds are the default with
+NMake.)
+
+
+libjpeg v7 or v8 API/ABI Emulation
+----------------------------------
+
+Add "-DWITH_JPEG7=1" to the cmake command line to build a version of
+libjpeg-turbo that is API/ABI-compatible with libjpeg v7.  Add "-DWITH_JPEG8=1"
+to the cmake command to build a version of libjpeg-turbo that is
+API/ABI-compatible with libjpeg v8.  See README-turbo.txt for more information
+on libjpeg v7 and v8 emulation.
+
+
+In-Memory Source/Destination Managers
+-------------------------------------
+
+When using libjpeg v6b or v7 API/ABI emulation, add -DWITH_MEM_SRCDST=0 to the
+CMake command line to build a version of libjpeg-turbo that lacks the
+jpeg_mem_src() and jpeg_mem_dest() functions.  These functions were not part of
+the original libjpeg v6b and v7 APIs, so removing them ensures strict
+conformance with those APIs.  See README-turbo.txt for more information.
+
+
+Arithmetic Coding Support
+-------------------------
+
+Since the patent on arithmetic coding has expired, this functionality has been
+included in this release of libjpeg-turbo.  libjpeg-turbo's implementation is
+based on the implementation in libjpeg v8, but it works when emulating libjpeg
+v7 or v6b as well.  The default is to enable both arithmetic encoding and
+decoding, but those who have philosophical objections to arithmetic coding can
+add "-DWITH_ARITH_ENC=0" or "-DWITH_ARITH_DEC=0" to the cmake command line to
+disable encoding or decoding (respectively.)
+
+
+TurboJPEG Java Wrapper
+----------------------
+Add "-DWITH_JAVA=1" to the cmake command line to incorporate an optional Java
+Native Interface wrapper into the TurboJPEG shared library and build the Java
+front-end classes to support it.  This allows the TurboJPEG shared library to
+be used directly from Java applications.  See java/README for more details.
+
+If you are using CMake 2.8, you can set the Java_JAVAC_EXECUTABLE,
+Java_JAVA_EXECUTABLE, and Java_JAR_EXECUTABLE CMake variables to specify
+alternate commands or locations for javac, java, and jar (respectively.)  If
+you are using CMake 2.6, set JAVA_COMPILE, JAVA_RUNTIME, and JAVA_ARCHIVE
+instead.  You can also set the JAVACFLAGS CMake variable to specify arguments
+that should be passed to the Java compiler when building the front-end classes.
+
+
+========================
+Installing libjpeg-turbo
+========================
+
+You can use the build system to install libjpeg-turbo into a directory of your
+choosing (as opposed to creating an installer.)  To do this, add:
+
+  -DCMAKE_INSTALL_PREFIX={install_directory}
+
+to the cmake command line.
+
+For example,
+
+  cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=c:\libjpeg-turbo {source_directory}
+  nmake install
+
+will install the header files in c:\libjpeg-turbo\include, the library files
+in c:\libjpeg-turbo\lib, the DLL's in c:\libjpeg-turbo\bin, and the
+documentation in c:\libjpeg-turbo\doc.
+
+
+=============
+Build Recipes
+=============
+
+
+64-bit MinGW Build on Cygwin
+----------------------------
+
+  cd {build_directory}
+  CC=/usr/bin/x86_64-w64-mingw32-gcc \
+    cmake -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=Windows \
+    -DCMAKE_RC_COMPILER=/usr/bin/x86_64-w64-mingw32-windres.exe \
+    {source_directory}
+  make
+
+This produces a 64-bit build of libjpeg-turbo that does not depend on
+cygwin1.dll or other Cygwin DLL's.  The mingw64-x86_64-gcc-core and
+mingw64-x86_64-gcc-g++ packages (and their dependencies) must be installed.
+
+
+32-bit MinGW Build on Cygwin
+----------------------------
+
+  cd {build_directory}
+  CC=/usr/bin/i686-w64-mingw32-gcc \
+    cmake -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=Windows \
+    -DCMAKE_RC_COMPILER=/usr/bin/i686-w64-mingw32-windres.exe \
+    {source_directory}
+  make
+
+This produces a 32-bit build of libjpeg-turbo that does not depend on
+cygwin1.dll or other Cygwin DLL's.  The mingw64-i686-gcc-core and
+mingw64-i686-gcc-g++ packages (and their dependencies) must be installed.
+
+
+MinGW Build on Linux
+--------------------
+
+  cd {build_directory}
+  CC={mingw_binary_path}/i386-mingw32-gcc \
+    cmake -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=Windows \
+    -DCMAKE_AR={mingw_binary_path}/i386-mingw32-ar \
+    -DCMAKE_RANLIB={mingw_binary_path}/i386-mingw32-ranlib \
+    {source_directory}
+  make
+
+
+*******************************************************************************
+**     Creating Release Packages
+*******************************************************************************
+
+The following commands can be used to create various types of release packages:
+
+
+Unix/Linux
+----------
+
+make rpm
+
+  Create Red Hat-style binary RPM package.  Requires RPM v4 or later.
+
+make srpm
+
+  This runs 'make dist' to create a pristine source tarball, then creates a
+  Red Hat-style source RPM package from the tarball.  Requires RPM v4 or later.
+
+make deb
+
+  Create Debian-style binary package.  Requires dpkg.
+
+make dmg
+
+  Create Macintosh package/disk image.  This requires pkgbuild and
+  productbuild, which are installed by default on OS X 10.7 and later and which
+  can be obtained by installing Xcode 3.2.6 (with the "Unix Development"
+  option) on OS X 10.6.  Packages built in this manner can be installed on OS X
+  10.5 and later, but they must be built on OS X 10.6 or later.
+
+make udmg [BUILDDIR32={32-bit build directory}]
+
+  On 64-bit OS X systems, this creates a Macintosh package and disk image that
+  contains universal i386/x86-64 binaries.  You should first configure a 32-bit
+  out-of-tree build of libjpeg-turbo, then configure a 64-bit out-of-tree
+  build, then run 'make udmg' from the 64-bit build directory.  The build
+  system will look for the 32-bit build under {source_directory}/osxx86 by
+  default, but you can override this by setting the BUILDDIR32 variable on the
+  make command line as shown above.
+
+make iosdmg [BUILDDIR32={32-bit build directory}] \
+  [BUILDDIRARMV6={ARM v6 build directory}] \
+  [BUILDDIRARMV7={ARM v7 build directory}] \
+  [BUILDDIRARMV7S={ARM v7s build directory}]
+
+  On OS X systems, this creates a Macintosh package and disk image in which the
+  libjpeg-turbo static libraries contain ARM architectures necessary to build
+  iOS applications.  If building on an x86-64 system, the binaries will also
+  contain the i386 architecture, as with 'make udmg' above.  You should first
+  configure ARM v6, ARM v7, and/or ARM v7s out-of-tree builds of libjpeg-turbo
+  (see "Building libjpeg-turbo for iOS" above.)  If you are building an x86-64
+  version of libjpeg-turbo, you should configure a 32-bit out-of-tree build as
+  well.  Next, build libjpeg-turbo as you would normally, using an out-of-tree
+  build.  When it is built, run 'make iosdmg' from the build directory.  The
+  build system will look for the ARM v6 build under {source_directory}/iosarmv6
+  by default, the ARM v7 build under {source_directory}/iosarmv7 by default,
+  the ARM v7s build under {source_directory}/iosarmv7s by default, and (if
+  applicable) the 32-bit build under {source_directory}/osxx86 by default, but
+  you can override this by setting the BUILDDIR32, BUILDDIRARMV6,
+  BUILDDIRARMV7, and/or BUILDDIRARMV7S variables on the make command line as
+  shown above.
+
+make cygwinpkg
+
+  Build a Cygwin binary package.
+
+
+Windows
+-------
+
+If using NMake:
+
+  cd {build_directory}
+  nmake installer
+
+If using MinGW:
+
+  cd {build_directory}
+  make installer
+
+If using the Visual Studio IDE, build the "installer" project.
+
+The installer package (libjpeg-turbo[-gcc][64].exe) will be located under
+{build_directory}.  If building using the Visual Studio IDE, then the installer
+package will be located in a subdirectory with the same name as the
+configuration you built (such as {build_directory}\Debug\ or
+{build_directory}\Release\).
+
+Building a Windows installer requires the Nullsoft Install System
+(http://nsis.sourceforge.net/).  makensis.exe should be in your PATH.
+
+
+*******************************************************************************
+**     Regression testing
+*******************************************************************************
+
+The most common way to test libjpeg-turbo is by invoking 'make test' on
+Unix/Linux platforms or 'ctest' on Windows platforms, once the build has
+completed.  This runs a series of tests to ensure that mathematical
+compatibility has been maintained between libjpeg-turbo and libjpeg v6b.  This
+also invokes the TurboJPEG unit tests, which ensure that the colorspace
+extensions, YUV encoding, decompression scaling, and other features of the
+TurboJPEG C and Java APIs are working properly (and, by extension, that the
+equivalent features of the underlying libjpeg API are also working.)
+
+Invoking 'make testclean' or 'nmake testclean' (if using NMake) or building
+the 'testclean' target (if using the Visual Studio IDE) will clean up the
+output images generated by 'make test'.
+
+On Unix/Linux platforms, more extensive tests of the TurboJPEG C and Java
+wrappers can be run by invoking 'make tjtest'.  These extended TurboJPEG tests
+essentially iterate through all of the available features of the TurboJPEG APIs
+that are not covered by the TurboJPEG unit tests (this includes the lossless
+transform options) and compare the images generated by each feature to images
+generated using the equivalent feature in the libjpeg API.  The extended
+TurboJPEG tests are meant to test for regressions in the TurboJPEG wrappers,
+not in the underlying libjpeg API library.
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..c34f45e
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,809 @@
+#
+# Setup
+#
+
+cmake_minimum_required(VERSION 2.8.8)
+# Use LINK_INTERFACE_LIBRARIES instead of INTERFACE_LINK_LIBRARIES
+if(POLICY CMP0022)
+  cmake_policy(SET CMP0022 OLD)
+endif()
+
+project(libjpeg-turbo C)
+set(VERSION 1.3.80)
+
+if(CYGWIN OR NOT CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
+  execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
+  string(REGEX REPLACE "\n" "" BUILD "${BUILD}")  # quoted: output may be empty
+elseif(WIN32)
+  execute_process(COMMAND "wmic.exe" "os" "get" "LocalDateTime" OUTPUT_VARIABLE
+    BUILD)
+  string(REGEX REPLACE "[^0-9]" "" BUILD "${BUILD}")
+  if (BUILD STREQUAL "")
+    execute_process(COMMAND "cmd.exe" "/C" "DATE" "/T" OUTPUT_VARIABLE BUILD)
+    string(REGEX REPLACE ".*[ ]([0-9]*)[/.]([0-9]*)[/.]([0-9]*).*" "\\3\\2\\1" BUILD "${BUILD}")
+  else()
+    string(SUBSTRING "${BUILD}" 0 8 BUILD)
+  endif()
+else()
+  message(FATAL_ERROR "Platform not supported by this build system.  Use autotools instead.")
+endif()
+
+# This does nothing except when using MinGW.  CMAKE_BUILD_TYPE has no meaning
+# in Visual Studio, and it always defaults to Debug when using NMake.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}")
+
+# This only works if building from the command line.  There is currently no way
+# to set a variable's value based on the build type when using Visual Studio.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  set(BUILD "${BUILD}d")
+endif()
+
+message(STATUS "VERSION = ${VERSION}, BUILD = ${BUILD}")
+
+option(WITH_SIMD "Include SIMD extensions" TRUE)
+option(WITH_ARITH_ENC "Include arithmetic encoding support" TRUE)
+option(WITH_ARITH_DEC "Include arithmetic decoding support" TRUE)
+option(WITH_JPEG7 "Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE)
+option(WITH_JPEG8 "Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b)" FALSE)
+option(WITH_MEM_SRCDST "Include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI" TRUE)
+option(WITH_TURBOJPEG "Include the TurboJPEG wrapper library and associated test programs" TRUE)
+option(WITH_JAVA "Build Java wrapper for the TurboJPEG library" FALSE)
+option(WITH_12BIT "Encode/decode JPEG images with 12-bit samples (implies WITH_SIMD=0 WITH_TURBOJPEG=0 WITH_ARITH_ENC=0 WITH_ARITH_DEC=0)" FALSE)
+
+if(WITH_12BIT)
+  set(WITH_SIMD FALSE)
+  set(WITH_TURBOJPEG FALSE)
+  set(WITH_ARITH_ENC FALSE)
+  set(WITH_ARITH_DEC FALSE)
+  set(BITS_IN_JSAMPLE 12)
+  message(STATUS "12-bit JPEG support enabled")
+else()
+  set(BITS_IN_JSAMPLE 8)
+endif()
+
+if(WITH_ARITH_ENC)
+  set(C_ARITH_CODING_SUPPORTED 1)
+  message(STATUS "Arithmetic encoding support enabled")
+else()
+  message(STATUS "Arithmetic encoding support disabled")
+endif()
+
+if(WITH_ARITH_DEC)
+  set(D_ARITH_CODING_SUPPORTED 1)
+  message(STATUS "Arithmetic decoding support enabled")
+else()
+  message(STATUS "Arithmetic decoding support disabled")
+endif()
+
+if(WITH_TURBOJPEG)
+  message(STATUS "TurboJPEG C wrapper enabled")
+else()
+  message(STATUS "TurboJPEG C wrapper disabled")
+endif()
+
+if(WITH_JAVA)
+  message(STATUS "TurboJPEG Java wrapper enabled")
+else()
+  message(STATUS "TurboJPEG Java wrapper disabled")
+endif()
+
+set(SO_AGE 0)
+if(WITH_MEM_SRCDST)
+  set(SO_AGE 1)
+endif()
+
+set(JPEG_LIB_VERSION 62)
+set(DLL_VERSION ${JPEG_LIB_VERSION})
+set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0)
+if(WITH_JPEG8)
+  set(JPEG_LIB_VERSION 80)
+  set(DLL_VERSION 8)
+  set(FULLVERSION ${DLL_VERSION}.0.2)
+  message(STATUS "Emulating libjpeg v8 API/ABI")
+elseif(WITH_JPEG7)
+  set(JPEG_LIB_VERSION 70)
+  set(DLL_VERSION 7)
+  set(FULLVERSION ${DLL_VERSION}.${SO_AGE}.0)
+  message(STATUS "Emulating libjpeg v7 API/ABI")
+endif(WITH_JPEG8)
+
+if(WITH_MEM_SRCDST)
+  set(MEM_SRCDST_SUPPORTED 1)
+  message(STATUS "In-memory source/destination managers enabled")
+else()
+  message(STATUS "In-memory source/destination managers disabled")
+endif()
+
+if(MSVC)
+  # Use the static C library for all build types
+  foreach(var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+    CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
+    if(${var} MATCHES "/MD")
+      string(REGEX REPLACE "/MD" "/MT" ${var} "${${var}}")
+    endif()
+  endforeach()
+
+  add_definitions(-W3 -wd4996)
+endif()
+
+# Detect whether compiler is 64-bit
+if(MSVC AND CMAKE_CL_64)
+  set(SIMD_X86_64 1)
+  set(64BIT 1)
+elseif(CMAKE_SIZEOF_VOID_P MATCHES 8)
+  set(SIMD_X86_64 1)
+  set(64BIT 1)
+endif()
+
+if(64BIT)
+  message(STATUS "64-bit build")
+else()
+  message(STATUS "32-bit build")
+endif()
+
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  if(MSVC)
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME})
+  else()
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_PROJECT_NAME}-gcc)
+  endif()
+  if(64BIT)
+    set(CMAKE_INSTALL_PREFIX_DEFAULT ${CMAKE_INSTALL_PREFIX_DEFAULT}64)
+  endif()
+  set(CMAKE_INSTALL_PREFIX "c:/${CMAKE_INSTALL_PREFIX_DEFAULT}" CACHE PATH
+    "Directory into which to install libjpeg-turbo (default: c:/${CMAKE_INSTALL_PREFIX_DEFAULT})"
+    FORCE)
+endif()
+
+message(STATUS "Install directory = ${CMAKE_INSTALL_PREFIX}")
+
+configure_file(win/jconfig.h.in jconfig.h)
+configure_file(win/jconfigint.h.in jconfigint.h)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_SOURCE_DIR})
+
+if(WITH_JAVA)
+  find_package(Java)
+  find_package(JNI)
+  if(DEFINED JAVACFLAGS)
+    message(STATUS "Java compiler flags = ${JAVACFLAGS}")
+  endif()
+endif()
+
+
+#
+# Targets
+#
+
+set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
+  jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c
+  jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c
+  jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c
+  jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c
+  jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c
+  jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
+
+if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
+  set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)
+endif()
+
+if(WITH_ARITH_ENC)
+  set(JPEG_SOURCES ${JPEG_SOURCES} jcarith.c)
+endif()
+
+if(WITH_ARITH_DEC)
+  set(JPEG_SOURCES ${JPEG_SOURCES} jdarith.c)
+endif()
+
+if(WITH_SIMD)
+  add_definitions(-DWITH_SIMD)
+  add_subdirectory(simd)
+  if(SIMD_X86_64)
+    set(JPEG_SOURCES ${JPEG_SOURCES} simd/jsimd_x86_64.c)
+  else()
+    set(JPEG_SOURCES ${JPEG_SOURCES} simd/jsimd_i386.c)
+  endif()
+  # This tells CMake that the "source" files haven't been generated yet
+  set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
+else()
+  set(JPEG_SOURCES ${JPEG_SOURCES} jsimd_none.c)
+  message(STATUS "Not using SIMD acceleration")
+endif()
+
+if(WITH_JAVA)
+  add_subdirectory(java)
+endif()
+
+add_subdirectory(sharedlib)
+
+add_library(jpeg-static STATIC ${JPEG_SOURCES} ${SIMD_OBJS})
+if(NOT MSVC)
+  set_target_properties(jpeg-static PROPERTIES OUTPUT_NAME jpeg)
+endif()
+if(WITH_SIMD)
+  add_dependencies(jpeg-static simd)
+endif()
+
+if(WITH_TURBOJPEG)
+  set(TURBOJPEG_SOURCES turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c)
+  if(WITH_JAVA)
+    set(TURBOJPEG_SOURCES ${TURBOJPEG_SOURCES} turbojpeg-jni.c)
+    include_directories(${JAVA_INCLUDE_PATH} ${JAVA_INCLUDE_PATH2})
+  endif()
+
+  add_library(turbojpeg SHARED ${TURBOJPEG_SOURCES})
+  set_target_properties(turbojpeg PROPERTIES DEFINE_SYMBOL DLLDEFINE)
+  if(MINGW)
+    set_target_properties(turbojpeg PROPERTIES LINK_FLAGS -Wl,--kill-at)
+  endif()
+  target_link_libraries(turbojpeg jpeg-static)
+  set_target_properties(turbojpeg PROPERTIES LINK_INTERFACE_LIBRARIES "")
+
+  add_library(turbojpeg-static STATIC ${JPEG_SOURCES} ${SIMD_OBJS}
+    turbojpeg.c transupp.c jdatadst-tj.c jdatasrc-tj.c)
+  if(NOT MSVC)
+    set_target_properties(turbojpeg-static PROPERTIES OUTPUT_NAME turbojpeg)
+  endif()
+  if(WITH_SIMD)
+    add_dependencies(turbojpeg-static simd)
+  endif()
+
+  add_executable(tjunittest tjunittest.c tjutil.c)
+  target_link_libraries(tjunittest turbojpeg)
+
+  add_executable(tjunittest-static tjunittest.c tjutil.c)
+  target_link_libraries(tjunittest-static turbojpeg-static)
+
+  add_executable(tjbench tjbench.c bmp.c tjutil.c rdbmp.c rdppm.c wrbmp.c
+    wrppm.c)
+  target_link_libraries(tjbench turbojpeg jpeg-static)
+  set_property(TARGET tjbench PROPERTY COMPILE_FLAGS
+    "-DBMP_SUPPORTED -DPPM_SUPPORTED")
+
+  add_executable(tjbench-static tjbench.c bmp.c tjutil.c rdbmp.c rdppm.c wrbmp.c
+    wrppm.c)
+  target_link_libraries(tjbench-static turbojpeg-static jpeg-static)
+  set_property(TARGET tjbench-static PROPERTY COMPILE_FLAGS
+    "-DBMP_SUPPORTED -DPPM_SUPPORTED")
+endif()
+
+if(WITH_12BIT)
+  set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED -DUSE_SETMODE")
+else()
+  set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED -DUSE_SETMODE")
+  set(CJPEG_BMP_SOURCES rdbmp.c rdtarga.c)
+  set(DJPEG_BMP_SOURCES wrbmp.c wrtarga.c)
+endif()
+
+add_executable(cjpeg-static cjpeg.c cdjpeg.c rdgif.c rdppm.c rdswitch.c
+  ${CJPEG_BMP_SOURCES})
+set_property(TARGET cjpeg-static PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+target_link_libraries(cjpeg-static jpeg-static)
+
+add_executable(djpeg-static djpeg.c cdjpeg.c rdcolmap.c rdswitch.c wrgif.c
+  wrppm.c ${DJPEG_BMP_SOURCES})
+set_property(TARGET djpeg-static PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+target_link_libraries(djpeg-static jpeg-static)
+
+add_executable(jpegtran-static jpegtran.c cdjpeg.c rdswitch.c transupp.c)
+target_link_libraries(jpegtran-static jpeg-static)
+set_property(TARGET jpegtran-static PROPERTY COMPILE_FLAGS "-DUSE_SETMODE")
+
+add_executable(rdjpgcom rdjpgcom.c)
+
+add_executable(wrjpgcom wrjpgcom.c)
+
+
+#
+# Tests
+#
+
+if(MSVC_IDE)
+  set(OBJDIR "\${CTEST_CONFIGURATION_TYPE}/")
+else()
+  set(OBJDIR "")
+endif()
+
+enable_testing()
+
+if(WITH_12BIT)
+  set(TESTORIG testorig12.jpg)
+  set(MD5_JPEG_RGB_ISLOW 9620f424569594bb9242b48498ad801f)
+  set(MD5_PPM_RGB_ISLOW f3301d2219783b8b3d942b7239fa50c0)
+  set(MD5_JPEG_422_IFAST_OPT 7322e3bd2f127f7de4b40d4480ce60e4)
+  set(MD5_PPM_422_IFAST 79807fa552899e66a04708f533e16950)
+  set(MD5_PPM_422M_IFAST 07737bfe8a7c1c87aaa393a0098d16b0)
+  set(MD5_JPEG_420_IFAST_Q100_PROG a1da220b5604081863a504297ed59e55)
+  set(MD5_PPM_420_Q100_IFAST 1b3730122709f53d007255e8dfd3305e)
+  set(MD5_PPM_420M_Q100_IFAST 980a1a3c5bf9510022869d30b7d26566)
+  set(MD5_JPEG_GRAY_ISLOW 235c90707b16e2e069f37c888b2636d9)
+  set(MD5_PPM_GRAY_ISLOW 7213c10af507ad467da5578ca5ee1fca)
+  set(MD5_PPM_GRAY_ISLOW_RGB e96ee81c30a6ed422d466338bd3de65d)
+  set(MD5_JPEG_420S_IFAST_OPT 7af8e60be4d9c227ec63ac9b6630855e)
+  set(MD5_JPEG_3x2_FLOAT_PROG a8c17daf77b457725ec929e215b603f8)
+  set(MD5_PPM_3x2_FLOAT 42876ab9e5c2f76a87d08db5fbd57956)
+  set(MD5_PPM_420M_ISLOW_2_1 4ca6be2a6f326ff9eaab63e70a8259c0)
+  set(MD5_PPM_420M_ISLOW_15_8 12aa9f9534c1b3d7ba047322226365eb)
+  set(MD5_PPM_420M_ISLOW_13_8 f7e22817c7b25e1393e4ec101e9d4e96)
+  set(MD5_PPM_420M_ISLOW_11_8 800a16f9f4dc9b293197bfe11be10a82)
+  set(MD5_PPM_420M_ISLOW_9_8 06b7a92a9bc69f4dc36ec40f1937d55c)
+  set(MD5_PPM_420M_ISLOW_7_8 3ec444a14a4ab4eab88ffc49c48eca43)
+  set(MD5_PPM_420M_ISLOW_3_4 3e726b7ea872445b19437d1c1d4f0d93)
+  set(MD5_PPM_420M_ISLOW_5_8 a8a771abdc94301d20ffac119b2caccd)
+  set(MD5_PPM_420M_ISLOW_1_2 b419124dd5568b085787234866102866)
+  set(MD5_PPM_420M_ISLOW_3_8 343d19015531b7bbe746124127244fa8)
+  set(MD5_PPM_420M_ISLOW_1_4 35fd59d866e44659edfa3c18db2a3edb)
+  set(MD5_PPM_420M_ISLOW_1_8 ccaed48ac0aedefda5d4abe4013f4ad7)
+  set(MD5_JPEG_CROP cdb35ff4b4519392690ea040c56ea99c)
+else()
+  set(TESTORIG testorig.jpg)
+  set(MD5_JPEG_RGB_ISLOW 768e970dd57b340ff1b83c9d3d47c77b)
+  set(MD5_PPM_RGB_ISLOW 00a257f5393fef8821f2b88ac7421291)
+  set(MD5_BMP_RGB_ISLOW_565 f07d2e75073e4bb10f6c6f4d36e2e3be)
+  set(MD5_BMP_RGB_ISLOW_565D 4cfa0928ef3e6bb626d7728c924cfda4)
+  set(MD5_JPEG_422_IFAST_OPT 2540287b79d913f91665e660303ab2c8)
+  set(MD5_PPM_422_IFAST 35bd6b3f833bad23de82acea847129fa)
+  set(MD5_PPM_422M_IFAST 8dbc65323d62cca7c91ba02dd1cfa81d)
+  set(MD5_BMP_422M_IFAST_565 3294bd4d9a1f2b3d08ea6020d0db7065)
+  set(MD5_BMP_422M_IFAST_565D da98c9c7b6039511be4a79a878a9abc1)
+  set(MD5_JPEG_420_IFAST_Q100_PROG 990cbe0329c882420a2094da7e5adade)
+  set(MD5_PPM_420_Q100_IFAST 5a732542015c278ff43635e473a8a294)
+  set(MD5_PPM_420M_Q100_IFAST ff692ee9323a3b424894862557c092f1)
+  set(MD5_JPEG_GRAY_ISLOW 72b51f894b8f4a10b3ee3066770aa38d)
+  set(MD5_PPM_GRAY_ISLOW 8d3596c56eace32f205deccc229aa5ed)
+  set(MD5_PPM_GRAY_ISLOW_RGB 116424ac07b79e5e801f00508eab48ec)
+  set(MD5_BMP_GRAY_ISLOW_565 12f78118e56a2f48b966f792fedf23cc)
+  set(MD5_BMP_GRAY_ISLOW_565D bdbbd616441a24354c98553df5dc82db)
+  set(MD5_JPEG_420S_IFAST_OPT 388708217ac46273ca33086b22827ed8)
+  if(WITH_SIMD)
+    set(MD5_JPEG_3x2_FLOAT_PROG 343e3f8caf8af5986ebaf0bdc13b5c71)
+    set(MD5_PPM_3x2_FLOAT 1a75f36e5904d6fc3a85a43da9ad89bb)
+  else()
+    set(MD5_JPEG_3x2_FLOAT_PROG 9bca803d2042bd1eb03819e2bf92b3e5)
+    set(MD5_PPM_3x2_FLOAT f6bfab038438ed8f5522fbd33595dcdc)
+  endif()
+  set(MD5_JPEG_420_ISLOW_ARI e986fb0a637a8d833d96e8a6d6d84ea1)
+  set(MD5_JPEG_444_ISLOW_PROGARI 0a8f1c8f66e113c3cf635df0a475a617)
+  set(MD5_PPM_420M_IFAST_ARI 72b59a99bcf1de24c5b27d151bde2437)
+  set(MD5_JPEG_420_ISLOW 9a68f56bc76e466aa7e52f415d0f4a5f)
+  set(MD5_PPM_420M_ISLOW_2_1 9f9de8c0612f8d06869b960b05abf9c9)
+  set(MD5_PPM_420M_ISLOW_15_8 b6875bc070720b899566cc06459b63b7)
+  set(MD5_PPM_420M_ISLOW_13_8 bc3452573c8152f6ae552939ee19f82f)
+  set(MD5_PPM_420M_ISLOW_11_8 d8cc73c0aaacd4556569b59437ba00a5)
+  set(MD5_PPM_420M_ISLOW_9_8 d25e61bc7eac0002f5b393aa223747b6)
+  set(MD5_PPM_420M_ISLOW_7_8 ddb564b7c74a09494016d6cd7502a946)
+  set(MD5_PPM_420M_ISLOW_3_4 8ed8e68808c3fbc4ea764fc9d2968646)
+  set(MD5_PPM_420M_ISLOW_5_8 a3363274999da2366a024efae6d16c9b)
+  set(MD5_PPM_420M_ISLOW_1_2 e692a315cea26b988c8e8b29a5dbcd81)
+  set(MD5_PPM_420M_ISLOW_3_8 79eca9175652ced755155c90e785a996)
+  set(MD5_PPM_420M_ISLOW_1_4 79cd778f8bf1a117690052cacdd54eca)
+  set(MD5_PPM_420M_ISLOW_1_8 391b3d4aca640c8567d6f8745eb2142f)
+  set(MD5_BMP_420_ISLOW_256 4980185e3776e89bd931736e1cddeee6)
+  set(MD5_BMP_420_ISLOW_565 bf9d13e16c4923b92e1faa604d7922cb)
+  set(MD5_BMP_420_ISLOW_565D 6bde71526acc44bcff76f696df8638d2)
+  set(MD5_BMP_420M_ISLOW_565 8dc0185245353cfa32ad97027342216f)
+  set(MD5_BMP_420M_ISLOW_565D d1be3a3339166255e76fa50a0d70d73e)
+  set(MD5_JPEG_CROP b4197f377e621c4e9b1d20471432610d)
+endif()
+
+if(WITH_JAVA)
+  add_test(TJUnitTest
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest)
+  add_test(TJUnitTest-yuv
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest -yuv)
+  add_test(TJUnitTest-yuv-nopad
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest -yuv -noyuvpad)
+  add_test(TJUnitTest-bi
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest -bi)
+  add_test(TJUnitTest-bi-yuv
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest -bi -yuv)
+  add_test(TJUnitTest-bi-yuv-nopad
+    ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar
+      -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR}
+      TJUnitTest -bi -yuv -noyuvpad)
+endif()
+
+foreach(libtype shared static)
+  if(libtype STREQUAL "shared")
+    set(dir sharedlib/)
+  else()
+    set(dir "")
+    set(suffix -static)
+  endif()
+  if(WITH_TURBOJPEG)
+    add_test(tjunittest${suffix} tjunittest${suffix})
+    add_test(tjunittest${suffix}-alloc tjunittest${suffix} -alloc)
+    add_test(tjunittest${suffix}-yuv tjunittest${suffix} -yuv)
+    add_test(tjunittest${suffix}-yuv-alloc tjunittest${suffix} -yuv -alloc)
+    add_test(tjunittest${suffix}-yuv-nopad tjunittest${suffix} -yuv -noyuvpad)
+  endif()
+
+  # These tests are carefully chosen to provide full coverage of as many of the
+  # underlying algorithms as possible (including all of the SIMD-accelerated
+  # ones.)
+
+  # CC: null  SAMP: fullsize  FDCT: islow  ENT: huff
+  add_test(cjpeg${suffix}-rgb-islow
+    ${dir}cjpeg${suffix} -rgb -dct int -outfile testout_rgb_islow.jpg
+      ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-rgb-islow-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_RGB_ISLOW} -DFILE=testout_rgb_islow.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: null  SAMP: fullsize  IDCT: islow  ENT: huff
+  add_test(djpeg${suffix}-rgb-islow
+    ${dir}djpeg${suffix} -dct int -ppm -outfile testout_rgb_islow.ppm
+      testout_rgb_islow.jpg)
+  add_test(djpeg${suffix}-rgb-islow-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_RGB_ISLOW} -DFILE=testout_rgb_islow.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  if(NOT WITH_12BIT)
+    # CC: RGB->RGB565  SAMP: fullsize  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-rgb-islow-565
+      ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp
+        -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg)
+    add_test(djpeg${suffix}-rgb-islow-565-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_RGB_ISLOW_565}
+        -DFILE=testout_rgb_islow_565.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: RGB->RGB565 (dithered)  SAMP: fullsize  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-rgb-islow-565D
+      ${dir}djpeg${suffix} -dct int -rgb565 -bmp
+        -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg)
+    add_test(djpeg${suffix}-rgb-islow-565D-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_RGB_ISLOW_565D}
+        -DFILE=testout_rgb_islow_565D.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+
+  # CC: RGB->YCC  SAMP: fullsize/h2v1  FDCT: ifast  ENT: 2-pass huff
+  add_test(cjpeg${suffix}-422-ifast-opt
+    ${dir}cjpeg${suffix} -sample 2x1 -dct fast -opt
+      -outfile testout_422_ifast_opt.jpg
+      ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-422-ifast-opt-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_422_IFAST_OPT}
+      -DFILE=testout_422_ifast_opt.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: YCC->RGB  SAMP: fullsize/h2v1 fancy  IDCT: ifast  ENT: huff
+  add_test(djpeg${suffix}-422-ifast
+    ${dir}djpeg${suffix} -dct fast -outfile testout_422_ifast.ppm
+      testout_422_ifast_opt.jpg)
+  add_test(djpeg${suffix}-422-ifast-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_422_IFAST} -DFILE=testout_422_ifast.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: YCC->RGB  SAMP: h2v1 merged  IDCT: ifast  ENT: huff
+  add_test(djpeg${suffix}-422m-ifast
+    ${dir}djpeg${suffix} -dct fast -nosmooth -outfile testout_422m_ifast.ppm
+      testout_422_ifast_opt.jpg)
+  add_test(djpeg${suffix}-422m-ifast-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_422M_IFAST} -DFILE=testout_422m_ifast.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  if(NOT WITH_12BIT)
+    # CC: YCC->RGB565  SAMP: h2v1 merged  IDCT: ifast  ENT: huff
+    add_test(djpeg${suffix}-422m-ifast-565
+      ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -dither none -bmp
+        -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg)
+    add_test(djpeg${suffix}-422m-ifast-565-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_422M_IFAST_565}
+        -DFILE=testout_422m_ifast_565.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: YCC->RGB565 (dithered)  SAMP: h2v1 merged  IDCT: ifast  ENT: huff
+    add_test(djpeg${suffix}-422m-ifast-565D
+      ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -bmp
+        -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg)
+    add_test(djpeg${suffix}-422m-ifast-565D-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_422M_IFAST_565D}
+        -DFILE=testout_422m_ifast_565D.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+
+  # CC: RGB->YCC  SAMP: fullsize/h2v2  FDCT: ifast  ENT: prog huff
+  add_test(cjpeg${suffix}-420-q100-ifast-prog
+    ${dir}cjpeg${suffix} -sample 2x2 -quality 100 -dct fast -prog
+      -outfile testout_420_q100_ifast_prog.jpg
+      ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-420-q100-ifast-prog-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_420_IFAST_Q100_PROG}
+      -DFILE=testout_420_q100_ifast_prog.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: YCC->RGB  SAMP: fullsize/h2v2 fancy  IDCT: ifast  ENT: prog huff
+  add_test(djpeg${suffix}-420-q100-ifast-prog
+    ${dir}djpeg${suffix} -dct fast -outfile testout_420_q100_ifast.ppm
+      testout_420_q100_ifast_prog.jpg)
+  add_test(djpeg${suffix}-420-q100-ifast-prog-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_420_Q100_IFAST}
+      -DFILE=testout_420_q100_ifast.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: YCC->RGB  SAMP: h2v2 merged  IDCT: ifast  ENT: prog huff
+  add_test(djpeg${suffix}-420m-q100-ifast-prog
+    ${dir}djpeg${suffix} -dct fast -nosmooth
+      -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg)
+  add_test(djpeg${suffix}-420m-q100-ifast-prog-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_420M_Q100_IFAST}
+      -DFILE=testout_420m_q100_ifast.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+
+  # CC: RGB->Gray  SAMP: fullsize  FDCT: islow  ENT: huff
+  add_test(cjpeg${suffix}-gray-islow
+    ${dir}cjpeg${suffix} -gray -dct int -outfile testout_gray_islow.jpg
+      ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-gray-islow-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_GRAY_ISLOW}
+      -DFILE=testout_gray_islow.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: Gray->Gray  SAMP: fullsize  IDCT: islow  ENT: huff
+  add_test(djpeg${suffix}-gray-islow
+    ${dir}djpeg${suffix} -dct int -outfile testout_gray_islow.ppm
+      testout_gray_islow.jpg)
+  add_test(djpeg${suffix}-gray-islow-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_GRAY_ISLOW}
+      -DFILE=testout_gray_islow.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: Gray->RGB  SAMP: fullsize  IDCT: islow  ENT: huff
+  add_test(djpeg${suffix}-gray-islow-rgb
+    ${dir}djpeg${suffix} -dct int -rgb -outfile testout_gray_islow_rgb.ppm
+      testout_gray_islow.jpg)
+  add_test(djpeg${suffix}-gray-islow-rgb-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_GRAY_ISLOW_RGB}
+      -DFILE=testout_gray_islow_rgb.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  if(NOT WITH_12BIT)
+    # CC: Gray->RGB565  SAMP: fullsize  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-gray-islow-565
+      ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp
+        -outfile testout_gray_islow_565.bmp testout_gray_islow.jpg)
+    add_test(djpeg${suffix}-gray-islow-565-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_GRAY_ISLOW_565}
+        -DFILE=testout_gray_islow_565.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: Gray->RGB565 (dithered)  SAMP: fullsize  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-gray-islow-565D
+      ${dir}djpeg${suffix} -dct int -rgb565 -bmp
+        -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg)
+    add_test(djpeg${suffix}-gray-islow-565D-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_GRAY_ISLOW_565D}
+        -DFILE=testout_gray_islow_565D.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+
+  # CC: RGB->YCC  SAMP: fullsize smooth/h2v2 smooth  FDCT: islow
+  # ENT: 2-pass huff
+  add_test(cjpeg${suffix}-420s-ifast-opt
+    ${dir}cjpeg${suffix} -sample 2x2 -smooth 1 -dct int -opt -outfile
+      testout_420s_ifast_opt.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-420s-ifast-opt-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_420S_IFAST_OPT}
+      -DFILE=testout_420s_ifast_opt.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+
+  # CC: RGB->YCC  SAMP: fullsize/int  FDCT: float  ENT: prog huff
+  add_test(cjpeg${suffix}-3x2-float-prog
+    ${dir}cjpeg${suffix} -sample 3x2 -dct float -prog
+      -outfile testout_3x2_float_prog.jpg
+      ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+  add_test(cjpeg${suffix}-3x2-float-prog-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_3x2_FLOAT_PROG}
+      -DFILE=testout_3x2_float_prog.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  # CC: YCC->RGB  SAMP: fullsize/int  IDCT: float  ENT: prog huff
+  add_test(djpeg${suffix}-3x2-float-prog
+    ${dir}djpeg${suffix} -dct float -outfile testout_3x2_float.ppm
+      testout_3x2_float_prog.jpg)
+  add_test(djpeg${suffix}-3x2-float-prog-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_PPM_3x2_FLOAT} -DFILE=testout_3x2_float.ppm
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+
+  if(WITH_ARITH_ENC)
+    # CC: RGB->YCC  SAMP: fullsize/h2v2  FDCT: islow  ENT: arith
+    add_test(cjpeg${suffix}-420-islow-ari
+      ${dir}cjpeg${suffix} -dct int -arithmetic
+        -outfile testout_420_islow_ari.jpg
+        ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+    add_test(cjpeg${suffix}-420-islow-ari-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_420_ISLOW_ARI}
+        -DFILE=testout_420_islow_ari.jpg
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    add_test(jpegtran${suffix}-420-islow-ari
+      ${dir}jpegtran${suffix} -arithmetic
+        -outfile testout_420_islow_ari.jpg
+        ${CMAKE_SOURCE_DIR}/testimages/testimgint.jpg)
+    add_test(jpegtran${suffix}-420-islow-ari-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_420_ISLOW_ARI}
+        -DFILE=testout_420_islow_ari.jpg
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: RGB->YCC  SAMP: fullsize  FDCT: islow  ENT: prog arith
+    add_test(cjpeg${suffix}-444-islow-progari
+      ${dir}cjpeg${suffix} -sample 1x1 -dct int -progressive -arithmetic
+        -outfile testout_444_islow_progari.jpg
+        ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
+    add_test(cjpeg${suffix}-444-islow-progari-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_444_ISLOW_PROGARI}
+        -DFILE=testout_444_islow_progari.jpg
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+  if(WITH_ARITH_DEC)
+    # CC: YCC->RGB  SAMP: h2v2 merged  IDCT: ifast  ENT: arith
+    add_test(djpeg${suffix}-420m-ifast-ari
+      ${dir}djpeg${suffix} -fast -ppm -outfile testout_420m_ifast_ari.ppm
+        ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
+    add_test(djpeg${suffix}-420m-ifast-ari-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_PPM_420M_IFAST_ARI}
+        -DFILE=testout_420m_ifast_ari.ppm
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    add_test(jpegtran${suffix}-420-islow
+      ${dir}jpegtran${suffix} -outfile testout_420_islow.jpg
+        ${CMAKE_SOURCE_DIR}/testimages/testimgari.jpg)
+    add_test(jpegtran${suffix}-420-islow-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_420_ISLOW}
+        -DFILE=testout_420_islow.jpg
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+
+  # 2/1--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 16x16 islow  ENT: huff
+  # 15/8--  CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 15x15 islow  ENT: huff
+  # 13/8--  CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 13x13 islow  ENT: huff
+  # 11/8--  CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 11x11 islow  ENT: huff
+  # 9/8--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 9x9 islow  ENT: huff
+  # 7/8--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 7x7 islow/14x14 islow
+  #         ENT: huff
+  # 3/4--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 6x6 islow/12x12 islow
+  #         ENT: huff
+  # 5/8--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 5x5 islow/10x10 islow
+  #         ENT: huff
+  # 1/2--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 4x4 islow/8x8 islow
+  #         ENT: huff
+  # 3/8--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 3x3 islow/6x6 islow
+  #         ENT: huff
+  # 1/4--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 2x2 islow/4x4 islow
+  #         ENT: huff
+  # 1/8--   CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 1x1 islow/2x2 islow
+  #         ENT: huff
+  foreach(scale 2_1 15_8 13_8 11_8 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8)
+    string(REGEX REPLACE "_" "/" scalearg ${scale})
+    add_test(djpeg${suffix}-420m-islow-${scale}
+      ${dir}djpeg${suffix} -dct int -scale ${scalearg} -nosmooth -ppm
+        -outfile testout_420m_islow_${scale}.ppm
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420m-islow-${scale}-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_PPM_420M_ISLOW_${scale}}
+        -DFILE=testout_420m_islow_${scale}.ppm
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endforeach()
+
+  if(NOT WITH_12BIT)
+    # CC: YCC->RGB (dithered)  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-420-islow-256
+      ${dir}djpeg${suffix} -dct int -colors 256 -bmp
+        -outfile testout_420_islow_256.bmp
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420-islow-256-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_420_ISLOW_256}
+        -DFILE=testout_420_islow_256.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: YCC->RGB565  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-420-islow-565
+      ${dir}djpeg${suffix} -dct int -rgb565 -dither none -bmp
+        -outfile testout_420_islow_565.bmp
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420-islow-565-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_420_ISLOW_565}
+        -DFILE=testout_420_islow_565.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: YCC->RGB565 (dithered)  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-420-islow-565D
+      ${dir}djpeg${suffix} -dct int -rgb565 -bmp
+        -outfile testout_420_islow_565D.bmp
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420-islow-565D-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_420_ISLOW_565D}
+        -DFILE=testout_420_islow_565D.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: YCC->RGB565  SAMP: h2v2 merged  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-420m-islow-565
+      ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -dither none -bmp
+        -outfile testout_420m_islow_565.bmp
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420m-islow-565-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_420M_ISLOW_565}
+        -DFILE=testout_420m_islow_565.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+    # CC: YCC->RGB565 (dithered)  SAMP: h2v2 merged  IDCT: islow  ENT: huff
+    add_test(djpeg${suffix}-420m-islow-565D
+      ${dir}djpeg${suffix} -dct int -nosmooth -rgb565 -bmp
+        -outfile testout_420m_islow_565D.bmp
+        ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+    add_test(djpeg${suffix}-420m-islow-565D-cmp
+      ${CMAKE_COMMAND} -DMD5=${MD5_BMP_420M_ISLOW_565D}
+        -DFILE=testout_420m_islow_565D.bmp
+        -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+  endif()
+  add_test(jpegtran${suffix}-crop
+    ${dir}jpegtran${suffix} -crop 120x90+20+50 -transpose -perfect
+      -outfile testout_crop.jpg ${CMAKE_SOURCE_DIR}/testimages/${TESTORIG})
+  add_test(jpegtran${suffix}-crop-cmp
+    ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_CROP} -DFILE=testout_crop.jpg
+      -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
+
+endforeach()
+
+add_custom_target(testclean COMMAND ${CMAKE_COMMAND} -P
+  ${CMAKE_SOURCE_DIR}/cmakescripts/testclean.cmake)
+
+
+#
+# Installer
+#
+
+if(MSVC)
+  set(INST_PLATFORM "Visual C++")
+  set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-vc)
+  set(INST_REG_NAME ${CMAKE_PROJECT_NAME})
+elseif(MINGW)
+  set(INST_PLATFORM GCC)
+  set(INST_NAME ${CMAKE_PROJECT_NAME}-${VERSION}-gcc)
+  set(INST_REG_NAME ${CMAKE_PROJECT_NAME}-gcc)
+  set(INST_DEFS -DGCC)
+endif()
+
+if(64BIT)
+  set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
+  set(INST_NAME ${INST_NAME}64)
+  set(INST_REG_NAME ${INST_REG_NAME}64)  # INST_DIR is not yet defined here
+  set(INST_DEFS ${INST_DEFS} -DWIN64)
+endif()
+
+if(WITH_JAVA)
+  set(INST_DEFS ${INST_DEFS} -DJAVA)
+endif()
+
+if(MSVC_IDE)
+  set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=${CMAKE_CFG_INTDIR}\\")
+else()
+  set(INST_DEFS ${INST_DEFS} "-DBUILDDIR=")
+endif()
+
+string(REGEX REPLACE "/" "\\\\" INST_DIR "${CMAKE_INSTALL_PREFIX}")  # NSIS wants backslashes
+
+configure_file(release/libjpeg-turbo.nsi.in libjpeg-turbo.nsi @ONLY)
+
+if(WITH_JAVA)
+  set(JAVA_DEPEND java)
+endif()
+if(WITH_TURBOJPEG)
+  set(TURBOJPEG_TARGETS turbojpeg turbojpeg-static tjbench)
+endif()
+add_custom_target(installer
+  makensis -nocd ${INST_DEFS} libjpeg-turbo.nsi
+  DEPENDS jpeg jpeg-static rdjpgcom wrjpgcom cjpeg djpeg jpegtran
+    ${TURBOJPEG_TARGETS} ${JAVA_DEPEND}  # TurboJPEG targets exist only if enabled
+  SOURCES libjpeg-turbo.nsi)
+
+install(TARGETS jpeg-static rdjpgcom wrjpgcom ${TURBOJPEG_TARGETS}
+  ARCHIVE DESTINATION lib
+  LIBRARY DESTINATION lib
+  RUNTIME DESTINATION bin
+)
+
+install(FILES ${CMAKE_SOURCE_DIR}/README ${CMAKE_SOURCE_DIR}/README-turbo.txt
+  ${CMAKE_SOURCE_DIR}/example.c ${CMAKE_SOURCE_DIR}/libjpeg.txt
+  ${CMAKE_SOURCE_DIR}/structure.txt ${CMAKE_SOURCE_DIR}/usage.txt
+  ${CMAKE_SOURCE_DIR}/wizard.txt
+  DESTINATION doc)
+
+install(FILES ${CMAKE_BINARY_DIR}/jconfig.h ${CMAKE_SOURCE_DIR}/jerror.h
+  ${CMAKE_SOURCE_DIR}/jmorecfg.h ${CMAKE_SOURCE_DIR}/jpeglib.h
+  ${CMAKE_SOURCE_DIR}/turbojpeg.h DESTINATION include)
diff --git a/ChangeLog.txt b/ChangeLog.txt
new file mode 100644
index 0000000..a463254
--- /dev/null
+++ b/ChangeLog.txt
@@ -0,0 +1,547 @@
+1.4 pre-beta
+============
+
+[1] New features in the TurboJPEG API:
+-- YUV planar images can now be generated with an arbitrary line padding
+(previously only 4-byte padding, which was compatible with X Video, was
+supported.)
+-- The decompress-to-YUV function has been extended to support image scaling.
+-- JPEG images can now be compressed from YUV planar source images.
+-- YUV planar images can now be decoded into RGB or grayscale images.
+-- 4:1:1 subsampling is now supported.  This is mainly included for
+compatibility, since 4:1:1 is not fully accelerated in libjpeg-turbo and has no
+significant advantages relative to 4:2:0.
+-- CMYK images are now supported.  This feature allows CMYK source images to be
+compressed to YCCK JPEGs and YCCK or CMYK JPEGs to be decompressed to CMYK
+destination images.  Conversion between CMYK and RGB or YUV images is not
+supported.  Such conversion requires a color management system and is out of
+scope for a codec library.
+-- The handling of YUV images in the Java API has been significantly refactored
+and should now be much more intuitive.
+-- The Java API now supports encoding a YUV image from an arbitrary position in
+a large image buffer.
+
+[2] Added SIMD acceleration for DSPr2-capable MIPS platforms.  This speeds up
+the compression of full-color JPEGs by 70-80% on such platforms and
+decompression by 25-35%.
+
+[3] If an application attempts to decompress a Huffman-coded JPEG image whose
+header does not contain Huffman tables, libjpeg-turbo will now insert the
+default Huffman tables.  In order to save space, many motion JPEG video frames
+are encoded without the default Huffman tables, so these frames can now be
+successfully decompressed by libjpeg-turbo without additional work on the part
+of the application.  An application can still override the Huffman tables, for
+instance to re-use tables from a previous frame of the same video.
+
+[4] The Mac packaging system now uses pkgbuild and productbuild rather than
+PackageMaker (which is obsolete and no longer supported.)  This means that
+OS X 10.6 "Snow Leopard" or later must be used when packaging libjpeg-turbo,
+although the packages produced can be installed on OS X 10.5 "Leopard" or
+later.  OS X 10.4 "Tiger" is no longer supported.
+
+[5] The Huffman encoder now uses clz and bsr instructions for bit counting on
+ARM platforms rather than a lookup table.  This reduces the memory footprint
+by 64k, which may be important for some mobile applications.  Out of four
+Android devices that were tested, two demonstrated a small overall performance
+loss (~3-4% on average) with ARMv6 code and a small gain (also ~3-4%) with
+ARMv7 code when enabling this new feature, but the other two devices
+demonstrated a significant overall performance gain with both ARMv6 and ARMv7
+code (~10-20%.)  Actual mileage may vary.
+
+[6] Worked around an issue with Visual C++ 2010 and later that caused incorrect
+pixels to be generated when decompressing a JPEG image to a 256-color bitmap,
+if compiler optimization was enabled when libjpeg-turbo was built.  This caused
+the regression tests to fail when doing a release build under Visual C++ 2010
+and later.
+
+[7] Improved the accuracy and performance of the non-SIMD implementation of the
+floating point inverse DCT (using code borrowed from libjpeg v8a and later.)
+The accuracy of this implementation now matches the accuracy of the SSE/SSE2
+implementation.  Note, however, that the floating point DCT/IDCT algorithms are
+mainly a legacy feature.  They generally do not produce significantly better
+accuracy than the slow integer DCT/IDCT algorithms, and they are quite a bit
+slower.
+
+[8] Added a new output colorspace (JCS_RGB565) to the libjpeg API that allows
+for decompressing JPEG images into RGB565 (16-bit) pixels.
+
+[9] Numerous obsolete features, such as support for compilers that can't
+handle prototypes and support for the MS-DOS memory model, were removed from
+the libjpeg code, greatly improving its readability and making it easier to
+maintain and extend.
+
+[10] Fixed a segfault that occurred when calling output_message() with msg_code
+set to JMSG_COPYRIGHT.
+
+[11] Fixed an issue whereby wrjpgcom was allowing comments longer than 65k
+characters to be passed on the command line, which was causing it to generate
+incorrect JPEG files.
+
+[12] Fixed a bug in the build system that was causing the Windows version of
+wrjpgcom to be built using the rdjpgcom code.
+
+[13] Restored 12-bit-per-component JPEG support.  A 12-bit version of
+libjpeg-turbo can now be built by passing an argument of --with-12bit to
+configure (Unix) or -DWITH_12BIT=1 to cmake (Windows.)  12-bit JPEG support is
+included only for convenience.  Enabling this feature disables all of the
+performance features in libjpeg-turbo, as well as arithmetic coding and the
+TurboJPEG API.  The resulting library behaves no differently than libjpeg v6b.
+
+
+1.3.1
+=====
+
+[1] On Un*x systems, 'make install' now installs the libjpeg-turbo libraries
+into /opt/libjpeg-turbo/lib32 by default on any 32-bit system, not just x86,
+and into /opt/libjpeg-turbo/lib64 by default on any 64-bit system, not just
+x86-64.  You can override this by overriding either the 'prefix' or 'libdir'
+configure variables.
+
+[2] The Windows installer now places a copy of the TurboJPEG DLLs in the same
+directory as the rest of the libjpeg-turbo binaries.  This was mainly done
+to support TurboVNC 1.3, which bundles the DLLs in its Windows installation.
+When using a 32-bit version of CMake on 64-bit Windows, it is impossible to
+access the c:\WINDOWS\system32 directory, which made it impossible for the
+TurboVNC build scripts to bundle the 64-bit TurboJPEG DLL.
+
+[3] Fixed a bug whereby attempting to encode a progressive JPEG with arithmetic
+entropy coding (by passing arguments of -progressive -arithmetic to cjpeg or
+jpegtran, for instance) would result in an error, "Requested feature was
+omitted at compile time".
+
+[4] Fixed a couple of issues whereby malformed JPEG images would cause
+libjpeg-turbo to use uninitialized memory during decompression.
+
+[5] Fixed an error ("Buffer passed to JPEG library is too small") that occurred
+when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
+source image, and added a unit test to check for this error.
+
+[6] The Java classes should now build properly under Visual Studio 2010 and
+later.
+
+[7] Fixed an issue that prevented SRPMs generated using the in-tree packaging
+tools from being rebuilt on certain newer Linux distributions.
+
+[8] Numerous minor fixes to eliminate compilation and build/packaging system
+warnings, fix cosmetic issues, improve documentation clarity, and other general
+source cleanup.
+
+
+1.3.0
+=====
+
+[1] 'make test' now works properly on FreeBSD, and it no longer requires the
+md5sum executable to be present on other Un*x platforms.
+
+[2] Overhauled the packaging system:
+-- To avoid conflict with vendor-supplied libjpeg-turbo packages, the
+official RPMs and DEBs for libjpeg-turbo have been renamed to
+"libjpeg-turbo-official".
+-- The TurboJPEG libraries are now located under /opt/libjpeg-turbo in the
+official Linux and Mac packages, to avoid conflict with vendor-supplied
+packages and also to streamline the packaging system.
+-- Release packages are now created with the directory structure defined
+by the configure variables "prefix", "bindir", "libdir", etc. (Un*x) or by the
+CMAKE_INSTALL_PREFIX variable (Windows.)  The exception is that the docs are
+always located under the system default documentation directory on Un*x and Mac
+systems, and on Windows, the TurboJPEG DLL is always located in the Windows
+system directory.
+-- To avoid confusion, official libjpeg-turbo packages on Linux/Unix platforms
+(except for Mac) will always install the 32-bit libraries in
+/opt/libjpeg-turbo/lib32 and the 64-bit libraries in /opt/libjpeg-turbo/lib64.
+-- Fixed an issue whereby, in some cases, the libjpeg-turbo executables on Un*x
+systems were not properly linking with the shared libraries installed by the
+same package.
+-- Fixed an issue whereby building the "installer" target on Windows when
+WITH_JAVA=1 would fail if the TurboJPEG JAR had not been previously built.
+-- Building the "install" target on Windows now installs files into the same
+places that the installer does.
+
+[3] Fixed a Huffman encoder bug that prevented I/O suspension from working
+properly.
+
+
+1.2.90 (1.3 beta1)
+==================
+
+[1] Added support for additional scaling factors (3/8, 5/8, 3/4, 7/8, 9/8, 5/4,
+11/8, 3/2, 13/8, 7/4, 15/8, and 2) when decompressing.  Note that the IDCT will
+not be SIMD-accelerated when using any of these new scaling factors.
+
+[2] The TurboJPEG dynamic library is now versioned.  It was not strictly
+necessary to do so, because TurboJPEG uses versioned symbols, and if a function
+changes in an ABI-incompatible way, that function is renamed and a legacy
+function is provided to maintain backward compatibility.  However, certain
+Linux distro maintainers have a policy against accepting any library that isn't
+versioned.
+
+[3] Extended the TurboJPEG Java API so that it can be used to compress a JPEG
+image from and decompress a JPEG image to an arbitrary position in a large
+image buffer.
+
+[4] The tjDecompressToYUV() function now supports the TJFLAG_FASTDCT flag.
+
+[5] The 32-bit supplementary package for amd64 Debian systems now provides
+symlinks in /usr/lib/i386-linux-gnu for the TurboJPEG libraries in /usr/lib32.
+This allows those libraries to be used on MultiArch-compatible systems (such as
+Ubuntu 11 and later) without setting the linker path.
+
+[6] The TurboJPEG Java wrapper should now find the JNI library on Mac systems
+without having to pass -Djava.library.path=/usr/lib to java.
+
+[7] TJBench has been ported to Java to provide a convenient way of validating
+the performance of the TurboJPEG Java API.  It can be run with
+'java -cp turbojpeg.jar TJBench'.
+
+[8] cjpeg can now be used to generate JPEG files with the RGB colorspace
+(feature ported from jpeg-8d.)
+
+[9] The width and height in the -crop argument passed to jpegtran can now be
+suffixed with "f" to indicate that, when the upper left corner of the cropping
+region is automatically moved to the nearest iMCU boundary, the bottom right
+corner should be moved by the same amount.  In other words, this feature causes
+jpegtran to strictly honor the specified width/height rather than the specified
+bottom right corner (feature ported from jpeg-8d.)
+
+[10] JPEG files using the RGB colorspace can now be decompressed into grayscale
+images (feature ported from jpeg-8d.)
+
+[11] Fixed a regression caused by 1.2.1[7] whereby the build would fail with
+multiple "Mismatch in operand sizes" errors when attempting to build the x86
+SIMD code with NASM 0.98.
+
+[12] The in-memory source/destination managers (jpeg_mem_src() and
+jpeg_mem_dest()) are now included by default when building libjpeg-turbo with
+libjpeg v6b or v7 emulation, so that programs can take advantage of these
+functions without requiring the use of the backward-incompatible libjpeg v8
+ABI.  The "age number" of the libjpeg-turbo library on Un*x systems has been
+incremented by 1 to reflect this.  You can disable this feature with a
+configure/CMake switch in order to retain strict API/ABI compatibility with the
+libjpeg v6b or v7 API/ABI (or with previous versions of libjpeg-turbo.)  See
+README-turbo.txt for more details.
+
+[13] Added ARM v7s architecture to libjpeg.a and libturbojpeg.a in the official
+libjpeg-turbo binary package for OS X, so that those libraries can be used to
+build applications that leverage the faster CPUs in the iPhone 5 and iPad 4.
+
+
+1.2.1
+=====
+
+[1] Creating or decoding a JPEG file that uses the RGB colorspace should now
+properly work when the input or output colorspace is one of the libjpeg-turbo
+colorspace extensions.
+
+[2] When libjpeg-turbo was built without SIMD support and merged (non-fancy)
+upsampling was used along with an alpha-enabled colorspace during
+decompression, the unused byte of the decompressed pixels was not being set to
+0xFF.  This has been fixed.  TJUnitTest has also been extended to test for the
+correct behavior of the colorspace extensions when merged upsampling is used.
+
+[3] Fixed a bug whereby the libjpeg-turbo SSE2 SIMD code would not preserve the
+upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64
+calling conventions.
+
+[4] Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG
+images (specifically, images in which the component count was erroneously set
+to a large value) would cause libjpeg-turbo to segfault.
+
+[5] Worked around a severe performance issue with "Bobcat" (AMD Embedded APU)
+processors.  The MASKMOVDQU instruction, which was used by the libjpeg-turbo
+SSE2 SIMD code, is apparently implemented in microcode on AMD processors, and
+it is painfully slow on Bobcat processors in particular.  Eliminating the use
+of this instruction improved performance by an order of magnitude on Bobcat
+processors and by a small amount (typically 5%) on AMD desktop processors.
+
+[6] Added SIMD acceleration for performing 4:2:2 upsampling on NEON-capable ARM
+platforms.  This speeds up the decompression of 4:2:2 JPEGs by 20-25% on such
+platforms.
+
+[7] Fixed a regression caused by 1.2.0[2] whereby, on Linux/x86 platforms
+running the 32-bit SSE2 SIMD code in libjpeg-turbo, decompressing a 4:2:0 or
+4:2:2 JPEG image into a 32-bit (RGBX, BGRX, etc.) buffer without using fancy
+upsampling would produce several incorrect columns of pixels at the right-hand
+side of the output image if each row in the output image was not evenly
+divisible by 16 bytes.
+
+[8] Fixed an issue whereby attempting to build the SIMD extensions with Xcode
+4.3 on OS X platforms would cause NASM to return numerous errors of the form
+"'%define' expects a macro identifier".
+
+[9] Added flags to the TurboJPEG API that allow the caller to force the use of
+either the fast or the accurate DCT/IDCT algorithms in the underlying codec.
+
+
+1.2.0
+=====
+
+[1] Fixed build issue with YASM on Unix systems (the libjpeg-turbo build system
+was not adding the current directory to the assembler include path, so YASM
+was not able to find jsimdcfg.inc.)
+
+[2] Fixed out-of-bounds read in SSE2 SIMD code that occurred when decompressing
+a JPEG image to a bitmap buffer whose size was not a multiple of 16 bytes.
+This was more of an annoyance than an actual bug, since it did not cause any
+actual run-time problems, but the issue showed up when running libjpeg-turbo in
+valgrind.  See http://crbug.com/72399 for more information.
+
+[3] Added a compile-time macro (LIBJPEG_TURBO_VERSION) that can be used to
+check the version of libjpeg-turbo against which an application was compiled.
+
+[4] Added new RGBA/BGRA/ABGR/ARGB colorspace extension constants (libjpeg API)
+and pixel formats (TurboJPEG API), which allow applications to specify that,
+when decompressing to a 4-component RGB buffer, the unused byte should be set
+to 0xFF so that it can be interpreted as an opaque alpha channel.
+
+[5] Fixed regression issue whereby DevIL failed to build against libjpeg-turbo
+because libjpeg-turbo's distributed version of jconfig.h contained an INLINE
+macro, which conflicted with a similar macro in DevIL.  This macro is used only
+internally when building libjpeg-turbo, so it was moved into config.h.
+
+[6] libjpeg-turbo will now correctly decompress erroneous CMYK/YCCK JPEGs whose
+K component is assigned a component ID of 1 instead of 4.  Although these files
+are in violation of the spec, other JPEG implementations handle them
+correctly.
+
+[7] Added ARM v6 and ARM v7 architectures to libjpeg.a and libturbojpeg.a in
+the official libjpeg-turbo binary package for OS X, so that those libraries can
+be used to build both OS X and iOS applications.
+
+
+1.1.90 (1.2 beta1)
+==================
+
+[1] Added a Java wrapper for the TurboJPEG API.  See java/README for more
+details.
+
+[2] The TurboJPEG API can now be used to scale down images during
+decompression.
+
+[3] Added SIMD routines for RGB-to-grayscale color conversion, which
+significantly improves the performance of grayscale JPEG compression from an
+RGB source image.
+
+[4] Improved the performance of the C color conversion routines, which are used
+on platforms for which SIMD acceleration is not available.
+
+[5] Added a function to the TurboJPEG API that performs lossless transforms.
+This function is implemented using the same back end as jpegtran, but it
+performs transcoding entirely in memory and allows multiple transforms and/or
+crop operations to be batched together, so the source coefficients only need to
+be read once.  This is useful when generating image tiles from a single source
+JPEG.
+
+[6] Added tests for the new TurboJPEG scaled decompression and lossless
+transform features to tjbench (the TurboJPEG benchmark, formerly called
+"jpgtest".)
+
+[7] Added support for 4:4:0 (transposed 4:2:2) subsampling in TurboJPEG, which
+was necessary in order for it to read 4:2:2 JPEG files that had been losslessly
+transposed or rotated 90 degrees.
+
+[8] All legacy VirtualGL code has been re-factored, and this has allowed
+libjpeg-turbo, in its entirety, to be re-licensed under a BSD-style license.
+
+[9] libjpeg-turbo can now be built with YASM.
+
+[10] Added SIMD acceleration for ARM Linux and iOS platforms that support
+NEON instructions.
+
+[11] Refactored the TurboJPEG C API and documented it using Doxygen.  The
+TurboJPEG 1.2 API uses pixel formats to define the size and component order of
+the uncompressed source/destination images, and it includes a more efficient
+version of TJBUFSIZE() that computes a worst-case JPEG size based on the level
+of chrominance subsampling.  The refactored implementation of the TurboJPEG API
+now uses the libjpeg memory source and destination managers, which allows the
+TurboJPEG compressor to grow the JPEG buffer as necessary.
+
+[12] Eliminated errors in the output of jpegtran on Windows that occurred when
+the application was invoked using I/O redirection
+(jpegtran <input.jpg >output.jpg).
+
+[13] The inclusion of libjpeg v7 and v8 emulation as well as arithmetic coding
+support in libjpeg-turbo v1.1.0 introduced several new error constants in
+jerror.h, and these were mistakenly enabled for all emulation modes, causing
+the error enum in libjpeg-turbo to sometimes have different values than the
+same enum in libjpeg.  This represents an ABI incompatibility, and it caused
+problems with rare applications that took specific action based on a particular
+error value.  The fix was to include the new error constants conditionally
+based on whether libjpeg v7 or v8 emulation was enabled.
+
+[14] Fixed an issue whereby Windows applications that used libjpeg-turbo would
+fail to compile if the Windows system headers were included before jpeglib.h.
+This issue was caused by a conflict in the definition of the INT32 type.
+
+[15] Fixed 32-bit supplementary package for amd64 Debian systems, which was
+broken by enhancements to the packaging system in 1.1.
+
+[16] When decompressing a JPEG image using an output colorspace of
+JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR, or JCS_EXT_XRGB, libjpeg-turbo will
+now set the unused byte to 0xFF, which allows applications to interpret that
+byte as an alpha channel (0xFF = opaque).
+
+
+1.1.1
+=====
+
+[1] Fixed a 1-pixel error in row 0, column 21 of the luminance plane generated
+by tjEncodeYUV().
+
+[2] libjpeg-turbo's accelerated Huffman decoder previously ignored unexpected
+markers found in the middle of the JPEG data stream during decompression.  It
+will now hand off decoding of a particular block to the unaccelerated Huffman
+decoder if an unexpected marker is found, so that the unaccelerated Huffman
+decoder can generate an appropriate warning.
+
+[3] Older versions of MinGW64 prefixed symbol names with underscores by
+default, which differed from the behavior of 64-bit Visual C++.  MinGW64 1.0
+has adopted the behavior of 64-bit Visual C++ as the default, so to accommodate
+this, the libjpeg-turbo SIMD function names are no longer prefixed with an
+underscore when building with MinGW64.  This means that, when building
+libjpeg-turbo with older versions of MinGW64, you will now have to add
+-fno-leading-underscore to the CFLAGS.
+
+[4] Fixed a regression bug in the NSIS script that caused the Windows installer
+build to fail when using the Visual Studio IDE.
+
+[5] Fixed a bug in jpeg_read_coefficients() whereby it would not initialize
+cinfo->image_width and cinfo->image_height if libjpeg v7 or v8 emulation was
+enabled.  This specifically caused the jpegoptim program to fail if it was
+linked against a version of libjpeg-turbo that was built with libjpeg v7 or v8
+emulation.
+
+[6] Eliminated excessive I/O overhead that occurred when reading BMP files in
+cjpeg.
+
+[7] Eliminated errors in the output of cjpeg on Windows that occurred when the
+application was invoked using I/O redirection (cjpeg <inputfile >output.jpg).
+
+
+1.1.0
+=====
+
+[1] The algorithm used by the SIMD quantization function cannot produce correct
+results when the JPEG quality is >= 98 and the fast integer forward DCT is
+used.  Thus, the non-SIMD quantization function is now used for those cases,
+and libjpeg-turbo should now produce identical output to libjpeg v6b in all
+cases.
+
+[2] Despite the above, the fast integer forward DCT still degrades somewhat for
+JPEG qualities greater than 95, so the TurboJPEG wrapper will now automatically
+use the slow integer forward DCT when generating JPEG images of quality 96 or
+greater.  This reduces compression performance by as much as 15% for these
+high-quality images but is necessary to ensure that the images are perceptually
+lossless.  It also ensures that the library can avoid the performance pitfall
+created by [1].
+
+[3] Ported jpgtest.cxx to pure C to avoid the need for a C++ compiler.
+
+[4] Fixed visual artifacts in grayscale JPEG compression caused by a typo in
+the RGB-to-luminance lookup tables.
+
+[5] The Windows distribution packages now include the libjpeg run-time programs
+(cjpeg, etc.)
+
+[6] All packages now include jpgtest.
+
+[7] The TurboJPEG dynamic library now uses versioned symbols.
+
+[8] Added two new TurboJPEG API functions, tjEncodeYUV() and
+tjDecompressToYUV(), to replace the somewhat hackish TJ_YUV flag.
+
+
+1.0.90 (1.1 beta1)
+==================
+
+[1] Added emulation of the libjpeg v7 and v8 APIs and ABIs.  See
+README-turbo.txt for more details.  This feature was sponsored by CamTrace SAS.
+
+[2] Created a new CMake-based build system for the Visual C++ and MinGW builds.
+
+[3] The TurboJPEG API can now be used to compress from and decompress to
+grayscale bitmaps.
+
+[4] jpgtest can now be used to test decompression performance with existing
+JPEG images.
+
+[5] If the default install prefix (/opt/libjpeg-turbo) is used, then
+'make install' now creates /opt/libjpeg-turbo/lib32 and
+/opt/libjpeg-turbo/lib64 sym links to duplicate the behavior of the binary
+packages.
+
+[6] All symbols in the libjpeg-turbo dynamic library are now versioned, even
+when the library is built with libjpeg v6b emulation.
+
+[7] Added arithmetic encoding and decoding support (can be disabled with
+configure or CMake options.)
+
+[8] Added a TJ_YUV flag to the TurboJPEG API, which causes both the compressor
+and decompressor to output planar YUV images.
+
+[9] Added an extended version of tjDecompressHeader() to the TurboJPEG API,
+which allows the caller to determine the type of subsampling used in a JPEG
+image.
+
+[10] Added further protections against invalid Huffman codes.
+
+
+1.0.1
+=====
+
+[1] The Huffman decoder will now handle erroneous Huffman codes (for instance,
+from a corrupt JPEG image.)  Previously, these would cause libjpeg-turbo to
+crash under certain circumstances.
+
+[2] Fixed typo in SIMD dispatch routines that was causing 4:2:2 upsampling to
+be used instead of 4:2:0 when decompressing JPEG images using SSE2 code.
+
+[3] configure script will now automatically determine whether the
+INCOMPLETE_TYPES_BROKEN macro should be defined.
+
+
+1.0.0
+=====
+
+[1] 2983700: Further FreeBSD build tweaks (no longer necessary to specify
+--host when configuring on a 64-bit system)
+
+[2] Created symlinks in the Unix/Linux packages so that the TurboJPEG
+include file can always be found in /opt/libjpeg-turbo/include, the 32-bit
+static libraries can always be found in /opt/libjpeg-turbo/lib32, and the
+64-bit static libraries can always be found in /opt/libjpeg-turbo/lib64.
+
+[3] The Unix/Linux distribution packages now include the libjpeg run-time
+programs (cjpeg, etc.) and man pages.
+
+[4] Created a 32-bit supplementary package for amd64 Debian systems, which
+contains just the 32-bit libjpeg-turbo libraries.
+
+[5] Moved the libraries from */lib32 to */lib in the i386 Debian package.
+
+[6] Include distribution package for Cygwin
+
+[7] No longer necessary to specify --without-simd on non-x86 architectures, and
+unit tests now work on those architectures.
+
+
+0.0.93
+======
+
+[1] 2982659: Fixed x86-64 build on FreeBSD systems
+
+[2] 2988188: Added support for Windows 64-bit systems
+
+
+0.0.91
+======
+
+[1] Added documentation to .deb packages
+
+[2] 2968313: Fixed data corruption issues when decompressing large JPEG images
+and/or using buffered I/O with the libjpeg-turbo decompressor
+
+
+0.0.90
+======
+
+Initial release
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..dad69df
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,602 @@
+lib_LTLIBRARIES = libjpeg.la
+libjpeg_la_LDFLAGS = -version-info ${LIBTOOL_CURRENT}:${SO_MINOR_VERSION}:${SO_AGE} -no-undefined
+include_HEADERS = jerror.h jmorecfg.h jpeglib.h
+
+if WITH_TURBOJPEG  # libturbojpeg and its header are added only when the TurboJPEG API is enabled
+lib_LTLIBRARIES += libturbojpeg.la
+libturbojpeg_la_LDFLAGS = -version-info 1:0:1 -no-undefined
+include_HEADERS += turbojpeg.h
+endif  # WITH_TURBOJPEG
+
+nodist_include_HEADERS = jconfig.h  # installed with the other headers but left out of the distribution
+
+
+HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
+	jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \
+	jpeg_nbits_table.h
+
+libjpeg_la_SOURCES = $(HDRS) jcapimin.c jcapistd.c jccoefct.c jccolor.c \
+	jcdctmgr.c jchuff.c jcinit.c jcmainct.c jcmarker.c jcmaster.c \
+	jcomapi.c jcparam.c jcphuff.c jcprepct.c jcsample.c jctrans.c \
+	jdapimin.c jdapistd.c jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c \
+	jddctmgr.c jdhuff.c jdinput.c jdmainct.c jdmarker.c jdmaster.c \
+	jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c \
+	jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c \
+	jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c
+
+if WITH_ARITH  # jaricom.c has its own conditional, separate from the encoder and decoder ones
+libjpeg_la_SOURCES += jaricom.c
+endif
+
+if WITH_ARITH_ENC  # presumably the arithmetic encoder (jc* files are the compression side) - confirm
+libjpeg_la_SOURCES += jcarith.c
+endif
+
+if WITH_ARITH_DEC  # presumably the arithmetic decoder (jd* files are the decompression side) - confirm
+libjpeg_la_SOURCES += jdarith.c
+endif
+
+
+SUBDIRS = java
+
+
+if WITH_TURBOJPEG  # libturbojpeg compiles every libjpeg source plus the TurboJPEG-specific files
+
+libturbojpeg_la_SOURCES = $(libjpeg_la_SOURCES) turbojpeg.c turbojpeg.h \
+	transupp.c transupp.h jdatadst-tj.c jdatasrc-tj.c
+
+if WITH_JAVA  # JNI build: add the JNI source and flags, and select the .jni map file
+
+libturbojpeg_la_SOURCES += turbojpeg-jni.c
+libturbojpeg_la_CFLAGS = ${JNI_CFLAGS}
+TJMAPFILE = turbojpeg-mapfile.jni
+
+else  # no JNI: use the plain map file
+
+TJMAPFILE = turbojpeg-mapfile
+
+endif  # WITH_JAVA
+
+libturbojpeg_la_SOURCES += $(TJMAPFILE)
+
+if VERSION_SCRIPT  # pass the selected map file, located in the source directory, via VERSION_SCRIPT_FLAG
+libturbojpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)$(srcdir)/$(TJMAPFILE)
+endif
+
+endif  # WITH_TURBOJPEG
+
+
+if VERSION_SCRIPT  # libjpeg.map is referenced without $(srcdir), unlike the TurboJPEG map file
+libjpeg_la_LDFLAGS += $(VERSION_SCRIPT_FLAG)libjpeg.map
+endif
+
+
+if WITH_SIMD  # descend into simd/ and link both libraries against the libtool library built there
+
+SUBDIRS += simd
+libjpeg_la_LIBADD = simd/libsimd.la
+libturbojpeg_la_LIBADD = simd/libsimd.la
+
+else  # no SIMD: jsimd_none.c is added to the libjpeg sources instead
+
+libjpeg_la_SOURCES += jsimd_none.c
+
+endif  # WITH_SIMD
+
+
+bin_PROGRAMS = cjpeg djpeg jpegtran rdjpgcom wrjpgcom
+noinst_PROGRAMS = jcstest  # noinst_: built but not installed
+
+
+if WITH_TURBOJPEG  # tjbench is installed; tjunittest is built but not installed
+
+bin_PROGRAMS += tjbench
+
+noinst_PROGRAMS += tjunittest
+
+tjbench_SOURCES = tjbench.c bmp.h bmp.c tjutil.h tjutil.c rdbmp.c rdppm.c \
+	wrbmp.c wrppm.c
+
+tjbench_LDADD = libturbojpeg.la libjpeg.la -lm
+
+tjbench_CFLAGS = -DBMP_SUPPORTED -DPPM_SUPPORTED
+
+tjunittest_SOURCES = tjunittest.c tjutil.h tjutil.c
+
+tjunittest_LDADD = libturbojpeg.la
+
+endif  # WITH_TURBOJPEG
+
+
+cjpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c cjpeg.c rdgif.c rdppm.c rdswitch.c
+if WITH_12BIT  # 12-bit builds add nothing here
+else  # non-12-bit builds also compile the BMP and Targa readers
+cjpeg_SOURCES += rdbmp.c rdtarga.c
+endif
+
+cjpeg_LDADD = libjpeg.la
+
+cjpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED
+if WITH_12BIT  # the extra format macros mirror the conditional source list for cjpeg
+else
+cjpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED
+endif
+
+djpeg_SOURCES = cdjpeg.h cderror.h cdjpeg.c djpeg.c rdcolmap.c rdswitch.c \
+	wrgif.c wrppm.c
+if WITH_12BIT  # 12-bit builds add nothing here
+else  # non-12-bit builds also compile the BMP and Targa writers
+djpeg_SOURCES += wrbmp.c wrtarga.c
+endif
+
+djpeg_LDADD = libjpeg.la
+
+djpeg_CFLAGS = -DGIF_SUPPORTED -DPPM_SUPPORTED
+if WITH_12BIT  # the extra format macros mirror the conditional source list for djpeg
+else
+djpeg_CFLAGS += -DBMP_SUPPORTED -DTARGA_SUPPORTED
+endif
+
+jpegtran_SOURCES = jpegtran.c rdswitch.c cdjpeg.c transupp.c transupp.h
+
+jpegtran_LDADD = libjpeg.la
+
+rdjpgcom_SOURCES = rdjpgcom.c
+
+rdjpgcom_LDADD = libjpeg.la
+
+wrjpgcom_SOURCES = wrjpgcom.c
+
+wrjpgcom_LDADD = libjpeg.la
+
+jcstest_SOURCES = jcstest.c
+
+jcstest_LDADD = libjpeg.la
+
+dist_man1_MANS = cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 wrjpgcom.1
+
+DOCS= coderules.txt jconfig.txt change.log rdrle.c wrrle.c BUILDING.txt \
+	ChangeLog.txt  # DOCS is consumed by EXTRA_DIST below: distributed, with no install rule in the lines shown here
+
+docdir = $(datadir)/doc
+dist_doc_DATA = README README-turbo.txt libjpeg.txt structure.txt usage.txt \
+	wizard.txt  # installed into docdir
+
+exampledir = $(datadir)/doc
+dist_example_DATA = example.c  # exampledir has the same value as docdir, so example.c lands beside the docs
+
+
+EXTRA_DIST = win release $(DOCS) testimages CMakeLists.txt \
+	sharedlib/CMakeLists.txt cmakescripts libjpeg.map.in doc doxygen.config \
+	jccolext.c jdcolext.c jdcol565.c jdmrgext.c jstdhuff.c
+
+dist-hook:
+	rm -rf `find $(distdir) -name .svn`  # remove every .svn entry found under the distribution directory
+
+
+SUBDIRS += md5
+
+if WITH_12BIT
+
+TESTORIG = testorig12.jpg
+MD5_JPEG_RGB_ISLOW = 9620f424569594bb9242b48498ad801f
+MD5_PPM_RGB_ISLOW = f3301d2219783b8b3d942b7239fa50c0
+MD5_JPEG_422_IFAST_OPT = 7322e3bd2f127f7de4b40d4480ce60e4
+MD5_PPM_422_IFAST = 79807fa552899e66a04708f533e16950
+MD5_PPM_422M_IFAST = 07737bfe8a7c1c87aaa393a0098d16b0
+MD5_JPEG_420_IFAST_Q100_PROG = a1da220b5604081863a504297ed59e55
+MD5_PPM_420_Q100_IFAST = 1b3730122709f53d007255e8dfd3305e
+MD5_PPM_420M_Q100_IFAST = 980a1a3c5bf9510022869d30b7d26566
+MD5_JPEG_GRAY_ISLOW = 235c90707b16e2e069f37c888b2636d9
+MD5_PPM_GRAY_ISLOW = 7213c10af507ad467da5578ca5ee1fca
+MD5_PPM_GRAY_ISLOW_RGB = e96ee81c30a6ed422d466338bd3de65d
+MD5_JPEG_420S_IFAST_OPT = 7af8e60be4d9c227ec63ac9b6630855e
+MD5_JPEG_3x2_FLOAT_PROG = a8c17daf77b457725ec929e215b603f8
+MD5_PPM_3x2_FLOAT = 42876ab9e5c2f76a87d08db5fbd57956
+MD5_PPM_420M_ISLOW_2_1 = 4ca6be2a6f326ff9eaab63e70a8259c0
+MD5_PPM_420M_ISLOW_15_8 = 12aa9f9534c1b3d7ba047322226365eb
+MD5_PPM_420M_ISLOW_13_8 = f7e22817c7b25e1393e4ec101e9d4e96
+MD5_PPM_420M_ISLOW_11_8 = 800a16f9f4dc9b293197bfe11be10a82
+MD5_PPM_420M_ISLOW_9_8 = 06b7a92a9bc69f4dc36ec40f1937d55c
+MD5_PPM_420M_ISLOW_7_8 = 3ec444a14a4ab4eab88ffc49c48eca43
+MD5_PPM_420M_ISLOW_3_4 = 3e726b7ea872445b19437d1c1d4f0d93
+MD5_PPM_420M_ISLOW_5_8 = a8a771abdc94301d20ffac119b2caccd
+MD5_PPM_420M_ISLOW_1_2 = b419124dd5568b085787234866102866
+MD5_PPM_420M_ISLOW_3_8 = 343d19015531b7bbe746124127244fa8
+MD5_PPM_420M_ISLOW_1_4 = 35fd59d866e44659edfa3c18db2a3edb
+MD5_PPM_420M_ISLOW_1_8 = ccaed48ac0aedefda5d4abe4013f4ad7
+MD5_JPEG_CROP = cdb35ff4b4519392690ea040c56ea99c
+
+else
+
+TESTORIG = testorig.jpg
+MD5_JPEG_RGB_ISLOW = 768e970dd57b340ff1b83c9d3d47c77b
+MD5_PPM_RGB_ISLOW = 00a257f5393fef8821f2b88ac7421291
+MD5_BMP_RGB_ISLOW_565 = f07d2e75073e4bb10f6c6f4d36e2e3be
+MD5_BMP_RGB_ISLOW_565D = 4cfa0928ef3e6bb626d7728c924cfda4
+MD5_JPEG_422_IFAST_OPT = 2540287b79d913f91665e660303ab2c8
+MD5_PPM_422_IFAST = 35bd6b3f833bad23de82acea847129fa
+MD5_PPM_422M_IFAST = 8dbc65323d62cca7c91ba02dd1cfa81d
+MD5_BMP_422M_IFAST_565 = 3294bd4d9a1f2b3d08ea6020d0db7065
+MD5_BMP_422M_IFAST_565D = da98c9c7b6039511be4a79a878a9abc1
+MD5_JPEG_420_IFAST_Q100_PROG = 990cbe0329c882420a2094da7e5adade
+MD5_PPM_420_Q100_IFAST = 5a732542015c278ff43635e473a8a294
+MD5_PPM_420M_Q100_IFAST = ff692ee9323a3b424894862557c092f1
+MD5_JPEG_GRAY_ISLOW = 72b51f894b8f4a10b3ee3066770aa38d
+MD5_PPM_GRAY_ISLOW = 8d3596c56eace32f205deccc229aa5ed
+MD5_PPM_GRAY_ISLOW_RGB = 116424ac07b79e5e801f00508eab48ec
+MD5_BMP_GRAY_ISLOW_565 = 12f78118e56a2f48b966f792fedf23cc
+MD5_BMP_GRAY_ISLOW_565D = bdbbd616441a24354c98553df5dc82db
+MD5_JPEG_420S_IFAST_OPT = 388708217ac46273ca33086b22827ed8
+# See README-turbo.txt for more details on why this next bit is necessary.
+if WITH_SSE_FLOAT_DCT
+MD5_JPEG_3x2_FLOAT_PROG = 343e3f8caf8af5986ebaf0bdc13b5c71
+MD5_PPM_3x2_FLOAT = 1a75f36e5904d6fc3a85a43da9ad89bb
+else
+MD5_JPEG_3x2_FLOAT_PROG = 9bca803d2042bd1eb03819e2bf92b3e5
+MD5_PPM_3x2_FLOAT = f6bfab038438ed8f5522fbd33595dcdc
+endif
+MD5_JPEG_420_ISLOW_ARI = e986fb0a637a8d833d96e8a6d6d84ea1
+MD5_JPEG_444_ISLOW_PROGARI = 0a8f1c8f66e113c3cf635df0a475a617
+MD5_PPM_420M_IFAST_ARI = 72b59a99bcf1de24c5b27d151bde2437
+MD5_JPEG_420_ISLOW = 9a68f56bc76e466aa7e52f415d0f4a5f
+MD5_PPM_420M_ISLOW_2_1 = 9f9de8c0612f8d06869b960b05abf9c9
+MD5_PPM_420M_ISLOW_15_8 = b6875bc070720b899566cc06459b63b7
+MD5_PPM_420M_ISLOW_13_8 = bc3452573c8152f6ae552939ee19f82f
+MD5_PPM_420M_ISLOW_11_8 = d8cc73c0aaacd4556569b59437ba00a5
+MD5_PPM_420M_ISLOW_9_8 = d25e61bc7eac0002f5b393aa223747b6
+MD5_PPM_420M_ISLOW_7_8 = ddb564b7c74a09494016d6cd7502a946
+MD5_PPM_420M_ISLOW_3_4 = 8ed8e68808c3fbc4ea764fc9d2968646
+MD5_PPM_420M_ISLOW_5_8 = a3363274999da2366a024efae6d16c9b
+MD5_PPM_420M_ISLOW_1_2 = e692a315cea26b988c8e8b29a5dbcd81
+MD5_PPM_420M_ISLOW_3_8 = 79eca9175652ced755155c90e785a996
+MD5_PPM_420M_ISLOW_1_4 = 79cd778f8bf1a117690052cacdd54eca
+MD5_PPM_420M_ISLOW_1_8 = 391b3d4aca640c8567d6f8745eb2142f
+MD5_BMP_420_ISLOW_256 = 4980185e3776e89bd931736e1cddeee6
+MD5_BMP_420_ISLOW_565 = bf9d13e16c4923b92e1faa604d7922cb
+MD5_BMP_420_ISLOW_565D = 6bde71526acc44bcff76f696df8638d2
+MD5_BMP_420M_ISLOW_565 = 8dc0185245353cfa32ad97027342216f
+MD5_BMP_420M_ISLOW_565D =d1be3a3339166255e76fa50a0d70d73e
+MD5_JPEG_CROP = b4197f377e621c4e9b1d20471432610d
+
+endif
+
+test: testclean all
+
+if WITH_TURBOJPEG
+if WITH_JAVA
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -noyuvpad
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi
+	$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi -noyuvpad
+endif
+	./tjunittest
+	./tjunittest -alloc
+	./tjunittest -yuv
+	./tjunittest -yuv -alloc
+	./tjunittest -yuv -noyuvpad
+endif
+
+# These tests are carefully crafted to provide full coverage of as many of the
+# underlying algorithms as possible (including all of the SIMD-accelerated
+# ones.)
+
+# CC: null  SAMP: fullsize  FDCT: islow  ENT: huff
+	./cjpeg -rgb -dct int -outfile testout_rgb_islow.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_RGB_ISLOW) testout_rgb_islow.jpg
+# CC: null  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -ppm -outfile testout_rgb_islow.ppm testout_rgb_islow.jpg
+	md5/md5cmp $(MD5_PPM_RGB_ISLOW) testout_rgb_islow.ppm
+	rm testout_rgb_islow.ppm
+if WITH_12BIT
+	rm testout_rgb_islow.jpg
+else
+# CC: RGB->RGB565  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_rgb_islow_565.bmp testout_rgb_islow.jpg
+	md5/md5cmp $(MD5_BMP_RGB_ISLOW_565) testout_rgb_islow_565.bmp
+	rm testout_rgb_islow_565.bmp
+# CC: RGB->RGB565 (dithered)  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -bmp -outfile testout_rgb_islow_565D.bmp testout_rgb_islow.jpg
+	md5/md5cmp $(MD5_BMP_RGB_ISLOW_565D) testout_rgb_islow_565D.bmp
+	rm testout_rgb_islow_565D.bmp testout_rgb_islow.jpg
+endif
+
+# CC: RGB->YCC  SAMP: fullsize/h2v1  FDCT: ifast  ENT: 2-pass huff
+	./cjpeg -sample 2x1 -dct fast -opt -outfile testout_422_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_422_IFAST_OPT) testout_422_ifast_opt.jpg
+# CC: YCC->RGB  SAMP: fullsize/h2v1 fancy  IDCT: ifast  ENT: huff
+	./djpeg -dct fast -outfile testout_422_ifast.ppm testout_422_ifast_opt.jpg
+	md5/md5cmp $(MD5_PPM_422_IFAST) testout_422_ifast.ppm
+	rm testout_422_ifast.ppm
+# CC: YCC->RGB  SAMP: h2v1 merged  IDCT: ifast  ENT: huff
+	./djpeg -dct fast -nosmooth -outfile testout_422m_ifast.ppm testout_422_ifast_opt.jpg
+	md5/md5cmp $(MD5_PPM_422M_IFAST) testout_422m_ifast.ppm
+	rm testout_422m_ifast.ppm
+if WITH_12BIT
+	rm testout_422_ifast_opt.jpg
+else
+# CC: YCC->RGB565  SAMP: h2v1 merged  IDCT: islow  ENT: huff
+	./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_422m_ifast_565.bmp testout_422_ifast_opt.jpg
+	md5/md5cmp $(MD5_BMP_422M_IFAST_565) testout_422m_ifast_565.bmp
+	rm testout_422m_ifast_565.bmp
+# CC: YCC->RGB565 (dithered)  SAMP: h2v1 merged  IDCT: islow  ENT: huff
+	./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg
+	md5/md5cmp $(MD5_BMP_422M_IFAST_565D) testout_422m_ifast_565D.bmp
+	rm testout_422m_ifast_565D.bmp testout_422_ifast_opt.jpg
+endif
+
+# CC: RGB->YCC  SAMP: fullsize/h2v2  FDCT: ifast  ENT: prog huff
+	./cjpeg -sample 2x2 -quality 100 -dct fast -prog -outfile testout_420_q100_ifast_prog.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_420_IFAST_Q100_PROG) testout_420_q100_ifast_prog.jpg
+# CC: YCC->RGB  SAMP: fullsize/h2v2 fancy  IDCT: ifast  ENT: prog huff
+	./djpeg -dct fast -outfile testout_420_q100_ifast.ppm testout_420_q100_ifast_prog.jpg
+	md5/md5cmp $(MD5_PPM_420_Q100_IFAST) testout_420_q100_ifast.ppm
+	rm testout_420_q100_ifast.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: ifast  ENT: prog huff
+	./djpeg -dct fast -nosmooth -outfile testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg
+	md5/md5cmp $(MD5_PPM_420M_Q100_IFAST) testout_420m_q100_ifast.ppm
+	rm testout_420m_q100_ifast.ppm testout_420_q100_ifast_prog.jpg
+
+# CC: RGB->Gray  SAMP: fullsize  FDCT: islow  ENT: huff
+	./cjpeg -gray -dct int -outfile testout_gray_islow.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_GRAY_ISLOW) testout_gray_islow.jpg
+# CC: Gray->Gray  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -outfile testout_gray_islow.ppm testout_gray_islow.jpg
+	md5/md5cmp $(MD5_PPM_GRAY_ISLOW) testout_gray_islow.ppm
+	rm testout_gray_islow.ppm
+# CC: Gray->RGB  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb -outfile testout_gray_islow_rgb.ppm testout_gray_islow.jpg
+	md5/md5cmp $(MD5_PPM_GRAY_ISLOW_RGB) testout_gray_islow_rgb.ppm
+	rm testout_gray_islow_rgb.ppm
+if WITH_12BIT
+	rm testout_gray_islow.jpg
+else
+# CC: Gray->RGB565  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_gray_islow_565.bmp testout_gray_islow.jpg
+	md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565) testout_gray_islow_565.bmp
+	rm testout_gray_islow_565.bmp
+# CC: Gray->RGB565 (dithered)  SAMP: fullsize  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -bmp -outfile testout_gray_islow_565D.bmp testout_gray_islow.jpg
+	md5/md5cmp $(MD5_BMP_GRAY_ISLOW_565D) testout_gray_islow_565D.bmp
+	rm testout_gray_islow_565D.bmp testout_gray_islow.jpg
+endif
+
+# CC: RGB->YCC  SAMP: fullsize smooth/h2v2 smooth  FDCT: islow
+# ENT: 2-pass huff
+	./cjpeg -sample 2x2 -smooth 1 -dct int -opt -outfile testout_420s_ifast_opt.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_420S_IFAST_OPT) testout_420s_ifast_opt.jpg
+	rm testout_420s_ifast_opt.jpg
+
+# CC: RGB->YCC  SAMP: fullsize/int  FDCT: float  ENT: prog huff
+	./cjpeg -sample 3x2 -dct float -prog -outfile testout_3x2_float_prog.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_3x2_FLOAT_PROG) testout_3x2_float_prog.jpg
+# CC: YCC->RGB  SAMP: fullsize/int  IDCT: float  ENT: prog huff
+	./djpeg -dct float -outfile testout_3x2_float.ppm testout_3x2_float_prog.jpg
+	md5/md5cmp $(MD5_PPM_3x2_FLOAT) testout_3x2_float.ppm
+	rm testout_3x2_float.ppm testout_3x2_float_prog.jpg
+
+if WITH_ARITH_ENC
+# CC: RGB->YCC  SAMP: fullsize/h2v2  FDCT: islow  ENT: arith
+	./cjpeg -dct int -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg
+	rm testout_420_islow_ari.jpg
+	./jpegtran -arithmetic -outfile testout_420_islow_ari.jpg $(srcdir)/testimages/testimgint.jpg
+	md5/md5cmp $(MD5_JPEG_420_ISLOW_ARI) testout_420_islow_ari.jpg
+	rm testout_420_islow_ari.jpg
+# CC: RGB->YCC  SAMP: fullsize  FDCT: islow  ENT: prog arith
+	./cjpeg -sample 1x1 -dct int -progressive -arithmetic -outfile testout_444_islow_progari.jpg $(srcdir)/testimages/testorig.ppm
+	md5/md5cmp $(MD5_JPEG_444_ISLOW_PROGARI) testout_444_islow_progari.jpg
+	rm testout_444_islow_progari.jpg
+endif
+if WITH_ARITH_DEC
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: ifast  ENT: arith
+	./djpeg -fast -ppm -outfile testout_420m_ifast_ari.ppm $(srcdir)/testimages/testimgari.jpg
+	md5/md5cmp $(MD5_PPM_420M_IFAST_ARI) testout_420m_ifast_ari.ppm
+	rm testout_420m_ifast_ari.ppm
+	./jpegtran -outfile testout_420_islow.jpg $(srcdir)/testimages/testimgari.jpg
+	md5/md5cmp $(MD5_JPEG_420_ISLOW) testout_420_islow.jpg
+	rm testout_420_islow.jpg
+endif
+
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 16x16 islow  ENT: huff
+	./djpeg -dct int -scale 2/1 -nosmooth -ppm -outfile testout_420m_islow_2_1.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_2_1) testout_420m_islow_2_1.ppm
+	rm testout_420m_islow_2_1.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 15x15 islow  ENT: huff
+	./djpeg -dct int -scale 15/8 -nosmooth -ppm -outfile testout_420m_islow_15_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_15_8) testout_420m_islow_15_8.ppm
+	rm testout_420m_islow_15_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 13x13 islow  ENT: huff
+	./djpeg -dct int -scale 13/8 -nosmooth -ppm -outfile testout_420m_islow_13_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_13_8) testout_420m_islow_13_8.ppm
+	rm testout_420m_islow_13_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 11x11 islow  ENT: huff
+	./djpeg -dct int -scale 11/8 -nosmooth -ppm -outfile testout_420m_islow_11_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_11_8) testout_420m_islow_11_8.ppm
+	rm testout_420m_islow_11_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 9x9 islow  ENT: huff
+	./djpeg -dct int -scale 9/8 -nosmooth -ppm -outfile testout_420m_islow_9_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_9_8) testout_420m_islow_9_8.ppm
+	rm testout_420m_islow_9_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 7x7 islow/14x14 islow  ENT: huff
+	./djpeg -dct int -scale 7/8 -nosmooth -ppm -outfile testout_420m_islow_7_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_7_8) testout_420m_islow_7_8.ppm
+	rm testout_420m_islow_7_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 6x6 islow/12x12 islow  ENT: huff
+	./djpeg -dct int -scale 3/4 -nosmooth -ppm -outfile testout_420m_islow_3_4.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_3_4) testout_420m_islow_3_4.ppm
+	rm testout_420m_islow_3_4.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 5x5 islow/10x10 islow  ENT: huff
+	./djpeg -dct int -scale 5/8 -nosmooth -ppm -outfile testout_420m_islow_5_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_5_8) testout_420m_islow_5_8.ppm
+	rm testout_420m_islow_5_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 4x4 islow/8x8 islow  ENT: huff
+	./djpeg -dct int -scale 1/2 -nosmooth -ppm -outfile testout_420m_islow_1_2.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_1_2) testout_420m_islow_1_2.ppm
+	rm testout_420m_islow_1_2.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 3x3 islow/6x6 islow  ENT: huff
+	./djpeg -dct int -scale 3/8 -nosmooth -ppm -outfile testout_420m_islow_3_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_3_8) testout_420m_islow_3_8.ppm
+	rm testout_420m_islow_3_8.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 2x2 islow/4x4 islow  ENT: huff
+	./djpeg -dct int -scale 1/4 -nosmooth -ppm -outfile testout_420m_islow_1_4.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_1_4) testout_420m_islow_1_4.ppm
+	rm testout_420m_islow_1_4.ppm
+# CC: YCC->RGB  SAMP: h2v2 merged  IDCT: 1x1 islow/2x2 islow  ENT: huff
+	./djpeg -dct int -scale 1/8 -nosmooth -ppm -outfile testout_420m_islow_1_8.ppm $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_PPM_420M_ISLOW_1_8) testout_420m_islow_1_8.ppm
+	rm testout_420m_islow_1_8.ppm
+if WITH_12BIT
+else
+# CC: YCC->RGB (dithered)  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+	./djpeg -dct int -colors 256 -bmp -outfile testout_420_islow_256.bmp $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_BMP_420_ISLOW_256) testout_420_islow_256.bmp
+	rm testout_420_islow_256.bmp
+# CC: YCC->RGB565  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -dither none -bmp -outfile testout_420_islow_565.bmp $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_BMP_420_ISLOW_565) testout_420_islow_565.bmp
+	rm testout_420_islow_565.bmp
+# CC: YCC->RGB565 (dithered)  SAMP: h2v2 fancy  IDCT: islow  ENT: huff
+	./djpeg -dct int -rgb565 -bmp -outfile testout_420_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_BMP_420_ISLOW_565D) testout_420_islow_565D.bmp
+	rm testout_420_islow_565D.bmp
+# CC: YCC->RGB565  SAMP: h2v2 merged  IDCT: islow  ENT: huff
+	./djpeg -dct int -nosmooth -rgb565 -dither none -bmp -outfile testout_420m_islow_565.bmp $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_BMP_420M_ISLOW_565) testout_420m_islow_565.bmp
+	rm testout_420m_islow_565.bmp
+# CC: YCC->RGB565 (dithered)  SAMP: h2v2 merged  IDCT: islow  ENT: huff
+	./djpeg -dct int -nosmooth -rgb565 -bmp -outfile testout_420m_islow_565D.bmp $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_BMP_420M_ISLOW_565D) testout_420m_islow_565D.bmp
+	rm testout_420m_islow_565D.bmp
+endif
+
+	./jpegtran -crop 120x90+20+50 -transpose -perfect -outfile testout_crop.jpg $(srcdir)/testimages/$(TESTORIG)
+	md5/md5cmp $(MD5_JPEG_CROP) testout_crop.jpg
+	rm testout_crop.jpg
+
+
+testclean:
+	rm -f testout*
+	rm -f *_GRAY_*.bmp
+	rm -f *_GRAY_*.png
+	rm -f *_GRAY_*.ppm
+	rm -f *_GRAY_*.jpg
+	rm -f *_GRAY.yuv
+	rm -f *_420_*.bmp
+	rm -f *_420_*.png
+	rm -f *_420_*.ppm
+	rm -f *_420_*.jpg
+	rm -f *_420.yuv
+	rm -f *_422_*.bmp
+	rm -f *_422_*.png
+	rm -f *_422_*.ppm
+	rm -f *_422_*.jpg
+	rm -f *_422.yuv
+	rm -f *_444_*.bmp
+	rm -f *_444_*.png
+	rm -f *_444_*.ppm
+	rm -f *_444_*.jpg
+	rm -f *_444.yuv
+	rm -f *_440_*.bmp
+	rm -f *_440_*.png
+	rm -f *_440_*.ppm
+	rm -f *_440_*.jpg
+	rm -f *_440.yuv
+	rm -f *_411_*.bmp
+	rm -f *_411_*.png
+	rm -f *_411_*.ppm
+	rm -f *_411_*.jpg
+	rm -f *_411.yuv
+
+
+tjtest:
+	sh ./tjbenchtest
+	sh ./tjbenchtest -yuv
+if WITH_JAVA
+	sh ./tjbenchtest.java
+	sh ./tjbenchtest.java -yuv
+endif
+
+
+pkgscripts/libjpeg-turbo.spec: pkgscripts/libjpeg-turbo.spec.tmpl
+	cat pkgscripts/libjpeg-turbo.spec.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__includedir}@$(includedir)@g | \
+		sed s@%{__libdir}@$(libdir)@g | sed s@%{__mandir}@$(mandir)@g \
+		> pkgscripts/libjpeg-turbo.spec
+
+rpm: all pkgscripts/libjpeg-turbo.spec
+	TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \
+	mkdir -p $$TMPDIR/RPMS; \
+	ln -fs `pwd` $$TMPDIR/BUILD; \
+	rm -f ${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \
+	rpmbuild -bb --define "_blddir $$TMPDIR/buildroot"  \
+		--define "_topdir $$TMPDIR" \
+		--target ${RPMARCH} pkgscripts/libjpeg-turbo.spec; \
+	cp $$TMPDIR/RPMS/${RPMARCH}/${PKGNAME}-${VERSION}-${BUILD}.${RPMARCH}.rpm \
+		${PKGNAME}-${VERSION}.${RPMARCH}.rpm; \
+	rm -rf $$TMPDIR
+
+srpm: dist-gzip pkgscripts/libjpeg-turbo.spec
+	TMPDIR=`mktemp -d /tmp/${PACKAGE_NAME}-build.XXXXXX`; \
+	mkdir -p $$TMPDIR/RPMS; \
+	mkdir -p $$TMPDIR/SRPMS; \
+	mkdir -p $$TMPDIR/BUILD; \
+	mkdir -p $$TMPDIR/SOURCES; \
+	mkdir -p $$TMPDIR/SPECS; \
+	rm -f ${PKGNAME}-${VERSION}.src.rpm; \
+	cp ${PACKAGE_NAME}-${VERSION}.tar.gz $$TMPDIR/SOURCES; \
+	cat pkgscripts/libjpeg-turbo.spec | sed s/%{_blddir}/%{_tmppath}/g \
+		| sed s/#--\>//g \
+		> $$TMPDIR/SPECS/libjpeg-turbo.spec; \
+	rpmbuild -bs --define "_topdir $$TMPDIR" $$TMPDIR/SPECS/libjpeg-turbo.spec; \
+	cp $$TMPDIR/SRPMS/${PKGNAME}-${VERSION}-${BUILD}.src.rpm \
+		${PKGNAME}-${VERSION}.src.rpm; \
+	rm -rf $$TMPDIR
+
+pkgscripts/makedpkg: pkgscripts/makedpkg.tmpl
+	cat pkgscripts/makedpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \
+		> pkgscripts/makedpkg
+
+deb: all pkgscripts/makedpkg
+	sh pkgscripts/makedpkg
+
+pkgscripts/uninstall: pkgscripts/uninstall.tmpl
+	cat pkgscripts/uninstall.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__datadir}@$(datadir)@g | \
+		sed s@%{__includedir}@$(includedir)@g | sed s@%{__libdir}@$(libdir)@g | \
+		sed s@%{__mandir}@$(mandir)@g > pkgscripts/uninstall
+
+pkgscripts/makemacpkg: pkgscripts/makemacpkg.tmpl
+	cat pkgscripts/makemacpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__bindir}@$(bindir)@g | sed s@%{__docdir}@$(docdir)@g | \
+		sed s@%{__libdir}@$(libdir)@g > pkgscripts/makemacpkg
+
+if X86_64
+
+udmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg -build32 ${BUILDDIR32}
+
+iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
+
+else
+
+iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S}
+
+endif
+
+dmg: all pkgscripts/makemacpkg pkgscripts/uninstall
+	sh pkgscripts/makemacpkg
+
+pkgscripts/makecygwinpkg: pkgscripts/makecygwinpkg.tmpl
+	cat pkgscripts/makecygwinpkg.tmpl | sed s@%{__prefix}@$(prefix)@g | \
+		sed s@%{__docdir}@$(docdir)@g | sed s@%{__libdir}@$(libdir)@g \
+		> pkgscripts/makecygwinpkg
+
+cygwinpkg: all pkgscripts/makecygwinpkg
+	sh pkgscripts/makecygwinpkg
diff --git a/README b/README
index 86cc206..44e69d3 100644
--- a/README
+++ b/README
@@ -1,24 +1,24 @@
+libjpeg-turbo note:  This file has been modified by The libjpeg-turbo Project
+to include only information relevant to libjpeg-turbo, to wordsmith certain
+sections, and to remove impolitic language that existed in the libjpeg v8
+README.  It is included only for reference.  Please see README-turbo.txt for
+information specific to libjpeg-turbo.
+
+
 The Independent JPEG Group's JPEG software
 ==========================================
 
-README for release 6b of 27-Mar-1998
-====================================
+This distribution contains a release of the Independent JPEG Group's free JPEG
+software.  You are welcome to redistribute this software and to use it for any
+purpose, subject to the conditions under LEGAL ISSUES, below.
 
-This distribution contains the sixth public release of the Independent JPEG
-Group's free JPEG software.  You are welcome to redistribute this software and
-to use it for any purpose, subject to the conditions under LEGAL ISSUES, below.
+This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
+Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
+Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
+and other members of the Independent JPEG Group.
 
-Serious users of this software (particularly those incorporating it into
-larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to
-our electronic mailing list.  Mailing list members are notified of updates
-and have a chance to participate in technical discussions, etc.
-
-This software is the work of Tom Lane, Philip Gladstone, Jim Boucher,
-Lee Crocker, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi,
-Guido Vollbeding, Ge' Weijers, and other members of the Independent JPEG
-Group.
-
-IJG is not affiliated with the official ISO JPEG standards committee.
+IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
+(also known as JPEG, together with ITU-T SG16).
 
 
 DOCUMENTATION ROADMAP
@@ -30,27 +30,25 @@
 LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
 REFERENCES          Where to learn more about JPEG.
 ARCHIVE LOCATIONS   Where to find newer versions of this software.
-RELATED SOFTWARE    Other stuff you should get.
 FILE FORMAT WARS    Software *not* to get.
 TO DO               Plans for future IJG releases.
 
 Other documentation files in the distribution are:
 
 User documentation:
-  install.doc       How to configure and install the IJG software.
-  usage.doc         Usage instructions for cjpeg, djpeg, jpegtran,
+  install.txt       How to configure and install the IJG software.
+  usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
                     rdjpgcom, and wrjpgcom.
-  *.1               Unix-style man pages for programs (same info as usage.doc).
-  wizard.doc        Advanced usage instructions for JPEG wizards only.
+  *.1               Unix-style man pages for programs (same info as usage.txt).
+  wizard.txt        Advanced usage instructions for JPEG wizards only.
   change.log        Version-to-version change highlights.
 Programmer and internal documentation:
-  libjpeg.doc       How to use the JPEG library in your own programs.
+  libjpeg.txt       How to use the JPEG library in your own programs.
   example.c         Sample code for calling the JPEG library.
-  structure.doc     Overview of the JPEG library's internal structure.
-  filelist.doc      Road map of IJG files.
-  coderules.doc     Coding style rules --- please read if you contribute code.
+  structure.txt     Overview of the JPEG library's internal structure.
+  coderules.txt     Coding style rules --- please read if you contribute code.
 
-Please read at least the files install.doc and usage.doc.  Useful information
+Please read at least the files install.txt and usage.txt.  Some information
 can also be found in the JPEG FAQ (Frequently Asked Questions) article.  See
 ARCHIVE LOCATIONS below to find out where to obtain the FAQ article.
 
@@ -62,24 +60,27 @@
 OVERVIEW
 ========
 
-This package contains C software to implement JPEG image compression and
-decompression.  JPEG (pronounced "jay-peg") is a standardized compression
-method for full-color and gray-scale images.  JPEG is intended for compressing
-"real-world" scenes; line drawings, cartoons and other non-realistic images
-are not its strong suit.  JPEG is lossy, meaning that the output image is not
-exactly identical to the input image.  Hence you must not use JPEG if you
-have to have identical output bits.  However, on typical photographic images,
-very good compression levels can be obtained with no visible change, and
-remarkably high compression levels are possible if you can tolerate a
-low-quality image.  For more details, see the references, or just experiment
-with various compression settings.
+This package contains C software to implement JPEG image encoding, decoding,
+and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
+method for full-color and grayscale images.  JPEG's strong suit is compressing
+photographic images or other types of images that have smooth color and
+brightness transitions between neighboring pixels.  Images with sharp lines or
+other abrupt features may not compress well with JPEG, and a higher JPEG
+quality may have to be used to avoid visible compression artifacts with such
+images.
+
+JPEG is lossy, meaning that the output pixels are not necessarily identical to
+the input pixels.  However, on photographic content and other "smooth" images,
+very good compression ratios can be obtained with no visible compression
+artifacts, and extremely high compression ratios are possible if you are
+willing to sacrifice image quality (by reducing the "quality" setting in the
+compressor.)
 
 This software implements JPEG baseline, extended-sequential, and progressive
 compression processes.  Provision is made for supporting all variants of these
 processes, although some uncommon parameter settings aren't implemented yet.
-For legal reasons, we are not distributing code for the arithmetic-coding
-variants of JPEG; see LEGAL ISSUES.  We have made no provision for supporting
-the hierarchical or lossless processes defined in the standard.
+We have made no provision for supporting the hierarchical or lossless
+processes defined in the standard.
 
 We provide a set of library routines for reading and writing JPEG image files,
 plus two sample applications "cjpeg" and "djpeg", which use the library to
@@ -91,10 +92,11 @@
 for example, the color quantization modules are not strictly part of JPEG
 decoding, but they are essential for output to colormapped file formats or
 colormapped displays.  These extra functions can be compiled out of the
-library if not required for a particular application.  We have also included
-"jpegtran", a utility for lossless transcoding between different JPEG
-processes, and "rdjpgcom" and "wrjpgcom", two simple applications for
-inserting and extracting textual comments in JFIF files.
+library if not required for a particular application.
+
+We have also included "jpegtran", a utility for lossless transcoding between
+different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
+applications for inserting and extracting textual comments in JFIF files.
 
 The emphasis in designing this software has been on achieving portability and
 flexibility, while also making it fast enough to be useful.  In particular,
@@ -127,7 +129,7 @@
 fitness for a particular purpose.  This software is provided "AS IS", and you,
 its user, assume the entire risk as to its quality and accuracy.
 
-This software is copyright (C) 1991-1998, Thomas G. Lane.
+This software is copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
 All Rights Reserved except as specified below.
 
 Permission is hereby granted to use, copy, modify, and distribute this
@@ -158,29 +160,11 @@
 assumed by the product vendor.
 
 
-ansi2knr.c is included in this distribution by permission of L. Peter Deutsch,
-sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA.
-ansi2knr.c is NOT covered by the above copyright and conditions, but instead
-by the usual distribution terms of the Free Software Foundation; principally,
-that you must include source code if you redistribute it.  (See the file
-ansi2knr.c for full details.)  However, since ansi2knr.c is not needed as part
-of any program generated from the IJG code, this does not limit you more than
-the foregoing paragraphs do.
-
 The Unix configuration script "configure" was produced with GNU Autoconf.
 It is copyright by the Free Software Foundation but is freely distributable.
 The same holds for its supporting scripts (config.guess, config.sub,
-ltconfig, ltmain.sh).  Another support script, install-sh, is copyright
-by M.I.T. but is also freely distributable.
-
-It appears that the arithmetic coding option of the JPEG spec is covered by
-patents owned by IBM, AT&T, and Mitsubishi.  Hence arithmetic coding cannot
-legally be used without obtaining one or more licenses.  For this reason,
-support for arithmetic coding has been removed from the free JPEG software.
-(Since arithmetic coding provides only a marginal gain over the unpatented
-Huffman mode, it is unlikely that very many implementations will support it.)
-So far as we are aware, there are no patent restrictions on the remaining
-code.
+ltmain.sh).  Another support script, install-sh, is copyright by X Consortium
+but is also freely distributable.
 
 The IJG distribution formerly included code to read and write GIF files.
 To avoid entanglement with the Unisys LZW patent, GIF reading support has
@@ -198,7 +182,7 @@
 REFERENCES
 ==========
 
-We highly recommend reading one or more of these references before trying to
+We recommend reading one or more of these references before trying to
 understand the innards of the JPEG software.
 
 The best short technical introduction to the JPEG compression algorithm is
@@ -207,7 +191,7 @@
 (Adjacent articles in that issue discuss MPEG motion picture compression,
 applications of JPEG, and related topics.)  If you don't have the CACM issue
 handy, a PostScript file containing a revised version of Wallace's article is
-available at ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz.  The file (actually
+available at http://www.ijg.org/files/wallace.ps.gz.  The file (actually
 a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
 omits the sample images that appeared in CACM, but it includes corrections
 and some added material.  Note: the Wallace article is copyright ACM and IEEE,
@@ -222,82 +206,53 @@
 sample code is far from industrial-strength, but when you are ready to look
 at a full implementation, you've got one here...
 
-The best full description of JPEG is the textbook "JPEG Still Image Data
-Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published
-by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.  Price US$59.95, 638 pp.
-The book includes the complete text of the ISO JPEG standards (DIS 10918-1
-and draft DIS 10918-2).  This is by far the most complete exposition of JPEG
-in existence, and we highly recommend it.
+The best currently available description of JPEG is the textbook "JPEG Still
+Image Data Compression Standard" by William B. Pennebaker and Joan L.
+Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
+Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
+standards (DIS 10918-1 and draft DIS 10918-2).
 
-The JPEG standard itself is not available electronically; you must order a
-paper copy through ISO or ITU.  (Unless you feel a need to own a certified
-official copy, we recommend buying the Pennebaker and Mitchell book instead;
-it's much cheaper and includes a great deal of useful explanatory material.)
-In the USA, copies of the standard may be ordered from ANSI Sales at (212)
-642-4900, or from Global Engineering Documents at (800) 854-7179.  (ANSI
-doesn't take credit card orders, but Global does.)  It's not cheap: as of
-1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7%
-shipping/handling.  The standard is divided into two parts, Part 1 being the
-actual specification, while Part 2 covers compliance testing methods.  Part 1
-is titled "Digital Compression and Coding of Continuous-tone Still Images,
+The original JPEG standard is divided into two parts, Part 1 being the actual
+specification, while Part 2 covers compliance testing methods.  Part 1 is
+titled "Digital Compression and Coding of Continuous-tone Still Images,
 Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
 10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
 Continuous-tone Still Images, Part 2: Compliance testing" and has document
 numbers ISO/IEC IS 10918-2, ITU-T T.83.
 
-Some extensions to the original JPEG standard are defined in JPEG Part 3,
-a newer ISO standard numbered ISO/IEC IS 10918-3 and ITU-T T.84.  IJG
-currently does not support any Part 3 extensions.
-
 The JPEG standard does not specify all details of an interchangeable file
 format.  For the omitted details we follow the "JFIF" conventions, revision
-1.02.  A copy of the JFIF spec is available from:
-	Literature Department
-	C-Cube Microsystems, Inc.
-	1778 McCarthy Blvd.
-	Milpitas, CA 95035
-	phone (408) 944-6300,  fax (408) 944-6314
-A PostScript version of this document is available by FTP at
-ftp://ftp.uu.net/graphics/jpeg/jfif.ps.gz.  There is also a plain text
-version at ftp://ftp.uu.net/graphics/jpeg/jfif.txt.gz, but it is missing
-the figures.
+1.02.  JFIF 1.02 has been adopted as an Ecma International Technical Report
+and thus received a formal publication status.  It is available as a free
+download in PDF format from
+http://www.ecma-international.org/publications/techreports/E-TR-098.htm.
+A PostScript version of the JFIF document is available at
+http://www.ijg.org/files/jfif.ps.gz.  There is also a plain text version at
+http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures.
 
 The TIFF 6.0 file format specification can be obtained by FTP from
 ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation scheme
 found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems.
 IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6).
 Instead, we recommend the JPEG design proposed by TIFF Technical Note #2
-(Compression tag 7).  Copies of this Note can be obtained from ftp.sgi.com or
-from ftp://ftp.uu.net/graphics/jpeg/.  It is expected that the next revision
+(Compression tag 7).  Copies of this Note can be obtained from
+http://www.ijg.org/files/.  It is expected that the next revision
 of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
 Although IJG's own code does not support TIFF/JPEG, the free libtiff library
-uses our library to implement TIFF/JPEG per the Note.  libtiff is available
-from ftp://ftp.sgi.com/graphics/tiff/.
+uses our library to implement TIFF/JPEG per the Note.
 
 
 ARCHIVE LOCATIONS
 =================
 
-The "official" archive site for this software is ftp.uu.net (Internet
-address 192.48.96.9).  The most recent released version can always be found
-there in directory graphics/jpeg.  This particular version will be archived
-as ftp://ftp.uu.net/graphics/jpeg/jpegsrc.v6b.tar.gz.  If you don't have
-direct Internet access, UUNET's archives are also available via UUCP; contact
-help@uunet.uu.net for information on retrieving files that way.
+The "official" archive site for this software is www.ijg.org.
+The most recent released version can always be found there in
+directory "files".  This particular version will be archived as
+http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip.
 
-Numerous Internet sites maintain copies of the UUNET files.  However, only
-ftp.uu.net is guaranteed to have the latest official version.
-
-You can also obtain this software in DOS-compatible "zip" archive format from
-the SimTel archives (ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/), or
-on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), library 12
-"JPEG Tools".  Again, these versions may sometimes lag behind the ftp.uu.net
-release.
-
-The JPEG FAQ (Frequently Asked Questions) article is a useful source of
-general information about JPEG.  It is updated constantly and therefore is
-not included in this distribution.  The FAQ is posted every two weeks to
-Usenet newsgroups comp.graphics.misc, news.answers, and other groups.
+The JPEG FAQ (Frequently Asked Questions) article is a source of some
+general information about JPEG.
 It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/
 and other news.answers archive sites, including the official news.answers
 archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/.
@@ -307,79 +262,21 @@
 	send usenet/news.answers/jpeg-faq/part2
 
 
-RELATED SOFTWARE
-================
-
-Numerous viewing and image manipulation programs now support JPEG.  (Quite a
-few of them use this library to do so.)  The JPEG FAQ described above lists
-some of the more popular free and shareware viewers, and tells where to
-obtain them on Internet.
-
-If you are on a Unix machine, we highly recommend Jef Poskanzer's free
-PBMPLUS software, which provides many useful operations on PPM-format image
-files.  In particular, it can convert PPM images to and from a wide range of
-other formats, thus making cjpeg/djpeg considerably more useful.  The latest
-version is distributed by the NetPBM group, and is available from numerous
-sites, notably ftp://wuarchive.wustl.edu/graphics/graphics/packages/NetPBM/.
-Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software is;
-you are likely to have difficulty making it work on any non-Unix machine.
-
-A different free JPEG implementation, written by the PVRG group at Stanford,
-is available from ftp://havefun.stanford.edu/pub/jpeg/.  This program
-is designed for research and experimentation rather than production use;
-it is slower, harder to use, and less portable than the IJG code, but it
-is easier to read and modify.  Also, the PVRG code supports lossless JPEG,
-which we do not.  (On the other hand, it doesn't do progressive JPEG.)
-
-
 FILE FORMAT WARS
 ================
 
-Some JPEG programs produce files that are not compatible with our library.
-The root of the problem is that the ISO JPEG committee failed to specify a
-concrete file format.  Some vendors "filled in the blanks" on their own,
-creating proprietary formats that no one else could read.  (For example, none
-of the early commercial JPEG implementations for the Macintosh were able to
-exchange compressed files.)
-
-The file format we have adopted is called JFIF (see REFERENCES).  This format
-has been agreed to by a number of major commercial JPEG vendors, and it has
-become the de facto standard.  JFIF is a minimal or "low end" representation.
-We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF
-Technical Note #2) for "high end" applications that need to record a lot of
-additional data about an image.  TIFF/JPEG is fairly new and not yet widely
-supported, unfortunately.
-
-The upcoming JPEG Part 3 standard defines a file format called SPIFF.
-SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should
-be able to read the most common variant of SPIFF.  SPIFF has some technical
-advantages over JFIF, but its major claim to fame is simply that it is an
-official standard rather than an informal one.  At this point it is unclear
-whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto
-standard.  IJG intends to support SPIFF once the standard is frozen, but we
-have not decided whether it should become our default output format or not.
-(In any case, our decoder will remain capable of reading JFIF indefinitely.)
-
-Various proprietary file formats incorporating JPEG compression also exist.
-We have little or no sympathy for the existence of these formats.  Indeed,
-one of the original reasons for developing this free software was to help
-force convergence on common, open format standards for JPEG files.  Don't
-use a proprietary file format!
+The ISO/IEC JTC1/SC29/WG1 standards committee (also known as JPEG, together
+with ITU-T SG16) currently promotes different formats containing the name
+"JPEG" which are incompatible with original DCT-based JPEG.  IJG therefore does
+not support these formats (see REFERENCES).  Indeed, one of the original
+reasons for developing this free software was to help force convergence on
+common, interoperable format standards for JPEG files.
+Don't use an incompatible file format!
+(In any case, our decoder will remain capable of reading existing JPEG
+image files indefinitely.)
 
 
 TO DO
 =====
 
-The major thrust for v7 will probably be improvement of visual quality.
-The current method for scaling the quantization tables is known not to be
-very good at low Q values.  We also intend to investigate block boundary
-smoothing, "poor man's variable quantization", and other means of improving
-quality-vs-file-size performance without sacrificing compatibility.
-
-In future versions, we are considering supporting some of the upcoming JPEG
-Part 3 extensions --- principally, variable quantization and the SPIFF file
-format.
-
-As always, speeding things up is of great interest.
-
-Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net.
+Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/README-turbo.txt b/README-turbo.txt
new file mode 100755
index 0000000..f5cd613
--- /dev/null
+++ b/README-turbo.txt
@@ -0,0 +1,363 @@
+*******************************************************************************
+**     Background
+*******************************************************************************
+
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64,
+and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as
+libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can
+still outperform libjpeg by a significant amount, by virtue of its
+highly-optimized Huffman coding routines.  In many cases, the performance of
+libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+
+libjpeg-turbo implements both the traditional libjpeg API and the less
+powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+colorspace extensions that allow it to compress from/decompress to 32-bit and
+big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+interface.
+
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+VirtualGL projects made numerous enhancements to the codec in 2009, and in
+early 2010, libjpeg-turbo spun off into an independent project, with the goal
+of making high-speed JPEG compression/decompression technology available to a
+broader range of users and developers.
+
+
+*******************************************************************************
+**     License
+*******************************************************************************
+
+Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by
+libjpeg (see README.)  The TurboJPEG wrapper (both C and Java versions) and
+associated test programs bear a similar license, which is reproduced below:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+- Neither the name of the libjpeg-turbo Project nor the names of its
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+*******************************************************************************
+**     Using libjpeg-turbo
+*******************************************************************************
+
+libjpeg-turbo includes two APIs that can be used to compress and decompress
+JPEG images:
+
+  TurboJPEG API:  This API provides an easy-to-use interface for compressing
+  and decompressing JPEG images in memory.  It also provides some functionality
+  that would not be straightforward to achieve using the underlying libjpeg
+  API, such as generating planar YUV images and performing multiple
+  simultaneous lossless transforms on an image.  The Java interface for
+  libjpeg-turbo is written on top of the TurboJPEG API.
+
+  libjpeg API:  This is the de facto industry-standard API for compressing and
+  decompressing JPEG images.  It is more difficult to use than the TurboJPEG
+  API but also more powerful.  The libjpeg API implementation in libjpeg-turbo
+  is both API/ABI-compatible and mathematically compatible with libjpeg v6b.
+  It can also optionally be configured to be API/ABI-compatible with libjpeg v7
+  and v8 (see below.)
+
+There is no significant performance advantage to either API when both are used
+to perform similar operations.
+
+=====================
+Colorspace Extensions
+=====================
+
+libjpeg-turbo includes extensions that allow JPEG images to be compressed
+directly from (and decompressed directly to) buffers that use BGR, BGRX,
+RGBX, XBGR, and XRGB pixel ordering.  This is implemented with ten new
+colorspace constants:
+
+  JCS_EXT_RGB   /* red/green/blue */
+  JCS_EXT_RGBX  /* red/green/blue/x */
+  JCS_EXT_BGR   /* blue/green/red */
+  JCS_EXT_BGRX  /* blue/green/red/x */
+  JCS_EXT_XBGR  /* x/blue/green/red */
+  JCS_EXT_XRGB  /* x/red/green/blue */
+  JCS_EXT_RGBA  /* red/green/blue/alpha */
+  JCS_EXT_BGRA  /* blue/green/red/alpha */
+  JCS_EXT_ABGR  /* alpha/blue/green/red */
+  JCS_EXT_ARGB  /* alpha/red/green/blue */
+
+Setting cinfo.in_color_space (compression) or cinfo.out_color_space
+(decompression) to one of these values will cause libjpeg-turbo to read the
+red, green, and blue values from (or write them to) the appropriate position in
+the pixel when compressing from/decompressing to an RGB buffer.
+
+Your application can check for the existence of these extensions at compile
+time with:
+
+  #ifdef JCS_EXTENSIONS
+
+At run time, attempting to use these extensions with a libjpeg implementation
+that does not support them will result in a "Bogus input colorspace" error.
+Applications can trap this error in order to test whether run-time support is
+available for the colorspace extensions.
+
+When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the
+X byte is undefined, and in order to ensure the best performance, libjpeg-turbo
+can set that byte to whatever value it wishes.  If an application expects the X
+byte to be used as an alpha channel, then it should specify JCS_EXT_RGBA,
+JCS_EXT_BGRA, JCS_EXT_ABGR, or JCS_EXT_ARGB.  When these colorspace constants
+are used, the X byte is guaranteed to be 0xFF, which is interpreted as opaque.
+
+Your application can check for the existence of the alpha channel colorspace
+extensions at compile time with:
+
+  #ifdef JCS_ALPHA_EXTENSIONS
+
+jcstest.c, located in the libjpeg-turbo source tree, demonstrates how to check
+for the existence of the colorspace extensions at compile time and run time.
+
+===================================
+libjpeg v7 and v8 API/ABI Emulation
+===================================
+
+With libjpeg v7 and v8, new features were added that necessitated extending the
+compression and decompression structures.  Unfortunately, due to the exposed
+nature of those structures, extending them also necessitated breaking backward
+ABI compatibility with previous libjpeg releases.  Thus, programs that were
+built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is
+based on the libjpeg v6b code base.  Although libjpeg v7 and v8 are not
+as widely used as v6b, enough programs (including a few Linux distros) made
+the switch that there was a demand to emulate the libjpeg v7 and v8 ABIs
+in libjpeg-turbo.  It should be noted, however, that this feature was added
+primarily so that applications that had already been compiled to use libjpeg
+v7+ could take advantage of accelerated baseline JPEG encoding/decoding
+without recompiling.  libjpeg-turbo does not claim to support all of the
+libjpeg v7+ features, nor to produce identical output to libjpeg v7+ in all
+cases (see below.)
+
+By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an
+argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version
+of libjpeg-turbo that emulates the libjpeg v7 or v8 ABI, so that programs
+that are built against libjpeg v7 or v8 can be run with libjpeg-turbo.  The
+following section describes which libjpeg v7+ features are supported and which
+aren't.
+
+Support for libjpeg v7 and v8 Features:
+---------------------------------------
+
+Fully supported:
+
+-- libjpeg: IDCT scaling extensions in decompressor
+   libjpeg-turbo supports IDCT scaling with scaling factors of 1/8, 1/4, 3/8,
+   1/2, 5/8, 3/4, 7/8, 9/8, 5/4, 11/8, 3/2, 13/8, 7/4, 15/8, and 2/1 (only 1/4
+   and 1/2 are SIMD-accelerated.)
+
+-- libjpeg: arithmetic coding
+
+-- libjpeg: In-memory source and destination managers
+   See notes below.
+
+-- cjpeg: Separate quality settings for luminance and chrominance
+   Note that the libjpeg v7+ API was extended to accommodate this feature only
+   for convenience purposes.  It has always been possible to implement this
+   feature with libjpeg v6b (see rdswitch.c for an example.)
+
+-- cjpeg: 32-bit BMP support
+
+-- cjpeg: -rgb option
+
+-- jpegtran: lossless cropping
+
+-- jpegtran: -perfect option
+
+-- jpegtran: forcing width/height when performing lossless crop
+
+-- rdjpgcom: -raw option
+
+-- rdjpgcom: locale awareness
+
+
+Not supported:
+
+NOTE:  As of this writing, extensive research has been conducted into the
+usefulness of DCT scaling as a means of data reduction and SmartScale as a
+means of quality improvement.  The reader is invited to peruse the research at
+http://www.libjpeg-turbo.org/About/SmartScale and draw his/her own conclusions,
+but it is the general belief of our project that these features have not
+demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
+
+-- libjpeg: DCT scaling in compressor
+   cinfo.scale_num and cinfo.scale_denom are silently ignored.
+   There is no technical reason why DCT scaling could not be supported when
+   emulating the libjpeg v7+ API/ABI, but without the SmartScale extension (see
+   below), only scaling factors of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and
+   8/9 would be available, which is of limited usefulness.
+
+-- libjpeg: SmartScale
+   cinfo.block_size is silently ignored.
+   SmartScale is an extension to the JPEG format that allows for DCT block
+   sizes other than 8x8.  Providing support for this new format would be
+   feasible (particularly without full acceleration.)  However, until/unless
+   the format becomes either an official industry standard or, at minimum, an
+   accepted solution in the community, we are hesitant to implement it, as
+   there is no sense of whether or how it might change in the future.  It is
+   our belief that SmartScale has not demonstrated sufficient usefulness as a
+   lossless format nor as a means of quality enhancement, and thus, our primary
+   interest in providing this feature would be as a means of supporting
+   additional DCT scaling factors.
+
+-- libjpeg: Fancy downsampling in compressor
+   cinfo.do_fancy_downsampling is silently ignored.
+   This requires the DCT scaling feature, which is not supported.
+
+-- jpegtran: Scaling
+   This requires both the DCT scaling and SmartScale features, which are not
+   supported.
+
+-- Lossless RGB JPEG files
+   This requires the SmartScale feature, which is not supported.
+
+What About libjpeg v9?
+----------------------
+
+libjpeg v9 introduced yet another field to the JPEG compression structure
+(color_transform), thus making the ABI backward incompatible with that of
+libjpeg v8.  This new field was introduced solely for the purpose of supporting
+lossless SmartScale encoding.  Further, there was actually no reason to extend
+the API in this manner, as the color transform could have just as easily been
+activated by way of a new JPEG colorspace constant, thus preserving backward
+ABI compatibility.
+
+Our research (see link above) has shown that lossless SmartScale does not
+generally accomplish anything that can't already be accomplished better with
+existing, standard lossless formats.  Thus, at this time, it is our belief that
+there is not sufficient technical justification for software to upgrade from
+libjpeg v8 to libjpeg v9, and therefore, not sufficient technical justification
+for us to emulate the libjpeg v9 ABI.
+
+=====================================
+In-Memory Source/Destination Managers
+=====================================
+
+By default, libjpeg-turbo 1.3 and later includes the jpeg_mem_src() and
+jpeg_mem_dest() functions, even when not emulating the libjpeg v8 API/ABI.
+Previously, it was necessary to build libjpeg-turbo from source with libjpeg v8
+API/ABI emulation in order to use the in-memory source/destination managers,
+but several projects requested that those functions be included when emulating
+the libjpeg v6b API/ABI as well.  This allows the use of those functions by
+programs that need them without breaking ABI compatibility for programs that
+don't, and it allows those functions to be provided in the "official"
+libjpeg-turbo binaries.
+
+Those who are concerned about maintaining strict conformance with the libjpeg
+v6b or v7 API can pass an argument of --without-mem-srcdst to configure or
+an argument of -DWITH_MEM_SRCDST=0 to CMake prior to building libjpeg-turbo.
+This will restore the pre-1.3 behavior, in which jpeg_mem_src() and
+jpeg_mem_dest() are only included when emulating the libjpeg v8 API/ABI.
+
+On Un*x systems, including the in-memory source/destination managers changes
+the dynamic library version from 62.0.0 to 62.1.0 if using libjpeg v6b API/ABI
+emulation and from 7.0.0 to 7.1.0 if using libjpeg v7 API/ABI emulation.
+
+Note that, on most Un*x systems, the dynamic linker will not look for a
+function in a library until that function is actually used.  Thus, if a program
+is built against libjpeg-turbo 1.3+ and uses jpeg_mem_src() or jpeg_mem_dest(),
+that program will not fail if run against an older version of libjpeg-turbo or
+against libjpeg v7- until the program actually tries to call jpeg_mem_src() or
+jpeg_mem_dest().  Such is not the case on Windows.  If a program is built
+against the libjpeg-turbo 1.3+ DLL and uses jpeg_mem_src() or jpeg_mem_dest(),
+then it must use the libjpeg-turbo 1.3+ DLL at run time.
+
+Both cjpeg and djpeg have been extended to allow testing the in-memory
+source/destination manager functions.  See their respective man pages for more
+details.
+
+
+*******************************************************************************
+**     Mathematical Compatibility
+*******************************************************************************
+
+For the most part, libjpeg-turbo should produce identical output to libjpeg
+v6b.  The one exception to this is when using the floating point DCT/IDCT, in
+which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the
+following reasons:
+
+-- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so
+   slightly more accurate than the implementation in libjpeg v6b, but not by
+   any amount perceptible to human vision (generally in the range of 0.01 to
+   0.08 dB gain in PSNR.)
+-- When not using the SIMD extensions, libjpeg-turbo uses the more accurate
+   (and slightly faster) floating point IDCT algorithm introduced in libjpeg
+   v8a as opposed to the algorithm used in libjpeg v6b.  It should be noted,
+   however, that this algorithm basically brings the accuracy of the floating
+   point IDCT in line with the accuracy of the slow integer IDCT.  The floating
+   point DCT/IDCT algorithms are mainly a legacy feature, and they do not
+   produce significantly more accuracy than the slow integer algorithms (to put
+   numbers on this, the typical difference in PSNR between the two algorithms
+   is less than 0.10 dB, whereas changing the quality level by 1 in the upper
+   range of the quality scale is typically more like a 1.0 dB difference.)
+-- When not using the SIMD extensions, the accuracy of the floating point
+   DCT/IDCT can depend on the compiler and compiler settings.
+
+While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood, it is
+still using the same algorithms as libjpeg v6b, so there are several specific
+cases in which libjpeg-turbo cannot be expected to produce the same output as
+libjpeg v8:
+
+-- When decompressing using scaling factors of 1/2 and 1/4, because libjpeg v8
+   implements those scaling algorithms differently than libjpeg v6b does, and
+   libjpeg-turbo's SIMD extensions are based on the libjpeg v6b behavior.
+
+-- When using chrominance subsampling, because libjpeg v8 implements this
+   with its DCT/IDCT scaling algorithms rather than with a separate
+   downsampling/upsampling algorithm.  In our testing, the subsampled/upsampled
+   output of libjpeg v8 is less accurate than that of libjpeg v6b for this
+   reason.
+
+-- When decompressing using a scaling factor > 1 and merged (AKA "non-fancy" or
+   "non-smooth") chrominance upsampling, because libjpeg v8 does not support
+   merged upsampling with scaling factors > 1.
+
+
+*******************************************************************************
+**     Performance Pitfalls
+*******************************************************************************
+
+===============
+Restart Markers
+===============
+
+The optimized Huffman decoder in libjpeg-turbo does not handle restart markers
+in a way that makes the rest of the libjpeg infrastructure happy, so it is
+necessary to use the slow Huffman decoder when decompressing a JPEG image that
+has restart markers.  This can cause the decompression performance to drop by
+as much as 20%, but the performance will still be much greater than that of
+libjpeg.  Many consumer packages, such as Photoshop, use restart markers when
+generating JPEG images, so images generated by those programs will experience
+this issue.
+
+===============================================
+Fast Integer Forward DCT at High Quality Levels
+===============================================
+
+The algorithm used by the SIMD-accelerated quantization function cannot produce
+correct results whenever the fast integer forward DCT is used along with a JPEG
+quality of 98-100.  Thus, libjpeg-turbo must use the non-SIMD quantization
+function in those cases.  This causes performance to drop by as much as 40%.
+It is therefore strongly advised that you use the slow integer forward DCT
+whenever encoding images with a JPEG quality of 98 or higher.
diff --git a/acinclude.m4 b/acinclude.m4
new file mode 100644
index 0000000..8d66784
--- /dev/null
+++ b/acinclude.m4
@@ -0,0 +1,245 @@
+# AC_PROG_NASM
+# --------------------------
+# Check that NASM exists and determine flags
+AC_DEFUN([AC_PROG_NASM],[
+
+AC_CHECK_PROGS(NASM, [nasm nasmw yasm])
+test -z "$NASM" && AC_MSG_ERROR([no nasm (Netwide Assembler) found])
+
+AC_MSG_CHECKING([for object file format of host system])
+case "$host_os" in
+  cygwin* | mingw* | pw32* | interix*)
+    case "$host_cpu" in
+      x86_64)
+        objfmt='Win64-COFF'
+        ;;
+      *)
+        objfmt='Win32-COFF'
+        ;;
+    esac
+  ;;
+  msdosdjgpp* | go32*)
+    objfmt='COFF'
+  ;;
+  os2-emx*)			# not tested
+    objfmt='MSOMF'		# obj
+  ;;
+  linux*coff* | linux*oldld*)
+    objfmt='COFF'		# ???
+  ;;
+  linux*aout*)
+    objfmt='a.out'
+  ;;
+  linux*)
+    case "$host_cpu" in
+      x86_64)
+        objfmt='ELF64'
+        ;;
+      *)
+        objfmt='ELF'
+        ;;
+    esac
+  ;;
+  kfreebsd* | freebsd* | netbsd* | openbsd*)
+    if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+      objfmt='BSD-a.out'
+    else
+      case "$host_cpu" in
+        x86_64 | amd64)
+          objfmt='ELF64'
+          ;;
+        *)
+          objfmt='ELF'
+          ;;
+      esac
+    fi
+  ;;
+  solaris* | sunos* | sysv* | sco*)
+    case "$host_cpu" in
+      x86_64)
+        objfmt='ELF64'
+        ;;
+      *)
+        objfmt='ELF'
+        ;;
+    esac
+  ;;
+  darwin* | rhapsody* | nextstep* | openstep* | macos*)
+    case "$host_cpu" in
+      x86_64)
+        objfmt='Mach-O64'
+        ;;
+      *)
+        objfmt='Mach-O'
+        ;;
+    esac
+  ;;
+  *)
+    objfmt='ELF ?'
+  ;;
+esac
+
+AC_MSG_RESULT([$objfmt])
+if test "$objfmt" = 'ELF ?'; then
+  objfmt='ELF'
+  AC_MSG_WARN([unexpected host system. assumed that the format is $objfmt.])
+fi
+
+AC_MSG_CHECKING([for object file format specifier (NAFLAGS) ])
+case "$objfmt" in
+  MSOMF)      NAFLAGS='-fobj -DOBJ32';;
+  Win32-COFF) NAFLAGS='-fwin32 -DWIN32';;
+  Win64-COFF) NAFLAGS='-fwin64 -DWIN64 -D__x86_64__';;
+  COFF)       NAFLAGS='-fcoff -DCOFF';;
+  a.out)      NAFLAGS='-faout -DAOUT';;
+  BSD-a.out)  NAFLAGS='-faoutb -DAOUT';;
+  ELF)        NAFLAGS='-felf -DELF';;
+  ELF64)      NAFLAGS='-felf64 -DELF -D__x86_64__';;
+  RDF)        NAFLAGS='-frdf -DRDF';;
+  Mach-O)     NAFLAGS='-fmacho -DMACHO';;
+  Mach-O64)   NAFLAGS='-fmacho64 -DMACHO -D__x86_64__';;
+esac
+AC_MSG_RESULT([$NAFLAGS])
+AC_SUBST([NAFLAGS])
+
+AC_MSG_CHECKING([whether the assembler ($NASM $NAFLAGS) works])
+cat > conftest.asm <<EOF
+[%line __oline__ "configure"
+        section .text
+        global  _main,main
+_main:
+main:   xor     eax,eax
+        ret
+]EOF
+try_nasm='$NASM $NAFLAGS -o conftest.o conftest.asm'
+if AC_TRY_EVAL(try_nasm) && test -s conftest.o; then
+  AC_MSG_RESULT(yes)
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.asm >&AC_FD_CC
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([installation or configuration problem: assembler cannot create object files.])
+fi
+
+AC_MSG_CHECKING([whether the linker accepts assembler output])
+try_nasm='${CC-cc} -o conftest${ac_exeext} $LDFLAGS conftest.o $LIBS 1>&AC_FD_CC'
+if AC_TRY_EVAL(try_nasm) && test -s conftest${ac_exeext}; then
+  rm -rf conftest*
+  AC_MSG_RESULT(yes)
+else
+  rm -rf conftest*
+  AC_MSG_RESULT(no)
+  AC_MSG_ERROR([configuration problem: maybe object file format mismatch.])
+fi
+
+])
+
+# AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE
+# --------------------------
+# Test whether the assembler is suitable and supports NEON instructions
+AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[
+  ac_good_gnu_arm_assembler=no
+  ac_save_CC="$CC"
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="$CCASFLAGS -x assembler-with-cpp"
+  CC="$CCAS"
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+    .text
+    .fpu neon
+    .arch armv7a
+    .object_arch armv4
+    .arm
+    pld [r0]
+    vmovn.u16 d0, q0]])], ac_good_gnu_arm_assembler=yes)
+
+  ac_use_gas_preprocessor=no
+  if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
+    CC="gas-preprocessor.pl $CCAS"
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+      .text
+      .fpu neon
+      .arch armv7a
+      .object_arch armv4
+      .arm
+      pld [r0]
+      vmovn.u16 d0, q0]])], ac_use_gas_preprocessor=yes)
+  fi
+  CFLAGS="$ac_save_CFLAGS"
+  CC="$ac_save_CC"
+
+  if test "x$ac_use_gas_preprocessor" = "xyes" ; then
+    CCAS="gas-preprocessor.pl $CCAS"
+    AC_SUBST([CCAS])
+    ac_good_gnu_arm_assembler=yes
+  fi
+
+  if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then
+    $1
+  else
+    $2
+  fi
+])
+
+# AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE
+# --------------------------
+# Test whether the assembler is suitable and supports MIPS DSPr2 instructions
+AC_DEFUN([AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE],[
+  have_mips_dspr2=no
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="$CCASFLAGS -mdspr2"
+
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+
+  int main ()
+  {
+    int c = 0, a = 0, b = 0;
+    __asm__ __volatile__ (
+        "precr.qb.ph %[c], %[a], %[b]          \n\t"
+        : [c] "=r" (c)
+        : [a] "r" (a), [b] "r" (b)
+    );
+    return c;
+  }
+  ]])], have_mips_dspr2=yes)
+  CFLAGS=$ac_save_CFLAGS
+
+  if test "x$have_mips_dspr2" = "xyes" ; then
+    $1
+  else
+    $2
+  fi
+])
+
+AC_DEFUN([AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE],[
+  ac_good_gnu_arm_assembler=no
+  ac_save_CC="$CC"
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="$CCASFLAGS -x assembler-with-cpp"
+  CC="$CCAS"
+  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+    .text
+    movi v0.16b, #100]])], ac_good_gnu_arm_assembler=yes)
+
+  ac_use_gas_preprocessor=no
+  if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
+    CC="gas-preprocessor.pl $CCAS"
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+      .text
+      movi v0.16b, #100]])], ac_use_gas_preprocessor=yes)
+  fi
+  CFLAGS="$ac_save_CFLAGS"
+  CC="$ac_save_CC"
+
+  if test "x$ac_use_gas_preprocessor" = "xyes" ; then
+    CCAS="gas-preprocessor.pl $CCAS"
+    AC_SUBST([CCAS])
+    ac_good_gnu_arm_assembler=yes
+  fi
+
+  if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then
+    $1
+  else
+    $2
+  fi
+])
diff --git a/ansi2knr.1 b/ansi2knr.1
deleted file mode 100644
index f9ee5a6..0000000
--- a/ansi2knr.1
+++ /dev/null
@@ -1,36 +0,0 @@
-.TH ANSI2KNR 1 "19 Jan 1996"
-.SH NAME
-ansi2knr \- convert ANSI C to Kernighan & Ritchie C
-.SH SYNOPSIS
-.I ansi2knr
-[--varargs] input_file [output_file]
-.SH DESCRIPTION
-If no output_file is supplied, output goes to stdout.
-.br
-There are no error messages.
-.sp
-.I ansi2knr
-recognizes function definitions by seeing a non-keyword identifier at the left
-margin, followed by a left parenthesis, with a right parenthesis as the last
-character on the line, and with a left brace as the first token on the
-following line (ignoring possible intervening comments).  It will recognize a
-multi-line header provided that no intervening line ends with a left or right
-brace or a semicolon.  These algorithms ignore whitespace and comments, except
-that the function name must be the first thing on the line.
-.sp
-The following constructs will confuse it:
-.br
-     - Any other construct that starts at the left margin and follows the
-above syntax (such as a macro or function call).
-.br
-     - Some macros that tinker with the syntax of the function header.
-.sp
-The --varargs switch is obsolete, and is recognized only for
-backwards compatibility.  The present version of
-.I ansi2knr
-will always attempt to convert a ... argument to va_alist and va_dcl.
-.SH AUTHOR
-L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
-continues to maintain the current version; most of the code in the current
-version is his work.  ansi2knr also includes contributions by Francois
-Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
diff --git a/ansi2knr.c b/ansi2knr.c
deleted file mode 100644
index 4e05fc2..0000000
--- a/ansi2knr.c
+++ /dev/null
@@ -1,693 +0,0 @@
-/* ansi2knr.c */
-/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
-
-/*
-ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY.  No author or distributor accepts responsibility to anyone for the
-consequences of using it or for whether it serves any particular purpose or
-works at all, unless he says so in writing.  Refer to the GNU General Public
-License (the "GPL") for full details.
-
-Everyone is granted permission to copy, modify and redistribute ansi2knr,
-but only under the conditions described in the GPL.  A copy of this license
-is supposed to have been given to you along with ansi2knr so you can know
-your rights and responsibilities.  It should be in a file named COPYLEFT.
-[In the IJG distribution, the GPL appears below, not in a separate file.]
-Among other things, the copyright notice and this notice must be preserved
-on all copies.
-
-We explicitly state here what we believe is already implied by the GPL: if
-the ansi2knr program is distributed as a separate set of sources and a
-separate executable file which are aggregated on a storage medium together
-with another program, this in itself does not bring the other program under
-the GPL, nor does the mere fact that such a program or the procedures for
-constructing it invoke the ansi2knr executable bring any other part of the
-program under the GPL.
-*/
-
-/*
----------- Here is the GNU GPL file COPYLEFT, referred to above ----------
------ These terms do NOT apply to the JPEG software itself; see README ------
-
-		    GHOSTSCRIPT GENERAL PUBLIC LICENSE
-		    (Clarified 11 Feb 1988)
-
- Copyright (C) 1988 Richard M. Stallman
- Everyone is permitted to copy and distribute verbatim copies of this
- license, but changing it is not allowed.  You can also use this wording
- to make the terms for other programs.
-
-  The license agreements of most software companies keep you at the
-mercy of those companies.  By contrast, our general public license is
-intended to give everyone the right to share Ghostscript.  To make sure
-that you get the rights we want you to have, we need to make
-restrictions that forbid anyone to deny you these rights or to ask you
-to surrender the rights.  Hence this license agreement.
-
-  Specifically, we want to make sure that you have the right to give
-away copies of Ghostscript, that you receive source code or else can get
-it if you want it, that you can change Ghostscript or use pieces of it
-in new free programs, and that you know you can do these things.
-
-  To make sure that everyone has such rights, we have to forbid you to
-deprive anyone else of these rights.  For example, if you distribute
-copies of Ghostscript, you must give the recipients all the rights that
-you have.  You must make sure that they, too, receive or can get the
-source code.  And you must tell them their rights.
-
-  Also, for our own protection, we must make certain that everyone finds
-out that there is no warranty for Ghostscript.  If Ghostscript is
-modified by someone else and passed on, we want its recipients to know
-that what they have is not what we distributed, so that any problems
-introduced by others will not reflect on our reputation.
-
-  Therefore we (Richard M. Stallman and the Free Software Foundation,
-Inc.) make the following terms which say what you must do to be allowed
-to distribute or change Ghostscript.
-
-
-			COPYING POLICIES
-
-  1. You may copy and distribute verbatim copies of Ghostscript source
-code as you receive it, in any medium, provided that you conspicuously
-and appropriately publish on each copy a valid copyright and license
-notice "Copyright (C) 1989 Aladdin Enterprises.  All rights reserved.
-Distributed by Free Software Foundation, Inc." (or with whatever year is
-appropriate); keep intact the notices on all files that refer to this
-License Agreement and to the absence of any warranty; and give any other
-recipients of the Ghostscript program a copy of this License Agreement
-along with the program.  You may charge a distribution fee for the
-physical act of transferring a copy.
-
-  2. You may modify your copy or copies of Ghostscript or any portion of
-it, and copy and distribute such modifications under the terms of
-Paragraph 1 above, provided that you also do the following:
-
-    a) cause the modified files to carry prominent notices stating
-    that you changed the files and the date of any change; and
-
-    b) cause the whole of any work that you distribute or publish,
-    that in whole or in part contains or is a derivative of Ghostscript
-    or any part thereof, to be licensed at no charge to all third
-    parties on terms identical to those contained in this License
-    Agreement (except that you may choose to grant more extensive
-    warranty protection to some or all third parties, at your option).
-
-    c) You may charge a distribution fee for the physical act of
-    transferring a copy, and you may at your option offer warranty
-    protection in exchange for a fee.
-
-Mere aggregation of another unrelated program with this program (or its
-derivative) on a volume of a storage or distribution medium does not bring
-the other program under the scope of these terms.
-
-  3. You may copy and distribute Ghostscript (or a portion or derivative
-of it, under Paragraph 2) in object code or executable form under the
-terms of Paragraphs 1 and 2 above provided that you also do one of the
-following:
-
-    a) accompany it with the complete corresponding machine-readable
-    source code, which must be distributed under the terms of
-    Paragraphs 1 and 2 above; or,
-
-    b) accompany it with a written offer, valid for at least three
-    years, to give any third party free (except for a nominal
-    shipping charge) a complete machine-readable copy of the
-    corresponding source code, to be distributed under the terms of
-    Paragraphs 1 and 2 above; or,
-
-    c) accompany it with the information you received as to where the
-    corresponding source code may be obtained.  (This alternative is
-    allowed only for noncommercial distribution and only if you
-    received the program in object code or executable form alone.)
-
-For an executable file, complete source code means all the source code for
-all modules it contains; but, as a special exception, it need not include
-source code for modules which are standard libraries that accompany the
-operating system on which the executable file runs.
-
-  4. You may not copy, sublicense, distribute or transfer Ghostscript
-except as expressly provided under this License Agreement.  Any attempt
-otherwise to copy, sublicense, distribute or transfer Ghostscript is
-void and your rights to use the program under this License agreement
-shall be automatically terminated.  However, parties who have received
-computer software programs from you with this License Agreement will not
-have their licenses terminated so long as such parties remain in full
-compliance.
-
-  5. If you wish to incorporate parts of Ghostscript into other free
-programs whose distribution conditions are different, write to the Free
-Software Foundation at 675 Mass Ave, Cambridge, MA 02139.  We have not
-yet worked out a simple rule that can be stated here, but we will often
-permit this.  We will be guided by the two goals of preserving the free
-status of all derivatives of our free software and of promoting the
-sharing and reuse of software.
-
-Your comments and suggestions about our licensing policies and our
-software are welcome!  Please contact the Free Software Foundation,
-Inc., 675 Mass Ave, Cambridge, MA 02139, or call (617) 876-3296.
-
-		       NO WARRANTY
-
-  BECAUSE GHOSTSCRIPT IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
-NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW.  EXCEPT
-WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, RICHARD
-M. STALLMAN, ALADDIN ENTERPRISES, L. PETER DEUTSCH, AND/OR OTHER PARTIES
-PROVIDE GHOSTSCRIPT "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
-EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE
-ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF GHOSTSCRIPT IS WITH
-YOU.  SHOULD GHOSTSCRIPT PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
-NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
-STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., L. PETER DEUTSCH, ALADDIN
-ENTERPRISES, AND/OR ANY OTHER PARTY WHO MAY MODIFY AND REDISTRIBUTE
-GHOSTSCRIPT AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING
-ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
-(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
-INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A FAILURE OF THE
-PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) GHOSTSCRIPT, EVEN IF YOU
-HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES, OR FOR ANY CLAIM
-BY ANY OTHER PARTY.
-
--------------------- End of file COPYLEFT ------------------------------
-*/
-
-/*
- * Usage:
-	ansi2knr input_file [output_file]
- * If no output_file is supplied, output goes to stdout.
- * There are no error messages.
- *
- * ansi2knr recognizes function definitions by seeing a non-keyword
- * identifier at the left margin, followed by a left parenthesis,
- * with a right parenthesis as the last character on the line,
- * and with a left brace as the first token on the following line
- * (ignoring possible intervening comments).
- * It will recognize a multi-line header provided that no intervening
- * line ends with a left or right brace or a semicolon.
- * These algorithms ignore whitespace and comments, except that
- * the function name must be the first thing on the line.
- * The following constructs will confuse it:
- *	- Any other construct that starts at the left margin and
- *	    follows the above syntax (such as a macro or function call).
- *	- Some macros that tinker with the syntax of the function header.
- */
-
-/*
- * The original and principal author of ansi2knr is L. Peter Deutsch
- * <ghost@aladdin.com>.  Other authors are noted in the change history
- * that follows (in reverse chronological order):
-	lpd 96-01-21 added code to cope with not HAVE_CONFIG_H and with
-		compilers that don't understand void, as suggested by
-		Tom Lane
-	lpd 96-01-15 changed to require that the first non-comment token
-		on the line following a function header be a left brace,
-		to reduce sensitivity to macros, as suggested by Tom Lane
-		<tgl@sss.pgh.pa.us>
-	lpd 95-06-22 removed #ifndefs whose sole purpose was to define
-		undefined preprocessor symbols as 0; changed all #ifdefs
-		for configuration symbols to #ifs
-	lpd 95-04-05 changed copyright notice to make it clear that
-		including ansi2knr in a program does not bring the entire
-		program under the GPL
-	lpd 94-12-18 added conditionals for systems where ctype macros
-		don't handle 8-bit characters properly, suggested by
-		Francois Pinard <pinard@iro.umontreal.ca>;
-		removed --varargs switch (this is now the default)
-	lpd 94-10-10 removed CONFIG_BROKETS conditional
-	lpd 94-07-16 added some conditionals to help GNU `configure',
-		suggested by Francois Pinard <pinard@iro.umontreal.ca>;
-		properly erase prototype args in function parameters,
-		contributed by Jim Avera <jima@netcom.com>;
-		correct error in writeblanks (it shouldn't erase EOLs)
-	lpd 89-xx-xx original version
- */
-
-/* Most of the conditionals here are to make ansi2knr work with */
-/* or without the GNU configure machinery. */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <stdio.h>
-#include <ctype.h>
-
-#if HAVE_CONFIG_H
-
-/*
-   For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
-   This will define HAVE_CONFIG_H and so, activate the following lines.
- */
-
-# if STDC_HEADERS || HAVE_STRING_H
-#  include <string.h>
-# else
-#  include <strings.h>
-# endif
-
-#else /* not HAVE_CONFIG_H */
-
-/* Otherwise do it the hard way */
-
-# ifdef BSD
-#  include <strings.h>
-# else
-#  ifdef VMS
-    extern int strlen(), strncmp();
-#  else
-#   include <string.h>
-#  endif
-# endif
-
-#endif /* not HAVE_CONFIG_H */
-
-#if STDC_HEADERS
-# include <stdlib.h>
-#else
-/*
-   malloc and free should be declared in stdlib.h,
-   but if you've got a K&R compiler, they probably aren't.
- */
-# ifdef MSDOS
-#  include <malloc.h>
-# else
-#  ifdef VMS
-     extern char *malloc();
-     extern void free();
-#  else
-     extern char *malloc();
-     extern int free();
-#  endif
-# endif
-
-#endif
-
-/*
- * The ctype macros don't always handle 8-bit characters correctly.
- * Compensate for this here.
- */
-#ifdef isascii
-#  undef HAVE_ISASCII		/* just in case */
-#  define HAVE_ISASCII 1
-#else
-#endif
-#if STDC_HEADERS || !HAVE_ISASCII
-#  define is_ascii(c) 1
-#else
-#  define is_ascii(c) isascii(c)
-#endif
-
-#define is_space(c) (is_ascii(c) && isspace(c))
-#define is_alpha(c) (is_ascii(c) && isalpha(c))
-#define is_alnum(c) (is_ascii(c) && isalnum(c))
-
-/* Scanning macros */
-#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
-#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
-
-/* Forward references */
-char *skipspace();
-int writeblanks();
-int test1();
-int convert1();
-
-/* The main program */
-int
-main(argc, argv)
-    int argc;
-    char *argv[];
-{	FILE *in, *out;
-#define bufsize 5000			/* arbitrary size */
-	char *buf;
-	char *line;
-	char *more;
-	/*
-	 * In previous versions, ansi2knr recognized a --varargs switch.
-	 * If this switch was supplied, ansi2knr would attempt to convert
-	 * a ... argument to va_alist and va_dcl; if this switch was not
-	 * supplied, ansi2knr would simply drop any such arguments.
-	 * Now, ansi2knr always does this conversion, and we only
-	 * check for this switch for backward compatibility.
-	 */
-	int convert_varargs = 1;
-
-	if ( argc > 1 && argv[1][0] == '-' )
-	  {	if ( !strcmp(argv[1], "--varargs") )
-		  {	convert_varargs = 1;
-			argc--;
-			argv++;
-		  }
-		else
-		  {	fprintf(stderr, "Unrecognized switch: %s\n", argv[1]);
-			exit(1);
-		  }
-	  }
-	switch ( argc )
-	   {
-	default:
-		printf("Usage: ansi2knr input_file [output_file]\n");
-		exit(0);
-	case 2:
-		out = stdout;
-		break;
-	case 3:
-		out = fopen(argv[2], "w");
-		if ( out == NULL )
-		   {	fprintf(stderr, "Cannot open output file %s\n", argv[2]);
-			exit(1);
-		   }
-	   }
-	in = fopen(argv[1], "r");
-	if ( in == NULL )
-	   {	fprintf(stderr, "Cannot open input file %s\n", argv[1]);
-		exit(1);
-	   }
-	fprintf(out, "#line 1 \"%s\"\n", argv[1]);
-	buf = malloc(bufsize);
-	line = buf;
-	while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
-	   {
-test:		line += strlen(line);
-		switch ( test1(buf) )
-		   {
-		case 2:			/* a function header */
-			convert1(buf, out, 1, convert_varargs);
-			break;
-		case 1:			/* a function */
-			/* Check for a { at the start of the next line. */
-			more = ++line;
-f:			if ( line >= buf + (bufsize - 1) ) /* overflow check */
-			  goto wl;
-			if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
-			  goto wl;
-			switch ( *skipspace(more, 1) )
-			  {
-			  case '{':
-			    /* Definitely a function header. */
-			    convert1(buf, out, 0, convert_varargs);
-			    fputs(more, out);
-			    break;
-			  case 0:
-			    /* The next line was blank or a comment: */
-			    /* keep scanning for a non-comment. */
-			    line += strlen(line);
-			    goto f;
-			  default:
-			    /* buf isn't a function header, but */
-			    /* more might be. */
-			    fputs(buf, out);
-			    strcpy(buf, more);
-			    line = buf;
-			    goto test;
-			  }
-			break;
-		case -1:		/* maybe the start of a function */
-			if ( line != buf + (bufsize - 1) ) /* overflow check */
-			  continue;
-			/* falls through */
-		default:		/* not a function */
-wl:			fputs(buf, out);
-			break;
-		   }
-		line = buf;
-	   }
-	if ( line != buf )
-	  fputs(buf, out);
-	free(buf);
-	fclose(out);
-	fclose(in);
-	return 0;
-}
-
-/* Skip over space and comments, in either direction. */
-char *
-skipspace(p, dir)
-    register char *p;
-    register int dir;			/* 1 for forward, -1 for backward */
-{	for ( ; ; )
-	   {	while ( is_space(*p) )
-		  p += dir;
-		if ( !(*p == '/' && p[dir] == '*') )
-		  break;
-		p += dir;  p += dir;
-		while ( !(*p == '*' && p[dir] == '/') )
-		   {	if ( *p == 0 )
-			  return p;	/* multi-line comment?? */
-			p += dir;
-		   }
-		p += dir;  p += dir;
-	   }
-	return p;
-}
-
-/*
- * Write blanks over part of a string.
- * Don't overwrite end-of-line characters.
- */
-int
-writeblanks(start, end)
-    char *start;
-    char *end;
-{	char *p;
-	for ( p = start; p < end; p++ )
-	  if ( *p != '\r' && *p != '\n' )
-	    *p = ' ';
-	return 0;
-}
-
-/*
- * Test whether the string in buf is a function definition.
- * The string may contain and/or end with a newline.
- * Return as follows:
- *	0 - definitely not a function definition;
- *	1 - definitely a function definition;
- *	2 - definitely a function prototype (NOT USED);
- *	-1 - may be the beginning of a function definition,
- *		append another line and look again.
- * The reason we don't attempt to convert function prototypes is that
- * Ghostscript's declaration-generating macros look too much like
- * prototypes, and confuse the algorithms.
- */
-int
-test1(buf)
-    char *buf;
-{	register char *p = buf;
-	char *bend;
-	char *endfn;
-	int contin;
-
-	if ( !isidfirstchar(*p) )
-	  return 0;		/* no name at left margin */
-	bend = skipspace(buf + strlen(buf) - 1, -1);
-	switch ( *bend )
-	   {
-	   case ';': contin = 0 /*2*/; break;
-	   case ')': contin = 1; break;
-	   case '{': return 0;		/* not a function */
-	   case '}': return 0;		/* not a function */
-	   default: contin = -1;
-	   }
-	while ( isidchar(*p) )
-	  p++;
-	endfn = p;
-	p = skipspace(p, 1);
-	if ( *p++ != '(' )
-	  return 0;		/* not a function */
-	p = skipspace(p, 1);
-	if ( *p == ')' )
-	  return 0;		/* no parameters */
-	/* Check that the apparent function name isn't a keyword. */
-	/* We only need to check for keywords that could be followed */
-	/* by a left parenthesis (which, unfortunately, is most of them). */
-	   {	static char *words[] =
-		   {	"asm", "auto", "case", "char", "const", "double",
-			"extern", "float", "for", "if", "int", "long",
-			"register", "return", "short", "signed", "sizeof",
-			"static", "switch", "typedef", "unsigned",
-			"void", "volatile", "while", 0
-		   };
-		char **key = words;
-		char *kp;
-		int len = endfn - buf;
-
-		while ( (kp = *key) != 0 )
-		   {	if ( strlen(kp) == len && !strncmp(kp, buf, len) )
-			  return 0;	/* name is a keyword */
-			key++;
-		   }
-	   }
-	return contin;
-}
-
-/* Convert a recognized function definition or header to K&R syntax. */
-int
-convert1(buf, out, header, convert_varargs)
-    char *buf;
-    FILE *out;
-    int header;			/* Boolean */
-    int convert_varargs;	/* Boolean */
-{	char *endfn;
-	register char *p;
-	char **breaks;
-	unsigned num_breaks = 2;	/* for testing */
-	char **btop;
-	char **bp;
-	char **ap;
-	char *vararg = 0;
-
-	/* Pre-ANSI implementations don't agree on whether strchr */
-	/* is called strchr or index, so we open-code it here. */
-	for ( endfn = buf; *(endfn++) != '('; )
-	  ;
-top:	p = endfn;
-	breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
-	if ( breaks == 0 )
-	   {	/* Couldn't allocate break table, give up */
-		fprintf(stderr, "Unable to allocate break table!\n");
-		fputs(buf, out);
-		return -1;
-	   }
-	btop = breaks + num_breaks * 2 - 2;
-	bp = breaks;
-	/* Parse the argument list */
-	do
-	   {	int level = 0;
-		char *lp = NULL;
-		char *rp;
-		char *end = NULL;
-
-		if ( bp >= btop )
-		   {	/* Filled up break table. */
-			/* Allocate a bigger one and start over. */
-			free((char *)breaks);
-			num_breaks <<= 1;
-			goto top;
-		   }
-		*bp++ = p;
-		/* Find the end of the argument */
-		for ( ; end == NULL; p++ )
-		   {	switch(*p)
-			   {
-			   case ',':
-				if ( !level ) end = p;
-				break;
-			   case '(':
-				if ( !level ) lp = p;
-				level++;
-				break;
-			   case ')':
-				if ( --level < 0 ) end = p;
-				else rp = p;
-				break;
-			   case '/':
-				p = skipspace(p, 1) - 1;
-				break;
-			   default:
-				;
-			   }
-		   }
-		/* Erase any embedded prototype parameters. */
-		if ( lp )
-		  writeblanks(lp + 1, rp);
-		p--;			/* back up over terminator */
-		/* Find the name being declared. */
-		/* This is complicated because of procedure and */
-		/* array modifiers. */
-		for ( ; ; )
-		   {	p = skipspace(p - 1, -1);
-			switch ( *p )
-			   {
-			   case ']':	/* skip array dimension(s) */
-			   case ')':	/* skip procedure args OR name */
-			   {	int level = 1;
-				while ( level )
-				 switch ( *--p )
-				   {
-				   case ']': case ')': level++; break;
-				   case '[': case '(': level--; break;
-				   case '/': p = skipspace(p, -1) + 1; break;
-				   default: ;
-				   }
-			   }
-				if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
-				   {	/* We found the name being declared */
-					while ( !isidfirstchar(*p) )
-					  p = skipspace(p, 1) + 1;
-					goto found;
-				   }
-				break;
-			   default:
-				goto found;
-			   }
-		   }
-found:		if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
-		  {	if ( convert_varargs )
-			  {	*bp++ = "va_alist";
-				vararg = p-2;
-			  }
-			else
-			  {	p++;
-				if ( bp == breaks + 1 )	/* sole argument */
-				  writeblanks(breaks[0], p);
-				else
-				  writeblanks(bp[-1] - 1, p);
-				bp--;
-			  }
-		   }
-		else
-		   {	while ( isidchar(*p) ) p--;
-			*bp++ = p+1;
-		   }
-		p = end;
-	   }
-	while ( *p++ == ',' );
-	*bp = p;
-	/* Make a special check for 'void' arglist */
-	if ( bp == breaks+2 )
-	   {	p = skipspace(breaks[0], 1);
-		if ( !strncmp(p, "void", 4) )
-		   {	p = skipspace(p+4, 1);
-			if ( p == breaks[2] - 1 )
-			   {	bp = breaks;	/* yup, pretend arglist is empty */
-				writeblanks(breaks[0], p + 1);
-			   }
-		   }
-	   }
-	/* Put out the function name and left parenthesis. */
-	p = buf;
-	while ( p != endfn ) putc(*p, out), p++;
-	/* Put out the declaration. */
-	if ( header )
-	  {	fputs(");", out);
-		for ( p = breaks[0]; *p; p++ )
-		  if ( *p == '\r' || *p == '\n' )
-		    putc(*p, out);
-	  }
-	else
-	  {	for ( ap = breaks+1; ap < bp; ap += 2 )
-		  {	p = *ap;
-			while ( isidchar(*p) )
-			  putc(*p, out), p++;
-			if ( ap < bp - 1 )
-			  fputs(", ", out);
-		  }
-		fputs(")  ", out);
-		/* Put out the argument declarations */
-		for ( ap = breaks+2; ap <= bp; ap += 2 )
-		  (*ap)[-1] = ';';
-		if ( vararg != 0 )
-		  {	*vararg = 0;
-			fputs(breaks[0], out);		/* any prior args */
-			fputs("va_dcl", out);		/* the final arg */
-			fputs(bp[0], out);
-		  }
-		else
-		  fputs(breaks[0], out);
-	  }
-	free((char *)breaks);
-	return 0;
-}
diff --git a/bmp.c b/bmp.c
new file mode 100644
index 0000000..4986055
--- /dev/null
+++ b/bmp.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <errno.h>
+#include "cdjpeg.h"
+#include <jpeglib.h>
+#include <jpegint.h>
+#include "tjutil.h"
+#include "bmp.h"
+
+
+/* This duplicates the functionality of the VirtualGL bitmap library using
+   the components from cjpeg and djpeg */
+
+
+/* Error handling (based on example in example.c) */
+
+static char errStr[JMSG_LENGTH_MAX]="No error";  /* last recorded message; returned by bmpgeterr() */
+
+struct my_error_mgr
+{
+	struct jpeg_error_mgr pub;  /* must stay first: my_error_exit() casts cinfo->err to my_error_ptr */
+	jmp_buf setjmp_buffer;  /* target of the longjmp() in my_error_exit() */
+};
+typedef struct my_error_mgr *my_error_ptr;
+/* error_exit hook: store the message through output_message, then longjmp() back to the setjmp() site */
+static void my_error_exit(j_common_ptr cinfo)
+{
+	my_error_ptr myerr=(my_error_ptr)cinfo->err;  /* cinfo->err points at the pub member of a my_error_mgr */
+	(*cinfo->err->output_message)(cinfo);  /* must run before the jump so the message is captured */
+	longjmp(myerr->setjmp_buffer, 1);  /* setjmp() in loadbmp()/savebmp() then returns 1 */
+}
+
+/* Based on output_message() in jerror.c; formats the current libjpeg message into errStr */
+
+static void my_output_message(j_common_ptr cinfo)
+{
+	(*cinfo->err->format_message)(cinfo, errStr);  /* errStr is JMSG_LENGTH_MAX bytes */
+}
+/* Store message m in errStr, set retval=-1 and goto the caller's bailout label; each expands to a braced block */
+#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m);  \
+	retval=-1;  goto bailout;}
+#define _throwunix(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s\n%s", m,  \
+	strerror(errno));  retval=-1;  goto bailout;}  /* also appends strerror(errno) on a second line */
+
+/* Copy a w x h pixel block from srcbuf to dstbuf, moving each pixel's R, G and B bytes to the offsets of dstpf */
+static void pixelconvert(unsigned char *srcbuf, int srcpf, int srcbottomup,
+	unsigned char *dstbuf, int dstpf, int dstbottomup, int w, int h)
+{
+	unsigned char *srcptr=srcbuf, *srcptr2;
+	int srcps=tjPixelSize[srcpf];  /* byte step between consecutive source pixels */
+	int srcstride=srcbottomup? -w*srcps:w*srcps;  /* rows are tightly packed; negative steps toward the buffer start */
+	unsigned char *dstptr=dstbuf, *dstptr2;
+	int dstps=tjPixelSize[dstpf];  /* byte step between consecutive destination pixels */
+	int dststride=dstbottomup? -w*dstps:w*dstps;
+	int row, col;
+
+	if(srcbottomup) srcptr=&srcbuf[w*srcps*(h-1)];  /* bottom-up: begin at the last row in memory */
+	if(dstbottomup) dstptr=&dstbuf[w*dstps*(h-1)];
+	for(row=0; row<h; row++, srcptr+=srcstride, dstptr+=dststride)
+	{
+		for(col=0, srcptr2=srcptr, dstptr2=dstptr; col<w; col++, srcptr2+=srcps,
+			dstptr2+=dstps)
+		{
+			dstptr2[tjRedOffset[dstpf]]=srcptr2[tjRedOffset[srcpf]];
+			dstptr2[tjGreenOffset[dstpf]]=srcptr2[tjGreenOffset[srcpf]];
+			dstptr2[tjBlueOffset[dstpf]]=srcptr2[tjBlueOffset[srcpf]];  /* only R, G, B are written; other destination bytes stay untouched */
+		}
+	}
+}
+
+/* Read an image file (BMP or PPM reader, chosen by its first byte) into a malloc()ed buffer in pixel format dstpf. */
+int loadbmp(char *filename, unsigned char **buf, int *w, int *h,
+	int dstpf, int bottomup)
+{
+	int retval=0, dstps, srcpf, tempc;
+	struct jpeg_compress_struct cinfo;
+	struct my_error_mgr jerr;
+	cjpeg_source_ptr src;
+	FILE *file=NULL;
+	/* Returns 0 on success, -1 on failure (see bmpgeterr()).  On failure a non-NULL *buf is freed, so the caller must initialize *buf (e.g. to NULL). */
+	memset(&cinfo, 0, sizeof(struct jpeg_compress_struct));
+
+	if(!filename || !buf || !w || !h || dstpf<0 || dstpf>=TJ_NUMPF)
+		_throw("loadbmp(): Invalid argument");
+
+	if((file=fopen(filename, "rb"))==NULL)
+		_throwunix("loadbmp(): Cannot open input file");
+	/* Route libjpeg errors into errStr and back to the setjmp() below */
+	cinfo.err=jpeg_std_error(&jerr.pub);
+	jerr.pub.error_exit=my_error_exit;
+	jerr.pub.output_message=my_output_message;
+
+	if(setjmp(jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;  goto bailout;
+	}
+
+	jpeg_create_compress(&cinfo);
+	if((tempc=getc(file))==EOF && !ferror(file))  /* EOF without a stream error means an empty file */
+		_throw("loadbmp(): Input file contains no data")  /* no ';': the macro is a braced block followed by else */
+	else if(tempc==EOF || ungetc(tempc, file)==EOF) _throwunix("loadbmp(): Could not read input file");
+	/* The peeked byte selects the reader: 'B' -> BMP, 'P' -> PPM; it was pushed back so the reader sees the whole file */
+	if(tempc=='B')
+	{
+		if((src=jinit_read_bmp(&cinfo))==NULL)
+			_throw("loadbmp(): Could not initialize bitmap loader");
+	}
+	else if(tempc=='P')
+	{
+		if((src=jinit_read_ppm(&cinfo))==NULL)
+			_throw("loadbmp(): Could not initialize bitmap loader");
+	}
+	else _throw("loadbmp(): Unsupported file type");
+
+	src->input_file=file;
+	(*src->start_input)(&cinfo, src);
+	(*cinfo.mem->realize_virt_arrays)((j_common_ptr)&cinfo);
+
+	*w=cinfo.image_width;  *h=cinfo.image_height;
+	/* NOTE(review): gray is detected only for 1 component with in_color_space==JCS_RGB; confirm the readers never report JCS_GRAYSCALE here */
+	if(cinfo.input_components==1 && cinfo.in_color_space==JCS_RGB)
+		srcpf=TJPF_GRAY;
+	else srcpf=TJPF_RGB;
+
+	dstps=tjPixelSize[dstpf];
+	if((*buf=(unsigned char *)malloc((size_t)(*w)*(size_t)(*h)*(size_t)dstps))==NULL)  /* size_t: w*h*ps can exceed INT_MAX */
+		_throw("loadbmp(): Memory allocation failure");
+	/* Pull rows from the reader and convert each one into its slot in *buf (slot order reversed when bottomup) */
+	while(cinfo.next_scanline<cinfo.image_height)
+	{
+		int i, nlines=(*src->get_pixel_rows)(&cinfo, src);
+		for(i=0; i<nlines; i++)
+		{
+			unsigned char *outbuf;  int row;
+			row=cinfo.next_scanline+i;
+			if(bottomup) outbuf=&(*buf)[(size_t)((*h)-row-1)*(*w)*dstps];
+			else outbuf=&(*buf)[(size_t)row*(*w)*dstps];
+			pixelconvert(src->buffer[i], srcpf, 0, outbuf, dstpf, bottomup, *w,
+				1);  /* one row per call: outbuf addresses row `row` only */
+		}
+		cinfo.next_scanline+=nlines;
+	}
+
+	(*src->finish_input)(&cinfo, src);
+
+	bailout:
+	jpeg_destroy_compress(&cinfo);
+	if(file) fclose(file);
+	if(retval<0 && buf && *buf) {free(*buf);  *buf=NULL;}
+	return retval;
+}
+
+/* Write a w x h image (pixel format srcpf) to filename: BMP writer if the name ends in ".bmp", otherwise the PPM writer. */
+int savebmp(char *filename, unsigned char *buf, int w, int h, int srcpf,
+	int bottomup)
+{
+	int retval=0, srcps, dstpf;
+	struct jpeg_decompress_struct dinfo;
+	struct my_error_mgr jerr;
+	djpeg_dest_ptr dst;
+	FILE *file=NULL;
+	char *ptr=NULL;
+	/* Returns 0 on success, -1 on failure (see bmpgeterr()).  Nonzero bottomup means buf holds the last image row first. */
+	memset(&dinfo, 0, sizeof(struct jpeg_decompress_struct));
+
+	if(!filename || !buf || w<1 || h<1 || srcpf<0 || srcpf>=TJ_NUMPF)
+		_throw("savebmp(): Invalid argument");
+
+	if((file=fopen(filename, "wb"))==NULL)
+		_throwunix("savebmp(): Cannot open output file");
+
+	dinfo.err=jpeg_std_error(&jerr.pub);
+	jerr.pub.error_exit=my_error_exit;
+	jerr.pub.output_message=my_output_message;
+
+	if(setjmp(jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;  goto bailout;
+	}
+	/* No JPEG stream is read here: the output fields of dinfo are filled in by hand before the writer module is created */
+	jpeg_create_decompress(&dinfo);
+	if(srcpf==TJPF_GRAY)
+	{
+		dinfo.out_color_components=dinfo.output_components=1;
+		dinfo.out_color_space=JCS_GRAYSCALE;
+	}
+	else
+	{
+		dinfo.out_color_components=dinfo.output_components=3;
+		dinfo.out_color_space=JCS_RGB;
+	}
+	dinfo.image_width=w;  dinfo.image_height=h;
+	dinfo.global_state=DSTATE_READY;
+	dinfo.scale_num=dinfo.scale_denom=1;
+	/* Pick the writer from the file extension (case-insensitive ".bmp") */
+	ptr=strrchr(filename, '.');
+	if(ptr && !strcasecmp(ptr, ".bmp"))
+	{
+		if((dst=jinit_write_bmp(&dinfo, 0))==NULL)
+			_throw("savebmp(): Could not initialize bitmap writer");
+	}
+	else
+	{
+		if((dst=jinit_write_ppm(&dinfo))==NULL)
+			_throw("savebmp(): Could not initialize PPM writer");
+	}
+
+	dst->output_file=file;
+	(*dst->start_output)(&dinfo, dst);
+	(*dinfo.mem->realize_virt_arrays)((j_common_ptr)&dinfo);
+
+	if(srcpf==TJPF_GRAY) dstpf=srcpf;
+	else dstpf=TJPF_RGB;
+	srcps=tjPixelSize[srcpf];
+	/* Convert rows of buf into the writer's row buffer, dst->buffer_height rows at a time, and hand each batch to the writer */
+	while(dinfo.output_scanline<dinfo.output_height)
+	{
+		int i, nlines=dst->buffer_height;
+		for(i=0; i<nlines; i++)
+		{
+			unsigned char *inbuf;  int row;
+			row=dinfo.output_scanline+i;
+			if(bottomup) inbuf=&buf[(size_t)(h-row-1)*w*srcps];  /* size_t: the row offset can exceed INT_MAX */
+			else inbuf=&buf[(size_t)row*w*srcps];
+			pixelconvert(inbuf, srcpf, bottomup, dst->buffer[i], dstpf, 0, w,
+				1);  /* one row per call: inbuf addresses row `row` only */
+		}
+		(*dst->put_pixel_rows)(&dinfo, dst, nlines);
+		dinfo.output_scanline+=nlines;
+	}
+
+	(*dst->finish_output)(&dinfo, dst);
+
+	bailout:
+	jpeg_destroy_decompress(&dinfo);
+	if(file) fclose(file);
+	return retval;
+}
+/* Return the static errStr buffer holding the last recorded message ("No error" until one is stored) */
+const char *bmpgeterr(void)
+{
+	return errStr;
+}
diff --git a/bmp.h b/bmp.h
new file mode 100644
index 0000000..c50c260
--- /dev/null
+++ b/bmp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BMP_H__
+#define __BMP_H__
+
+#include "./turbojpeg.h"
+/* Load a BMP or PPM file into a malloc()ed *buf in pixel format pf, setting *w and *h; returns 0, or -1 on error */
+int loadbmp(char *filename, unsigned char **buf, int *w, int *h, int pf,
+	int bottomup);
+/* Save a w x h image in pixel format pf as BMP (name ends in ".bmp") or else PPM; returns 0, or -1 on error */
+int savebmp(char *filename, unsigned char *buf, int w, int h, int pf,
+	int bottomup);
+/* Return the last message recorded by loadbmp()/savebmp() ("No error" until one is recorded) */
+const char *bmpgeterr(void);
+
+#endif
diff --git a/cderror.h b/cderror.h
index 70435e1..6ccb37c 100644
--- a/cderror.h
+++ b/cderror.h
@@ -2,6 +2,7 @@
  * cderror.h
  *
  * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2009 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -32,7 +33,7 @@
 
 typedef enum {
 
-#define JMESSAGE(code,string)	code ,
+#define JMESSAGE(code,string)   code ,
 
 #endif /* JMAKE_ENUM_LIST */
 
@@ -45,6 +46,7 @@
 JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
 JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
 JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
+JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
 JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
 JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
 JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
@@ -60,7 +62,7 @@
 JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
 JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
 JMESSAGE(JTRC_GIF_BADVERSION,
-	 "Warning: unexpected GIF version number '%c%c%c'")
+         "Warning: unexpected GIF version number '%c%c%c'")
 JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
 JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
 JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
@@ -108,13 +110,13 @@
 #endif /* TARGA_SUPPORTED */
 
 JMESSAGE(JERR_BAD_CMAP_FILE,
-	 "Color map file is invalid or of unsupported format")
+         "Color map file is invalid or of unsupported format")
 JMESSAGE(JERR_TOO_MANY_COLORS,
-	 "Output file format cannot handle %d colormap entries")
+         "Output file format cannot handle %d colormap entries")
 JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
 #ifdef TARGA_SUPPORTED
 JMESSAGE(JERR_UNKNOWN_FORMAT,
-	 "Unrecognized input file format --- perhaps you need -targa")
+         "Unrecognized input file format --- perhaps you need -targa")
 #else
 JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
 #endif
diff --git a/cdjpeg.c b/cdjpeg.c
index b6250ff..7cc0d6e 100644
--- a/cdjpeg.c
+++ b/cdjpeg.c
@@ -1,60 +1,22 @@
 /*
  * cdjpeg.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains common support routines used by the IJG application
  * programs (cjpeg, djpeg, jpegtran).
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include <ctype.h>		/* to declare isupper(), tolower() */
-#ifdef NEED_SIGNAL_CATCHER
-#include <signal.h>		/* to declare signal() */
-#endif
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
-#endif
-
-
-/*
- * Signal catcher to ensure that temporary files are removed before aborting.
- * NB: for Amiga Manx C this is actually a global routine named _abort();
- * we put "#define signal_catcher _abort" in jconfig.h.  Talk about bogus...
- */
-
-#ifdef NEED_SIGNAL_CATCHER
-
-static j_common_ptr sig_cinfo;
-
-void				/* must be global for Manx C */
-signal_catcher (int signum)
-{
-  if (sig_cinfo != NULL) {
-    if (sig_cinfo->err != NULL) /* turn off trace output */
-      sig_cinfo->err->trace_level = 0;
-    jpeg_destroy(sig_cinfo);	/* clean up memory allocation & temp files */
-  }
-  exit(EXIT_FAILURE);
-}
-
-
-GLOBAL(void)
-enable_signal_catcher (j_common_ptr cinfo)
-{
-  sig_cinfo = cinfo;
-#ifdef SIGINT			/* not all systems have SIGINT */
-  signal(SIGINT, signal_catcher);
-#endif
-#ifdef SIGTERM			/* not all systems have SIGTERM */
-  signal(SIGTERM, signal_catcher);
-#endif
-}
-
+#include <io.h>                 /* to declare setmode() */
 #endif
 
 
@@ -75,8 +37,8 @@
     prog->percent_done = percent_done;
     if (total_passes > 1) {
       fprintf(stderr, "\rPass %d/%d: %3d%% ",
-	      prog->pub.completed_passes + prog->completed_extra_passes + 1,
-	      total_passes, percent_done);
+              prog->pub.completed_passes + prog->completed_extra_passes + 1,
+              total_passes, percent_done);
     } else {
       fprintf(stderr, "\r %3d%% ", percent_done);
     }
@@ -126,17 +88,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return FALSE;		/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return FALSE;             /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return FALSE;		/* no good */
-    nmatched++;			/* count matched characters */
+      return FALSE;             /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return FALSE;
-  return TRUE;			/* A-OK */
+  return TRUE;                  /* A-OK */
 }
 
 
@@ -150,10 +112,10 @@
 {
   FILE * input_file = stdin;
 
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((input_file = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
     fprintf(stderr, "Cannot reopen stdin\n");
     exit(EXIT_FAILURE);
@@ -168,10 +130,10 @@
 {
   FILE * output_file = stdout;
 
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdout), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((output_file = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) {
     fprintf(stderr, "Cannot reopen stdout\n");
     exit(EXIT_FAILURE);
diff --git a/cdjpeg.h b/cdjpeg.h
index 2b387b6..8461ee3 100644
--- a/cdjpeg.h
+++ b/cdjpeg.h
@@ -1,20 +1,22 @@
 /*
  * cdjpeg.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains common declarations for the sample applications
  * cjpeg and djpeg.  It is NOT used by the core JPEG library.
  */
 
-#define JPEG_CJPEG_DJPEG	/* define proper options in jconfig.h */
-#define JPEG_INTERNAL_OPTIONS	/* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
+#define JPEG_CJPEG_DJPEG        /* define proper options in jconfig.h */
+#define JPEG_INTERNAL_OPTIONS   /* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jerror.h"		/* get library error codes too */
-#include "cderror.h"		/* get application-specific error codes */
+#include "jerror.h"             /* get library error codes too */
+#include "cderror.h"            /* get application-specific error codes */
 
 
 /*
@@ -24,12 +26,9 @@
 typedef struct cjpeg_source_struct * cjpeg_source_ptr;
 
 struct cjpeg_source_struct {
-  JMETHOD(void, start_input, (j_compress_ptr cinfo,
-			      cjpeg_source_ptr sinfo));
-  JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
-  JMETHOD(void, finish_input, (j_compress_ptr cinfo,
-			       cjpeg_source_ptr sinfo));
+  void (*start_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+  JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+  void (*finish_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
 
   FILE *input_file;
 
@@ -48,15 +47,12 @@
   /* start_output is called after jpeg_start_decompress finishes.
    * The color map will be ready at this time, if one is needed.
    */
-  JMETHOD(void, start_output, (j_decompress_ptr cinfo,
-			       djpeg_dest_ptr dinfo));
+  void (*start_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo);
   /* Emit the specified number of pixel rows from the buffer. */
-  JMETHOD(void, put_pixel_rows, (j_decompress_ptr cinfo,
-				 djpeg_dest_ptr dinfo,
-				 JDIMENSION rows_supplied));
+  void (*put_pixel_rows) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
+                          JDIMENSION rows_supplied);
   /* Finish up at the end of the image. */
-  JMETHOD(void, finish_output, (j_decompress_ptr cinfo,
-				djpeg_dest_ptr dinfo));
+  void (*finish_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo);
 
   /* Target file spec; filled in by djpeg.c after object is created. */
   FILE * output_file;
@@ -79,9 +75,9 @@
  */
 
 struct cdjpeg_progress_mgr {
-  struct jpeg_progress_mgr pub;	/* fields known to JPEG library */
-  int completed_extra_passes;	/* extra passes completed */
-  int total_extra_passes;	/* total extra */
+  struct jpeg_progress_mgr pub; /* fields known to JPEG library */
+  int completed_extra_passes;   /* extra passes completed */
+  int total_extra_passes;       /* total extra */
   /* last printed percentage stored here to avoid multiple printouts */
   int percent_done;
 };
@@ -89,96 +85,60 @@
 typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
 
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_read_bmp		jIRdBMP
-#define jinit_write_bmp		jIWrBMP
-#define jinit_read_gif		jIRdGIF
-#define jinit_write_gif		jIWrGIF
-#define jinit_read_ppm		jIRdPPM
-#define jinit_write_ppm		jIWrPPM
-#define jinit_read_rle		jIRdRLE
-#define jinit_write_rle		jIWrRLE
-#define jinit_read_targa	jIRdTarga
-#define jinit_write_targa	jIWrTarga
-#define read_quant_tables	RdQTables
-#define read_scan_script	RdScnScript
-#define set_quant_slots		SetQSlots
-#define set_sample_factors	SetSFacts
-#define read_color_map		RdCMap
-#define enable_signal_catcher	EnSigCatcher
-#define start_progress_monitor	StProgMon
-#define end_progress_monitor	EnProgMon
-#define read_stdin		RdStdin
-#define write_stdout		WrStdout
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
 /* Module selection routines for I/O modules. */
 
-EXTERN(cjpeg_source_ptr) jinit_read_bmp JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_bmp JPP((j_decompress_ptr cinfo,
-					    boolean is_os2));
-EXTERN(cjpeg_source_ptr) jinit_read_gif JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_gif JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_ppm JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_ppm JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_rle JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_rle JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_targa JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
+EXTERN(cjpeg_source_ptr) jinit_read_bmp (j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_bmp (j_decompress_ptr cinfo,
+                                        boolean is_os2);
+EXTERN(cjpeg_source_ptr) jinit_read_gif (j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_gif (j_decompress_ptr cinfo);
+EXTERN(cjpeg_source_ptr) jinit_read_ppm (j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_ppm (j_decompress_ptr cinfo);
+EXTERN(cjpeg_source_ptr) jinit_read_rle (j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_rle (j_decompress_ptr cinfo);
+EXTERN(cjpeg_source_ptr) jinit_read_targa (j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_targa (j_decompress_ptr cinfo);
 
 /* cjpeg support routines (in rdswitch.c) */
 
-EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
-				    int scale_factor, boolean force_baseline));
-EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
-EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
-EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
+EXTERN(boolean) read_quant_tables (j_compress_ptr cinfo, char * filename,
+                                   boolean force_baseline);
+EXTERN(boolean) read_scan_script (j_compress_ptr cinfo, char * filename);
+EXTERN(boolean) set_quality_ratings (j_compress_ptr cinfo, char *arg,
+                                     boolean force_baseline);
+EXTERN(boolean) set_quant_slots (j_compress_ptr cinfo, char *arg);
+EXTERN(boolean) set_sample_factors (j_compress_ptr cinfo, char *arg);
 
 /* djpeg support routines (in rdcolmap.c) */
 
-EXTERN(void) read_color_map JPP((j_decompress_ptr cinfo, FILE * infile));
+EXTERN(void) read_color_map (j_decompress_ptr cinfo, FILE * infile);
 
 /* common support routines (in cdjpeg.c) */
 
-EXTERN(void) enable_signal_catcher JPP((j_common_ptr cinfo));
-EXTERN(void) start_progress_monitor JPP((j_common_ptr cinfo,
-					 cd_progress_ptr progress));
-EXTERN(void) end_progress_monitor JPP((j_common_ptr cinfo));
-EXTERN(boolean) keymatch JPP((char * arg, const char * keyword, int minchars));
-EXTERN(FILE *) read_stdin JPP((void));
-EXTERN(FILE *) write_stdout JPP((void));
+EXTERN(void) enable_signal_catcher (j_common_ptr cinfo);
+EXTERN(void) start_progress_monitor (j_common_ptr cinfo,
+                                     cd_progress_ptr progress);
+EXTERN(void) end_progress_monitor (j_common_ptr cinfo);
+EXTERN(boolean) keymatch (char * arg, const char * keyword, int minchars);
+EXTERN(FILE *) read_stdin (void);
+EXTERN(FILE *) write_stdout (void);
 
 /* miscellaneous useful macros */
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#define WRITE_BINARY    "w"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
-#endif
+#define READ_BINARY     "rb"
+#define WRITE_BINARY    "wb"
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
-#ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
-#else
 #define EXIT_SUCCESS  0
 #endif
-#endif
 #ifndef EXIT_WARNING
-#ifdef VMS
-#define EXIT_WARNING  1		/* VMS is very nonstandard */
-#else
 #define EXIT_WARNING  2
 #endif
-#endif
diff --git a/change.log b/change.log
index 74102c0..b60ddd6 100644
--- a/change.log
+++ b/change.log
@@ -1,6 +1,85 @@
+NOTE:  This file was modified by The libjpeg-turbo Project to include only
+information relevant to libjpeg-turbo.
+
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 8d  15-Jan-2012
+-----------------------
+
+Add cjpeg -rgb option to create RGB JPEG files.
+Using this switch suppresses the conversion from RGB
+colorspace input to the default YCbCr JPEG colorspace.
+Thanks to Michael Koch for the initial suggestion.
+
+Add option to disable the region adjustment in the transupp crop code.
+Thanks to Jeffrey Friedl for the suggestion.
+
+
+Version 8b  16-May-2010
+-----------------------
+
+Repair problem in new memory source manager with corrupt JPEG data.
+Thanks to Ted Campbell and Samuel Chun for the report.
+
+
+Version 8a  28-Feb-2010
+-----------------------
+
+Writing tables-only datastreams via jpeg_write_tables works again.
+
+Support 32-bit BMPs (RGB image with Alpha channel) for read in cjpeg.
+Thanks to Brett Blackham for the suggestion.
+
+
+Version 8  10-Jan-2010
+----------------------
+
+Add sanity check in BMP reader module to avoid cjpeg crash for empty input
+image (thanks to Isaev Ildar of ISP RAS, Moscow, RU for reporting this error).
+
+Add data source and destination managers for read from and write to
+memory buffers.  New API functions jpeg_mem_src and jpeg_mem_dest.
+Thanks to Roberto Boni from Italy for the suggestion.
+
+
+Version 7  27-Jun-2009
+----------------------
+
+New scaled DCTs implemented.
+djpeg now supports scalings N/8 with all N from 1 to 16.
+
+cjpeg -quality option has been extended for support of separate quality
+settings for luminance and chrominance (or in general, for every provided
+quantization table slot).
+New API function jpeg_default_qtables() and q_scale_factor array in library.
+
+Support arithmetic entropy encoding and decoding.
+Added files jaricom.c, jcarith.c, jdarith.c.
+
+jpegtran has a new "lossless" cropping feature.
+
+Implement -perfect option in jpegtran, new API function
+jtransform_perfect_transform() in transupp. (DP 204_perfect.dpatch)
+
+Better error messages for jpegtran fopen failure.
+(DP 203_jpegtran_errmsg.dpatch)
+
+Fix byte order issue with 16bit PPM/PGM files in rdppm.c/wrppm.c:
+according to Netpbm, the de facto standard implementation of the PNM formats,
+the most significant byte is first. (DP 203_rdppm.dpatch)
+
+Add -raw option to rdjpgcom not to mangle the output.
+(DP 205_rdjpgcom_raw.dpatch)
+
+Make rdjpgcom locale aware. (DP 201_rdjpgcom_locale.dpatch)
+
+Add extern "C" to jpeglib.h.
+This avoids the need to put extern "C" { ... } around #include "jpeglib.h"
+in your C++ application.  Defining the symbol DONT_USE_EXTERN_C in the
+configuration prevents this. (DP 202_jpeglib.h_c++.dpatch)
+
+
 Version 6b  27-Mar-1998
 -----------------------
 
diff --git a/cjpeg.1 b/cjpeg.1
index d175a96..5f5090c 100644
--- a/cjpeg.1
+++ b/cjpeg.1
@@ -1,4 +1,4 @@
-.TH CJPEG 1 "20 March 1998"
+.TH CJPEG 1 "11 May 2014"
 .SH NAME
 cjpeg \- compress an image file to a JPEG file
 .SH SYNOPSIS
@@ -16,7 +16,7 @@
 compresses the named image file, or the standard input if no file is
 named, and produces a JPEG/JFIF file on the standard output.
 The currently supported input file formats are: PPM (PBMPLUS color
-format), PGM (PBMPLUS gray-scale format), BMP, Targa, and RLE (Utah Raster
+format), PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster
 Toolkit format).  (RLE is supported only if the URT library is available.)
 .SH OPTIONS
 All switch names may be abbreviated; for example,
@@ -36,7 +36,7 @@
 .PP
 The basic switches are:
 .TP
-.BI \-quality " N"
+.BI \-quality " N[,...]"
 Scale quantization tables to adjust image quality.  Quality is 0 (worst) to
 100 (best); default is 75.  (See below for more info.)
 .TP
@@ -49,6 +49,11 @@
 .BR \-grayscale ,
 you'll get a smaller JPEG file that takes less time to process.
 .TP
+.B \-rgb
+Create RGB JPEG file.
+Using this switch suppresses the conversion from RGB
+colorspace input to the default YCbCr JPEG colorspace.
+.TP
 .B \-optimize
 Perform optimization of entropy encoding parameters.  Without this, default
 encoding parameters are used.
@@ -108,6 +113,36 @@
 .B \-baseline
 if you need to ensure compatibility at low quality values.)
 .PP
+The \fB\-quality\fR option has been extended in this version of \fBcjpeg\fR to
+support separate quality settings for luminance and chrominance (or, in
+general, separate settings for every quantization table slot.)  The principle
+is the same as chrominance subsampling:  since the human eye is more sensitive
+to spatial changes in brightness than spatial changes in color, the chrominance
+components can be quantized more than the luminance components without
+incurring any visible image quality loss.  However, unlike subsampling, this
+feature reduces data in the frequency domain instead of the spatial domain,
+which allows for more fine-grained control.  This option is useful in
+quality-sensitive applications, for which the artifacts generated by
+subsampling may be unacceptable.
+.PP
+The \fB\-quality\fR option accepts a comma-separated list of parameters, which
+respectively refer to the quality levels that should be assigned to the
+quantization table slots.  If there are more q-table slots than parameters,
+then the last parameter is replicated.  Thus, if only one quality parameter is
+given, this is used for both luminance and chrominance (slots 0 and 1,
+respectively), preserving the legacy behavior of cjpeg v6b and prior.
+More (or customized) quantization tables can be set with the \fB\-qtables\fR
+option and assigned to components with the \fB\-qslots\fR option (see the
+"wizard" switches below.)
+.PP
+JPEG files generated with separate luminance and chrominance quality are fully
+compliant with standard JPEG decoders.
+.PP
+.BR CAUTION:
+For this setting to be useful, be sure to pass an argument of \fB\-sample 1x1\fR
+to \fBcjpeg\fR to disable chrominance subsampling.  Otherwise, the default
+subsampling level (2x2, AKA "4:2:0") will be used.
+.PP
 The
 .B \-progressive
 switch creates a "progressive JPEG" file.  In this type of JPEG file, the data
@@ -117,25 +152,41 @@
 display with each subsequent scan.  The final image is exactly equivalent to a
 standard JPEG file of the same quality setting, and the total file size is
 about the same --- often a little smaller.
-.B Caution:
-progressive JPEG is not yet widely implemented, so many decoders will be
-unable to view a progressive JPEG file at all.
 .PP
 Switches for advanced users:
 .TP
+.B \-arithmetic
+Use arithmetic coding.
+.B Caution:
+arithmetic coded JPEG is not yet widely implemented, so many decoders will be
+unable to view an arithmetic coded JPEG file at all.
+.TP
 .B \-dct int
 Use integer DCT method (default).
 .TP
 .B \-dct fast
 Use fast integer DCT (less accurate).
+In libjpeg-turbo, the fast method is generally about 5-15% faster than the int
+method when using the x86/x86-64 SIMD extensions (results may vary with other
+SIMD implementations, or when using libjpeg-turbo without SIMD extensions.)
+For quality levels of 90 and below, there should be little or no perceptible
+difference between the two algorithms.  For quality levels above 90, however,
+the difference between the fast and the int methods becomes more pronounced.
+With quality=97, for instance, the fast method generally incurs about a 1-3 dB
+loss (in PSNR) relative to the int method, but this can be larger for some
+images.  Do not use the fast method with quality levels above 97.  The
+algorithm often degenerates at quality=98 and above and can actually produce a
+more lossy image than if lower quality levels had been used.  Also, in
+libjpeg-turbo, the fast method is not fully accelerated for quality levels
+above 97, so it will be slower than the int method.
 .TP
 .B \-dct float
 Use floating-point DCT method.
-The float method is very slightly more accurate than the int method, but is
-much slower unless your machine has very fast floating-point hardware.  Also
-note that results of the floating-point method may vary slightly across
-machines, while the integer methods should give the same results everywhere.
-The fast integer method is much less accurate than the other two.
+The float method is mainly a legacy feature.  It does not produce significantly
+more accurate results than the int method, and it is much slower.  The float
+method may also give different results on different machines due to varying
+roundoff behavior, whereas the integer methods should give the same results on
+all machines.
 .TP
 .BI \-restart " N"
 Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is
@@ -157,6 +208,11 @@
 .BI \-outfile " name"
 Send output image to the named file, not to standard output.
 .TP
+.B \-memdst
+Compress to memory instead of a file.  This feature was implemented mainly as a
+way of testing the in-memory destination manager (jpeg_mem_dest()), but it is
+also useful for benchmarking, since it reduces the I/O overhead.
+.TP
 .B \-verbose
 Enable debug printout.  More
 .BR \-v 's
@@ -211,7 +267,7 @@
 .PP
 The "wizard" switches are intended for experimentation with JPEG.  If you
 don't know what you are doing, \fBdon't use them\fR.  These switches are
-documented further in the file wizard.doc.
+documented further in the file wizard.txt.
 .SH EXAMPLES
 .LP
 This example compresses the PPM file foo.ppm with a quality factor of
@@ -275,12 +331,15 @@
 Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
 .SH AUTHOR
 Independent JPEG Group
-.SH BUGS
-Arithmetic coding is not supported for legal reasons.
 .PP
-GIF input files are no longer supported, to avoid the Unisys LZW patent.
-Use a Unisys-licensed program if you need to read a GIF file.  (Conversion
-of GIF files to JPEG is usually a bad idea anyway.)
+This file was modified by The libjpeg-turbo Project to include only information
+relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
+features not present in libjpeg.
+.SH BUGS
+Support for GIF input files was removed in cjpeg v6b due to concerns over
+the Unisys LZW patent.  Although this patent expired in 2006, cjpeg still
+lacks GIF support, for these historical reasons.  (Conversion of GIF files to
+JPEG is usually a bad idea anyway.)
 .PP
 Not all variants of BMP and Targa file formats are supported.
 .PP
@@ -288,5 +347,3 @@
 .B \-targa
 switch is not a bug, it's a feature.  (It would be a bug if the Targa format
 designers had not been clueless.)
-.PP
-Still not as fast as we'd like.
diff --git a/cjpeg.c b/cjpeg.c
index f2a929f..a7d2504 100644
--- a/cjpeg.c
+++ b/cjpeg.c
@@ -1,8 +1,11 @@
 /*
  * cjpeg.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2003-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a command-line user interface for the JPEG compressor.
@@ -10,8 +13,8 @@
  *
  * Two different command line styles are permitted, depending on the
  * compile-time switch TWO_FILE_COMMANDLINE:
- *	cjpeg [options]  inputfile outputfile
- *	cjpeg [options]  [inputfile]
+ *      cjpeg [options]  inputfile outputfile
+ *      cjpeg [options]  [inputfile]
  * In the second style, output is always to standard output, which you'd
  * normally redirect to a file or pipe to some other program.  Input is
  * either from a named file or from standard input (typically redirected).
@@ -19,27 +22,28 @@
  * don't support pipes.  Also, you MUST use the first style if your system
  * doesn't do binary I/O to stdin/stdout.
  * To simplify script writing, the "-outfile" switch is provided.  The syntax
- *	cjpeg [options]  -outfile outputfile  inputfile
+ *      cjpeg [options]  -outfile outputfile  inputfile
  * works regardless of which command line style is used.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "jversion.h"		/* for version message */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
 
 /* Create the add-on message string table. */
 
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 static const char * const cdjpeg_message_table[] = {
 #include "cderror.h"
@@ -73,7 +77,7 @@
  * seldom-used ID field), so we provide a switch to force Targa input mode.
  */
 
-static boolean is_targa;	/* records user -targa switch */
+static boolean is_targa;        /* records user -targa switch */
 
 
 LOCAL(cjpeg_source_ptr)
@@ -120,7 +124,7 @@
     break;
   }
 
-  return NULL;			/* suppress compiler warnings */
+  return NULL;                  /* suppress compiler warnings */
 }
 
 
@@ -133,8 +137,9 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+boolean memdst;  /* for -memdst switch */
 
 
 LOCAL(void)
@@ -149,8 +154,9 @@
 #endif
 
   fprintf(stderr, "Switches (names may be abbreviated):\n");
-  fprintf(stderr, "  -quality N     Compression quality (0..100; 5-95 is useful range)\n");
+  fprintf(stderr, "  -quality N[,...]   Compression quality (0..100; 5-95 is useful range)\n");
   fprintf(stderr, "  -grayscale     Create monochrome JPEG file\n");
+  fprintf(stderr, "  -rgb           Create RGB JPEG file\n");
 #ifdef ENTROPY_OPT_SUPPORTED
   fprintf(stderr, "  -optimize      Optimize Huffman table (smaller file, but slow compression)\n");
 #endif
@@ -161,17 +167,20 @@
   fprintf(stderr, "  -targa         Input file is Targa format (usually not needed)\n");
 #endif
   fprintf(stderr, "Switches for advanced users:\n");
+#ifdef C_ARITH_CODING_SUPPORTED
+  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
+#endif
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   fprintf(stderr, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
-	  (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   fprintf(stderr, "  -dct float     Use floating-point DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
 #endif
   fprintf(stderr, "  -restart N     Set restart interval in rows, or in blocks with B\n");
 #ifdef INPUT_SMOOTHING_SUPPORTED
@@ -179,11 +188,11 @@
 #endif
   fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
   fprintf(stderr, "  -outfile name  Specify name for output file\n");
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  fprintf(stderr, "  -memdst        Compress to memory instead of file (useful for benchmarking)\n");
+#endif
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   fprintf(stderr, "Switches for wizards:\n");
-#ifdef C_ARITH_CODING_SUPPORTED
-  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
-#endif
   fprintf(stderr, "  -baseline      Force baseline quantization tables\n");
   fprintf(stderr, "  -qtables file  Use quantization tables given in file\n");
   fprintf(stderr, "  -qslots N[,...]    Set component quantization tables\n");
@@ -197,7 +206,7 @@
 
 LOCAL(int)
 parse_switches (j_compress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -209,25 +218,21 @@
 {
   int argn;
   char * arg;
-  int quality;			/* -quality parameter */
-  int q_scale_factor;		/* scaling percentage for -qtables */
   boolean force_baseline;
   boolean simple_progressive;
-  char * qtablefile = NULL;	/* saves -qtables filename if any */
-  char * qslotsarg = NULL;	/* saves -qslots parm if any */
-  char * samplearg = NULL;	/* saves -sample parm if any */
-  char * scansarg = NULL;	/* saves -scans parm if any */
+  char * qualityarg = NULL;     /* saves -quality parm if any */
+  char * qtablefile = NULL;     /* saves -qtables filename if any */
+  char * qslotsarg = NULL;      /* saves -qslots parm if any */
+  char * samplearg = NULL;      /* saves -sample parm if any */
+  char * scansarg = NULL;       /* saves -scans parm if any */
 
   /* Set up default JPEG parameters. */
-  /* Note that default -quality level need not, and does not,
-   * match the default scaling for an explicit -qtables argument.
-   */
-  quality = 75;			/* default -quality value */
-  q_scale_factor = 100;		/* default to no scaling for -qtables */
-  force_baseline = FALSE;	/* by default, allow 16-bit quantizers */
+
+  force_baseline = FALSE;       /* by default, allow 16-bit quantizers */
   simple_progressive = FALSE;
   is_targa = FALSE;
   outfilename = NULL;
+  memdst = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -237,12 +242,12 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "arithmetic", 1)) {
       /* Use arithmetic coding. */
@@ -250,7 +255,7 @@
       cinfo->arith_code = TRUE;
 #else
       fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -260,16 +265,16 @@
 
     } else if (keymatch(arg, "dct", 2)) {
       /* Select DCT algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "int", 1)) {
-	cinfo->dct_method = JDCT_ISLOW;
+        cinfo->dct_method = JDCT_ISLOW;
       } else if (keymatch(argv[argn], "fast", 2)) {
-	cinfo->dct_method = JDCT_IFAST;
+        cinfo->dct_method = JDCT_IFAST;
       } else if (keymatch(argv[argn], "float", 2)) {
-	cinfo->dct_method = JDCT_FLOAT;
+        cinfo->dct_method = JDCT_FLOAT;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
       /* Enable debug printouts. */
@@ -277,9 +282,12 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "Independent JPEG Group's CJPEG, version %s\n%s\n",
-		JVERSION, JCOPYRIGHT);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
@@ -287,17 +295,21 @@
       /* Force a monochrome JPEG file to be generated. */
       jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
 
+    } else if (keymatch(arg, "rgb", 3)) {
+      /* Force an RGB JPEG file to be generated. */
+      jpeg_set_colorspace(cinfo, JCS_RGB);
+
     } else if (keymatch(arg, "maxmemory", 3)) {
       /* Maximum memory in Kb (or Mb with 'm'). */
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
@@ -305,16 +317,16 @@
 #ifdef ENTROPY_OPT_SUPPORTED
       cinfo->optimize_coding = TRUE;
 #else
-      fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, entropy optimization was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
 
     } else if (keymatch(arg, "progressive", 1)) {
       /* Select simple progressive mode. */
@@ -322,24 +334,31 @@
       simple_progressive = TRUE;
       /* We must postpone execution until num_components is known. */
 #else
-      fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, progressive output was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
+
+    } else if (keymatch(arg, "memdst", 2)) {
+      /* Use in-memory destination manager */
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+      memdst = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, in-memory destination manager was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "quality", 1)) {
-      /* Quality factor (quantization table scaling factor). */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      if (sscanf(argv[argn], "%d", &quality) != 1)
-	usage();
-      /* Change scale factor in case -qtables is present. */
-      q_scale_factor = jpeg_quality_scaling(quality);
+      /* Quality ratings (quantization table scaling factors). */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      qualityarg = argv[argn];
 
     } else if (keymatch(arg, "qslots", 2)) {
       /* Quantization table slot numbers. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       qslotsarg = argv[argn];
       /* Must delay setting qslots until after we have processed any
        * colorspace-determining switches, since jpeg_set_colorspace sets
@@ -348,8 +367,8 @@
 
     } else if (keymatch(arg, "qtables", 2)) {
       /* Quantization tables fetched from file. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       qtablefile = argv[argn];
       /* We postpone actually reading the file in case -quality comes later. */
 
@@ -358,40 +377,40 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (lval < 0 || lval > 65535L)
-	usage();
+        usage();
       if (ch == 'b' || ch == 'B') {
-	cinfo->restart_interval = (unsigned int) lval;
-	cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+        cinfo->restart_interval = (unsigned int) lval;
+        cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
       } else {
-	cinfo->restart_in_rows = (int) lval;
-	/* restart_interval will be computed during startup */
+        cinfo->restart_in_rows = (int) lval;
+        /* restart_interval will be computed during startup */
       }
 
     } else if (keymatch(arg, "sample", 2)) {
       /* Set sampling factors. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       samplearg = argv[argn];
       /* Must delay setting sample factors until after we have processed any
        * colorspace-determining switches, since jpeg_set_colorspace sets
        * default sampling factors.
        */
 
-    } else if (keymatch(arg, "scans", 2)) {
+    } else if (keymatch(arg, "scans", 4)) {
       /* Set scan script. */
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       scansarg = argv[argn];
       /* We must postpone reading the file in case -progressive appears. */
 #else
-      fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
-	      progname);
+      fprintf(stderr, "%s: sorry, multi-scan output was not compiled in\n",
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -399,12 +418,12 @@
       /* Set input smoothing factor. */
       int val;
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d", &val) != 1)
-	usage();
+        usage();
       if (val < 0 || val > 100)
-	usage();
+        usage();
       cinfo->smoothing_factor = val;
 
     } else if (keymatch(arg, "targa", 1)) {
@@ -412,7 +431,7 @@
       is_targa = TRUE;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
@@ -422,34 +441,35 @@
 
     /* Set quantization tables for selected quality. */
     /* Some or all may be overridden if -qtables is present. */
-    jpeg_set_quality(cinfo, quality, force_baseline);
+    if (qualityarg != NULL)     /* process -quality if it was present */
+      if (! set_quality_ratings(cinfo, qualityarg, force_baseline))
+        usage();
 
-    if (qtablefile != NULL)	/* process -qtables if it was present */
-      if (! read_quant_tables(cinfo, qtablefile,
-			      q_scale_factor, force_baseline))
-	usage();
+    if (qtablefile != NULL)     /* process -qtables if it was present */
+      if (! read_quant_tables(cinfo, qtablefile, force_baseline))
+        usage();
 
-    if (qslotsarg != NULL)	/* process -qslots if it was present */
+    if (qslotsarg != NULL)      /* process -qslots if it was present */
       if (! set_quant_slots(cinfo, qslotsarg))
-	usage();
+        usage();
 
-    if (samplearg != NULL)	/* process -sample if it was present */
+    if (samplearg != NULL)      /* process -sample if it was present */
       if (! set_sample_factors(cinfo, samplearg))
-	usage();
+        usage();
 
 #ifdef C_PROGRESSIVE_SUPPORTED
-    if (simple_progressive)	/* process -progressive; -scans can override */
+    if (simple_progressive)     /* process -progressive; -scans can override */
       jpeg_simple_progression(cinfo);
 #endif
 
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-    if (scansarg != NULL)	/* process -scans if it was present */
+    if (scansarg != NULL)       /* process -scans if it was present */
       if (! read_scan_script(cinfo, scansarg))
-	usage();
+        usage();
 #endif
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -468,7 +488,9 @@
   int file_index;
   cjpeg_source_ptr src_mgr;
   FILE * input_file;
-  FILE * output_file;
+  FILE * output_file = NULL;
+  unsigned char *outbuffer = NULL;
+  unsigned long outsize = 0;
   JDIMENSION num_scanlines;
 
   /* On Mac, fetch a command line. */
@@ -478,7 +500,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "cjpeg";		/* in case C library doesn't provide it */
+    progname = "cjpeg";         /* in case C library doesn't provide it */
 
   /* Initialize the JPEG compression object with default error handling. */
   cinfo.err = jpeg_std_error(&jerr);
@@ -488,11 +510,6 @@
   jerr.first_addon_message = JMSG_FIRSTADDONCODE;
   jerr.last_addon_message = JMSG_LASTADDONCODE;
 
-  /* Now safe to enable signal catcher. */
-#ifdef NEED_SIGNAL_CATCHER
-  enable_signal_catcher((j_common_ptr) &cinfo);
-#endif
-
   /* Initialize JPEG parameters.
    * Much of this may be overridden later.
    * In particular, we don't yet know the input file's color space,
@@ -511,19 +528,21 @@
   file_index = parse_switches(&cinfo, argc, argv, 0, FALSE);
 
 #ifdef TWO_FILE_COMMANDLINE
-  /* Must have either -outfile switch or explicit output file name */
-  if (outfilename == NULL) {
-    if (file_index != argc-2) {
-      fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
-      usage();
-    }
-    outfilename = argv[file_index+1];
-  } else {
-    if (file_index != argc-1) {
-      fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
-      usage();
+  if (!memdst) {
+    /* Must have either -outfile switch or explicit output file name */
+    if (outfilename == NULL) {
+      if (file_index != argc-2) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
+      outfilename = argv[file_index+1];
+    } else {
+      if (file_index != argc-1) {
+        fprintf(stderr, "%s: must name one input and one output file\n",
+                progname);
+        usage();
+      }
     }
   }
 #else
@@ -551,7 +570,7 @@
       fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
       exit(EXIT_FAILURE);
     }
-  } else {
+  } else if (!memdst) {
     /* default output file is stdout */
     output_file = write_stdout();
   }
@@ -574,7 +593,12 @@
   file_index = parse_switches(&cinfo, argc, argv, 0, TRUE);
 
   /* Specify data destination for compression */
-  jpeg_stdio_dest(&cinfo, output_file);
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  if (memdst)
+    jpeg_mem_dest(&cinfo, &outbuffer, &outsize);
+  else
+#endif
+    jpeg_stdio_dest(&cinfo, output_file);
 
   /* Start compressor */
   jpeg_start_compress(&cinfo, TRUE);
@@ -593,14 +617,20 @@
   /* Close files, if we opened them */
   if (input_file != stdin)
     fclose(input_file);
-  if (output_file != stdout)
+  if (output_file != stdout && output_file != NULL)
     fclose(output_file);
 
 #ifdef PROGRESS_REPORT
   end_progress_monitor((j_common_ptr) &cinfo);
 #endif
 
+  if (memdst) {
+    fprintf(stderr, "Compressed size:  %lu bytes\n", outsize);
+    if (outbuffer != NULL)
+      free(outbuffer);
+  }
+
   /* All done. */
   exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/ckconfig.c b/ckconfig.c
deleted file mode 100644
index 34baf79..0000000
--- a/ckconfig.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * ckconfig.c
- *
- * Copyright (C) 1991-1994, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- */
-
-/*
- * This program is intended to help you determine how to configure the JPEG
- * software for installation on a particular system.  The idea is to try to
- * compile and execute this program.  If your compiler fails to compile the
- * program, make changes as indicated in the comments below.  Once you can
- * compile the program, run it, and it will produce a "jconfig.h" file for
- * your system.
- *
- * As a general rule, each time you try to compile this program,
- * pay attention only to the *first* error message you get from the compiler.
- * Many C compilers will issue lots of spurious error messages once they
- * have gotten confused.  Go to the line indicated in the first error message,
- * and read the comments preceding that line to see what to change.
- *
- * Almost all of the edits you may need to make to this program consist of
- * changing a line that reads "#define SOME_SYMBOL" to "#undef SOME_SYMBOL",
- * or vice versa.  This is called defining or undefining that symbol.
- */
-
-
-/* First we must see if your system has the include files we need.
- * We start out with the assumption that your system has all the ANSI-standard
- * include files.  If you get any error trying to include one of these files,
- * undefine the corresponding HAVE_xxx symbol.
- */
-
-#define HAVE_STDDEF_H		/* replace 'define' by 'undef' if error here */
-#ifdef HAVE_STDDEF_H		/* next line will be skipped if you undef... */
-#include <stddef.h>
-#endif
-
-#define HAVE_STDLIB_H		/* same thing for stdlib.h */
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#include <stdio.h>		/* If you ain't got this, you ain't got C. */
-
-/* We have to see if your string functions are defined by
- * strings.h (old BSD convention) or string.h (everybody else).
- * We try the non-BSD convention first; define NEED_BSD_STRINGS
- * if the compiler says it can't find string.h.
- */
-
-#undef NEED_BSD_STRINGS
-
-#ifdef NEED_BSD_STRINGS
-#include <strings.h>
-#else
-#include <string.h>
-#endif
-
-/* On some systems (especially older Unix machines), type size_t is
- * defined only in the include file <sys/types.h>.  If you get a failure
- * on the size_t test below, try defining NEED_SYS_TYPES_H.
- */
-
-#undef NEED_SYS_TYPES_H		/* start by assuming we don't need it */
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-
-/* Usually type size_t is defined in one of the include files we've included
- * above.  If not, you'll get an error on the "typedef size_t my_size_t;" line.
- * In that case, first try defining NEED_SYS_TYPES_H just above.
- * If that doesn't work, you'll have to search through your system library
- * to figure out which include file defines "size_t".  Look for a line that
- * says "typedef something-or-other size_t;".  Then, change the line below
- * that says "#include <someincludefile.h>" to instead include the file
- * you found size_t in, and define NEED_SPECIAL_INCLUDE.  If you can't find
- * type size_t anywhere, try replacing "#include <someincludefile.h>" with
- * "typedef unsigned int size_t;".
- */
-
-#undef NEED_SPECIAL_INCLUDE	/* assume we DON'T need it, for starters */
-
-#ifdef NEED_SPECIAL_INCLUDE
-#include <someincludefile.h>
-#endif
-
-typedef size_t my_size_t;	/* The payoff: do we have size_t now? */
-
-
-/* The next question is whether your compiler supports ANSI-style function
- * prototypes.  You need to know this in order to choose between using
- * makefile.ansi and using makefile.unix.
- * The #define line below is set to assume you have ANSI function prototypes.
- * If you get an error in this group of lines, undefine HAVE_PROTOTYPES.
- */
-
-#define HAVE_PROTOTYPES
-
-#ifdef HAVE_PROTOTYPES
-int testfunction (int arg1, int * arg2); /* check prototypes */
-
-struct methods_struct {		/* check method-pointer declarations */
-  int (*error_exit) (char *msgtext);
-  int (*trace_message) (char *msgtext);
-  int (*another_method) (void);
-};
-
-int testfunction (int arg1, int * arg2) /* check definitions */
-{
-  return arg2[arg1];
-}
-
-int test2function (void)	/* check void arg list */
-{
-  return 0;
-}
-#endif
-
-
-/* Now we want to find out if your compiler knows what "unsigned char" means.
- * If you get an error on the "unsigned char un_char;" line,
- * then undefine HAVE_UNSIGNED_CHAR.
- */
-
-#define HAVE_UNSIGNED_CHAR
-
-#ifdef HAVE_UNSIGNED_CHAR
-unsigned char un_char;
-#endif
-
-
-/* Now we want to find out if your compiler knows what "unsigned short" means.
- * If you get an error on the "unsigned short un_short;" line,
- * then undefine HAVE_UNSIGNED_SHORT.
- */
-
-#define HAVE_UNSIGNED_SHORT
-
-#ifdef HAVE_UNSIGNED_SHORT
-unsigned short un_short;
-#endif
-
-
-/* Now we want to find out if your compiler understands type "void".
- * If you get an error anywhere in here, undefine HAVE_VOID.
- */
-
-#define HAVE_VOID
-
-#ifdef HAVE_VOID
-/* Caution: a C++ compiler will insist on complete prototypes */
-typedef void * void_ptr;	/* check void * */
-#ifdef HAVE_PROTOTYPES		/* check ptr to function returning void */
-typedef void (*void_func) (int a, int b);
-#else
-typedef void (*void_func) ();
-#endif
-
-#ifdef HAVE_PROTOTYPES		/* check void function result */
-void test3function (void_ptr arg1, void_func arg2)
-#else
-void test3function (arg1, arg2)
-     void_ptr arg1;
-     void_func arg2;
-#endif
-{
-  char * locptr = (char *) arg1; /* check casting to and from void * */
-  arg1 = (void *) locptr;
-  (*arg2) (1, 2);		/* check call of fcn returning void */
-}
-#endif
-
-
-/* Now we want to find out if your compiler knows what "const" means.
- * If you get an error here, undefine HAVE_CONST.
- */
-
-#define HAVE_CONST
-
-#ifdef HAVE_CONST
-static const int carray[3] = {1, 2, 3};
-
-#ifdef HAVE_PROTOTYPES
-int test4function (const int arg1)
-#else
-int test4function (arg1)
-     const int arg1;
-#endif
-{
-  return carray[arg1];
-}
-#endif
-
-
-/* If you get an error or warning about this structure definition,
- * define INCOMPLETE_TYPES_BROKEN.
- */
-
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifndef INCOMPLETE_TYPES_BROKEN
-typedef struct undefined_structure * undef_struct_ptr;
-#endif
-
-
-/* If you get an error about duplicate names,
- * define NEED_SHORT_EXTERNAL_NAMES.
- */
-
-#undef NEED_SHORT_EXTERNAL_NAMES
-
-#ifndef NEED_SHORT_EXTERNAL_NAMES
-
-int possibly_duplicate_function ()
-{
-  return 0;
-}
-
-int possibly_dupli_function ()
-{
-  return 1;
-}
-
-#endif
-
-
-
-/************************************************************************
- *  OK, that's it.  You should not have to change anything beyond this
- *  point in order to compile and execute this program.  (You might get
- *  some warnings, but you can ignore them.)
- *  When you run the program, it will make a couple more tests that it
- *  can do automatically, and then it will create jconfig.h and print out
- *  any additional suggestions it has.
- ************************************************************************
- */
-
-
-#ifdef HAVE_PROTOTYPES
-int is_char_signed (int arg)
-#else
-int is_char_signed (arg)
-     int arg;
-#endif
-{
-  if (arg == 189) {		/* expected result for unsigned char */
-    return 0;			/* type char is unsigned */
-  }
-  else if (arg != -67) {	/* expected result for signed char */
-    printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n");
-    printf("I fear the JPEG software will not work at all.\n\n");
-  }
-  return 1;			/* assume char is signed otherwise */
-}
-
-
-#ifdef HAVE_PROTOTYPES
-int is_shifting_signed (long arg)
-#else
-int is_shifting_signed (arg)
-     long arg;
-#endif
-/* See whether right-shift on a long is signed or not. */
-{
-  long res = arg >> 4;
-
-  if (res == -0x7F7E80CL) {	/* expected result for signed shift */
-    return 1;			/* right shift is signed */
-  }
-  /* see if unsigned-shift hack will fix it. */
-  /* we can't just test exact value since it depends on width of long... */
-  res |= (~0L) << (32-4);
-  if (res == -0x7F7E80CL) {	/* expected result now? */
-    return 0;			/* right shift is unsigned */
-  }
-  printf("Right shift isn't acting as I expect it to.\n");
-  printf("I fear the JPEG software will not work at all.\n\n");
-  return 0;			/* try it with unsigned anyway */
-}
-
-
-#ifdef HAVE_PROTOTYPES
-int main (int argc, char ** argv)
-#else
-int main (argc, argv)
-     int argc;
-     char ** argv;
-#endif
-{
-  char signed_char_check = (char) (-67);
-  FILE *outfile;
-
-  /* Attempt to write jconfig.h */
-  if ((outfile = fopen("jconfig.h", "w")) == NULL) {
-    printf("Failed to write jconfig.h\n");
-    return 1;
-  }
-
-  /* Write out all the info */
-  fprintf(outfile, "/* jconfig.h --- generated by ckconfig.c */\n");
-  fprintf(outfile, "/* see jconfig.doc for explanations */\n\n");
-#ifdef HAVE_PROTOTYPES
-  fprintf(outfile, "#define HAVE_PROTOTYPES\n");
-#else
-  fprintf(outfile, "#undef HAVE_PROTOTYPES\n");
-#endif
-#ifdef HAVE_UNSIGNED_CHAR
-  fprintf(outfile, "#define HAVE_UNSIGNED_CHAR\n");
-#else
-  fprintf(outfile, "#undef HAVE_UNSIGNED_CHAR\n");
-#endif
-#ifdef HAVE_UNSIGNED_SHORT
-  fprintf(outfile, "#define HAVE_UNSIGNED_SHORT\n");
-#else
-  fprintf(outfile, "#undef HAVE_UNSIGNED_SHORT\n");
-#endif
-#ifdef HAVE_VOID
-  fprintf(outfile, "/* #define void char */\n");
-#else
-  fprintf(outfile, "#define void char\n");
-#endif
-#ifdef HAVE_CONST
-  fprintf(outfile, "/* #define const */\n");
-#else
-  fprintf(outfile, "#define const\n");
-#endif
-  if (is_char_signed((int) signed_char_check))
-    fprintf(outfile, "#undef CHAR_IS_UNSIGNED\n");
-  else
-    fprintf(outfile, "#define CHAR_IS_UNSIGNED\n");
-#ifdef HAVE_STDDEF_H
-  fprintf(outfile, "#define HAVE_STDDEF_H\n");
-#else
-  fprintf(outfile, "#undef HAVE_STDDEF_H\n");
-#endif
-#ifdef HAVE_STDLIB_H
-  fprintf(outfile, "#define HAVE_STDLIB_H\n");
-#else
-  fprintf(outfile, "#undef HAVE_STDLIB_H\n");
-#endif
-#ifdef NEED_BSD_STRINGS
-  fprintf(outfile, "#define NEED_BSD_STRINGS\n");
-#else
-  fprintf(outfile, "#undef NEED_BSD_STRINGS\n");
-#endif
-#ifdef NEED_SYS_TYPES_H
-  fprintf(outfile, "#define NEED_SYS_TYPES_H\n");
-#else
-  fprintf(outfile, "#undef NEED_SYS_TYPES_H\n");
-#endif
-  fprintf(outfile, "#undef NEED_FAR_POINTERS\n");
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-  fprintf(outfile, "#define NEED_SHORT_EXTERNAL_NAMES\n");
-#else
-  fprintf(outfile, "#undef NEED_SHORT_EXTERNAL_NAMES\n");
-#endif
-#ifdef INCOMPLETE_TYPES_BROKEN
-  fprintf(outfile, "#define INCOMPLETE_TYPES_BROKEN\n");
-#else
-  fprintf(outfile, "#undef INCOMPLETE_TYPES_BROKEN\n");
-#endif
-  fprintf(outfile, "\n#ifdef JPEG_INTERNALS\n\n");
-  if (is_shifting_signed(-0x7F7E80B1L))
-    fprintf(outfile, "#undef RIGHT_SHIFT_IS_UNSIGNED\n");
-  else
-    fprintf(outfile, "#define RIGHT_SHIFT_IS_UNSIGNED\n");
-  fprintf(outfile, "\n#endif /* JPEG_INTERNALS */\n");
-  fprintf(outfile, "\n#ifdef JPEG_CJPEG_DJPEG\n\n");
-  fprintf(outfile, "#define BMP_SUPPORTED		/* BMP image file format */\n");
-  fprintf(outfile, "#define GIF_SUPPORTED		/* GIF image file format */\n");
-  fprintf(outfile, "#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */\n");
-  fprintf(outfile, "#undef RLE_SUPPORTED		/* Utah RLE image file format */\n");
-  fprintf(outfile, "#define TARGA_SUPPORTED		/* Targa image file format */\n\n");
-  fprintf(outfile, "#undef TWO_FILE_COMMANDLINE	/* You may need this on non-Unix systems */\n");
-  fprintf(outfile, "#undef NEED_SIGNAL_CATCHER	/* Define this if you use jmemname.c */\n");
-  fprintf(outfile, "#undef DONT_USE_B_MODE\n");
-  fprintf(outfile, "/* #define PROGRESS_REPORT */	/* optional */\n");
-  fprintf(outfile, "\n#endif /* JPEG_CJPEG_DJPEG */\n");
-
-  /* Close the jconfig.h file */
-  fclose(outfile);
-
-  /* User report */
-  printf("Configuration check for Independent JPEG Group's software done.\n");
-  printf("\nI have written the jconfig.h file for you.\n\n");
-#ifdef HAVE_PROTOTYPES
-  printf("You should use makefile.ansi as the starting point for your Makefile.\n");
-#else
-  printf("You should use makefile.unix as the starting point for your Makefile.\n");
-#endif
-
-#ifdef NEED_SPECIAL_INCLUDE
-  printf("\nYou'll need to change jconfig.h to include the system include file\n");
-  printf("that you found type size_t in, or add a direct definition of type\n");
-  printf("size_t if that's what you used.  Just add it to the end.\n");
-#endif
-
-  return 0;
-}
diff --git a/cmakescripts/md5cmp.cmake b/cmakescripts/md5cmp.cmake
new file mode 100644
index 0000000..c315aa8
--- /dev/null
+++ b/cmakescripts/md5cmp.cmake
@@ -0,0 +1,15 @@
+if(NOT MD5)  # MD5: expected digest; this script compares it against FILE's actual MD5
+  message(FATAL_ERROR "MD5 not specified")
+endif()
+
+if(NOT FILE)  # FILE: path of the file whose digest is checked
+  message(FATAL_ERROR "FILE not specified")
+endif()
+
+file(MD5 "${FILE}" MD5FILE)  # quoted: an unquoted path containing ';' would split into several arguments
+
+if(NOT MD5 STREQUAL MD5FILE)  # any mismatch aborts with FATAL_ERROR
+  message(FATAL_ERROR "MD5 of ${FILE} should be ${MD5}, not ${MD5FILE}.")
+else()
+  message(STATUS "${MD5}: OK")
+endif()
diff --git a/cmakescripts/testclean.cmake b/cmakescripts/testclean.cmake
new file mode 100644
index 0000000..e357787
--- /dev/null
+++ b/cmakescripts/testclean.cmake
@@ -0,0 +1,34 @@
+file(GLOB FILES  # collect leftover test output files into the list variable FILES
+  testout*
+  *_GRAY_*.bmp  # one group of bmp/png/ppm/jpg/yuv patterns per tag: GRAY, 420, 422, 444, 440
+  *_GRAY_*.png
+  *_GRAY_*.ppm
+  *_GRAY_*.jpg
+  *_GRAY.yuv
+  *_420_*.bmp
+  *_420_*.png
+  *_420_*.ppm
+  *_420_*.jpg
+  *_420.yuv
+  *_422_*.bmp
+  *_422_*.png
+  *_422_*.ppm
+  *_422_*.jpg
+  *_422.yuv
+  *_444_*.bmp
+  *_444_*.png
+  *_444_*.ppm
+  *_444_*.jpg
+  *_444.yuv
+  *_440_*.bmp
+  *_440_*.png
+  *_440_*.ppm
+  *_440_*.jpg
+  *_440.yuv)  # NOTE(review): patterns are relative; presumably resolved against the directory the script runs in -- confirm
+
+if(NOT FILES STREQUAL "")  # the glob matched at least one file
+  message(STATUS "Removing test files")
+  file(REMOVE ${FILES})  # unquoted so the list expands to one argument per file
+else()
+  message(STATUS "No files to remove")
+endif()
diff --git a/coderules.doc b/coderules.doc
deleted file mode 100644
index 0ab5d9b..0000000
--- a/coderules.doc
+++ /dev/null
@@ -1,118 +0,0 @@
-IJG JPEG LIBRARY:  CODING RULES
-
-Copyright (C) 1991-1996, Thomas G. Lane.
-This file is part of the Independent JPEG Group's software.
-For conditions of distribution and use, see the accompanying README file.
-
-
-Since numerous people will be contributing code and bug fixes, it's important
-to establish a common coding style.  The goal of using similar coding styles
-is much more important than the details of just what that style is.
-
-In general we follow the recommendations of "Recommended C Style and Coding
-Standards" revision 6.1 (Cannon et al. as modified by Spencer, Keppel and
-Brader).  This document is available in the IJG FTP archive (see
-jpeg/doc/cstyle.ms.tbl.Z, or cstyle.txt.Z for those without nroff/tbl).
-
-Block comments should be laid out thusly:
-
-/*
- *  Block comments in this style.
- */
-
-We indent statements in K&R style, e.g.,
-	if (test) {
-	  then-part;
-	} else {
-	  else-part;
-	}
-with two spaces per indentation level.  (This indentation convention is
-handled automatically by GNU Emacs and many other text editors.)
-
-Multi-word names should be written in lower case with underscores, e.g.,
-multi_word_name (not multiWordName).  Preprocessor symbols and enum constants
-are similar but upper case (MULTI_WORD_NAME).  Names should be unique within
-the first fifteen characters.  (On some older systems, global names must be
-unique within six characters.  We accommodate this without cluttering the
-source code by using macros to substitute shorter names.)
-
-We use function prototypes everywhere; we rely on automatic source code
-transformation to feed prototype-less C compilers.  Transformation is done
-by the simple and portable tool 'ansi2knr.c' (courtesy of Ghostscript).
-ansi2knr is not very bright, so it imposes a format requirement on function
-declarations: the function name MUST BEGIN IN COLUMN 1.  Thus all functions
-should be written in the following style:
-
-LOCAL(int *)
-function_name (int a, char *b)
-{
-    code...
-}
-
-Note that each function definition must begin with GLOBAL(type), LOCAL(type),
-or METHODDEF(type).  These macros expand to "static type" or just "type" as
-appropriate.  They provide a readable indication of the routine's usage and
-can readily be changed for special needs.  (For instance, special linkage
-keywords can be inserted for use in Windows DLLs.)
-
-ansi2knr does not transform method declarations (function pointers in
-structs).  We handle these with a macro JMETHOD, defined as
-	#ifdef HAVE_PROTOTYPES
-	#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-	#else
-	#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-	#endif
-which is used like this:
-	struct function_pointers {
-	  JMETHOD(void, init_entropy_encoder, (int somearg, jparms *jp));
-	  JMETHOD(void, term_entropy_encoder, (void));
-	};
-Note the set of parentheses surrounding the parameter list.
-
-A similar solution is used for forward and external function declarations
-(see the EXTERN and JPP macros).
-
-If the code is to work on non-ANSI compilers, we cannot rely on a prototype
-declaration to coerce actual parameters into the right types.  Therefore, use
-explicit casts on actual parameters whenever the actual parameter type is not
-identical to the formal parameter.  Beware of implicit conversions to "int".
-
-It seems there are some non-ANSI compilers in which the sizeof() operator
-is defined to return int, yet size_t is defined as long.  Needless to say,
-this is brain-damaged.  Always use the SIZEOF() macro in place of sizeof(),
-so that the result is guaranteed to be of type size_t.
-
-
-The JPEG library is intended to be used within larger programs.  Furthermore,
-we want it to be reentrant so that it can be used by applications that process
-multiple images concurrently.  The following rules support these requirements:
-
-1. Avoid direct use of file I/O, "malloc", error report printouts, etc;
-pass these through the common routines provided.
-
-2. Minimize global namespace pollution.  Functions should be declared static
-wherever possible.  (Note that our method-based calling conventions help this
-a lot: in many modules only the initialization function will ever need to be
-called directly, so only that function need be externally visible.)  All
-global function names should begin with "jpeg_", and should have an
-abbreviated name (unique in the first six characters) substituted by macro
-when NEED_SHORT_EXTERNAL_NAMES is set.
-
-3. Don't use global variables; anything that must be used in another module
-should be in the common data structures.
-
-4. Don't use static variables except for read-only constant tables.  Variables
-that should be private to a module can be placed into private structures (see
-the system architecture document, structure.doc).
-
-5. Source file names should begin with "j" for files that are part of the
-library proper; source files that are not part of the library, such as cjpeg.c
-and djpeg.c, do not begin with "j".  Keep source file names to eight
-characters (plus ".c" or ".h", etc) to make life easy for MS-DOSers.  Keep
-compression and decompression code in separate source files --- some
-applications may want only one half of the library.
-
-Note: these rules (particularly #4) are not followed religiously in the
-modules that are used in cjpeg/djpeg but are not part of the JPEG library
-proper.  Those modules are not really intended to be used in other
-applications.
diff --git a/coderules.txt b/coderules.txt
new file mode 100644
index 0000000..8683e9a
--- /dev/null
+++ b/coderules.txt
@@ -0,0 +1,78 @@
+IJG JPEG LIBRARY:  CODING RULES
+
+This file was part of the Independent JPEG Group's software:
+Copyright (C) 1991-1996, Thomas G. Lane.
+It was modified by The libjpeg-turbo Project to include only information
+relevant to libjpeg-turbo.
+For conditions of distribution and use, see the accompanying README file.
+
+
+Since numerous people will be contributing code and bug fixes, it's important
+to establish a common coding style.  The goal of using similar coding styles
+is much more important than the details of just what that style is.
+
+In general we follow the recommendations of "Recommended C Style and Coding
+Standards" revision 6.1 (Cannon et al. as modified by Spencer, Keppel and
+Brader).  This document is available in the IJG FTP archive (see
+jpeg/doc/cstyle.ms.tbl.Z, or cstyle.txt.Z for those without nroff/tbl).
+
+Block comments should be laid out thusly:
+
+/*
+ *  Block comments in this style.
+ */
+
+We indent statements in K&R style, e.g.,
+        if (test) {
+          then-part;
+        } else {
+          else-part;
+        }
+with two spaces per indentation level.  (This indentation convention is
+handled automatically by GNU Emacs and many other text editors.)
+
+Multi-word names should be written in lower case with underscores, e.g.,
+multi_word_name (not multiWordName).  Preprocessor symbols and enum constants
+are similar but upper case (MULTI_WORD_NAME).  Names should be unique within
+the first fifteen characters.
+
+Note that each function definition must begin with GLOBAL(type), LOCAL(type),
+or METHODDEF(type).  These macros expand to "static type" or just "type" as
+appropriate.  They provide a readable indication of the routine's usage and
+can readily be changed for special needs.  (For instance, special linkage
+keywords can be inserted for use in Windows DLLs.)
+
+A similar solution is used for external function declarations (see the EXTERN
+macro).
+
+
+The JPEG library is intended to be used within larger programs.  Furthermore,
+we want it to be reentrant so that it can be used by applications that process
+multiple images concurrently.  The following rules support these requirements:
+
+1. Avoid direct use of file I/O, "malloc", error report printouts, etc.;
+pass these through the common routines provided.
+
+2. Minimize global namespace pollution.  Functions should be declared static
+wherever possible.  (Note that our method-based calling conventions help this
+a lot: in many modules only the initialization function will ever need to be
+called directly, so only that function need be externally visible.)  All
+global function names should begin with "jpeg_".
+
+3. Don't use global variables; anything that must be used in another module
+should be in the common data structures.
+
+4. Don't use static variables except for read-only constant tables.  Variables
+that should be private to a module can be placed into private structures (see
+the system architecture document, structure.txt).
+
+5. Source file names should begin with "j" for files that are part of the
+library proper; source files that are not part of the library, such as cjpeg.c
+and djpeg.c, do not begin with "j".  Keep compression and decompression code in
+separate source files --- some applications may want only one half of the
+library.
+
+Note: these rules (particularly #4) are not followed religiously in the
+modules that are used in cjpeg/djpeg but are not part of the JPEG library
+proper.  Those modules are not really intended to be used in other
+applications.
diff --git a/config.guess b/config.guess
deleted file mode 100755
index 413ed41..0000000
--- a/config.guess
+++ /dev/null
@@ -1,883 +0,0 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-#   Copyright (C) 1992, 93, 94, 95, 96, 1997 Free Software Foundation, Inc.
-#
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Written by Per Bothner <bothner@cygnus.com>.
-# The master version of this file is at the FSF in /home/gd/gnu/lib.
-#
-# This script attempts to guess a canonical system name similar to
-# config.sub.  If it succeeds, it prints the system name on stdout, and
-# exits with 0.  Otherwise, it exits with 1.
-#
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit system type (host/target name).
-#
-# Only a few systems have been added to this list; please add others
-# (but try to keep the structure clean).
-#
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 8/24/94.)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
-	PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
-
-trap 'rm -f dummy.c dummy.o dummy; exit 1' 1 2 15
-
-# Note: order is significant - the case branches are not exclusive.
-
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
-    alpha:OSF1:*:*)
-	if test $UNAME_RELEASE = "V4.0"; then
-		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
-	fi
-	# A Vn.n version is a released version.
-	# A Tn.n version is a released field test version.
-	# A Xn.n version is an unreleased experimental baselevel.
-	# 1.2 uses "1.2" for uname -r.
-	cat <<EOF >dummy.s
-	.globl main
-	.ent main
-main:
-	.frame \$30,0,\$26,0
-	.prologue 0
-	.long 0x47e03d80 # implver $0
-	lda \$2,259
-	.long 0x47e20c21 # amask $2,$1
-	srl \$1,8,\$2
-	sll \$2,2,\$2
-	sll \$0,3,\$0
-	addl \$1,\$0,\$0
-	addl \$2,\$0,\$0
-	ret \$31,(\$26),1
-	.end main
-EOF
-	${CC-cc} dummy.s -o dummy 2>/dev/null
-	if test "$?" = 0 ; then
-		./dummy
-		case "$?" in
-			7)
-				UNAME_MACHINE="alpha"
-				;;
-			15)
-				UNAME_MACHINE="alphaev5"
-				;;
-			14)
-				UNAME_MACHINE="alphaev56"
-				;;
-			10)
-				UNAME_MACHINE="alphapca56"
-				;;
-			16)
-				UNAME_MACHINE="alphaev6"
-				;;
-		esac
-	fi
-	rm -f dummy.s dummy
-	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr [[A-Z]] [[a-z]]`
-	exit 0 ;;
-    21064:Windows_NT:50:3)
-	echo alpha-dec-winnt3.5
-	exit 0 ;;
-    Amiga*:UNIX_System_V:4.0:*)
-	echo m68k-cbm-sysv4
-	exit 0;;
-    amiga:NetBSD:*:*)
-      echo m68k-cbm-netbsd${UNAME_RELEASE}
-      exit 0 ;;
-    amiga:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    arc64:OpenBSD:*:*)
-	echo mips64el-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    arc:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    hkmips:OpenBSD:*:*)
-	echo mips-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    pmax:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    sgi:OpenBSD:*:*)
-	echo mips-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    wgrisc:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
-	echo arm-acorn-riscix${UNAME_RELEASE}
-	exit 0;;
-    arm32:NetBSD:*:*)
-	echo arm-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-	exit 0 ;;
-    SR2?01:HI-UX/MPP:*:*)
-	echo hppa1.1-hitachi-hiuxmpp
-	exit 0;;
-    Pyramid*:OSx*:*:*|MIS*:OSx*:*:*)
-	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
-	if test "`(/bin/universe) 2>/dev/null`" = att ; then
-		echo pyramid-pyramid-sysv3
-	else
-		echo pyramid-pyramid-bsd
-	fi
-	exit 0 ;;
-    NILE:*:*:dcosx)
-	echo pyramid-pyramid-svr4
-	exit 0 ;;
-    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
-	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
-    i86pc:SunOS:5.*:*)
-	echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
-    sun4*:SunOS:6*:*)
-	# According to config.sub, this is the proper way to canonicalize
-	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
-	# it's likely to be more like Solaris than SunOS4.
-	echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
-    sun4*:SunOS:*:*)
-	case "`/usr/bin/arch -k`" in
-	    Series*|S4*)
-		UNAME_RELEASE=`uname -v`
-		;;
-	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
-	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
-	exit 0 ;;
-    sun3*:SunOS:*:*)
-	echo m68k-sun-sunos${UNAME_RELEASE}
-	exit 0 ;;
-    sun*:*:4.2BSD:*)
-	UNAME_RELEASE=`(head -1 /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
-	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
-	case "`/bin/arch`" in
-	    sun3)
-		echo m68k-sun-sunos${UNAME_RELEASE}
-		;;
-	    sun4)
-		echo sparc-sun-sunos${UNAME_RELEASE}
-		;;
-	esac
-	exit 0 ;;
-    aushp:SunOS:*:*)
-	echo sparc-auspex-sunos${UNAME_RELEASE}
-	exit 0 ;;
-    atari*:NetBSD:*:*)
-	echo m68k-atari-netbsd${UNAME_RELEASE}
-	exit 0 ;;
-    atari*:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    sun3*:NetBSD:*:*)
-	echo m68k-sun-netbsd${UNAME_RELEASE}
-	exit 0 ;;
-    sun3*:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mac68k:NetBSD:*:*)
-	echo m68k-apple-netbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mac68k:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mvme68k:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mvme88k:OpenBSD:*:*)
-	echo m88k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    powerpc:machten:*:*)
-	echo powerpc-apple-machten${UNAME_RELEASE}
-	exit 0 ;;
-    RISC*:Mach:*:*)
-	echo mips-dec-mach_bsd4.3
-	exit 0 ;;
-    RISC*:ULTRIX:*:*)
-	echo mips-dec-ultrix${UNAME_RELEASE}
-	exit 0 ;;
-    VAX*:ULTRIX*:*:*)
-	echo vax-dec-ultrix${UNAME_RELEASE}
-	exit 0 ;;
-    2020:CLIX:*:*)
-	echo clipper-intergraph-clix${UNAME_RELEASE}
-	exit 0 ;;
-    mips:*:*:UMIPS | mips:*:*:RISCos)
-	sed 's/^	//' << EOF >dummy.c
-	int main (argc, argv) int argc; char **argv; {
-	#if defined (host_mips) && defined (MIPSEB)
-	#if defined (SYSTYPE_SYSV)
-	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_SVR4)
-	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
-	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
-	#endif
-	#endif
-	  exit (-1);
-	}
-EOF
-	${CC-cc} dummy.c -o dummy \
-	  && ./dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
-	  && rm dummy.c dummy && exit 0
-	rm -f dummy.c dummy
-	echo mips-mips-riscos${UNAME_RELEASE}
-	exit 0 ;;
-    Night_Hawk:Power_UNIX:*:*)
-	echo powerpc-harris-powerunix
-	exit 0 ;;
-    m88k:CX/UX:7*:*)
-	echo m88k-harris-cxux7
-	exit 0 ;;
-    m88k:*:4*:R4*)
-	echo m88k-motorola-sysv4
-	exit 0 ;;
-    m88k:*:3*:R3*)
-	echo m88k-motorola-sysv3
-	exit 0 ;;
-    AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
-        if [ $UNAME_PROCESSOR = mc88100 -o $UNAME_PROCESSOR = mc88110 ] ; then
-	if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx \
-	     -o ${TARGET_BINARY_INTERFACE}x = x ] ; then
-		echo m88k-dg-dgux${UNAME_RELEASE}
-	else
-		echo m88k-dg-dguxbcs${UNAME_RELEASE}
-	fi
-        else echo i586-dg-dgux${UNAME_RELEASE}
-        fi
- 	exit 0 ;;
-    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
-	echo m88k-dolphin-sysv3
-	exit 0 ;;
-    M88*:*:R3*:*)
-	# Delta 88k system running SVR3
-	echo m88k-motorola-sysv3
-	exit 0 ;;
-    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
-	echo m88k-tektronix-sysv3
-	exit 0 ;;
-    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
-	echo m68k-tektronix-bsd
-	exit 0 ;;
-    *:IRIX*:*:*)
-	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
-	exit 0 ;;
-    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	echo romp-ibm-aix      # uname -m gives an 8 hex-code CPU id
-	exit 0 ;;              # Note that: echo "'`uname -s`'" gives 'AIX '
-    i?86:AIX:*:*)
-	echo i386-ibm-aix
-	exit 0 ;;
-    *:AIX:2:3)
-	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
-		sed 's/^		//' << EOF >dummy.c
-		#include <sys/systemcfg.h>
-
-		main()
-			{
-			if (!__power_pc())
-				exit(1);
-			puts("powerpc-ibm-aix3.2.5");
-			exit(0);
-			}
-EOF
-		${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0
-		rm -f dummy.c dummy
-		echo rs6000-ibm-aix3.2.5
-	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
-		echo rs6000-ibm-aix3.2.4
-	else
-		echo rs6000-ibm-aix3.2
-	fi
-	exit 0 ;;
-    *:AIX:*:4)
-	if /usr/sbin/lsattr -EHl proc0 | grep POWER >/dev/null 2>&1; then
-		IBM_ARCH=rs6000
-	else
-		IBM_ARCH=powerpc
-	fi
-	if [ -x /usr/bin/oslevel ] ; then
-		IBM_REV=`/usr/bin/oslevel`
-	else
-		IBM_REV=4.${UNAME_RELEASE}
-	fi
-	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
-	exit 0 ;;
-    *:AIX:*:*)
-	echo rs6000-ibm-aix
-	exit 0 ;;
-    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
-	echo romp-ibm-bsd4.4
-	exit 0 ;;
-    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC NetBSD and
-	echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
-	exit 0 ;;                           # report: romp-ibm BSD 4.3
-    *:BOSX:*:*)
-	echo rs6000-bull-bosx
-	exit 0 ;;
-    DPX/2?00:B.O.S.:*:*)
-	echo m68k-bull-sysv3
-	exit 0 ;;
-    9000/[34]??:4.3bsd:1.*:*)
-	echo m68k-hp-bsd
-	exit 0 ;;
-    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
-	echo m68k-hp-bsd4.4
-	exit 0 ;;
-    9000/[3478]??:HP-UX:*:*)
-	case "${UNAME_MACHINE}" in
-	    9000/31? )            HP_ARCH=m68000 ;;
-	    9000/[34]?? )         HP_ARCH=m68k ;;
-	    9000/7?? | 9000/8?[1679] ) HP_ARCH=hppa1.1 ;;
-	    9000/8?? )            HP_ARCH=hppa1.0 ;;
-	esac
-	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
-	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
-	exit 0 ;;
-    3050*:HI-UX:*:*)
-	sed 's/^	//' << EOF >dummy.c
-	#include <unistd.h>
-	int
-	main ()
-	{
-	  long cpu = sysconf (_SC_CPU_VERSION);
-	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
-	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
-	     results, however.  */
-	  if (CPU_IS_PA_RISC (cpu))
-	    {
-	      switch (cpu)
-		{
-		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
-		  default: puts ("hppa-hitachi-hiuxwe2"); break;
-		}
-	    }
-	  else if (CPU_IS_HP_MC68K (cpu))
-	    puts ("m68k-hitachi-hiuxwe2");
-	  else puts ("unknown-hitachi-hiuxwe2");
-	  exit (0);
-	}
-EOF
-	${CC-cc} dummy.c -o dummy && ./dummy && rm dummy.c dummy && exit 0
-	rm -f dummy.c dummy
-	echo unknown-hitachi-hiuxwe2
-	exit 0 ;;
-    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
-	echo hppa1.1-hp-bsd
-	exit 0 ;;
-    9000/8??:4.3bsd:*:*)
-	echo hppa1.0-hp-bsd
-	exit 0 ;;
-    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
-	echo hppa1.1-hp-osf
-	exit 0 ;;
-    hp8??:OSF1:*:*)
-	echo hppa1.0-hp-osf
-	exit 0 ;;
-    i?86:OSF1:*:*)
-	if [ -x /usr/sbin/sysversion ] ; then
-	    echo ${UNAME_MACHINE}-unknown-osf1mk
-	else
-	    echo ${UNAME_MACHINE}-unknown-osf1
-	fi
-	exit 0 ;;
-    parisc*:Lites*:*:*)
-	echo hppa1.1-hp-lites
-	exit 0 ;;
-    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
-	echo c1-convex-bsd
-        exit 0 ;;
-    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-        exit 0 ;;
-    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
-	echo c34-convex-bsd
-        exit 0 ;;
-    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
-	echo c38-convex-bsd
-        exit 0 ;;
-    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
-	echo c4-convex-bsd
-        exit 0 ;;
-    CRAY*X-MP:*:*:*)
-	echo xmp-cray-unicos
-        exit 0 ;;
-    CRAY*Y-MP:*:*:*)
-	echo ymp-cray-unicos${UNAME_RELEASE}
-	exit 0 ;;
-    CRAY*[A-Z]90:*:*:*)
-	echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
-	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
-	exit 0 ;;
-    CRAY*TS:*:*:*)
-	echo t90-cray-unicos${UNAME_RELEASE}
-	exit 0 ;;
-    CRAY-2:*:*:*)
-	echo cray2-cray-unicos
-        exit 0 ;;
-    F300:UNIX_System_V:*:*)
-        FUJITSU_SYS=`uname -p | tr [A-Z] [a-z] | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "f300-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit 0 ;;
-    F301:UNIX_System_V:*:*)
-       echo f301-fujitsu-uxpv`echo $UNAME_RELEASE | sed 's/ .*//'`
-       exit 0 ;;
-    hp3[0-9][05]:NetBSD:*:*)
-	echo m68k-hp-netbsd${UNAME_RELEASE}
-	exit 0 ;;
-    hp300:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    i?86:BSD/386:*:* | *:BSD/OS:*:*)
-	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
-	exit 0 ;;
-    *:FreeBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
-	exit 0 ;;
-    *:NetBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-netbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-	exit 0 ;;
-    *:OpenBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-openbsd`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
-	exit 0 ;;
-    i*:CYGWIN*:*)
-	echo i386-pc-cygwin32
-	exit 0 ;;
-    i*:MINGW*:*)
-	echo i386-pc-mingw32
-	exit 0 ;;
-    p*:CYGWIN*:*)
-	echo powerpcle-unknown-cygwin32
-	exit 0 ;;
-    prep*:SunOS:5.*:*)
-	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
-    *:GNU:*:*)
-	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
-	exit 0 ;;
-    *:Linux:*:*)
-	# The BFD linker knows what the default object file format is, so
-	# first see if it will tell us.
-	ld_help_string=`ld --help 2>&1`
-	ld_supported_emulations=`echo $ld_help_string \
-			 | sed -ne '/supported emulations:/!d
-				    s/[ 	][ 	]*/ /g
-				    s/.*supported emulations: *//
-				    s/ .*//
-				    p'`
-        case "$ld_supported_emulations" in
-	  i?86linux)  echo "${UNAME_MACHINE}-pc-linux-gnuaout"      ; exit 0 ;;
-	  i?86coff)   echo "${UNAME_MACHINE}-pc-linux-gnucoff"      ; exit 0 ;;
-	  sparclinux) echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;;
-	  m68klinux)  echo "${UNAME_MACHINE}-unknown-linux-gnuaout" ; exit 0 ;;
-	  elf32ppc)   echo "powerpc-unknown-linux-gnu"              ; exit 0 ;;
-	esac
-
-	if test "${UNAME_MACHINE}" = "alpha" ; then
-		sed 's/^	//'  <<EOF >dummy.s
-		.globl main
-		.ent main
-	main:
-		.frame \$30,0,\$26,0
-		.prologue 0
-		.long 0x47e03d80 # implver $0
-		lda \$2,259
-		.long 0x47e20c21 # amask $2,$1
-		srl \$1,8,\$2
-		sll \$2,2,\$2
-		sll \$0,3,\$0
-		addl \$1,\$0,\$0
-		addl \$2,\$0,\$0
-		ret \$31,(\$26),1
-		.end main
-EOF
-		LIBC=""
-		${CC-cc} dummy.s -o dummy 2>/dev/null
-		if test "$?" = 0 ; then
-			./dummy
-			case "$?" in
-			7)
-				UNAME_MACHINE="alpha"
-				;;
-			15)
-				UNAME_MACHINE="alphaev5"
-				;;
-			14)
-				UNAME_MACHINE="alphaev56"
-				;;
-			10)
-				UNAME_MACHINE="alphapca56"
-				;;
-			16)
-				UNAME_MACHINE="alphaev6"
-				;;
-			esac	
-
-			objdump --private-headers dummy | \
-			  grep ld.so.1 > /dev/null
-			if test "$?" = 0 ; then
-				LIBC="libc1"
-			fi
-		fi	
-		rm -f dummy.s dummy
-		echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
-	elif test "${UNAME_MACHINE}" = "mips" ; then
-	  cat >dummy.c <<EOF
-main(argc, argv)
-     int argc;
-     char *argv[];
-{
-#ifdef __MIPSEB__
-  printf ("%s-unknown-linux-gnu\n", argv[1]);
-#endif
-#ifdef __MIPSEL__
-  printf ("%sel-unknown-linux-gnu\n", argv[1]);
-#endif
-  return 0;
-}
-EOF
-	  ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0
-	  rm -f dummy.c dummy
-	else
-	  # Either a pre-BFD a.out linker (linux-gnuoldld)
-	  # or one that does not give us useful --help.
-	  # GCC wants to distinguish between linux-gnuoldld and linux-gnuaout.
-	  # If ld does not provide *any* "supported emulations:"
-	  # that means it is gnuoldld.
-	  echo "$ld_help_string" | grep >/dev/null 2>&1 "supported emulations:"
-	  test $? != 0 && echo "${UNAME_MACHINE}-pc-linux-gnuoldld" && exit 0
-
-	  case "${UNAME_MACHINE}" in
-	  i?86)
-	    VENDOR=pc;
-	    ;;
-	  *)
-	    VENDOR=unknown;
-	    ;;
-	  esac
-	  # Determine whether the default compiler is a.out or elf
-	  cat >dummy.c <<EOF
-#include <features.h>
-main(argc, argv)
-     int argc;
-     char *argv[];
-{
-#ifdef __ELF__
-# ifdef __GLIBC__
-#  if __GLIBC__ >= 2
-    printf ("%s-${VENDOR}-linux-gnu\n", argv[1]);
-#  else
-    printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
-#  endif
-# else
-   printf ("%s-${VENDOR}-linux-gnulibc1\n", argv[1]);
-# endif
-#else
-  printf ("%s-${VENDOR}-linux-gnuaout\n", argv[1]);
-#endif
-  return 0;
-}
-EOF
-	  ${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy "${UNAME_MACHINE}" && rm dummy.c dummy && exit 0
-	  rm -f dummy.c dummy
-	fi ;;
-# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.  earlier versions
-# are messed up and put the nodename in both sysname and nodename.
-    i?86:DYNIX/ptx:4*:*)
-	echo i386-sequent-sysv4
-	exit 0 ;;
-    i?86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
-	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
-	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
-	exit 0 ;;
-    i?86:*:4.*:* | i?86:SYSTEM_V:4.*:*)
-	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-		echo ${UNAME_MACHINE}-univel-sysv${UNAME_RELEASE}
-	else
-		echo ${UNAME_MACHINE}-pc-sysv${UNAME_RELEASE}
-	fi
-	exit 0 ;;
-    i?86:*:3.2:*)
-	if test -f /usr/options/cb.name; then
-		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
-		echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
-	elif /bin/uname -X 2>/dev/null >/dev/null ; then
-		UNAME_REL=`(/bin/uname -X|egrep Release|sed -e 's/.*= //')`
-		(/bin/uname -X|egrep i80486 >/dev/null) && UNAME_MACHINE=i486
-		(/bin/uname -X|egrep '^Machine.*Pentium' >/dev/null) \
-			&& UNAME_MACHINE=i586
-		echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
-	else
-		echo ${UNAME_MACHINE}-pc-sysv32
-	fi
-	exit 0 ;;
-    pc:*:*:*)
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i386.
-	echo i386-pc-msdosdjgpp
-        exit 0 ;;
-    Intel:Mach:3*:*)
-	echo i386-pc-mach3
-	exit 0 ;;
-    paragon:*:*:*)
-	echo i860-intel-osf1
-	exit 0 ;;
-    i860:*:4.*:*) # i860-SVR4
-	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
-	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
-	else # Add other i860-SVR4 vendors below as they are discovered.
-	  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
-	fi
-	exit 0 ;;
-    mini*:CTIX:SYS*5:*)
-	# "miniframe"
-	echo m68010-convergent-sysv
-	exit 0 ;;
-    M68*:*:R3V[567]*:*)
-	test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
-    3[34]??:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 4850:*:4.0:3.0)
-	OS_REL=''
-	test -r /etc/.relid \
-	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && echo i486-ncr-sysv4.3${OS_REL} && exit 0
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	  && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
-    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && echo i486-ncr-sysv4 && exit 0 ;;
-    m68*:LynxOS:2.*:*)
-	echo m68k-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
-    mc68030:UNIX_System_V:4.*:*)
-	echo m68k-atari-sysv4
-	exit 0 ;;
-    i?86:LynxOS:2.*:*)
-	echo i386-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
-    TSUNAMI:LynxOS:2.*:*)
-	echo sparc-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
-    rs6000:LynxOS:2.*:* | PowerPC:LynxOS:2.*:*)
-	echo rs6000-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
-    SM[BE]S:UNIX_SV:*:*)
-	echo mips-dde-sysv${UNAME_RELEASE}
-	exit 0 ;;
-    RM*:SINIX-*:*:*)
-	echo mips-sni-sysv4
-	exit 0 ;;
-    *:SINIX-*:*:*)
-	if uname -p 2>/dev/null >/dev/null ; then
-		UNAME_MACHINE=`(uname -p) 2>/dev/null`
-		echo ${UNAME_MACHINE}-sni-sysv4
-	else
-		echo ns32k-sni-sysv
-	fi
-	exit 0 ;;
-    PENTIUM:CPunix:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                           # says <Richard.M.Bartel@ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit 0 ;;
-    *:UNIX_System_V:4*:FTX*)
-	# From Gerald Hewes <hewes@openmarket.com>.
-	# How about differentiating between stratus architectures? -djm
-	echo hppa1.1-stratus-sysv4
-	exit 0 ;;
-    *:*:*:FTX*)
-	# From seanf@swdc.stratus.com.
-	echo i860-stratus-sysv4
-	exit 0 ;;
-    mc68*:A/UX:*:*)
-	echo m68k-apple-aux${UNAME_RELEASE}
-	exit 0 ;;
-    news*:NEWS-OS:*:6*)
-	echo mips-sony-newsos6
-	exit 0 ;;
-    R3000:*System_V*:*:* | R4000:UNIX_SYSV:*:*)
-	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
-	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
-	fi
-        exit 0 ;;
-esac
-
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
-cat >dummy.c <<EOF
-#ifdef _SEQUENT_
-# include <sys/types.h>
-# include <sys/utsname.h>
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
-  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
-     I don't know....  */
-  printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
-  printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
-          "4"
-#else
-	  ""
-#endif
-         ); exit (0);
-#endif
-#endif
-
-#if defined (__arm) && defined (__acorn) && defined (__unix)
-  printf ("arm-acorn-riscix"); exit (0);
-#endif
-
-#if defined (hp300) && !defined (hpux)
-  printf ("m68k-hp-bsd\n"); exit (0);
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
-  int version;
-  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
-  printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
-  exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
-  printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
-  printf ("ns32k-encore-mach\n"); exit (0);
-#else
-  printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
-  printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
-  printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
-  printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
-    struct utsname un;
-
-    uname(&un);
-
-    if (strncmp(un.version, "V2", 2) == 0) {
-	printf ("i386-sequent-ptx2\n"); exit (0);
-    }
-    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
-	printf ("i386-sequent-ptx1\n"); exit (0);
-    }
-    printf ("i386-sequent-ptx\n"); exit (0);
-
-#endif
-
-#if defined (vax)
-#if !defined (ultrix)
-  printf ("vax-dec-bsd\n"); exit (0);
-#else
-  printf ("vax-dec-ultrix\n"); exit (0);
-#endif
-#endif
-
-#if defined (alliant) && defined (i860)
-  printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
-  exit (1);
-}
-EOF
-
-${CC-cc} dummy.c -o dummy 2>/dev/null && ./dummy && rm dummy.c dummy && exit 0
-rm -f dummy.c dummy
-
-# Apollos put the system type in the environment.
-
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
-
-# Convex versions that predate uname can use getsysinfo(1)
-
-if [ -x /usr/convex/getsysinfo ]
-then
-    case `getsysinfo -f cpu_type` in
-    c1*)
-	echo c1-convex-bsd
-	exit 0 ;;
-    c2*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-	exit 0 ;;
-    c34*)
-	echo c34-convex-bsd
-	exit 0 ;;
-    c38*)
-	echo c38-convex-bsd
-	exit 0 ;;
-    c4*)
-	echo c4-convex-bsd
-	exit 0 ;;
-    esac
-fi
-
-#echo '(Unable to guess system type)' 1>&2
-
-exit 1
diff --git a/config.sub b/config.sub
deleted file mode 100755
index 213a6d4..0000000
--- a/config.sub
+++ /dev/null
@@ -1,954 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script, version 1.1.
-#   Copyright (C) 1991, 92, 93, 94, 95, 96, 1997 Free Software Foundation, Inc.
-# This file is (in principle) common to ALL GNU software.
-# The presence of a machine in this file suggests that SOME GNU software
-# can handle that machine.  It does not imply ALL GNU software can.
-#
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support.  The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-if [ x$1 = x ]
-then
-	echo Configuration name missing. 1>&2
-	echo "Usage: $0 CPU-MFR-OPSYS" 1>&2
-	echo "or     $0 ALIAS" 1>&2
-	echo where ALIAS is a recognized configuration type. 1>&2
-	exit 1
-fi
-
-# First pass through any local machine types.
-case $1 in
-	*local*)
-		echo $1
-		exit 0
-		;;
-	*)
-	;;
-esac
-
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
-  linux-gnu*)
-    os=-$maybe_os
-    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
-    ;;
-  *)
-    basic_machine=`echo $1 | sed 's/-[^-]*$//'`
-    if [ $basic_machine != $1 ]
-    then os=`echo $1 | sed 's/.*-/-/'`
-    else os=; fi
-    ;;
-esac
-
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work.  We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
-	-sun*os*)
-		# Prevent following clause from handling this invalid input.
-		;;
-	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
-	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
-	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
-	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-	-apple)
-		os=
-		basic_machine=$1
-		;;
-	-hiux*)
-		os=-hiuxwe2
-		;;
-	-sco5)
-		os=sco3.2v5
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco4)
-		os=-sco3.2v4
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco3.2.[4-9]*)
-		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco3.2v[4-9]*)
-		# Don't forget version if it is 3.2v4 or newer.
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-sco*)
-		os=-sco3.2v2
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-isc)
-		os=-isc2.2
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-clix*)
-		basic_machine=clipper-intergraph
-		;;
-	-isc*)
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
-		;;
-	-lynx*)
-		os=-lynxos
-		;;
-	-ptx*)
-		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
-		;;
-	-windowsnt*)
-		os=`echo $os | sed -e 's/windowsnt/winnt/'`
-		;;
-	-psos*)
-		os=-psos
-		;;
-esac
-
-# Decode aliases for certain CPU-COMPANY combinations.
-case $basic_machine in
-	# Recognize the basic CPU types without company name.
-	# Some are omitted here because they have special meanings below.
-	tahoe | i860 | m32r | m68k | m68000 | m88k | ns32k | arc | arm \
-		| arme[lb] | pyramid | mn10200 | mn10300 \
-		| tron | a29k | 580 | i960 | h8300 | hppa | hppa1.0 | hppa1.1 \
-		| alpha | alphaev5 | alphaev56 | we32k | ns16k | clipper \
-		| i370 | sh | powerpc | powerpcle | 1750a | dsp16xx | pdp11 \
-		| mips64 | mipsel | mips64el | mips64orion | mips64orionel \
-		| mipstx39 | mipstx39el \
-		| sparc | sparclet | sparclite | sparc64 | v850)
-		basic_machine=$basic_machine-unknown
-		;;
-	# We use `pc' rather than `unknown'
-	# because (1) that's what they normally are, and
-	# (2) the word "unknown" tends to confuse beginning users.
-	i[3456]86)
-	  basic_machine=$basic_machine-pc
-	  ;;
-	# Object if more than one company name word.
-	*-*-*)
-		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
-		exit 1
-		;;
-	# Recognize the basic CPU types with company name.
-	vax-* | tahoe-* | i[3456]86-* | i860-* | m32r-* | m68k-* | m68000-* \
-	      | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | arm-* | c[123]* \
-	      | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
-	      | power-* | none-* | 580-* | cray2-* | h8300-* | i960-* \
-	      | xmp-* | ymp-* | hppa-* | hppa1.0-* | hppa1.1-* \
-	      | alpha-* | alphaev5-* | alphaev56-* | we32k-* | cydra-* \
-	      | ns16k-* | pn-* | np1-* | xps100-* | clipper-* | orion-* \
-	      | sparclite-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \
-	      | sparc64-* | mips64-* | mipsel-* \
-	      | mips64el-* | mips64orion-* | mips64orionel-*  \
-	      | mipstx39-* | mipstx39el-* \
-	      | f301-*)
-		;;
-	# Recognize the various machine names and aliases which stand
-	# for a CPU type and a company and sometimes even an OS.
-	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
-		basic_machine=m68000-att
-		;;
-	3b*)
-		basic_machine=we32k-att
-		;;
-	alliant | fx80)
-		basic_machine=fx80-alliant
-		;;
-	altos | altos3068)
-		basic_machine=m68k-altos
-		;;
-	am29k)
-		basic_machine=a29k-none
-		os=-bsd
-		;;
-	amdahl)
-		basic_machine=580-amdahl
-		os=-sysv
-		;;
-	amiga | amiga-*)
-		basic_machine=m68k-cbm
-		;;
-	amigaos | amigados)
-		basic_machine=m68k-cbm
-		os=-amigaos
-		;;
-	amigaunix | amix)
-		basic_machine=m68k-cbm
-		os=-sysv4
-		;;
-	apollo68)
-		basic_machine=m68k-apollo
-		os=-sysv
-		;;
-	aux)
-		basic_machine=m68k-apple
-		os=-aux
-		;;
-	balance)
-		basic_machine=ns32k-sequent
-		os=-dynix
-		;;
-	convex-c1)
-		basic_machine=c1-convex
-		os=-bsd
-		;;
-	convex-c2)
-		basic_machine=c2-convex
-		os=-bsd
-		;;
-	convex-c32)
-		basic_machine=c32-convex
-		os=-bsd
-		;;
-	convex-c34)
-		basic_machine=c34-convex
-		os=-bsd
-		;;
-	convex-c38)
-		basic_machine=c38-convex
-		os=-bsd
-		;;
-	cray | ymp)
-		basic_machine=ymp-cray
-		os=-unicos
-		;;
-	cray2)
-		basic_machine=cray2-cray
-		os=-unicos
-		;;
-	[ctj]90-cray)
-		basic_machine=c90-cray
-		os=-unicos
-		;;
-	crds | unos)
-		basic_machine=m68k-crds
-		;;
-	da30 | da30-*)
-		basic_machine=m68k-da30
-		;;
-	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
-		basic_machine=mips-dec
-		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
-		basic_machine=m68k-motorola
-		;;
-	delta88)
-		basic_machine=m88k-motorola
-		os=-sysv3
-		;;
-	dpx20 | dpx20-*)
-		basic_machine=rs6000-bull
-		os=-bosx
-		;;
-	dpx2* | dpx2*-bull)
-		basic_machine=m68k-bull
-		os=-sysv3
-		;;
-	ebmon29k)
-		basic_machine=a29k-amd
-		os=-ebmon
-		;;
-	elxsi)
-		basic_machine=elxsi-elxsi
-		os=-bsd
-		;;
-	encore | umax | mmax)
-		basic_machine=ns32k-encore
-		;;
-	fx2800)
-		basic_machine=i860-alliant
-		;;
-	genix)
-		basic_machine=ns32k-ns
-		;;
-	gmicro)
-		basic_machine=tron-gmicro
-		os=-sysv
-		;;
-	h3050r* | hiux*)
-		basic_machine=hppa1.1-hitachi
-		os=-hiuxwe2
-		;;
-	h8300hms)
-		basic_machine=h8300-hitachi
-		os=-hms
-		;;
-	harris)
-		basic_machine=m88k-harris
-		os=-sysv3
-		;;
-	hp300-*)
-		basic_machine=m68k-hp
-		;;
-	hp300bsd)
-		basic_machine=m68k-hp
-		os=-bsd
-		;;
-	hp300hpux)
-		basic_machine=m68k-hp
-		os=-hpux
-		;;
-	hp9k2[0-9][0-9] | hp9k31[0-9])
-		basic_machine=m68000-hp
-		;;
-	hp9k3[2-9][0-9])
-		basic_machine=m68k-hp
-		;;
-	hp9k7[0-9][0-9] | hp7[0-9][0-9] | hp9k8[0-9]7 | hp8[0-9]7)
-		basic_machine=hppa1.1-hp
-		;;
-	hp9k8[0-9][0-9] | hp8[0-9][0-9])
-		basic_machine=hppa1.0-hp
-		;;
-	hppa-next)
-		os=-nextstep3
-		;;
-	i370-ibm* | ibm*)
-		basic_machine=i370-ibm
-		os=-mvs
-		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
-	i[3456]86v32)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv32
-		;;
-	i[3456]86v4*)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv4
-		;;
-	i[3456]86v)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-sysv
-		;;
-	i[3456]86sol2)
-		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
-		os=-solaris2
-		;;
-	iris | iris4d)
-		basic_machine=mips-sgi
-		case $os in
-		    -irix*)
-			;;
-		    *)
-			os=-irix4
-			;;
-		esac
-		;;
-	isi68 | isi)
-		basic_machine=m68k-isi
-		os=-sysv
-		;;
-	m88k-omron*)
-		basic_machine=m88k-omron
-		;;
-	magnum | m3230)
-		basic_machine=mips-mips
-		os=-sysv
-		;;
-	merlin)
-		basic_machine=ns32k-utek
-		os=-sysv
-		;;
-	miniframe)
-		basic_machine=m68000-convergent
-		;;
-	mipsel*-linux*)
-		basic_machine=mipsel-unknown
-		os=-linux-gnu
-		;;
-	mips*-linux*)
-		basic_machine=mips-unknown
-		os=-linux-gnu
-		;;
-	mips3*-*)
-		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
-		;;
-	mips3*)
-		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
-		;;
-	ncr3000)
-		basic_machine=i486-ncr
-		os=-sysv4
-		;;
-	news | news700 | news800 | news900)
-		basic_machine=m68k-sony
-		os=-newsos
-		;;
-	news1000)
-		basic_machine=m68030-sony
-		os=-newsos
-		;;
-	news-3600 | risc-news)
-		basic_machine=mips-sony
-		os=-newsos
-		;;
-	next | m*-next )
-		basic_machine=m68k-next
-		case $os in
-		    -nextstep* )
-			;;
-		    -ns2*)
-		      os=-nextstep2
-			;;
-		    *)
-		      os=-nextstep3
-			;;
-		esac
-		;;
-	nh3000)
-		basic_machine=m68k-harris
-		os=-cxux
-		;;
-	nh[45]000)
-		basic_machine=m88k-harris
-		os=-cxux
-		;;
-	nindy960)
-		basic_machine=i960-intel
-		os=-nindy
-		;;
-	np1)
-		basic_machine=np1-gould
-		;;
-	pa-hitachi)
-		basic_machine=hppa1.1-hitachi
-		os=-hiuxwe2
-		;;
-	paragon)
-		basic_machine=i860-intel
-		os=-osf
-		;;
-	pbd)
-		basic_machine=sparc-tti
-		;;
-	pbb)
-		basic_machine=m68k-tti
-		;;
-        pc532 | pc532-*)
-		basic_machine=ns32k-pc532
-		;;
-	pentium | p5)
-		basic_machine=i586-intel
-		;;
-	pentiumpro | p6)
-		basic_machine=i686-intel
-		;;
-	pentium-* | p5-*)
-		basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	pentiumpro-* | p6-*)
-		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	k5)
-		# We don't have specific support for AMD's K5 yet, so just call it a Pentium
-		basic_machine=i586-amd
-		;;
-	nexen)
-		# We don't have specific support for Nexgen yet, so just call it a Pentium
-		basic_machine=i586-nexgen
-		;;
-	pn)
-		basic_machine=pn-gould
-		;;
-	power)	basic_machine=rs6000-ibm
-		;;
-	ppc)	basic_machine=powerpc-unknown
-	        ;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ppcle | powerpclittle | ppc-le | powerpc-little)
-		basic_machine=powerpcle-unknown
-	        ;;
-	ppcle-* | powerpclittle-*)
-		basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
-		;;
-	ps2)
-		basic_machine=i386-ibm
-		;;
-	rm[46]00)
-		basic_machine=mips-siemens
-		;;
-	rtpc | rtpc-*)
-		basic_machine=romp-ibm
-		;;
-	sequent)
-		basic_machine=i386-sequent
-		;;
-	sh)
-		basic_machine=sh-hitachi
-		os=-hms
-		;;
-	sps7)
-		basic_machine=m68k-bull
-		os=-sysv2
-		;;
-	spur)
-		basic_machine=spur-unknown
-		;;
-	sun2)
-		basic_machine=m68000-sun
-		;;
-	sun2os3)
-		basic_machine=m68000-sun
-		os=-sunos3
-		;;
-	sun2os4)
-		basic_machine=m68000-sun
-		os=-sunos4
-		;;
-	sun3os3)
-		basic_machine=m68k-sun
-		os=-sunos3
-		;;
-	sun3os4)
-		basic_machine=m68k-sun
-		os=-sunos4
-		;;
-	sun4os3)
-		basic_machine=sparc-sun
-		os=-sunos3
-		;;
-	sun4os4)
-		basic_machine=sparc-sun
-		os=-sunos4
-		;;
-	sun4sol2)
-		basic_machine=sparc-sun
-		os=-solaris2
-		;;
-	sun3 | sun3-*)
-		basic_machine=m68k-sun
-		;;
-	sun4)
-		basic_machine=sparc-sun
-		;;
-	sun386 | sun386i | roadrunner)
-		basic_machine=i386-sun
-		;;
-	symmetry)
-		basic_machine=i386-sequent
-		os=-dynix
-		;;
-	tx39)
-		basic_machine=mipstx39-unknown
-		;;
-	tx39el)
-		basic_machine=mipstx39el-unknown
-		;;
-	tower | tower-32)
-		basic_machine=m68k-ncr
-		;;
-	udi29k)
-		basic_machine=a29k-amd
-		os=-udi
-		;;
-	ultra3)
-		basic_machine=a29k-nyu
-		os=-sym1
-		;;
-	vaxv)
-		basic_machine=vax-dec
-		os=-sysv
-		;;
-	vms)
-		basic_machine=vax-dec
-		os=-vms
-		;;
-	vpp*|vx|vx-*)
-               basic_machine=f301-fujitsu
-               ;;
-	vxworks960)
-		basic_machine=i960-wrs
-		os=-vxworks
-		;;
-	vxworks68)
-		basic_machine=m68k-wrs
-		os=-vxworks
-		;;
-	vxworks29k)
-		basic_machine=a29k-wrs
-		os=-vxworks
-		;;
-	xmp)
-		basic_machine=xmp-cray
-		os=-unicos
-		;;
-        xps | xps100)
-		basic_machine=xps100-honeywell
-		;;
-	none)
-		basic_machine=none-none
-		os=-none
-		;;
-
-# Here we handle the default manufacturer of certain CPU types.  It is in
-# some cases the only manufacturer, in others, it is the most popular.
-	mips)
-		if [ x$os = x-linux-gnu ]; then
-			basic_machine=mips-unknown
-		else
-			basic_machine=mips-mips
-		fi
-		;;
-	romp)
-		basic_machine=romp-ibm
-		;;
-	rs6000)
-		basic_machine=rs6000-ibm
-		;;
-	vax)
-		basic_machine=vax-dec
-		;;
-	pdp11)
-		basic_machine=pdp11-dec
-		;;
-	we32k)
-		basic_machine=we32k-att
-		;;
-	sparc)
-		basic_machine=sparc-sun
-		;;
-        cydra)
-		basic_machine=cydra-cydrome
-		;;
-	orion)
-		basic_machine=orion-highlevel
-		;;
-	orion105)
-		basic_machine=clipper-highlevel
-		;;
-	*)
-		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
-		exit 1
-		;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
-	*-digital*)
-		basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
-		;;
-	*-commodore*)
-		basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
-		;;
-	*)
-		;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x"$os" != x"" ]
-then
-case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
-	# -solaris* is a basic system type, with this one exception.
-	-solaris1 | -solaris1.*)
-		os=`echo $os | sed -e 's|solaris1|sunos4|'`
-		;;
-	-solaris)
-		os=-solaris2
-		;;
-	-svr4*)
-		os=-sysv4
-		;;
-	-unixware*)
-		os=-sysv4.2uw
-		;;
-	-gnu/linux*)
-		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
-		;;
-	# First accept the basic system types.
-	# The portable systems comes first.
-	# Each alternative MUST END IN A *, to match a version number.
-	# -sysv* is not here because it comes later, after sysvr4.
-	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
-	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
-	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
-	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-	      | -aos* \
-	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
-	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
-	      | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
-	      | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* \
-	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
-	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
-	      | -cygwin32* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -uxpv*)
-	# Remember, each alternative MUST END IN *, to match a version number.
-		;;
-	-linux*)
-		os=`echo $os | sed -e 's|linux|linux-gnu|'`
-		;;
-	-sunos5*)
-		os=`echo $os | sed -e 's|sunos5|solaris2|'`
-		;;
-	-sunos6*)
-		os=`echo $os | sed -e 's|sunos6|solaris3|'`
-		;;
-	-osfrose*)
-		os=-osfrose
-		;;
-	-osf*)
-		os=-osf
-		;;
-	-utek*)
-		os=-bsd
-		;;
-	-dynix*)
-		os=-bsd
-		;;
-	-acis*)
-		os=-aos
-		;;
-	-ctix* | -uts*)
-		os=-sysv
-		;;
-	-ns2 )
-	        os=-nextstep2
-		;;
-	# Preserve the version number of sinix5.
-	-sinix5.*)
-		os=`echo $os | sed -e 's|sinix|sysv|'`
-		;;
-	-sinix*)
-		os=-sysv4
-		;;
-	-triton*)
-		os=-sysv3
-		;;
-	-oss*)
-		os=-sysv3
-		;;
-	-svr4)
-		os=-sysv4
-		;;
-	-svr3)
-		os=-sysv3
-		;;
-	-sysvr4)
-		os=-sysv4
-		;;
-	# This must come after -sysvr4.
-	-sysv*)
-		;;
-	-xenix)
-		os=-xenix
-		;;
-	-none)
-		;;
-	*)
-		# Get rid of the `-' at the beginning of $os.
-		os=`echo $os | sed 's/[^-]*-//'`
-		echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
-		exit 1
-		;;
-esac
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system.  Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-case $basic_machine in
-	*-acorn)
-		os=-riscix1.2
-		;;
-	arm*-semi)
-		os=-aout
-		;;
-        pdp11-*)
-		os=-none
-		;;
-	*-dec | vax-*)
-		os=-ultrix4.2
-		;;
-	m68*-apollo)
-		os=-domain
-		;;
-	i386-sun)
-		os=-sunos4.0.2
-		;;
-	m68000-sun)
-		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
-		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
-		os=-sysv3
-		;;
-	sparc-* | *-sun)
-		os=-sunos4.1.1
-		;;
-	*-ibm)
-		os=-aix
-		;;
-	*-hp)
-		os=-hpux
-		;;
-	*-hitachi)
-		os=-hiux
-		;;
-	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
-		os=-sysv
-		;;
-	*-cbm)
-		os=-amigaos
-		;;
-	*-dg)
-		os=-dgux
-		;;
-	*-dolphin)
-		os=-sysv3
-		;;
-	m68k-ccur)
-		os=-rtu
-		;;
-	m88k-omron*)
-		os=-luna
-		;;
-	*-next )
-		os=-nextstep
-		;;
-	*-sequent)
-		os=-ptx
-		;;
-	*-crds)
-		os=-unos
-		;;
-	*-ns)
-		os=-genix
-		;;
-	i370-*)
-		os=-mvs
-		;;
-	*-next)
-		os=-nextstep3
-		;;
-        *-gould)
-		os=-sysv
-		;;
-        *-highlevel)
-		os=-bsd
-		;;
-	*-encore)
-		os=-bsd
-		;;
-        *-sgi)
-		os=-irix
-		;;
-        *-siemens)
-		os=-sysv4
-		;;
-	*-masscomp)
-		os=-rtu
-		;;
-	f301-fujitsu)
-		os=-uxpv
-		;;
-	*)
-		os=-none
-		;;
-esac
-fi
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer.  We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
-	*-unknown)
-		case $os in
-			-riscix*)
-				vendor=acorn
-				;;
-			-sunos*)
-				vendor=sun
-				;;
-			-aix*)
-				vendor=ibm
-				;;
-			-hpux*)
-				vendor=hp
-				;;
-			-hiux*)
-				vendor=hitachi
-				;;
-			-unos*)
-				vendor=crds
-				;;
-			-dgux*)
-				vendor=dg
-				;;
-			-luna*)
-				vendor=omron
-				;;
-			-genix*)
-				vendor=ns
-				;;
-			-mvs*)
-				vendor=ibm
-				;;
-			-ptx*)
-				vendor=sequent
-				;;
-			-vxsim* | -vxworks*)
-				vendor=wrs
-				;;
-			-aux*)
-				vendor=apple
-				;;
-		esac
-		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
-		;;
-esac
-
-echo $basic_machine$os
diff --git a/configure b/configure
deleted file mode 100755
index 35c9db5..0000000
--- a/configure
+++ /dev/null
@@ -1,2011 +0,0 @@
-#! /bin/sh
-
-# Guess values for system-dependent variables and create Makefiles.
-# Generated automatically using autoconf version 2.12 
-# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc.
-#
-# This configure script is free software; the Free Software Foundation
-# gives unlimited permission to copy, distribute and modify it.
-
-# Defaults:
-ac_help=
-ac_default_prefix=/usr/local
-# Any additions from configure.in:
-ac_help="$ac_help
-  --enable-shared         build shared library using GNU libtool"
-ac_help="$ac_help
-  --enable-static         build static library using GNU libtool"
-ac_help="$ac_help
-  --enable-maxmem[=N]     enable use of temp files, set max mem usage to N MB"
-ac_help="$ac_help
-"
-
-# Initialize some variables set by options.
-# The variables have the same names as the options, with
-# dashes changed to underlines.
-build=NONE
-cache_file=./config.cache
-exec_prefix=NONE
-host=NONE
-no_create=
-nonopt=NONE
-no_recursion=
-prefix=NONE
-program_prefix=NONE
-program_suffix=NONE
-program_transform_name=s,x,x,
-silent=
-site=
-srcdir=
-target=NONE
-verbose=
-x_includes=NONE
-x_libraries=NONE
-bindir='${exec_prefix}/bin'
-sbindir='${exec_prefix}/sbin'
-libexecdir='${exec_prefix}/libexec'
-datadir='${prefix}/share'
-sysconfdir='${prefix}/etc'
-sharedstatedir='${prefix}/com'
-localstatedir='${prefix}/var'
-libdir='${exec_prefix}/lib'
-includedir='${prefix}/include'
-oldincludedir='/usr/include'
-infodir='${prefix}/info'
-mandir='${prefix}/man'
-
-# Initialize some other variables.
-subdirs=
-MFLAGS= MAKEFLAGS=
-# Maximum number of lines to put in a shell here document.
-ac_max_here_lines=12
-
-ac_prev=
-for ac_option
-do
-
-  # If the previous option needs an argument, assign it.
-  if test -n "$ac_prev"; then
-    eval "$ac_prev=\$ac_option"
-    ac_prev=
-    continue
-  fi
-
-  case "$ac_option" in
-  -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
-  *) ac_optarg= ;;
-  esac
-
-  # Accept the important Cygnus configure options, so we can diagnose typos.
-
-  case "$ac_option" in
-
-  -bindir | --bindir | --bindi | --bind | --bin | --bi)
-    ac_prev=bindir ;;
-  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
-    bindir="$ac_optarg" ;;
-
-  -build | --build | --buil | --bui | --bu)
-    ac_prev=build ;;
-  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
-    build="$ac_optarg" ;;
-
-  -cache-file | --cache-file | --cache-fil | --cache-fi \
-  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
-    ac_prev=cache_file ;;
-  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
-  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
-    cache_file="$ac_optarg" ;;
-
-  -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
-    ac_prev=datadir ;;
-  -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
-  | --da=*)
-    datadir="$ac_optarg" ;;
-
-  -disable-* | --disable-*)
-    ac_feature=`echo $ac_option|sed -e 's/-*disable-//'`
-    # Reject names that are not valid shell variable names.
-    if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then
-      { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; }
-    fi
-    ac_feature=`echo $ac_feature| sed 's/-/_/g'`
-    eval "enable_${ac_feature}=no" ;;
-
-  -enable-* | --enable-*)
-    ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'`
-    # Reject names that are not valid shell variable names.
-    if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then
-      { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; }
-    fi
-    ac_feature=`echo $ac_feature| sed 's/-/_/g'`
-    case "$ac_option" in
-      *=*) ;;
-      *) ac_optarg=yes ;;
-    esac
-    eval "enable_${ac_feature}='$ac_optarg'" ;;
-
-  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
-  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
-  | --exec | --exe | --ex)
-    ac_prev=exec_prefix ;;
-  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
-  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
-  | --exec=* | --exe=* | --ex=*)
-    exec_prefix="$ac_optarg" ;;
-
-  -gas | --gas | --ga | --g)
-    # Obsolete; use --with-gas.
-    with_gas=yes ;;
-
-  -help | --help | --hel | --he)
-    # Omit some internal or obsolete options to make the list less imposing.
-    # This message is too long to be a string in the A/UX 3.1 sh.
-    cat << EOF
-Usage: configure [options] [host]
-Options: [defaults in brackets after descriptions]
-Configuration:
-  --cache-file=FILE       cache test results in FILE
-  --help                  print this message
-  --no-create             do not create output files
-  --quiet, --silent       do not print \`checking...' messages
-  --version               print the version of autoconf that created configure
-Directory and file names:
-  --prefix=PREFIX         install architecture-independent files in PREFIX
-                          [$ac_default_prefix]
-  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
-                          [same as prefix]
-  --bindir=DIR            user executables in DIR [EPREFIX/bin]
-  --sbindir=DIR           system admin executables in DIR [EPREFIX/sbin]
-  --libexecdir=DIR        program executables in DIR [EPREFIX/libexec]
-  --datadir=DIR           read-only architecture-independent data in DIR
-                          [PREFIX/share]
-  --sysconfdir=DIR        read-only single-machine data in DIR [PREFIX/etc]
-  --sharedstatedir=DIR    modifiable architecture-independent data in DIR
-                          [PREFIX/com]
-  --localstatedir=DIR     modifiable single-machine data in DIR [PREFIX/var]
-  --libdir=DIR            object code libraries in DIR [EPREFIX/lib]
-  --includedir=DIR        C header files in DIR [PREFIX/include]
-  --oldincludedir=DIR     C header files for non-gcc in DIR [/usr/include]
-  --infodir=DIR           info documentation in DIR [PREFIX/info]
-  --mandir=DIR            man documentation in DIR [PREFIX/man]
-  --srcdir=DIR            find the sources in DIR [configure dir or ..]
-  --program-prefix=PREFIX prepend PREFIX to installed program names
-  --program-suffix=SUFFIX append SUFFIX to installed program names
-  --program-transform-name=PROGRAM
-                          run sed PROGRAM on installed program names
-EOF
-    cat << EOF
-Host type:
-  --build=BUILD           configure for building on BUILD [BUILD=HOST]
-  --host=HOST             configure for HOST [guessed]
-  --target=TARGET         configure for TARGET [TARGET=HOST]
-Features and packages:
-  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
-  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
-  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
-  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
-  --x-includes=DIR        X include files are in DIR
-  --x-libraries=DIR       X library files are in DIR
-EOF
-    if test -n "$ac_help"; then
-      echo "--enable and --with options recognized:$ac_help"
-    fi
-    exit 0 ;;
-
-  -host | --host | --hos | --ho)
-    ac_prev=host ;;
-  -host=* | --host=* | --hos=* | --ho=*)
-    host="$ac_optarg" ;;
-
-  -includedir | --includedir | --includedi | --included | --include \
-  | --includ | --inclu | --incl | --inc)
-    ac_prev=includedir ;;
-  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
-  | --includ=* | --inclu=* | --incl=* | --inc=*)
-    includedir="$ac_optarg" ;;
-
-  -infodir | --infodir | --infodi | --infod | --info | --inf)
-    ac_prev=infodir ;;
-  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
-    infodir="$ac_optarg" ;;
-
-  -libdir | --libdir | --libdi | --libd)
-    ac_prev=libdir ;;
-  -libdir=* | --libdir=* | --libdi=* | --libd=*)
-    libdir="$ac_optarg" ;;
-
-  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
-  | --libexe | --libex | --libe)
-    ac_prev=libexecdir ;;
-  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
-  | --libexe=* | --libex=* | --libe=*)
-    libexecdir="$ac_optarg" ;;
-
-  -localstatedir | --localstatedir | --localstatedi | --localstated \
-  | --localstate | --localstat | --localsta | --localst \
-  | --locals | --local | --loca | --loc | --lo)
-    ac_prev=localstatedir ;;
-  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
-  | --localstate=* | --localstat=* | --localsta=* | --localst=* \
-  | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
-    localstatedir="$ac_optarg" ;;
-
-  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
-    ac_prev=mandir ;;
-  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
-    mandir="$ac_optarg" ;;
-
-  -nfp | --nfp | --nf)
-    # Obsolete; use --without-fp.
-    with_fp=no ;;
-
-  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
-  | --no-cr | --no-c)
-    no_create=yes ;;
-
-  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
-  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
-    no_recursion=yes ;;
-
-  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
-  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
-  | --oldin | --oldi | --old | --ol | --o)
-    ac_prev=oldincludedir ;;
-  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
-  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
-  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
-    oldincludedir="$ac_optarg" ;;
-
-  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
-    ac_prev=prefix ;;
-  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
-    prefix="$ac_optarg" ;;
-
-  -program-prefix | --program-prefix | --program-prefi | --program-pref \
-  | --program-pre | --program-pr | --program-p)
-    ac_prev=program_prefix ;;
-  -program-prefix=* | --program-prefix=* | --program-prefi=* \
-  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
-    program_prefix="$ac_optarg" ;;
-
-  -program-suffix | --program-suffix | --program-suffi | --program-suff \
-  | --program-suf | --program-su | --program-s)
-    ac_prev=program_suffix ;;
-  -program-suffix=* | --program-suffix=* | --program-suffi=* \
-  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
-    program_suffix="$ac_optarg" ;;
-
-  -program-transform-name | --program-transform-name \
-  | --program-transform-nam | --program-transform-na \
-  | --program-transform-n | --program-transform- \
-  | --program-transform | --program-transfor \
-  | --program-transfo | --program-transf \
-  | --program-trans | --program-tran \
-  | --progr-tra | --program-tr | --program-t)
-    ac_prev=program_transform_name ;;
-  -program-transform-name=* | --program-transform-name=* \
-  | --program-transform-nam=* | --program-transform-na=* \
-  | --program-transform-n=* | --program-transform-=* \
-  | --program-transform=* | --program-transfor=* \
-  | --program-transfo=* | --program-transf=* \
-  | --program-trans=* | --program-tran=* \
-  | --progr-tra=* | --program-tr=* | --program-t=*)
-    program_transform_name="$ac_optarg" ;;
-
-  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
-  | -silent | --silent | --silen | --sile | --sil)
-    silent=yes ;;
-
-  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
-    ac_prev=sbindir ;;
-  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
-  | --sbi=* | --sb=*)
-    sbindir="$ac_optarg" ;;
-
-  -sharedstatedir | --sharedstatedir | --sharedstatedi \
-  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
-  | --sharedst | --shareds | --shared | --share | --shar \
-  | --sha | --sh)
-    ac_prev=sharedstatedir ;;
-  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
-  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
-  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
-  | --sha=* | --sh=*)
-    sharedstatedir="$ac_optarg" ;;
-
-  -site | --site | --sit)
-    ac_prev=site ;;
-  -site=* | --site=* | --sit=*)
-    site="$ac_optarg" ;;
-
-  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
-    ac_prev=srcdir ;;
-  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
-    srcdir="$ac_optarg" ;;
-
-  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
-  | --syscon | --sysco | --sysc | --sys | --sy)
-    ac_prev=sysconfdir ;;
-  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
-  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
-    sysconfdir="$ac_optarg" ;;
-
-  -target | --target | --targe | --targ | --tar | --ta | --t)
-    ac_prev=target ;;
-  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
-    target="$ac_optarg" ;;
-
-  -v | -verbose | --verbose | --verbos | --verbo | --verb)
-    verbose=yes ;;
-
-  -version | --version | --versio | --versi | --vers)
-    echo "configure generated by autoconf version 2.12"
-    exit 0 ;;
-
-  -with-* | --with-*)
-    ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'`
-    # Reject names that are not valid shell variable names.
-    if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then
-      { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; }
-    fi
-    ac_package=`echo $ac_package| sed 's/-/_/g'`
-    case "$ac_option" in
-      *=*) ;;
-      *) ac_optarg=yes ;;
-    esac
-    eval "with_${ac_package}='$ac_optarg'" ;;
-
-  -without-* | --without-*)
-    ac_package=`echo $ac_option|sed -e 's/-*without-//'`
-    # Reject names that are not valid shell variable names.
-    if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then
-      { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; }
-    fi
-    ac_package=`echo $ac_package| sed 's/-/_/g'`
-    eval "with_${ac_package}=no" ;;
-
-  --x)
-    # Obsolete; use --with-x.
-    with_x=yes ;;
-
-  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
-  | --x-incl | --x-inc | --x-in | --x-i)
-    ac_prev=x_includes ;;
-  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
-  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
-    x_includes="$ac_optarg" ;;
-
-  -x-libraries | --x-libraries | --x-librarie | --x-librari \
-  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
-    ac_prev=x_libraries ;;
-  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
-  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
-    x_libraries="$ac_optarg" ;;
-
-  -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; }
-    ;;
-
-  *=*)
-    varname=`echo "$ac_option"|sed -e 's/=.*//'`
-    # Reject names that aren't valid shell variable names.
-    if test -n "`echo $varname| sed 's/[a-zA-Z0-9_]//g'`"; then
-      { echo "configure: error: $varname: invalid shell variable name" 1>&2; exit 1; }
-    fi
-    val="`echo "$ac_option"|sed 's/[^=]*=//'`"
-    test -n "$verbose" && echo "	setting shell variable $varname to $val"
-    eval "$varname='$val'"
-    eval "export $varname" ;;
-
-  *)
-    if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then
-      echo "configure: warning: $ac_option: invalid host type" 1>&2
-    fi
-    if test "x$nonopt" != xNONE; then
-      { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; }
-    fi
-    nonopt="$ac_option"
-    ;;
-
-  esac
-done
-
-if test -n "$ac_prev"; then
-  { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; }
-fi
-
-trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
-
-# File descriptor usage:
-# 0 standard input
-# 1 file creation
-# 2 errors and warnings
-# 3 some systems may open it to /dev/tty
-# 4 used on the Kubota Titan
-# 6 checking for... messages and results
-# 5 compiler messages saved in config.log
-if test "$silent" = yes; then
-  exec 6>/dev/null
-else
-  exec 6>&1
-fi
-exec 5>./config.log
-
-echo "\
-This file contains any messages produced by compilers while
-running configure, to aid debugging if configure makes a mistake.
-" 1>&5
-
-# Strip out --no-create and --no-recursion so they do not pile up.
-# Also quote any args containing shell metacharacters.
-ac_configure_args=
-for ac_arg
-do
-  case "$ac_arg" in
-  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
-  | --no-cr | --no-c) ;;
-  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
-  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;;
-  *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
-  ac_configure_args="$ac_configure_args '$ac_arg'" ;;
-  *) ac_configure_args="$ac_configure_args $ac_arg" ;;
-  esac
-done
-
-# NLS nuisances.
-# Only set these to C if already set.  These must not be set unconditionally
-# because not all systems understand e.g. LANG=C (notably SCO).
-# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'!
-# Non-C LC_CTYPE values break the ctype check.
-if test "${LANG+set}"   = set; then LANG=C;   export LANG;   fi
-if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
-if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi
-if test "${LC_CTYPE+set}"    = set; then LC_CTYPE=C;    export LC_CTYPE;    fi
-
-# confdefs.h avoids OS command line length limits that DEFS can exceed.
-rm -rf conftest* confdefs.h
-# AIX cpp loses on an empty file, so make sure it contains at least a newline.
-echo > confdefs.h
-
-# A filename unique to this package, relative to the directory that
-# configure is in, which we can look for to find out if srcdir is correct.
-ac_unique_file=jcmaster.c
-
-# Find the source files, if location was not specified.
-if test -z "$srcdir"; then
-  ac_srcdir_defaulted=yes
-  # Try the directory containing this script, then its parent.
-  ac_prog=$0
-  ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'`
-  test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
-  srcdir=$ac_confdir
-  if test ! -r $srcdir/$ac_unique_file; then
-    srcdir=..
-  fi
-else
-  ac_srcdir_defaulted=no
-fi
-if test ! -r $srcdir/$ac_unique_file; then
-  if test "$ac_srcdir_defaulted" = yes; then
-    { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; }
-  else
-    { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; }
-  fi
-fi
-srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'`
-
-# Prefer explicitly selected file to automatically selected ones.
-if test -z "$CONFIG_SITE"; then
-  if test "x$prefix" != xNONE; then
-    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
-  else
-    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
-  fi
-fi
-for ac_site_file in $CONFIG_SITE; do
-  if test -r "$ac_site_file"; then
-    echo "loading site script $ac_site_file"
-    . "$ac_site_file"
-  fi
-done
-
-
-ac_ext=c
-# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
-cross_compiling=$ac_cv_prog_cc_cross
-
-if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
-  # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
-  if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
-    ac_n= ac_c='
-' ac_t='	'
-  else
-    ac_n=-n ac_c= ac_t=
-  fi
-else
-  ac_n= ac_c='\c' ac_t=
-fi
-
-
-
-# Extract the first word of "gcc", so it can be a program name with args.
-set dummy gcc; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:538: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  if test -n "$CC"; then
-  ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-  IFS="${IFS= 	}"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-  for ac_dir in $PATH; do
-    test -z "$ac_dir" && ac_dir=.
-    if test -f $ac_dir/$ac_word; then
-      ac_cv_prog_CC="gcc"
-      break
-    fi
-  done
-  IFS="$ac_save_ifs"
-fi
-fi
-CC="$ac_cv_prog_CC"
-if test -n "$CC"; then
-  echo "$ac_t""$CC" 1>&6
-else
-  echo "$ac_t""no" 1>&6
-fi
-
-if test -z "$CC"; then
-  # Extract the first word of "cc", so it can be a program name with args.
-set dummy cc; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:567: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  if test -n "$CC"; then
-  ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-  IFS="${IFS= 	}"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-  ac_prog_rejected=no
-  for ac_dir in $PATH; do
-    test -z "$ac_dir" && ac_dir=.
-    if test -f $ac_dir/$ac_word; then
-      if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then
-        ac_prog_rejected=yes
-	continue
-      fi
-      ac_cv_prog_CC="cc"
-      break
-    fi
-  done
-  IFS="$ac_save_ifs"
-if test $ac_prog_rejected = yes; then
-  # We found a bogon in the path, so make sure we never use it.
-  set dummy $ac_cv_prog_CC
-  shift
-  if test $# -gt 0; then
-    # We chose a different compiler from the bogus one.
-    # However, it has the same basename, so the bogon will be chosen
-    # first if we set CC to just the basename; use the full file name.
-    shift
-    set dummy "$ac_dir/$ac_word" "$@"
-    shift
-    ac_cv_prog_CC="$@"
-  fi
-fi
-fi
-fi
-CC="$ac_cv_prog_CC"
-if test -n "$CC"; then
-  echo "$ac_t""$CC" 1>&6
-else
-  echo "$ac_t""no" 1>&6
-fi
-
-  test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; }
-fi
-
-echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6
-echo "configure:615: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5
-
-ac_ext=c
-# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
-cross_compiling=$ac_cv_prog_cc_cross
-
-cat > conftest.$ac_ext <<EOF
-#line 625 "configure"
-#include "confdefs.h"
-main(){return(0);}
-EOF
-if { (eval echo configure:629: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  ac_cv_prog_cc_works=yes
-  # If we can't run a trivial program, we are probably using a cross compiler.
-  if (./conftest; exit) 2>/dev/null; then
-    ac_cv_prog_cc_cross=no
-  else
-    ac_cv_prog_cc_cross=yes
-  fi
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  ac_cv_prog_cc_works=no
-fi
-rm -fr conftest*
-
-echo "$ac_t""$ac_cv_prog_cc_works" 1>&6
-if test $ac_cv_prog_cc_works = no; then
-  { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; }
-fi
-echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6
-echo "configure:649: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5
-echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6
-cross_compiling=$ac_cv_prog_cc_cross
-
-echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6
-echo "configure:654: checking whether we are using GNU C" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.c <<EOF
-#ifdef __GNUC__
-  yes;
-#endif
-EOF
-if { ac_try='${CC-cc} -E conftest.c'; { (eval echo configure:663: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
-  ac_cv_prog_gcc=yes
-else
-  ac_cv_prog_gcc=no
-fi
-fi
-
-echo "$ac_t""$ac_cv_prog_gcc" 1>&6
-
-if test $ac_cv_prog_gcc = yes; then
-  GCC=yes
-  test "${CFLAGS+set}" = set || CFLAGS="-O2"
-else
-  GCC=
-  test "${CFLAGS+set}" = set || CFLAGS="-O"
-fi
-
-echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6
-echo "configure:681: checking how to run the C preprocessor" >&5
-# On Suns, sometimes $CPP names a directory.
-if test -n "$CPP" && test -d "$CPP"; then
-  CPP=
-fi
-if test -z "$CPP"; then
-if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-    # This must be in double quotes, not single quotes, because CPP may get
-  # substituted into the Makefile and "${CC-cc}" will confuse make.
-  CPP="${CC-cc} -E"
-  # On the NeXT, cc -E runs the code through the compiler's parser,
-  # not just through cpp.
-  cat > conftest.$ac_ext <<EOF
-#line 696 "configure"
-#include "confdefs.h"
-#include <assert.h>
-Syntax Error
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:702: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  :
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  CPP="${CC-cc} -E -traditional-cpp"
-  cat > conftest.$ac_ext <<EOF
-#line 713 "configure"
-#include "confdefs.h"
-#include <assert.h>
-Syntax Error
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:719: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  :
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  CPP=/lib/cpp
-fi
-rm -f conftest*
-fi
-rm -f conftest*
-  ac_cv_prog_CPP="$CPP"
-fi
-  CPP="$ac_cv_prog_CPP"
-else
-  ac_cv_prog_CPP="$CPP"
-fi
-echo "$ac_t""$CPP" 1>&6
-
-echo $ac_n "checking for function prototypes""... $ac_c" 1>&6
-echo "configure:742: checking for function prototypes" >&5
-if eval "test \"`echo '$''{'ijg_cv_have_prototypes'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 747 "configure"
-#include "confdefs.h"
-
-int testfunction (int arg1, int * arg2); /* check prototypes */
-struct methods_struct {		/* check method-pointer declarations */
-  int (*error_exit) (char *msgtext);
-  int (*trace_message) (char *msgtext);
-  int (*another_method) (void);
-};
-int testfunction (int arg1, int * arg2) /* check definitions */
-{ return arg2[arg1]; }
-int test2function (void)	/* check void arg list */
-{ return 0; }
-
-int main() {
- 
-; return 0; }
-EOF
-if { (eval echo configure:765: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ijg_cv_have_prototypes=yes
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  ijg_cv_have_prototypes=no
-fi
-rm -f conftest*
-fi
-
-echo "$ac_t""$ijg_cv_have_prototypes" 1>&6
-if test $ijg_cv_have_prototypes = yes; then
-  cat >> confdefs.h <<\EOF
-#define HAVE_PROTOTYPES 
-EOF
-
-else
-  echo Your compiler does not seem to know about function prototypes.
-  echo Perhaps it needs a special switch to enable ANSI C mode.
-  echo If so, we recommend running configure like this:
-  echo "   ./configure  CC='cc -switch'"
-  echo where -switch is the proper switch.
-fi
-ac_safe=`echo "stddef.h" | sed 'y%./+-%__p_%'`
-echo $ac_n "checking for stddef.h""... $ac_c" 1>&6
-echo "configure:792: checking for stddef.h" >&5
-if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 797 "configure"
-#include "confdefs.h"
-#include <stddef.h>
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:802: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=yes"
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=no"
-fi
-rm -f conftest*
-fi
-if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
-  echo "$ac_t""yes" 1>&6
-  cat >> confdefs.h <<\EOF
-#define HAVE_STDDEF_H 
-EOF
-
-else
-  echo "$ac_t""no" 1>&6
-fi
-
-ac_safe=`echo "stdlib.h" | sed 'y%./+-%__p_%'`
-echo $ac_n "checking for stdlib.h""... $ac_c" 1>&6
-echo "configure:828: checking for stdlib.h" >&5
-if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 833 "configure"
-#include "confdefs.h"
-#include <stdlib.h>
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:838: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=yes"
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=no"
-fi
-rm -f conftest*
-fi
-if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
-  echo "$ac_t""yes" 1>&6
-  cat >> confdefs.h <<\EOF
-#define HAVE_STDLIB_H 
-EOF
-
-else
-  echo "$ac_t""no" 1>&6
-fi
-
-ac_safe=`echo "string.h" | sed 'y%./+-%__p_%'`
-echo $ac_n "checking for string.h""... $ac_c" 1>&6
-echo "configure:864: checking for string.h" >&5
-if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 869 "configure"
-#include "confdefs.h"
-#include <string.h>
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:874: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=yes"
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=no"
-fi
-rm -f conftest*
-fi
-if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
-  echo "$ac_t""yes" 1>&6
-  :
-else
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define NEED_BSD_STRINGS 
-EOF
-
-fi
-
-echo $ac_n "checking for size_t""... $ac_c" 1>&6
-echo "configure:900: checking for size_t" >&5
-cat > conftest.$ac_ext <<EOF
-#line 902 "configure"
-#include "confdefs.h"
-
-#ifdef HAVE_STDDEF_H
-#include <stddef.h>
-#endif
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#include <stdio.h>
-#ifdef NEED_BSD_STRINGS
-#include <strings.h>
-#else
-#include <string.h>
-#endif
-typedef size_t my_size_t;
-
-int main() {
- my_size_t foovar; 
-; return 0; }
-EOF
-if { (eval echo configure:923: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ijg_size_t_ok=yes
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  ijg_size_t_ok="not ANSI, perhaps it is in sys/types.h"
-fi
-rm -f conftest*
-echo "$ac_t""$ijg_size_t_ok" 1>&6
-if test "$ijg_size_t_ok" != yes; then
-ac_safe=`echo "sys/types.h" | sed 'y%./+-%__p_%'`
-echo $ac_n "checking for sys/types.h""... $ac_c" 1>&6
-echo "configure:937: checking for sys/types.h" >&5
-if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 942 "configure"
-#include "confdefs.h"
-#include <sys/types.h>
-EOF
-ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
-{ (eval echo configure:947: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }
-ac_err=`grep -v '^ *+' conftest.out`
-if test -z "$ac_err"; then
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=yes"
-else
-  echo "$ac_err" >&5
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  eval "ac_cv_header_$ac_safe=no"
-fi
-rm -f conftest*
-fi
-if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
-  echo "$ac_t""yes" 1>&6
-  cat >> confdefs.h <<\EOF
-#define NEED_SYS_TYPES_H 
-EOF
-
-cat > conftest.$ac_ext <<EOF
-#line 968 "configure"
-#include "confdefs.h"
-#include <sys/types.h>
-EOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
-  egrep "size_t" >/dev/null 2>&1; then
-  rm -rf conftest*
-  ijg_size_t_ok="size_t is in sys/types.h"
-else
-  rm -rf conftest*
-  ijg_size_t_ok=no
-fi
-rm -f conftest*
-
-else
-  echo "$ac_t""no" 1>&6
-ijg_size_t_ok=no
-fi
-
-echo "$ac_t""$ijg_size_t_ok" 1>&6
-if test "$ijg_size_t_ok" = no; then
-  echo Type size_t is not defined in any of the usual places.
-  echo Try putting '"typedef unsigned int size_t;"' in jconfig.h.
-fi
-fi
-echo $ac_n "checking for type unsigned char""... $ac_c" 1>&6
-echo "configure:994: checking for type unsigned char" >&5
-cat > conftest.$ac_ext <<EOF
-#line 996 "configure"
-#include "confdefs.h"
-
-int main() {
- unsigned char un_char; 
-; return 0; }
-EOF
-if { (eval echo configure:1003: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-cat >> confdefs.h <<\EOF
-#define HAVE_UNSIGNED_CHAR 
-EOF
-
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""no" 1>&6
-fi
-rm -f conftest*
-echo $ac_n "checking for type unsigned short""... $ac_c" 1>&6
-echo "configure:1018: checking for type unsigned short" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1020 "configure"
-#include "confdefs.h"
-
-int main() {
- unsigned short un_short; 
-; return 0; }
-EOF
-if { (eval echo configure:1027: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-cat >> confdefs.h <<\EOF
-#define HAVE_UNSIGNED_SHORT 
-EOF
-
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""no" 1>&6
-fi
-rm -f conftest*
-echo $ac_n "checking for type void""... $ac_c" 1>&6
-echo "configure:1042: checking for type void" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1044 "configure"
-#include "confdefs.h"
-
-/* Caution: a C++ compiler will insist on valid prototypes */
-typedef void * void_ptr;	/* check void * */
-#ifdef HAVE_PROTOTYPES		/* check ptr to function returning void */
-typedef void (*void_func) (int a, int b);
-#else
-typedef void (*void_func) ();
-#endif
-
-#ifdef HAVE_PROTOTYPES		/* check void function result */
-void test3function (void_ptr arg1, void_func arg2)
-#else
-void test3function (arg1, arg2)
-     void_ptr arg1;
-     void_func arg2;
-#endif
-{
-  char * locptr = (char *) arg1; /* check casting to and from void * */
-  arg1 = (void *) locptr;
-  (*arg2) (1, 2);		/* check call of fcn returning void */
-}
-
-int main() {
- 
-; return 0; }
-EOF
-if { (eval echo configure:1072: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define void char
-EOF
-
-fi
-rm -f conftest*
-
-echo $ac_n "checking for working const""... $ac_c" 1>&6
-echo "configure:1088: checking for working const" >&5
-if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 1093 "configure"
-#include "confdefs.h"
-
-int main() {
-
-/* Ultrix mips cc rejects this.  */
-typedef int charset[2]; const charset x;
-/* SunOS 4.1.1 cc rejects this.  */
-char const *const *ccp;
-char **p;
-/* NEC SVR4.0.2 mips cc rejects this.  */
-struct point {int x, y;};
-static struct point const zero = {0,0};
-/* AIX XL C 1.02.0.0 rejects this.
-   It does not let you subtract one const X* pointer from another in an arm
-   of an if-expression whose if-part is not a constant expression */
-const char *g = "string";
-ccp = &g + (g ? g-g : 0);
-/* HPUX 7.0 cc rejects these. */
-++ccp;
-p = (char**) ccp;
-ccp = (char const *const *) p;
-{ /* SCO 3.2v4 cc rejects this.  */
-  char *t;
-  char const *s = 0 ? (char *) 0 : (char const *) 0;
-
-  *t++ = 0;
-}
-{ /* Someone thinks the Sun supposedly-ANSI compiler will reject this.  */
-  int x[] = {25, 17};
-  const int *foo = &x[0];
-  ++foo;
-}
-{ /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
-  typedef const int *iptr;
-  iptr p = 0;
-  ++p;
-}
-{ /* AIX XL C 1.02.0.0 rejects this saying
-     "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
-  struct s { int j; const int *ap[3]; };
-  struct s *b; b->j = 5;
-}
-{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
-  const int foo = 10;
-}
-
-; return 0; }
-EOF
-if { (eval echo configure:1142: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ac_cv_c_const=yes
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  ac_cv_c_const=no
-fi
-rm -f conftest*
-fi
-
-echo "$ac_t""$ac_cv_c_const" 1>&6
-if test $ac_cv_c_const = no; then
-  cat >> confdefs.h <<\EOF
-#define const 
-EOF
-
-fi
-
-echo $ac_n "checking for inline""... $ac_c" 1>&6
-echo "configure:1163: checking for inline" >&5
-ijg_cv_inline=""
-cat > conftest.$ac_ext <<EOF
-#line 1166 "configure"
-#include "confdefs.h"
-
-int main() {
-} __inline__ int foo() { return 0; }
-int bar() { return foo();
-; return 0; }
-EOF
-if { (eval echo configure:1174: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ijg_cv_inline="__inline__"
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  cat > conftest.$ac_ext <<EOF
-#line 1182 "configure"
-#include "confdefs.h"
-
-int main() {
-} __inline int foo() { return 0; }
-int bar() { return foo();
-; return 0; }
-EOF
-if { (eval echo configure:1190: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ijg_cv_inline="__inline"
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  cat > conftest.$ac_ext <<EOF
-#line 1198 "configure"
-#include "confdefs.h"
-
-int main() {
-} inline int foo() { return 0; }
-int bar() { return foo();
-; return 0; }
-EOF
-if { (eval echo configure:1206: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  ijg_cv_inline="inline"
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-fi
-rm -f conftest*
-fi
-rm -f conftest*
-fi
-rm -f conftest*
-echo "$ac_t""$ijg_cv_inline" 1>&6
-cat >> confdefs.h <<EOF
-#define INLINE $ijg_cv_inline
-EOF
-
-echo $ac_n "checking for broken incomplete types""... $ac_c" 1>&6
-echo "configure:1224: checking for broken incomplete types" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1226 "configure"
-#include "confdefs.h"
- typedef struct undefined_structure * undef_struct_ptr; 
-int main() {
-
-; return 0; }
-EOF
-if { (eval echo configure:1233: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
-  rm -rf conftest*
-  echo "$ac_t""ok" 1>&6
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""broken" 1>&6
-cat >> confdefs.h <<\EOF
-#define INCOMPLETE_TYPES_BROKEN 
-EOF
-
-fi
-rm -f conftest*
-echo $ac_n "checking for short external names""... $ac_c" 1>&6
-echo "configure:1248: checking for short external names" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1250 "configure"
-#include "confdefs.h"
-
-int possibly_duplicate_function () { return 0; }
-int possibly_dupli_function () { return 1; }
-
-int main() {
- 
-; return 0; }
-EOF
-if { (eval echo configure:1260: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  rm -rf conftest*
-  echo "$ac_t""ok" 1>&6
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""short" 1>&6
-cat >> confdefs.h <<\EOF
-#define NEED_SHORT_EXTERNAL_NAMES 
-EOF
-
-fi
-rm -f conftest*
-echo $ac_n "checking to see if char is signed""... $ac_c" 1>&6
-echo "configure:1275: checking to see if char is signed" >&5
-if test "$cross_compiling" = yes; then
-  echo Assuming that char is signed on target machine.
-echo If it is unsigned, this will be a little bit inefficient.
-
-else
-  cat > conftest.$ac_ext <<EOF
-#line 1282 "configure"
-#include "confdefs.h"
-
-#ifdef HAVE_PROTOTYPES
-int is_char_signed (int arg)
-#else
-int is_char_signed (arg)
-     int arg;
-#endif
-{
-  if (arg == 189) {		/* expected result for unsigned char */
-    return 0;			/* type char is unsigned */
-  }
-  else if (arg != -67) {	/* expected result for signed char */
-    printf("Hmm, it seems 'char' is not eight bits wide on your machine.\n");
-    printf("I fear the JPEG software will not work at all.\n\n");
-  }
-  return 1;			/* assume char is signed otherwise */
-}
-char signed_char_check = (char) (-67);
-main() {
-  exit(is_char_signed((int) signed_char_check));
-}
-EOF
-if { (eval echo configure:1306: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
-then
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define CHAR_IS_UNSIGNED 
-EOF
-
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -fr conftest*
-  echo "$ac_t""yes" 1>&6
-fi
-rm -fr conftest*
-fi
-
-echo $ac_n "checking to see if right shift is signed""... $ac_c" 1>&6
-echo "configure:1323: checking to see if right shift is signed" >&5
-if test "$cross_compiling" = yes; then
-  echo "$ac_t""Assuming that right shift is signed on target machine." 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 1328 "configure"
-#include "confdefs.h"
-
-#ifdef HAVE_PROTOTYPES
-int is_shifting_signed (long arg)
-#else
-int is_shifting_signed (arg)
-     long arg;
-#endif
-/* See whether right-shift on a long is signed or not. */
-{
-  long res = arg >> 4;
-
-  if (res == -0x7F7E80CL) {	/* expected result for signed shift */
-    return 1;			/* right shift is signed */
-  }
-  /* see if unsigned-shift hack will fix it. */
-  /* we can't just test exact value since it depends on width of long... */
-  res |= (~0L) << (32-4);
-  if (res == -0x7F7E80CL) {	/* expected result now? */
-    return 0;			/* right shift is unsigned */
-  }
-  printf("Right shift isn't acting as I expect it to.\n");
-  printf("I fear the JPEG software will not work at all.\n\n");
-  return 0;			/* try it with unsigned anyway */
-}
-main() {
-  exit(is_shifting_signed(-0x7F7E80B1L));
-}
-EOF
-if { (eval echo configure:1358: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
-then
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define RIGHT_SHIFT_IS_UNSIGNED 
-EOF
-
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -fr conftest*
-  echo "$ac_t""yes" 1>&6
-fi
-rm -fr conftest*
-fi
-
-echo $ac_n "checking to see if fopen accepts b spec""... $ac_c" 1>&6
-echo "configure:1375: checking to see if fopen accepts b spec" >&5
-if test "$cross_compiling" = yes; then
-  echo "$ac_t""Assuming that it does." 1>&6
-else
-  cat > conftest.$ac_ext <<EOF
-#line 1380 "configure"
-#include "confdefs.h"
-
-#include <stdio.h>
-main() {
-  if (fopen("conftestdata", "wb") != NULL)
-    exit(0);
-  exit(1);
-}
-EOF
-if { (eval echo configure:1390: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest && (./conftest; exit) 2>/dev/null
-then
-  echo "$ac_t""yes" 1>&6
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -fr conftest*
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define DONT_USE_B_MODE 
-EOF
-
-fi
-rm -fr conftest*
-fi
-
-ac_aux_dir=
-for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
-  if test -f $ac_dir/install-sh; then
-    ac_aux_dir=$ac_dir
-    ac_install_sh="$ac_aux_dir/install-sh -c"
-    break
-  elif test -f $ac_dir/install.sh; then
-    ac_aux_dir=$ac_dir
-    ac_install_sh="$ac_aux_dir/install.sh -c"
-    break
-  fi
-done
-if test -z "$ac_aux_dir"; then
-  { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; }
-fi
-ac_config_guess=$ac_aux_dir/config.guess
-ac_config_sub=$ac_aux_dir/config.sub
-ac_configure=$ac_aux_dir/configure # This should be Cygnus configure.
-
-# Find a good install program.  We prefer a C program (faster),
-# so one script is as good as another.  But avoid the broken or
-# incompatible versions:
-# SysV /etc/install, /usr/sbin/install
-# SunOS /usr/etc/install
-# IRIX /sbin/install
-# AIX /bin/install
-# AFS /usr/afsws/bin/install, which mishandles nonexistent args
-# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
-# ./install, which can be erroneously created by make from ./install.sh.
-echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6
-echo "configure:1436: checking for a BSD compatible install" >&5
-if test -z "$INSTALL"; then
-if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-    IFS="${IFS= 	}"; ac_save_IFS="$IFS"; IFS="${IFS}:"
-  for ac_dir in $PATH; do
-    # Account for people who put trailing slashes in PATH elements.
-    case "$ac_dir/" in
-    /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
-    *)
-      # OSF1 and SCO ODT 3.0 have their own names for install.
-      for ac_prog in ginstall installbsd scoinst install; do
-        if test -f $ac_dir/$ac_prog; then
-	  if test $ac_prog = install &&
-            grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
-	    # AIX install.  It has an incompatible calling convention.
-	    # OSF/1 installbsd also uses dspmsg, but is usable.
-	    :
-	  else
-	    ac_cv_path_install="$ac_dir/$ac_prog -c"
-	    break 2
-	  fi
-	fi
-      done
-      ;;
-    esac
-  done
-  IFS="$ac_save_IFS"
-
-fi
-  if test "${ac_cv_path_install+set}" = set; then
-    INSTALL="$ac_cv_path_install"
-  else
-    # As a last resort, use the slow shell script.  We don't cache a
-    # path for INSTALL within a source directory, because that will
-    # break other packages using the cache if that directory is
-    # removed, or if the path is relative.
-    INSTALL="$ac_install_sh"
-  fi
-fi
-echo "$ac_t""$INSTALL" 1>&6
-
-# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
-# It thinks the first close brace ends the variable substitution.
-test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
-
-test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
-
-# Extract the first word of "ranlib", so it can be a program name with args.
-set dummy ranlib; ac_word=$2
-echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
-echo "configure:1488: checking for $ac_word" >&5
-if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then
-  echo $ac_n "(cached) $ac_c" 1>&6
-else
-  if test -n "$RANLIB"; then
-  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
-else
-  IFS="${IFS= 	}"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-  for ac_dir in $PATH; do
-    test -z "$ac_dir" && ac_dir=.
-    if test -f $ac_dir/$ac_word; then
-      ac_cv_prog_RANLIB="ranlib"
-      break
-    fi
-  done
-  IFS="$ac_save_ifs"
-  test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":"
-fi
-fi
-RANLIB="$ac_cv_prog_RANLIB"
-if test -n "$RANLIB"; then
-  echo "$ac_t""$RANLIB" 1>&6
-else
-  echo "$ac_t""no" 1>&6
-fi
-
-
-# Decide whether to use libtool,
-# and if so whether to build shared, static, or both flavors of library.
-LTSHARED="no"
-# Check whether --enable-shared or --disable-shared was given.
-if test "${enable_shared+set}" = set; then
-  enableval="$enable_shared"
-  LTSHARED="$enableval"
-fi
-
-LTSTATIC="no"
-# Check whether --enable-static or --disable-static was given.
-if test "${enable_static+set}" = set; then
-  enableval="$enable_static"
-  LTSTATIC="$enableval"
-fi
-
-if test "x$LTSHARED" != xno  -o  "x$LTSTATIC" != xno; then
-  USELIBTOOL="yes"
-  LIBTOOL="./libtool"
-  O="lo"
-  A="la"
-  LN='$(LIBTOOL) --mode=link $(CC)'
-  INSTALL_LIB='$(LIBTOOL) --mode=install ${INSTALL}'
-  INSTALL_PROGRAM="\$(LIBTOOL) --mode=install $INSTALL_PROGRAM"
-else
-  USELIBTOOL="no"
-  LIBTOOL=""
-  O="o"
-  A="a"
-  LN='$(CC)'
-  INSTALL_LIB="$INSTALL_DATA"
-fi
-
-
-
-
-
-
-# Configure libtool if needed.
-if test $USELIBTOOL = yes; then
-  disable_shared=
-  disable_static=
-  if test "x$LTSHARED" = xno; then
-    disable_shared="--disable-shared"
-  fi
-  if test "x$LTSTATIC" = xno; then
-    disable_static="--disable-static"
-  fi
-  $srcdir/ltconfig $disable_shared $disable_static $srcdir/ltmain.sh
-fi
-
-# Select memory manager depending on user input.
-# If no "-enable-maxmem", use jmemnobs
-MEMORYMGR='jmemnobs.$(O)'
-MAXMEM="no"
-# Check whether --enable-maxmem or --disable-maxmem was given.
-if test "${enable_maxmem+set}" = set; then
-  enableval="$enable_maxmem"
-  MAXMEM="$enableval"
-fi
-
-# support --with-maxmem for backwards compatibility with IJG V5.
-# Check whether --with-maxmem or --without-maxmem was given.
-if test "${with_maxmem+set}" = set; then
-  withval="$with_maxmem"
-  MAXMEM="$withval"
-fi
-
-if test "x$MAXMEM" = xyes; then
-  MAXMEM=1
-fi
-if test "x$MAXMEM" != xno; then
-  if test -n "`echo $MAXMEM | sed 's/[0-9]//g'`"; then
-    { echo "configure: error: non-numeric argument to --enable-maxmem" 1>&2; exit 1; }
-  fi
-  DEFAULTMAXMEM=`expr $MAXMEM \* 1048576`
-cat >> confdefs.h <<EOF
-#define DEFAULT_MAX_MEM ${DEFAULTMAXMEM}
-EOF
-
-echo $ac_n "checking for 'tmpfile()'""... $ac_c" 1>&6
-echo "configure:1596: checking for 'tmpfile()'" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1598 "configure"
-#include "confdefs.h"
-#include <stdio.h>
-int main() {
- FILE * tfile = tmpfile(); 
-; return 0; }
-EOF
-if { (eval echo configure:1605: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-MEMORYMGR='jmemansi.$(O)'
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""no" 1>&6
-MEMORYMGR='jmemname.$(O)'
-cat >> confdefs.h <<\EOF
-#define NEED_SIGNAL_CATCHER 
-EOF
-
-echo $ac_n "checking for 'mktemp()'""... $ac_c" 1>&6
-echo "configure:1620: checking for 'mktemp()'" >&5
-cat > conftest.$ac_ext <<EOF
-#line 1622 "configure"
-#include "confdefs.h"
-
-int main() {
- char fname[80]; mktemp(fname); 
-; return 0; }
-EOF
-if { (eval echo configure:1629: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  rm -rf conftest*
-  echo "$ac_t""yes" 1>&6
-else
-  echo "configure: failed program was:" >&5
-  cat conftest.$ac_ext >&5
-  rm -rf conftest*
-  echo "$ac_t""no" 1>&6
-cat >> confdefs.h <<\EOF
-#define NO_MKTEMP 
-EOF
-
-fi
-rm -f conftest*
-fi
-rm -f conftest*
-fi
-
-
-# Extract the library version ID from jpeglib.h.
-echo $ac_n "checking libjpeg version number""... $ac_c" 1>&6
-echo "configure:1650: checking libjpeg version number" >&5
-JPEG_LIB_VERSION=`sed -e '/^#define JPEG_LIB_VERSION/!d' -e 's/^[^0-9]*\([0-9][0-9]*\).*$/\1/' $srcdir/jpeglib.h`
-echo "$ac_t""$JPEG_LIB_VERSION" 1>&6
-
-
-# Prepare to massage makefile.cfg correctly.
-if test $ijg_cv_have_prototypes = yes; then
-  A2K_DEPS=""
-  COM_A2K="# "
-else
-  A2K_DEPS="ansi2knr"
-  COM_A2K=""
-fi
-
-
-# ansi2knr needs -DBSD if string.h is missing
-if test $ac_cv_header_string_h = no; then
-  ANSI2KNRFLAGS="-DBSD"
-else
-  ANSI2KNRFLAGS=""
-fi
-
-# Substitutions to enable or disable libtool-related stuff
-if test $USELIBTOOL = yes -a $ijg_cv_have_prototypes = yes; then
-  COM_LT=""
-else
-  COM_LT="# "
-fi
-
-if test "x$LTSHARED" != xno; then
-  FORCE_INSTALL_LIB="install-lib"
-else
-  FORCE_INSTALL_LIB=""
-fi
-
-# Set up -I directives
-if test "x$srcdir" = x.; then
-  INCLUDEFLAGS='-I$(srcdir)'
-else
-  INCLUDEFLAGS='-I. -I$(srcdir)'
-fi
-
-trap '' 1 2 15
-
-trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
-
-test "x$prefix" = xNONE && prefix=$ac_default_prefix
-# Let make expand exec_prefix.
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
-
-# Any assignment to VPATH causes Sun make to only execute
-# the first set of double-colon rules, so remove it if not needed.
-# If there is a colon in the path, we need to keep it.
-if test "x$srcdir" = x.; then
-  ac_vpsub='/^[ 	]*VPATH[ 	]*=[^:]*$/d'
-fi
-
-trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15
-
-DEFS=-DHAVE_CONFIG_H
-
-# Without the "./", some shells look in PATH for config.status.
-: ${CONFIG_STATUS=./config.status}
-
-echo creating $CONFIG_STATUS
-rm -f $CONFIG_STATUS
-cat > $CONFIG_STATUS <<EOF
-#! /bin/sh
-# Generated automatically by configure.
-# Run this file to recreate the current configuration.
-# This directory was configured as follows,
-# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-#
-# $0 $ac_configure_args
-#
-# Compiler output produced by configure, useful for debugging
-# configure, is in ./config.log if it exists.
-
-ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]"
-for ac_option
-do
-  case "\$ac_option" in
-  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
-    echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion"
-    exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;;
-  -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
-    echo "$CONFIG_STATUS generated by autoconf version 2.12"
-    exit 0 ;;
-  -help | --help | --hel | --he | --h)
-    echo "\$ac_cs_usage"; exit 0 ;;
-  *) echo "\$ac_cs_usage"; exit 1 ;;
-  esac
-done
-
-ac_given_srcdir=$srcdir
-ac_given_INSTALL="$INSTALL"
-
-trap 'rm -fr `echo "Makefile:makefile.cfg jconfig.h:jconfig.cfg" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
-EOF
-cat >> $CONFIG_STATUS <<EOF
-
-# Protect against being on the right side of a sed subst in config.status.
-sed 's/%@/@@/; s/@%/@@/; s/%g\$/@g/; /@g\$/s/[\\\\&%]/\\\\&/g;
- s/@@/%@/; s/@@/@%/; s/@g\$/%g/' > conftest.subs <<\\CEOF
-$ac_vpsub
-$extrasub
-s%@CFLAGS@%$CFLAGS%g
-s%@CPPFLAGS@%$CPPFLAGS%g
-s%@CXXFLAGS@%$CXXFLAGS%g
-s%@DEFS@%$DEFS%g
-s%@LDFLAGS@%$LDFLAGS%g
-s%@LIBS@%$LIBS%g
-s%@exec_prefix@%$exec_prefix%g
-s%@prefix@%$prefix%g
-s%@program_transform_name@%$program_transform_name%g
-s%@bindir@%$bindir%g
-s%@sbindir@%$sbindir%g
-s%@libexecdir@%$libexecdir%g
-s%@datadir@%$datadir%g
-s%@sysconfdir@%$sysconfdir%g
-s%@sharedstatedir@%$sharedstatedir%g
-s%@localstatedir@%$localstatedir%g
-s%@libdir@%$libdir%g
-s%@includedir@%$includedir%g
-s%@oldincludedir@%$oldincludedir%g
-s%@infodir@%$infodir%g
-s%@mandir@%$mandir%g
-s%@CC@%$CC%g
-s%@CPP@%$CPP%g
-s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
-s%@INSTALL_DATA@%$INSTALL_DATA%g
-s%@RANLIB@%$RANLIB%g
-s%@LIBTOOL@%$LIBTOOL%g
-s%@O@%$O%g
-s%@A@%$A%g
-s%@LN@%$LN%g
-s%@INSTALL_LIB@%$INSTALL_LIB%g
-s%@MEMORYMGR@%$MEMORYMGR%g
-s%@JPEG_LIB_VERSION@%$JPEG_LIB_VERSION%g
-s%@A2K_DEPS@%$A2K_DEPS%g
-s%@COM_A2K@%$COM_A2K%g
-s%@ANSI2KNRFLAGS@%$ANSI2KNRFLAGS%g
-s%@COM_LT@%$COM_LT%g
-s%@FORCE_INSTALL_LIB@%$FORCE_INSTALL_LIB%g
-s%@INCLUDEFLAGS@%$INCLUDEFLAGS%g
-
-CEOF
-EOF
-
-cat >> $CONFIG_STATUS <<\EOF
-
-# Split the substitutions into bite-sized pieces for seds with
-# small command number limits, like on Digital OSF/1 and HP-UX.
-ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script.
-ac_file=1 # Number of current file.
-ac_beg=1 # First line for current file.
-ac_end=$ac_max_sed_cmds # Line after last line for current file.
-ac_more_lines=:
-ac_sed_cmds=""
-while $ac_more_lines; do
-  if test $ac_beg -gt 1; then
-    sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file
-  else
-    sed "${ac_end}q" conftest.subs > conftest.s$ac_file
-  fi
-  if test ! -s conftest.s$ac_file; then
-    ac_more_lines=false
-    rm -f conftest.s$ac_file
-  else
-    if test -z "$ac_sed_cmds"; then
-      ac_sed_cmds="sed -f conftest.s$ac_file"
-    else
-      ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file"
-    fi
-    ac_file=`expr $ac_file + 1`
-    ac_beg=$ac_end
-    ac_end=`expr $ac_end + $ac_max_sed_cmds`
-  fi
-done
-if test -z "$ac_sed_cmds"; then
-  ac_sed_cmds=cat
-fi
-EOF
-
-cat >> $CONFIG_STATUS <<EOF
-
-CONFIG_FILES=\${CONFIG_FILES-"Makefile:makefile.cfg"}
-EOF
-cat >> $CONFIG_STATUS <<\EOF
-for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then
-  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
-  case "$ac_file" in
-  *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'`
-       ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;;
-  *) ac_file_in="${ac_file}.in" ;;
-  esac
-
-  # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories.
-
-  # Remove last slash and all that follows it.  Not all systems have dirname.
-  ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'`
-  if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
-    # The file is in a subdirectory.
-    test ! -d "$ac_dir" && mkdir "$ac_dir"
-    ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`"
-    # A "../" for each directory in $ac_dir_suffix.
-    ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'`
-  else
-    ac_dir_suffix= ac_dots=
-  fi
-
-  case "$ac_given_srcdir" in
-  .)  srcdir=.
-      if test -z "$ac_dots"; then top_srcdir=.
-      else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;;
-  /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;;
-  *) # Relative path.
-    srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
-    top_srcdir="$ac_dots$ac_given_srcdir" ;;
-  esac
-
-  case "$ac_given_INSTALL" in
-  [/$]*) INSTALL="$ac_given_INSTALL" ;;
-  *) INSTALL="$ac_dots$ac_given_INSTALL" ;;
-  esac
-
-  echo creating "$ac_file"
-  rm -f "$ac_file"
-  configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure."
-  case "$ac_file" in
-  *Makefile*) ac_comsub="1i\\
-# $configure_input" ;;
-  *) ac_comsub= ;;
-  esac
-
-  ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"`
-  sed -e "$ac_comsub
-s%@configure_input@%$configure_input%g
-s%@srcdir@%$srcdir%g
-s%@top_srcdir@%$top_srcdir%g
-s%@INSTALL@%$INSTALL%g
-" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file
-fi; done
-rm -f conftest.s*
-
-# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
-# NAME is the cpp macro being defined and VALUE is the value it is being given.
-#
-# ac_d sets the value in "#define NAME VALUE" lines.
-ac_dA='s%^\([ 	]*\)#\([ 	]*define[ 	][ 	]*\)'
-ac_dB='\([ 	][ 	]*\)[^ 	]*%\1#\2'
-ac_dC='\3'
-ac_dD='%g'
-# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE".
-ac_uA='s%^\([ 	]*\)#\([ 	]*\)undef\([ 	][ 	]*\)'
-ac_uB='\([ 	]\)%\1#\2define\3'
-ac_uC=' '
-ac_uD='\4%g'
-# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
-ac_eA='s%^\([ 	]*\)#\([ 	]*\)undef\([ 	][ 	]*\)'
-ac_eB='$%\1#\2define\3'
-ac_eC=' '
-ac_eD='%g'
-
-if test "${CONFIG_HEADERS+set}" != set; then
-EOF
-cat >> $CONFIG_STATUS <<EOF
-  CONFIG_HEADERS="jconfig.h:jconfig.cfg"
-EOF
-cat >> $CONFIG_STATUS <<\EOF
-fi
-for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then
-  # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
-  case "$ac_file" in
-  *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'`
-       ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;;
-  *) ac_file_in="${ac_file}.in" ;;
-  esac
-
-  echo creating $ac_file
-
-  rm -f conftest.frag conftest.in conftest.out
-  ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"`
-  cat $ac_file_inputs > conftest.in
-
-EOF
-
-# Transform confdefs.h into a sed script conftest.vals that substitutes
-# the proper values into config.h.in to produce config.h.  And first:
-# Protect against being on the right side of a sed subst in config.status.
-# Protect against being in an unquoted here document in config.status.
-rm -f conftest.vals
-cat > conftest.hdr <<\EOF
-s/[\\&%]/\\&/g
-s%[\\$`]%\\&%g
-s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp
-s%ac_d%ac_u%gp
-s%ac_u%ac_e%gp
-EOF
-sed -n -f conftest.hdr confdefs.h > conftest.vals
-rm -f conftest.hdr
-
-# This sed command replaces #undef with comments.  This is necessary, for
-# example, in the case of _POSIX_SOURCE, which is predefined and required
-# on some systems where configure will not decide to define it.
-cat >> conftest.vals <<\EOF
-EOF
-
-# Break up conftest.vals because some shells have a limit on
-# the size of here documents, and old seds have small limits too.
-
-rm -f conftest.tail
-while :
-do
-  ac_lines=`grep -c . conftest.vals`
-  # grep -c gives empty output for an empty file on some AIX systems.
-  if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi
-  # Write a limited-size here document to conftest.frag.
-  echo '  cat > conftest.frag <<CEOF' >> $CONFIG_STATUS
-  sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS
-  echo 'CEOF
-  sed -f conftest.frag conftest.in > conftest.out
-  rm -f conftest.in
-  mv conftest.out conftest.in
-' >> $CONFIG_STATUS
-  sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail
-  rm -f conftest.vals
-  mv conftest.tail conftest.vals
-done
-rm -f conftest.vals
-
-cat >> $CONFIG_STATUS <<\EOF
-  rm -f conftest.frag conftest.h
-  echo "/* $ac_file.  Generated automatically by configure.  */" > conftest.h
-  cat conftest.in >> conftest.h
-  rm -f conftest.in
-  if cmp -s $ac_file conftest.h 2>/dev/null; then
-    echo "$ac_file is unchanged"
-    rm -f conftest.h
-  else
-    # Remove last slash and all that follows it.  Not all systems have dirname.
-      ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'`
-      if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
-      # The file is in a subdirectory.
-      test ! -d "$ac_dir" && mkdir "$ac_dir"
-    fi
-    rm -f $ac_file
-    mv conftest.h $ac_file
-  fi
-fi; done
-
-EOF
-cat >> $CONFIG_STATUS <<EOF
-
-EOF
-cat >> $CONFIG_STATUS <<\EOF
-
-exit 0
-EOF
-chmod +x $CONFIG_STATUS
-rm -fr confdefs* $ac_clean_files
-test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1
-
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..64f3265
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,559 @@
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ([2.56])
+AC_INIT([libjpeg-turbo], [1.3.80])
+BUILD=`date +%Y%m%d`
+
+AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
+AC_PREFIX_DEFAULT(/opt/libjpeg-turbo)
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+# Checks for programs.
+SAVED_CFLAGS=${CFLAGS}
+SAVED_CPPFLAGS=${CPPFLAGS}
+AC_PROG_CPP
+AC_PROG_CC
+m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
+AM_PROG_AS
+AM_PROG_CC_C_O
+AC_PROG_INSTALL
+AC_PROG_LIBTOOL
+AC_PROG_LN_S
+
+# When the prefix is /opt/libjpeg-turbo, we assume that an "official" binary is
+# being created, and thus we install things into specific locations.
+
+old_prefix=${prefix}
+if test "x$prefix" = "xNONE" -a "x$ac_default_prefix" != "x"; then
+  prefix=$ac_default_prefix
+fi
+DATADIR=`eval echo ${datadir}`
+DATADIR=`eval echo $DATADIR`
+if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then
+  datadir='${prefix}'
+fi
+DATADIR=`eval echo ${datarootdir}`
+DATADIR=`eval echo $DATADIR`
+if test "$DATADIR" = "/opt/libjpeg-turbo/share"; then
+  datarootdir='${prefix}'
+fi
+
+old_exec_prefix=${exec_prefix}
+if test "x$exec_prefix" = "xNONE"; then
+  exec_prefix=${prefix}
+fi
+
+if test "x${libdir}" = 'x${exec_prefix}/lib' -o "x${libdir}" = 'x${prefix}/lib'; then
+  LIBDIR=`eval echo ${libdir}`
+  LIBDIR=`eval echo $LIBDIR`
+  if test "$LIBDIR" = "/opt/libjpeg-turbo/lib"; then
+    case $host_os in
+      darwin*)
+        ;;
+      *)
+        AC_CHECK_SIZEOF(long)
+        if test "${ac_cv_sizeof_long}" = "8"; then
+          libdir='${exec_prefix}/lib64'
+        elif test "${ac_cv_sizeof_long}" = "4"; then
+          libdir='${exec_prefix}/lib32'
+        fi
+        ;;
+    esac
+  fi
+fi
+exec_prefix=${old_exec_prefix}
+prefix=${old_prefix}
+
+# Check whether compiler supports pointers to undefined structures
+AC_MSG_CHECKING(whether compiler supports pointers to undefined structures)
+AC_TRY_COMPILE([ typedef struct undefined_structure * undef_struct_ptr; ], ,
+  AC_MSG_RESULT(yes),
+  [AC_MSG_RESULT(no)
+   AC_DEFINE([INCOMPLETE_TYPES_BROKEN], [1],
+     [Compiler does not support pointers to undefined structures.])])
+
+if test "x${GCC}" = "xyes"; then
+  if test "x${SAVED_CFLAGS}" = "x"; then
+    CFLAGS=-O3
+  fi
+  if test "x${SAVED_CPPFLAGS}" = "x"; then
+    CPPFLAGS=-Wall
+  fi
+fi
+
+AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
+if test "x${SUNCC}" = "xyes"; then
+  if test "x${SAVED_CFLAGS}" = "x"; then
+    CFLAGS=-xO5
+  fi
+fi
+
+# Checks for libraries.
+
+# Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([stddef.h stdlib.h locale.h string.h])
+AC_CHECK_HEADER([sys/types.h],
+  AC_DEFINE([NEED_SYS_TYPES_H], 1, [Define if you need to include <sys/types.h> to get size_t.]))
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_CHAR_UNSIGNED
+AC_C_INLINE
+AC_TYPE_SIZE_T
+AC_CHECK_TYPES([unsigned char, unsigned short])
+
+AC_MSG_CHECKING([if right shift is signed])
+AC_TRY_RUN(
+  [#include <stdio.h>
+   int is_shifting_signed (long arg) {
+     long res = arg >> 4;
+     /* -0x7F7E80B1 >> 4 equals -0x7F7E80C when the sign bit is propagated */
+     if (res == -0x7F7E80CL)
+       return 1; /* right shift is signed */
+
+     /* see if unsigned-shift hack will fix it. */
+     /* we can't just test exact value since it depends on width of long... */
+     res |= (~0L) << (32-4);
+     if (res == -0x7F7E80CL)
+       return 0; /* right shift is unsigned */
+
+     printf("Right shift isn't acting as I expect it to.\n");
+     printf("I fear the JPEG software will not work at all.\n\n");
+     return 0; /* try it with unsigned anyway */
+   }
+   int main (void) {
+     return is_shifting_signed(-0x7F7E80B1L); /* status 1 = signed; avoids undeclared exit() */
+   }],
+  [AC_MSG_RESULT(no)
+   AC_DEFINE([RIGHT_SHIFT_IS_UNSIGNED], 1,
+     [Define if your (broken) compiler shifts signed values as if they were unsigned.])],
+  [AC_MSG_RESULT(yes)],
+  [AC_MSG_RESULT(Assuming that right shift is signed on target machine.)])
+
+# Checks for library functions.
+AC_CHECK_FUNCS([memset memcpy], [],
+  [AC_DEFINE([NEED_BSD_STRINGS], 1,
+     [Define if you have BSD-like bzero and bcopy in <strings.h> rather than memset/memcpy in <string.h>.])])
+
+AC_MSG_CHECKING([libjpeg API version])
+AC_ARG_VAR(JPEG_LIB_VERSION, [libjpeg API version (62, 70, or 80)])
+if test "x$JPEG_LIB_VERSION" = "x"; then
+  AC_ARG_WITH([jpeg7],
+    AC_HELP_STRING([--with-jpeg7],
+      [Emulate libjpeg v7 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
+  AC_ARG_WITH([jpeg8],
+    AC_HELP_STRING([--with-jpeg8],
+      [Emulate libjpeg v8 API/ABI (this makes libjpeg-turbo backward incompatible with libjpeg v6b.)]))
+  if test "x${with_jpeg8}" = "xyes"; then
+    JPEG_LIB_VERSION=80
+  else
+    if test "x${with_jpeg7}" = "xyes"; then
+      JPEG_LIB_VERSION=70
+    else
+      JPEG_LIB_VERSION=62
+    fi
+  fi
+fi
+JPEG_LIB_VERSION_DECIMAL=`expr $JPEG_LIB_VERSION / 10`.`expr $JPEG_LIB_VERSION % 10`
+AC_SUBST(JPEG_LIB_VERSION_DECIMAL)
+AC_MSG_RESULT([$JPEG_LIB_VERSION_DECIMAL])
+AC_DEFINE_UNQUOTED(JPEG_LIB_VERSION, [$JPEG_LIB_VERSION],
+  [libjpeg API version])
+
+AC_ARG_VAR(SO_MAJOR_VERSION,
+  [Major version of the libjpeg-turbo shared library (default is determined by the API version)])
+AC_ARG_VAR(SO_MINOR_VERSION,
+  [Minor version of the libjpeg-turbo shared library (default is determined by the API version)])
+if test "x$SO_MAJOR_VERSION" = "x"; then
+  case "$JPEG_LIB_VERSION" in
+    62)  SO_MAJOR_VERSION=$JPEG_LIB_VERSION ;;
+    *)   SO_MAJOR_VERSION=`expr $JPEG_LIB_VERSION / 10` ;;
+  esac
+fi
+if test "x$SO_MINOR_VERSION" = "x"; then
+  case "$JPEG_LIB_VERSION" in
+    80)  SO_MINOR_VERSION=2 ;;
+    *)   SO_MINOR_VERSION=0 ;;
+  esac
+fi
+
+RPM_CONFIG_ARGS=
+
+# Memory source/destination managers
+SO_AGE=0
+MEM_SRCDST_FUNCTIONS=
+if test "x${with_jpeg8}" != "xyes"; then
+  AC_MSG_CHECKING([whether to include in-memory source/destination managers])
+  AC_ARG_WITH([mem-srcdst],
+    AC_HELP_STRING([--without-mem-srcdst],
+      [Do not include in-memory source/destination manager functions when emulating the libjpeg v6b or v7 API/ABI]))
+  if test "x$with_mem_srcdst" != "xno"; then
+    AC_MSG_RESULT(yes)
+    AC_DEFINE([MEM_SRCDST_SUPPORTED], [1],
+      [Support in-memory source/destination managers])
+    SO_AGE=1
+    MEM_SRCDST_FUNCTIONS="global:  jpeg_mem_dest;  jpeg_mem_src;";
+  else
+    AC_MSG_RESULT(no)
+    RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-mem-srcdst"
+  fi
+fi
+
+AC_MSG_CHECKING([libjpeg shared library version])
+AC_MSG_RESULT([$SO_MAJOR_VERSION.$SO_AGE.$SO_MINOR_VERSION])
+LIBTOOL_CURRENT=`expr $SO_MAJOR_VERSION + $SO_AGE`
+AC_SUBST(LIBTOOL_CURRENT)
+AC_SUBST(SO_MAJOR_VERSION)
+AC_SUBST(SO_MINOR_VERSION)
+AC_SUBST(SO_AGE)
+AC_SUBST(MEM_SRCDST_FUNCTIONS)
+
+AC_DEFINE_UNQUOTED(LIBJPEG_TURBO_VERSION, [$VERSION], [libjpeg-turbo version])
+
+VERSION_SCRIPT=yes
+AC_ARG_ENABLE([ld-version-script],
+  AS_HELP_STRING([--disable-ld-version-script],
+    [Disable linker version script for libjpeg-turbo (default is to use linker version script if the linker supports it)]),
+  [VERSION_SCRIPT=$enableval], [])
+
+AC_MSG_CHECKING([whether the linker supports version scripts])
+SAVED_LDFLAGS="$LDFLAGS"
+LDFLAGS="$LDFLAGS -Wl,--version-script,conftest.map"
+cat > conftest.map <<EOF
+VERS_1 {
+  global: *;
+};
+EOF
+AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
+  [VERSION_SCRIPT_FLAG=-Wl,--version-script,;
+   AC_MSG_RESULT([yes (GNU style)])],
+  [])
+if test "x$VERSION_SCRIPT_FLAG" = "x"; then
+  LDFLAGS="$SAVED_LDFLAGS -Wl,-M,conftest.map"
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
+    [VERSION_SCRIPT_FLAG=-Wl,-M,;
+     AC_MSG_RESULT([yes (Sun style)])],
+    [])
+fi
+if test "x$VERSION_SCRIPT_FLAG" = "x"; then
+  VERSION_SCRIPT=no
+  AC_MSG_RESULT(no)
+fi
+LDFLAGS="$SAVED_LDFLAGS"
+
+AC_MSG_CHECKING([whether to use version script when building libjpeg-turbo])
+AC_MSG_RESULT($VERSION_SCRIPT)
+
+AM_CONDITIONAL(VERSION_SCRIPT, test "x$VERSION_SCRIPT" = "xyes")
+AC_SUBST(VERSION_SCRIPT_FLAG)
+
+# Check for non-broken inline under various spellings
+AC_MSG_CHECKING(for inline)
+ljt_cv_inline=""
+AC_TRY_COMPILE(, [} inline __attribute__((always_inline)) int foo() { return 0; }
+int bar() { return foo();], ljt_cv_inline="inline __attribute__((always_inline))",
+AC_TRY_COMPILE(, [} __inline__ int foo() { return 0; }
+int bar() { return foo();], ljt_cv_inline="__inline__",
+AC_TRY_COMPILE(, [} __inline int foo() { return 0; }
+int bar() { return foo();], ljt_cv_inline="__inline",
+AC_TRY_COMPILE(, [} inline int foo() { return 0; }
+int bar() { return foo();], ljt_cv_inline="inline"))))
+AC_MSG_RESULT($ljt_cv_inline)
+AC_DEFINE_UNQUOTED([INLINE],[$ljt_cv_inline],[How to obtain function inlining.])
+
+# Arithmetic coding support
+AC_MSG_CHECKING([whether to include arithmetic encoding support])
+AC_ARG_WITH([arith-enc],
+  AC_HELP_STRING([--without-arith-enc],
+    [Do not include arithmetic encoding support]))
+if test "x$with_12bit" = "xyes"; then
+  with_arith_enc=no
+fi
+if test "x$with_arith_enc" = "xno"; then
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-arith-enc"
+else
+  AC_DEFINE([C_ARITH_CODING_SUPPORTED], [1], [Support arithmetic encoding])
+  AC_MSG_RESULT(yes)
+fi
+AM_CONDITIONAL([WITH_ARITH_ENC], [test "x$with_arith_enc" != "xno"])
+
+AC_MSG_CHECKING([whether to include arithmetic decoding support])
+AC_ARG_WITH([arith-dec],
+  AC_HELP_STRING([--without-arith-dec],
+    [Do not include arithmetic decoding support]))
+if test "x$with_12bit" = "xyes"; then
+  with_arith_dec=no
+fi
+if test "x$with_arith_dec" = "xno"; then
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-arith-dec"
+else
+  AC_DEFINE([D_ARITH_CODING_SUPPORTED], [1], [Support arithmetic decoding])
+  AC_MSG_RESULT(yes)
+fi
+AM_CONDITIONAL([WITH_ARITH_DEC], [test "x$with_arith_dec" != "xno"])
+
+AM_CONDITIONAL([WITH_ARITH],
+  [test "x$with_arith_dec" != "xno" -o "x$with_arith_enc" != "xno"])
+
+# 12-bit component support
+AC_MSG_CHECKING([whether to use 12-bit samples])
+AC_ARG_WITH([12bit],
+  AC_HELP_STRING([--with-12bit], [Encode/decode JPEG images with 12-bit samples (implies --without-simd --without-turbojpeg --without-arith-dec --without-arith-enc)]))
+if test "x$with_12bit" = "xyes"; then
+  AC_DEFINE([BITS_IN_JSAMPLE], [12], [use 8 or 12])
+  AC_MSG_RESULT(yes)
+else
+  AC_MSG_RESULT(no)
+fi
+AM_CONDITIONAL([WITH_12BIT], [test "x$with_12bit" = "xyes"])
+
+# TurboJPEG support
+AC_MSG_CHECKING([whether to build TurboJPEG C wrapper])
+AC_ARG_WITH([turbojpeg],
+  AC_HELP_STRING([--without-turbojpeg],
+    [Do not include the TurboJPEG wrapper library and associated test programs]))
+if test "x$with_12bit" = "xyes"; then
+  with_turbojpeg=no
+fi
+if test "x$with_turbojpeg" = "xno"; then
+  AC_MSG_RESULT(no)
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-turbojpeg"
+else
+  AC_MSG_RESULT(yes)
+fi
+
+# Java support
+AC_ARG_VAR(JAVAC, [Java compiler command (default: javac)])
+if test "x$JAVAC" = "x"; then
+  JAVAC=javac
+fi
+AC_SUBST(JAVAC)
+AC_ARG_VAR(JAVACFLAGS, [Java compiler flags])
+AC_SUBST(JAVACFLAGS)
+AC_ARG_VAR(JAR, [Java archive command (default: jar)])
+if test "x$JAR" = "x"; then
+  JAR=jar
+fi
+AC_SUBST(JAR)
+AC_ARG_VAR(JAVA, [Java runtime command (default: java)])
+if test "x$JAVA" = "x"; then
+  JAVA=java
+fi
+AC_SUBST(JAVA)
+AC_ARG_VAR(JNI_CFLAGS,
+  [C compiler flags needed to include jni.h (default: -I/System/Library/Frameworks/JavaVM.framework/Headers on OS X, '-I/usr/java/include -I/usr/java/include/solaris' on Solaris, and '-I/usr/java/default/include -I/usr/java/default/include/linux' on Linux)])
+
+AC_MSG_CHECKING([whether to build TurboJPEG Java wrapper])
+AC_ARG_WITH([java],
+  AC_HELP_STRING([--with-java], [Build Java wrapper for the TurboJPEG library]))
+if test "x$with_12bit" = "xyes" -o "x$with_turbojpeg" = "xno"; then
+  with_java=no
+fi
+
+WITH_JAVA=0
+if test "x$with_java" = "xyes"; then
+  AC_MSG_RESULT(yes)
+
+  case $host_os in
+    darwin*)
+      DEFAULT_JNI_CFLAGS=-I/System/Library/Frameworks/JavaVM.framework/Headers
+      ;;
+    solaris*)
+      DEFAULT_JNI_CFLAGS='-I/usr/java/include -I/usr/java/include/solaris'
+      ;;
+    linux*)
+      DEFAULT_JNI_CFLAGS='-I/usr/java/default/include -I/usr/java/default/include/linux'
+      ;;
+  esac
+  if test "x$JNI_CFLAGS" = "x"; then
+    JNI_CFLAGS=$DEFAULT_JNI_CFLAGS
+  fi
+
+  SAVE_CPPFLAGS=${CPPFLAGS}
+  CPPFLAGS="${CPPFLAGS} ${JNI_CFLAGS}"
+  AC_CHECK_HEADERS([jni.h], [DUMMY=1],
+    [AC_MSG_ERROR([Could not find JNI header file])])
+  CPPFLAGS=${SAVE_CPPFLAGS}
+  AC_SUBST(JNI_CFLAGS)
+
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --with-java"
+  JAVA_RPM_CONTENTS_1='%dir %{_datadir}/classes'
+  JAVA_RPM_CONTENTS_2=%{_datadir}/classes/turbojpeg.jar
+  WITH_JAVA=1
+else
+  AC_MSG_RESULT(no)
+fi
+AM_CONDITIONAL([WITH_JAVA], [test "x$with_java" = "xyes"])
+AC_SUBST(WITH_JAVA)
+AC_SUBST(JAVA_RPM_CONTENTS_1)
+AC_SUBST(JAVA_RPM_CONTENTS_2)
+
+# optionally force using gas-preprocessor.pl for compatibility testing
+AC_ARG_WITH([gas-preprocessor],
+  AC_HELP_STRING([--with-gas-preprocessor],
+    [Force using gas-preprocessor.pl on ARM.]))
+if test "x${with_gas_preprocessor}" = "xyes"; then
+  case $host_os in
+    darwin*)
+      CCAS="gas-preprocessor.pl -fix-unreq $CC"
+      ;;
+    *)
+      CCAS="gas-preprocessor.pl -no-fix-unreq $CC"
+      ;;
+  esac
+  AC_SUBST([CCAS])
+fi
+
+# SIMD is optional
+AC_ARG_WITH([simd],
+  AC_HELP_STRING([--without-simd], [Do not include SIMD extensions]))
+if test "x$with_12bit" = "xyes"; then
+  with_simd=no
+fi
+if test "x${with_simd}" != "xno"; then
+  require_simd=no
+  if test "x${with_simd}" = "xyes"; then
+    require_simd=yes
+  fi
+  # Check if we're on a supported CPU
+  AC_MSG_CHECKING([if we have SIMD optimisations for cpu type])
+  case "$host_cpu" in
+    x86_64 | amd64)
+      AC_MSG_RESULT([yes (x86_64)])
+      AC_PROG_NASM
+      simd_arch=x86_64
+      ;;
+    i*86 | x86 | ia32)
+      AC_MSG_RESULT([yes (i386)])
+      AC_PROG_NASM
+      simd_arch=i386
+      ;;
+    arm*)
+      AC_MSG_RESULT([yes (arm)])
+      AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
+      AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE(
+        [AC_MSG_RESULT([yes])
+         simd_arch=arm],
+        [AC_MSG_RESULT([no])
+         with_simd=no])
+      if test "x${with_simd}" = "xno"; then
+        if test "x${require_simd}" = "xyes"; then
+          AC_MSG_ERROR([SIMD support can't be enabled.])
+        else
+          AC_MSG_WARN([SIMD support can't be enabled.  Performance will suffer.])
+        fi
+      fi
+      ;;
+    aarch64*)
+      AC_MSG_RESULT([yes (arm64)])
+      AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
+      AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE(
+        [AC_MSG_RESULT([yes])
+         simd_arch=aarch64],
+        [AC_MSG_RESULT([no])
+         with_simd=no])
+      if test "x${with_simd}" = "xno"; then
+        if test "x${require_simd}" = "xyes"; then
+          AC_MSG_ERROR([SIMD support can't be enabled.])
+        else
+          AC_MSG_WARN([SIMD support can't be enabled.  Performance will suffer.])
+        fi
+      fi
+      ;;
+    mips*)
+      AC_MSG_RESULT([yes (mips)])
+      AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
+      AC_CHECK_COMPATIBLE_MIPS_ASSEMBLER_IFELSE(
+        [AC_MSG_RESULT([yes])
+         simd_arch=mips],
+        [AC_MSG_RESULT([no])
+         with_simd=no])
+      if test "x${with_simd}" = "xno"; then
+        if test "x${require_simd}" = "xyes"; then
+          AC_MSG_ERROR([SIMD support can't be enabled.])
+        else
+          AC_MSG_WARN([SIMD support can't be enabled.  Performance will suffer.])
+        fi
+      fi
+      ;;
+    *)
+      AC_MSG_RESULT([no ("$host_cpu")])
+      with_simd=no;
+      if test "x${require_simd}" = "xyes"; then
+        AC_MSG_ERROR([SIMD support not available for this CPU.])
+      else
+        AC_MSG_WARN([SIMD support not available for this CPU.  Performance will suffer.])
+      fi
+      ;;
+  esac
+
+  if test "x${with_simd}" != "xno"; then
+    AC_DEFINE([WITH_SIMD], [1], [Use accelerated SIMD routines.])
+  fi
+else
+  RPM_CONFIG_ARGS="$RPM_CONFIG_ARGS --without-simd"
+fi
+
+AM_CONDITIONAL([WITH_SIMD], [test "x$with_simd" != "xno"])
+AM_CONDITIONAL([WITH_SSE_FLOAT_DCT], [test "x$simd_arch" = "xx86_64" -o "x$simd_arch" = "xi386"])
+AM_CONDITIONAL([SIMD_I386], [test "x$simd_arch" = "xi386"])
+AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
+AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
+AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"])
+AM_CONDITIONAL([SIMD_MIPS], [test "x$simd_arch" = "xmips"])
+AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
+AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])
+
+AC_ARG_VAR(PKGNAME, [distribution package name (default: libjpeg-turbo)])
+if test "x$PKGNAME" = "x"; then
+  PKGNAME=$PACKAGE_NAME
+fi
+AC_SUBST(PKGNAME)
+
+case "$host_cpu" in
+  x86_64)
+    RPMARCH=x86_64
+    DEBARCH=amd64
+    ;;
+  i*86 | x86 | ia32)
+    RPMARCH=i386
+    DEBARCH=i386
+    ;;
+esac
+
+AC_SUBST(RPMARCH)
+AC_SUBST(RPM_CONFIG_ARGS)
+AC_SUBST(DEBARCH)
+AC_SUBST(BUILD)
+AC_DEFINE_UNQUOTED([BUILD], "$BUILD", [libjpeg-turbo build number])
+
+# NOTE: autoheader automatically modifies the input file of the first
+# invocation of AC_CONFIG_HEADERS, so we put config.h first to prevent
+# jconfig.h.in from being clobbered.  config.h is used only internally, whereas
+# jconfig.h contains macros that are relevant to external programs (macros that
+# specify which features were built into the library.)
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_HEADERS([jconfig.h])
+AC_CONFIG_HEADERS([jconfigint.h])
+AC_CONFIG_FILES([pkgscripts/libjpeg-turbo.spec.tmpl:release/libjpeg-turbo.spec.in])
+AC_CONFIG_FILES([pkgscripts/makecygwinpkg.tmpl:release/makecygwinpkg.in])
+AC_CONFIG_FILES([pkgscripts/makedpkg.tmpl:release/makedpkg.in])
+AC_CONFIG_FILES([pkgscripts/makemacpkg.tmpl:release/makemacpkg.in])
+AC_CONFIG_FILES([pkgscripts/uninstall.tmpl:release/uninstall.in])
+if test "x$with_turbojpeg" != "xno"; then
+  AC_CONFIG_FILES([tjbenchtest])
+fi
+if test "x$with_java" = "xyes"; then
+  AC_CONFIG_FILES([tjbenchtest.java])
+  AC_CONFIG_FILES([tjexampletest])
+fi
+AC_CONFIG_FILES([libjpeg.map])
+AC_CONFIG_FILES([Makefile simd/Makefile])
+AC_CONFIG_FILES([java/Makefile])
+AC_CONFIG_FILES([md5/Makefile])
+AC_OUTPUT
diff --git a/djpeg.1 b/djpeg.1
index 11beb6a..9b040ae 100644
--- a/djpeg.1
+++ b/djpeg.1
@@ -1,4 +1,4 @@
-.TH DJPEG 1 "22 August 1997"
+.TH DJPEG 1 "11 May 2014"
 .SH NAME
 djpeg \- decompress a JPEG file to an image file
 .SH SYNOPSIS
@@ -55,15 +55,19 @@
 equivalent to \fB\-dct fast \-nosmooth \-onepass \-dither ordered\fR.
 .TP
 .B \-grayscale
-Force gray-scale output even if JPEG file is color.  Useful for viewing on
+Force grayscale output even if JPEG file is color.  Useful for viewing on
 monochrome displays; also,
 .B djpeg
 runs noticeably faster in this mode.
 .TP
+.B \-rgb
+Force RGB output even if JPEG file is grayscale.
+.TP
 .BI \-scale " M/N"
 Scale the output image by a factor M/N.  Currently the scale factor must be
-1/1, 1/2, 1/4, or 1/8.  Scaling is handy if the image is larger than your
-screen; also,
+M/8, where M is an integer between 1 and 16 inclusive, or any reduced fraction
+thereof (such as 1/2, 3/4, etc.)  Scaling is handy if the image is larger than
+your screen; also,
 .B djpeg
 runs much faster when scaling down the output.
 .TP
@@ -73,7 +77,7 @@
 .B \-colors
 or
 .B \-grayscale
-is specified, or if the JPEG file is gray-scale; otherwise, 24-bit full-color
+is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
 format is emitted.
 .TP
 .B \-gif
@@ -87,12 +91,12 @@
 .B \-colors
 or
 .B \-grayscale
-is specified, or if the JPEG file is gray-scale; otherwise, 24-bit full-color
+is specified, or if the JPEG file is grayscale; otherwise, 24-bit full-color
 format is emitted.
 .TP
 .B \-pnm
 Select PBMPLUS (PPM/PGM) output format (this is the default format).
-PGM is emitted if the JPEG file is gray-scale or if
+PGM is emitted if the JPEG file is grayscale or if
 .B \-grayscale
 is specified; otherwise PPM is emitted.
 .TP
@@ -100,8 +104,8 @@
 Select RLE output format.  (Requires URT library.)
 .TP
 .B \-targa
-Select Targa output format.  Gray-scale format is emitted if the JPEG file is
-gray-scale or if
+Select Targa output format.  Grayscale format is emitted if the JPEG file is
+grayscale or if
 .B \-grayscale
 is specified; otherwise, colormapped format is emitted if
 .B \-colors
@@ -114,14 +118,28 @@
 .TP
 .B \-dct fast
 Use fast integer DCT (less accurate).
+In libjpeg-turbo, the fast method is generally about 5-15% faster than the int
+method when using the x86/x86-64 SIMD extensions (results may vary with other
+SIMD implementations, or when using libjpeg-turbo without SIMD extensions.)  If
+the JPEG image was compressed using a quality level of 85 or below, then there
+should be little or no perceptible difference between the two algorithms.  When
+decompressing images that were compressed using quality levels above 85,
+however, the difference between the fast and int methods becomes more
+pronounced.  With images compressed using quality=97, for instance, the fast
+method generally incurs about a 4-6 dB loss (in PSNR) relative to the int
+method, but this can be larger for some images.  If you can avoid it, do not
+use the fast method when decompressing images that were compressed using
+quality levels above 97.  The algorithm often degenerates for such images and
+can actually produce a more lossy output image than if the JPEG image had been
+compressed using lower quality levels.
 .TP
 .B \-dct float
 Use floating-point DCT method.
-The float method is very slightly more accurate than the int method, but is
-much slower unless your machine has very fast floating-point hardware.  Also
-note that results of the floating-point method may vary slightly across
-machines, while the integer methods should give the same results everywhere.
-The fast integer method is much less accurate than the other two.
+The float method is mainly a legacy feature.  It does not produce significantly
+more accurate results than the int method, and it is much slower.  The float
+method may also give different results on different machines due to varying
+roundoff behavior, whereas the integer methods should give the same results on
+all machines.
 .TP
 .B \-dither fs
 Use Floyd-Steinberg dithering in color quantization.
@@ -159,7 +177,7 @@
 is ignored unless you also say
 .B \-colors
 .IR N .
-Also, the one-pass method is always used for gray-scale output (the two-pass
+Also, the one-pass method is always used for grayscale output (the two-pass
 method is no improvement then).
 .TP
 .BI \-maxmemory " N"
@@ -172,6 +190,10 @@
 .BI \-outfile " name"
 Send output image to the named file, not to standard output.
 .TP
+.BI \-memsrc
+Load input file into memory before decompressing.  This feature was implemented
+mainly as a way of testing the in-memory source manager (jpeg_mem_src().)
+.TP
 .B \-verbose
 Enable debug printout.  More
 .BR \-v 's
@@ -242,12 +264,12 @@
 Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
 .SH AUTHOR
 Independent JPEG Group
-.SH BUGS
-Arithmetic coding is not supported for legal reasons.
 .PP
+This file was modified by The libjpeg-turbo Project to include only information
+relevant to libjpeg-turbo, to wordsmith certain sections, and to describe
+features not present in libjpeg.
+.SH BUGS
 To avoid the Unisys LZW patent,
 .B djpeg
 produces uncompressed GIF files.  These are larger than they should be, but
 are readable by standard GIF decoders.
-.PP
-Still not as fast as we'd like.
diff --git a/djpeg.c b/djpeg.c
index e099e90..7c63f25 100644
--- a/djpeg.c
+++ b/djpeg.c
@@ -1,8 +1,10 @@
 /*
  * djpeg.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010-2011, 2013-2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a command-line user interface for the JPEG decompressor.
@@ -10,8 +12,8 @@
  *
  * Two different command line styles are permitted, depending on the
  * compile-time switch TWO_FILE_COMMANDLINE:
- *	djpeg [options]  inputfile outputfile
- *	djpeg [options]  [inputfile]
+ *      djpeg [options]  inputfile outputfile
+ *      djpeg [options]  [inputfile]
  * In the second style, output is always to standard output, which you'd
  * normally redirect to a file or pipe to some other program.  Input is
  * either from a named file or from standard input (typically redirected).
@@ -19,29 +21,30 @@
  * don't support pipes.  Also, you MUST use the first style if your system
  * doesn't do binary I/O to stdin/stdout.
  * To simplify script writing, the "-outfile" switch is provided.  The syntax
- *	djpeg [options]  -outfile outputfile  inputfile
+ *      djpeg [options]  -outfile outputfile  inputfile
  * works regardless of which command line style is used.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "jversion.h"		/* for version message */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#include <ctype.h>		/* to declare isprint() */
+#include <ctype.h>              /* to declare isprint() */
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
 
 /* Create the add-on message string table. */
 
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 static const char * const cdjpeg_message_table[] = {
 #include "cderror.h"
@@ -57,17 +60,17 @@
  */
 
 typedef enum {
-	FMT_BMP,		/* BMP format (Windows flavor) */
-	FMT_GIF,		/* GIF format */
-	FMT_OS2,		/* BMP format (OS/2 flavor) */
-	FMT_PPM,		/* PPM/PGM (PBMPLUS formats) */
-	FMT_RLE,		/* RLE format */
-	FMT_TARGA,		/* Targa format */
-	FMT_TIFF		/* TIFF format */
+        FMT_BMP,                /* BMP format (Windows flavor) */
+        FMT_GIF,                /* GIF format */
+        FMT_OS2,                /* BMP format (OS/2 flavor) */
+        FMT_PPM,                /* PPM/PGM (PBMPLUS formats) */
+        FMT_RLE,                /* RLE format */
+        FMT_TARGA,              /* Targa format */
+        FMT_TIFF                /* TIFF format */
 } IMAGE_FORMATS;
 
-#ifndef DEFAULT_FMT		/* so can override from CFLAGS in Makefile */
-#define DEFAULT_FMT	FMT_PPM
+#ifndef DEFAULT_FMT             /* so can override from CFLAGS in Makefile */
+#define DEFAULT_FMT     FMT_PPM
 #endif
 
 static IMAGE_FORMATS requested_fmt;
@@ -82,8 +85,10 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+boolean memsrc;  /* for -memsrc switch */
+#define INPUT_BUF_SIZE  4096
 
 
 LOCAL(void)
@@ -101,45 +106,47 @@
   fprintf(stderr, "  -colors N      Reduce image to no more than N colors\n");
   fprintf(stderr, "  -fast          Fast, low-quality processing\n");
   fprintf(stderr, "  -grayscale     Force grayscale output\n");
+  fprintf(stderr, "  -rgb           Force RGB output\n");
+  fprintf(stderr, "  -rgb565        Force RGB565 output\n");
 #ifdef IDCT_SCALING_SUPPORTED
   fprintf(stderr, "  -scale M/N     Scale output image by fraction M/N, eg, 1/8\n");
 #endif
 #ifdef BMP_SUPPORTED
   fprintf(stderr, "  -bmp           Select BMP output format (Windows style)%s\n",
-	  (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_BMP ? " (default)" : ""));
 #endif
 #ifdef GIF_SUPPORTED
   fprintf(stderr, "  -gif           Select GIF output format%s\n",
-	  (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_GIF ? " (default)" : ""));
 #endif
 #ifdef BMP_SUPPORTED
   fprintf(stderr, "  -os2           Select BMP output format (OS/2 style)%s\n",
-	  (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_OS2 ? " (default)" : ""));
 #endif
 #ifdef PPM_SUPPORTED
   fprintf(stderr, "  -pnm           Select PBMPLUS (PPM/PGM) output format%s\n",
-	  (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_PPM ? " (default)" : ""));
 #endif
 #ifdef RLE_SUPPORTED
   fprintf(stderr, "  -rle           Select Utah RLE output format%s\n",
-	  (DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_RLE ? " (default)" : ""));
 #endif
 #ifdef TARGA_SUPPORTED
   fprintf(stderr, "  -targa         Select Targa output format%s\n",
-	  (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
+          (DEFAULT_FMT == FMT_TARGA ? " (default)" : ""));
 #endif
   fprintf(stderr, "Switches for advanced users:\n");
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_ISLOW ? " (default)" : ""));
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   fprintf(stderr, "  -dct fast      Use fast integer DCT (less accurate)%s\n",
-	  (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_IFAST ? " (default)" : ""));
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   fprintf(stderr, "  -dct float     Use floating-point DCT method%s\n",
-	  (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
+          (JDCT_DEFAULT == JDCT_FLOAT ? " (default)" : ""));
 #endif
   fprintf(stderr, "  -dither fs     Use F-S dithering (default)\n");
   fprintf(stderr, "  -dither none   Don't use dithering in quantization\n");
@@ -153,6 +160,10 @@
 #endif
   fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
   fprintf(stderr, "  -outfile name  Specify name for output file\n");
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  fprintf(stderr, "  -memsrc        Load input file into memory before decompressing\n");
+#endif
+
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   exit(EXIT_FAILURE);
 }
@@ -160,7 +171,7 @@
 
 LOCAL(int)
 parse_switches (j_decompress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -174,8 +185,9 @@
   char * arg;
 
   /* Set up default JPEG parameters. */
-  requested_fmt = DEFAULT_FMT;	/* set default output file format */
+  requested_fmt = DEFAULT_FMT;  /* set default output file format */
   outfilename = NULL;
+  memsrc = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -185,54 +197,54 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "bmp", 1)) {
       /* BMP output format. */
       requested_fmt = FMT_BMP;
 
     } else if (keymatch(arg, "colors", 1) || keymatch(arg, "colours", 1) ||
-	       keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
+               keymatch(arg, "quantize", 1) || keymatch(arg, "quantise", 1)) {
       /* Do color quantization. */
       int val;
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d", &val) != 1)
-	usage();
+        usage();
       cinfo->desired_number_of_colors = val;
       cinfo->quantize_colors = TRUE;
 
     } else if (keymatch(arg, "dct", 2)) {
       /* Select IDCT algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "int", 1)) {
-	cinfo->dct_method = JDCT_ISLOW;
+        cinfo->dct_method = JDCT_ISLOW;
       } else if (keymatch(argv[argn], "fast", 2)) {
-	cinfo->dct_method = JDCT_IFAST;
+        cinfo->dct_method = JDCT_IFAST;
       } else if (keymatch(argv[argn], "float", 2)) {
-	cinfo->dct_method = JDCT_FLOAT;
+        cinfo->dct_method = JDCT_FLOAT;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "dither", 2)) {
       /* Select dithering algorithm. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "fs", 2)) {
-	cinfo->dither_mode = JDITHER_FS;
+        cinfo->dither_mode = JDITHER_FS;
       } else if (keymatch(argv[argn], "none", 2)) {
-	cinfo->dither_mode = JDITHER_NONE;
+        cinfo->dither_mode = JDITHER_NONE;
       } else if (keymatch(argv[argn], "ordered", 2)) {
-	cinfo->dither_mode = JDITHER_ORDERED;
+        cinfo->dither_mode = JDITHER_ORDERED;
       } else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
       /* Enable debug printouts. */
@@ -240,9 +252,12 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "Independent JPEG Group's DJPEG, version %s\n%s\n",
-		JVERSION, JCOPYRIGHT);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
@@ -251,7 +266,7 @@
       cinfo->two_pass_quantize = FALSE;
       cinfo->dither_mode = JDITHER_ORDERED;
       if (! cinfo->quantize_colors) /* don't override an earlier -colors */
-	cinfo->desired_number_of_colors = 216;
+        cinfo->desired_number_of_colors = 216;
       cinfo->dct_method = JDCT_FASTEST;
       cinfo->do_fancy_upsampling = FALSE;
 
@@ -263,23 +278,31 @@
       /* Force monochrome output. */
       cinfo->out_color_space = JCS_GRAYSCALE;
 
+    } else if (keymatch(arg, "rgb", 2)) {
+      /* Force RGB output. */
+      cinfo->out_color_space = JCS_RGB;
+
+    } else if (keymatch(arg, "rgb565", 2)) {
+      /* Force RGB565 output. */
+      cinfo->out_color_space = JCS_RGB565;
+
     } else if (keymatch(arg, "map", 3)) {
       /* Quantize to a color map taken from an input file. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      if (for_real) {		/* too expensive to do twice! */
-#ifdef QUANT_2PASS_SUPPORTED	/* otherwise can't quantize to supplied map */
-	FILE * mapfile;
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (for_real) {           /* too expensive to do twice! */
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
+        FILE * mapfile;
 
-	if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
-	  fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
-	  exit(EXIT_FAILURE);
-	}
-	read_color_map(cinfo, mapfile);
-	fclose(mapfile);
-	cinfo->quantize_colors = TRUE;
+        if ((mapfile = fopen(argv[argn], READ_BINARY)) == NULL) {
+          fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+          exit(EXIT_FAILURE);
+        }
+        read_color_map(cinfo, mapfile);
+        fclose(mapfile);
+        cinfo->quantize_colors = TRUE;
 #else
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
       }
 
@@ -288,12 +311,12 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "nosmooth", 3)) {
@@ -310,9 +333,19 @@
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
+
+    } else if (keymatch(arg, "memsrc", 2)) {
+      /* Use in-memory source manager */
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+      memsrc = TRUE;
+#else
+      fprintf(stderr, "%s: sorry, in-memory source manager was not compiled in\n",
+              progname);
+      exit(EXIT_FAILURE);
+#endif
 
     } else if (keymatch(arg, "pnm", 1) || keymatch(arg, "ppm", 1)) {
       /* PPM/PGM output format. */
@@ -324,22 +357,22 @@
 
     } else if (keymatch(arg, "scale", 1)) {
       /* Scale the output image by a fraction M/N. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%d/%d",
-		 &cinfo->scale_num, &cinfo->scale_denom) != 2)
-	usage();
+                 &cinfo->scale_num, &cinfo->scale_denom) != 2)
+        usage();
 
     } else if (keymatch(arg, "targa", 1)) {
       /* Targa output format. */
       requested_fmt = FMT_TARGA;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -375,14 +408,14 @@
 
   length = jpeg_getc(cinfo) << 8;
   length += jpeg_getc(cinfo);
-  length -= 2;			/* discount the length word itself */
+  length -= 2;                  /* discount the length word itself */
 
   if (traceit) {
     if (cinfo->unread_marker == JPEG_COM)
       fprintf(stderr, "Comment, length %ld:\n", (long) length);
-    else			/* assume it is an APPn otherwise */
+    else                        /* assume it is an APPn otherwise */
       fprintf(stderr, "APP%d, length %ld:\n",
-	      cinfo->unread_marker - JPEG_APP0, (long) length);
+              cinfo->unread_marker - JPEG_APP0, (long) length);
   }
 
   while (--length >= 0) {
@@ -394,16 +427,16 @@
        * Newlines in CR, CR/LF, or LF form will be printed as one newline.
        */
       if (ch == '\r') {
-	fprintf(stderr, "\n");
+        fprintf(stderr, "\n");
       } else if (ch == '\n') {
-	if (lastch != '\r')
-	  fprintf(stderr, "\n");
+        if (lastch != '\r')
+          fprintf(stderr, "\n");
       } else if (ch == '\\') {
-	fprintf(stderr, "\\\\");
+        fprintf(stderr, "\\\\");
       } else if (isprint(ch)) {
-	putc(ch, stderr);
+        putc(ch, stderr);
       } else {
-	fprintf(stderr, "\\%03o", ch);
+        fprintf(stderr, "\\%03o", ch);
       }
       lastch = ch;
     }
@@ -432,6 +465,8 @@
   djpeg_dest_ptr dest_mgr = NULL;
   FILE * input_file;
   FILE * output_file;
+  unsigned char *inbuffer = NULL;
+  unsigned long insize = 0;
   JDIMENSION num_scanlines;
 
   /* On Mac, fetch a command line. */
@@ -441,7 +476,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "djpeg";		/* in case C library doesn't provide it */
+    progname = "djpeg";         /* in case C library doesn't provide it */
 
   /* Initialize the JPEG decompression object with default error handling. */
   cinfo.err = jpeg_std_error(&jerr);
@@ -455,16 +490,11 @@
    * APP12 is used by some digital camera makers for textual info,
    * so we provide the ability to display it as text.
    * If you like, additional APPn marker types can be selected for display,
-   * but don't try to override APP0 or APP14 this way (see libjpeg.doc).
+   * but don't try to override APP0 or APP14 this way (see libjpeg.txt).
    */
   jpeg_set_marker_processor(&cinfo, JPEG_COM, print_text_marker);
   jpeg_set_marker_processor(&cinfo, JPEG_APP0+12, print_text_marker);
 
-  /* Now safe to enable signal catcher. */
-#ifdef NEED_SIGNAL_CATCHER
-  enable_signal_catcher((j_common_ptr) &cinfo);
-#endif
-
   /* Scan command line to find file names. */
   /* It is convenient to use just one switch-parsing routine, but the switch
    * values read here are ignored; we will rescan the switches after opening
@@ -480,14 +510,14 @@
   if (outfilename == NULL) {
     if (file_index != argc-2) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
     outfilename = argv[file_index+1];
   } else {
     if (file_index != argc-1) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
   }
@@ -526,7 +556,30 @@
 #endif
 
   /* Specify data source for decompression */
-  jpeg_stdio_src(&cinfo, input_file);
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+  if (memsrc) {
+    size_t nbytes;
+    do {
+      inbuffer = (unsigned char *)realloc(inbuffer, insize + INPUT_BUF_SIZE);
+      if (inbuffer == NULL) {
+        fprintf(stderr, "%s: memory allocation failure\n", progname);
+        exit(EXIT_FAILURE);
+      }
+      nbytes = JFREAD(input_file, &inbuffer[insize], INPUT_BUF_SIZE);
+      if (nbytes < INPUT_BUF_SIZE && ferror(input_file)) {
+        if (file_index < argc)
+          fprintf(stderr, "%s: can't read from %s\n", progname,
+                  argv[file_index]);
+        else
+          fprintf(stderr, "%s: can't read from stdin\n", progname);
+      }
+      insize += (unsigned long)nbytes;
+    } while (nbytes == INPUT_BUF_SIZE);
+    fprintf(stderr, "Compressed size:  %lu bytes\n", insize);
+    jpeg_mem_src(&cinfo, inbuffer, insize);
+  } else
+#endif
+    jpeg_stdio_src(&cinfo, input_file);
 
   /* Read file header, set default decompression parameters */
   (void) jpeg_read_header(&cinfo, TRUE);
@@ -581,7 +634,7 @@
   /* Process data */
   while (cinfo.output_scanline < cinfo.output_height) {
     num_scanlines = jpeg_read_scanlines(&cinfo, dest_mgr->buffer,
-					dest_mgr->buffer_height);
+                                        dest_mgr->buffer_height);
     (*dest_mgr->put_pixel_rows) (&cinfo, dest_mgr, num_scanlines);
   }
 
@@ -610,7 +663,10 @@
   end_progress_monitor((j_common_ptr) &cinfo);
 #endif
 
+  if (memsrc && inbuffer != NULL)
+    free(inbuffer);
+
   /* All done. */
   exit(jerr.num_warnings ? EXIT_WARNING : EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/doc/html/annotated.html b/doc/html/annotated.html
new file mode 100644
index 0000000..f928720
--- /dev/null
+++ b/doc/html/annotated.html
@@ -0,0 +1,103 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Data Structures</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Data Structures</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="textblock">Here are the data structures with brief descriptions:</div><div class="directory">
+<table class="directory">
+<tr id="row_0_" class="even"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjregion.html" target="_self">tjregion</a></td><td class="desc">Cropping region</td></tr>
+<tr id="row_1_"><td class="entry"><img src="ftv2node.png" alt="o" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjscalingfactor.html" target="_self">tjscalingfactor</a></td><td class="desc">Scaling factor</td></tr>
+<tr id="row_2_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><img src="ftv2cl.png" alt="C" width="24" height="22" /><a class="el" href="structtjtransform.html" target="_self">tjtransform</a></td><td class="desc">Lossless transform</td></tr>
+</table>
+</div><!-- directory -->
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/bc_s.png b/doc/html/bc_s.png
new file mode 100644
index 0000000..224b29a
--- /dev/null
+++ b/doc/html/bc_s.png
Binary files differ
diff --git a/doc/html/bdwn.png b/doc/html/bdwn.png
new file mode 100644
index 0000000..940a0b9
--- /dev/null
+++ b/doc/html/bdwn.png
Binary files differ
diff --git a/doc/html/classes.html b/doc/html/classes.html
new file mode 100644
index 0000000..ad625f1
--- /dev/null
+++ b/doc/html/classes.html
@@ -0,0 +1,105 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Data Structure Index</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li class="current"><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Data Structure Index</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
+<table style="margin: 10px; white-space: nowrap;" align="center" width="95%" border="0" cellspacing="0" cellpadding="0">
+<tr><td rowspan="2" valign="bottom"><a name="letter_T"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;T&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a>&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtjtransform.html">tjtransform</a>&#160;&#160;&#160;</td><td></td></tr>
+<tr><td></td><td></td><td></td></tr>
+<tr><td valign="top"><a class="el" href="structtjregion.html">tjregion</a>&#160;&#160;&#160;</td><td></td><td></td><td></td></tr>
+<tr><td></td><td></td><td></td><td></td></tr>
+</table>
+<div class="qindex"><a class="qindex" href="#letter_T">T</a></div>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/closed.png b/doc/html/closed.png
new file mode 100644
index 0000000..98cc2c9
--- /dev/null
+++ b/doc/html/closed.png
Binary files differ
diff --git a/doc/html/doxygen.css b/doc/html/doxygen.css
new file mode 100644
index 0000000..dabaff2
--- /dev/null
+++ b/doc/html/doxygen.css
@@ -0,0 +1,1184 @@
+/* The standard CSS for doxygen 1.8.3.1 */
+
+body, table, div, p, dl {
+	font: 400 14px/19px Roboto,sans-serif;
+}
+
+/* @group Heading Levels */
+
+h1.groupheader {
+	font-size: 150%;
+}
+
+.title {
+	font-size: 150%;
+	font-weight: bold;
+	margin: 10px 2px;
+}
+
+h2.groupheader {
+	border-bottom: 1px solid #879ECB;
+	color: #354C7B;
+	font-size: 150%;
+	font-weight: normal;
+	margin-top: 1.75em;
+	padding-top: 8px;
+	padding-bottom: 4px;
+	width: 100%;
+}
+
+h3.groupheader {
+	font-size: 100%;
+}
+
+h1, h2, h3, h4, h5, h6 {
+	-webkit-transition: text-shadow 0.5s linear;
+	-moz-transition: text-shadow 0.5s linear;
+	-ms-transition: text-shadow 0.5s linear;
+	-o-transition: text-shadow 0.5s linear;
+	transition: text-shadow 0.5s linear;
+	margin-right: 15px;
+}
+
+h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow {
+	text-shadow: 0 0 15px cyan;
+}
+
+dt {
+	font-weight: bold;
+}
+
+div.multicol {
+	-moz-column-gap: 1em;
+	-webkit-column-gap: 1em;
+	-moz-column-count: 3;
+	-webkit-column-count: 3;
+}
+
+p.startli, p.startdd, p.starttd {
+	margin-top: 2px;
+}
+
+p.endli {
+	margin-bottom: 0px;
+}
+
+p.enddd {
+	margin-bottom: 4px;
+}
+
+p.endtd {
+	margin-bottom: 2px;
+}
+
+/* @end */
+
+caption {
+	font-weight: bold;
+}
+
+span.legend {
+        font-size: 70%;
+        text-align: center;
+}
+
+h3.version {
+        font-size: 90%;
+        text-align: center;
+}
+
+div.qindex, div.navtab{
+	background-color: #EBEFF6;
+	border: 1px solid #A3B4D7;
+	text-align: center;
+}
+
+div.qindex, div.navpath {
+	width: 100%;
+	line-height: 140%;
+}
+
+div.navtab {
+	margin-right: 15px;
+}
+
+/* @group Link Styling */
+
+a {
+	color: #3D578C;
+	font-weight: normal;
+	text-decoration: none;
+}
+
+.contents a:visited {
+	color: #4665A2;
+}
+
+a:hover {
+	text-decoration: underline;
+}
+
+a.qindex {
+	font-weight: bold;
+}
+
+a.qindexHL {
+	font-weight: bold;
+	background-color: #9CAFD4;
+	color: #ffffff;
+	border: 1px double #869DCA;
+}
+
+.contents a.qindexHL:visited {
+        color: #ffffff;
+}
+
+a.el {
+	font-weight: bold;
+}
+
+a.elRef {
+}
+
+a.code, a.code:visited {
+	color: #4665A2; 
+}
+
+a.codeRef, a.codeRef:visited {
+	color: #4665A2; 
+}
+
+/* @end */
+
+dl.el {
+	margin-left: -1cm;
+}
+
+pre.fragment {
+        border: 1px solid #C4CFE5;
+        background-color: #FBFCFD;
+        padding: 4px 6px;
+        margin: 4px 8px 4px 2px;
+        overflow: auto;
+        word-wrap: break-word;
+        font-size:  9pt;
+        line-height: 125%;
+        font-family: monospace, fixed;
+        font-size: 105%;
+}
+
+div.fragment {
+        padding: 4px;
+        margin: 4px;
+	background-color: #FBFCFD;
+	border: 1px solid #C4CFE5;
+}
+
+div.line {
+	font-family: monospace, fixed;
+        font-size: 13px;
+	min-height: 13px;
+	line-height: 1.0;
+	text-wrap: unrestricted;
+	white-space: -moz-pre-wrap; /* Moz */
+	white-space: -pre-wrap;     /* Opera 4-6 */
+	white-space: -o-pre-wrap;   /* Opera 7 */
+	white-space: pre-wrap;      /* CSS3  */
+	word-wrap: break-word;      /* IE 5.5+ */
+	text-indent: -53px;
+	padding-left: 53px;
+	padding-bottom: 0px;
+	margin: 0px;
+	-webkit-transition-property: background-color, box-shadow;
+	-webkit-transition-duration: 0.5s;
+	-moz-transition-property: background-color, box-shadow;
+	-moz-transition-duration: 0.5s;
+	-ms-transition-property: background-color, box-shadow;
+	-ms-transition-duration: 0.5s;
+	-o-transition-property: background-color, box-shadow;
+	-o-transition-duration: 0.5s;
+	transition-property: background-color, box-shadow;
+	transition-duration: 0.5s;
+}
+
+div.line.glow {
+	background-color: cyan;
+	box-shadow: 0 0 10px cyan;
+}
+
+
+span.lineno {
+	padding-right: 4px;
+	text-align: right;
+	border-right: 2px solid #0F0;
+	background-color: #E8E8E8;
+        white-space: pre;
+}
+span.lineno a {
+	background-color: #D8D8D8;
+}
+
+span.lineno a:hover {
+	background-color: #C8C8C8;
+}
+
+div.ah {
+	background-color: black;
+	font-weight: bold;
+	color: #ffffff;
+	margin-bottom: 3px;
+	margin-top: 3px;
+	padding: 0.2em;
+	border: solid thin #333;
+	border-radius: 0.5em;
+	-webkit-border-radius: .5em;
+	-moz-border-radius: .5em;
+	box-shadow: 2px 2px 3px #999;
+	-webkit-box-shadow: 2px 2px 3px #999;
+	-moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px;
+	background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444));
+	background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000);
+}
+
+div.groupHeader {
+	margin-left: 16px;
+	margin-top: 12px;
+	font-weight: bold;
+}
+
+div.groupText {
+	margin-left: 16px;
+	font-style: italic;
+}
+
+body {
+	background-color: white;
+	color: black;
+        margin: 0;
+}
+
+div.contents {
+	margin-top: 10px;
+	margin-left: 12px;
+	margin-right: 8px;
+}
+
+td.indexkey {
+	background-color: #EBEFF6;
+	font-weight: bold;
+	border: 1px solid #C4CFE5;
+	margin: 2px 0px 2px 0;
+	padding: 2px 10px;
+        white-space: nowrap;
+        vertical-align: top;
+}
+
+td.indexvalue {
+	background-color: #EBEFF6;
+	border: 1px solid #C4CFE5;
+	padding: 2px 10px;
+	margin: 2px 0px;
+}
+
+tr.memlist {
+	background-color: #EEF1F7;
+}
+
+p.formulaDsp {
+	text-align: center;
+}
+
+img.formulaDsp {
+	
+}
+
+img.formulaInl {
+	vertical-align: middle;
+}
+
+div.center {
+	text-align: center;
+        margin-top: 0px;
+        margin-bottom: 0px;
+        padding: 0px;
+}
+
+div.center img {
+	border: 0px;
+}
+
+address.footer {
+	text-align: right;
+	padding-right: 12px;
+}
+
+img.footer {
+	border: 0px;
+	vertical-align: middle;
+}
+
+/* @group Code Colorization */
+
+span.keyword {
+	color: #008000
+}
+
+span.keywordtype {
+	color: #604020
+}
+
+span.keywordflow {
+	color: #e08000
+}
+
+span.comment {
+	color: #800000
+}
+
+span.preprocessor {
+	color: #806020
+}
+
+span.stringliteral {
+	color: #002080
+}
+
+span.charliteral {
+	color: #008080
+}
+
+span.vhdldigit { 
+	color: #ff00ff 
+}
+
+span.vhdlchar { 
+	color: #000000 
+}
+
+span.vhdlkeyword { 
+	color: #700070 
+}
+
+span.vhdllogic { 
+	color: #ff0000 
+}
+
+blockquote {
+        background-color: #F7F8FB;
+        border-left: 2px solid #9CAFD4;
+        margin: 0 24px 0 4px;
+        padding: 0 12px 0 16px;
+}
+
+/* @end */
+
+/*
+.search {
+	color: #003399;
+	font-weight: bold;
+}
+
+form.search {
+	margin-bottom: 0px;
+	margin-top: 0px;
+}
+
+input.search {
+	font-size: 75%;
+	color: #000080;
+	font-weight: normal;
+	background-color: #e8eef2;
+}
+*/
+
+td.tiny {
+	font-size: 75%;
+}
+
+.dirtab {
+	padding: 4px;
+	border-collapse: collapse;
+	border: 1px solid #A3B4D7;
+}
+
+th.dirtab {
+	background: #EBEFF6;
+	font-weight: bold;
+}
+
+hr {
+	height: 0px;
+	border: none;
+	border-top: 1px solid #4A6AAA;
+}
+
+hr.footer {
+	height: 1px;
+}
+
+/* @group Member Descriptions */
+
+table.memberdecls {
+	border-spacing: 0px;
+	padding: 0px;
+}
+
+.memberdecls td, .fieldtable tr {
+	-webkit-transition-property: background-color, box-shadow;
+	-webkit-transition-duration: 0.5s;
+	-moz-transition-property: background-color, box-shadow;
+	-moz-transition-duration: 0.5s;
+	-ms-transition-property: background-color, box-shadow;
+	-ms-transition-duration: 0.5s;
+	-o-transition-property: background-color, box-shadow;
+	-o-transition-duration: 0.5s;
+	transition-property: background-color, box-shadow;
+	transition-duration: 0.5s;
+}
+
+.memberdecls td.glow, .fieldtable tr.glow {
+	background-color: cyan;
+	box-shadow: 0 0 15px cyan;
+}
+
+.mdescLeft, .mdescRight,
+.memItemLeft, .memItemRight,
+.memTemplItemLeft, .memTemplItemRight, .memTemplParams {
+	background-color: #F9FAFC;
+	border: none;
+	margin: 4px;
+	padding: 1px 0 0 8px;
+}
+
+.mdescLeft, .mdescRight {
+	padding: 0px 8px 4px 8px;
+	color: #555;
+}
+
+.memSeparator {
+        border-bottom: 1px solid #DEE4F0;
+        line-height: 1px;
+        margin: 0px;
+        padding: 0px;
+}
+
+.memItemLeft, .memTemplItemLeft {
+        white-space: nowrap;
+}
+
+.memItemRight {
+	width: 100%;
+}
+
+.memTemplParams {
+	color: #4665A2;
+        white-space: nowrap;
+	font-size: 80%;
+}
+
+/* @end */
+
+/* @group Member Details */
+
+/* Styles for detailed member documentation */
+
+.memtemplate {
+	font-size: 80%;
+	color: #4665A2;
+	font-weight: normal;
+	margin-left: 9px;
+}
+
+.memnav {
+	background-color: #EBEFF6;
+	border: 1px solid #A3B4D7;
+	text-align: center;
+	margin: 2px;
+	margin-right: 15px;
+	padding: 2px;
+}
+
+.mempage {
+	width: 100%;
+}
+
+.memitem {
+	padding: 0;
+	margin-bottom: 10px;
+	margin-right: 5px;
+        -webkit-transition: box-shadow 0.5s linear;
+        -moz-transition: box-shadow 0.5s linear;
+        -ms-transition: box-shadow 0.5s linear;
+        -o-transition: box-shadow 0.5s linear;
+        transition: box-shadow 0.5s linear;
+        display: table !important;
+        width: 100%;
+}
+
+.memitem.glow {
+         box-shadow: 0 0 15px cyan;
+}
+
+.memname {
+        font-weight: bold;
+        margin-left: 6px;
+}
+
+.memname td {
+	vertical-align: bottom;
+}
+
+.memproto, dl.reflist dt {
+        border-top: 1px solid #A8B8D9;
+        border-left: 1px solid #A8B8D9;
+        border-right: 1px solid #A8B8D9;
+        padding: 6px 0px 6px 0px;
+        color: #253555;
+        font-weight: bold;
+        text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9);
+        background-image:url('nav_f.png');
+        background-repeat:repeat-x;
+        background-color: #E2E8F2;
+        /* opera specific markup */
+        box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+        border-top-right-radius: 4px;
+        border-top-left-radius: 4px;
+        /* firefox specific markup */
+        -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px;
+        -moz-border-radius-topright: 4px;
+        -moz-border-radius-topleft: 4px;
+        /* webkit specific markup */
+        -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+        -webkit-border-top-right-radius: 4px;
+        -webkit-border-top-left-radius: 4px;
+
+}
+
+.memdoc, dl.reflist dd {
+        border-bottom: 1px solid #A8B8D9;      
+        border-left: 1px solid #A8B8D9;      
+        border-right: 1px solid #A8B8D9; 
+        padding: 6px 10px 2px 10px;
+        background-color: #FBFCFD;
+        border-top-width: 0;
+        background-image:url('nav_g.png');
+        background-repeat:repeat-x;
+        background-color: #FFFFFF;
+        /* opera specific markup */
+        border-bottom-left-radius: 4px;
+        border-bottom-right-radius: 4px;
+        box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+        /* firefox specific markup */
+        -moz-border-radius-bottomleft: 4px;
+        -moz-border-radius-bottomright: 4px;
+        -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px;
+        /* webkit specific markup */
+        -webkit-border-bottom-left-radius: 4px;
+        -webkit-border-bottom-right-radius: 4px;
+        -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+}
+
+dl.reflist dt {
+        padding: 5px;
+}
+
+dl.reflist dd {
+        margin: 0px 0px 10px 0px;
+        padding: 5px;
+}
+
+.paramkey {
+	text-align: right;
+}
+
+.paramtype {
+	white-space: nowrap;
+}
+
+.paramname {
+	color: #602020;
+	white-space: nowrap;
+}
+.paramname em {
+	font-style: normal;
+}
+.paramname code {
+        line-height: 14px;
+}
+
+.params, .retval, .exception, .tparams {
+        margin-left: 0px;
+        padding-left: 0px;
+}       
+
+.params .paramname, .retval .paramname {
+        font-weight: bold;
+        vertical-align: top;
+}
+        
+.params .paramtype {
+        font-style: italic;
+        vertical-align: top;
+}       
+        
+.params .paramdir {
+        font-family: "courier new",courier,monospace;
+        vertical-align: top;
+}
+
+table.mlabels {
+	border-spacing: 0px;
+}
+
+td.mlabels-left {
+	width: 100%;
+	padding: 0px;
+}
+
+td.mlabels-right {
+	vertical-align: bottom;
+	padding: 0px;
+	white-space: nowrap;
+}
+
+span.mlabels {
+        margin-left: 8px;
+}
+
+span.mlabel {
+        background-color: #728DC1;
+        border-top:1px solid #5373B4;
+        border-left:1px solid #5373B4;
+        border-right:1px solid #C4CFE5;
+        border-bottom:1px solid #C4CFE5;
+	text-shadow: none;
+	color: white;
+	margin-right: 4px;
+	padding: 2px 3px;
+	border-radius: 3px;
+	font-size: 7pt;
+	white-space: nowrap;
+	vertical-align: middle;
+}
+
+
+
+/* @end */
+
+/* these are for tree view when not used as main index */
+
+div.directory {
+        margin: 10px 0px;
+        border-top: 1px solid #A8B8D9;
+        border-bottom: 1px solid #A8B8D9;
+        width: 100%;
+}
+
+.directory table {
+        border-collapse:collapse;
+}
+
+.directory td {
+        margin: 0px;
+        padding: 0px;
+	vertical-align: top;
+}
+
+.directory td.entry {
+        white-space: nowrap;
+        padding-right: 6px;
+}
+
+.directory td.entry a {
+        outline:none;
+}
+
+.directory td.entry a img {
+        border: none;
+}
+
+.directory td.desc {
+        width: 100%;
+        padding-left: 6px;
+	padding-right: 6px;
+	padding-top: 3px;
+	border-left: 1px solid rgba(0,0,0,0.05);
+}
+
+.directory tr.even {
+	padding-left: 6px;
+	background-color: #F7F8FB;
+}
+
+.directory img {
+	vertical-align: -30%;
+}
+
+.directory .levels {
+        white-space: nowrap;
+        width: 100%;
+        text-align: right;
+        font-size: 9pt;
+}
+
+.directory .levels span {
+        cursor: pointer;
+        padding-left: 2px;
+        padding-right: 2px;
+	color: #3D578C;
+}
+
+div.dynheader {
+        margin-top: 8px;
+	-webkit-touch-callout: none;
+	-webkit-user-select: none;
+	-khtml-user-select: none;
+	-moz-user-select: none;
+	-ms-user-select: none;
+	user-select: none;
+}
+
+address {
+	font-style: normal;
+	color: #2A3D61;
+}
+
+table.doxtable {
+	border-collapse:collapse;
+        margin-top: 4px;
+        margin-bottom: 4px;
+}
+
+table.doxtable td, table.doxtable th {
+	border: 1px solid #2D4068;
+	padding: 3px 7px 2px;
+}
+
+table.doxtable th {
+	background-color: #374F7F;
+	color: #FFFFFF;
+	font-size: 110%;
+	padding-bottom: 4px;
+	padding-top: 5px;
+}
+
+table.fieldtable {
+        /*width: 100%;*/
+        margin-bottom: 10px;
+        border: 1px solid #A8B8D9;
+        border-spacing: 0px;
+        -moz-border-radius: 4px;
+        -webkit-border-radius: 4px;
+        border-radius: 4px;
+        -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px;
+        -webkit-box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15);
+        box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15);
+}
+
+.fieldtable td, .fieldtable th {
+        padding: 3px 7px 2px;
+}
+
+.fieldtable td.fieldtype, .fieldtable td.fieldname {
+        white-space: nowrap;
+        border-right: 1px solid #A8B8D9;
+        border-bottom: 1px solid #A8B8D9;
+        vertical-align: top;
+}
+
+.fieldtable td.fieldname {
+        padding-top: 5px;
+}
+
+.fieldtable td.fielddoc {
+        border-bottom: 1px solid #A8B8D9;
+        /*width: 100%;*/
+}
+
+.fieldtable td.fielddoc p:first-child {
+        margin-top: 2px;
+}       
+        
+.fieldtable td.fielddoc p:last-child {
+        margin-bottom: 2px;
+}
+
+.fieldtable tr:last-child td {
+        border-bottom: none;
+}
+
+.fieldtable th {
+        background-image:url('nav_f.png');
+        background-repeat:repeat-x;
+        background-color: #E2E8F2;
+        font-size: 90%;
+        color: #253555;
+        padding-bottom: 4px;
+        padding-top: 5px;
+        text-align:left;
+        -moz-border-radius-topleft: 4px;
+        -moz-border-radius-topright: 4px;
+        -webkit-border-top-left-radius: 4px;
+        -webkit-border-top-right-radius: 4px;
+        border-top-left-radius: 4px;
+        border-top-right-radius: 4px;
+        border-bottom: 1px solid #A8B8D9;
+}
+
+
+.tabsearch {
+	top: 0px;
+	left: 10px;
+	height: 36px;
+	background-image: url('tab_b.png');
+	z-index: 101;
+	overflow: hidden;
+	font-size: 13px;
+}
+
+.navpath ul
+{
+	font-size: 11px;
+	background-image:url('tab_b.png');
+	background-repeat:repeat-x;
+	background-position: 0 -5px;
+	height:30px;
+	line-height:30px;
+	color:#8AA0CC;
+	border:solid 1px #C2CDE4;
+	overflow:hidden;
+	margin:0px;
+	padding:0px;
+}
+
+.navpath li
+{
+	list-style-type:none;
+	float:left;
+	padding-left:10px;
+	padding-right:15px;
+	background-image:url('bc_s.png');
+	background-repeat:no-repeat;
+	background-position:right;
+	color:#364D7C;
+}
+
+.navpath li.navelem a
+{
+	height:32px;
+	display:block;
+	text-decoration: none;
+	outline: none;
+	color: #283A5D;
+	font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif;
+	text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9);
+	text-decoration: none;        
+}
+
+.navpath li.navelem a:hover
+{
+	color:#6884BD;
+}
+
+.navpath li.footer
+{
+        list-style-type:none;
+        float:right;
+        padding-left:10px;
+        padding-right:15px;
+        background-image:none;
+        background-repeat:no-repeat;
+        background-position:right;
+        color:#364D7C;
+        font-size: 8pt;
+}
+
+
+div.summary
+{
+	float: right;
+	font-size: 8pt;
+	padding-right: 5px;
+	width: 50%;
+	text-align: right;
+}       
+
+div.summary a
+{
+	white-space: nowrap;
+}
+
+div.ingroups
+{
+	font-size: 8pt;
+	width: 50%;
+	text-align: left;
+}
+
+div.ingroups a
+{
+	white-space: nowrap;
+}
+
+div.header
+{
+        background-image:url('nav_h.png');
+        background-repeat:repeat-x;
+	background-color: #F9FAFC;
+	margin:  0px;
+	border-bottom: 1px solid #C4CFE5;
+}
+
+div.headertitle
+{
+	padding: 5px 5px 5px 10px;
+}
+
+dl
+{
+        padding: 0 0 0 10px;
+}
+
+/* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug */
+dl.section
+{
+	margin-left: 0px;
+	padding-left: 0px;
+}
+
+dl.note
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #D0C000;
+}
+
+dl.warning, dl.attention
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #FF0000;
+}
+
+dl.pre, dl.post, dl.invariant
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #00D000;
+}
+
+dl.deprecated
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #505050;
+}
+
+dl.todo
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #00C0E0;
+}
+
+dl.test
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #3030E0;
+}
+
+dl.bug
+{
+        margin-left:-7px;
+        padding-left: 3px;
+        border-left:4px solid;
+        border-color: #C08050;
+}
+
+dl.section dd {
+	margin-bottom: 6px;
+}
+
+
+#projectlogo
+{
+	text-align: center;
+	vertical-align: bottom;
+	border-collapse: separate;
+}
+ 
+#projectlogo img
+{ 
+	border: 0px none;
+}
+ 
+#projectname
+{
+	font: 300% Tahoma, Arial,sans-serif;
+	margin: 0px;
+	padding: 2px 0px;
+}
+    
+#projectbrief
+{
+	font: 120% Tahoma, Arial,sans-serif;
+	margin: 0px;
+	padding: 0px;
+}
+
+#projectnumber
+{
+	font: 50% Tahoma, Arial,sans-serif;
+	margin: 0px;
+	padding: 0px;
+}
+
+#titlearea
+{
+	padding: 0px;
+	margin: 0px;
+	width: 100%;
+	border-bottom: 1px solid #5373B4;
+}
+
+.image
+{
+        text-align: center;
+}
+
+.dotgraph
+{
+        text-align: center;
+}
+
+.mscgraph
+{
+        text-align: center;
+}
+
+.caption
+{
+	font-weight: bold;
+}
+
+div.zoom
+{
+	border: 1px solid #90A5CE;
+}
+
+dl.citelist {
+        margin-bottom:50px;
+}
+
+dl.citelist dt {
+        color:#334975;
+        float:left;
+        font-weight:bold;
+        margin-right:10px;
+        padding:5px;
+}
+
+dl.citelist dd {
+        margin:2px 0;
+        padding:5px 0;
+}
+
+div.toc {
+        padding: 14px 25px;
+        background-color: #F4F6FA;
+        border: 1px solid #D8DFEE;
+        border-radius: 7px 7px 7px 7px;
+        float: right;
+        height: auto;
+        margin: 0 20px 10px 10px;
+        width: 200px;
+}
+
+div.toc li {
+        background: url("bdwn.png") no-repeat scroll 0 5px transparent;
+        font: 10px/1.2 Verdana,DejaVu Sans,Geneva,sans-serif;
+        margin-top: 5px;
+        padding-left: 10px;
+        padding-top: 2px;
+}
+
+div.toc h3 {
+        font: bold 12px/1.2 Arial,FreeSans,sans-serif;
+	color: #4665A2;
+        border-bottom: 0 none;
+        margin: 0;
+}
+
+div.toc ul {
+        list-style: none outside none;
+        border: medium none;
+        padding: 0px;
+}       
+
+div.toc li.level1 {
+        margin-left: 0px;
+}
+
+div.toc li.level2 {
+        margin-left: 15px;
+}
+
+div.toc li.level3 {
+        margin-left: 30px;
+}
+
+div.toc li.level4 {
+        margin-left: 45px;
+}
+
+.inherit_header {
+        font-weight: bold;
+        color: gray;
+        cursor: pointer;
+	-webkit-touch-callout: none;
+	-webkit-user-select: none;
+	-khtml-user-select: none;
+	-moz-user-select: none;
+	-ms-user-select: none;
+	user-select: none;
+}
+
+.inherit_header td {
+        padding: 6px 0px 2px 5px;
+}
+
+.inherit {
+        display: none;
+}
+
+tr.heading h2 {
+        margin-top: 12px;
+        margin-bottom: 4px;
+}
+
+@media print
+{
+  #top { display: none; }
+  #side-nav { display: none; }
+  #nav-path { display: none; }
+  body { overflow:visible; }
+  h1, h2, h3, h4, h5, h6 { page-break-after: avoid; }
+  .summary { display: none; }
+  .memitem { page-break-inside: avoid; }
+  #doc-content
+  {
+    margin-left:0 !important;
+    height:auto !important;
+    width:auto !important;
+    overflow:inherit;
+    display:inline;
+  }
+}
+
diff --git a/doc/html/doxygen.png b/doc/html/doxygen.png
new file mode 100644
index 0000000..3ff17d8
--- /dev/null
+++ b/doc/html/doxygen.png
Binary files differ
diff --git a/doc/html/dynsections.js b/doc/html/dynsections.js
new file mode 100644
index 0000000..ed092c7
--- /dev/null
+++ b/doc/html/dynsections.js
@@ -0,0 +1,97 @@
+function toggleVisibility(linkObj) // Swap the '<id>-content' and '<id>-summary' elements tied to linkObj's id.
+{
+ var base = $(linkObj).attr('id'); // id of linkObj; used as the prefix of the related element ids below
+ var summary = $('#'+base+'-summary');
+ var content = $('#'+base+'-content');
+ var trigger = $('#'+base+'-trigger');
+ var src=$(trigger).attr('src'); // current 'src' attribute of the trigger element
+ if (content.is(':visible')===true) { // content is showing: collapse it
+   content.hide();
+   summary.show();
+   $(linkObj).addClass('closed').removeClass('opened');
+   $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); // drop the last 8 chars (the length of 'open.png') and append 'closed.png'
+ } else { // content is hidden: expand it
+   content.show();
+   summary.hide();
+   $(linkObj).removeClass('closed').addClass('opened');
+   $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); // drop the last 10 chars (the length of 'closed.png') and append 'open.png'
+ } 
+ return false; // always false; presumably cancels the default action of the calling handler - confirm against the markup
+}
+
+function updateStripes() // Recompute which rows of table.directory carry the 'even' class.
+{
+  $('table.directory tr').
+       removeClass('even').filter(':visible:even').addClass('even'); // clear 'even' on every row, then re-add it to the rows matched by ':visible:even'
+}
+function toggleLevel(level) // Show rows of table.directory down to a depth derived from 'level' and hide deeper rows.
+{
+  $('table.directory tr').each(function(){ // visit every row; 'this' is the row element
+    var l = this.id.split('_').length-1; // number of '_' characters in the row id, used as the row's depth
+    var i = $('#img'+this.id.substring(3)); // element with id 'img' + row id minus its first 3 chars
+    var a = $('#arr'+this.id.substring(3)); // element with id 'arr' + row id minus its first 3 chars
+    if (l<level+1) { // shallower than the cutoff: show with the "open" images
+      i.attr('src','ftv2folderopen.png');
+      a.attr('src','ftv2mnode.png');
+      $(this).show();
+    } else if (l==level+1) { // exactly at the cutoff: show with the "closed" images
+      i.attr('src','ftv2folderclosed.png');
+      a.attr('src','ftv2pnode.png');
+      $(this).show();
+    } else { // deeper than the cutoff: hide
+      $(this).hide();
+    }
+  });
+  updateStripes(); // row visibility changed, so refresh the 'even' striping
+}
+
+function toggleFolder(id) // Expand or collapse the rows nested under the row with id 'row_' + id.
+{
+  // The clicked row and the images inside it
+  var currentRow = $('#row_'+id);
+  var currentRowImages = currentRow.find("img");
+
+  // All sibling rows that follow the clicked row
+  var rows = currentRow.nextAll("tr");
+
+  // Only rows AFTER the clicked one are considered (earlier rows are never touched)
+  var childRows = rows.filter(function() {
+    var re = new RegExp('^row_'+id+'\\d+_$', "i"); // ids of the form row_<id><digits>_ : exactly one further level
+    return this.id.match(re);
+  });
+
+  // If the first matching child row is visible, we are HIDING
+  if (childRows.filter(':first').is(':visible')===true) {
+    currentRowImages.filter("[id^=arr]").attr('src', 'ftv2pnode.png');
+    currentRowImages.filter("[id^=img]").attr('src', 'ftv2folderclosed.png');
+    rows.filter("[id^=row_"+id+"]").hide(); // hides every following row whose id starts with row_<id>, not only the direct children
+  } else { // Otherwise we are SHOWING
+    // Images inside the direct child rows
+    var childImages = childRows.find("img");
+    var childImg = childImages.filter("[id^=img]");
+    var childArr = childImages.filter("[id^=arr]");
+
+    currentRow.find("[id^=arr]").attr('src', 'ftv2mnode.png'); // mark the clicked row as open
+    currentRow.find("[id^=img]").attr('src', 'ftv2folderopen.png'); // mark the clicked row as open
+    childImg.attr('src','ftv2folderclosed.png'); // child rows start out closed
+    childArr.attr('src','ftv2pnode.png'); // child rows start out closed
+    childRows.show(); // reveal only the direct child rows
+  }
+  updateStripes(); // row visibility changed, so refresh the 'even' striping
+}
+
+
+function toggleInherit(id) // Show or hide the 'tr.inherit' rows carrying class 'id' and update the header image.
+{
+  var rows = $('tr.inherit.'+id); // rows that have both the 'inherit' class and the class named by id
+  var img = $('tr.inherit_header.'+id+' img'); // image(s) inside the matching 'inherit_header' row
+  var src = $(img).attr('src');
+  if (rows.filter(':first').is(':visible')===true) { // first row is showing: collapse
+    rows.css('display','none');
+    $(img).attr('src',src.substring(0,src.length-8)+'closed.png'); // drop the last 8 chars (the length of 'open.png') and append 'closed.png'
+  } else { // first row is hidden: expand
+    rows.css('display','table-row'); // using show() causes jump in firefox
+    $(img).attr('src',src.substring(0,src.length-10)+'open.png'); // drop the last 10 chars (the length of 'closed.png') and append 'open.png'
+  }
+}
+
diff --git a/doc/html/ftv2blank.png b/doc/html/ftv2blank.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2blank.png
Binary files differ
diff --git a/doc/html/ftv2cl.png b/doc/html/ftv2cl.png
new file mode 100644
index 0000000..132f657
--- /dev/null
+++ b/doc/html/ftv2cl.png
Binary files differ
diff --git a/doc/html/ftv2doc.png b/doc/html/ftv2doc.png
new file mode 100644
index 0000000..17edabf
--- /dev/null
+++ b/doc/html/ftv2doc.png
Binary files differ
diff --git a/doc/html/ftv2folderclosed.png b/doc/html/ftv2folderclosed.png
new file mode 100644
index 0000000..bb8ab35
--- /dev/null
+++ b/doc/html/ftv2folderclosed.png
Binary files differ
diff --git a/doc/html/ftv2folderopen.png b/doc/html/ftv2folderopen.png
new file mode 100644
index 0000000..d6c7f67
--- /dev/null
+++ b/doc/html/ftv2folderopen.png
Binary files differ
diff --git a/doc/html/ftv2lastnode.png b/doc/html/ftv2lastnode.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2lastnode.png
Binary files differ
diff --git a/doc/html/ftv2link.png b/doc/html/ftv2link.png
new file mode 100644
index 0000000..17edabf
--- /dev/null
+++ b/doc/html/ftv2link.png
Binary files differ
diff --git a/doc/html/ftv2mlastnode.png b/doc/html/ftv2mlastnode.png
new file mode 100644
index 0000000..0b63f6d
--- /dev/null
+++ b/doc/html/ftv2mlastnode.png
Binary files differ
diff --git a/doc/html/ftv2mnode.png b/doc/html/ftv2mnode.png
new file mode 100644
index 0000000..0b63f6d
--- /dev/null
+++ b/doc/html/ftv2mnode.png
Binary files differ
diff --git a/doc/html/ftv2mo.png b/doc/html/ftv2mo.png
new file mode 100644
index 0000000..4bfb80f
--- /dev/null
+++ b/doc/html/ftv2mo.png
Binary files differ
diff --git a/doc/html/ftv2node.png b/doc/html/ftv2node.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2node.png
Binary files differ
diff --git a/doc/html/ftv2ns.png b/doc/html/ftv2ns.png
new file mode 100644
index 0000000..72e3d71
--- /dev/null
+++ b/doc/html/ftv2ns.png
Binary files differ
diff --git a/doc/html/ftv2plastnode.png b/doc/html/ftv2plastnode.png
new file mode 100644
index 0000000..c6ee22f
--- /dev/null
+++ b/doc/html/ftv2plastnode.png
Binary files differ
diff --git a/doc/html/ftv2pnode.png b/doc/html/ftv2pnode.png
new file mode 100644
index 0000000..c6ee22f
--- /dev/null
+++ b/doc/html/ftv2pnode.png
Binary files differ
diff --git a/doc/html/ftv2splitbar.png b/doc/html/ftv2splitbar.png
new file mode 100644
index 0000000..fe895f2
--- /dev/null
+++ b/doc/html/ftv2splitbar.png
Binary files differ
diff --git a/doc/html/ftv2vertline.png b/doc/html/ftv2vertline.png
new file mode 100644
index 0000000..63c605b
--- /dev/null
+++ b/doc/html/ftv2vertline.png
Binary files differ
diff --git a/doc/html/functions.html b/doc/html/functions.html
new file mode 100644
index 0000000..55ccba0
--- /dev/null
+++ b/doc/html/functions.html
@@ -0,0 +1,133 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Data Fields</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li class="current"><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+  <div id="navrow3" class="tabs2">
+    <ul class="tablist">
+      <li class="current"><a href="functions.html"><span>All</span></a></li>
+      <li><a href="functions_vars.html"><span>Variables</span></a></li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="contents">
+<div class="textblock">Here is a list of all documented struct and union fields with links to the struct/union documentation for each field:</div><ul>
+<li>customFilter
+: <a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">tjtransform</a>
+</li>
+<li>data
+: <a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">tjtransform</a>
+</li>
+<li>denom
+: <a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">tjscalingfactor</a>
+</li>
+<li>h
+: <a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">tjregion</a>
+</li>
+<li>num
+: <a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">tjscalingfactor</a>
+</li>
+<li>op
+: <a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">tjtransform</a>
+</li>
+<li>options
+: <a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">tjtransform</a>
+</li>
+<li>r
+: <a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">tjtransform</a>
+</li>
+<li>w
+: <a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">tjregion</a>
+</li>
+<li>x
+: <a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">tjregion</a>
+</li>
+<li>y
+: <a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">tjregion</a>
+</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/functions_vars.html b/doc/html/functions_vars.html
new file mode 100644
index 0000000..cdc5560
--- /dev/null
+++ b/doc/html/functions_vars.html
@@ -0,0 +1,133 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Data Fields - Variables</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li class="current"><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+  <div id="navrow3" class="tabs2">
+    <ul class="tablist">
+      <li><a href="functions.html"><span>All</span></a></li>
+      <li class="current"><a href="functions_vars.html"><span>Variables</span></a></li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="contents">
+&#160;<ul>
+<li>customFilter
+: <a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">tjtransform</a>
+</li>
+<li>data
+: <a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">tjtransform</a>
+</li>
+<li>denom
+: <a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">tjscalingfactor</a>
+</li>
+<li>h
+: <a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">tjregion</a>
+</li>
+<li>num
+: <a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">tjscalingfactor</a>
+</li>
+<li>op
+: <a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">tjtransform</a>
+</li>
+<li>options
+: <a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">tjtransform</a>
+</li>
+<li>r
+: <a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">tjtransform</a>
+</li>
+<li>w
+: <a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">tjregion</a>
+</li>
+<li>x
+: <a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">tjregion</a>
+</li>
+<li>y
+: <a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">tjregion</a>
+</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/group___turbo_j_p_e_g.html b/doc/html/group___turbo_j_p_e_g.html
new file mode 100644
index 0000000..6936e5d
--- /dev/null
+++ b/doc/html/group___turbo_j_p_e_g.html
@@ -0,0 +1,1915 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: TurboJPEG</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="summary">
+<a href="#nested-classes">Data Structures</a> &#124;
+<a href="#define-members">Macros</a> &#124;
+<a href="#typedef-members">Typedefs</a> &#124;
+<a href="#enum-members">Enumerations</a> &#124;
+<a href="#func-members">Functions</a> &#124;
+<a href="#var-members">Variables</a>  </div>
+  <div class="headertitle">
+<div class="title">TurboJPEG</div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>TurboJPEG API.  
+<a href="#details">More...</a></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
+Data Structures</h2></td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html">tjscalingfactor</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Scaling factor.  <a href="structtjscalingfactor.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html">tjregion</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="structtjregion.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html">tjtransform</a></td></tr>
+<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="structtjtransform.html#details">More...</a><br/></td></tr>
+<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="define-members"></a>
+Macros</h2></td></tr>
+<tr class="memitem:ga5ef3d169162ce77ce348e292a0b7477c"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a></td></tr>
+<tr class="memdesc:ga5ef3d169162ce77ce348e292a0b7477c"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of chrominance subsampling options.  <a href="#ga5ef3d169162ce77ce348e292a0b7477c">More...</a><br/></td></tr>
+<tr class="separator:ga5ef3d169162ce77ce348e292a0b7477c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga7010a4402f54a45ba822ad8675a4655e"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a></td></tr>
+<tr class="memdesc:ga7010a4402f54a45ba822ad8675a4655e"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of pixel formats.  <a href="#ga7010a4402f54a45ba822ad8675a4655e">More...</a><br/></td></tr>
+<tr class="separator:ga7010a4402f54a45ba822ad8675a4655e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga39f57a6fb02d9cf32e7b6890099b5a71">TJ_NUMCS</a></td></tr>
+<tr class="memdesc:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of JPEG colorspaces.  <a href="#ga39f57a6fb02d9cf32e7b6890099b5a71">More...</a><br/></td></tr>
+<tr class="separator:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">TJFLAG_BOTTOMUP</a></td></tr>
+<tr class="memdesc:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="mdescLeft">&#160;</td><td class="mdescRight">The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order.  <a href="#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">More...</a><br/></td></tr>
+<tr class="separator:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d">TJFLAG_FASTUPSAMPLE</a></td></tr>
+<tr class="memdesc:ga4ee4506c81177a06f77e2504a22efd2d"><td class="mdescLeft">&#160;</td><td class="mdescRight">When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec.  <a href="#ga4ee4506c81177a06f77e2504a22efd2d">More...</a><br/></td></tr>
+<tr class="separator:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga8808d403c68b62aaa58a4c1e58e98963"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963">TJFLAG_NOREALLOC</a></td></tr>
+<tr class="memdesc:ga8808d403c68b62aaa58a4c1e58e98963"><td class="mdescLeft">&#160;</td><td class="mdescRight">Disable buffer (re)allocation.  <a href="#ga8808d403c68b62aaa58a4c1e58e98963">More...</a><br/></td></tr>
+<tr class="separator:ga8808d403c68b62aaa58a4c1e58e98963"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaabce235db80d3f698b27f36cbd453da2"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2">TJFLAG_FASTDCT</a></td></tr>
+<tr class="memdesc:gaabce235db80d3f698b27f36cbd453da2"><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the fastest DCT/IDCT algorithm available in the underlying codec.  <a href="#gaabce235db80d3f698b27f36cbd453da2">More...</a><br/></td></tr>
+<tr class="separator:gaabce235db80d3f698b27f36cbd453da2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0">TJFLAG_ACCURATEDCT</a></td></tr>
+<tr class="memdesc:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="mdescLeft">&#160;</td><td class="mdescRight">Use the most accurate DCT/IDCT algorithm available in the underlying codec.  <a href="#gacb233cfd722d66d1ccbf48a7de81f0e0">More...</a><br/></td></tr>
+<tr class="separator:gacb233cfd722d66d1ccbf48a7de81f0e0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c">TJ_NUMXOP</a></td></tr>
+<tr class="memdesc:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="mdescLeft">&#160;</td><td class="mdescRight">The number of transform operations.  <a href="#ga0f6dbd18adf38b7d46ac547f0f4d562c">More...</a><br/></td></tr>
+<tr class="separator:ga0f6dbd18adf38b7d46ac547f0f4d562c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga50e03cb5ed115330e212417429600b00"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00">TJXOPT_PERFECT</a></td></tr>
+<tr class="memdesc:ga50e03cb5ed115330e212417429600b00"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to return an error if the transform is not perfect.  <a href="#ga50e03cb5ed115330e212417429600b00">More...</a><br/></td></tr>
+<tr class="separator:ga50e03cb5ed115330e212417429600b00"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga319826b7eb1583c0595bbe7b95428709"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709">TJXOPT_TRIM</a></td></tr>
+<tr class="memdesc:ga319826b7eb1583c0595bbe7b95428709"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to discard any partial MCU blocks that cannot be transformed.  <a href="#ga319826b7eb1583c0595bbe7b95428709">More...</a><br/></td></tr>
+<tr class="separator:ga319826b7eb1583c0595bbe7b95428709"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga9c771a757fc1294add611906b89ab2d2"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">TJXOPT_CROP</a></td></tr>
+<tr class="memdesc:ga9c771a757fc1294add611906b89ab2d2"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will enable lossless cropping.  <a href="#ga9c771a757fc1294add611906b89ab2d2">More...</a><br/></td></tr>
+<tr class="separator:ga9c771a757fc1294add611906b89ab2d2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga3acee7b48ade1b99e5588736007c2589"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589">TJXOPT_GRAY</a></td></tr>
+<tr class="memdesc:ga3acee7b48ade1b99e5588736007c2589"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will discard the color data in the input image and produce a grayscale output image.  <a href="#ga3acee7b48ade1b99e5588736007c2589">More...</a><br/></td></tr>
+<tr class="separator:ga3acee7b48ade1b99e5588736007c2589"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gafbf992bbf6e006705886333703ffab31"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31">TJXOPT_NOOUTPUT</a></td></tr>
+<tr class="memdesc:gafbf992bbf6e006705886333703ffab31"><td class="mdescLeft">&#160;</td><td class="mdescRight">This option will prevent <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them).  <a href="#gafbf992bbf6e006705886333703ffab31">More...</a><br/></td></tr>
+<tr class="separator:gafbf992bbf6e006705886333703ffab31"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0aba955473315e405295d978f0c16511"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511">TJPAD</a>(width)</td></tr>
+<tr class="memdesc:ga0aba955473315e405295d978f0c16511"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pad the given width to the nearest 32-bit boundary.  <a href="#ga0aba955473315e405295d978f0c16511">More...</a><br/></td></tr>
+<tr class="separator:ga0aba955473315e405295d978f0c16511"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga84878bb65404204743aa18cac02781df"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df">TJSCALED</a>(dimension, scalingFactor)</td></tr>
+<tr class="memdesc:ga84878bb65404204743aa18cac02781df"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compute the scaled value of <code>dimension</code> using the given scaling factor.  <a href="#ga84878bb65404204743aa18cac02781df">More...</a><br/></td></tr>
+<tr class="separator:ga84878bb65404204743aa18cac02781df"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="typedef-members"></a>
+Typedefs</h2></td></tr>
+<tr class="memitem:gaa29f3189c41be12ec5dee7caec318a31"><td class="memItemLeft" align="right" valign="top">typedef struct <a class="el" href="structtjtransform.html">tjtransform</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31">tjtransform</a></td></tr>
+<tr class="memdesc:gaa29f3189c41be12ec5dee7caec318a31"><td class="mdescLeft">&#160;</td><td class="mdescRight">Lossless transform.  <a href="#gaa29f3189c41be12ec5dee7caec318a31">More...</a><br/></td></tr>
+<tr class="separator:gaa29f3189c41be12ec5dee7caec318a31"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga758d2634ecb4949de7815cba621f5763"><td class="memItemLeft" align="right" valign="top">typedef void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a></td></tr>
+<tr class="memdesc:ga758d2634ecb4949de7815cba621f5763"><td class="mdescLeft">&#160;</td><td class="mdescRight">TurboJPEG instance handle.  <a href="#ga758d2634ecb4949de7815cba621f5763">More...</a><br/></td></tr>
+<tr class="separator:ga758d2634ecb4949de7815cba621f5763"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="enum-members"></a>
+Enumerations</h2></td></tr>
+<tr class="memitem:ga1d047060ea80bb9820d540bb928e9074"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">TJSAMP</a> { <br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3">TJSAMP_444</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404">TJSAMP_422</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737">TJSAMP_420</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248">TJSAMP_GRAY</a>, 
+<br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974">TJSAMP_440</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2">TJSAMP_411</a>
+<br/>
+ }</td></tr>
+<tr class="memdesc:ga1d047060ea80bb9820d540bb928e9074"><td class="mdescLeft">&#160;</td><td class="mdescRight">Chrominance subsampling options.  <a href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">More...</a><br/></td></tr>
+<tr class="separator:ga1d047060ea80bb9820d540bb928e9074"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gac916144e26c3817ac514e64ae5d12e2a"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">TJPF</a> { <br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c">TJPF_RGB</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839">TJPF_BGR</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01">TJPF_RGBX</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8">TJPF_BGRX</a>, 
+<br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af">TJPF_XBGR</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84">TJPF_XRGB</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a">TJPF_GRAY</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12">TJPF_RGBA</a>, 
+<br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4">TJPF_BGRA</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081">TJPF_ABGR</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c">TJPF_ARGB</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b">TJPF_CMYK</a>
+<br/>
+ }</td></tr>
+<tr class="memdesc:gac916144e26c3817ac514e64ae5d12e2a"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel formats.  <a href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">More...</a><br/></td></tr>
+<tr class="separator:gac916144e26c3817ac514e64ae5d12e2a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">TJCS</a> { <br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555">TJCS_RGB</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75">TJCS_YCbCr</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a">TJCS_GRAY</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53">TJCS_CMYK</a>, 
+<br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e">TJCS_YCCK</a>
+<br/>
+ }</td></tr>
+<tr class="memdesc:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="mdescLeft">&#160;</td><td class="mdescRight">JPEG colorspaces.  <a href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">More...</a><br/></td></tr>
+<tr class="separator:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga2de531af4e7e6c4f124908376b354866"><td class="memItemLeft" align="right" valign="top">enum &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a> { <br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27">TJXOP_NONE</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce">TJXOP_HFLIP</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d">TJXOP_VFLIP</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d">TJXOP_TRANSPOSE</a>, 
+<br/>
+&#160;&#160;<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4">TJXOP_TRANSVERSE</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128">TJXOP_ROT90</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692">TJXOP_ROT180</a>, 
+<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08">TJXOP_ROT270</a>
+<br/>
+ }</td></tr>
+<tr class="memdesc:ga2de531af4e7e6c4f124908376b354866"><td class="mdescLeft">&#160;</td><td class="mdescRight">Transform operations for <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>.  <a href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">More...</a><br/></td></tr>
+<tr class="separator:ga2de531af4e7e6c4f124908376b354866"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
+Functions</h2></td></tr>
+<tr class="memitem:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a">tjInitCompress</a> (void)</td></tr>
+<tr class="memdesc:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG compressor instance.  <a href="#ga3d10c47fbe4a2489a2b30c931551d01a">More...</a><br/></td></tr>
+<tr class="separator:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaba62b7a98f960839b588579898495cf2"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2">tjCompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)</td></tr>
+<tr class="memdesc:gaba62b7a98f960839b588579898495cf2"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compress an RGB, grayscale, or CMYK image into a JPEG image.  <a href="#gaba62b7a98f960839b588579898495cf2">More...</a><br/></td></tr>
+<tr class="separator:gaba62b7a98f960839b588579898495cf2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67">tjCompressFromYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pad, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)</td></tr>
+<tr class="memdesc:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="mdescLeft">&#160;</td><td class="mdescRight">Compress a YUV planar image into a JPEG image.  <a href="#ga0b931126c7a615ddc3bbd0cca6698d67">More...</a><br/></td></tr>
+<tr class="separator:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b">tjBufSize</a> (int width, int height, int jpegSubsamp)</td></tr>
+<tr class="memdesc:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="mdescLeft">&#160;</td><td class="mdescRight">The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters.  <a href="#gaccc5bca7f12fcdcc302e6e1c6d4b311b">More...</a><br/></td></tr>
+<tr class="separator:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9">tjBufSizeYUV2</a> (int width, int pad, int height, int subsamp)</td></tr>
+<tr class="memdesc:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="mdescLeft">&#160;</td><td class="mdescRight">The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters.  <a href="#gaf451664a62c1f6c7cc5a6401f32908c9">More...</a><br/></td></tr>
+<tr class="separator:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360">tjEncodeYUV3</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags)</td></tr>
+<tr class="memdesc:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="mdescLeft">&#160;</td><td class="mdescRight">Encode an RGB or grayscale image into a YUV planar image.  <a href="#ga0a5ffbf7cb58a5b6a8201114fe889360">More...</a><br/></td></tr>
+<tr class="separator:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gae5408179d041e2a2f7199c8283cf649e"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e">tjInitDecompress</a> (void)</td></tr>
+<tr class="memdesc:gae5408179d041e2a2f7199c8283cf649e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a TurboJPEG decompressor instance.  <a href="#gae5408179d041e2a2f7199c8283cf649e">More...</a><br/></td></tr>
+<tr class="separator:gae5408179d041e2a2f7199c8283cf649e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gacd0fac3af74b3511d39b4781b7103086"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086">tjDecompressHeader3</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp, int *jpegColorspace)</td></tr>
+<tr class="memdesc:gacd0fac3af74b3511d39b4781b7103086"><td class="mdescLeft">&#160;</td><td class="mdescRight">Retrieve information about a JPEG image without decompressing it.  <a href="#gacd0fac3af74b3511d39b4781b7103086">More...</a><br/></td></tr>
+<tr class="separator:gacd0fac3af74b3511d39b4781b7103086"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga6449044b9af402999ccf52f401333be8"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="structtjscalingfactor.html">tjscalingfactor</a> *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8">tjGetScalingFactors</a> (int *numscalingfactors)</td></tr>
+<tr class="memdesc:ga6449044b9af402999ccf52f401333be8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports.  <a href="#ga6449044b9af402999ccf52f401333be8">More...</a><br/></td></tr>
+<tr class="separator:ga6449044b9af402999ccf52f401333be8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gada69cc6443d1bb493b40f1626259e5e9"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9">tjDecompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
+<tr class="memdesc:gada69cc6443d1bb493b40f1626259e5e9"><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to an RGB, grayscale, or CMYK image.  <a href="#gada69cc6443d1bb493b40f1626259e5e9">More...</a><br/></td></tr>
+<tr class="separator:gada69cc6443d1bb493b40f1626259e5e9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07">tjDecompressToYUV2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pad, int height, int flags)</td></tr>
+<tr class="memdesc:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="mdescLeft">&#160;</td><td class="mdescRight">Decompress a JPEG image to a YUV planar image.  <a href="#ga7c08b340ad7f8e85d407bd9e81d44d07">More...</a><br/></td></tr>
+<tr class="separator:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed">tjDecodeYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
+<tr class="memdesc:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="mdescLeft">&#160;</td><td class="mdescRight">Decode a YUV planar image into an RGB or grayscale image.  <a href="#ga132ae2c2cadcf64c8bb0f3bdf69da3ed">More...</a><br/></td></tr>
+<tr class="separator:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga3155b775bfbac9dbba869b95a0367902"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902">tjInitTransform</a> (void)</td></tr>
+<tr class="memdesc:ga3155b775bfbac9dbba869b95a0367902"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create a new TurboJPEG transformer instance.  <a href="#ga3155b775bfbac9dbba869b95a0367902">More...</a><br/></td></tr>
+<tr class="separator:ga3155b775bfbac9dbba869b95a0367902"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gae403193ceb4aafb7e0f56ab587b48616"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616">tjTransform</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, <a class="el" href="structtjtransform.html">tjtransform</a> *transforms, int flags)</td></tr>
+<tr class="memdesc:gae403193ceb4aafb7e0f56ab587b48616"><td class="mdescLeft">&#160;</td><td class="mdescRight">Losslessly transform a JPEG image into another JPEG image.  <a href="#gae403193ceb4aafb7e0f56ab587b48616">More...</a><br/></td></tr>
+<tr class="separator:gae403193ceb4aafb7e0f56ab587b48616"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga674adee917b95ad4a896f1ba39e12540"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540">tjDestroy</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle)</td></tr>
+<tr class="memdesc:ga674adee917b95ad4a896f1ba39e12540"><td class="mdescLeft">&#160;</td><td class="mdescRight">Destroy a TurboJPEG compressor, decompressor, or transformer instance.  <a href="#ga674adee917b95ad4a896f1ba39e12540">More...</a><br/></td></tr>
+<tr class="separator:ga674adee917b95ad4a896f1ba39e12540"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff">tjAlloc</a> (int bytes)</td></tr>
+<tr class="memdesc:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="mdescLeft">&#160;</td><td class="mdescRight">Allocate an image buffer for use with TurboJPEG.  <a href="#ga5c9234bda6d993cdaffdd89bf81a00ff">More...</a><br/></td></tr>
+<tr class="separator:ga5c9234bda6d993cdaffdd89bf81a00ff"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga8c4a1231dc06a450514c835f6471f137"><td class="memItemLeft" align="right" valign="top">DLLEXPORT void DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137">tjFree</a> (unsigned char *buffer)</td></tr>
+<tr class="memdesc:ga8c4a1231dc06a450514c835f6471f137"><td class="mdescLeft">&#160;</td><td class="mdescRight">Free an image buffer previously allocated by TurboJPEG.  <a href="#ga8c4a1231dc06a450514c835f6471f137">More...</a><br/></td></tr>
+<tr class="separator:ga8c4a1231dc06a450514c835f6471f137"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga9af79c908ec131b1ae8d52fe40375abf"><td class="memItemLeft" align="right" valign="top">DLLEXPORT char *DLLCALL&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf">tjGetErrorStr</a> (void)</td></tr>
+<tr class="memdesc:ga9af79c908ec131b1ae8d52fe40375abf"><td class="mdescLeft">&#160;</td><td class="mdescRight">Returns a descriptive error message explaining why the last command failed.  <a href="#ga9af79c908ec131b1ae8d52fe40375abf">More...</a><br/></td></tr>
+<tr class="separator:ga9af79c908ec131b1ae8d52fe40375abf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table><table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="var-members"></a>
+Variables</h2></td></tr>
+<tr class="memitem:ga9e61e7cd47a15a173283ba94e781308c"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c">tjMCUWidth</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td></tr>
+<tr class="memdesc:ga9e61e7cd47a15a173283ba94e781308c"><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block width (in pixels) for a given level of chrominance subsampling.  <a href="#ga9e61e7cd47a15a173283ba94e781308c">More...</a><br/></td></tr>
+<tr class="separator:ga9e61e7cd47a15a173283ba94e781308c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gabd247bb9fecb393eca57366feb8327bf"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf">tjMCUHeight</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td></tr>
+<tr class="memdesc:gabd247bb9fecb393eca57366feb8327bf"><td class="mdescLeft">&#160;</td><td class="mdescRight">MCU block height (in pixels) for a given level of chrominance subsampling.  <a href="#gabd247bb9fecb393eca57366feb8327bf">More...</a><br/></td></tr>
+<tr class="separator:gabd247bb9fecb393eca57366feb8327bf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gadd9b446742ac8a3923f7992c7988fea8"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8">tjRedOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:gadd9b446742ac8a3923f7992c7988fea8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Red offset (in bytes) for a given pixel format.  <a href="#gadd9b446742ac8a3923f7992c7988fea8">More...</a><br/></td></tr>
+<tr class="separator:gadd9b446742ac8a3923f7992c7988fea8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga82d6e35da441112a411da41923c0ba2f"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f">tjGreenOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:ga82d6e35da441112a411da41923c0ba2f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Green offset (in bytes) for a given pixel format.  <a href="#ga82d6e35da441112a411da41923c0ba2f">More...</a><br/></td></tr>
+<tr class="separator:ga82d6e35da441112a411da41923c0ba2f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ga84e2e35d3f08025f976ec1ec53693dea"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea">tjBlueOffset</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:ga84e2e35d3f08025f976ec1ec53693dea"><td class="mdescLeft">&#160;</td><td class="mdescRight">Blue offset (in bytes) for a given pixel format.  <a href="#ga84e2e35d3f08025f976ec1ec53693dea">More...</a><br/></td></tr>
+<tr class="separator:ga84e2e35d3f08025f976ec1ec53693dea"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="memItemLeft" align="right" valign="top">static const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c">tjPixelSize</a> [<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td></tr>
+<tr class="memdesc:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="mdescLeft">&#160;</td><td class="mdescRight">Pixel size (in bytes) for a given pixel format.  <a href="#gad77cf8fe5b2bfd3cb3f53098146abb4c">More...</a><br/></td></tr>
+<tr class="separator:gad77cf8fe5b2bfd3cb3f53098146abb4c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<p>TurboJPEG API. </p>
+<p>This API provides an interface for generating, decoding, and transforming planar YUV and JPEG images in memory. </p>
+<h2 class="groupheader">Macro Definition Documentation</h2>
+<a class="anchor" id="ga39f57a6fb02d9cf32e7b6890099b5a71"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJ_NUMCS</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The number of JPEG colorspaces. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga7010a4402f54a45ba822ad8675a4655e"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJ_NUMPF</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The number of pixel formats. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga5ef3d169162ce77ce348e292a0b7477c"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJ_NUMSAMP</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The number of chrominance subsampling options. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga0f6dbd18adf38b7d46ac547f0f4d562c"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJ_NUMXOP</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The number of transform operations. </p>
+
+</div>
+</div>
+<a class="anchor" id="gacb233cfd722d66d1ccbf48a7de81f0e0"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJFLAG_ACCURATEDCT</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Use the most accurate DCT/IDCT algorithm available in the underlying codec. </p>
+<p>The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga72ecf4ebe6eb702d3c6f5ca27455e1ec"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJFLAG_BOTTOMUP</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order. </p>
+
+</div>
+</div>
+<a class="anchor" id="gaabce235db80d3f698b27f36cbd453da2"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJFLAG_FASTDCT</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Use the fastest DCT/IDCT algorithm available in the underlying codec. </p>
+<p>The default if this flag is not specified is implementation-specific. For example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast algorithm by default when compressing, because this has been shown to have only a very slight effect on accuracy, but it uses the accurate algorithm when decompressing, because this has been shown to have a larger effect. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga4ee4506c81177a06f77e2504a22efd2d"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJFLAG_FASTUPSAMPLE</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec. </p>
+<p>The default is to use smooth upsampling, which creates a smooth transition between neighboring chrominance components in order to reduce upsampling artifacts in the decompressed image. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga8808d403c68b62aaa58a4c1e58e98963"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJFLAG_NOREALLOC</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Disable buffer (re)allocation. </p>
+<p>If passed to <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>, this flag will cause those functions to generate an error if the JPEG image buffer is invalid or too small rather than attempting to allocate or reallocate that buffer. This reproduces the behavior of earlier versions of TurboJPEG. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga0aba955473315e405295d978f0c16511"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJPAD</td>
+          <td>(</td>
+          <td class="paramtype">&#160;</td>
+          <td class="paramname">width</td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Pad the given width to the nearest 32-bit boundary. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga84878bb65404204743aa18cac02781df"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJSCALED</td>
+          <td>(</td>
+          <td class="paramtype">&#160;</td>
+          <td class="paramname">dimension, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">&#160;</td>
+          <td class="paramname">scalingFactor&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Compute the scaled value of <code>dimension</code> using the given scaling factor. </p>
+<p>This macro performs the integer equivalent of <code>ceil(dimension * scalingFactor)</code>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga9c771a757fc1294add611906b89ab2d2"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJXOPT_CROP</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>This option will enable lossless cropping. </p>
+<p>See <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> for more information. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga3acee7b48ade1b99e5588736007c2589"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJXOPT_GRAY</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>This option will discard the color data in the input image and produce a grayscale output image. </p>
+
+</div>
+</div>
+<a class="anchor" id="gafbf992bbf6e006705886333703ffab31"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJXOPT_NOOUTPUT</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>This option will prevent <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> from outputting a JPEG image for this particular transform (this can be used in conjunction with a custom filter to capture the transformed DCT coefficients without transcoding them). </p>
+
+</div>
+</div>
+<a class="anchor" id="ga50e03cb5ed115330e212417429600b00"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJXOPT_PERFECT</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to return an error if the transform is not perfect. </p>
+<p>Lossless transforms operate on MCU blocks, whose size depends on the level of chrominance subsampling used (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>). If the image's width or height is not evenly divisible by the MCU block size, then there will be partial MCU blocks on the right and/or bottom edges. It is not possible to move these partial MCU blocks to the top or left of the image, so any transform that would require that is "imperfect." If this option is not specified, then any partial MCU blocks that cannot be transformed will be left in place, which will create odd-looking strips on the right or bottom edge of the image. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga319826b7eb1583c0595bbe7b95428709"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">#define TJXOPT_TRIM</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>This option will cause <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> to discard any partial MCU blocks that cannot be transformed. </p>
+
+</div>
+</div>
+<h2 class="groupheader">Typedef Documentation</h2>
+<a class="anchor" id="ga758d2634ecb4949de7815cba621f5763"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">typedef void* <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>TurboJPEG instance handle. </p>
+
+</div>
+</div>
+<a class="anchor" id="gaa29f3189c41be12ec5dee7caec318a31"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">typedef struct <a class="el" href="structtjtransform.html">tjtransform</a>  <a class="el" href="structtjtransform.html">tjtransform</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Lossless transform. </p>
+
+</div>
+</div>
+<h2 class="groupheader">Enumeration Type Documentation</h2>
+<a class="anchor" id="ga4f83ad3368e0e29d1957be0efa7c3720"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">TJCS</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>JPEG colorspaces. </p>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555"></a>TJCS_RGB</em>&nbsp;</td><td class="fielddoc">
+<p>RGB colorspace. </p>
+<p>When compressing the JPEG image, the R, G, and B components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. RGB JPEG images can be decompressed to any of the extended RGB pixel formats or grayscale, but they cannot be decompressed to YUV images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75"></a>TJCS_YCbCr</em>&nbsp;</td><td class="fielddoc">
+<p>YCbCr colorspace. </p>
+<p>YCbCr is not an absolute colorspace but rather a mathematical transformation of RGB designed solely for storage and transmission. YCbCr images must be converted to RGB before they can actually be displayed. In the YCbCr colorspace, the Y (luminance) component represents the black &amp; white portion of the original image, and the Cb and Cr (chrominance) components represent the color portion of the original image. Originally, the analog equivalent of this transformation allowed the same signal to drive both black &amp; white and color televisions, but JPEG images use YCbCr primarily because it allows the color data to be optionally subsampled for the purposes of reducing bandwidth or disk space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a"></a>TJCS_GRAY</em>&nbsp;</td><td class="fielddoc">
+<p>Grayscale colorspace. </p>
+<p>The JPEG image retains only the luminance data (Y component), and any color data from the source image is discarded. Grayscale JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53"></a>TJCS_CMYK</em>&nbsp;</td><td class="fielddoc">
+<p>CMYK colorspace. </p>
+<p>When compressing the JPEG image, the C, M, Y, and K components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. CMYK JPEG images can only be decompressed to CMYK pixels. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e"></a>TJCS_YCCK</em>&nbsp;</td><td class="fielddoc">
+<p>YCCK colorspace. </p>
+<p>YCCK (AKA "YCbCrK") is not an absolute colorspace but rather a mathematical transformation of CMYK designed solely for storage and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be reversibly transformed into YCCK, and as with YCbCr, the chrominance components in the YCCK pixels can be subsampled without incurring major perceptual loss. YCCK JPEG images can only be compressed from and decompressed to CMYK pixels. </p>
+</td></tr>
+</table>
+
+</div>
+</div>
+<a class="anchor" id="gac916144e26c3817ac514e64ae5d12e2a"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">TJPF</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Pixel formats. </p>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c"></a>TJPF_RGB</em>&nbsp;</td><td class="fielddoc">
+<p>RGB pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 3-byte pixels in the order R, G, B from lowest to highest byte address within each pixel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839"></a>TJPF_BGR</em>&nbsp;</td><td class="fielddoc">
+<p>BGR pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 3-byte pixels in the order B, G, R from lowest to highest byte address within each pixel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01"></a>TJPF_RGBX</em>&nbsp;</td><td class="fielddoc">
+<p>RGBX pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 4-byte pixels in the order R, G, B from lowest to highest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8"></a>TJPF_BGRX</em>&nbsp;</td><td class="fielddoc">
+<p>BGRX pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 4-byte pixels in the order B, G, R from lowest to highest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af"></a>TJPF_XBGR</em>&nbsp;</td><td class="fielddoc">
+<p>XBGR pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 4-byte pixels in the order R, G, B from highest to lowest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84"></a>TJPF_XRGB</em>&nbsp;</td><td class="fielddoc">
+<p>XRGB pixel format. </p>
+<p>The red, green, and blue components in the image are stored in 4-byte pixels in the order B, G, R from highest to lowest byte address within each pixel. The X component is ignored when compressing and undefined when decompressing. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a"></a>TJPF_GRAY</em>&nbsp;</td><td class="fielddoc">
+<p>Grayscale pixel format. </p>
+<p>Each 1-byte pixel represents a luminance (brightness) level from 0 to 255. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12"></a>TJPF_RGBA</em>&nbsp;</td><td class="fielddoc">
+<p>RGBA pixel format. </p>
+<p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01">TJPF_RGBX</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4"></a>TJPF_BGRA</em>&nbsp;</td><td class="fielddoc">
+<p>BGRA pixel format. </p>
+<p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8">TJPF_BGRX</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081"></a>TJPF_ABGR</em>&nbsp;</td><td class="fielddoc">
+<p>ABGR pixel format. </p>
+<p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af">TJPF_XBGR</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c"></a>TJPF_ARGB</em>&nbsp;</td><td class="fielddoc">
+<p>ARGB pixel format. </p>
+<p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84">TJPF_XRGB</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b"></a>TJPF_CMYK</em>&nbsp;</td><td class="fielddoc">
+<p>CMYK pixel format. </p>
+<p>Unlike RGB, which is an additive color model used primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily for printing. In the CMYK color model, the value of each color component typically corresponds to an amount of cyan, magenta, yellow, or black ink that is applied to a white background. In order to convert between CMYK and RGB, it is necessary to use a color management system (CMS.) A CMS will attempt to map colors within the printer's gamut to perceptually similar colors in the display's gamut and vice versa, but the mapping is typically not 1:1 or reversible, nor can it be defined with a simple formula. Thus, such a conversion is out of scope for a codec library. However, the TurboJPEG API allows for compressing CMYK pixels into a YCCK JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e" title="YCCK colorspace.">TJCS_YCCK</a>) and decompressing YCCK JPEG images into CMYK pixels. </p>
+</td></tr>
+</table>
+
+</div>
+</div>
+<a class="anchor" id="ga1d047060ea80bb9820d540bb928e9074"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">TJSAMP</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Chrominance subsampling options. </p>
+<p>When pixels are converted from RGB to YCbCr (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75" title="YCbCr colorspace.">TJCS_YCbCr</a>) or from CMYK to YCCK (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e" title="YCCK colorspace.">TJCS_YCCK</a>) as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity (the human eye is more sensitive to small changes in brightness than to small changes in color.) This is called "chrominance subsampling". </p>
+<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes. </p>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3"></a>TJSAMP_444</em>&nbsp;</td><td class="fielddoc">
+<p>4:4:4 chrominance subsampling (no chrominance subsampling). </p>
+<p>The JPEG or YUV image will contain one chrominance component for every pixel in the source image. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404"></a>TJSAMP_422</em>&nbsp;</td><td class="fielddoc">
+<p>4:2:2 chrominance subsampling. </p>
+<p>The JPEG or YUV image will contain one chrominance component for every 2x1 block of pixels in the source image. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737"></a>TJSAMP_420</em>&nbsp;</td><td class="fielddoc">
+<p>4:2:0 chrominance subsampling. </p>
+<p>The JPEG or YUV image will contain one chrominance component for every 2x2 block of pixels in the source image. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248"></a>TJSAMP_GRAY</em>&nbsp;</td><td class="fielddoc">
+<p>Grayscale. </p>
+<p>The JPEG or YUV image will contain no chrominance components. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974"></a>TJSAMP_440</em>&nbsp;</td><td class="fielddoc">
+<p>4:4:0 chrominance subsampling. </p>
+<p>The JPEG or YUV image will contain one chrominance component for every 1x2 block of pixels in the source image. Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2"></a>TJSAMP_411</em>&nbsp;</td><td class="fielddoc">
+<p>4:1:1 chrominance subsampling. </p>
+<p>The JPEG or YUV image will contain one chrominance component for every 4x1 block of pixels in the source image. JPEG images compressed with 4:1:1 subsampling will be almost exactly the same size as those compressed with 4:2:0 subsampling, and in the aggregate, both subsampling methods produce approximately the same perceptual quality. However, 4:1:1 is better able to reproduce sharp horizontal features. Note that 4:1:1 subsampling is not fully accelerated in libjpeg-turbo. </p>
+</td></tr>
+</table>
+
+</div>
+</div>
+<a class="anchor" id="ga2de531af4e7e6c4f124908376b354866"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Transform operations for <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>. </p>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27"></a>TJXOP_NONE</em>&nbsp;</td><td class="fielddoc">
+<p>Do not transform the position of the image pixels. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce"></a>TJXOP_HFLIP</em>&nbsp;</td><td class="fielddoc">
+<p>Flip (mirror) image horizontally. </p>
+<p>This transform is imperfect if there are any partial MCU blocks on the right edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d"></a>TJXOP_VFLIP</em>&nbsp;</td><td class="fielddoc">
+<p>Flip (mirror) image vertically. </p>
+<p>This transform is imperfect if there are any partial MCU blocks on the bottom edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d"></a>TJXOP_TRANSPOSE</em>&nbsp;</td><td class="fielddoc">
+<p>Transpose image (flip/mirror along upper left to lower right axis.) This transform is always perfect. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4"></a>TJXOP_TRANSVERSE</em>&nbsp;</td><td class="fielddoc">
+<p>Transverse transpose image (flip/mirror along upper right to lower left axis.) This transform is imperfect if there are any partial MCU blocks in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128"></a>TJXOP_ROT90</em>&nbsp;</td><td class="fielddoc">
+<p>Rotate image clockwise by 90 degrees. </p>
+<p>This transform is imperfect if there are any partial MCU blocks on the bottom edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692"></a>TJXOP_ROT180</em>&nbsp;</td><td class="fielddoc">
+<p>Rotate image 180 degrees. </p>
+<p>This transform is imperfect if there are any partial MCU blocks in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08"></a>TJXOP_ROT270</em>&nbsp;</td><td class="fielddoc">
+<p>Rotate image counter-clockwise by 90 degrees. </p>
+<p>This transform is imperfect if there are any partial MCU blocks on the right edge (see <a class="el" href="group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00" title="This option will cause tjTransform() to return an error if the transform is not perfect.">TJXOPT_PERFECT</a>.) </p>
+</td></tr>
+</table>
+
+</div>
+</div>
+<h2 class="groupheader">Function Documentation</h2>
+<a class="anchor" id="ga5c9234bda6d993cdaffdd89bf81a00ff"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT unsigned char* DLLCALL tjAlloc </td>
+          <td>(</td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>bytes</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Allocate an image buffer for use with TurboJPEG. </p>
+<p>You should always use this function to allocate the JPEG destination buffer(s) for <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> and <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> unless you are disabling automatic buffer (re)allocation (by setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>.)</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">bytes</td><td>the number of bytes to allocate</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>a pointer to a newly-allocated buffer with the specified number of bytes</dd></dl>
+<dl class="section see"><dt>See Also</dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137" title="Free an image buffer previously allocated by TurboJPEG.">tjFree()</a> </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="gaccc5bca7f12fcdcc302e6e1c6d4b311b"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT unsigned long DLLCALL tjBufSize </td>
+          <td>(</td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>jpegSubsamp</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. </p>
+<p>The number of bytes returned by this function is larger than the size of the uncompressed source image. The reason for this is that the JPEG format uses 16-bit coefficients, and it is thus possible for a source image with very high-frequency content to expand rather than compress when converted to a very high-quality JPEG image. Such images represent a very rare corner case, but since there is no way to predict the size of a JPEG image prior to compression, the corner case has to be handled.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">width</td><td>width of the image (in pixels) </td></tr>
+    <tr><td class="paramname">height</td><td>height of the image (in pixels) </td></tr>
+    <tr><td class="paramname">jpegSubsamp</td><td>the level of chrominance subsampling to be used when generating the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.)</td></tr>
+  </table>
+  </dd>
+</dl>
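+<dl class="section return"><dt>Returns</dt><dd>the maximum size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. Because the return type is <code>unsigned long</code>, the error value should be tested for as <code>(unsigned long)-1</code>. </dd></dl>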
+
+</div>
+</div>
+<a class="anchor" id="gaf451664a62c1f6c7cc5a6401f32908c9"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2 </td>
+          <td>(</td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pad</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>subsamp</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">width</td><td>width of the image (in pixels) </td></tr>
+    <tr><td class="paramname">pad</td><td>the width of each line in each plane of the image is padded to the nearest multiple of this number of bytes (must be a power of 2.) </td></tr>
+    <tr><td class="paramname">height</td><td>height of the image (in pixels) </td></tr>
+    <tr><td class="paramname">subsamp</td><td>level of chrominance subsampling in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.)</td></tr>
+  </table>
+  </dd>
+</dl>
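+<dl class="section return"><dt>Returns</dt><dd>the size of the buffer (in bytes) required to hold the image, or -1 if the arguments are out of bounds. Because the return type is <code>unsigned long</code>, the error value should be tested for as <code>(unsigned long)-1</code>. </dd></dl>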
+
+</div>
+</div>
+<a class="anchor" id="gaba62b7a98f960839b588579898495cf2"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjCompress2 </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>srcBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pitch</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pixelFormat</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char **&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long *&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>jpegSubsamp</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>jpegQual</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Compress an RGB, grayscale, or CMYK image into a JPEG image. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
+    <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB, grayscale, or CMYK pixels to be compressed </td></tr>
+    <tr><td class="paramname">width</td><td>width (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">pitch</td><td>bytes per line of the source image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+    <tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">pixelFormat</td><td>pixel format of the source image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>address of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:<ol type="1">
+<li>pre-allocate the JPEG buffer with an arbitrary size using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> and let TurboJPEG grow the buffer as needed,</li>
+<li>set <code>*jpegBuf</code> to NULL to tell TurboJPEG to allocate the buffer for you, or</li>
+<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a>. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
+</ol>
+If you choose option 1, <code>*jpegSize</code> should be set to the size of your pre-allocated buffer. In any case, unless you have set <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>, you should always check <code>*jpegBuf</code> upon return from this function, as it may have changed. </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>pointer to an unsigned long variable that holds the size of the JPEG image buffer. If <code>*jpegBuf</code> points to a pre-allocated buffer, then <code>*jpegSize</code> should be set to the size of the buffer. Upon return, <code>*jpegSize</code> will contain the size of the JPEG image (in bytes.) </td></tr>
+    <tr><td class="paramname">jpegSubsamp</td><td>the level of chrominance subsampling to be used when generating the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+    <tr><td class="paramname">jpegQual</td><td>the image quality of the generated JPEG image (1 = worst, 100 = best) </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga0b931126c7a615ddc3bbd0cca6698d67"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjCompressFromYUV </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>srcBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pad</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>subsamp</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char **&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long *&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>jpegQual</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Compress a YUV planar image into a JPEG image. </p>
+<p>Note that, if the width or height of the YUV image is not an exact multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
+    <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing a YUV planar image to be compressed. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the buffer, and the size of each plane is determined by the specified width, height, padding, and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane should be padded to the nearest multiple of 2 (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) This is irrespective of any additional padding specified in the <code>pad</code> parameter. </td></tr>
+    <tr><td class="paramname">width</td><td>width (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">pad</td><td>the line padding used in the source image. For instance, if each line in each plane of the YUV image is padded to the nearest multiple of 4 bytes, then <code>pad</code> should be set to 4. </td></tr>
+    <tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling used in the source image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>address of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:<ol type="1">
+<li>pre-allocate the JPEG buffer with an arbitrary size using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> and let TurboJPEG grow the buffer as needed,</li>
+<li>set <code>*jpegBuf</code> to NULL to tell TurboJPEG to allocate the buffer for you, or</li>
+<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a>. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
+</ol>
+If you choose option 1, <code>*jpegSize</code> should be set to the size of your pre-allocated buffer. In any case, unless you have set <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>, you should always check <code>*jpegBuf</code> upon return from this function, as it may have changed. </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>pointer to an unsigned long variable that holds the size of the JPEG image buffer. If <code>*jpegBuf</code> points to a pre-allocated buffer, then <code>*jpegSize</code> should be set to the size of the buffer. Upon return, <code>*jpegSize</code> will contain the size of the JPEG image (in bytes.) </td></tr>
+    <tr><td class="paramname">jpegQual</td><td>the image quality of the generated JPEG image (1 = worst, 100 = best) </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga132ae2c2cadcf64c8bb0f3bdf69da3ed"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjDecodeYUV </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>srcBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pad</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>subsamp</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>dstBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pitch</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pixelFormat</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Decode a YUV planar image into an RGB or grayscale image. </p>
+<p>This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG decompression process. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer, and the size of each plane is determined by the width and height of the source image, as well as the specified padding and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane should be padded to the nearest multiple of 2 in the input image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) </p>
+<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
+    <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing a YUV planar image to be decoded. The size of this buffer should match the value returned by <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> for the given image width, height, padding, and level of chrominance subsampling. </td></tr>
+    <tr><td class="paramname">pad</td><td>Use this parameter to specify that the width of each line in each plane of the YUV source image is padded to the nearest multiple of this number of bytes (must be a power of 2.) </td></tr>
+    <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling used in the YUV source image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decoded image. This buffer should normally be <code>pitch * height</code> bytes in size, but the <code>dstBuf</code> pointer can also be used to decode into a specific region of a larger buffer. </td></tr>
+    <tr><td class="paramname">width</td><td>width (in pixels) of the source and destination images </td></tr>
+    <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the destination image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the destination image should be padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+    <tr><td class="paramname">height</td><td>height (in pixels) of the source and destination images </td></tr>
+    <tr><td class="paramname">pixelFormat</td><td>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="gada69cc6443d1bb493b40f1626259e5e9"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjDecompress2 </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>dstBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pitch</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pixelFormat</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Decompress a JPEG image to an RGB, grayscale, or CMYK image. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to decompress </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decompressed image. This buffer should normally be <code>pitch * scaledHeight</code> bytes in size, where <code>scaledHeight</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image height and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>. The <code>dstBuf</code> pointer may also be used to decompress into a specific region of a larger buffer. </td></tr>
+    <tr><td class="paramname">width</td><td>desired width (in pixels) of the destination image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If <code>width</code> is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this is <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the decompressed image is unpadded, else <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: <code>scaledWidth</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image width and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>.) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+    <tr><td class="paramname">height</td><td>desired height (in pixels) of the destination image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">pixelFormat</td><td>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="gacd0fac3af74b3511d39b4781b7103086"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjDecompressHeader3 </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int *&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int *&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int *&#160;</td>
+          <td class="paramname"><em>jpegSubsamp</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int *&#160;</td>
+          <td class="paramname"><em>jpegColorspace</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Retrieve information about a JPEG image without decompressing it. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing a JPEG image </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
+    <tr><td class="paramname">width</td><td>pointer to an integer variable that will receive the width (in pixels) of the JPEG image </td></tr>
+    <tr><td class="paramname">height</td><td>pointer to an integer variable that will receive the height (in pixels) of the JPEG image </td></tr>
+    <tr><td class="paramname">jpegSubsamp</td><td>pointer to an integer variable that will receive the level of chrominance subsampling used when compressing the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+    <tr><td class="paramname">jpegColorspace</td><td>pointer to an integer variable that will receive one of the JPEG colorspace constants, indicating the colorspace of the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">JPEG colorspaces</a>.)</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga7c08b340ad7f8e85d407bd9e81d44d07"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjDecompressToYUV2 </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>dstBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pad</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Decompress a JPEG image to a YUV planar image. </p>
+<p>This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image. The structure of the planes in this image is the same as in the images generated by <a class="el" href="group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360" title="Encode an RGB or grayscale image into a YUV planar image.">tjEncodeYUV3()</a>. Note that, if the width or height of the JPEG image is not an even multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG. </p>
+<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to decompress </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> to determine the appropriate size for this buffer based on the image width, height, padding, and level of subsampling. </td></tr>
+    <tr><td class="paramname">width</td><td>desired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If <code>width</code> is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">pad</td><td>the width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, <code>pad</code> should be set to 4. </td></tr>
+    <tr><td class="paramname">height</td><td>desired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga674adee917b95ad4a896f1ba39e12540"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjDestroy </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Destroy a TurboJPEG compressor, decompressor, or transformer instance. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor, decompressor, or transformer instance</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga0a5ffbf7cb58a5b6a8201114fe889360"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjEncodeYUV3 </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>srcBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>width</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pitch</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>height</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pixelFormat</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>dstBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>pad</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>subsamp</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Encode an RGB or grayscale image into a YUV planar image. </p>
+<p>This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG compression process. The Y, U (Cb), and V (Cr) image planes are stored sequentially into the destination buffer, and the size of each plane is determined by the width and height of the source image, as well as the specified padding and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane is padded to the nearest multiple of 2 in the output image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) </p>
+<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
+    <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB or grayscale pixels to be encoded </td></tr>
+    <tr><td class="paramname">width</td><td>width (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">pitch</td><td>bytes per line of the source image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+    <tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
+    <tr><td class="paramname">pixelFormat</td><td>pixel format of the source image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+    <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> to determine the appropriate size for this buffer based on the image width, height, padding, and level of chrominance subsampling. </td></tr>
+    <tr><td class="paramname">pad</td><td>the width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, <code>pad</code> should be set to 4. </td></tr>
+    <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling to be used when generating the YUV image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) To generate images suitable for X Video, <code>subsamp</code> should be set to <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737">TJSAMP_420</a>. This produces an image compatible with the I420 (AKA "YUV420P") format. </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga8c4a1231dc06a450514c835f6471f137"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT void DLLCALL tjFree </td>
+          <td>(</td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>buffer</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Free an image buffer previously allocated by TurboJPEG. </p>
+<p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">buffer</td><td>address of the buffer to free</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section see"><dt>See Also</dt><dd><a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga9af79c908ec131b1ae8d52fe40375abf"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT char* DLLCALL tjGetErrorStr </td>
+          <td>(</td>
+          <td class="paramtype">void&#160;</td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Returns a descriptive error message explaining why the last command failed. </p>
+<dl class="section return"><dt>Returns</dt><dd>a descriptive error message explaining why the last command failed. </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga6449044b9af402999ccf52f401333be8"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT <a class="el" href="structtjscalingfactor.html">tjscalingfactor</a>* DLLCALL tjGetScalingFactors </td>
+          <td>(</td>
+          <td class="paramtype">int *&#160;</td>
+          <td class="paramname"><em>numscalingfactors</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">numscalingfactors</td><td>pointer to an integer variable that will receive the number of elements in the list</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>a pointer to a list of fractional scaling factors, or NULL if an error is encountered (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga3d10c47fbe4a2489a2b30c931551d01a"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL tjInitCompress </td>
+          <td>(</td>
+          <td class="paramtype">void&#160;</td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Create a TurboJPEG compressor instance. </p>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="gae5408179d041e2a2f7199c8283cf649e"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL tjInitDecompress </td>
+          <td>(</td>
+          <td class="paramtype">void&#160;</td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Create a TurboJPEG decompressor instance. </p>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga3155b775bfbac9dbba869b95a0367902"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL tjInitTransform </td>
+          <td>(</td>
+          <td class="paramtype">void&#160;</td>
+          <td class="paramname"></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Create a new TurboJPEG transformer instance. </p>
+<dl class="section return"><dt>Returns</dt><dd>a handle to the newly-created instance, or NULL if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="gae403193ceb4aafb7e0f56ab587b48616"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">DLLEXPORT int DLLCALL tjTransform </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a>&#160;</td>
+          <td class="paramname"><em>handle</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char *&#160;</td>
+          <td class="paramname"><em>jpegBuf</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long&#160;</td>
+          <td class="paramname"><em>jpegSize</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>n</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned char **&#160;</td>
+          <td class="paramname"><em>dstBufs</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">unsigned long *&#160;</td>
+          <td class="paramname"><em>dstSizes</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype"><a class="el" href="structtjtransform.html">tjtransform</a> *&#160;</td>
+          <td class="paramname"><em>transforms</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">int&#160;</td>
+          <td class="paramname"><em>flags</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Losslessly transform a JPEG image into another JPEG image. </p>
+<p>Lossless transforms work by moving the raw coefficients from one JPEG image structure to another without altering the values of the coefficients. While this is typically faster than decompressing the image, transforming it, and re-compressing it, lossless transforms are not free. Each lossless transform requires reading and performing Huffman decoding on all of the coefficients in the source image, regardless of the size of the destination image. Thus, this function provides a means of generating multiple transformed images from the same source or applying multiple transformations simultaneously, in order to eliminate the need to read the source coefficients multiple times.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG transformer instance </td></tr>
+    <tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to transform </td></tr>
+    <tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
+    <tr><td class="paramname">n</td><td>the number of transformed JPEG images to generate </td></tr>
+    <tr><td class="paramname">dstBufs</td><td>pointer to an array of n image buffers. <code>dstBufs[i]</code> will receive a JPEG image that has been transformed using the parameters in <code>transforms[i]</code>. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:<ol type="1">
+<li>pre-allocate the JPEG buffer with an arbitrary size using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> and let TurboJPEG grow the buffer as needed,</li>
+<li>set <code>dstBufs[i]</code> to NULL to tell TurboJPEG to allocate the buffer for you, or</li>
+<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a> with the transformed or cropped width and height. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
+</ol>
+If you choose option 1, <code>dstSizes[i]</code> should be set to the size of your pre-allocated buffer. In any case, unless you have set <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>, you should always check <code>dstBufs[i]</code> upon return from this function, as it may have changed. </td></tr>
+    <tr><td class="paramname">dstSizes</td><td>pointer to an array of n unsigned long variables that will receive the actual sizes (in bytes) of each transformed JPEG image. If <code>dstBufs[i]</code> points to a pre-allocated buffer, then <code>dstSizes[i]</code> should be set to the size of the buffer. Upon return, <code>dstSizes[i]</code> will contain the size of the JPEG image (in bytes.) </td></tr>
+    <tr><td class="paramname">transforms</td><td>pointer to an array of n <a class="el" href="structtjtransform.html" title="Lossless transform.">tjtransform</a> structures, each of which specifies the transform parameters and/or cropping region for the corresponding transformed output image. </td></tr>
+    <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<h2 class="groupheader">Variable Documentation</h2>
+<a class="anchor" id="ga84e2e35d3f08025f976ec1ec53693dea"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjBlueOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Blue offset (in bytes) for a given pixel format. </p>
+<p>This specifies the number of bytes that the blue component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the blue component will be <code>pixel[tjBlueOffset[TJ_BGRX]]</code>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ga82d6e35da441112a411da41923c0ba2f"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjGreenOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Green offset (in bytes) for a given pixel format. </p>
+<p>This specifies the number of bytes that the green component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the green component will be <code>pixel[tjGreenOffset[TJ_BGRX]]</code>. </p>
+
+</div>
+</div>
+<a class="anchor" id="gabd247bb9fecb393eca57366feb8327bf"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjMCUHeight[<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>MCU block height (in pixels) for a given level of chrominance subsampling. </p>
+<p>MCU block sizes:</p>
+<ul>
+<li>8x8 for no subsampling or grayscale</li>
+<li>16x8 for 4:2:2</li>
+<li>8x16 for 4:4:0</li>
+<li>16x16 for 4:2:0</li>
+<li>32x8 for 4:1:1 </li>
+</ul>
+
+</div>
+</div>
+<a class="anchor" id="ga9e61e7cd47a15a173283ba94e781308c"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjMCUWidth[<a class="el" href="group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c">TJ_NUMSAMP</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>MCU block width (in pixels) for a given level of chrominance subsampling. </p>
+<p>MCU block sizes:</p>
+<ul>
+<li>8x8 for no subsampling or grayscale</li>
+<li>16x8 for 4:2:2</li>
+<li>8x16 for 4:4:0</li>
+<li>16x16 for 4:2:0</li>
+<li>32x8 for 4:1:1 </li>
+</ul>
+
+</div>
+</div>
+<a class="anchor" id="gad77cf8fe5b2bfd3cb3f53098146abb4c"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjPixelSize[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Pixel size (in bytes) for a given pixel format. </p>
+
+</div>
+</div>
+<a class="anchor" id="gadd9b446742ac8a3923f7992c7988fea8"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">const int tjRedOffset[<a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a>]</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">static</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Red offset (in bytes) for a given pixel format. </p>
+<p>This specifies the number of bytes that the red component is offset from the start of the pixel. For instance, if a pixel of format TJ_BGRX is stored in <code>char pixel[]</code>, then the red component will be <code>pixel[tjRedOffset[TJ_BGRX]]</code>. </p>
+
+</div>
+</div>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/index.html b/doc/html/index.html
new file mode 100644
index 0000000..139b84c
--- /dev/null
+++ b/doc/html/index.html
@@ -0,0 +1,89 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Main Page</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li class="current"><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">TurboJPEG Documentation</div>  </div>
+</div><!--header-->
+<div class="contents">
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/jquery.js b/doc/html/jquery.js
new file mode 100644
index 0000000..63939e7
--- /dev/null
+++ b/doc/html/jquery.js
@@ -0,0 +1,8 @@
+/*! jQuery v1.7.1 jquery.com | jquery.org/license */
+(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f("<"+a+">").appendTo(b),e=d.css("display");d.remove();if(e==="none"||e===""){cl||(cl=c.createElement("iframe"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode==="CSS1Compat"?"<!doctype html>":"")+"<html><body>"),cm.close();d=cm.createElement(a),cm.body.appendChild(d),e=f.css(d,"display"),b.removeChild(cl)}ck[a]=e}return ck[a]}function cu(a,b){var c={};f.each(cq.concat.apply([],cq.slice(0,b)),function(){c[this]=a});return c}function ct(){cr=b}function cs(){setTimeout(ct,0);return cr=f.now()}function cj(){try{return new a.ActiveXObject("Microsoft.XMLHTTP")}catch(b){}}function ci(){try{return new a.XMLHttpRequest}catch(b){}}function cc(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d.length,j,k=d[0],l,m,n,o,p;for(g=1;g<i;g++){if(g===1)for(h in a.converters)typeof h=="string"&&(e[h.toLowerCase()]=a.converters[h]);l=k,k=d[g];if(k==="*")k=l;else if(l!=="*"&&l!==k){m=l+" "+k,n=e[m]||e["* "+k];if(!n){p=b;for(o in e){j=o.split(" ");if(j[0]===l||j[0]==="*"){p=e[j[1]+" "+k];if(p){o=e[o],o===!0?n=p:p===!0&&(n=o);break}}}}!n&&!p&&f.error("No conversion from "+m.replace(" "," to ")),n!==!0&&(c=n?n(c):p(o(c)))}}return c}function cb(a,c,d){var e=a.contents,f=a.dataTypes,g=a.responseFields,h,i,j,k;for(i in g)i in d&&(c[g[i]]=d[i]);while(f[0]==="*")f.shift(),h===b&&(h=a.mimeType||c.getResponseHeader("content-type"));if(h)for(i in e)if(e[i]&&e[i].test(h)){f.unshift(i);break}if(f[0]in d)j=f[0];else{for(i in d){if(!f[0]||a.converters[i+" "+f[0]]){j=i;break}k||(k=i)}j=j||k}if(j){j!==f[0]&&f.unshift(j);return d[j]}}function ca(a,b,c,d){if(f.isArray(b))f.each(b,function(b,e){c||bE.test(a)?d(a,e):ca(a+"["+(typeof e=="object"||f.isArray(e)?b:"")+"]",e,c,d)});else if(!c&&b!=null&&typeof b=="object")for(var e in 
b)ca(a+"["+e+"]",b[e],c,d);else d(a,b)}function b_(a,c){var d,e,g=f.ajaxSettings.flatOptions||{};for(d in c)c[d]!==b&&((g[d]?a:e||(e={}))[d]=c[d]);e&&f.extend(!0,a,e)}function b$(a,c,d,e,f,g){f=f||c.dataTypes[0],g=g||{},g[f]=!0;var h=a[f],i=0,j=h?h.length:0,k=a===bT,l;for(;i<j&&(k||!l);i++)l=h[i](c,d,e),typeof l=="string"&&(!k||g[l]?l=b:(c.dataTypes.unshift(l),l=b$(a,c,d,e,l,g)));(k||!l)&&!g["*"]&&(l=b$(a,c,d,e,"*",g));return l}function bZ(a){return function(b,c){typeof b!="string"&&(c=b,b="*");if(f.isFunction(c)){var d=b.toLowerCase().split(bP),e=0,g=d.length,h,i,j;for(;e<g;e++)h=d[e],j=/^\+/.test(h),j&&(h=h.substr(1)||"*"),i=a[h]=a[h]||[],i[j?"unshift":"push"](c)}}}function bC(a,b,c){var d=b==="width"?a.offsetWidth:a.offsetHeight,e=b==="width"?bx:by,g=0,h=e.length;if(d>0){if(c!=="border")for(;g<h;g++)c||(d-=parseFloat(f.css(a,"padding"+e[g]))||0),c==="margin"?d+=parseFloat(f.css(a,c+e[g]))||0:d-=parseFloat(f.css(a,"border"+e[g]+"Width"))||0;return d+"px"}d=bz(a,b,b);if(d<0||d==null)d=a.style[b]||0;d=parseFloat(d)||0;if(c)for(;g<h;g++)d+=parseFloat(f.css(a,"padding"+e[g]))||0,c!=="padding"&&(d+=parseFloat(f.css(a,"border"+e[g]+"Width"))||0),c==="margin"&&(d+=parseFloat(f.css(a,c+e[g]))||0);return d+"px"}function bp(a,b){b.src?f.ajax({url:b.src,async:!1,dataType:"script"}):f.globalEval((b.text||b.textContent||b.innerHTML||"").replace(bf,"/*$0*/")),b.parentNode&&b.parentNode.removeChild(b)}function bo(a){var b=c.createElement("div");bh.appendChild(b),b.innerHTML=a.outerHTML;return b.firstChild}function bn(a){var b=(a.nodeName||"").toLowerCase();b==="input"?bm(a):b!=="script"&&typeof a.getElementsByTagName!="undefined"&&f.grep(a.getElementsByTagName("input"),bm)}function bm(a){if(a.type==="checkbox"||a.type==="radio")a.defaultChecked=a.checked}function bl(a){return typeof a.getElementsByTagName!="undefined"?a.getElementsByTagName("*"):typeof a.querySelectorAll!="undefined"?a.querySelectorAll("*"):[]}function bk(a,b){var 
c;if(b.nodeType===1){b.clearAttributes&&b.clearAttributes(),b.mergeAttributes&&b.mergeAttributes(a),c=b.nodeName.toLowerCase();if(c==="object")b.outerHTML=a.outerHTML;else if(c!=="input"||a.type!=="checkbox"&&a.type!=="radio"){if(c==="option")b.selected=a.defaultSelected;else if(c==="input"||c==="textarea")b.defaultValue=a.defaultValue}else a.checked&&(b.defaultChecked=b.checked=a.checked),b.value!==a.value&&(b.value=a.value);b.removeAttribute(f.expando)}}function bj(a,b){if(b.nodeType===1&&!!f.hasData(a)){var c,d,e,g=f._data(a),h=f._data(b,g),i=g.events;if(i){delete h.handle,h.events={};for(c in i)for(d=0,e=i[c].length;d<e;d++)f.event.add(b,c+(i[c][d].namespace?".":"")+i[c][d].namespace,i[c][d],i[c][d].data)}h.data&&(h.data=f.extend({},h.data))}}function bi(a,b){return f.nodeName(a,"table")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function U(a){var b=V.split("|"),c=a.createDocumentFragment();if(c.createElement)while(b.length)c.createElement(b.pop());return c}function T(a,b,c){b=b||0;if(f.isFunction(b))return f.grep(a,function(a,d){var e=!!b.call(a,d,a);return e===c});if(b.nodeType)return f.grep(a,function(a,d){return a===b===c});if(typeof b=="string"){var d=f.grep(a,function(a){return a.nodeType===1});if(O.test(b))return f.filter(b,d,!c);b=f.filter(b,d)}return f.grep(a,function(a,d){return f.inArray(a,b)>=0===c})}function S(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function K(){return!0}function J(){return!1}function n(a,b,c){var d=b+"defer",e=b+"queue",g=b+"mark",h=f._data(a,d);h&&(c==="queue"||!f._data(a,e))&&(c==="mark"||!f._data(a,g))&&setTimeout(function(){!f._data(a,e)&&!f._data(a,g)&&(f.removeData(a,d,!0),h.fire())},0)}function m(a){for(var b in a){if(b==="data"&&f.isEmptyObject(a[b]))continue;if(b!=="toJSON")return!1}return!0}function l(a,c,d){if(d===b&&a.nodeType===1){var e="data-"+c.replace(k,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof 
d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:f.isNumeric(d)?parseFloat(d):j.test(d)?f.parseJSON(d):d}catch(g){}f.data(a,c,d)}else d=b}return d}function h(a){var b=g[a]={},c,d;a=a.split(/\s+/);for(c=0,d=a.length;c<d;c++)b[a[c]]=!0;return b}var c=a.document,d=a.navigator,e=a.location,f=function(){function J(){if(!e.isReady){try{c.documentElement.doScroll("left")}catch(a){setTimeout(J,1);return}e.ready()}}var e=function(a,b){return new e.fn.init(a,b,h)},f=a.jQuery,g=a.$,h,i=/^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,j=/\S/,k=/^\s+/,l=/\s+$/,m=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,n=/^[\],:{}\s]*$/,o=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,p=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,q=/(?:^|:|,)(?:\s*\[)+/g,r=/(webkit)[ \/]([\w.]+)/,s=/(opera)(?:.*version)?[ \/]([\w.]+)/,t=/(msie) ([\w.]+)/,u=/(mozilla)(?:.*? rv:([\w.]+))?/,v=/-([a-z]|[0-9])/ig,w=/^-ms-/,x=function(a,b){return(b+"").toUpperCase()},y=d.userAgent,z,A,B,C=Object.prototype.toString,D=Object.prototype.hasOwnProperty,E=Array.prototype.push,F=Array.prototype.slice,G=String.prototype.trim,H=Array.prototype.indexOf,I={};e.fn=e.prototype={constructor:e,init:function(a,d,f){var g,h,j,k;if(!a)return this;if(a.nodeType){this.context=this[0]=a,this.length=1;return this}if(a==="body"&&!d&&c.body){this.context=c,this[0]=c.body,this.selector=a,this.length=1;return this}if(typeof a=="string"){a.charAt(0)!=="<"||a.charAt(a.length-1)!==">"||a.length<3?g=i.exec(a):g=[null,a,null];if(g&&(g[1]||!d)){if(g[1]){d=d instanceof e?d[0]:d,k=d?d.ownerDocument||d:c,j=m.exec(a),j?e.isPlainObject(d)?(a=[c.createElement(j[1])],e.fn.attr.call(a,d,!0)):a=[k.createElement(j[1])]:(j=e.buildFragment([g[1]],[k]),a=(j.cacheable?e.clone(j.fragment):j.fragment).childNodes);return e.merge(this,a)}h=c.getElementById(g[2]);if(h&&h.parentNode){if(h.id!==g[2])return f.find(a);this.length=1,this[0]=h}this.context=c,this.selector=a;return 
this}return!d||d.jquery?(d||f).find(a):this.constructor(d).find(a)}if(e.isFunction(a))return f.ready(a);a.selector!==b&&(this.selector=a.selector,this.context=a.context);return e.makeArray(a,this)},selector:"",jquery:"1.7.1",length:0,size:function(){return this.length},toArray:function(){return F.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var d=this.constructor();e.isArray(a)?E.apply(d,a):e.merge(d,a),d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")");return d},each:function(a,b){return e.each(this,a,b)},ready:function(a){e.bindReady(),A.add(a);return this},eq:function(a){a=+a;return a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(F.apply(this,arguments),"slice",F.call(arguments).join(","))},map:function(a){return this.pushStack(e.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:E,sort:[].sort,splice:[].splice},e.fn.init.prototype=e.fn,e.extend=e.fn.extend=function(){var a,c,d,f,g,h,i=arguments[0]||{},j=1,k=arguments.length,l=!1;typeof i=="boolean"&&(l=i,i=arguments[1]||{},j=2),typeof i!="object"&&!e.isFunction(i)&&(i={}),k===j&&(i=this,--j);for(;j<k;j++)if((a=arguments[j])!=null)for(c in a){d=i[c],f=a[c];if(i===f)continue;l&&f&&(e.isPlainObject(f)||(g=e.isArray(f)))?(g?(g=!1,h=d&&e.isArray(d)?d:[]):h=d&&e.isPlainObject(d)?d:{},i[c]=e.extend(l,h,f)):f!==b&&(i[c]=f)}return i},e.extend({noConflict:function(b){a.$===e&&(a.$=g),b&&a.jQuery===e&&(a.jQuery=f);return e},isReady:!1,readyWait:1,holdReady:function(a){a?e.readyWait++:e.ready(!0)},ready:function(a){if(a===!0&&!--e.readyWait||a!==!0&&!e.isReady){if(!c.body)return 
setTimeout(e.ready,1);e.isReady=!0;if(a!==!0&&--e.readyWait>0)return;A.fireWith(c,[e]),e.fn.trigger&&e(c).trigger("ready").off("ready")}},bindReady:function(){if(!A){A=e.Callbacks("once memory");if(c.readyState==="complete")return setTimeout(e.ready,1);if(c.addEventListener)c.addEventListener("DOMContentLoaded",B,!1),a.addEventListener("load",e.ready,!1);else if(c.attachEvent){c.attachEvent("onreadystatechange",B),a.attachEvent("onload",e.ready);var b=!1;try{b=a.frameElement==null}catch(d){}c.documentElement.doScroll&&b&&J()}}},isFunction:function(a){return e.type(a)==="function"},isArray:Array.isArray||function(a){return e.type(a)==="array"},isWindow:function(a){return a&&typeof a=="object"&&"setInterval"in a},isNumeric:function(a){return!isNaN(parseFloat(a))&&isFinite(a)},type:function(a){return a==null?String(a):I[C.call(a)]||"object"},isPlainObject:function(a){if(!a||e.type(a)!=="object"||a.nodeType||e.isWindow(a))return!1;try{if(a.constructor&&!D.call(a,"constructor")&&!D.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}var d;for(d in a);return d===b||D.call(a,d)},isEmptyObject:function(a){for(var b in a)return!1;return!0},error:function(a){throw new Error(a)},parseJSON:function(b){if(typeof b!="string"||!b)return null;b=e.trim(b);if(a.JSON&&a.JSON.parse)return a.JSON.parse(b);if(n.test(b.replace(o,"@").replace(p,"]").replace(q,"")))return(new Function("return "+b))();e.error("Invalid JSON: "+b)},parseXML:function(c){var d,f;try{a.DOMParser?(f=new DOMParser,d=f.parseFromString(c,"text/xml")):(d=new ActiveXObject("Microsoft.XMLDOM"),d.async="false",d.loadXML(c))}catch(g){d=b}(!d||!d.documentElement||d.getElementsByTagName("parsererror").length)&&e.error("Invalid XML: "+c);return d},noop:function(){},globalEval:function(b){b&&j.test(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(w,"ms-").replace(v,x)},nodeName:function(a,b){return 
a.nodeName&&a.nodeName.toUpperCase()===b.toUpperCase()},each:function(a,c,d){var f,g=0,h=a.length,i=h===b||e.isFunction(a);if(d){if(i){for(f in a)if(c.apply(a[f],d)===!1)break}else for(;g<h;)if(c.apply(a[g++],d)===!1)break}else if(i){for(f in a)if(c.call(a[f],f,a[f])===!1)break}else for(;g<h;)if(c.call(a[g],g,a[g++])===!1)break;return a},trim:G?function(a){return a==null?"":G.call(a)}:function(a){return a==null?"":(a+"").replace(k,"").replace(l,"")},makeArray:function(a,b){var c=b||[];if(a!=null){var d=e.type(a);a.length==null||d==="string"||d==="function"||d==="regexp"||e.isWindow(a)?E.call(c,a):e.merge(c,a)}return c},inArray:function(a,b,c){var d;if(b){if(H)return H.call(b,a,c);d=b.length,c=c?c<0?Math.max(0,d+c):c:0;for(;c<d;c++)if(c in b&&b[c]===a)return c}return-1},merge:function(a,c){var d=a.length,e=0;if(typeof c.length=="number")for(var f=c.length;e<f;e++)a[d++]=c[e];else while(c[e]!==b)a[d++]=c[e++];a.length=d;return a},grep:function(a,b,c){var d=[],e;c=!!c;for(var f=0,g=a.length;f<g;f++)e=!!b(a[f],f),c!==e&&d.push(a[f]);return d},map:function(a,c,d){var f,g,h=[],i=0,j=a.length,k=a instanceof e||j!==b&&typeof j=="number"&&(j>0&&a[0]&&a[j-1]||j===0||e.isArray(a));if(k)for(;i<j;i++)f=c(a[i],i,d),f!=null&&(h[h.length]=f);else for(g in a)f=c(a[g],g,d),f!=null&&(h[h.length]=f);return h.concat.apply([],h)},guid:1,proxy:function(a,c){if(typeof c=="string"){var d=a[c];c=a,a=d}if(!e.isFunction(a))return b;var f=F.call(arguments,2),g=function(){return a.apply(c,f.concat(F.call(arguments)))};g.guid=a.guid=a.guid||g.guid||e.guid++;return g},access:function(a,c,d,f,g,h){var i=a.length;if(typeof c=="object"){for(var j in c)e.access(a,j,c[j],f,g,d);return a}if(d!==b){f=!h&&f&&e.isFunction(d);for(var k=0;k<i;k++)g(a[k],c,f?d.call(a[k],k,g(a[k],c)):d,h);return a}return i?g(a[0],c):b},now:function(){return(new Date).getTime()},uaMatch:function(a){a=a.toLowerCase();var 
b=r.exec(a)||s.exec(a)||t.exec(a)||a.indexOf("compatible")<0&&u.exec(a)||[];return{browser:b[1]||"",version:b[2]||"0"}},sub:function(){function a(b,c){return new a.fn.init(b,c)}e.extend(!0,a,this),a.superclass=this,a.fn=a.prototype=this(),a.fn.constructor=a,a.sub=this.sub,a.fn.init=function(d,f){f&&f instanceof e&&!(f instanceof a)&&(f=a(f));return e.fn.init.call(this,d,f,b)},a.fn.init.prototype=a.fn;var b=a(c);return a},browser:{}}),e.each("Boolean Number String Function Array Date RegExp Object".split(" "),function(a,b){I["[object "+b+"]"]=b.toLowerCase()}),z=e.uaMatch(y),z.browser&&(e.browser[z.browser]=!0,e.browser.version=z.version),e.browser.webkit&&(e.browser.safari=!0),j.test(" ")&&(k=/^[\s\xA0]+/,l=/[\s\xA0]+$/),h=e(c),c.addEventListener?B=function(){c.removeEventListener("DOMContentLoaded",B,!1),e.ready()}:c.attachEvent&&(B=function(){c.readyState==="complete"&&(c.detachEvent("onreadystatechange",B),e.ready())});return e}(),g={};f.Callbacks=function(a){a=a?g[a]||h(a):{};var c=[],d=[],e,i,j,k,l,m=function(b){var d,e,g,h,i;for(d=0,e=b.length;d<e;d++)g=b[d],h=f.type(g),h==="array"?m(g):h==="function"&&(!a.unique||!o.has(g))&&c.push(g)},n=function(b,f){f=f||[],e=!a.memory||[b,f],i=!0,l=j||0,j=0,k=c.length;for(;c&&l<k;l++)if(c[l].apply(b,f)===!1&&a.stopOnFalse){e=!0;break}i=!1,c&&(a.once?e===!0?o.disable():c=[]:d&&d.length&&(e=d.shift(),o.fireWith(e[0],e[1])))},o={add:function(){if(c){var a=c.length;m(arguments),i?k=c.length:e&&e!==!0&&(j=a,n(e[0],e[1]))}return this},remove:function(){if(c){var b=arguments,d=0,e=b.length;for(;d<e;d++)for(var f=0;f<c.length;f++)if(b[d]===c[f]){i&&f<=k&&(k--,f<=l&&l--),c.splice(f--,1);if(a.unique)break}}return this},has:function(a){if(c){var b=0,d=c.length;for(;b<d;b++)if(a===c[b])return!0}return!1},empty:function(){c=[];return this},disable:function(){c=d=e=b;return this},disabled:function(){return!c},lock:function(){d=b,(!e||e===!0)&&o.disable();return 
this},locked:function(){return!d},fireWith:function(b,c){d&&(i?a.once||d.push([b,c]):(!a.once||!e)&&n(b,c));return this},fire:function(){o.fireWith(this,arguments);return this},fired:function(){return!!e}};return o};var i=[].slice;f.extend({Deferred:function(a){var b=f.Callbacks("once memory"),c=f.Callbacks("once memory"),d=f.Callbacks("memory"),e="pending",g={resolve:b,reject:c,notify:d},h={done:b.add,fail:c.add,progress:d.add,state:function(){return e},isResolved:b.fired,isRejected:c.fired,then:function(a,b,c){i.done(a).fail(b).progress(c);return this},always:function(){i.done.apply(i,arguments).fail.apply(i,arguments);return this},pipe:function(a,b,c){return f.Deferred(function(d){f.each({done:[a,"resolve"],fail:[b,"reject"],progress:[c,"notify"]},function(a,b){var c=b[0],e=b[1],g;f.isFunction(c)?i[a](function()
+{g=c.apply(this,arguments),g&&f.isFunction(g.promise)?g.promise().then(d.resolve,d.reject,d.notify):d[e+"With"](this===i?d:this,[g])}):i[a](d[e])})}).promise()},promise:function(a){if(a==null)a=h;else for(var b in h)a[b]=h[b];return a}},i=h.promise({}),j;for(j in g)i[j]=g[j].fire,i[j+"With"]=g[j].fireWith;i.done(function(){e="resolved"},c.disable,d.lock).fail(function(){e="rejected"},b.disable,d.lock),a&&a.call(i,i);return i},when:function(a){function m(a){return function(b){e[a]=arguments.length>1?i.call(arguments,0):b,j.notifyWith(k,e)}}function l(a){return function(c){b[a]=arguments.length>1?i.call(arguments,0):c,--g||j.resolveWith(j,b)}}var b=i.call(arguments,0),c=0,d=b.length,e=Array(d),g=d,h=d,j=d<=1&&a&&f.isFunction(a.promise)?a:f.Deferred(),k=j.promise();if(d>1){for(;c<d;c++)b[c]&&b[c].promise&&f.isFunction(b[c].promise)?b[c].promise().then(l(c),j.reject,m(c)):--g;g||j.resolveWith(j,b)}else j!==a&&j.resolveWith(j,d?[a]:[]);return k}}),f.support=function(){var b,d,e,g,h,i,j,k,l,m,n,o,p,q=c.createElement("div"),r=c.documentElement;q.setAttribute("className","t"),q.innerHTML="   <link/><table></table><a href='/a' style='top:1px;float:left;opacity:.55;'>a</a><input 
type='checkbox'/>",d=q.getElementsByTagName("*"),e=q.getElementsByTagName("a")[0];if(!d||!d.length||!e)return{};g=c.createElement("select"),h=g.appendChild(c.createElement("option")),i=q.getElementsByTagName("input")[0],b={leadingWhitespace:q.firstChild.nodeType===3,tbody:!q.getElementsByTagName("tbody").length,htmlSerialize:!!q.getElementsByTagName("link").length,style:/top/.test(e.getAttribute("style")),hrefNormalized:e.getAttribute("href")==="/a",opacity:/^0.55/.test(e.style.opacity),cssFloat:!!e.style.cssFloat,checkOn:i.value==="on",optSelected:h.selected,getSetAttribute:q.className!=="t",enctype:!!c.createElement("form").enctype,html5Clone:c.createElement("nav").cloneNode(!0).outerHTML!=="<:nav></:nav>",submitBubbles:!0,changeBubbles:!0,focusinBubbles:!1,deleteExpando:!0,noCloneEvent:!0,inlineBlockNeedsLayout:!1,shrinkWrapBlocks:!1,reliableMarginRight:!0},i.checked=!0,b.noCloneChecked=i.cloneNode(!0).checked,g.disabled=!0,b.optDisabled=!h.disabled;try{delete q.test}catch(s){b.deleteExpando=!1}!q.addEventListener&&q.attachEvent&&q.fireEvent&&(q.attachEvent("onclick",function(){b.noCloneEvent=!1}),q.cloneNode(!0).fireEvent("onclick")),i=c.createElement("input"),i.value="t",i.setAttribute("type","radio"),b.radioValue=i.value==="t",i.setAttribute("checked","checked"),q.appendChild(i),k=c.createDocumentFragment(),k.appendChild(q.lastChild),b.checkClone=k.cloneNode(!0).cloneNode(!0).lastChild.checked,b.appendChecked=i.checked,k.removeChild(i),k.appendChild(q),q.innerHTML="",a.getComputedStyle&&(j=c.createElement("div"),j.style.width="0",j.style.marginRight="0",q.style.width="2px",q.appendChild(j),b.reliableMarginRight=(parseInt((a.getComputedStyle(j,null)||{marginRight:0}).marginRight,10)||0)===0);if(q.attachEvent)for(o in{submit:1,change:1,focusin:1})n="on"+o,p=n in q,p||(q.setAttribute(n,"return;"),p=typeof q[n]=="function"),b[o+"Bubbles"]=p;k.removeChild(q),k=g=h=j=q=i=null,f(function(){var 
a,d,e,g,h,i,j,k,m,n,o,r=c.getElementsByTagName("body")[0];!r||(j=1,k="position:absolute;top:0;left:0;width:1px;height:1px;margin:0;",m="visibility:hidden;border:0;",n="style='"+k+"border:5px solid #000;padding:0;'",o="<div "+n+"><div></div></div>"+"<table "+n+" cellpadding='0' cellspacing='0'>"+"<tr><td></td></tr></table>",a=c.createElement("div"),a.style.cssText=m+"width:0;height:0;position:static;top:0;margin-top:"+j+"px",r.insertBefore(a,r.firstChild),q=c.createElement("div"),a.appendChild(q),q.innerHTML="<table><tr><td style='padding:0;border:0;display:none'></td><td>t</td></tr></table>",l=q.getElementsByTagName("td"),p=l[0].offsetHeight===0,l[0].style.display="",l[1].style.display="none",b.reliableHiddenOffsets=p&&l[0].offsetHeight===0,q.innerHTML="",q.style.width=q.style.paddingLeft="1px",f.boxModel=b.boxModel=q.offsetWidth===2,typeof q.style.zoom!="undefined"&&(q.style.display="inline",q.style.zoom=1,b.inlineBlockNeedsLayout=q.offsetWidth===2,q.style.display="",q.innerHTML="<div style='width:4px;'></div>",b.shrinkWrapBlocks=q.offsetWidth!==2),q.style.cssText=k+m,q.innerHTML=o,d=q.firstChild,e=d.firstChild,h=d.nextSibling.firstChild.firstChild,i={doesNotAddBorder:e.offsetTop!==5,doesAddBorderForTableAndCells:h.offsetTop===5},e.style.position="fixed",e.style.top="20px",i.fixedPosition=e.offsetTop===20||e.offsetTop===15,e.style.position=e.style.top="",d.style.overflow="hidden",d.style.position="relative",i.subtractsBorderForOverflowNotVisible=e.offsetTop===-5,i.doesNotIncludeMarginInBodyOffset=r.offsetTop!==j,r.removeChild(a),q=a=null,f.extend(b,i))});return b}();var j=/^(?:\{.*\}|\[.*\])$/,k=/([A-Z])/g;f.extend({cache:{},uuid:0,expando:"jQuery"+(f.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:!0,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:!0},hasData:function(a){a=a.nodeType?f.cache[a[f.expando]]:a[f.expando];return!!a&&!m(a)},data:function(a,c,d,e){if(!!f.acceptData(a)){var g,h,i,j=f.expando,k=typeof 
c=="string",l=a.nodeType,m=l?f.cache:a,n=l?a[j]:a[j]&&j,o=c==="events";if((!n||!m[n]||!o&&!e&&!m[n].data)&&k&&d===b)return;n||(l?a[j]=n=++f.uuid:n=j),m[n]||(m[n]={},l||(m[n].toJSON=f.noop));if(typeof c=="object"||typeof c=="function")e?m[n]=f.extend(m[n],c):m[n].data=f.extend(m[n].data,c);g=h=m[n],e||(h.data||(h.data={}),h=h.data),d!==b&&(h[f.camelCase(c)]=d);if(o&&!h[c])return g.events;k?(i=h[c],i==null&&(i=h[f.camelCase(c)])):i=h;return i}},removeData:function(a,b,c){if(!!f.acceptData(a)){var d,e,g,h=f.expando,i=a.nodeType,j=i?f.cache:a,k=i?a[h]:h;if(!j[k])return;if(b){d=c?j[k]:j[k].data;if(d){f.isArray(b)||(b in d?b=[b]:(b=f.camelCase(b),b in d?b=[b]:b=b.split(" ")));for(e=0,g=b.length;e<g;e++)delete d[b[e]];if(!(c?m:f.isEmptyObject)(d))return}}if(!c){delete j[k].data;if(!m(j[k]))return}f.support.deleteExpando||!j.setInterval?delete j[k]:j[k]=null,i&&(f.support.deleteExpando?delete a[h]:a.removeAttribute?a.removeAttribute(h):a[h]=null)}},_data:function(a,b,c){return f.data(a,b,c,!0)},acceptData:function(a){if(a.nodeName){var b=f.noData[a.nodeName.toLowerCase()];if(b)return b!==!0&&a.getAttribute("classid")===b}return!0}}),f.fn.extend({data:function(a,c){var d,e,g,h=null;if(typeof a=="undefined"){if(this.length){h=f.data(this[0]);if(this[0].nodeType===1&&!f._data(this[0],"parsedAttrs")){e=this[0].attributes;for(var i=0,j=e.length;i<j;i++)g=e[i].name,g.indexOf("data-")===0&&(g=f.camelCase(g.substring(5)),l(this[0],g,h[g]));f._data(this[0],"parsedAttrs",!0)}}return h}if(typeof a=="object")return this.each(function(){f.data(this,a)});d=a.split("."),d[1]=d[1]?"."+d[1]:"";if(c===b){h=this.triggerHandler("getData"+d[1]+"!",[d[0]]),h===b&&this.length&&(h=f.data(this[0],a),h=l(this[0],a,h));return h===b&&d[1]?this.data(d[0]):h}return this.each(function(){var b=f(this),e=[d[0],c];b.triggerHandler("setData"+d[1]+"!",e),f.data(this,a,c),b.triggerHandler("changeData"+d[1]+"!",e)})},removeData:function(a){return 
this.each(function(){f.removeData(this,a)})}}),f.extend({_mark:function(a,b){a&&(b=(b||"fx")+"mark",f._data(a,b,(f._data(a,b)||0)+1))},_unmark:function(a,b,c){a!==!0&&(c=b,b=a,a=!1);if(b){c=c||"fx";var d=c+"mark",e=a?0:(f._data(b,d)||1)-1;e?f._data(b,d,e):(f.removeData(b,d,!0),n(b,c,"mark"))}},queue:function(a,b,c){var d;if(a){b=(b||"fx")+"queue",d=f._data(a,b),c&&(!d||f.isArray(c)?d=f._data(a,b,f.makeArray(c)):d.push(c));return d||[]}},dequeue:function(a,b){b=b||"fx";var c=f.queue(a,b),d=c.shift(),e={};d==="inprogress"&&(d=c.shift()),d&&(b==="fx"&&c.unshift("inprogress"),f._data(a,b+".run",e),d.call(a,function(){f.dequeue(a,b)},e)),c.length||(f.removeData(a,b+"queue "+b+".run",!0),n(a,b,"queue"))}}),f.fn.extend({queue:function(a,c){typeof a!="string"&&(c=a,a="fx");if(c===b)return f.queue(this[0],a);return this.each(function(){var b=f.queue(this,a,c);a==="fx"&&b[0]!=="inprogress"&&f.dequeue(this,a)})},dequeue:function(a){return this.each(function(){f.dequeue(this,a)})},delay:function(a,b){a=f.fx?f.fx.speeds[a]||a:a,b=b||"fx";return this.queue(b,function(b,c){var d=setTimeout(b,a);c.stop=function(){clearTimeout(d)}})},clearQueue:function(a){return this.queue(a||"fx",[])},promise:function(a,c){function m(){--h||d.resolveWith(e,[e])}typeof a!="string"&&(c=a,a=b),a=a||"fx";var d=f.Deferred(),e=this,g=e.length,h=1,i=a+"defer",j=a+"queue",k=a+"mark",l;while(g--)if(l=f.data(e[g],i,b,!0)||(f.data(e[g],j,b,!0)||f.data(e[g],k,b,!0))&&f.data(e[g],i,f.Callbacks("once memory"),!0))h++,l.add(m);m();return d.promise()}});var o=/[\n\t\r]/g,p=/\s+/,q=/\r/g,r=/^(?:button|input)$/i,s=/^(?:button|input|object|select|textarea)$/i,t=/^a(?:rea)?$/i,u=/^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,v=f.support.getSetAttribute,w,x,y;f.fn.extend({attr:function(a,b){return f.access(this,a,b,!0,f.attr)},removeAttr:function(a){return this.each(function(){f.removeAttr(this,a)})},prop:function(a,b){return 
f.access(this,a,b,!0,f.prop)},removeProp:function(a){a=f.propFix[a]||a;return this.each(function(){try{this[a]=b,delete this[a]}catch(c){}})},addClass:function(a){var b,c,d,e,g,h,i;if(f.isFunction(a))return this.each(function(b){f(this).addClass(a.call(this,b,this.className))});if(a&&typeof a=="string"){b=a.split(p);for(c=0,d=this.length;c<d;c++){e=this[c];if(e.nodeType===1)if(!e.className&&b.length===1)e.className=a;else{g=" "+e.className+" ";for(h=0,i=b.length;h<i;h++)~g.indexOf(" "+b[h]+" ")||(g+=b[h]+" ");e.className=f.trim(g)}}}return this},removeClass:function(a){var c,d,e,g,h,i,j;if(f.isFunction(a))return this.each(function(b){f(this).removeClass(a.call(this,b,this.className))});if(a&&typeof a=="string"||a===b){c=(a||"").split(p);for(d=0,e=this.length;d<e;d++){g=this[d];if(g.nodeType===1&&g.className)if(a){h=(" "+g.className+" ").replace(o," ");for(i=0,j=c.length;i<j;i++)h=h.replace(" "+c[i]+" "," ");g.className=f.trim(h)}else g.className=""}}return this},toggleClass:function(a,b){var c=typeof a,d=typeof b=="boolean";if(f.isFunction(a))return this.each(function(c){f(this).toggleClass(a.call(this,c,this.className,b),b)});return this.each(function(){if(c==="string"){var e,g=0,h=f(this),i=b,j=a.split(p);while(e=j[g++])i=d?i:!h.hasClass(e),h[i?"addClass":"removeClass"](e)}else if(c==="undefined"||c==="boolean")this.className&&f._data(this,"__className__",this.className),this.className=this.className||a===!1?"":f._data(this,"__className__")||""})},hasClass:function(a){var b=" "+a+" ",c=0,d=this.length;for(;c<d;c++)if(this[c].nodeType===1&&(" "+this[c].className+" ").replace(o," ").indexOf(b)>-1)return!0;return!1},val:function(a){var c,d,e,g=this[0];{if(!!arguments.length){e=f.isFunction(a);return this.each(function(d){var g=f(this),h;if(this.nodeType===1){e?h=a.call(this,d,g.val()):h=a,h==null?h="":typeof h=="number"?h+="":f.isArray(h)&&(h=f.map(h,function(a){return 
a==null?"":a+""})),c=f.valHooks[this.nodeName.toLowerCase()]||f.valHooks[this.type];if(!c||!("set"in c)||c.set(this,h,"value")===b)this.value=h}})}if(g){c=f.valHooks[g.nodeName.toLowerCase()]||f.valHooks[g.type];if(c&&"get"in c&&(d=c.get(g,"value"))!==b)return d;d=g.value;return typeof d=="string"?d.replace(q,""):d==null?"":d}}}}),f.extend({valHooks:{option:{get:function(a){var b=a.attributes.value;return!b||b.specified?a.value:a.text}},select:{get:function(a){var b,c,d,e,g=a.selectedIndex,h=[],i=a.options,j=a.type==="select-one";if(g<0)return null;c=j?g:0,d=j?g+1:i.length;for(;c<d;c++){e=i[c];if(e.selected&&(f.support.optDisabled?!e.disabled:e.getAttribute("disabled")===null)&&(!e.parentNode.disabled||!f.nodeName(e.parentNode,"optgroup"))){b=f(e).val();if(j)return b;h.push(b)}}if(j&&!h.length&&i.length)return f(i[g]).val();return h},set:function(a,b){var c=f.makeArray(b);f(a).find("option").each(function(){this.selected=f.inArray(f(this).val(),c)>=0}),c.length||(a.selectedIndex=-1);return c}}},attrFn:{val:!0,css:!0,html:!0,text:!0,data:!0,width:!0,height:!0,offset:!0},attr:function(a,c,d,e){var g,h,i,j=a.nodeType;if(!!a&&j!==3&&j!==8&&j!==2){if(e&&c in f.attrFn)return f(a)[c](d);if(typeof a.getAttribute=="undefined")return f.prop(a,c,d);i=j!==1||!f.isXMLDoc(a),i&&(c=c.toLowerCase(),h=f.attrHooks[c]||(u.test(c)?x:w));if(d!==b){if(d===null){f.removeAttr(a,c);return}if(h&&"set"in h&&i&&(g=h.set(a,d,c))!==b)return g;a.setAttribute(c,""+d);return d}if(h&&"get"in h&&i&&(g=h.get(a,c))!==null)return g;g=a.getAttribute(c);return g===null?b:g}},removeAttr:function(a,b){var c,d,e,g,h=0;if(b&&a.nodeType===1){d=b.toLowerCase().split(p),g=d.length;for(;h<g;h++)e=d[h],e&&(c=f.propFix[e]||e,f.attr(a,e,""),a.removeAttribute(v?e:c),u.test(e)&&c in a&&(a[c]=!1))}},attrHooks:{type:{set:function(a,b){if(r.test(a.nodeName)&&a.parentNode)f.error("type property can't be changed");else if(!f.support.radioValue&&b==="radio"&&f.nodeName(a,"input")){var 
c=a.value;a.setAttribute("type",b),c&&(a.value=c);return b}}},value:{get:function(a,b){if(w&&f.nodeName(a,"button"))return w.get(a,b);return b in a?a.value:null},set:function(a,b,c){if(w&&f.nodeName(a,"button"))return w.set(a,b,c);a.value=b}}},propFix:{tabindex:"tabIndex",readonly:"readOnly","for":"htmlFor","class":"className",maxlength:"maxLength",cellspacing:"cellSpacing",cellpadding:"cellPadding",rowspan:"rowSpan",colspan:"colSpan",usemap:"useMap",frameborder:"frameBorder",contenteditable:"contentEditable"},prop:function(a,c,d){var e,g,h,i=a.nodeType;if(!!a&&i!==3&&i!==8&&i!==2){h=i!==1||!f.isXMLDoc(a),h&&(c=f.propFix[c]||c,g=f.propHooks[c]);return d!==b?g&&"set"in g&&(e=g.set(a,d,c))!==b?e:a[c]=d:g&&"get"in g&&(e=g.get(a,c))!==null?e:a[c]}},propHooks:{tabIndex:{get:function(a){var c=a.getAttributeNode("tabindex");return c&&c.specified?parseInt(c.value,10):s.test(a.nodeName)||t.test(a.nodeName)&&a.href?0:b}}}}),f.attrHooks.tabindex=f.propHooks.tabIndex,x={get:function(a,c){var d,e=f.prop(a,c);return e===!0||typeof e!="boolean"&&(d=a.getAttributeNode(c))&&d.nodeValue!==!1?c.toLowerCase():b},set:function(a,b,c){var d;b===!1?f.removeAttr(a,c):(d=f.propFix[c]||c,d in a&&(a[d]=!0),a.setAttribute(c,c.toLowerCase()));return c}},v||(y={name:!0,id:!0},w=f.valHooks.button={get:function(a,c){var d;d=a.getAttributeNode(c);return d&&(y[c]?d.nodeValue!=="":d.specified)?d.nodeValue:b},set:function(a,b,d){var e=a.getAttributeNode(d);e||(e=c.createAttribute(d),a.setAttributeNode(e));return e.nodeValue=b+""}},f.attrHooks.tabindex.set=w.set,f.each(["width","height"],function(a,b){f.attrHooks[b]=f.extend(f.attrHooks[b],{set:function(a,c){if(c===""){a.setAttribute(b,"auto");return c}}})}),f.attrHooks.contenteditable={get:w.get,set:function(a,b,c){b===""&&(b="false"),w.set(a,b,c)}}),f.support.hrefNormalized||f.each(["href","src","width","height"],function(a,c){f.attrHooks[c]=f.extend(f.attrHooks[c],{get:function(a){var d=a.getAttribute(c,2);return 
d===null?b:d}})}),f.support.style||(f.attrHooks.style={get:function(a){return a.style.cssText.toLowerCase()||b},set:function(a,b){return a.style.cssText=""+b}}),f.support.optSelected||(f.propHooks.selected=f.extend(f.propHooks.selected,{get:function(a){var b=a.parentNode;b&&(b.selectedIndex,b.parentNode&&b.parentNode.selectedIndex);return null}})),f.support.enctype||(f.propFix.enctype="encoding"),f.support.checkOn||f.each(["radio","checkbox"],function(){f.valHooks[this]={get:function(a){return a.getAttribute("value")===null?"on":a.value}}}),f.each(["radio","checkbox"],function(){f.valHooks[this]=f.extend(f.valHooks[this],{set:function(a,b){if(f.isArray(b))return a.checked=f.inArray(f(a).val(),b)>=0}})});var z=/^(?:textarea|input|select)$/i,A=/^([^\.]*)?(?:\.(.+))?$/,B=/\bhover(\.\S+)?\b/,C=/^key/,D=/^(?:mouse|contextmenu)|click/,E=/^(?:focusinfocus|focusoutblur)$/,F=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,G=function(a){var b=F.exec(a);b&&(b[1]=(b[1]||"").toLowerCase(),b[3]=b[3]&&new RegExp("(?:^|\\s)"+b[3]+"(?:\\s|$)"));return b},H=function(a,b){var c=a.attributes||{};return(!b[1]||a.nodeName.toLowerCase()===b[1])&&(!b[2]||(c.id||{}).value===b[2])&&(!b[3]||b[3].test((c["class"]||{}).value))},I=function(a){return f.event.special.hover?a:a.replace(B,"mouseenter$1 mouseleave$1")};
+f.event={add:function(a,c,d,e,g){var h,i,j,k,l,m,n,o,p,q,r,s;if(!(a.nodeType===3||a.nodeType===8||!c||!d||!(h=f._data(a)))){d.handler&&(p=d,d=p.handler),d.guid||(d.guid=f.guid++),j=h.events,j||(h.events=j={}),i=h.handle,i||(h.handle=i=function(a){return typeof f!="undefined"&&(!a||f.event.triggered!==a.type)?f.event.dispatch.apply(i.elem,arguments):b},i.elem=a),c=f.trim(I(c)).split(" ");for(k=0;k<c.length;k++){l=A.exec(c[k])||[],m=l[1],n=(l[2]||"").split(".").sort(),s=f.event.special[m]||{},m=(g?s.delegateType:s.bindType)||m,s=f.event.special[m]||{},o=f.extend({type:m,origType:l[1],data:e,handler:d,guid:d.guid,selector:g,quick:G(g),namespace:n.join(".")},p),r=j[m];if(!r){r=j[m]=[],r.delegateCount=0;if(!s.setup||s.setup.call(a,e,n,i)===!1)a.addEventListener?a.addEventListener(m,i,!1):a.attachEvent&&a.attachEvent("on"+m,i)}s.add&&(s.add.call(a,o),o.handler.guid||(o.handler.guid=d.guid)),g?r.splice(r.delegateCount++,0,o):r.push(o),f.event.global[m]=!0}a=null}},global:{},remove:function(a,b,c,d,e){var g=f.hasData(a)&&f._data(a),h,i,j,k,l,m,n,o,p,q,r,s;if(!!g&&!!(o=g.events)){b=f.trim(I(b||"")).split(" ");for(h=0;h<b.length;h++){i=A.exec(b[h])||[],j=k=i[1],l=i[2];if(!j){for(j in o)f.event.remove(a,j+b[h],c,d,!0);continue}p=f.event.special[j]||{},j=(d?p.delegateType:p.bindType)||j,r=o[j]||[],m=r.length,l=l?new RegExp("(^|\\.)"+l.split(".").sort().join("\\.(?:.*\\.)?")+"(\\.|$)"):null;for(n=0;n<r.length;n++)s=r[n],(e||k===s.origType)&&(!c||c.guid===s.guid)&&(!l||l.test(s.namespace))&&(!d||d===s.selector||d==="**"&&s.selector)&&(r.splice(n--,1),s.selector&&r.delegateCount--,p.remove&&p.remove.call(a,s));r.length===0&&m!==r.length&&((!p.teardown||p.teardown.call(a,l)===!1)&&f.removeEvent(a,j,g.handle),delete o[j])}f.isEmptyObject(o)&&(q=g.handle,q&&(q.elem=null),f.removeData(a,["events","handle"],!0))}},customEvent:{getData:!0,setData:!0,changeData:!0},trigger:function(c,d,e,g){if(!e||e.nodeType!==3&&e.nodeType!==8){var 
h=c.type||c,i=[],j,k,l,m,n,o,p,q,r,s;if(E.test(h+f.event.triggered))return;h.indexOf("!")>=0&&(h=h.slice(0,-1),k=!0),h.indexOf(".")>=0&&(i=h.split("."),h=i.shift(),i.sort());if((!e||f.event.customEvent[h])&&!f.event.global[h])return;c=typeof c=="object"?c[f.expando]?c:new f.Event(h,c):new f.Event(h),c.type=h,c.isTrigger=!0,c.exclusive=k,c.namespace=i.join("."),c.namespace_re=c.namespace?new RegExp("(^|\\.)"+i.join("\\.(?:.*\\.)?")+"(\\.|$)"):null,o=h.indexOf(":")<0?"on"+h:"";if(!e){j=f.cache;for(l in j)j[l].events&&j[l].events[h]&&f.event.trigger(c,d,j[l].handle.elem,!0);return}c.result=b,c.target||(c.target=e),d=d!=null?f.makeArray(d):[],d.unshift(c),p=f.event.special[h]||{};if(p.trigger&&p.trigger.apply(e,d)===!1)return;r=[[e,p.bindType||h]];if(!g&&!p.noBubble&&!f.isWindow(e)){s=p.delegateType||h,m=E.test(s+h)?e:e.parentNode,n=null;for(;m;m=m.parentNode)r.push([m,s]),n=m;n&&n===e.ownerDocument&&r.push([n.defaultView||n.parentWindow||a,s])}for(l=0;l<r.length&&!c.isPropagationStopped();l++)m=r[l][0],c.type=r[l][1],q=(f._data(m,"events")||{})[c.type]&&f._data(m,"handle"),q&&q.apply(m,d),q=o&&m[o],q&&f.acceptData(m)&&q.apply(m,d)===!1&&c.preventDefault();c.type=h,!g&&!c.isDefaultPrevented()&&(!p._default||p._default.apply(e.ownerDocument,d)===!1)&&(h!=="click"||!f.nodeName(e,"a"))&&f.acceptData(e)&&o&&e[h]&&(h!=="focus"&&h!=="blur"||c.target.offsetWidth!==0)&&!f.isWindow(e)&&(n=e[o],n&&(e[o]=null),f.event.triggered=h,e[h](),f.event.triggered=b,n&&(e[o]=n));return c.result}},dispatch:function(c){c=f.event.fix(c||a.event);var 
d=(f._data(this,"events")||{})[c.type]||[],e=d.delegateCount,g=[].slice.call(arguments,0),h=!c.exclusive&&!c.namespace,i=[],j,k,l,m,n,o,p,q,r,s,t;g[0]=c,c.delegateTarget=this;if(e&&!c.target.disabled&&(!c.button||c.type!=="click")){m=f(this),m.context=this.ownerDocument||this;for(l=c.target;l!=this;l=l.parentNode||this){o={},q=[],m[0]=l;for(j=0;j<e;j++)r=d[j],s=r.selector,o[s]===b&&(o[s]=r.quick?H(l,r.quick):m.is(s)),o[s]&&q.push(r);q.length&&i.push({elem:l,matches:q})}}d.length>e&&i.push({elem:this,matches:d.slice(e)});for(j=0;j<i.length&&!c.isPropagationStopped();j++){p=i[j],c.currentTarget=p.elem;for(k=0;k<p.matches.length&&!c.isImmediatePropagationStopped();k++){r=p.matches[k];if(h||!c.namespace&&!r.namespace||c.namespace_re&&c.namespace_re.test(r.namespace))c.data=r.data,c.handleObj=r,n=((f.event.special[r.origType]||{}).handle||r.handler).apply(p.elem,g),n!==b&&(c.result=n,n===!1&&(c.preventDefault(),c.stopPropagation()))}}return c.result},props:"attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "),fixHooks:{},keyHooks:{props:"char charCode key keyCode".split(" "),filter:function(a,b){a.which==null&&(a.which=b.charCode!=null?b.charCode:b.keyCode);return a}},mouseHooks:{props:"button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "),filter:function(a,d){var e,f,g,h=d.button,i=d.fromElement;a.pageX==null&&d.clientX!=null&&(e=a.target.ownerDocument||c,f=e.documentElement,g=e.body,a.pageX=d.clientX+(f&&f.scrollLeft||g&&g.scrollLeft||0)-(f&&f.clientLeft||g&&g.clientLeft||0),a.pageY=d.clientY+(f&&f.scrollTop||g&&g.scrollTop||0)-(f&&f.clientTop||g&&g.clientTop||0)),!a.relatedTarget&&i&&(a.relatedTarget=i===a.target?d.toElement:i),!a.which&&h!==b&&(a.which=h&1?1:h&2?3:h&4?2:0);return a}},fix:function(a){if(a[f.expando])return a;var 
d,e,g=a,h=f.event.fixHooks[a.type]||{},i=h.props?this.props.concat(h.props):this.props;a=f.Event(g);for(d=i.length;d;)e=i[--d],a[e]=g[e];a.target||(a.target=g.srcElement||c),a.target.nodeType===3&&(a.target=a.target.parentNode),a.metaKey===b&&(a.metaKey=a.ctrlKey);return h.filter?h.filter(a,g):a},special:{ready:{setup:f.bindReady},load:{noBubble:!0},focus:{delegateType:"focusin"},blur:{delegateType:"focusout"},beforeunload:{setup:function(a,b,c){f.isWindow(this)&&(this.onbeforeunload=c)},teardown:function(a,b){this.onbeforeunload===b&&(this.onbeforeunload=null)}}},simulate:function(a,b,c,d){var e=f.extend(new f.Event,c,{type:a,isSimulated:!0,originalEvent:{}});d?f.event.trigger(e,null,b):f.event.dispatch.call(b,e),e.isDefaultPrevented()&&c.preventDefault()}},f.event.handle=f.event.dispatch,f.removeEvent=c.removeEventListener?function(a,b,c){a.removeEventListener&&a.removeEventListener(b,c,!1)}:function(a,b,c){a.detachEvent&&a.detachEvent("on"+b,c)},f.Event=function(a,b){if(!(this instanceof f.Event))return new f.Event(a,b);a&&a.type?(this.originalEvent=a,this.type=a.type,this.isDefaultPrevented=a.defaultPrevented||a.returnValue===!1||a.getPreventDefault&&a.getPreventDefault()?K:J):this.type=a,b&&f.extend(this,b),this.timeStamp=a&&a.timeStamp||f.now(),this[f.expando]=!0},f.Event.prototype={preventDefault:function(){this.isDefaultPrevented=K;var a=this.originalEvent;!a||(a.preventDefault?a.preventDefault():a.returnValue=!1)},stopPropagation:function(){this.isPropagationStopped=K;var a=this.originalEvent;!a||(a.stopPropagation&&a.stopPropagation(),a.cancelBubble=!0)},stopImmediatePropagation:function(){this.isImmediatePropagationStopped=K,this.stopPropagation()},isDefaultPrevented:J,isPropagationStopped:J,isImmediatePropagationStopped:J},f.each({mouseenter:"mouseover",mouseleave:"mouseout"},function(a,b){f.event.special[a]={delegateType:b,bindType:b,handle:function(a){var 
c=this,d=a.relatedTarget,e=a.handleObj,g=e.selector,h;if(!d||d!==c&&!f.contains(c,d))a.type=e.origType,h=e.handler.apply(this,arguments),a.type=b;return h}}}),f.support.submitBubbles||(f.event.special.submit={setup:function(){if(f.nodeName(this,"form"))return!1;f.event.add(this,"click._submit keypress._submit",function(a){var c=a.target,d=f.nodeName(c,"input")||f.nodeName(c,"button")?c.form:b;d&&!d._submit_attached&&(f.event.add(d,"submit._submit",function(a){this.parentNode&&!a.isTrigger&&f.event.simulate("submit",this.parentNode,a,!0)}),d._submit_attached=!0)})},teardown:function(){if(f.nodeName(this,"form"))return!1;f.event.remove(this,"._submit")}}),f.support.changeBubbles||(f.event.special.change={setup:function(){if(z.test(this.nodeName)){if(this.type==="checkbox"||this.type==="radio")f.event.add(this,"propertychange._change",function(a){a.originalEvent.propertyName==="checked"&&(this._just_changed=!0)}),f.event.add(this,"click._change",function(a){this._just_changed&&!a.isTrigger&&(this._just_changed=!1,f.event.simulate("change",this,a,!0))});return!1}f.event.add(this,"beforeactivate._change",function(a){var b=a.target;z.test(b.nodeName)&&!b._change_attached&&(f.event.add(b,"change._change",function(a){this.parentNode&&!a.isSimulated&&!a.isTrigger&&f.event.simulate("change",this.parentNode,a,!0)}),b._change_attached=!0)})},handle:function(a){var b=a.target;if(this!==b||a.isSimulated||a.isTrigger||b.type!=="radio"&&b.type!=="checkbox")return a.handleObj.handler.apply(this,arguments)},teardown:function(){f.event.remove(this,"._change");return z.test(this.nodeName)}}),f.support.focusinBubbles||f.each({focus:"focusin",blur:"focusout"},function(a,b){var d=0,e=function(a){f.event.simulate(b,a.target,f.event.fix(a),!0)};f.event.special[b]={setup:function(){d++===0&&c.addEventListener(a,e,!0)},teardown:function(){--d===0&&c.removeEventListener(a,e,!0)}}}),f.fn.extend({on:function(a,c,d,e,g){var h,i;if(typeof a=="object"){typeof c!="string"&&(d=c,c=b);for(i in 
a)this.on(i,c,d,a[i],g);return this}d==null&&e==null?(e=c,d=c=b):e==null&&(typeof c=="string"?(e=d,d=b):(e=d,d=c,c=b));if(e===!1)e=J;else if(!e)return this;g===1&&(h=e,e=function(a){f().off(a);return h.apply(this,arguments)},e.guid=h.guid||(h.guid=f.guid++));return this.each(function(){f.event.add(this,a,e,d,c)})},one:function(a,b,c,d){return this.on.call(this,a,b,c,d,1)},off:function(a,c,d){if(a&&a.preventDefault&&a.handleObj){var e=a.handleObj;f(a.delegateTarget).off(e.namespace?e.type+"."+e.namespace:e.type,e.selector,e.handler);return this}if(typeof a=="object"){for(var g in a)this.off(g,c,a[g]);return this}if(c===!1||typeof c=="function")d=c,c=b;d===!1&&(d=J);return this.each(function(){f.event.remove(this,a,d,c)})},bind:function(a,b,c){return this.on(a,null,b,c)},unbind:function(a,b){return this.off(a,null,b)},live:function(a,b,c){f(this.context).on(a,this.selector,b,c);return this},die:function(a,b){f(this.context).off(a,this.selector||"**",b);return this},delegate:function(a,b,c,d){return this.on(b,a,c,d)},undelegate:function(a,b,c){return arguments.length==1?this.off(a,"**"):this.off(b,a,c)},trigger:function(a,b){return this.each(function(){f.event.trigger(a,b,this)})},triggerHandler:function(a,b){if(this[0])return f.event.trigger(a,b,this[0],!0)},toggle:function(a){var b=arguments,c=a.guid||f.guid++,d=0,e=function(c){var e=(f._data(this,"lastToggle"+a.guid)||0)%d;f._data(this,"lastToggle"+a.guid,e+1),c.preventDefault();return b[e].apply(this,arguments)||!1};e.guid=c;while(d<b.length)b[d++].guid=c;return this.click(e)},hover:function(a,b){return this.mouseenter(a).mouseleave(b||a)}}),f.each("blur focus focusin focusout load resize scroll unload click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup error contextmenu".split(" "),function(a,b){f.fn[b]=function(a,c){c==null&&(c=a,a=null);return 
arguments.length>0?this.on(b,null,a,c):this.trigger(b)},f.attrFn&&(f.attrFn[b]=!0),C.test(b)&&(f.event.fixHooks[b]=f.event.keyHooks),D.test(b)&&(f.event.fixHooks[b]=f.event.mouseHooks)}),function(){function x(a,b,c,e,f,g){for(var h=0,i=e.length;h<i;h++){var j=e[h];if(j){var k=!1;j=j[a];while(j){if(j[d]===c){k=e[j.sizset];break}if(j.nodeType===1){g||(j[d]=c,j.sizset=h);if(typeof b!="string"){if(j===b){k=!0;break}}else if(m.filter(b,[j]).length>0){k=j;break}}j=j[a]}e[h]=k}}}function w(a,b,c,e,f,g){for(var h=0,i=e.length;h<i;h++){var j=e[h];if(j){var k=!1;j=j[a];while(j){if(j[d]===c){k=e[j.sizset];break}j.nodeType===1&&!g&&(j[d]=c,j.sizset=h);if(j.nodeName.toLowerCase()===b){k=j;break}j=j[a]}e[h]=k}}}var a=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,d="sizcache"+(Math.random()+"").replace(".",""),e=0,g=Object.prototype.toString,h=!1,i=!0,j=/\\/g,k=/\r\n/g,l=/\W/;[0,0].sort(function(){i=!1;return 0});var m=function(b,d,e,f){e=e||[],d=d||c;var h=d;if(d.nodeType!==1&&d.nodeType!==9)return[];if(!b||typeof b!="string")return e;var i,j,k,l,n,q,r,t,u=!0,v=m.isXML(d),w=[],x=b;do{a.exec(""),i=a.exec(x);if(i){x=i[3],w.push(i[1]);if(i[2]){l=i[3];break}}}while(i);if(w.length>1&&p.exec(b))if(w.length===2&&o.relative[w[0]])j=y(w[0]+w[1],d,f);else{j=o.relative[w[0]]?[d]:m(w.shift(),d);while(w.length)b=w.shift(),o.relative[b]&&(b+=w.shift()),j=y(b,j,f)}else{!f&&w.length>1&&d.nodeType===9&&!v&&o.match.ID.test(w[0])&&!o.match.ID.test(w[w.length-1])&&(n=m.find(w.shift(),d,v),d=n.expr?m.filter(n.expr,n.set)[0]:n.set[0]);if(d){n=f?{expr:w.pop(),set:s(f)}:m.find(w.pop(),w.length===1&&(w[0]==="~"||w[0]==="+")&&d.parentNode?d.parentNode:d,v),j=n.expr?m.filter(n.expr,n.set):n.set,w.length>0?k=s(j):u=!1;while(w.length)q=w.pop(),r=q,o.relative[q]?r=w.pop():q="",r==null&&(r=d),o.relative[q](k,r,v)}else k=w=[]}k||(k=j),k||m.error(q||b);if(g.call(k)==="[object Array]")if(!u)e.push.apply(e,k);else 
if(d&&d.nodeType===1)for(t=0;k[t]!=null;t++)k[t]&&(k[t]===!0||k[t].nodeType===1&&m.contains(d,k[t]))&&e.push(j[t]);else for(t=0;k[t]!=null;t++)k[t]&&k[t].nodeType===1&&e.push(j[t]);else s(k,e);l&&(m(l,h,e,f),m.uniqueSort(e));return e};m.uniqueSort=function(a){if(u){h=i,a.sort(u);if(h)for(var b=1;b<a.length;b++)a[b]===a[b-1]&&a.splice(b--,1)}return a},m.matches=function(a,b){return m(a,null,null,b)},m.matchesSelector=function(a,b){return m(b,null,null,[a]).length>0},m.find=function(a,b,c){var d,e,f,g,h,i;if(!a)return[];for(e=0,f=o.order.length;e<f;e++){h=o.order[e];if(g=o.leftMatch[h].exec(a)){i=g[1],g.splice(1,1);if(i.substr(i.length-1)!=="\\"){g[1]=(g[1]||"").replace(j,""),d=o.find[h](g,b,c);if(d!=null){a=a.replace(o.match[h],"");break}}}}d||(d=typeof b.getElementsByTagName!="undefined"?b.getElementsByTagName("*"):[]);return{set:d,expr:a}},m.filter=function(a,c,d,e){var f,g,h,i,j,k,l,n,p,q=a,r=[],s=c,t=c&&c[0]&&m.isXML(c[0]);while(a&&c.length){for(h in o.filter)if((f=o.leftMatch[h].exec(a))!=null&&f[2]){k=o.filter[h],l=f[1],g=!1,f.splice(1,1);if(l.substr(l.length-1)==="\\")continue;s===r&&(r=[]);if(o.preFilter[h]){f=o.preFilter[h](f,s,d,r,e,t);if(!f)g=i=!0;else if(f===!0)continue}if(f)for(n=0;(j=s[n])!=null;n++)j&&(i=k(j,f,n,s),p=e^i,d&&i!=null?p?g=!0:s[n]=!1:p&&(r.push(j),g=!0));if(i!==b){d||(s=r),a=a.replace(o.match[h],"");if(!g)return[];break}}if(a===q)if(g==null)m.error(a);else break;q=a}return s},m.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)};var n=m.getText=function(a){var b,c,d=a.nodeType,e="";if(d){if(d===1||d===9){if(typeof a.textContent=="string")return a.textContent;if(typeof a.innerText=="string")return a.innerText.replace(k,"");for(a=a.firstChild;a;a=a.nextSibling)e+=n(a)}else if(d===3||d===4)return a.nodeValue}else for(b=0;c=a[b];b++)c.nodeType!==8&&(e+=n(c));return 
e},o=m.selectors={order:["ID","NAME","TAG"],match:{ID:/#((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,CLASS:/\.((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,NAME:/\[name=['"]*((?:[\w\u00c0-\uFFFF\-]|\\.)+)['"]*\]/,ATTR:/\[\s*((?:[\w\u00c0-\uFFFF\-]|\\.)+)\s*(?:(\S?=)\s*(?:(['"])(.*?)\3|(#?(?:[\w\u00c0-\uFFFF\-]|\\.)*)|)|)\s*\]/,TAG:/^((?:[\w\u00c0-\uFFFF\*\-]|\\.)+)/,CHILD:/:(only|nth|last|first)-child(?:\(\s*(even|odd|(?:[+\-]?\d+|(?:[+\-]?\d*)?n\s*(?:[+\-]\s*\d+)?))\s*\))?/,POS:/:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^\-]|$)/,PSEUDO:/:((?:[\w\u00c0-\uFFFF\-]|\\.)+)(?:\((['"]?)((?:\([^\)]+\)|[^\(\)]*)+)\2\))?/},leftMatch:{},attrMap:{"class":"className","for":"htmlFor"},attrHandle:{href:function(a){return a.getAttribute("href")},type:function(a){return a.getAttribute("type")}},relative:{"+":function(a,b){var c=typeof b=="string",d=c&&!l.test(b),e=c&&!d;d&&(b=b.toLowerCase());for(var f=0,g=a.length,h;f<g;f++)if(h=a[f]){while((h=h.previousSibling)&&h.nodeType!==1);a[f]=e||h&&h.nodeName.toLowerCase()===b?h||!1:h===b}e&&m.filter(b,a,!0)},">":function(a,b){var c,d=typeof b=="string",e=0,f=a.length;if(d&&!l.test(b)){b=b.toLowerCase();for(;e<f;e++){c=a[e];if(c){var g=c.parentNode;a[e]=g.nodeName.toLowerCase()===b?g:!1}}}else{for(;e<f;e++)c=a[e],c&&(a[e]=d?c.parentNode:c.parentNode===b);d
+&&m.filter(b,a,!0)}},"":function(a,b,c){var d,f=e++,g=x;typeof b=="string"&&!l.test(b)&&(b=b.toLowerCase(),d=b,g=w),g("parentNode",b,f,a,d,c)},"~":function(a,b,c){var d,f=e++,g=x;typeof b=="string"&&!l.test(b)&&(b=b.toLowerCase(),d=b,g=w),g("previousSibling",b,f,a,d,c)}},find:{ID:function(a,b,c){if(typeof b.getElementById!="undefined"&&!c){var d=b.getElementById(a[1]);return d&&d.parentNode?[d]:[]}},NAME:function(a,b){if(typeof b.getElementsByName!="undefined"){var c=[],d=b.getElementsByName(a[1]);for(var e=0,f=d.length;e<f;e++)d[e].getAttribute("name")===a[1]&&c.push(d[e]);return c.length===0?null:c}},TAG:function(a,b){if(typeof b.getElementsByTagName!="undefined")return b.getElementsByTagName(a[1])}},preFilter:{CLASS:function(a,b,c,d,e,f){a=" "+a[1].replace(j,"")+" ";if(f)return a;for(var g=0,h;(h=b[g])!=null;g++)h&&(e^(h.className&&(" "+h.className+" ").replace(/[\t\n\r]/g," ").indexOf(a)>=0)?c||d.push(h):c&&(b[g]=!1));return!1},ID:function(a){return a[1].replace(j,"")},TAG:function(a,b){return a[1].replace(j,"").toLowerCase()},CHILD:function(a){if(a[1]==="nth"){a[2]||m.error(a[0]),a[2]=a[2].replace(/^\+|\s*/g,"");var b=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(a[2]==="even"&&"2n"||a[2]==="odd"&&"2n+1"||!/\D/.test(a[2])&&"0n+"+a[2]||a[2]);a[2]=b[1]+(b[2]||1)-0,a[3]=b[3]-0}else a[2]&&m.error(a[0]);a[0]=e++;return a},ATTR:function(a,b,c,d,e,f){var g=a[1]=a[1].replace(j,"");!f&&o.attrMap[g]&&(a[1]=o.attrMap[g]),a[4]=(a[4]||a[5]||"").replace(j,""),a[2]==="~="&&(a[4]=" "+a[4]+" ");return a},PSEUDO:function(b,c,d,e,f){if(b[1]==="not")if((a.exec(b[3])||"").length>1||/^\w/.test(b[3]))b[3]=m(b[3],null,null,c);else{var g=m.filter(b[3],c,d,!0^f);d||e.push.apply(e,g);return!1}else if(o.match.POS.test(b[0])||o.match.CHILD.test(b[0]))return!0;return b},POS:function(a){a.unshift(!0);return a}},filters:{enabled:function(a){return a.disabled===!1&&a.type!=="hidden"},disabled:function(a){return a.disabled===!0},checked:function(a){return 
a.checked===!0},selected:function(a){a.parentNode&&a.parentNode.selectedIndex;return a.selected===!0},parent:function(a){return!!a.firstChild},empty:function(a){return!a.firstChild},has:function(a,b,c){return!!m(c[3],a).length},header:function(a){return/h\d/i.test(a.nodeName)},text:function(a){var b=a.getAttribute("type"),c=a.type;return a.nodeName.toLowerCase()==="input"&&"text"===c&&(b===c||b===null)},radio:function(a){return a.nodeName.toLowerCase()==="input"&&"radio"===a.type},checkbox:function(a){return a.nodeName.toLowerCase()==="input"&&"checkbox"===a.type},file:function(a){return a.nodeName.toLowerCase()==="input"&&"file"===a.type},password:function(a){return a.nodeName.toLowerCase()==="input"&&"password"===a.type},submit:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"submit"===a.type},image:function(a){return a.nodeName.toLowerCase()==="input"&&"image"===a.type},reset:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"reset"===a.type},button:function(a){var b=a.nodeName.toLowerCase();return b==="input"&&"button"===a.type||b==="button"},input:function(a){return/input|select|textarea|button/i.test(a.nodeName)},focus:function(a){return a===a.ownerDocument.activeElement}},setFilters:{first:function(a,b){return b===0},last:function(a,b,c,d){return b===d.length-1},even:function(a,b){return b%2===0},odd:function(a,b){return b%2===1},lt:function(a,b,c){return b<c[3]-0},gt:function(a,b,c){return b>c[3]-0},nth:function(a,b,c){return c[3]-0===b},eq:function(a,b,c){return c[3]-0===b}},filter:{PSEUDO:function(a,b,c,d){var e=b[1],f=o.filters[e];if(f)return f(a,c,b,d);if(e==="contains")return(a.textContent||a.innerText||n([a])||"").indexOf(b[3])>=0;if(e==="not"){var g=b[3];for(var h=0,i=g.length;h<i;h++)if(g[h]===a)return!1;return!0}m.error(e)},CHILD:function(a,b){var 
c,e,f,g,h,i,j,k=b[1],l=a;switch(k){case"only":case"first":while(l=l.previousSibling)if(l.nodeType===1)return!1;if(k==="first")return!0;l=a;case"last":while(l=l.nextSibling)if(l.nodeType===1)return!1;return!0;case"nth":c=b[2],e=b[3];if(c===1&&e===0)return!0;f=b[0],g=a.parentNode;if(g&&(g[d]!==f||!a.nodeIndex)){i=0;for(l=g.firstChild;l;l=l.nextSibling)l.nodeType===1&&(l.nodeIndex=++i);g[d]=f}j=a.nodeIndex-e;return c===0?j===0:j%c===0&&j/c>=0}},ID:function(a,b){return a.nodeType===1&&a.getAttribute("id")===b},TAG:function(a,b){return b==="*"&&a.nodeType===1||!!a.nodeName&&a.nodeName.toLowerCase()===b},CLASS:function(a,b){return(" "+(a.className||a.getAttribute("class"))+" ").indexOf(b)>-1},ATTR:function(a,b){var c=b[1],d=m.attr?m.attr(a,c):o.attrHandle[c]?o.attrHandle[c](a):a[c]!=null?a[c]:a.getAttribute(c),e=d+"",f=b[2],g=b[4];return d==null?f==="!=":!f&&m.attr?d!=null:f==="="?e===g:f==="*="?e.indexOf(g)>=0:f==="~="?(" "+e+" ").indexOf(g)>=0:g?f==="!="?e!==g:f==="^="?e.indexOf(g)===0:f==="$="?e.substr(e.length-g.length)===g:f==="|="?e===g||e.substr(0,g.length+1)===g+"-":!1:e&&d!==!1},POS:function(a,b,c,d){var e=b[2],f=o.setFilters[e];if(f)return f(a,c,b,d)}}},p=o.match.POS,q=function(a,b){return"\\"+(b-0+1)};for(var r in o.match)o.match[r]=new RegExp(o.match[r].source+/(?![^\[]*\])(?![^\(]*\))/.source),o.leftMatch[r]=new RegExp(/(^(?:.|\r|\n)*?)/.source+o.match[r].source.replace(/\\(\d+)/g,q));var s=function(a,b){a=Array.prototype.slice.call(a,0);if(b){b.push.apply(b,a);return b}return a};try{Array.prototype.slice.call(c.documentElement.childNodes,0)[0].nodeType}catch(t){s=function(a,b){var c=0,d=b||[];if(g.call(a)==="[object Array]")Array.prototype.push.apply(d,a);else if(typeof a.length=="number")for(var e=a.length;c<e;c++)d.push(a[c]);else for(;a[c];c++)d.push(a[c]);return d}}var u,v;c.documentElement.compareDocumentPosition?u=function(a,b){if(a===b){h=!0;return 0}if(!a.compareDocumentPosition||!b.compareDocumentPosition)return 
a.compareDocumentPosition?-1:1;return a.compareDocumentPosition(b)&4?-1:1}:(u=function(a,b){if(a===b){h=!0;return 0}if(a.sourceIndex&&b.sourceIndex)return a.sourceIndex-b.sourceIndex;var c,d,e=[],f=[],g=a.parentNode,i=b.parentNode,j=g;if(g===i)return v(a,b);if(!g)return-1;if(!i)return 1;while(j)e.unshift(j),j=j.parentNode;j=i;while(j)f.unshift(j),j=j.parentNode;c=e.length,d=f.length;for(var k=0;k<c&&k<d;k++)if(e[k]!==f[k])return v(e[k],f[k]);return k===c?v(a,f[k],-1):v(e[k],b,1)},v=function(a,b,c){if(a===b)return c;var d=a.nextSibling;while(d){if(d===b)return-1;d=d.nextSibling}return 1}),function(){var a=c.createElement("div"),d="script"+(new Date).getTime(),e=c.documentElement;a.innerHTML="<a name='"+d+"'/>",e.insertBefore(a,e.firstChild),c.getElementById(d)&&(o.find.ID=function(a,c,d){if(typeof c.getElementById!="undefined"&&!d){var e=c.getElementById(a[1]);return e?e.id===a[1]||typeof e.getAttributeNode!="undefined"&&e.getAttributeNode("id").nodeValue===a[1]?[e]:b:[]}},o.filter.ID=function(a,b){var c=typeof a.getAttributeNode!="undefined"&&a.getAttributeNode("id");return a.nodeType===1&&c&&c.nodeValue===b}),e.removeChild(a),e=a=null}(),function(){var a=c.createElement("div");a.appendChild(c.createComment("")),a.getElementsByTagName("*").length>0&&(o.find.TAG=function(a,b){var c=b.getElementsByTagName(a[1]);if(a[1]==="*"){var d=[];for(var e=0;c[e];e++)c[e].nodeType===1&&d.push(c[e]);c=d}return c}),a.innerHTML="<a href='#'></a>",a.firstChild&&typeof a.firstChild.getAttribute!="undefined"&&a.firstChild.getAttribute("href")!=="#"&&(o.attrHandle.href=function(a){return a.getAttribute("href",2)}),a=null}(),c.querySelectorAll&&function(){var a=m,b=c.createElement("div"),d="__sizzle__";b.innerHTML="<p class='TEST'></p>";if(!b.querySelectorAll||b.querySelectorAll(".TEST").length!==0){m=function(b,e,f,g){e=e||c;if(!g&&!m.isXML(e)){var h=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b);if(h&&(e.nodeType===1||e.nodeType===9)){if(h[1])return 
s(e.getElementsByTagName(b),f);if(h[2]&&o.find.CLASS&&e.getElementsByClassName)return s(e.getElementsByClassName(h[2]),f)}if(e.nodeType===9){if(b==="body"&&e.body)return s([e.body],f);if(h&&h[3]){var i=e.getElementById(h[3]);if(!i||!i.parentNode)return s([],f);if(i.id===h[3])return s([i],f)}try{return s(e.querySelectorAll(b),f)}catch(j){}}else if(e.nodeType===1&&e.nodeName.toLowerCase()!=="object"){var k=e,l=e.getAttribute("id"),n=l||d,p=e.parentNode,q=/^\s*[+~]/.test(b);l?n=n.replace(/'/g,"\\$&"):e.setAttribute("id",n),q&&p&&(e=e.parentNode);try{if(!q||p)return s(e.querySelectorAll("[id='"+n+"'] "+b),f)}catch(r){}finally{l||k.removeAttribute("id")}}}return a(b,e,f,g)};for(var e in a)m[e]=a[e];b=null}}(),function(){var a=c.documentElement,b=a.matchesSelector||a.mozMatchesSelector||a.webkitMatchesSelector||a.msMatchesSelector;if(b){var d=!b.call(c.createElement("div"),"div"),e=!1;try{b.call(c.documentElement,"[test!='']:sizzle")}catch(f){e=!0}m.matchesSelector=function(a,c){c=c.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!m.isXML(a))try{if(e||!o.match.PSEUDO.test(c)&&!/!=/.test(c)){var f=b.call(a,c);if(f||!d||a.document&&a.document.nodeType!==11)return f}}catch(g){}return m(c,null,null,[a]).length>0}}}(),function(){var a=c.createElement("div");a.innerHTML="<div class='test e'></div><div class='test'></div>";if(!!a.getElementsByClassName&&a.getElementsByClassName("e").length!==0){a.lastChild.className="e";if(a.getElementsByClassName("e").length===1)return;o.order.splice(1,0,"CLASS"),o.find.CLASS=function(a,b,c){if(typeof b.getElementsByClassName!="undefined"&&!c)return b.getElementsByClassName(a[1])},a=null}}(),c.documentElement.contains?m.contains=function(a,b){return a!==b&&(a.contains?a.contains(b):!0)}:c.documentElement.compareDocumentPosition?m.contains=function(a,b){return!!(a.compareDocumentPosition(b)&16)}:m.contains=function(){return!1},m.isXML=function(a){var b=(a?a.ownerDocument||a:0).documentElement;return b?b.nodeName!=="HTML":!1};var 
y=function(a,b,c){var d,e=[],f="",g=b.nodeType?[b]:b;while(d=o.match.PSEUDO.exec(a))f+=d[0],a=a.replace(o.match.PSEUDO,"");a=o.relative[a]?a+"*":a;for(var h=0,i=g.length;h<i;h++)m(a,g[h],e,c);return m.filter(f,e)};m.attr=f.attr,m.selectors.attrMap={},f.find=m,f.expr=m.selectors,f.expr[":"]=f.expr.filters,f.unique=m.uniqueSort,f.text=m.getText,f.isXMLDoc=m.isXML,f.contains=m.contains}();var L=/Until$/,M=/^(?:parents|prevUntil|prevAll)/,N=/,/,O=/^.[^:#\[\.,]*$/,P=Array.prototype.slice,Q=f.expr.match.POS,R={children:!0,contents:!0,next:!0,prev:!0};f.fn.extend({find:function(a){var b=this,c,d;if(typeof a!="string")return f(a).filter(function(){for(c=0,d=b.length;c<d;c++)if(f.contains(b[c],this))return!0});var e=this.pushStack("","find",a),g,h,i;for(c=0,d=this.length;c<d;c++){g=e.length,f.find(a,this[c],e);if(c>0)for(h=g;h<e.length;h++)for(i=0;i<g;i++)if(e[i]===e[h]){e.splice(h--,1);break}}return e},has:function(a){var b=f(a);return this.filter(function(){for(var a=0,c=b.length;a<c;a++)if(f.contains(this,b[a]))return!0})},not:function(a){return this.pushStack(T(this,a,!1),"not",a)},filter:function(a){return this.pushStack(T(this,a,!0),"filter",a)},is:function(a){return!!a&&(typeof a=="string"?Q.test(a)?f(a,this.context).index(this[0])>=0:f.filter(a,this).length>0:this.filter(a).length>0)},closest:function(a,b){var c=[],d,e,g=this[0];if(f.isArray(a)){var h=1;while(g&&g.ownerDocument&&g!==b){for(d=0;d<a.length;d++)f(g).is(a[d])&&c.push({selector:a[d],elem:g,level:h});g=g.parentNode,h++}return c}var i=Q.test(a)||typeof a!="string"?f(a,b||this.context):0;for(d=0,e=this.length;d<e;d++){g=this[d];while(g){if(i?i.index(g)>-1:f.find.matchesSelector(g,a)){c.push(g);break}g=g.parentNode;if(!g||!g.ownerDocument||g===b||g.nodeType===11)break}}c=c.length>1?f.unique(c):c;return this.pushStack(c,"closest",a)},index:function(a){if(!a)return this[0]&&this[0].parentNode?this.prevAll().length:-1;if(typeof a=="string")return f.inArray(this[0],f(a));return 
f.inArray(a.jquery?a[0]:a,this)},add:function(a,b){var c=typeof a=="string"?f(a,b):f.makeArray(a&&a.nodeType?[a]:a),d=f.merge(this.get(),c);return this.pushStack(S(c[0])||S(d[0])?d:f.unique(d))},andSelf:function(){return this.add(this.prevObject)}}),f.each({parent:function(a){var b=a.parentNode;return b&&b.nodeType!==11?b:null},parents:function(a){return f.dir(a,"parentNode")},parentsUntil:function(a,b,c){return f.dir(a,"parentNode",c)},next:function(a){return f.nth(a,2,"nextSibling")},prev:function(a){return f.nth(a,2,"previousSibling")},nextAll:function(a){return f.dir(a,"nextSibling")},prevAll:function(a){return f.dir(a,"previousSibling")},nextUntil:function(a,b,c){return f.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return f.dir(a,"previousSibling",c)},siblings:function(a){return f.sibling(a.parentNode.firstChild,a)},children:function(a){return f.sibling(a.firstChild)},contents:function(a){return f.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:f.makeArray(a.childNodes)}},function(a,b){f.fn[a]=function(c,d){var e=f.map(this,b,c);L.test(a)||(d=c),d&&typeof d=="string"&&(e=f.filter(d,e)),e=this.length>1&&!R[a]?f.unique(e):e,(this.length>1||N.test(d))&&M.test(a)&&(e=e.reverse());return this.pushStack(e,a,P.call(arguments).join(","))}}),f.extend({filter:function(a,b,c){c&&(a=":not("+a+")");return b.length===1?f.find.matchesSelector(b[0],a)?[b[0]]:[]:f.find.matches(a,b)},dir:function(a,c,d){var e=[],g=a[c];while(g&&g.nodeType!==9&&(d===b||g.nodeType!==1||!f(g).is(d)))g.nodeType===1&&e.push(g),g=g[c];return e},nth:function(a,b,c,d){b=b||1;var e=0;for(;a;a=a[c])if(a.nodeType===1&&++e===b)break;return a},sibling:function(a,b){var c=[];for(;a;a=a.nextSibling)a.nodeType===1&&a!==b&&c.push(a);return c}});var V="abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",W=/ 
jQuery\d+="(?:\d+|null)"/g,X=/^\s+/,Y=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,Z=/<([\w:]+)/,$=/<tbody/i,_=/<|&#?\w+;/,ba=/<(?:script|style)/i,bb=/<(?:script|object|embed|option|style)/i,bc=new RegExp("<(?:"+V+")","i"),bd=/checked\s*(?:[^=]|=\s*.checked.)/i,be=/\/(java|ecma)script/i,bf=/^\s*<!(?:\[CDATA\[|\-\-)/,bg={option:[1,"<select multiple='multiple'>","</select>"],legend:[1,"<fieldset>","</fieldset>"],thead:[1,"<table>","</table>"],tr:[2,"<table><tbody>","</tbody></table>"],td:[3,"<table><tbody><tr>","</tr></tbody></table>"],col:[2,"<table><tbody></tbody><colgroup>","</colgroup></table>"],area:[1,"<map>","</map>"],_default:[0,"",""]},bh=U(c);bg.optgroup=bg.option,bg.tbody=bg.tfoot=bg.colgroup=bg.caption=bg.thead,bg.th=bg.td,f.support.htmlSerialize||(bg._default=[1,"div<div>","</div>"]),f.fn.extend({text:function(a){if(f.isFunction(a))return this.each(function(b){var c=f(this);c.text(a.call(this,b,c.text()))});if(typeof a!="object"&&a!==b)return this.empty().append((this[0]&&this[0].ownerDocument||c).createTextNode(a));return f.text(this)},wrapAll:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapAll(a.call(this,b))});if(this[0]){var b=f(a,this[0].ownerDocument).eq(0).clone(!0);this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstChild&&a.firstChild.nodeType===1)a=a.firstChild;return a}).append(this)}return this},wrapInner:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapInner(a.call(this,b))});return this.each(function(){var b=f(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=f.isFunction(a);return this.each(function(c){f(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(){return this.parent().each(function(){f.nodeName(this,"body")||f(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.appendChild(a)})},prepend:function(){return 
this.domManip(arguments,!0,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this)});if(arguments.length){var a=f.clean(arguments);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this.nextSibling)});if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,f.clean(arguments));return a}},remove:function(a,b){for(var c=0,d;(d=this[c])!=null;c++)if(!a||f.filter(a,[d]).length)!b&&d.nodeType===1&&(f.cleanData(d.getElementsByTagName("*")),
+f.cleanData([d])),d.parentNode&&d.parentNode.removeChild(d);return this},empty:function()
+{for(var a=0,b;(b=this[a])!=null;a++){b.nodeType===1&&f.cleanData(b.getElementsByTagName("*"));while(b.firstChild)b.removeChild(b.firstChild)}return this},clone:function(a,b){a=a==null?!1:a,b=b==null?a:b;return this.map(function(){return f.clone(this,a,b)})},html:function(a){if(a===b)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(W,""):null;if(typeof a=="string"&&!ba.test(a)&&(f.support.leadingWhitespace||!X.test(a))&&!bg[(Z.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Y,"<$1></$2>");try{for(var c=0,d=this.length;c<d;c++)this[c].nodeType===1&&(f.cleanData(this[c].getElementsByTagName("*")),this[c].innerHTML=a)}catch(e){this.empty().append(a)}}else f.isFunction(a)?this.each(function(b){var c=f(this);c.html(a.call(this,b,c.html()))}):this.empty().append(a);return this},replaceWith:function(a){if(this[0]&&this[0].parentNode){if(f.isFunction(a))return this.each(function(b){var c=f(this),d=c.html();c.replaceWith(a.call(this,b,d))});typeof a!="string"&&(a=f(a).detach());return this.each(function(){var b=this.nextSibling,c=this.parentNode;f(this).remove(),b?f(b).before(a):f(c).append(a)})}return this.length?this.pushStack(f(f.isFunction(a)?a():a),"replaceWith",a):this},detach:function(a){return this.remove(a,!0)},domManip:function(a,c,d){var e,g,h,i,j=a[0],k=[];if(!f.support.checkClone&&arguments.length===3&&typeof j=="string"&&bd.test(j))return this.each(function(){f(this).domManip(a,c,d,!0)});if(f.isFunction(j))return this.each(function(e){var g=f(this);a[0]=j.call(this,e,c?g.html():b),g.domManip(a,c,d)});if(this[0]){i=j&&j.parentNode,f.support.parentNode&&i&&i.nodeType===11&&i.childNodes.length===this.length?e={fragment:i}:e=f.buildFragment(a,this,k),h=e.fragment,h.childNodes.length===1?g=h=h.firstChild:g=h.firstChild;if(g){c=c&&f.nodeName(g,"tr");for(var l=0,m=this.length,n=m-1;l<m;l++)d.call(c?bi(this[l],g):this[l],e.cacheable||m>1&&l<n?f.clone(h,!0,!0):h)}k.length&&f.each(k,bp)}return this}}),f.buildFragment=function(a,b,d){var 
e,g,h,i,j=a[0];b&&b[0]&&(i=b[0].ownerDocument||b[0]),i.createDocumentFragment||(i=c),a.length===1&&typeof j=="string"&&j.length<512&&i===c&&j.charAt(0)==="<"&&!bb.test(j)&&(f.support.checkClone||!bd.test(j))&&(f.support.html5Clone||!bc.test(j))&&(g=!0,h=f.fragments[j],h&&h!==1&&(e=h)),e||(e=i.createDocumentFragment(),f.clean(a,i,e,d)),g&&(f.fragments[j]=h?e:1);return{fragment:e,cacheable:g}},f.fragments={},f.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){f.fn[a]=function(c){var d=[],e=f(c),g=this.length===1&&this[0].parentNode;if(g&&g.nodeType===11&&g.childNodes.length===1&&e.length===1){e[b](this[0]);return this}for(var h=0,i=e.length;h<i;h++){var j=(h>0?this.clone(!0):this).get();f(e[h])[b](j),d=d.concat(j)}return this.pushStack(d,a,e.selector)}}),f.extend({clone:function(a,b,c){var d,e,g,h=f.support.html5Clone||!bc.test("<"+a.nodeName)?a.cloneNode(!0):bo(a);if((!f.support.noCloneEvent||!f.support.noCloneChecked)&&(a.nodeType===1||a.nodeType===11)&&!f.isXMLDoc(a)){bk(a,h),d=bl(a),e=bl(h);for(g=0;d[g];++g)e[g]&&bk(d[g],e[g])}if(b){bj(a,h);if(c){d=bl(a),e=bl(h);for(g=0;d[g];++g)bj(d[g],e[g])}}d=e=null;return h},clean:function(a,b,d,e){var g;b=b||c,typeof b.createElement=="undefined"&&(b=b.ownerDocument||b[0]&&b[0].ownerDocument||c);var h=[],i;for(var j=0,k;(k=a[j])!=null;j++){typeof k=="number"&&(k+="");if(!k)continue;if(typeof k=="string")if(!_.test(k))k=b.createTextNode(k);else{k=k.replace(Y,"<$1></$2>");var l=(Z.exec(k)||["",""])[1].toLowerCase(),m=bg[l]||bg._default,n=m[0],o=b.createElement("div");b===c?bh.appendChild(o):U(b).appendChild(o),o.innerHTML=m[1]+k+m[2];while(n--)o=o.lastChild;if(!f.support.tbody){var 
p=$.test(k),q=l==="table"&&!p?o.firstChild&&o.firstChild.childNodes:m[1]==="<table>"&&!p?o.childNodes:[];for(i=q.length-1;i>=0;--i)f.nodeName(q[i],"tbody")&&!q[i].childNodes.length&&q[i].parentNode.removeChild(q[i])}!f.support.leadingWhitespace&&X.test(k)&&o.insertBefore(b.createTextNode(X.exec(k)[0]),o.firstChild),k=o.childNodes}var r;if(!f.support.appendChecked)if(k[0]&&typeof (r=k.length)=="number")for(i=0;i<r;i++)bn(k[i]);else bn(k);k.nodeType?h.push(k):h=f.merge(h,k)}if(d){g=function(a){return!a.type||be.test(a.type)};for(j=0;h[j];j++)if(e&&f.nodeName(h[j],"script")&&(!h[j].type||h[j].type.toLowerCase()==="text/javascript"))e.push(h[j].parentNode?h[j].parentNode.removeChild(h[j]):h[j]);else{if(h[j].nodeType===1){var s=f.grep(h[j].getElementsByTagName("script"),g);h.splice.apply(h,[j+1,0].concat(s))}d.appendChild(h[j])}}return h},cleanData:function(a){var b,c,d=f.cache,e=f.event.special,g=f.support.deleteExpando;for(var h=0,i;(i=a[h])!=null;h++){if(i.nodeName&&f.noData[i.nodeName.toLowerCase()])continue;c=i[f.expando];if(c){b=d[c];if(b&&b.events){for(var j in b.events)e[j]?f.event.remove(i,j):f.removeEvent(i,j,b.handle);b.handle&&(b.handle.elem=null)}g?delete i[f.expando]:i.removeAttribute&&i.removeAttribute(f.expando),delete d[c]}}}});var bq=/alpha\([^)]*\)/i,br=/opacity=([^)]*)/,bs=/([A-Z]|^ms)/g,bt=/^-?\d+(?:px)?$/i,bu=/^-?\d/,bv=/^([\-+])=([\-+.\de]+)/,bw={position:"absolute",visibility:"hidden",display:"block"},bx=["Left","Right"],by=["Top","Bottom"],bz,bA,bB;f.fn.css=function(a,c){if(arguments.length===2&&c===b)return this;return f.access(this,a,c,!0,function(a,c,d){return d!==b?f.style(a,c,d):f.css(a,c)})},f.extend({cssHooks:{opacity:{get:function(a,b){if(b){var c=bz(a,"opacity","opacity");return c===""?"1":c}return 
a.style.opacity}}},cssNumber:{fillOpacity:!0,fontWeight:!0,lineHeight:!0,opacity:!0,orphans:!0,widows:!0,zIndex:!0,zoom:!0},cssProps:{"float":f.support.cssFloat?"cssFloat":"styleFloat"},style:function(a,c,d,e){if(!!a&&a.nodeType!==3&&a.nodeType!==8&&!!a.style){var g,h,i=f.camelCase(c),j=a.style,k=f.cssHooks[i];c=f.cssProps[i]||i;if(d===b){if(k&&"get"in k&&(g=k.get(a,!1,e))!==b)return g;return j[c]}h=typeof d,h==="string"&&(g=bv.exec(d))&&(d=+(g[1]+1)*+g[2]+parseFloat(f.css(a,c)),h="number");if(d==null||h==="number"&&isNaN(d))return;h==="number"&&!f.cssNumber[i]&&(d+="px");if(!k||!("set"in k)||(d=k.set(a,d))!==b)try{j[c]=d}catch(l){}}},css:function(a,c,d){var e,g;c=f.camelCase(c),g=f.cssHooks[c],c=f.cssProps[c]||c,c==="cssFloat"&&(c="float");if(g&&"get"in g&&(e=g.get(a,!0,d))!==b)return e;if(bz)return bz(a,c)},swap:function(a,b,c){var d={};for(var e in b)d[e]=a.style[e],a.style[e]=b[e];c.call(a);for(e in b)a.style[e]=d[e]}}),f.curCSS=f.css,f.each(["height","width"],function(a,b){f.cssHooks[b]={get:function(a,c,d){var e;if(c){if(a.offsetWidth!==0)return bC(a,b,d);f.swap(a,bw,function(){e=bC(a,b,d)});return e}},set:function(a,b){if(!bt.test(b))return b;b=parseFloat(b);if(b>=0)return b+"px"}}}),f.support.opacity||(f.cssHooks.opacity={get:function(a,b){return br.test((b&&a.currentStyle?a.currentStyle.filter:a.style.filter)||"")?parseFloat(RegExp.$1)/100+"":b?"1":""},set:function(a,b){var c=a.style,d=a.currentStyle,e=f.isNumeric(b)?"alpha(opacity="+b*100+")":"",g=d&&d.filter||c.filter||"";c.zoom=1;if(b>=1&&f.trim(g.replace(bq,""))===""){c.removeAttribute("filter");if(d&&!d.filter)return}c.filter=bq.test(g)?g.replace(bq,e):g+" "+e}}),f(function(){f.support.reliableMarginRight||(f.cssHooks.marginRight={get:function(a,b){var c;f.swap(a,{display:"inline-block"},function(){b?c=bz(a,"margin-right","marginRight"):c=a.style.marginRight});return c}})}),c.defaultView&&c.defaultView.getComputedStyle&&(bA=function(a,b){var 
c,d,e;b=b.replace(bs,"-$1").toLowerCase(),(d=a.ownerDocument.defaultView)&&(e=d.getComputedStyle(a,null))&&(c=e.getPropertyValue(b),c===""&&!f.contains(a.ownerDocument.documentElement,a)&&(c=f.style(a,b)));return c}),c.documentElement.currentStyle&&(bB=function(a,b){var c,d,e,f=a.currentStyle&&a.currentStyle[b],g=a.style;f===null&&g&&(e=g[b])&&(f=e),!bt.test(f)&&bu.test(f)&&(c=g.left,d=a.runtimeStyle&&a.runtimeStyle.left,d&&(a.runtimeStyle.left=a.currentStyle.left),g.left=b==="fontSize"?"1em":f||0,f=g.pixelLeft+"px",g.left=c,d&&(a.runtimeStyle.left=d));return f===""?"auto":f}),bz=bA||bB,f.expr&&f.expr.filters&&(f.expr.filters.hidden=function(a){var b=a.offsetWidth,c=a.offsetHeight;return b===0&&c===0||!f.support.reliableHiddenOffsets&&(a.style&&a.style.display||f.css(a,"display"))==="none"},f.expr.filters.visible=function(a){return!f.expr.filters.hidden(a)});var bD=/%20/g,bE=/\[\]$/,bF=/\r?\n/g,bG=/#.*$/,bH=/^(.*?):[ \t]*([^\r\n]*)\r?$/mg,bI=/^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,bJ=/^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,bK=/^(?:GET|HEAD)$/,bL=/^\/\//,bM=/\?/,bN=/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,bO=/^(?:select|textarea)/i,bP=/\s+/,bQ=/([?&])_=[^&]*/,bR=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,bS=f.fn.load,bT={},bU={},bV,bW,bX=["*/"]+["*"];try{bV=e.href}catch(bY){bV=c.createElement("a"),bV.href="",bV=bV.href}bW=bR.exec(bV.toLowerCase())||[],f.fn.extend({load:function(a,c,d){if(typeof a!="string"&&bS)return bS.apply(this,arguments);if(!this.length)return this;var e=a.indexOf(" ");if(e>=0){var g=a.slice(e,a.length);a=a.slice(0,e)}var h="GET";c&&(f.isFunction(c)?(d=c,c=b):typeof c=="object"&&(c=f.param(c,f.ajaxSettings.traditional),h="POST"));var 
i=this;f.ajax({url:a,type:h,dataType:"html",data:c,complete:function(a,b,c){c=a.responseText,a.isResolved()&&(a.done(function(a){c=a}),i.html(g?f("<div>").append(c.replace(bN,"")).find(g):c)),d&&i.each(d,[c,b,a])}});return this},serialize:function(){return f.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?f.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||bO.test(this.nodeName)||bI.test(this.type))}).map(function(a,b){var c=f(this).val();return c==null?null:f.isArray(c)?f.map(c,function(a,c){return{name:b.name,value:a.replace(bF,"\r\n")}}):{name:b.name,value:c.replace(bF,"\r\n")}}).get()}}),f.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(a,b){f.fn[b]=function(a){return this.on(b,a)}}),f.each(["get","post"],function(a,c){f[c]=function(a,d,e,g){f.isFunction(d)&&(g=g||e,e=d,d=b);return f.ajax({type:c,url:a,data:d,success:e,dataType:g})}}),f.extend({getScript:function(a,c){return f.get(a,b,c,"script")},getJSON:function(a,b,c){return f.get(a,b,c,"json")},ajaxSetup:function(a,b){b?b_(a,f.ajaxSettings):(b=a,a=f.ajaxSettings),b_(a,b);return a},ajaxSettings:{url:bV,isLocal:bJ.test(bW[1]),global:!0,type:"GET",contentType:"application/x-www-form-urlencoded",processData:!0,async:!0,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":bX},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":a.String,"text html":!0,"text json":f.parseJSON,"text xml":f.parseXML},flatOptions:{context:!0,url:!0}},ajaxPrefilter:bZ(bT),ajaxTransport:bZ(bU),ajax:function(a,c){function w(a,c,l,m){if(s!==2){s=2,q&&clearTimeout(q),p=b,n=m||"",v.readyState=a>0?4:0;var 
o,r,u,w=c,x=l?cb(d,v,l):b,y,z;if(a>=200&&a<300||a===304){if(d.ifModified){if(y=v.getResponseHeader("Last-Modified"))f.lastModified[k]=y;if(z=v.getResponseHeader("Etag"))f.etag[k]=z}if(a===304)w="notmodified",o=!0;else try{r=cc(d,x),w="success",o=!0}catch(A){w="parsererror",u=A}}else{u=w;if(!w||a)w="error",a<0&&(a=0)}v.status=a,v.statusText=""+(c||w),o?h.resolveWith(e,[r,w,v]):h.rejectWith(e,[v,w,u]),v.statusCode(j),j=b,t&&g.trigger("ajax"+(o?"Success":"Error"),[v,d,o?r:u]),i.fireWith(e,[v,w]),t&&(g.trigger("ajaxComplete",[v,d]),--f.active||f.event.trigger("ajaxStop"))}}typeof a=="object"&&(c=a,a=b),c=c||{};var d=f.ajaxSetup({},c),e=d.context||d,g=e!==d&&(e.nodeType||e instanceof f)?f(e):f.event,h=f.Deferred(),i=f.Callbacks("once memory"),j=d.statusCode||{},k,l={},m={},n,o,p,q,r,s=0,t,u,v={readyState:0,setRequestHeader:function(a,b){if(!s){var c=a.toLowerCase();a=m[c]=m[c]||a,l[a]=b}return this},getAllResponseHeaders:function(){return s===2?n:null},getResponseHeader:function(a){var c;if(s===2){if(!o){o={};while(c=bH.exec(n))o[c[1].toLowerCase()]=c[2]}c=o[a.toLowerCase()]}return c===b?null:c},overrideMimeType:function(a){s||(d.mimeType=a);return this},abort:function(a){a=a||"abort",p&&p.abort(a),w(0,a);return this}};h.promise(v),v.success=v.done,v.error=v.fail,v.complete=i.add,v.statusCode=function(a){if(a){var b;if(s<2)for(b in a)j[b]=[j[b],a[b]];else b=a[v.status],v.then(b,b)}return this},d.url=((a||d.url)+"").replace(bG,"").replace(bL,bW[1]+"//"),d.dataTypes=f.trim(d.dataType||"*").toLowerCase().split(bP),d.crossDomain==null&&(r=bR.exec(d.url.toLowerCase()),d.crossDomain=!(!r||r[1]==bW[1]&&r[2]==bW[2]&&(r[3]||(r[1]==="http:"?80:443))==(bW[3]||(bW[1]==="http:"?80:443)))),d.data&&d.processData&&typeof 
d.data!="string"&&(d.data=f.param(d.data,d.traditional)),b$(bT,d,c,v);if(s===2)return!1;t=d.global,d.type=d.type.toUpperCase(),d.hasContent=!bK.test(d.type),t&&f.active++===0&&f.event.trigger("ajaxStart");if(!d.hasContent){d.data&&(d.url+=(bM.test(d.url)?"&":"?")+d.data,delete d.data),k=d.url;if(d.cache===!1){var x=f.now(),y=d.url.replace(bQ,"$1_="+x);d.url=y+(y===d.url?(bM.test(d.url)?"&":"?")+"_="+x:"")}}(d.data&&d.hasContent&&d.contentType!==!1||c.contentType)&&v.setRequestHeader("Content-Type",d.contentType),d.ifModified&&(k=k||d.url,f.lastModified[k]&&v.setRequestHeader("If-Modified-Since",f.lastModified[k]),f.etag[k]&&v.setRequestHeader("If-None-Match",f.etag[k])),v.setRequestHeader("Accept",d.dataTypes[0]&&d.accepts[d.dataTypes[0]]?d.accepts[d.dataTypes[0]]+(d.dataTypes[0]!=="*"?", "+bX+"; q=0.01":""):d.accepts["*"]);for(u in d.headers)v.setRequestHeader(u,d.headers[u]);if(d.beforeSend&&(d.beforeSend.call(e,v,d)===!1||s===2)){v.abort();return!1}for(u in{success:1,error:1,complete:1})v[u](d[u]);p=b$(bU,d,c,v);if(!p)w(-1,"No Transport");else{v.readyState=1,t&&g.trigger("ajaxSend",[v,d]),d.async&&d.timeout>0&&(q=setTimeout(function(){v.abort("timeout")},d.timeout));try{s=1,p.send(l,w)}catch(z){if(s<2)w(-1,z);else throw z}}return v},param:function(a,c){var d=[],e=function(a,b){b=f.isFunction(b)?b():b,d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(b)};c===b&&(c=f.ajaxSettings.traditional);if(f.isArray(a)||a.jquery&&!f.isPlainObject(a))f.each(a,function(){e(this.name,this.value)});else for(var g in a)ca(g,a[g],c,e);return d.join("&").replace(bD,"+")}}),f.extend({active:0,lastModified:{},etag:{}});var cd=f.now(),ce=/(\=)\?(&|$)|\?\?/i;f.ajaxSetup({jsonp:"callback",jsonpCallback:function(){return f.expando+"_"+cd++}}),f.ajaxPrefilter("json jsonp",function(b,c,d){var e=b.contentType==="application/x-www-form-urlencoded"&&typeof b.data=="string";if(b.dataTypes[0]==="jsonp"||b.jsonp!==!1&&(ce.test(b.url)||e&&ce.test(b.data))){var 
g,h=b.jsonpCallback=f.isFunction(b.jsonpCallback)?b.jsonpCallback():b.jsonpCallback,i=a[h],j=b.url,k=b.data,l="$1"+h+"$2";b.jsonp!==!1&&(j=j.replace(ce,l),b.url===j&&(e&&(k=k.replace(ce,l)),b.data===k&&(j+=(/\?/.test(j)?"&":"?")+b.jsonp+"="+h))),b.url=j,b.data=k,a[h]=function(a){g=[a]},d.always(function(){a[h]=i,g&&f.isFunction(i)&&a[h](g[0])}),b.converters["script json"]=function(){g||f.error(h+" was not called");return g[0]},b.dataTypes[0]="json";return"script"}}),f.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/javascript|ecmascript/},converters:{"text script":function(a){f.globalEval(a);return a}}}),f.ajaxPrefilter("script",function(a){a.cache===b&&(a.cache=!1),a.crossDomain&&(a.type="GET",a.global=!1)}),f.ajaxTransport("script",function(a){if(a.crossDomain){var d,e=c.head||c.getElementsByTagName("head")[0]||c.documentElement;return{send:function(f,g){d=c.createElement("script"),d.async="async",a.scriptCharset&&(d.charset=a.scriptCharset),d.src=a.url,d.onload=d.onreadystatechange=function(a,c){if(c||!d.readyState||/loaded|complete/.test(d.readyState))d.onload=d.onreadystatechange=null,e&&d.parentNode&&e.removeChild(d),d=b,c||g(200,"success")},e.insertBefore(d,e.firstChild)},abort:function(){d&&d.onload(0,1)}}}});var cf=a.ActiveXObject?function(){for(var a in ch)ch[a](0,1)}:!1,cg=0,ch;f.ajaxSettings.xhr=a.ActiveXObject?function(){return!this.isLocal&&ci()||cj()}:ci,function(a){f.extend(f.support,{ajax:!!a,cors:!!a&&"withCredentials"in a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c)
+{if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,cf&&delete ch[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n),m.text=h.responseText;try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cg,cf&&(ch||(ch={},f(a).unload(cf)),ch[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var ck={},cl,cm,cn=/^(?:toggle|show|hide)$/,co=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,cp,cq=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cr;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(cu("show",3),a,b,c);for(var g=0,h=this.length;g<h;g++)d=this[g],d.style&&(e=d.style.display,!f._data(d,"olddisplay")&&e==="none"&&(e=d.style.display=""),e===""&&f.css(d,"display")==="none"&&f._data(d,"olddisplay",cv(d.nodeName)));for(g=0;g<h;g++){d=this[g];if(d.style){e=d.style.display;if(e===""||e==="none")d.style.display=f._data(d,"olddisplay")||""}}return this},hide:function(a,b,c){if(a||a===0)return this.animate(cu("hide",3),a,b,c);var d,e,g=0,h=this.length;for(;g<h;g++)d=this[g],d.style&&(e=f.css(d,"display"),e!=="none"&&!f._data(d,"olddisplay")&&f._data(d,"olddisplay",e));for(g=0;g<h;g++)this[g].style&&(this[g].style.display="none");return 
this},_toggle:f.fn.toggle,toggle:function(a,b,c){var d=typeof a=="boolean";f.isFunction(a)&&f.isFunction(b)?this._toggle.apply(this,arguments):a==null||d?this.each(function(){var b=d?a:f(this).is(":hidden");f(this)[b?"show":"hide"]()}):this.animate(cu("toggle",3),a,b,c);return this},fadeTo:function(a,b,c,d){return this.filter(":hidden").css("opacity",0).show().end().animate({opacity:b},a,c,d)},animate:function(a,b,c,d){function g(){e.queue===!1&&f._mark(this);var b=f.extend({},e),c=this.nodeType===1,d=c&&f(this).is(":hidden"),g,h,i,j,k,l,m,n,o;b.animatedProperties={};for(i in a){g=f.camelCase(i),i!==g&&(a[g]=a[i],delete a[i]),h=a[g],f.isArray(h)?(b.animatedProperties[g]=h[1],h=a[g]=h[0]):b.animatedProperties[g]=b.specialEasing&&b.specialEasing[g]||b.easing||"swing";if(h==="hide"&&d||h==="show"&&!d)return b.complete.call(this);c&&(g==="height"||g==="width")&&(b.overflow=[this.style.overflow,this.style.overflowX,this.style.overflowY],f.css(this,"display")==="inline"&&f.css(this,"float")==="none"&&(!f.support.inlineBlockNeedsLayout||cv(this.nodeName)==="inline"?this.style.display="inline-block":this.style.zoom=1))}b.overflow!=null&&(this.style.overflow="hidden");for(i in a)j=new f.fx(this,b,i),h=a[i],cn.test(h)?(o=f._data(this,"toggle"+i)||(h==="toggle"?d?"show":"hide":0),o?(f._data(this,"toggle"+i,o==="show"?"hide":"show"),j[o]()):j[h]()):(k=co.exec(h),l=j.cur(),k?(m=parseFloat(k[2]),n=k[3]||(f.cssNumber[i]?"":"px"),n!=="px"&&(f.style(this,i,(m||1)+n),l=(m||1)/j.cur()*l,f.style(this,i,l+n)),k[1]&&(m=(k[1]==="-="?-1:1)*m+l),j.custom(l,m,n)):j.custom(l,h,""));return!0}var e=f.speed(b,c,d);if(f.isEmptyObject(a))return this.each(e.complete,[!1]);a=f.extend({},a);return e.queue===!1?this.each(g):this.queue(e.queue,g)},stop:function(a,c,d){typeof a!="string"&&(d=c,c=a,a=b),c&&a!==!1&&this.queue(a||"fx",[]);return this.each(function(){function h(a,b,c){var e=b[c];f.removeData(a,c,!0),e.stop(d)}var b,c=!1,e=f.timers,g=f._data(this);d||f._unmark(!0,this);if(a==null)for(b in 
g)g[b]&&g[b].stop&&b.indexOf(".run")===b.length-4&&h(this,g,b);else g[b=a+".run"]&&g[b].stop&&h(this,g,b);for(b=e.length;b--;)e[b].elem===this&&(a==null||e[b].queue===a)&&(d?e[b](!0):e[b].saveState(),c=!0,e.splice(b,1));(!d||!c)&&f.dequeue(this,a)})}}),f.each({slideDown:cu("show",1),slideUp:cu("hide",1),slideToggle:cu("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"},fadeToggle:{opacity:"toggle"}},function(a,b){f.fn[a]=function(a,c,d){return this.animate(b,a,c,d)}}),f.extend({speed:function(a,b,c){var d=a&&typeof a=="object"?f.extend({},a):{complete:c||!c&&b||f.isFunction(a)&&a,duration:a,easing:c&&b||b&&!f.isFunction(b)&&b};d.duration=f.fx.off?0:typeof d.duration=="number"?d.duration:d.duration in f.fx.speeds?f.fx.speeds[d.duration]:f.fx.speeds._default;if(d.queue==null||d.queue===!0)d.queue="fx";d.old=d.complete,d.complete=function(a){f.isFunction(d.old)&&d.old.call(this),d.queue?f.dequeue(this,d.queue):a!==!1&&f._unmark(this)};return d},easing:{linear:function(a,b,c,d){return c+d*a},swing:function(a,b,c,d){return(-Math.cos(a*Math.PI)/2+.5)*d+c}},timers:[],fx:function(a,b,c){this.options=b,this.elem=a,this.prop=c,b.orig=b.orig||{}}}),f.fx.prototype={update:function(){this.options.step&&this.options.step.call(this.elem,this.now,this),(f.fx.step[this.prop]||f.fx.step._default)(this)},cur:function(){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null))return this.elem[this.prop];var a,b=f.css(this.elem,this.prop);return isNaN(a=parseFloat(b))?!b||b==="auto"?0:b:a},custom:function(a,c,d){function h(a){return e.step(a)}var e=this,g=f.fx;this.startTime=cr||cs(),this.end=c,this.now=this.start=a,this.pos=this.state=0,this.unit=d||this.unit||(f.cssNumber[this.prop]?"":"px"),h.queue=this.options.queue,h.elem=this.elem,h.saveState=function(){e.options.hide&&f._data(e.elem,"fxshow"+e.prop)===b&&f._data(e.elem,"fxshow"+e.prop,e.start)},h()&&f.timers.push(h)&&!cp&&(cp=setInterval(g.tick,g.interval))},show:function(){var 
a=f._data(this.elem,"fxshow"+this.prop);this.options.orig[this.prop]=a||f.style(this.elem,this.prop),this.options.show=!0,a!==b?this.custom(this.cur(),a):this.custom(this.prop==="width"||this.prop==="height"?1:0,this.cur()),f(this.elem).show()},hide:function(){this.options.orig[this.prop]=f._data(this.elem,"fxshow"+this.prop)||f.style(this.elem,this.prop),this.options.hide=!0,this.custom(this.cur(),0)},step:function(a){var b,c,d,e=cr||cs(),g=!0,h=this.elem,i=this.options;if(a||e>=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var a,b=f.timers,c=0;for(;c<b.length;c++)a=b[c],!a()&&b[c]===a&&b.splice(c--,1);b.length||f.fx.stop()},interval:13,stop:function(){clearInterval(cp),cp=null},speeds:{slow:600,fast:200,_default:400},step:{opacity:function(a){f.style(a.elem,"opacity",a.now)},_default:function(a){a.elem.style&&a.elem.style[a.prop]!=null?a.elem.style[a.prop]=a.now+a.unit:a.elem[a.prop]=a.now}}}),f.each(["width","height"],function(a,b){f.fx.step[b]=function(a){f.style(a.elem,b,Math.max(0,a.now)+a.unit)}}),f.expr&&f.expr.filters&&(f.expr.filters.animated=function(a){return f.grep(f.timers,function(b){return a===b.elem}).length});var cw=/^t(?:able|d|h)$/i,cx=/^(?:body|html)$/i;"getBoundingClientRect"in c.documentElement?f.fn.offset=function(a){var 
b=this[0],c;if(a)return this.each(function(b){f.offset.setOffset(this,a,b)});if(!b||!b.ownerDocument)return null;if(b===b.ownerDocument.body)return f.offset.bodyOffset(b);try{c=b.getBoundingClientRect()}catch(d){}var e=b.ownerDocument,g=e.documentElement;if(!c||!f.contains(g,b))return c?{top:c.top,left:c.left}:{top:0,left:0};var h=e.body,i=cy(e),j=g.clientTop||h.clientTop||0,k=g.clientLeft||h.clientLeft||0,l=i.pageYOffset||f.support.boxModel&&g.scrollTop||h.scrollTop,m=i.pageXOffset||f.support.boxModel&&g.scrollLeft||h.scrollLeft,n=c.top+l-j,o=c.left+m-k;return{top:n,left:o}}:f.fn.offset=function(a){var b=this[0];if(a)return this.each(function(b){f.offset.setOffset(this,a,b)});if(!b||!b.ownerDocument)return null;if(b===b.ownerDocument.body)return f.offset.bodyOffset(b);var c,d=b.offsetParent,e=b,g=b.ownerDocument,h=g.documentElement,i=g.body,j=g.defaultView,k=j?j.getComputedStyle(b,null):b.currentStyle,l=b.offsetTop,m=b.offsetLeft;while((b=b.parentNode)&&b!==i&&b!==h){if(f.support.fixedPosition&&k.position==="fixed")break;c=j?j.getComputedStyle(b,null):b.currentStyle,l-=b.scrollTop,m-=b.scrollLeft,b===d&&(l+=b.offsetTop,m+=b.offsetLeft,f.support.doesNotAddBorder&&(!f.support.doesAddBorderForTableAndCells||!cw.test(b.nodeName))&&(l+=parseFloat(c.borderTopWidth)||0,m+=parseFloat(c.borderLeftWidth)||0),e=d,d=b.offsetParent),f.support.subtractsBorderForOverflowNotVisible&&c.overflow!=="visible"&&(l+=parseFloat(c.borderTopWidth)||0,m+=parseFloat(c.borderLeftWidth)||0),k=c}if(k.position==="relative"||k.position==="static")l+=i.offsetTop,m+=i.offsetLeft;f.support.fixedPosition&&k.position==="fixed"&&(l+=Math.max(h.scrollTop,i.scrollTop),m+=Math.max(h.scrollLeft,i.scrollLeft));return{top:l,left:m}},f.offset={bodyOffset:function(a){var b=a.offsetTop,c=a.offsetLeft;f.support.doesNotIncludeMarginInBodyOffset&&(b+=parseFloat(f.css(a,"marginTop"))||0,c+=parseFloat(f.css(a,"marginLeft"))||0);return{top:b,left:c}},setOffset:function(a,b,c){var 
d=f.css(a,"position");d==="static"&&(a.style.position="relative");var e=f(a),g=e.offset(),h=f.css(a,"top"),i=f.css(a,"left"),j=(d==="absolute"||d==="fixed")&&f.inArray("auto",[h,i])>-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each(["Left","Top"],function(a,c){var d="scroll"+c;f.fn[d]=function(c){var e,g;if(c===b){e=this[0];if(!e)return null;g=cy(e);return g?"pageXOffset"in g?g[a?"pageYOffset":"pageXOffset"]:f.support.boxModel&&g.document.documentElement[d]||g.document.body[d]:e[d]}return this.each(function(){g=cy(this),g?g.scrollTo(a?f(g).scrollLeft():c,a?c:f(g).scrollTop()):this[d]=c})}}),f.each(["Height","Width"],function(a,c){var d=c.toLowerCase();f.fn["inner"+c]=function(){var a=this[0];return a?a.style?parseFloat(f.css(a,d,"padding")):this[d]():null},f.fn["outer"+c]=function(a){var b=this[0];return b?b.style?parseFloat(f.css(b,d,a?"margin":"border")):this[d]():null},f.fn[d]=function(a){var e=this[0];if(!e)return a==null?null:this;if(f.isFunction(a))return this.each(function(b){var c=f(this);c[d](a.call(this,b,c[d]()))});if(f.isWindow(e)){var g=e.document.documentElement["client"+c],h=e.document.body;return e.document.compatMode==="CSS1Compat"&&g||h&&h["client"+c]||g}if(e.nodeType===9)return 
Math.max(e.documentElement["client"+c],e.body["scroll"+c],e.documentElement["scroll"+c],e.body["offset"+c],e.documentElement["offset"+c]);if(a===b){var i=f.css(e,d),j=parseFloat(i);return f.isNumeric(j)?j:i}return this.css(d,typeof a=="string"?a:a+"px")}}),a.jQuery=a.$=f,typeof define=="function"&&define.amd&&define.amd.jQuery&&define("jquery",[],function(){return f})})(window);
diff --git a/doc/html/modules.html b/doc/html/modules.html
new file mode 100644
index 0000000..6b769e4
--- /dev/null
+++ b/doc/html/modules.html
@@ -0,0 +1,94 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: Modules</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li class="current"><a href="modules.html"><span>Modules</span></a></li>
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+</div><!-- top -->
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+<div class="header">
+  <div class="headertitle">
+<div class="title">Modules</div>  </div>
+</div><!--header-->
+<div class="contents">
+<div class="textblock">Here is a list of all modules:</div><div class="directory">
+<table class="directory">
+<tr id="row_0_" class="even"><td class="entry"><img src="ftv2lastnode.png" alt="\" width="16" height="22" /><a class="el" href="group___turbo_j_p_e_g.html" target="_self">TurboJPEG</a></td><td class="desc">TurboJPEG API</td></tr>
+</table>
+</div><!-- directory -->
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/nav_f.png b/doc/html/nav_f.png
new file mode 100644
index 0000000..72a58a5
--- /dev/null
+++ b/doc/html/nav_f.png
Binary files differ
diff --git a/doc/html/nav_g.png b/doc/html/nav_g.png
new file mode 100644
index 0000000..2093a23
--- /dev/null
+++ b/doc/html/nav_g.png
Binary files differ
diff --git a/doc/html/nav_h.png b/doc/html/nav_h.png
new file mode 100644
index 0000000..33389b1
--- /dev/null
+++ b/doc/html/nav_h.png
Binary files differ
diff --git a/doc/html/open.png b/doc/html/open.png
new file mode 100644
index 0000000..30f75c7
--- /dev/null
+++ b/doc/html/open.png
Binary files differ
diff --git a/doc/html/search/all_63.html b/doc/html/search/all_63.html
new file mode 100644
index 0000000..e7f34db
--- /dev/null
+++ b/doc/html/search/all_63.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_63.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_63.js b/doc/html/search/all_63.js
new file mode 100644
index 0000000..7b058da
--- /dev/null
+++ b/doc/html/search/all_63.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['customfilter',['customFilter',['../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_64.html b/doc/html/search/all_64.html
new file mode 100644
index 0000000..360601f
--- /dev/null
+++ b/doc/html/search/all_64.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_64.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_64.js b/doc/html/search/all_64.js
new file mode 100644
index 0000000..e19a050
--- /dev/null
+++ b/doc/html/search/all_64.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['data',['data',['../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3',1,'tjtransform']]],
+  ['denom',['denom',['../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/all_68.html b/doc/html/search/all_68.html
new file mode 100644
index 0000000..dec41d6
--- /dev/null
+++ b/doc/html/search/all_68.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_68.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_68.js b/doc/html/search/all_68.js
new file mode 100644
index 0000000..7b17e97
--- /dev/null
+++ b/doc/html/search/all_68.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['h',['h',['../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_6e.html b/doc/html/search/all_6e.html
new file mode 100644
index 0000000..e0fd765
--- /dev/null
+++ b/doc/html/search/all_6e.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_6e.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_6e.js b/doc/html/search/all_6e.js
new file mode 100644
index 0000000..83faa13
--- /dev/null
+++ b/doc/html/search/all_6e.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['num',['num',['../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/all_6f.html b/doc/html/search/all_6f.html
new file mode 100644
index 0000000..5e86b03
--- /dev/null
+++ b/doc/html/search/all_6f.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_6f.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_6f.js b/doc/html/search/all_6f.js
new file mode 100644
index 0000000..1cca832
--- /dev/null
+++ b/doc/html/search/all_6f.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['op',['op',['../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498',1,'tjtransform']]],
+  ['options',['options',['../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_72.html b/doc/html/search/all_72.html
new file mode 100644
index 0000000..347b9f6
--- /dev/null
+++ b/doc/html/search/all_72.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_72.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_72.js b/doc/html/search/all_72.js
new file mode 100644
index 0000000..01cde35
--- /dev/null
+++ b/doc/html/search/all_72.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['r',['r',['../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf',1,'tjtransform']]]
+];
diff --git a/doc/html/search/all_74.html b/doc/html/search/all_74.html
new file mode 100644
index 0000000..c646aef
--- /dev/null
+++ b/doc/html/search/all_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_74.js b/doc/html/search/all_74.js
new file mode 100644
index 0000000..435cec4
--- /dev/null
+++ b/doc/html/search/all_74.js
@@ -0,0 +1,82 @@
+var searchData=
+[
+  ['tj_5fnumcs',['TJ_NUMCS',['../group___turbo_j_p_e_g.html#ga39f57a6fb02d9cf32e7b6890099b5a71',1,'turbojpeg.h']]],
+  ['tj_5fnumpf',['TJ_NUMPF',['../group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e',1,'turbojpeg.h']]],
+  ['tj_5fnumsamp',['TJ_NUMSAMP',['../group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c',1,'turbojpeg.h']]],
+  ['tj_5fnumxop',['TJ_NUMXOP',['../group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c',1,'turbojpeg.h']]],
+  ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
+  ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]],
+  ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
+  ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]],
+  ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+  ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67',1,'turbojpeg.h']]],
+  ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]],
+  ['tjcs_5fcmyk',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]],
+  ['tjcs_5fgray',['TJCS_GRAY',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a',1,'turbojpeg.h']]],
+  ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]],
+  ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]],
+  ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]],
+  ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed',1,'turbojpeg.h']]],
+  ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
+  ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086',1,'turbojpeg.h']]],
+  ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07',1,'turbojpeg.h']]],
+  ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
+  ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360',1,'turbojpeg.h']]],
+  ['tjflag_5faccuratedct',['TJFLAG_ACCURATEDCT',['../group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0',1,'turbojpeg.h']]],
+  ['tjflag_5fbottomup',['TJFLAG_BOTTOMUP',['../group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec',1,'turbojpeg.h']]],
+  ['tjflag_5ffastdct',['TJFLAG_FASTDCT',['../group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2',1,'turbojpeg.h']]],
+  ['tjflag_5ffastupsample',['TJFLAG_FASTUPSAMPLE',['../group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d',1,'turbojpeg.h']]],
+  ['tjflag_5fnorealloc',['TJFLAG_NOREALLOC',['../group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963',1,'turbojpeg.h']]],
+  ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
+  ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
+  ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]],
+  ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]],
+  ['tjhandle',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]],
+  ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]],
+  ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]],
+  ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]],
+  ['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]],
+  ['tjmcuwidth',['tjMCUWidth',['../group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c',1,'turbojpeg.h']]],
+  ['tjpad',['TJPAD',['../group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511',1,'turbojpeg.h']]],
+  ['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]],
+  ['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]],
+  ['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]],
+  ['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
+  ['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
+  ['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+  ['tjpf_5fcmyk',['TJPF_CMYK',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b',1,'turbojpeg.h']]],
+  ['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
+  ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
+  ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
+  ['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]],
+  ['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]],
+  ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]],
+  ['tjpixelsize',['tjPixelSize',['../group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c',1,'turbojpeg.h']]],
+  ['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]],
+  ['tjregion',['tjregion',['../structtjregion.html',1,'']]],
+  ['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
+  ['tjsamp_5f411',['TJSAMP_411',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2',1,'turbojpeg.h']]],
+  ['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
+  ['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
+  ['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
+  ['tjsamp_5f444',['TJSAMP_444',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3',1,'turbojpeg.h']]],
+  ['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]],
+  ['tjscaled',['TJSCALED',['../group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df',1,'turbojpeg.h']]],
+  ['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]],
+  ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'tjTransform(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags):&#160;turbojpeg.h'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform():&#160;turbojpeg.h']]],
+  ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]],
+  ['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]],
+  ['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]],
+  ['tjxop_5frot180',['TJXOP_ROT180',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692',1,'turbojpeg.h']]],
+  ['tjxop_5frot270',['TJXOP_ROT270',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08',1,'turbojpeg.h']]],
+  ['tjxop_5frot90',['TJXOP_ROT90',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128',1,'turbojpeg.h']]],
+  ['tjxop_5ftranspose',['TJXOP_TRANSPOSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d',1,'turbojpeg.h']]],
+  ['tjxop_5ftransverse',['TJXOP_TRANSVERSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4',1,'turbojpeg.h']]],
+  ['tjxop_5fvflip',['TJXOP_VFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d',1,'turbojpeg.h']]],
+  ['tjxopt_5fcrop',['TJXOPT_CROP',['../group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2',1,'turbojpeg.h']]],
+  ['tjxopt_5fgray',['TJXOPT_GRAY',['../group___turbo_j_p_e_g.html#ga3acee7b48ade1b99e5588736007c2589',1,'turbojpeg.h']]],
+  ['tjxopt_5fnooutput',['TJXOPT_NOOUTPUT',['../group___turbo_j_p_e_g.html#gafbf992bbf6e006705886333703ffab31',1,'turbojpeg.h']]],
+  ['tjxopt_5fperfect',['TJXOPT_PERFECT',['../group___turbo_j_p_e_g.html#ga50e03cb5ed115330e212417429600b00',1,'turbojpeg.h']]],
+  ['tjxopt_5ftrim',['TJXOPT_TRIM',['../group___turbo_j_p_e_g.html#ga319826b7eb1583c0595bbe7b95428709',1,'turbojpeg.h']]],
+  ['turbojpeg',['TurboJPEG',['../group___turbo_j_p_e_g.html',1,'']]]
+];
diff --git a/doc/html/search/all_77.html b/doc/html/search/all_77.html
new file mode 100644
index 0000000..55d7142
--- /dev/null
+++ b/doc/html/search/all_77.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_77.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_77.js b/doc/html/search/all_77.js
new file mode 100644
index 0000000..4267002
--- /dev/null
+++ b/doc/html/search/all_77.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['w',['w',['../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_78.html b/doc/html/search/all_78.html
new file mode 100644
index 0000000..39075d4
--- /dev/null
+++ b/doc/html/search/all_78.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_78.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_78.js b/doc/html/search/all_78.js
new file mode 100644
index 0000000..41a27f2
--- /dev/null
+++ b/doc/html/search/all_78.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['x',['x',['../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9',1,'tjregion']]]
+];
diff --git a/doc/html/search/all_79.html b/doc/html/search/all_79.html
new file mode 100644
index 0000000..033719a
--- /dev/null
+++ b/doc/html/search/all_79.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="all_79.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/all_79.js b/doc/html/search/all_79.js
new file mode 100644
index 0000000..86890a6
--- /dev/null
+++ b/doc/html/search/all_79.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['y',['y',['../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2',1,'tjregion']]]
+];
diff --git a/doc/html/search/classes_74.html b/doc/html/search/classes_74.html
new file mode 100644
index 0000000..4b0fdaa
--- /dev/null
+++ b/doc/html/search/classes_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="classes_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/classes_74.js b/doc/html/search/classes_74.js
new file mode 100644
index 0000000..cd623d2
--- /dev/null
+++ b/doc/html/search/classes_74.js
@@ -0,0 +1,6 @@
+var searchData=
+[
+  ['tjregion',['tjregion',['../structtjregion.html',1,'']]],
+  ['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]],
+  ['tjtransform',['tjtransform',['../structtjtransform.html',1,'']]]
+];
diff --git a/doc/html/search/close.png b/doc/html/search/close.png
new file mode 100644
index 0000000..9342d3d
--- /dev/null
+++ b/doc/html/search/close.png
Binary files differ
diff --git a/doc/html/search/enums_74.html b/doc/html/search/enums_74.html
new file mode 100644
index 0000000..9b754ee
--- /dev/null
+++ b/doc/html/search/enums_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="enums_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/enums_74.js b/doc/html/search/enums_74.js
new file mode 100644
index 0000000..276aa24
--- /dev/null
+++ b/doc/html/search/enums_74.js
@@ -0,0 +1,7 @@
+var searchData=
+[
+  ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]],
+  ['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]],
+  ['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
+  ['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/enumvalues_74.html b/doc/html/search/enumvalues_74.html
new file mode 100644
index 0000000..0d69a0a
--- /dev/null
+++ b/doc/html/search/enumvalues_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="enumvalues_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/enumvalues_74.js b/doc/html/search/enumvalues_74.js
new file mode 100644
index 0000000..7dc2f8d
--- /dev/null
+++ b/doc/html/search/enumvalues_74.js
@@ -0,0 +1,34 @@
+var searchData=
+[
+  ['tjcs_5fcmyk',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]],
+  ['tjcs_5fgray',['TJCS_GRAY',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a',1,'turbojpeg.h']]],
+  ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]],
+  ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]],
+  ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]],
+  ['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]],
+  ['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]],
+  ['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
+  ['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
+  ['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+  ['tjpf_5fcmyk',['TJPF_CMYK',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b',1,'turbojpeg.h']]],
+  ['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
+  ['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
+  ['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
+  ['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]],
+  ['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]],
+  ['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]],
+  ['tjsamp_5f411',['TJSAMP_411',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2',1,'turbojpeg.h']]],
+  ['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
+  ['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
+  ['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
+  ['tjsamp_5f444',['TJSAMP_444',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3',1,'turbojpeg.h']]],
+  ['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]],
+  ['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]],
+  ['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]],
+  ['tjxop_5frot180',['TJXOP_ROT180',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a140952eb8dd0300accfcc22726d69692',1,'turbojpeg.h']]],
+  ['tjxop_5frot270',['TJXOP_ROT270',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a3064ee5dfb7f032df332818587567a08',1,'turbojpeg.h']]],
+  ['tjxop_5frot90',['TJXOP_ROT90',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a43b2bbb23bc4bd548422d43fbe9af128',1,'turbojpeg.h']]],
+  ['tjxop_5ftranspose',['TJXOP_TRANSPOSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a31060aed199f886afdd417f80499c32d',1,'turbojpeg.h']]],
+  ['tjxop_5ftransverse',['TJXOP_TRANSVERSE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866af3b14d488aea6ece9e5b3df73a74d6a4',1,'turbojpeg.h']]],
+  ['tjxop_5fvflip',['TJXOP_VFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866a324eddfbec53b7e691f61e56929d0d5d',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/functions_74.html b/doc/html/search/functions_74.html
new file mode 100644
index 0000000..1605901
--- /dev/null
+++ b/doc/html/search/functions_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="functions_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/functions_74.js b/doc/html/search/functions_74.js
new file mode 100644
index 0000000..0a0e6cd
--- /dev/null
+++ b/doc/html/search/functions_74.js
@@ -0,0 +1,21 @@
+var searchData=
+[
+  ['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
+  ['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
+  ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]],
+  ['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+  ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67',1,'turbojpeg.h']]],
+  ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed',1,'turbojpeg.h']]],
+  ['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
+  ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086',1,'turbojpeg.h']]],
+  ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07',1,'turbojpeg.h']]],
+  ['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
+  ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360',1,'turbojpeg.h']]],
+  ['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
+  ['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
+  ['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]],
+  ['tjinitcompress',['tjInitCompress',['../group___turbo_j_p_e_g.html#ga3d10c47fbe4a2489a2b30c931551d01a',1,'turbojpeg.h']]],
+  ['tjinitdecompress',['tjInitDecompress',['../group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e',1,'turbojpeg.h']]],
+  ['tjinittransform',['tjInitTransform',['../group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902',1,'turbojpeg.h']]],
+  ['tjtransform',['tjTransform',['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/groups_74.html b/doc/html/search/groups_74.html
new file mode 100644
index 0000000..a169560
--- /dev/null
+++ b/doc/html/search/groups_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="groups_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/groups_74.js b/doc/html/search/groups_74.js
new file mode 100644
index 0000000..27d4ffb
--- /dev/null
+++ b/doc/html/search/groups_74.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['turbojpeg',['TurboJPEG',['../group___turbo_j_p_e_g.html',1,'']]]
+];
diff --git a/doc/html/search/mag_sel.png b/doc/html/search/mag_sel.png
new file mode 100644
index 0000000..81f6040
--- /dev/null
+++ b/doc/html/search/mag_sel.png
Binary files differ
diff --git a/doc/html/search/nomatches.html b/doc/html/search/nomatches.html
new file mode 100644
index 0000000..b1ded27
--- /dev/null
+++ b/doc/html/search/nomatches.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="NoMatches">No Matches</div>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/search.css b/doc/html/search/search.css
new file mode 100644
index 0000000..5b208ed
--- /dev/null
+++ b/doc/html/search/search.css
@@ -0,0 +1,271 @@
+/*---------------- Search Box */
+
+#FSearchBox {
+    float: left;
+}
+
+#MSearchBox {
+    white-space : nowrap;
+    position: absolute;
+    float: none;
+    display: inline;
+    margin-top: 8px;
+    right: 0px;
+    width: 170px;
+    z-index: 102;
+    background-color: white;
+}
+
+#MSearchBox .left
+{
+    display:block;
+    position:absolute;
+    left:10px;
+    width:20px;
+    height:19px;
+    background:url('search_l.png') no-repeat;
+    background-position:right;
+}
+
+#MSearchSelect {
+    display:block;
+    position:absolute;
+    width:20px;
+    height:19px;
+}
+
+.left #MSearchSelect {
+    left:4px;
+}
+
+.right #MSearchSelect {
+    right:5px;
+}
+
+#MSearchField {
+    display:block;
+    position:absolute;
+    height:19px;
+    background:url('search_m.png') repeat-x;
+    border:none;
+    width:116px;
+    margin-left:20px;
+    padding-left:4px;
+    color: #909090;
+    outline: none;
+    font: 9pt Arial, Verdana, sans-serif;
+}
+
+#FSearchBox #MSearchField {
+    margin-left:15px;
+}
+
+#MSearchBox .right {
+    display:block;
+    position:absolute;
+    right:10px;
+    top:0px;
+    width:20px;
+    height:19px;
+    background:url('search_r.png') no-repeat;
+    background-position:left;
+}
+
+#MSearchClose {
+    display: none;
+    position: absolute;
+    top: 4px;
+    background : none;
+    border: none;
+    margin: 0px 4px 0px 0px;
+    padding: 0px 0px;
+    outline: none;
+}
+
+.left #MSearchClose {
+    left: 6px;
+}
+
+.right #MSearchClose {
+    right: 2px;
+}
+
+.MSearchBoxActive #MSearchField {
+    color: #000000;
+}
+
+/*---------------- Search filter selection */
+
+#MSearchSelectWindow {
+    display: none;
+    position: absolute;
+    left: 0; top: 0;
+    border: 1px solid #90A5CE;
+    background-color: #F9FAFC;
+    z-index: 1;
+    padding-top: 4px;
+    padding-bottom: 4px;
+    -moz-border-radius: 4px;
+    -webkit-border-top-left-radius: 4px;
+    -webkit-border-top-right-radius: 4px;
+    -webkit-border-bottom-left-radius: 4px;
+    -webkit-border-bottom-right-radius: 4px;
+    -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15);
+}
+
+.SelectItem {
+    font: 8pt Arial, Verdana, sans-serif;
+    padding-left:  2px;
+    padding-right: 12px;
+    border: 0px;
+}
+
+span.SelectionMark {
+    margin-right: 4px;
+    font-family: monospace;
+    outline-style: none;
+    text-decoration: none;
+}
+
+a.SelectItem {
+    display: block;
+    outline-style: none;
+    color: #000000; 
+    text-decoration: none;
+    padding-left:   6px;
+    padding-right: 12px;
+}
+
+a.SelectItem:focus,
+a.SelectItem:active {
+    color: #000000; 
+    outline-style: none;
+    text-decoration: none;
+}
+
+a.SelectItem:hover {
+    color: #FFFFFF;
+    background-color: #3D578C;
+    outline-style: none;
+    text-decoration: none;
+    cursor: pointer;
+    display: block;
+}
+
+/*---------------- Search results window */
+
+iframe#MSearchResults {
+    width: 60ex;
+    height: 15em;
+}
+
+#MSearchResultsWindow {
+    display: none;
+    position: absolute;
+    left: 0; top: 0;
+    border: 1px solid #000;
+    background-color: #EEF1F7;
+}
+
+/* ----------------------------------- */
+
+
+#SRIndex {
+    clear:both; 
+    padding-bottom: 15px;
+}
+
+.SREntry {
+    font-size: 10pt;
+    padding-left: 1ex;
+}
+
+.SRPage .SREntry {
+    font-size: 8pt;
+    padding: 1px 5px;
+}
+
+body.SRPage {
+    margin: 5px 2px;
+}
+
+.SRChildren {
+    padding-left: 3ex; padding-bottom: .5em 
+}
+
+.SRPage .SRChildren {
+    display: none;
+}
+
+.SRSymbol {
+    font-weight: bold; 
+    color: #425E97;
+    font-family: Arial, Verdana, sans-serif;
+    text-decoration: none;
+    outline: none;
+}
+
+a.SRScope {
+    display: block;
+    color: #425E97; 
+    font-family: Arial, Verdana, sans-serif;
+    text-decoration: none;
+    outline: none;
+}
+
+a.SRSymbol:focus, a.SRSymbol:active,
+a.SRScope:focus, a.SRScope:active {
+    text-decoration: underline;
+}
+
+span.SRScope {
+    padding-left: 4px;
+}
+
+.SRPage .SRStatus {
+    padding: 2px 5px;
+    font-size: 8pt;
+    font-style: italic;
+}
+
+.SRResult {
+    display: none;
+}
+
+DIV.searchresults {
+    margin-left: 10px;
+    margin-right: 10px;
+}
+
+/*---------------- External search page results */
+
+.searchresult {
+    background-color: #F0F3F8;
+}
+
+.pages b {
+   color: white;
+   padding: 5px 5px 3px 5px;
+   background-image: url("../tab_a.png");
+   background-repeat: repeat-x;
+   text-shadow: 0 1px 1px #000000;
+}
+
+.pages {
+    line-height: 17px;
+    margin-left: 4px;
+    text-decoration: none;
+}
+
+.hl {
+    font-weight: bold;
+}
+
+#searchresults {
+    margin-bottom: 20px;
+}
+
+.searchpages {
+    margin-top: 10px;
+}
+
diff --git a/doc/html/search/search.js b/doc/html/search/search.js
new file mode 100644
index 0000000..409672c
--- /dev/null
+++ b/doc/html/search/search.js
@@ -0,0 +1,809 @@
+// Search script generated by doxygen
+// Copyright (C) 2009 by Dimitri van Heesch.
+
+// The code in this file is loosely based on main.js, part of Natural Docs,
+// which is Copyright (C) 2003-2008 Greg Valure
+// Natural Docs is licensed under the GPL.
+
+var indexSectionsWithContent =
+{
+  0: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100010000011001010011100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  1: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  2: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  3: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001100010000011001010011100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  4: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  5: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  6: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000",
+  7: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
+};
+
+var indexSectionNames =
+{
+  0: "all",
+  1: "classes",
+  2: "functions",
+  3: "variables",
+  4: "typedefs",
+  5: "enums",
+  6: "enumvalues",
+  7: "groups"
+};
+
+function convertToId(search) // Builds an id string from 'search': [a-z0-9] is kept, any other character becomes '_' + its char code in hex.
+{
+  var result = '';
+  for (i=0;i<search.length;i++) // NOTE(review): 'i' has no 'var', so this loop writes an implicit global
+  {
+    var c = search.charAt(i);
+    var cn = c.charCodeAt(0);
+    if (c.match(/[a-z0-9]/)) // lower-case letters and digits pass through unchanged
+    {
+      result+=c;
+    }
+    else if (cn<16) // codes below 0x10: insert a '0' so the escape has two hex digits
+    {
+      result+="_0"+cn.toString(16);
+    }
+    else // everything else (upper-case letters included): '_' followed by the hex code
+    {
+      result+="_"+cn.toString(16);
+    }
+  }
+  return result;
+}
+
+function getXPos(item) // Returns the sum of offsetLeft over 'item' and its offsetParent chain.
+{
+  var x = 0;
+  if (item.offsetWidth) // a falsy offsetWidth skips the walk, so 0 is returned
+  {
+    while (item && item!=document.body) // stop at document.body or when the chain ends
+    {
+      x   += item.offsetLeft;
+      item = item.offsetParent;
+    }
+  }
+  return x;
+}
+
+function getYPos(item) // Returns the sum of offsetTop over 'item' and its offsetParent chain.
+{
+  var y = 0;
+  if (item.offsetWidth) // a falsy offsetWidth skips the walk, so 0 is returned
+  {
+     while (item && item!=document.body) // stop at document.body or when the chain ends
+     {
+       y   += item.offsetTop;
+       item = item.offsetParent;
+     }
+  }
+  return y;
+}
+
+/* A class handling everything associated with the search panel.
+   Parameters:
+   name        - The name of the global variable that will be storing this
+                 instance.  Is needed to be able to set timeouts.
+   resultsPath - path to use for external files
+   inFrame, label - stored as insideFrame / searchLabel (popup placement branch; text shown while inactive)
+*/
+function SearchBox(name, resultsPath, inFrame, label)
+{
+  if (!name || !resultsPath) {  alert("Missing parameters to SearchBox."); } // only warns; construction continues
+   
+  // ---------- Instance variables
+  this.name                  = name;        // used to build the code strings handed to setTimeout
+  this.resultsPath           = resultsPath; // prefix of every results page URL built in Search()
+  this.keyTimeout            = 0;           // id of the pending delayed Search() timer, 0 when none
+  this.keyTimeoutLength      = 500;         // setTimeout delay used before Search() fires
+  this.closeSelectionTimeout = 300;         // setTimeout delay used before CloseSelectionWindow() fires
+  this.lastSearchValue       = "";          // compared in OnSearchFieldChange to detect a changed query
+  this.lastResultsPage       = "";          // page chosen by the last Search()
+  this.hideTimeout           = 0;           // id of the pending selection-window close timer, 0 when none
+  this.searchIndex           = 0;           // selected filter; key into indexSectionNames / indexSectionsWithContent
+  this.searchActive          = false;       // toggled by Activate()
+  this.insideFrame           = inFrame;     // selects the positioning branch in OnSearchSelectShow() and Search()
+  this.searchLabel           = label;       // text shown in the field while inactive (see Activate)
+
+  // ----------- DOM Elements (each accessor looks the element up by id on every call)
+
+  this.DOMSearchField = function()
+  {  return document.getElementById("MSearchField");  }
+
+  this.DOMSearchSelect = function()
+  {  return document.getElementById("MSearchSelect");  }
+
+  this.DOMSearchSelectWindow = function()
+  {  return document.getElementById("MSearchSelectWindow");  }
+
+  this.DOMPopupSearchResults = function()
+  {  return document.getElementById("MSearchResults");  }
+
+  this.DOMPopupSearchResultsWindow = function()
+  {  return document.getElementById("MSearchResultsWindow");  }
+
+  this.DOMSearchClose = function()
+  {  return document.getElementById("MSearchClose"); }
+
+  this.DOMSearchBox = function()
+  {  return document.getElementById("MSearchBox");  }
+
+  // ------------ Event Handlers
+
+  // Called when focus is added or removed from the search field.
+  this.OnSearchFieldFocus = function(isActive)
+  {
+    this.Activate(isActive);
+  }
+
+  this.OnSearchSelectShow = function() // Shows the filter selection popup just below the MSearchSelect element.
+  {
+    var searchSelectWindow = this.DOMSearchSelectWindow();
+    var searchField        = this.DOMSearchSelect(); // despite the name, this is the MSearchSelect element
+
+    if (this.insideFrame)
+    {
+      var left = getXPos(searchField);
+      var top  = getYPos(searchField);
+      left += searchField.offsetWidth + 6;
+      top += searchField.offsetHeight;
+
+      // show search selection popup
+      searchSelectWindow.style.display='block';
+      left -= searchSelectWindow.offsetWidth; // offsetWidth is read after the popup has been displayed
+      searchSelectWindow.style.left =  left + 'px';
+      searchSelectWindow.style.top  =  top  + 'px';
+    }
+    else
+    {
+      var left = getXPos(searchField);
+      var top  = getYPos(searchField);
+      top += searchField.offsetHeight;
+
+      // show search selection popup
+      searchSelectWindow.style.display='block';
+      searchSelectWindow.style.left =  left + 'px';
+      searchSelectWindow.style.top  =  top  + 'px';
+    }
+
+    // stop selection hide timer
+    if (this.hideTimeout) 
+    {
+      clearTimeout(this.hideTimeout);
+      this.hideTimeout=0;
+    }
+    return false; // to avoid "image drag" default event
+  }
+
+  this.OnSearchSelectHide = function() // Schedules the selection popup to close after closeSelectionTimeout.
+  {
+    this.hideTimeout = setTimeout(this.name +".CloseSelectionWindow()", // code string: relies on the global named this.name
+                                  this.closeSelectionTimeout);
+  }
+
+  // Called when the content of the search field is changed.
+  this.OnSearchFieldChange = function(evt)
+  {
+    if (this.keyTimeout) // kill running timer
+    {
+      clearTimeout(this.keyTimeout);
+      this.keyTimeout = 0;
+    }
+
+    var e  = (evt) ? evt : window.event; // for IE
+    if (e.keyCode==40 || e.keyCode==13) // Down arrow or Enter
+    {
+      if (e.shiftKey==1) // with Shift: open the filter popup and focus its first entry
+      {
+        this.OnSearchSelectShow();
+        var win=this.DOMSearchSelectWindow(); 
+        for (i=0;i<win.childNodes.length;i++) // NOTE(review): 'i' has no 'var', so it is an implicit global
+        {
+          var child = win.childNodes[i]; // direct child of the popup; only 'SelectItem' nodes qualify
+          if (child.className=='SelectItem')
+          {
+            child.focus();
+            return;
+          }
+        }
+        return;
+      }
+      else if (window.frames.MSearchResults.searchResults) // results frame has a SearchResults instance
+      {
+        var elem = window.frames.MSearchResults.searchResults.NavNext(0);
+        if (elem) elem.focus();
+      }
+    }
+    else if (e.keyCode==27) // Escape out of the search field
+    {
+      this.DOMSearchField().blur();
+      this.DOMPopupSearchResultsWindow().style.display = 'none';
+      this.DOMSearchClose().style.display = 'none';
+      this.lastSearchValue = '';
+      this.Activate(false);
+      return;
+    }
+
+    // remove every space character from the query (not only leading/trailing ones)
+    var searchValue = this.DOMSearchField().value.replace(/ +/g, "");
+
+    if (searchValue != this.lastSearchValue) // search value has changed
+    {
+      if (searchValue != "") // non-empty search
+      {
+        // set timer for search update
+        this.keyTimeout = setTimeout(this.name + '.Search()',
+                                     this.keyTimeoutLength);
+      }
+      else // empty search field
+      {
+        this.DOMPopupSearchResultsWindow().style.display = 'none';
+        this.DOMSearchClose().style.display = 'none';
+        this.lastSearchValue = '';
+      }
+    }
+  }
+
+  this.SelectItemCount = function(id) // Counts the 'SelectItem' children of the popup; the 'id' parameter is not used.
+  {
+    var count=0;
+    var win=this.DOMSearchSelectWindow(); 
+    for (i=0;i<win.childNodes.length;i++) // NOTE(review): 'i' has no 'var', so it is an implicit global
+    {
+      var child = win.childNodes[i]; // direct child of the popup
+      if (child.className=='SelectItem')
+      {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  this.SelectItemSet = function(id) // Marks the id-th 'SelectItem' (zero-based) with a bullet and blanks the others.
+  {
+    var i,j=0;
+    var win=this.DOMSearchSelectWindow(); 
+    for (i=0;i<win.childNodes.length;i++)
+    {
+      var child = win.childNodes[i]; // direct child of the popup
+      if (child.className=='SelectItem')
+      {
+        var node = child.firstChild; // first child of the entry receives the mark
+        if (j==id)
+        {
+          node.innerHTML='&#8226;';
+        }
+        else
+        {
+          node.innerHTML='&#160;';
+        }
+        j++;
+      }
+    }
+  }
+
+  // Called when a search filter selection is made.
+  // set item with index id as the active item
+  this.OnSelectItem = function(id)
+  {
+    this.searchIndex = id;
+    this.SelectItemSet(id);
+    var searchValue = this.DOMSearchField().value.replace(/ +/g, "");
+    if (searchValue!="" && this.searchActive) // non-empty query while the box is active -> rerun the search
+    {
+      this.Search();
+    }
+  }
+
+  this.OnSearchSelectKey = function(evt) // Key handler for the filter popup; always returns false.
+  {
+    var e = (evt) ? evt : window.event; // for IE
+    if (e.keyCode==40 && this.searchIndex<this.SelectItemCount()) // Down; NOTE(review): '<' lets searchIndex reach the item count, one past the last zero-based item -- confirm
+    {
+      this.searchIndex++;
+      this.OnSelectItem(this.searchIndex);
+    }
+    else if (e.keyCode==38 && this.searchIndex>0) // Up
+    {
+      this.searchIndex--;
+      this.OnSelectItem(this.searchIndex);
+    }
+    else if (e.keyCode==13 || e.keyCode==27) // Enter or Escape: apply the selection, close the popup, refocus the field
+    {
+      this.OnSelectItem(this.searchIndex);
+      this.CloseSelectionWindow();
+      this.DOMSearchField().focus();
+    }
+    return false;
+  }
+
+  // --------- Actions
+
+  // Closes the results window.
+  this.CloseResultsWindow = function()
+  {
+    this.DOMPopupSearchResultsWindow().style.display = 'none';
+    this.DOMSearchClose().style.display = 'none';
+    this.Activate(false);
+  }
+
+  this.CloseSelectionWindow = function() // Hides the filter selection popup.
+  {
+    this.DOMSearchSelectWindow().style.display = 'none';
+  }
+
+  // Performs a search.
+  this.Search = function()
+  {
+    this.keyTimeout = 0;
+
+    // strip leading whitespace
+    var searchValue = this.DOMSearchField().value.replace(/^ +/, "");
+
+    var code = searchValue.toLowerCase().charCodeAt(0); // char code of the first character, lower-cased
+    var hexCode;
+    if (code<16) // pad to two hex digits
+    {
+      hexCode="0"+code.toString(16);
+    }
+    else 
+    {
+      hexCode=code.toString(16);
+    }
+
+    var resultsPage;
+    var resultsPageWithSearch;
+    var hasResultsPage;
+
+    if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1') // '1' at position <code> means a page exists for that first character
+    {
+       resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html';
+       resultsPageWithSearch = resultsPage+'?'+escape(searchValue); // query is passed to the results page in the URL
+       hasResultsPage = true;
+    }
+    else // nothing available for this search term
+    {
+       resultsPage = this.resultsPath + '/nomatches.html';
+       resultsPageWithSearch = resultsPage;
+       hasResultsPage = false;
+    }
+
+    window.frames.MSearchResults.location = resultsPageWithSearch;  
+    var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow();
+
+    if (domPopupSearchResultsWindow.style.display!='block') // popup not shown yet: display and position it
+    {
+       var domSearchBox = this.DOMSearchBox();
+       this.DOMSearchClose().style.display = 'inline';
+       if (this.insideFrame)
+       {
+         var domPopupSearchResults = this.DOMPopupSearchResults();
+         domPopupSearchResultsWindow.style.position = 'relative';
+         domPopupSearchResultsWindow.style.display  = 'block';
+         var width = document.body.clientWidth - 8; // the -8 is for IE :-(
+         domPopupSearchResultsWindow.style.width    = width + 'px';
+         domPopupSearchResults.style.width          = width + 'px';
+       }
+       else
+       {
+         var domPopupSearchResults = this.DOMPopupSearchResults();
+         var left = getXPos(domSearchBox) + 150; // domSearchBox.offsetWidth;
+         var top  = getYPos(domSearchBox) + 20;  // domSearchBox.offsetHeight + 1;
+         domPopupSearchResultsWindow.style.display = 'block';
+         left -= domPopupSearchResults.offsetWidth; // offsetWidth is read after the popup has been displayed
+         domPopupSearchResultsWindow.style.top     = top  + 'px';
+         domPopupSearchResultsWindow.style.left    = left + 'px';
+       }
+    }
+
+    this.lastSearchValue = searchValue;
+    this.lastResultsPage = resultsPage;
+  }
+
+  // -------- Activation Functions
+
+  // Activates or deactivates the search panel, resetting things to 
+  // their default values if necessary. 
+  this.Activate = function(isActive)
+  {
+    if (isActive || // open it
+        this.DOMPopupSearchResultsWindow().style.display == 'block' // or keep it active while results are showing
+       )
+    {
+      this.DOMSearchBox().className = 'MSearchBoxActive';
+
+      var searchField = this.DOMSearchField();
+
+      if (searchField.value == this.searchLabel) // clear "Search" term upon entry
+      {  
+        searchField.value = '';  
+        this.searchActive = true;
+      }
+    }
+    else if (!isActive) // directly remove the panel
+    {
+      this.DOMSearchBox().className = 'MSearchBoxInactive';
+      this.DOMSearchField().value   = this.searchLabel;
+      this.searchActive             = false;
+      this.lastSearchValue          = ''
+      this.lastResultsPage          = '';
+    }
+  }
+}
+
+// -----------------------------------------------------------------------
+
+// The class that handles everything on the search results page.
+function SearchResults(name)
+{
+    // The number of matches from the last run of <Search()>.
+    this.lastMatchCount = 0;
+    this.lastKey = 0;
+    this.repeatOn = false;
+
+    // Toggles the visibility of the passed element ID.
+    this.FindChildElement = function(id)
+    {
+      var parentElement = document.getElementById(id);
+      var element = parentElement.firstChild;
+
+      while (element && element!=parentElement)
+      {
+        if (element.nodeName == 'DIV' && element.className == 'SRChildren')
+        {
+          return element;
+        }
+
+        if (element.nodeName == 'DIV' && element.hasChildNodes())
+        {  
+           element = element.firstChild;  
+        }
+        else if (element.nextSibling)
+        {  
+           element = element.nextSibling;  
+        }
+        else
+        {
+          do
+          {
+            element = element.parentNode;
+          }
+          while (element && element!=parentElement && !element.nextSibling);
+
+          if (element && element!=parentElement)
+          {  
+            element = element.nextSibling;  
+          }
+        }
+      }
+    }
+
+    this.Toggle = function(id)
+    {
+      var element = this.FindChildElement(id);
+      if (element)
+      {
+        if (element.style.display == 'block')
+        {
+          element.style.display = 'none';
+        }
+        else
+        {
+          element.style.display = 'block';
+        }
+      }
+    }
+
+    // Searches for the passed string.  If there is no parameter,
+    // it takes it from the URL query.
+    //
+    // Always returns true, since other documents may try to call it
+    // and that may or may not be possible.
+    this.Search = function(search)
+    {
+      if (!search) // get search word from URL
+      {
+        search = window.location.search;
+        search = search.substring(1);  // Remove the leading '?'
+        search = unescape(search);
+      }
+
+      search = search.replace(/^ +/, ""); // strip leading spaces
+      search = search.replace(/ +$/, ""); // strip trailing spaces
+      search = search.toLowerCase();
+      search = convertToId(search);
+
+      var resultRows = document.getElementsByTagName("div");
+      var matches = 0;
+
+      var i = 0;
+      while (i < resultRows.length)
+      {
+        var row = resultRows.item(i);
+        if (row.className == "SRResult")
+        {
+          var rowMatchName = row.id.toLowerCase();
+          rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); // strip 'sr123_'
+
+          if (search.length<=rowMatchName.length && 
+             rowMatchName.substr(0, search.length)==search)
+          {
+            row.style.display = 'block';
+            matches++;
+          }
+          else
+          {
+            row.style.display = 'none';
+          }
+        }
+        i++;
+      }
+      document.getElementById("Searching").style.display='none';
+      if (matches == 0) // no results
+      {
+        document.getElementById("NoMatches").style.display='block';
+      }
+      else // at least one result
+      {
+        document.getElementById("NoMatches").style.display='none';
+      }
+      this.lastMatchCount = matches;
+      return true;
+    }
+
+    // return the first item with index index or higher that is visible
+    this.NavNext = function(index)
+    {
+      var focusItem;
+      while (1)
+      {
+        var focusName = 'Item'+index;
+        focusItem = document.getElementById(focusName);
+        if (focusItem && focusItem.parentNode.parentNode.style.display=='block')
+        {
+          break;
+        }
+        else if (!focusItem) // last element
+        {
+          break;
+        }
+        focusItem=null;
+        index++;
+      }
+      return focusItem;
+    }
+
+    this.NavPrev = function(index)
+    {
+      var focusItem;
+      while (1)
+      {
+        var focusName = 'Item'+index;
+        focusItem = document.getElementById(focusName);
+        if (focusItem && focusItem.parentNode.parentNode.style.display=='block')
+        {
+          break;
+        }
+        else if (!focusItem) // last element
+        {
+          break;
+        }
+        focusItem=null;
+        index--;
+      }
+      return focusItem;
+    }
+
+    this.ProcessKeys = function(e)
+    {
+      if (e.type == "keydown") 
+      {
+        this.repeatOn = false;
+        this.lastKey = e.keyCode;
+      }
+      else if (e.type == "keypress")
+      {
+        if (!this.repeatOn)
+        {
+          if (this.lastKey) this.repeatOn = true;
+          return false; // ignore first keypress after keydown
+        }
+      }
+      else if (e.type == "keyup")
+      {
+        this.lastKey = 0;
+        this.repeatOn = false;
+      }
+      return this.lastKey!=0;
+    }
+
+    this.Nav = function(evt,itemIndex) 
+    {
+      var e  = (evt) ? evt : window.event; // for IE
+      if (e.keyCode==13) return true;
+      if (!this.ProcessKeys(e)) return false;
+
+      if (this.lastKey==38) // Up
+      {
+        var newIndex = itemIndex-1;
+        var focusItem = this.NavPrev(newIndex);
+        if (focusItem)
+        {
+          var child = this.FindChildElement(focusItem.parentNode.parentNode.id);
+          if (child && child.style.display == 'block') // children visible
+          { 
+            var n=0;
+            var tmpElem;
+            while (1) // search for last child
+            {
+              tmpElem = document.getElementById('Item'+newIndex+'_c'+n);
+              if (tmpElem)
+              {
+                focusItem = tmpElem;
+              }
+              else // found it!
+              {
+                break;
+              }
+              n++;
+            }
+          }
+        }
+        if (focusItem)
+        {
+          focusItem.focus();
+        }
+        else // return focus to search field
+        {
+           parent.document.getElementById("MSearchField").focus();
+        }
+      }
+      else if (this.lastKey==40) // Down
+      {
+        var newIndex = itemIndex+1;
+        var focusItem;
+        var item = document.getElementById('Item'+itemIndex);
+        var elem = this.FindChildElement(item.parentNode.parentNode.id);
+        if (elem && elem.style.display == 'block') // children visible
+        {
+          focusItem = document.getElementById('Item'+itemIndex+'_c0');
+        }
+        if (!focusItem) focusItem = this.NavNext(newIndex);
+        if (focusItem)  focusItem.focus();
+      }
+      else if (this.lastKey==39) // Right
+      {
+        var item = document.getElementById('Item'+itemIndex);
+        var elem = this.FindChildElement(item.parentNode.parentNode.id);
+        if (elem) elem.style.display = 'block';
+      }
+      else if (this.lastKey==37) // Left
+      {
+        var item = document.getElementById('Item'+itemIndex);
+        var elem = this.FindChildElement(item.parentNode.parentNode.id);
+        if (elem) elem.style.display = 'none';
+      }
+      else if (this.lastKey==27) // Escape
+      {
+        parent.searchBox.CloseResultsWindow();
+        parent.document.getElementById("MSearchField").focus();
+      }
+      else if (this.lastKey==13) // Enter
+      {
+        return true;
+      }
+      return false;
+    }
+
+    this.NavChild = function(evt,itemIndex,childIndex)
+    {
+      var e  = (evt) ? evt : window.event; // for IE
+      if (e.keyCode==13) return true;
+      if (!this.ProcessKeys(e)) return false;
+
+      if (this.lastKey==38) // Up
+      {
+        if (childIndex>0)
+        {
+          var newIndex = childIndex-1;
+          document.getElementById('Item'+itemIndex+'_c'+newIndex).focus();
+        }
+        else // already at first child, jump to parent
+        {
+          document.getElementById('Item'+itemIndex).focus();
+        }
+      }
+      else if (this.lastKey==40) // Down
+      {
+        var newIndex = childIndex+1;
+        var elem = document.getElementById('Item'+itemIndex+'_c'+newIndex);
+        if (!elem) // last child, jump to parent next parent
+        {
+          elem = this.NavNext(itemIndex+1);
+        }
+        if (elem)
+        {
+          elem.focus();
+        } 
+      }
+      else if (this.lastKey==27) // Escape
+      {
+        parent.searchBox.CloseResultsWindow();
+        parent.document.getElementById("MSearchField").focus();
+      }
+      else if (this.lastKey==13) // Enter
+      {
+        return true;
+      }
+      return false;
+    }
+}
+
+function setKeyActions(elem,action)
+{
+  elem.setAttribute('onkeydown',action);
+  elem.setAttribute('onkeypress',action);
+  elem.setAttribute('onkeyup',action);
+}
+
+function setClassAttr(elem,attr)
+{
+  elem.setAttribute('class',attr);
+  elem.setAttribute('className',attr);
+}
+
+function createResults()
+{
+  var results = document.getElementById("SRResults");
+  for (var e=0; e<searchData.length; e++)
+  {
+    var id = searchData[e][0];
+    var srResult = document.createElement('div');
+    srResult.setAttribute('id','SR_'+id);
+    setClassAttr(srResult,'SRResult');
+    var srEntry = document.createElement('div');
+    setClassAttr(srEntry,'SREntry');
+    var srLink = document.createElement('a');
+    srLink.setAttribute('id','Item'+e);
+    setKeyActions(srLink,'return searchResults.Nav(event,'+e+')');
+    setClassAttr(srLink,'SRSymbol');
+    srLink.innerHTML = searchData[e][1][0];
+    srEntry.appendChild(srLink);
+    if (searchData[e][1].length==2) // single result
+    {
+      srLink.setAttribute('href',searchData[e][1][1][0]);
+      if (searchData[e][1][1][1])
+      {
+       srLink.setAttribute('target','_parent');
+      }
+      var srScope = document.createElement('span');
+      setClassAttr(srScope,'SRScope');
+      srScope.innerHTML = searchData[e][1][1][2];
+      srEntry.appendChild(srScope);
+    }
+    else // multiple results
+    {
+      srLink.setAttribute('href','javascript:searchResults.Toggle("SR_'+id+'")');
+      var srChildren = document.createElement('div');
+      setClassAttr(srChildren,'SRChildren');
+      for (var c=0; c<searchData[e][1].length-1; c++)
+      {
+        var srChild = document.createElement('a');
+        srChild.setAttribute('id','Item'+e+'_c'+c);
+        setKeyActions(srChild,'return searchResults.NavChild(event,'+e+','+c+')');
+        setClassAttr(srChild,'SRScope');
+        srChild.setAttribute('href',searchData[e][1][c+1][0]);
+        if (searchData[e][1][c+1][1])
+        {
+         srChild.setAttribute('target','_parent');
+        }
+        srChild.innerHTML = searchData[e][1][c+1][2];
+        srChildren.appendChild(srChild);
+      }
+      srEntry.appendChild(srChildren);
+    }
+    srResult.appendChild(srEntry);
+    results.appendChild(srResult);
+  }
+}
+
diff --git a/doc/html/search/search_l.png b/doc/html/search/search_l.png
new file mode 100644
index 0000000..c872f4d
--- /dev/null
+++ b/doc/html/search/search_l.png
Binary files differ
diff --git a/doc/html/search/search_m.png b/doc/html/search/search_m.png
new file mode 100644
index 0000000..b429a16
--- /dev/null
+++ b/doc/html/search/search_m.png
Binary files differ
diff --git a/doc/html/search/search_r.png b/doc/html/search/search_r.png
new file mode 100644
index 0000000..97ee8b4
--- /dev/null
+++ b/doc/html/search/search_r.png
Binary files differ
diff --git a/doc/html/search/typedefs_74.html b/doc/html/search/typedefs_74.html
new file mode 100644
index 0000000..b2f6d2a
--- /dev/null
+++ b/doc/html/search/typedefs_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="typedefs_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/typedefs_74.js b/doc/html/search/typedefs_74.js
new file mode 100644
index 0000000..85b00f5
--- /dev/null
+++ b/doc/html/search/typedefs_74.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['tjhandle',['tjhandle',['../group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763',1,'turbojpeg.h']]],
+  ['tjtransform',['tjtransform',['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/variables_63.html b/doc/html/search/variables_63.html
new file mode 100644
index 0000000..422085c
--- /dev/null
+++ b/doc/html/search/variables_63.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_63.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_63.js b/doc/html/search/variables_63.js
new file mode 100644
index 0000000..7b058da
--- /dev/null
+++ b/doc/html/search/variables_63.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['customfilter',['customFilter',['../structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_64.html b/doc/html/search/variables_64.html
new file mode 100644
index 0000000..df4414b
--- /dev/null
+++ b/doc/html/search/variables_64.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_64.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_64.js b/doc/html/search/variables_64.js
new file mode 100644
index 0000000..e19a050
--- /dev/null
+++ b/doc/html/search/variables_64.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['data',['data',['../structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3',1,'tjtransform']]],
+  ['denom',['denom',['../structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/variables_68.html b/doc/html/search/variables_68.html
new file mode 100644
index 0000000..2f0a862
--- /dev/null
+++ b/doc/html/search/variables_68.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_68.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_68.js b/doc/html/search/variables_68.js
new file mode 100644
index 0000000..7b17e97
--- /dev/null
+++ b/doc/html/search/variables_68.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['h',['h',['../structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_6e.html b/doc/html/search/variables_6e.html
new file mode 100644
index 0000000..2eb4def
--- /dev/null
+++ b/doc/html/search/variables_6e.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_6e.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_6e.js b/doc/html/search/variables_6e.js
new file mode 100644
index 0000000..83faa13
--- /dev/null
+++ b/doc/html/search/variables_6e.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['num',['num',['../structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec',1,'tjscalingfactor']]]
+];
diff --git a/doc/html/search/variables_6f.html b/doc/html/search/variables_6f.html
new file mode 100644
index 0000000..f06e2e0
--- /dev/null
+++ b/doc/html/search/variables_6f.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_6f.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_6f.js b/doc/html/search/variables_6f.js
new file mode 100644
index 0000000..1cca832
--- /dev/null
+++ b/doc/html/search/variables_6f.js
@@ -0,0 +1,5 @@
+var searchData=
+[
+  ['op',['op',['../structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498',1,'tjtransform']]],
+  ['options',['options',['../structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_72.html b/doc/html/search/variables_72.html
new file mode 100644
index 0000000..8a4ee7b
--- /dev/null
+++ b/doc/html/search/variables_72.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_72.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_72.js b/doc/html/search/variables_72.js
new file mode 100644
index 0000000..01cde35
--- /dev/null
+++ b/doc/html/search/variables_72.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['r',['r',['../structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf',1,'tjtransform']]]
+];
diff --git a/doc/html/search/variables_74.html b/doc/html/search/variables_74.html
new file mode 100644
index 0000000..1665fb8
--- /dev/null
+++ b/doc/html/search/variables_74.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_74.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_74.js b/doc/html/search/variables_74.js
new file mode 100644
index 0000000..13a056e
--- /dev/null
+++ b/doc/html/search/variables_74.js
@@ -0,0 +1,9 @@
+var searchData=
+[
+  ['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]],
+  ['tjgreenoffset',['tjGreenOffset',['../group___turbo_j_p_e_g.html#ga82d6e35da441112a411da41923c0ba2f',1,'turbojpeg.h']]],
+  ['tjmcuheight',['tjMCUHeight',['../group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf',1,'turbojpeg.h']]],
+  ['tjmcuwidth',['tjMCUWidth',['../group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c',1,'turbojpeg.h']]],
+  ['tjpixelsize',['tjPixelSize',['../group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c',1,'turbojpeg.h']]],
+  ['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]]
+];
diff --git a/doc/html/search/variables_77.html b/doc/html/search/variables_77.html
new file mode 100644
index 0000000..434c6df
--- /dev/null
+++ b/doc/html/search/variables_77.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_77.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_77.js b/doc/html/search/variables_77.js
new file mode 100644
index 0000000..4267002
--- /dev/null
+++ b/doc/html/search/variables_77.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['w',['w',['../structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_78.html b/doc/html/search/variables_78.html
new file mode 100644
index 0000000..602e879
--- /dev/null
+++ b/doc/html/search/variables_78.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_78.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_78.js b/doc/html/search/variables_78.js
new file mode 100644
index 0000000..41a27f2
--- /dev/null
+++ b/doc/html/search/variables_78.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['x',['x',['../structtjregion.html#a4b6a37a93997091b26a75831fa291ad9',1,'tjregion']]]
+];
diff --git a/doc/html/search/variables_79.html b/doc/html/search/variables_79.html
new file mode 100644
index 0000000..17faef9
--- /dev/null
+++ b/doc/html/search/variables_79.html
@@ -0,0 +1,26 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html><head><title></title>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta name="generator" content="Doxygen 1.8.3.1">
+<link rel="stylesheet" type="text/css" href="search.css"/>
+<script type="text/javascript" src="variables_79.js"></script>
+<script type="text/javascript" src="search.js"></script>
+</head>
+<body class="SRPage">
+<div id="SRIndex">
+<div class="SRStatus" id="Loading">Loading...</div>
+<div id="SRResults"></div>
+<script type="text/javascript"><!--
+createResults();
+--></script>
+<div class="SRStatus" id="Searching">Searching...</div>
+<div class="SRStatus" id="NoMatches">No Matches</div>
+<script type="text/javascript"><!--
+document.getElementById("Loading").style.display="none";
+document.getElementById("NoMatches").style.display="none";
+var searchResults = new SearchResults("searchResults");
+searchResults.Search();
+--></script>
+</div>
+</body>
+</html>
diff --git a/doc/html/search/variables_79.js b/doc/html/search/variables_79.js
new file mode 100644
index 0000000..86890a6
--- /dev/null
+++ b/doc/html/search/variables_79.js
@@ -0,0 +1,4 @@
+var searchData=
+[
+  ['y',['y',['../structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2',1,'tjregion']]]
+];
diff --git a/doc/html/structtjregion.html b/doc/html/structtjregion.html
new file mode 100644
index 0000000..515686c
--- /dev/null
+++ b/doc/html/structtjregion.html
@@ -0,0 +1,185 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: tjregion Struct Reference</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+</div><!-- top -->
+<div class="header">
+  <div class="summary">
+<a href="#pub-attribs">Data Fields</a>  </div>
+  <div class="headertitle">
+<div class="title">tjregion Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>Cropping region.  
+ <a href="structtjregion.html#details">More...</a></p>
+
+<p><code>#include &lt;turbojpeg.h&gt;</code></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
+Data Fields</h2></td></tr>
+<tr class="memitem:a4b6a37a93997091b26a75831fa291ad9"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a4b6a37a93997091b26a75831fa291ad9">x</a></td></tr>
+<tr class="memdesc:a4b6a37a93997091b26a75831fa291ad9"><td class="mdescLeft">&#160;</td><td class="mdescRight">The left boundary of the cropping region.  <a href="#a4b6a37a93997091b26a75831fa291ad9">More...</a><br/></td></tr>
+<tr class="separator:a4b6a37a93997091b26a75831fa291ad9"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#a7b3e0c24cfe87acc80e334cafdcf22c2">y</a></td></tr>
+<tr class="memdesc:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="mdescLeft">&#160;</td><td class="mdescRight">The upper boundary of the cropping region.  <a href="#a7b3e0c24cfe87acc80e334cafdcf22c2">More...</a><br/></td></tr>
+<tr class="separator:a7b3e0c24cfe87acc80e334cafdcf22c2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab6eb73ceef584fc23c8c8097926dce42"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#ab6eb73ceef584fc23c8c8097926dce42">w</a></td></tr>
+<tr class="memdesc:ab6eb73ceef584fc23c8c8097926dce42"><td class="mdescLeft">&#160;</td><td class="mdescRight">The width of the cropping region.  <a href="#ab6eb73ceef584fc23c8c8097926dce42">More...</a><br/></td></tr>
+<tr class="separator:ab6eb73ceef584fc23c8c8097926dce42"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aecefc45a26f4d8b60dd4d825c1710115"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjregion.html#aecefc45a26f4d8b60dd4d825c1710115">h</a></td></tr>
+<tr class="memdesc:aecefc45a26f4d8b60dd4d825c1710115"><td class="mdescLeft">&#160;</td><td class="mdescRight">The height of the cropping region.  <a href="#aecefc45a26f4d8b60dd4d825c1710115">More...</a><br/></td></tr>
+<tr class="separator:aecefc45a26f4d8b60dd4d825c1710115"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Cropping region. </p>
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="aecefc45a26f4d8b60dd4d825c1710115"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjregion::h</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The height of the cropping region. </p>
+<p>Setting this to 0 is the equivalent of setting it to the height of the source JPEG image - y. </p>
+
+</div>
+</div>
+<a class="anchor" id="ab6eb73ceef584fc23c8c8097926dce42"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjregion::w</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The width of the cropping region. </p>
+<p>Setting this to 0 is the equivalent of setting it to the width of the source JPEG image - x. </p>
+
+</div>
+</div>
+<a class="anchor" id="a4b6a37a93997091b26a75831fa291ad9"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjregion::x</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The left boundary of the cropping region. </p>
+<p>This must be evenly divisible by the MCU block width (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a>.) </p>
+
+</div>
+</div>
+<a class="anchor" id="a7b3e0c24cfe87acc80e334cafdcf22c2"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjregion::y</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The upper boundary of the cropping region. </p>
+<p>This must be evenly divisible by the MCU block height (see <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>.) </p>
+
+</div>
+</div>
+<hr/>The documentation for this struct was generated from the following file:<ul>
+<li>turbojpeg.h</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/structtjscalingfactor.html b/doc/html/structtjscalingfactor.html
new file mode 100644
index 0000000..f34e150
--- /dev/null
+++ b/doc/html/structtjscalingfactor.html
@@ -0,0 +1,147 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: tjscalingfactor Struct Reference</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+</div><!-- top -->
+<div class="header">
+  <div class="summary">
+<a href="#pub-attribs">Data Fields</a>  </div>
+  <div class="headertitle">
+<div class="title">tjscalingfactor Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>Scaling factor.  
+ <a href="structtjscalingfactor.html#details">More...</a></p>
+
+<p><code>#include &lt;turbojpeg.h&gt;</code></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
+Data Fields</h2></td></tr>
+<tr class="memitem:a9b011e57f981ee23083e2c1aa5e640ec"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#a9b011e57f981ee23083e2c1aa5e640ec">num</a></td></tr>
+<tr class="memdesc:a9b011e57f981ee23083e2c1aa5e640ec"><td class="mdescLeft">&#160;</td><td class="mdescRight">Numerator.  <a href="#a9b011e57f981ee23083e2c1aa5e640ec">More...</a><br/></td></tr>
+<tr class="separator:a9b011e57f981ee23083e2c1aa5e640ec"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:aefbcdf3e9e62274b2d312c695f133ce3"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjscalingfactor.html#aefbcdf3e9e62274b2d312c695f133ce3">denom</a></td></tr>
+<tr class="memdesc:aefbcdf3e9e62274b2d312c695f133ce3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Denominator.  <a href="#aefbcdf3e9e62274b2d312c695f133ce3">More...</a><br/></td></tr>
+<tr class="separator:aefbcdf3e9e62274b2d312c695f133ce3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Scaling factor. </p>
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="aefbcdf3e9e62274b2d312c695f133ce3"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjscalingfactor::denom</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Denominator. </p>
+
+</div>
+</div>
+<a class="anchor" id="a9b011e57f981ee23083e2c1aa5e640ec"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjscalingfactor::num</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Numerator. </p>
+
+</div>
+</div>
+<hr/>The documentation for this struct was generated from the following file:<ul>
+<li>turbojpeg.h</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/structtjtransform.html b/doc/html/structtjtransform.html
new file mode 100644
index 0000000..ef2c8d5
--- /dev/null
+++ b/doc/html/structtjtransform.html
@@ -0,0 +1,211 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
+<meta http-equiv="X-UA-Compatible" content="IE=9"/>
+<meta name="generator" content="Doxygen 1.8.3.1"/>
+<title>TurboJPEG: tjtransform Struct Reference</title>
+<link href="tabs.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="jquery.js"></script>
+<script type="text/javascript" src="dynsections.js"></script>
+<link href="search/search.css" rel="stylesheet" type="text/css"/>
+<script type="text/javascript" src="search/search.js"></script>
+<script type="text/javascript">
+  $(document).ready(function() { searchBox.OnSelectItem(0); });
+</script>
+<link href="doxygen.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
+<div id="titlearea">
+<table cellspacing="0" cellpadding="0">
+ <tbody>
+ <tr style="height: 56px;">
+  <td style="padding-left: 0.5em;">
+   <div id="projectname">TurboJPEG
+   &#160;<span id="projectnumber">1.4</span>
+   </div>
+  </td>
+ </tr>
+ </tbody>
+</table>
+</div>
+<!-- end header part -->
+<!-- Generated by Doxygen 1.8.3.1 -->
+<script type="text/javascript">
+var searchBox = new SearchBox("searchBox", "search",false,'Search');
+</script>
+  <div id="navrow1" class="tabs">
+    <ul class="tablist">
+      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
+      <li><a href="modules.html"><span>Modules</span></a></li>
+      <li class="current"><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li>
+        <div id="MSearchBox" class="MSearchBoxInactive">
+        <span class="left">
+          <img id="MSearchSelect" src="search/mag_sel.png"
+               onmouseover="return searchBox.OnSearchSelectShow()"
+               onmouseout="return searchBox.OnSearchSelectHide()"
+               alt=""/>
+          <input type="text" id="MSearchField" value="Search" accesskey="S"
+               onfocus="searchBox.OnSearchFieldFocus(true)" 
+               onblur="searchBox.OnSearchFieldFocus(false)" 
+               onkeyup="searchBox.OnSearchFieldChange(event)"/>
+          </span><span class="right">
+            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
+          </span>
+        </div>
+      </li>
+    </ul>
+  </div>
+  <div id="navrow2" class="tabs2">
+    <ul class="tablist">
+      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
+      <li><a href="classes.html"><span>Data&#160;Structure&#160;Index</span></a></li>
+      <li><a href="functions.html"><span>Data&#160;Fields</span></a></li>
+    </ul>
+  </div>
+<!-- window showing the filter options -->
+<div id="MSearchSelectWindow"
+     onmouseover="return searchBox.OnSearchSelectShow()"
+     onmouseout="return searchBox.OnSearchSelectHide()"
+     onkeydown="return searchBox.OnSearchSelectKey(event)">
+<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Data Structures</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Groups</a></div>
+
+<!-- iframe showing the search results (closed by default) -->
+<div id="MSearchResultsWindow">
+<iframe src="javascript:void(0)" frameborder="0" 
+        name="MSearchResults" id="MSearchResults">
+</iframe>
+</div>
+
+</div><!-- top -->
+<div class="header">
+  <div class="summary">
+<a href="#pub-attribs">Data Fields</a>  </div>
+  <div class="headertitle">
+<div class="title">tjtransform Struct Reference<div class="ingroups"><a class="el" href="group___turbo_j_p_e_g.html">TurboJPEG</a></div></div>  </div>
+</div><!--header-->
+<div class="contents">
+
+<p>Lossless transform.  
+ <a href="structtjtransform.html#details">More...</a></p>
+
+<p><code>#include &lt;turbojpeg.h&gt;</code></p>
+<table class="memberdecls">
+<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-attribs"></a>
+Data Fields</h2></td></tr>
+<tr class="memitem:ac324e5e442abec8a961e5bf219db12cf"><td class="memItemLeft" align="right" valign="top"><a class="el" href="structtjregion.html">tjregion</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac324e5e442abec8a961e5bf219db12cf">r</a></td></tr>
+<tr class="memdesc:ac324e5e442abec8a961e5bf219db12cf"><td class="mdescLeft">&#160;</td><td class="mdescRight">Cropping region.  <a href="#ac324e5e442abec8a961e5bf219db12cf">More...</a><br/></td></tr>
+<tr class="separator:ac324e5e442abec8a961e5bf219db12cf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2525aab4ba6978a1c273f74fef50e498"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a2525aab4ba6978a1c273f74fef50e498">op</a></td></tr>
+<tr class="memdesc:a2525aab4ba6978a1c273f74fef50e498"><td class="mdescLeft">&#160;</td><td class="mdescRight">One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>.  <a href="#a2525aab4ba6978a1c273f74fef50e498">More...</a><br/></td></tr>
+<tr class="separator:a2525aab4ba6978a1c273f74fef50e498"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac0e74655baa4402209a21e1ae481c8f6"><td class="memItemLeft" align="right" valign="top">int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#ac0e74655baa4402209a21e1ae481c8f6">options</a></td></tr>
+<tr class="memdesc:ac0e74655baa4402209a21e1ae481c8f6"><td class="mdescLeft">&#160;</td><td class="mdescRight">The bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>.  <a href="#ac0e74655baa4402209a21e1ae481c8f6">More...</a><br/></td></tr>
+<tr class="separator:ac0e74655baa4402209a21e1ae481c8f6"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="memItemLeft" align="right" valign="top">void *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a688fe8f1a8ecc12a538d9e561cf338e3">data</a></td></tr>
+<tr class="memdesc:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="mdescLeft">&#160;</td><td class="mdescRight">Arbitrary data that can be accessed within the body of the callback function.  <a href="#a688fe8f1a8ecc12a538d9e561cf338e3">More...</a><br/></td></tr>
+<tr class="separator:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memItemLeft" align="right" valign="top">int(*&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">customFilter</a> )(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td></tr>
+<tr class="memdesc:a43ee1bcdd2a8d7249a756774f78793c1"><td class="mdescLeft">&#160;</td><td class="mdescRight">A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image.  <a href="#a43ee1bcdd2a8d7249a756774f78793c1">More...</a><br/></td></tr>
+<tr class="separator:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memSeparator" colspan="2">&#160;</td></tr>
+</table>
+<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
+<div class="textblock"><p>Lossless transform. </p>
+</div><h2 class="groupheader">Field Documentation</h2>
+<a class="anchor" id="a43ee1bcdd2a8d7249a756774f78793c1"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int(* tjtransform::customFilter)(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image. </p>
+<p>This allows for custom filters or other transformations to be applied in the frequency domain.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">coeffs</td><td>pointer to an array of transformed DCT coefficients. (NOTE: this pointer is not guaranteed to be valid once the callback returns, so applications wishing to hand off the DCT coefficients to another function or library should make a copy of them within the body of the callback.) </td></tr>
+    <tr><td class="paramname">arrayRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the array pointed to by <code>coeffs</code> as well as its offset relative to the component plane. TurboJPEG implementations may choose to split each component plane into multiple DCT coefficient arrays and call the callback function once for each array. </td></tr>
+    <tr><td class="paramname">planeRegion</td><td><a class="el" href="structtjregion.html" title="Cropping region.">tjregion</a> structure containing the width and height of the component plane to which <code>coeffs</code> belongs </td></tr>
+    <tr><td class="paramname">componentIndex</td><td>ID number of the component plane to which <code>coeffs</code> belongs (Y, Cb, and Cr have, respectively, IDs of 0, 1, and 2 in typical JPEG images.) </td></tr>
+    <tr><td class="paramname">transformIndex</td><td>ID number of the transformed image to which <code>coeffs</code> belongs. This is the same as the index of the transform in the <code>transforms</code> array that was passed to <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>. </td></tr>
+    <tr><td class="paramname">transform</td><td>a pointer to a <a class="el" href="structtjtransform.html" title="Lossless transform.">tjtransform</a> structure that specifies the parameters and/or cropping region for this transform</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if the callback was successful, or -1 if an error occurred. </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="a688fe8f1a8ecc12a538d9e561cf338e3"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">void* tjtransform::data</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Arbitrary data that can be accessed within the body of the callback function. </p>
+
+</div>
+</div>
+<a class="anchor" id="a2525aab4ba6978a1c273f74fef50e498"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjtransform::op</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>One of the <a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">transform operations</a>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ac0e74655baa4402209a21e1ae481c8f6"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">int tjtransform::options</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>The bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga9c771a757fc1294add611906b89ab2d2">transform options</a>. </p>
+
+</div>
+</div>
+<a class="anchor" id="ac324e5e442abec8a961e5bf219db12cf"></a>
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="structtjregion.html">tjregion</a> tjtransform::r</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Cropping region. </p>
+
+</div>
+</div>
+<hr/>The documentation for this struct was generated from the following file:<ul>
+<li>turbojpeg.h</li>
+</ul>
+</div><!-- contents -->
+<!-- start footer part -->
+<hr class="footer"/><address class="footer"><small>
+Generated by &#160;<a href="http://www.doxygen.org/index.html">
+<img class="footer" src="doxygen.png" alt="doxygen"/>
+</a> 1.8.3.1
+</small></address>
+</body>
+</html>
diff --git a/doc/html/sync_off.png b/doc/html/sync_off.png
new file mode 100644
index 0000000..3b443fc
--- /dev/null
+++ b/doc/html/sync_off.png
Binary files differ
diff --git a/doc/html/sync_on.png b/doc/html/sync_on.png
new file mode 100644
index 0000000..e08320f
--- /dev/null
+++ b/doc/html/sync_on.png
Binary files differ
diff --git a/doc/html/tab_a.png b/doc/html/tab_a.png
new file mode 100644
index 0000000..3b725c4
--- /dev/null
+++ b/doc/html/tab_a.png
Binary files differ
diff --git a/doc/html/tab_b.png b/doc/html/tab_b.png
new file mode 100644
index 0000000..e2b4a86
--- /dev/null
+++ b/doc/html/tab_b.png
Binary files differ
diff --git a/doc/html/tab_h.png b/doc/html/tab_h.png
new file mode 100644
index 0000000..fd5cb70
--- /dev/null
+++ b/doc/html/tab_h.png
Binary files differ
diff --git a/doc/html/tab_s.png b/doc/html/tab_s.png
new file mode 100644
index 0000000..ab478c9
--- /dev/null
+++ b/doc/html/tab_s.png
Binary files differ
diff --git a/doc/html/tabs.css b/doc/html/tabs.css
new file mode 100644
index 0000000..9cf578f
--- /dev/null
+++ b/doc/html/tabs.css
@@ -0,0 +1,60 @@
+.tabs, .tabs2, .tabs3 {
+    background-image: url('tab_b.png');
+    width: 100%;
+    z-index: 101;
+    font-size: 13px;
+    font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif;
+}
+
+.tabs2 {
+    font-size: 10px;
+}
+.tabs3 {
+    font-size: 9px;
+}
+
+.tablist {
+    margin: 0;
+    padding: 0;
+    display: table;
+}
+
+.tablist li {
+    float: left;
+    display: table-cell;
+    background-image: url('tab_b.png');
+    line-height: 36px;
+    list-style: none;
+}
+
+.tablist a {
+    display: block;
+    padding: 0 20px;
+    font-weight: bold;
+    background-image:url('tab_s.png');
+    background-repeat:no-repeat;
+    background-position:right;
+    color: #283A5D;
+    text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9);
+    text-decoration: none;
+    outline: none;
+}
+
+.tabs3 .tablist a {
+    padding: 0 10px;
+}
+
+.tablist a:hover {
+    background-image: url('tab_h.png');
+    background-repeat:repeat-x;
+    color: #fff;
+    text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0);
+    text-decoration: none;
+}
+
+.tablist li.current a {
+    background-image: url('tab_a.png');
+    background-repeat:repeat-x;
+    color: #fff;
+    text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0);
+}
diff --git a/doxygen.config b/doxygen.config
new file mode 100644
index 0000000..9680175
--- /dev/null
+++ b/doxygen.config
@@ -0,0 +1,15 @@
+PROJECT_NAME = TurboJPEG
+PROJECT_NUMBER = 1.4
+OUTPUT_DIRECTORY = doc/
+USE_WINDOWS_ENCODING = NO
+OPTIMIZE_OUTPUT_FOR_C = YES
+WARN_NO_PARAMDOC = YES
+GENERATE_LATEX = NO
+FILE_PATTERNS = turbojpeg.h
+HIDE_UNDOC_MEMBERS = YES
+VERBATIM_HEADERS = NO
+EXTRACT_STATIC = YES
+JAVADOC_AUTOBRIEF = YES
+MAX_INITIALIZER_LINES = 0
+ALWAYS_DETAILED_SEC = YES
+HTML_TIMESTAMP = NO
diff --git a/example.c b/example.c
index 7fc354f..0a65a6c 100644
--- a/example.c
+++ b/example.c
@@ -3,10 +3,10 @@
  *
  * This file illustrates how to use the IJG code as a subroutine library
  * to read or write JPEG image files.  You should look at this code in
- * conjunction with the documentation file libjpeg.doc.
+ * conjunction with the documentation file libjpeg.txt.
  *
  * This code will not do anything useful as-is, but it may be helpful as a
- * skeleton for constructing routines that call the JPEG library.  
+ * skeleton for constructing routines that call the JPEG library.
  *
  * We present these routines in the same coding style used in the JPEG code
  * (ANSI function definitions, etc); but you are of course free to code your
@@ -58,9 +58,9 @@
  * RGB color and is described by:
  */
 
-extern JSAMPLE * image_buffer;	/* Points to large array of R,G,B-order data */
-extern int image_height;	/* Number of rows in image */
-extern int image_width;		/* Number of columns in image */
+extern JSAMPLE * image_buffer;  /* Points to large array of R,G,B-order data */
+extern int image_height;        /* Number of rows in image */
+extern int image_width;         /* Number of columns in image */
 
 
 /*
@@ -88,9 +88,9 @@
    */
   struct jpeg_error_mgr jerr;
   /* More stuff */
-  FILE * outfile;		/* target file */
-  JSAMPROW row_pointer[1];	/* pointer to JSAMPLE row[s] */
-  int row_stride;		/* physical row width in image buffer */
+  FILE * outfile;               /* target file */
+  JSAMPROW row_pointer[1];      /* pointer to JSAMPLE row[s] */
+  int row_stride;               /* physical row width in image buffer */
 
   /* Step 1: allocate and initialize JPEG compression object */
 
@@ -122,10 +122,10 @@
   /* First we supply a description of the input image.
    * Four fields of the cinfo struct must be filled in:
    */
-  cinfo.image_width = image_width; 	/* image width and height, in pixels */
+  cinfo.image_width = image_width;      /* image width and height, in pixels */
   cinfo.image_height = image_height;
-  cinfo.input_components = 3;		/* # of color components per pixel */
-  cinfo.in_color_space = JCS_RGB; 	/* colorspace of input image */
+  cinfo.input_components = 3;           /* # of color components per pixel */
+  cinfo.in_color_space = JCS_RGB;       /* colorspace of input image */
   /* Now use the library's routine to set default compression parameters.
    * (You must set at least cinfo.in_color_space before calling this,
    * since the defaults depend on the source color space.)
@@ -151,7 +151,7 @@
    * To keep things simple, we pass one scanline per call; you can pass
    * more if you wish, though.
    */
-  row_stride = image_width * 3;	/* JSAMPLEs per row in image_buffer */
+  row_stride = image_width * 3; /* JSAMPLEs per row in image_buffer */
 
   while (cinfo.next_scanline < cinfo.image_height) {
     /* jpeg_write_scanlines expects an array of pointers to scanlines.
@@ -196,7 +196,7 @@
  * files for anything that doesn't fit within the maximum-memory setting.
  * (Note that temp files are NOT needed if you use the default parameters.)
  * On some systems you may need to set up a signal handler to ensure that
- * temporary files are deleted if the program is interrupted.  See libjpeg.doc.
+ * temporary files are deleted if the program is interrupted.  See libjpeg.txt.
  *
  * Scanlines MUST be supplied in top-to-bottom order if you want your JPEG
  * files to be compatible with everyone else's.  If you cannot readily read
@@ -248,9 +248,9 @@
  */
 
 struct my_error_mgr {
-  struct jpeg_error_mgr pub;	/* "public" fields */
+  struct jpeg_error_mgr pub;    /* "public" fields */
 
-  jmp_buf setjmp_buffer;	/* for return to caller */
+  jmp_buf setjmp_buffer;        /* for return to caller */
 };
 
 typedef struct my_error_mgr * my_error_ptr;
@@ -293,9 +293,9 @@
    */
   struct my_error_mgr jerr;
   /* More stuff */
-  FILE * infile;		/* source file */
-  JSAMPARRAY buffer;		/* Output row buffer */
-  int row_stride;		/* physical row width in output buffer */
+  FILE * infile;                /* source file */
+  JSAMPARRAY buffer;            /* Output row buffer */
+  int row_stride;               /* physical row width in output buffer */
 
   /* In this example we want to open the input file before doing anything else,
    * so that the setjmp() error recovery below can assume the file is open.
@@ -335,7 +335,7 @@
   /* We can ignore the return value from jpeg_read_header since
    *   (a) suspension is not possible with the stdio data source, and
    *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
-   * See libjpeg.doc for more info.
+   * See libjpeg.txt for more info.
    */
 
   /* Step 4: set parameters for decompression */
@@ -356,12 +356,12 @@
    * output image dimensions available, as well as the output colormap
    * if we asked for color quantization.
    * In this example, we need to make an output work buffer of the right size.
-   */ 
+   */
   /* JSAMPLEs per row in output buffer */
   row_stride = cinfo.output_width * cinfo.output_components;
   /* Make a one-row-high sample array that will go away when done with image */
   buffer = (*cinfo.mem->alloc_sarray)
-		((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
+                ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1);
 
   /* Step 6: while (scan lines remain to be read) */
   /*           jpeg_read_scanlines(...); */
@@ -413,14 +413,14 @@
  * In the above code, we ignored the return value of jpeg_read_scanlines,
  * which is the number of scanlines actually read.  We could get away with
  * this because we asked for only one line at a time and we weren't using
- * a suspending data source.  See libjpeg.doc for more info.
+ * a suspending data source.  See libjpeg.txt for more info.
  *
  * We cheated a bit by calling alloc_sarray() after jpeg_start_decompress();
  * we should have done it beforehand to ensure that the space would be
  * counted against the JPEG max_memory setting.  In some systems the above
  * code would risk an out-of-memory error.  However, in general we don't
  * know the output image dimensions before jpeg_start_decompress(), unless we
- * call jpeg_calc_output_dimensions().  See libjpeg.doc for more about this.
+ * call jpeg_calc_output_dimensions().  See libjpeg.txt for more about this.
  *
  * Scanlines are returned in the same order as they appear in the JPEG file,
  * which is standardly top-to-bottom.  If you must emit data bottom-to-top,
@@ -429,5 +429,5 @@
  *
  * As with compression, some operating modes may require temporary files.
  * On some systems you may need to set up a signal handler to ensure that
- * temporary files are deleted if the program is interrupted.  See libjpeg.doc.
+ * temporary files are deleted if the program is interrupted.  See libjpeg.txt.
  */
diff --git a/filelist.doc b/filelist.doc
deleted file mode 100644
index e14982c..0000000
--- a/filelist.doc
+++ /dev/null
@@ -1,210 +0,0 @@
-IJG JPEG LIBRARY:  FILE LIST
-
-Copyright (C) 1994-1998, Thomas G. Lane.
-This file is part of the Independent JPEG Group's software.
-For conditions of distribution and use, see the accompanying README file.
-
-
-Here is a road map to the files in the IJG JPEG distribution.  The
-distribution includes the JPEG library proper, plus two application
-programs ("cjpeg" and "djpeg") which use the library to convert JPEG
-files to and from some other popular image formats.  A third application
-"jpegtran" uses the library to do lossless conversion between different
-variants of JPEG.  There are also two stand-alone applications,
-"rdjpgcom" and "wrjpgcom".
-
-
-THE JPEG LIBRARY
-================
-
-Include files:
-
-jpeglib.h	JPEG library's exported data and function declarations.
-jconfig.h	Configuration declarations.  Note: this file is not present
-		in the distribution; it is generated during installation.
-jmorecfg.h	Additional configuration declarations; need not be changed
-		for a standard installation.
-jerror.h	Declares JPEG library's error and trace message codes.
-jinclude.h	Central include file used by all IJG .c files to reference
-		system include files.
-jpegint.h	JPEG library's internal data structures.
-jchuff.h	Private declarations for Huffman encoder modules.
-jdhuff.h	Private declarations for Huffman decoder modules.
-jdct.h		Private declarations for forward & reverse DCT subsystems.
-jmemsys.h	Private declarations for memory management subsystem.
-jversion.h	Version information.
-
-Applications using the library should include jpeglib.h (which in turn
-includes jconfig.h and jmorecfg.h).  Optionally, jerror.h may be included
-if the application needs to reference individual JPEG error codes.  The
-other include files are intended for internal use and would not normally
-be included by an application program.  (cjpeg/djpeg/etc do use jinclude.h,
-since its function is to improve portability of the whole IJG distribution.
-Most other applications will directly include the system include files they
-want, and hence won't need jinclude.h.)
-
-
-C source code files:
-
-These files contain most of the functions intended to be called directly by
-an application program:
-
-jcapimin.c	Application program interface: core routines for compression.
-jcapistd.c	Application program interface: standard compression.
-jdapimin.c	Application program interface: core routines for decompression.
-jdapistd.c	Application program interface: standard decompression.
-jcomapi.c	Application program interface routines common to compression
-		and decompression.
-jcparam.c	Compression parameter setting helper routines.
-jctrans.c	API and library routines for transcoding compression.
-jdtrans.c	API and library routines for transcoding decompression.
-
-Compression side of the library:
-
-jcinit.c	Initialization: determines which other modules to use.
-jcmaster.c	Master control: setup and inter-pass sequencing logic.
-jcmainct.c	Main buffer controller (preprocessor => JPEG compressor).
-jcprepct.c	Preprocessor buffer controller.
-jccoefct.c	Buffer controller for DCT coefficient buffer.
-jccolor.c	Color space conversion.
-jcsample.c	Downsampling.
-jcdctmgr.c	DCT manager (DCT implementation selection & control).
-jfdctint.c	Forward DCT using slow-but-accurate integer method.
-jfdctfst.c	Forward DCT using faster, less accurate integer method.
-jfdctflt.c	Forward DCT using floating-point arithmetic.
-jchuff.c	Huffman entropy coding for sequential JPEG.
-jcphuff.c	Huffman entropy coding for progressive JPEG.
-jcmarker.c	JPEG marker writing.
-jdatadst.c	Data destination manager for stdio output.
-
-Decompression side of the library:
-
-jdmaster.c	Master control: determines which other modules to use.
-jdinput.c	Input controller: controls input processing modules.
-jdmainct.c	Main buffer controller (JPEG decompressor => postprocessor).
-jdcoefct.c	Buffer controller for DCT coefficient buffer.
-jdpostct.c	Postprocessor buffer controller.
-jdmarker.c	JPEG marker reading.
-jdhuff.c	Huffman entropy decoding for sequential JPEG.
-jdphuff.c	Huffman entropy decoding for progressive JPEG.
-jddctmgr.c	IDCT manager (IDCT implementation selection & control).
-jidctint.c	Inverse DCT using slow-but-accurate integer method.
-jidctfst.c	Inverse DCT using faster, less accurate integer method.
-jidctflt.c	Inverse DCT using floating-point arithmetic.
-jidctred.c	Inverse DCTs with reduced-size outputs.
-jdsample.c	Upsampling.
-jdcolor.c	Color space conversion.
-jdmerge.c	Merged upsampling/color conversion (faster, lower quality).
-jquant1.c	One-pass color quantization using a fixed-spacing colormap.
-jquant2.c	Two-pass color quantization using a custom-generated colormap.
-		Also handles one-pass quantization to an externally given map.
-jdatasrc.c	Data source manager for stdio input.
-
-Support files for both compression and decompression:
-
-jerror.c	Standard error handling routines (application replaceable).
-jmemmgr.c	System-independent (more or less) memory management code.
-jutils.c	Miscellaneous utility routines.
-
-jmemmgr.c relies on a system-dependent memory management module.  The IJG
-distribution includes the following implementations of the system-dependent
-module:
-
-jmemnobs.c	"No backing store": assumes adequate virtual memory exists.
-jmemansi.c	Makes temporary files with ANSI-standard routine tmpfile().
-jmemname.c	Makes temporary files with program-generated file names.
-jmemdos.c	Custom implementation for MS-DOS (16-bit environment only):
-		can use extended and expanded memory as well as temp files.
-jmemmac.c	Custom implementation for Apple Macintosh.
-
-Exactly one of the system-dependent modules should be configured into an
-installed JPEG library (see install.doc for hints about which one to use).
-On unusual systems you may find it worthwhile to make a special
-system-dependent memory manager.
-
-
-Non-C source code files:
-
-jmemdosa.asm	80x86 assembly code support for jmemdos.c; used only in
-		MS-DOS-specific configurations of the JPEG library.
-
-
-CJPEG/DJPEG/JPEGTRAN
-====================
-
-Include files:
-
-cdjpeg.h	Declarations shared by cjpeg/djpeg/jpegtran modules.
-cderror.h	Additional error and trace message codes for cjpeg et al.
-transupp.h	Declarations for jpegtran support routines in transupp.c.
-
-C source code files:
-
-cjpeg.c		Main program for cjpeg.
-djpeg.c		Main program for djpeg.
-jpegtran.c	Main program for jpegtran.
-cdjpeg.c	Utility routines used by all three programs.
-rdcolmap.c	Code to read a colormap file for djpeg's "-map" switch.
-rdswitch.c	Code to process some of cjpeg's more complex switches.
-		Also used by jpegtran.
-transupp.c	Support code for jpegtran: lossless image manipulations.
-
-Image file reader modules for cjpeg:
-
-rdbmp.c		BMP file input.
-rdgif.c		GIF file input (now just a stub).
-rdppm.c		PPM/PGM file input.
-rdrle.c		Utah RLE file input.
-rdtarga.c	Targa file input.
-
-Image file writer modules for djpeg:
-
-wrbmp.c		BMP file output.
-wrgif.c		GIF file output (a mere shadow of its former self).
-wrppm.c		PPM/PGM file output.
-wrrle.c		Utah RLE file output.
-wrtarga.c	Targa file output.
-
-
-RDJPGCOM/WRJPGCOM
-=================
-
-C source code files:
-
-rdjpgcom.c	Stand-alone rdjpgcom application.
-wrjpgcom.c	Stand-alone wrjpgcom application.
-
-These programs do not depend on the IJG library.  They do use
-jconfig.h and jinclude.h, only to improve portability.
-
-
-ADDITIONAL FILES
-================
-
-Documentation (see README for a guide to the documentation files):
-
-README		Master documentation file.
-*.doc		Other documentation files.
-*.1		Documentation in Unix man page format.
-change.log	Version-to-version change highlights.
-example.c	Sample code for calling JPEG library.
-
-Configuration/installation files and programs (see install.doc for more info):
-
-configure	Unix shell script to perform automatic configuration.
-ltconfig	Support scripts for configure (from GNU libtool).
-ltmain.sh
-config.guess
-config.sub
-install-sh	Install shell script for those Unix systems lacking one.
-ckconfig.c	Program to generate jconfig.h on non-Unix systems.
-jconfig.doc	Template for making jconfig.h by hand.
-makefile.*	Sample makefiles for particular systems.
-jconfig.*	Sample jconfig.h for particular systems.
-ansi2knr.c	De-ANSIfier for pre-ANSI C compilers (courtesy of
-		L. Peter Deutsch and Aladdin Enterprises).
-
-Test files (see install.doc for test procedure):
-
-test*.*		Source and comparison files for confidence test.
-		These are binary image files, NOT text files.
diff --git a/install-sh b/install-sh
deleted file mode 100755
index e843669..0000000
--- a/install-sh
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/bin/sh
-#
-# install - install a program, script, or datafile
-# This comes from X11R5 (mit/util/scripts/install.sh).
-#
-# Copyright 1991 by the Massachusetts Institute of Technology
-#
-# Permission to use, copy, modify, distribute, and sell this software and its
-# documentation for any purpose is hereby granted without fee, provided that
-# the above copyright notice appear in all copies and that both that
-# copyright notice and this permission notice appear in supporting
-# documentation, and that the name of M.I.T. not be used in advertising or
-# publicity pertaining to distribution of the software without specific,
-# written prior permission.  M.I.T. makes no representations about the
-# suitability of this software for any purpose.  It is provided "as is"
-# without express or implied warranty.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch.  It can only install one file at a time, a restriction
-# shared with many OS's install programs.
-
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit="${DOITPROG-}"
-
-
-# put in absolute paths if you don't have them in your path; or use env. vars.
-
-mvprog="${MVPROG-mv}"
-cpprog="${CPPROG-cp}"
-chmodprog="${CHMODPROG-chmod}"
-chownprog="${CHOWNPROG-chown}"
-chgrpprog="${CHGRPPROG-chgrp}"
-stripprog="${STRIPPROG-strip}"
-rmprog="${RMPROG-rm}"
-mkdirprog="${MKDIRPROG-mkdir}"
-
-transformbasename=""
-transform_arg=""
-instcmd="$mvprog"
-chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
-rmcmd="$rmprog -f"
-mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
-
-while [ x"$1" != x ]; do
-    case $1 in
-	-c) instcmd="$cpprog"
-	    shift
-	    continue;;
-
-	-d) dir_arg=true
-	    shift
-	    continue;;
-
-	-m) chmodcmd="$chmodprog $2"
-	    shift
-	    shift
-	    continue;;
-
-	-o) chowncmd="$chownprog $2"
-	    shift
-	    shift
-	    continue;;
-
-	-g) chgrpcmd="$chgrpprog $2"
-	    shift
-	    shift
-	    continue;;
-
-	-s) stripcmd="$stripprog"
-	    shift
-	    continue;;
-
-	-t=*) transformarg=`echo $1 | sed 's/-t=//'`
-	    shift
-	    continue;;
-
-	-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
-	    shift
-	    continue;;
-
-	*)  if [ x"$src" = x ]
-	    then
-		src=$1
-	    else
-		# this colon is to work around a 386BSD /bin/sh bug
-		:
-		dst=$1
-	    fi
-	    shift
-	    continue;;
-    esac
-done
-
-if [ x"$src" = x ]
-then
-	echo "install:	no input file specified"
-	exit 1
-else
-	true
-fi
-
-if [ x"$dir_arg" != x ]; then
-	dst=$src
-	src=""
-	
-	if [ -d $dst ]; then
-		instcmd=:
-	else
-		instcmd=mkdir
-	fi
-else
-
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad 
-# if $src (and thus $dsttmp) contains '*'.
-
-	if [ -f $src -o -d $src ]
-	then
-		true
-	else
-		echo "install:  $src does not exist"
-		exit 1
-	fi
-	
-	if [ x"$dst" = x ]
-	then
-		echo "install:	no destination specified"
-		exit 1
-	else
-		true
-	fi
-
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
-	if [ -d $dst ]
-	then
-		dst="$dst"/`basename $src`
-	else
-		true
-	fi
-fi
-
-## this sed command emulates the dirname command
-dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
-
-# Make sure that the destination directory exists.
-#  this part is taken from Noah Friedman's mkinstalldirs script
-
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='	
-'
-IFS="${IFS-${defaultIFS}}"
-
-oIFS="${IFS}"
-# Some sh's can't handle IFS=/ for some reason.
-IFS='%'
-set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS="${oIFS}"
-
-pathcomp=''
-
-while [ $# -ne 0 ] ; do
-	pathcomp="${pathcomp}${1}"
-	shift
-
-	if [ ! -d "${pathcomp}" ] ;
-        then
-		$mkdirprog "${pathcomp}"
-	else
-		true
-	fi
-
-	pathcomp="${pathcomp}/"
-done
-fi
-
-if [ x"$dir_arg" != x ]
-then
-	$doit $instcmd $dst &&
-
-	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
-	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
-	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
-	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
-else
-
-# If we're going to rename the final executable, determine the name now.
-
-	if [ x"$transformarg" = x ] 
-	then
-		dstfile=`basename $dst`
-	else
-		dstfile=`basename $dst $transformbasename | 
-			sed $transformarg`$transformbasename
-	fi
-
-# don't allow the sed command to completely eliminate the filename
-
-	if [ x"$dstfile" = x ] 
-	then
-		dstfile=`basename $dst`
-	else
-		true
-	fi
-
-# Make a temp file name in the proper directory.
-
-	dsttmp=$dstdir/#inst.$$#
-
-# Move or copy the file name to the temp name
-
-	$doit $instcmd $src $dsttmp &&
-
-	trap "rm -f ${dsttmp}" 0 &&
-
-# and set any options; do chmod last to preserve setuid bits
-
-# If any of these fail, we abort the whole thing.  If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
-	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
-	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
-	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
-	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
-
-# Now rename the file to the real destination.
-
-	$doit $rmcmd -f $dstdir/$dstfile &&
-	$doit $mvcmd $dsttmp $dstdir/$dstfile 
-
-fi &&
-
-
-exit 0
diff --git a/install.doc b/install.doc
deleted file mode 100644
index 3702b98..0000000
--- a/install.doc
+++ /dev/null
@@ -1,1063 +0,0 @@
-INSTALLATION INSTRUCTIONS for the Independent JPEG Group's JPEG software
-
-Copyright (C) 1991-1998, Thomas G. Lane.
-This file is part of the Independent JPEG Group's software.
-For conditions of distribution and use, see the accompanying README file.
-
-
-This file explains how to configure and install the IJG software.  We have
-tried to make this software extremely portable and flexible, so that it can be
-adapted to almost any environment.  The downside of this decision is that the
-installation process is complicated.  We have provided shortcuts to simplify
-the task on common systems.  But in any case, you will need at least a little
-familiarity with C programming and program build procedures for your system.
-
-If you are only using this software as part of a larger program, the larger
-program's installation procedure may take care of configuring the IJG code.
-For example, Ghostscript's installation script will configure the IJG code.
-You don't need to read this file if you just want to compile Ghostscript.
-
-If you are on a Unix machine, you may not need to read this file at all.
-Try doing
-	./configure
-	make
-	make test
-If that doesn't complain, do
-	make install
-(better do "make -n install" first to see if the makefile will put the files
-where you want them).  Read further if you run into snags or want to customize
-the code for your system.
-
-
-TABLE OF CONTENTS
------------------
-
-Before you start
-Configuring the software:
-	using the automatic "configure" script
-	using one of the supplied jconfig and makefile files
-	by hand
-Building the software
-Testing the software
-Installing the software
-Optional stuff
-Optimization
-Hints for specific systems
-
-
-BEFORE YOU START
-================
-
-Before installing the software you must unpack the distributed source code.
-Since you are reading this file, you have probably already succeeded in this
-task.  However, there is a potential for error if you needed to convert the
-files to the local standard text file format (for example, if you are on
-MS-DOS you may have converted LF end-of-line to CR/LF).  You must apply
-such conversion to all the files EXCEPT those whose names begin with "test".
-The test files contain binary data; if you change them in any way then the
-self-test will give bad results.
-
-Please check the last section of this file to see if there are hints for the
-specific machine or compiler you are using.
-
-
-CONFIGURING THE SOFTWARE
-========================
-
-To configure the IJG code for your system, you need to create two files:
-  * jconfig.h: contains values for system-dependent #define symbols.
-  * Makefile: controls the compilation process.
-(On a non-Unix machine, you may create "project files" or some other
-substitute for a Makefile.  jconfig.h is needed in any environment.)
-
-We provide three different ways to generate these files:
-  * On a Unix system, you can just run the "configure" script.
-  * We provide sample jconfig files and makefiles for popular machines;
-    if your machine matches one of the samples, just copy the right sample
-    files to jconfig.h and Makefile.
-  * If all else fails, read the instructions below and make your own files.
-
-
-Configuring the software using the automatic "configure" script
----------------------------------------------------------------
-
-If you are on a Unix machine, you can just type
-	./configure
-and let the configure script construct appropriate configuration files.
-If you're using "csh" on an old version of System V, you might need to type
-	sh configure
-instead to prevent csh from trying to execute configure itself.
-Expect configure to run for a few minutes, particularly on slower machines;
-it works by compiling a series of test programs.
-
-Configure was created with GNU Autoconf and it follows the usual conventions
-for GNU configure scripts.  It makes a few assumptions that you may want to
-override.  You can do this by providing optional switches to configure:
-
-* If you want to build libjpeg as a shared library, say
-	./configure --enable-shared
-To get both shared and static libraries, say
-	./configure --enable-shared --enable-static
-Note that these switches invoke GNU libtool to take care of system-dependent
-shared library building methods.  If things don't work this way, please try
-running configure without either switch; that should build a static library
-without using libtool.  If that works, your problem is probably with libtool
-not with the IJG code.  libtool is fairly new and doesn't support all flavors
-of Unix yet.  (You might be able to find a newer version of libtool than the
-one included with libjpeg; see ftp.gnu.org.  Report libtool problems to
-bug-libtool@gnu.org.)
-
-* Configure will use gcc (GNU C compiler) if it's available, otherwise cc.
-To force a particular compiler to be selected, use the CC option, for example
-	./configure CC='cc'
-The same method can be used to include any unusual compiler switches.
-For example, on HP-UX you probably want to say
-	./configure CC='cc -Aa'
-to get HP's compiler to run in ANSI mode.
-
-* The default CFLAGS setting is "-O" for non-gcc compilers, "-O2" for gcc.
-You can override this by saying, for example,
-	./configure CFLAGS='-g'
-if you want to compile with debugging support.
-
-* Configure will set up the makefile so that "make install" will install files
-into /usr/local/bin, /usr/local/man, etc.  You can specify an installation
-prefix other than "/usr/local" by giving configure the option "--prefix=PATH".
-
-* If you don't have a lot of swap space, you may need to enable the IJG
-software's internal virtual memory mechanism.  To do this, give the option
-"--enable-maxmem=N" where N is the default maxmemory limit in megabytes.
-This is discussed in more detail under "Selecting a memory manager", below.
-You probably don't need to worry about this on reasonably-sized Unix machines,
-unless you plan to process very large images.
-
-Configure has some other features that are useful if you are cross-compiling
-or working in a network of multiple machine types; but if you need those
-features, you probably already know how to use them.
-
-
-Configuring the software using one of the supplied jconfig and makefile files
------------------------------------------------------------------------------
-
-If you have one of these systems, you can just use the provided configuration
-files:
-
-Makefile	jconfig file	System and/or compiler
-
-makefile.manx	jconfig.manx	Amiga, Manx Aztec C
-makefile.sas	jconfig.sas	Amiga, SAS C
-makeproj.mac	jconfig.mac	Apple Macintosh, Metrowerks CodeWarrior
-mak*jpeg.st	jconfig.st	Atari ST/STE/TT, Pure C or Turbo C
-makefile.bcc	jconfig.bcc	MS-DOS or OS/2, Borland C
-makefile.dj	jconfig.dj	MS-DOS, DJGPP (Delorie's port of GNU C)
-makefile.mc6	jconfig.mc6	MS-DOS, Microsoft C (16-bit only)
-makefile.wat	jconfig.wat	MS-DOS, OS/2, or Windows NT, Watcom C
-makefile.vc	jconfig.vc	Windows NT/95, MS Visual C++
-make*.ds	jconfig.vc	Windows NT/95, MS Developer Studio
-makefile.mms	jconfig.vms	Digital VMS, with MMS software
-makefile.vms	jconfig.vms	Digital VMS, without MMS software
-
-Copy the proper jconfig file to jconfig.h and the makefile to Makefile (or
-whatever your system uses as the standard makefile name).  For more info see
-the appropriate system-specific hints section near the end of this file.
-
-
-Configuring the software by hand
---------------------------------
-
-First, generate a jconfig.h file.  If you are moderately familiar with C,
-the comments in jconfig.doc should be enough information to do this; just
-copy jconfig.doc to jconfig.h and edit it appropriately.  Otherwise, you may
-prefer to use the ckconfig.c program.  You will need to compile and execute
-ckconfig.c by hand --- we hope you know at least enough to do that.
-ckconfig.c may not compile the first try (in fact, the whole idea is for it
-to fail if anything is going to).  If you get compile errors, fix them by
-editing ckconfig.c according to the directions given in ckconfig.c.  Once
-you get it to run, it will write a suitable jconfig.h file, and will also
-print out some advice about which makefile to use.
-
-You may also want to look at the canned jconfig files, if there is one for a
-system similar to yours.
-
-Second, select a makefile and copy it to Makefile (or whatever your system
-uses as the standard makefile name).  The most generic makefiles we provide
-are
-	makefile.ansi:	if your C compiler supports function prototypes
-	makefile.unix:	if not.
-(You have function prototypes if ckconfig.c put "#define HAVE_PROTOTYPES"
-in jconfig.h.)  You may want to start from one of the other makefiles if
-there is one for a system similar to yours.
-
-Look over the selected Makefile and adjust options as needed.  In particular
-you may want to change the CC and CFLAGS definitions.  For instance, if you
-are using GCC, set CC=gcc.  If you had to use any compiler switches to get
-ckconfig.c to work, make sure the same switches are in CFLAGS.
-
-If you are on a system that doesn't use makefiles, you'll need to set up
-project files (or whatever you do use) to compile all the source files and
-link them into executable files cjpeg, djpeg, jpegtran, rdjpgcom, and wrjpgcom.
-See the file lists in any of the makefiles to find out which files go into
-each program.  Note that the provided makefiles all make a "library" file
-libjpeg first, but you don't have to do that if you don't want to; the file
-lists identify which source files are actually needed for compression,
-decompression, or both.  As a last resort, you can make a batch script that
-just compiles everything and links it all together; makefile.vms is an example
-of this (it's for VMS systems that have no make-like utility).
-
-Here are comments about some specific configuration decisions you'll
-need to make:
-
-Command line style
-------------------
-
-These programs can use a Unix-like command line style which supports
-redirection and piping, like this:
-	cjpeg inputfile >outputfile
-	cjpeg <inputfile >outputfile
-	source program | cjpeg >outputfile
-The simpler "two file" command line style is just
-	cjpeg inputfile outputfile
-You may prefer the two-file style, particularly if you don't have pipes.
-
-You MUST use two-file style on any system that doesn't cope well with binary
-data fed through stdin/stdout; this is true for some MS-DOS compilers, for
-example.  If you're not on a Unix system, it's safest to assume you need
-two-file style.  (But if your compiler provides either the Posix-standard
-fdopen() library routine or a Microsoft-compatible setmode() routine, you
-can safely use the Unix command line style, by defining USE_FDOPEN or
-USE_SETMODE respectively.)
-
-To use the two-file style, make jconfig.h say "#define TWO_FILE_COMMANDLINE".
-
-Selecting a memory manager
---------------------------
-
-The IJG code is capable of working on images that are too big to fit in main
-memory; data is swapped out to temporary files as necessary.  However, the
-code to do this is rather system-dependent.  We provide five different
-memory managers:
-
-* jmemansi.c	This version uses the ANSI-standard library routine tmpfile(),
-		which not all non-ANSI systems have.  On some systems
-		tmpfile() may put the temporary file in a non-optimal
-		location; if you don't like what it does, use jmemname.c.
-
-* jmemname.c	This version creates named temporary files.  For anything
-		except a Unix machine, you'll need to configure the
-		select_file_name() routine appropriately; see the comments
-		near the head of jmemname.c.  If you use this version, define
-		NEED_SIGNAL_CATCHER in jconfig.h to make sure the temp files
-		are removed if the program is aborted.
-
-* jmemnobs.c	(That stands for No Backing Store :-).)  This will compile on
-		almost any system, but it assumes you have enough main memory
-		or virtual memory to hold the biggest images you work with.
-
-* jmemdos.c	This should be used with most 16-bit MS-DOS compilers.
-		See the system-specific notes about MS-DOS for more info.
-		IMPORTANT: if you use this, define USE_MSDOS_MEMMGR in
-		jconfig.h, and include the assembly file jmemdosa.asm in the
-		programs.  The supplied makefiles and jconfig files for
-		16-bit MS-DOS compilers already do both.
-
-* jmemmac.c	Custom version for Apple Macintosh; see the system-specific
-		notes for Macintosh for more info.
-
-To use a particular memory manager, change the SYSDEPMEM variable in your
-makefile to equal the corresponding object file name (for example, jmemansi.o
-or jmemansi.obj for jmemansi.c).
-
-If you have plenty of (real or virtual) main memory, just use jmemnobs.c.
-"Plenty" means about ten bytes for every pixel in the largest images
-you plan to process, so a lot of systems don't meet this criterion.
-If yours doesn't, try jmemansi.c first.  If that doesn't compile, you'll have
-to use jmemname.c; be sure to adjust select_file_name() for local conditions.
-You may also need to change unlink() to remove() in close_backing_store().
-
-Except with jmemnobs.c or jmemmac.c, you need to adjust the DEFAULT_MAX_MEM
-setting to a reasonable value for your system (either by adding a #define for
-DEFAULT_MAX_MEM to jconfig.h, or by adding a -D switch to the Makefile).
-This value limits the amount of data space the program will attempt to
-allocate.  Code and static data space isn't counted, so the actual memory
-needs for cjpeg or djpeg are typically 100 to 150KB more than the max-memory
-setting.  Larger max-memory settings reduce the amount of I/O needed to
-process a large image, but too large a value can result in "insufficient
-memory" failures.  On most Unix machines (and other systems with virtual
-memory), just set DEFAULT_MAX_MEM to several million and forget it.  At the
-other end of the spectrum, for MS-DOS machines you probably can't go much
-above 300K to 400K.  (On MS-DOS the value refers to conventional memory only.
-Extended/expanded memory is handled separately by jmemdos.c.)
-
-
-BUILDING THE SOFTWARE
-=====================
-
-Now you should be able to compile the software.  Just say "make" (or
-whatever's necessary to start the compilation).  Have a cup of coffee.
-
-Here are some things that could go wrong:
-
-If your compiler complains about undefined structures, you should be able to
-shut it up by putting "#define INCOMPLETE_TYPES_BROKEN" in jconfig.h.
-
-If you have trouble with missing system include files or inclusion of the
-wrong ones, read jinclude.h.  This shouldn't happen if you used configure
-or ckconfig.c to set up jconfig.h.
-
-There are a fair number of routines that do not use all of their parameters;
-some compilers will issue warnings about this, which you can ignore.  There
-are also a few configuration checks that may give "unreachable code" warnings.
-Any other warning deserves investigation.
-
-If you don't have a getenv() library routine, define NO_GETENV.
-
-Also see the system-specific hints, below.
-
-
-TESTING THE SOFTWARE
-====================
-
-As a quick test of functionality we've included a small sample image in
-several forms:
-	testorig.jpg	Starting point for the djpeg tests.
-	testimg.ppm	The output of djpeg testorig.jpg
-	testimg.bmp	The output of djpeg -bmp -colors 256 testorig.jpg
-	testimg.jpg	The output of cjpeg testimg.ppm
-	testprog.jpg	Progressive-mode equivalent of testorig.jpg.
-	testimgp.jpg	The output of cjpeg -progressive -optimize testimg.ppm
-(The first- and second-generation .jpg files aren't identical since JPEG is
-lossy.)  If you can generate duplicates of the testimg* files then you
-probably have working programs.
-
-With most of the makefiles, "make test" will perform the necessary
-comparisons.
-
-If you're using a makefile that doesn't provide the test option, run djpeg
-and cjpeg by hand and compare the output files to testimg* with whatever
-binary file comparison tool you have.  The files should be bit-for-bit
-identical.
-
-If the programs complain "MAX_ALLOC_CHUNK is wrong, please fix", then you
-need to reduce MAX_ALLOC_CHUNK to a value that fits in type size_t.
-Try adding "#define MAX_ALLOC_CHUNK 65520L" to jconfig.h.  A less likely
-configuration error is "ALIGN_TYPE is wrong, please fix": defining ALIGN_TYPE
-as long should take care of that one.
-
-If the cjpeg test run fails with "Missing Huffman code table entry", it's a
-good bet that you needed to define RIGHT_SHIFT_IS_UNSIGNED.  Go back to the
-configuration step and run ckconfig.c.  (This is a good plan for any other
-test failure, too.)
-
-If you are using Unix (one-file) command line style on a non-Unix system,
-it's a good idea to check that binary I/O through stdin/stdout actually
-works.  You should get the same results from "djpeg <testorig.jpg >out.ppm"
-as from "djpeg -outfile out.ppm testorig.jpg".  Note that the makefiles all
-use the latter style and therefore do not exercise stdin/stdout!  If this
-check fails, try recompiling with USE_SETMODE or USE_FDOPEN defined.
-If it still doesn't work, you had better use two-file style.
-
-If you chose a memory manager other than jmemnobs.c, you should test that
-temporary-file usage works.  Try "djpeg -bmp -colors 256 -max 0 testorig.jpg"
-and make sure its output matches testimg.bmp.  If you have any really large
-images handy, try compressing them with -optimize and/or decompressing with
--colors 256 to make sure your DEFAULT_MAX_MEM setting is not too large.
-
-NOTE: this is far from an exhaustive test of the JPEG software; some modules,
-such as 1-pass color quantization, are not exercised at all.  It's just a
-quick test to give you some confidence that you haven't missed something
-major.
-
-
-INSTALLING THE SOFTWARE
-=======================
-
-Once you're done with the above steps, you can install the software by
-copying the executable files (cjpeg, djpeg, jpegtran, rdjpgcom, and wrjpgcom)
-to wherever you normally install programs.  On Unix systems, you'll also want
-to put the man pages (cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1)
-in the man-page directory.  The pre-fab makefiles don't support this step
-since there's such a wide variety of installation procedures on different
-systems.
-
-If you generated a Makefile with the "configure" script, you can just say
-	make install
-to install the programs and their man pages into the standard places.
-(You'll probably need to be root to do this.)  We recommend first saying
-	make -n install
-to see where configure thought the files should go.  You may need to edit
-the Makefile, particularly if your system's conventions for man page
-filenames don't match what configure expects.
-
-If you want to install the IJG library itself, for use in compiling other
-programs besides ours, then you need to put the four include files
-	jpeglib.h jerror.h jconfig.h jmorecfg.h
-into your include-file directory, and put the library file libjpeg.a
-(extension may vary depending on system) wherever library files go.
-If you generated a Makefile with "configure", it will do what it thinks
-is the right thing if you say
-	make install-lib
-
-
-OPTIONAL STUFF
-==============
-
-Progress monitor:
-
-If you like, you can #define PROGRESS_REPORT (in jconfig.h) to enable display
-of percent-done progress reports.  The routine provided in cdjpeg.c merely
-prints percentages to stderr, but you can customize it to do something
-fancier.
-
-Utah RLE file format support:
-
-We distribute the software with support for RLE image files (Utah Raster
-Toolkit format) disabled, because the RLE support won't compile without the
-Utah library.  If you have URT version 3.1 or later, you can enable RLE
-support as follows:
-	1.  #define RLE_SUPPORTED in jconfig.h.
-	2.  Add a -I option to CFLAGS in the Makefile for the directory
-	    containing the URT .h files (typically the "include"
-	    subdirectory of the URT distribution).
-	3.  Add -L... -lrle to LDLIBS in the Makefile, where ... specifies
-	    the directory containing the URT "librle.a" file (typically the
-	    "lib" subdirectory of the URT distribution).
-
-Support for 12-bit-deep pixel data:
-
-The JPEG standard allows either 8-bit or 12-bit data precision.  (For color,
-this means 8 or 12 bits per channel, of course.)  If you need to work with
-deeper than 8-bit data, you can compile the IJG code for 12-bit operation.
-To do so:
-  1. In jmorecfg.h, define BITS_IN_JSAMPLE as 12 rather than 8.
-  2. In jconfig.h, undefine BMP_SUPPORTED, RLE_SUPPORTED, and TARGA_SUPPORTED,
-     because the code for those formats doesn't handle 12-bit data and won't
-     even compile.  (The PPM code does work, as explained below.  The GIF
-     code works too; it scales 8-bit GIF data to and from 12-bit depth
-     automatically.)
-  3. Compile.  Don't expect "make test" to pass, since the supplied test
-     files are for 8-bit data.
-
-Currently, 12-bit support does not work on 16-bit-int machines.
-
-Note that a 12-bit version will not read 8-bit JPEG files, nor vice versa;
-so you'll want to keep around a regular 8-bit compilation as well.
-(Run-time selection of data depth, to allow a single copy that does both,
-is possible but would probably slow things down considerably; it's very low
-on our to-do list.)
-
-The PPM reader (rdppm.c) can read 12-bit data from either text-format or
-binary-format PPM and PGM files.  Binary-format PPM/PGM files which have a
-maxval greater than 255 are assumed to use 2 bytes per sample, LSB first
-(little-endian order).  As of early 1995, 2-byte binary format is not
-officially supported by the PBMPLUS library, but it is expected that a
-future release of PBMPLUS will support it.  Note that the PPM reader will
-read files of any maxval regardless of the BITS_IN_JSAMPLE setting; incoming
-data is automatically rescaled to either maxval=255 or maxval=4095 as
-appropriate for the cjpeg bit depth.
-
-The PPM writer (wrppm.c) will normally write 2-byte binary PPM or PGM
-format, maxval 4095, when compiled with BITS_IN_JSAMPLE=12.  Since this
-format is not yet widely supported, you can disable it by compiling wrppm.c
-with PPM_NORAWWORD defined; then the data is scaled down to 8 bits to make a
-standard 1-byte/sample PPM or PGM file.  (Yes, this means still another copy
-of djpeg to keep around.  But hopefully you won't need it for very long.
-Poskanzer's supposed to get that new PBMPLUS release out Real Soon Now.)
-
-Of course, if you are working with 12-bit data, you probably have it stored
-in some other, nonstandard format.  In that case you'll probably want to
-write your own I/O modules to read and write your format.
-
-Note that a 12-bit version of cjpeg always runs in "-optimize" mode, in
-order to generate valid Huffman tables.  This is necessary because our
-default Huffman tables only cover 8-bit data.
-
-Removing code:
-
-If you need to make a smaller version of the JPEG software, some optional
-functions can be removed at compile time.  See the xxx_SUPPORTED #defines in
-jconfig.h and jmorecfg.h.  If at all possible, we recommend that you leave in
-decoder support for all valid JPEG files, to ensure that you can read anyone's
-output.  Taking out support for image file formats that you don't use is the
-most painless way to make the programs smaller.  Another possibility is to
-remove some of the DCT methods: in particular, the "IFAST" method may not be
-enough faster than the others to be worth keeping on your machine.  (If you
-do remove ISLOW or IFAST, be sure to redefine JDCT_DEFAULT or JDCT_FASTEST
-to a supported method, by adding a #define in jconfig.h.)
-
-
-OPTIMIZATION
-============
-
-Unless you own a Cray, you'll probably be interested in making the JPEG
-software go as fast as possible.  This section covers some machine-dependent
-optimizations you may want to try.  We suggest that before trying any of
-this, you first get the basic installation to pass the self-test step.
-Repeat the self-test after any optimization to make sure that you haven't
-broken anything.
-
-The integer DCT routines perform a lot of multiplications.  These
-multiplications must yield 32-bit results, but none of their input values
-are more than 16 bits wide.  On many machines, notably the 680x0 and 80x86
-CPUs, a 16x16=>32 bit multiply instruction is faster than a full 32x32=>32
-bit multiply.  Unfortunately there is no portable way to specify such a
-multiplication in C, but some compilers can generate one when you use the
-right combination of casts.  See the MULTIPLYxxx macro definitions in
-jdct.h.  If your compiler makes "int" be 32 bits and "short" be 16 bits,
-defining SHORTxSHORT_32 is fairly likely to work.  When experimenting with
-alternate definitions, be sure to test not only whether the code still works
-(use the self-test), but also whether it is actually faster --- on some
-compilers, alternate definitions may compute the right answer, yet be slower
-than the default.  Timing cjpeg on a large PGM (grayscale) input file is the
-best way to check this, as the DCT will be the largest fraction of the runtime
-in that mode.  (Note: some of the distributed compiler-specific jconfig files
-already contain #define switches to select appropriate MULTIPLYxxx
-definitions.)
-
-If your machine has sufficiently fast floating point hardware, you may find
-that the float DCT method is faster than the integer DCT methods, even
-after tweaking the integer multiply macros.  In that case you may want to
-make the float DCT be the default method.  (The only objection to this is
-that float DCT results may vary slightly across machines.)  To do that, add
-"#define JDCT_DEFAULT JDCT_FLOAT" to jconfig.h.  Even if you don't change
-the default, you should redefine JDCT_FASTEST, which is the method selected
-by djpeg's -fast switch.  Don't forget to update the documentation files
-(usage.doc and/or cjpeg.1, djpeg.1) to agree with what you've done.
-
-If access to "short" arrays is slow on your machine, it may be a win to
-define type JCOEF as int rather than short.  This will cost a good deal of
-memory though, particularly in some multi-pass modes, so don't do it unless
-you have memory to burn and short is REALLY slow.
-
-If your compiler can compile function calls in-line, make sure the INLINE
-macro in jmorecfg.h is defined as the keyword that marks a function
-inline-able.  Some compilers have a switch that tells the compiler to inline
-any function it thinks is profitable (e.g., -finline-functions for gcc).
-Enabling such a switch is likely to make the compiled code bigger but faster.
-
-In general, it's worth trying the maximum optimization level of your compiler,
-and experimenting with any optional optimizations such as loop unrolling.
-(Unfortunately, far too many compilers have optimizer bugs ... be prepared to
-back off if the code fails self-test.)  If you do any experimentation along
-these lines, please report the optimal settings to jpeg-info@uunet.uu.net so
-we can mention them in future releases.  Be sure to specify your machine and
-compiler version.
-
-
-HINTS FOR SPECIFIC SYSTEMS
-==========================
-
-We welcome reports on changes needed for systems not mentioned here.  Submit
-'em to jpeg-info@uunet.uu.net.  Also, if configure or ckconfig.c is wrong
-about how to configure the JPEG software for your system, please let us know.
-
-
-Acorn RISC OS:
-
-(Thanks to Simon Middleton for these hints on compiling with Desktop C.)
-After renaming the files according to Acorn conventions, take a copy of
-makefile.ansi, change all occurrences of 'libjpeg.a' to 'libjpeg.o' and
-change these definitions as indicated:
-
-CFLAGS= -throwback -IC: -Wn
-LDLIBS=C:o.Stubs
-SYSDEPMEM=jmemansi.o
-LN=Link
-AR=LibFile -c -o
-
-Also add a new line '.c.o:; $(cc) $< $(cflags) -c -o $@'.  Remove the
-lines '$(RM) libjpeg.o' and '$(AR2) libjpeg.o' and the 'jconfig.h'
-dependency section.
-
-Copy jconfig.doc to jconfig.h.  Edit jconfig.h to define TWO_FILE_COMMANDLINE
-and CHAR_IS_UNSIGNED.
-
-Run the makefile using !AMU not !Make.  If you want to use the 'clean' and
-'test' makefile entries then you will have to fiddle with the syntax a bit
-and rename the test files.
-
-
-Amiga:
-
-SAS C 6.50 reportedly is too buggy to compile the IJG code properly.
-A patch to update to 6.51 is available from SAS or AmiNet FTP sites.
-
-The supplied config files are set up to use jmemname.c as the memory
-manager, with temporary files being created on the device named by
-"JPEGTMP:".
-
-
-Atari ST/STE/TT:
- 
-Copy the project files makcjpeg.st, makdjpeg.st, maktjpeg.st, and makljpeg.st
-to cjpeg.prj, djpeg.prj, jpegtran.prj, and libjpeg.prj respectively.  The
-project files should work as-is with Pure C.  For Turbo C, change library
-filenames "pc..." to "tc..." in each project file.  Note that libjpeg.prj
-selects jmemansi.c as the recommended memory manager.  You'll probably want to
-adjust the DEFAULT_MAX_MEM setting --- you want it to be a couple hundred K
-less than your normal free memory.  Put "#define DEFAULT_MAX_MEM nnnn" into
-jconfig.h to do this.
-
-To use the 68881/68882 coprocessor for the floating point DCT, add the
-compiler option "-8" to the project files and replace pcfltlib.lib with
-pc881lib.lib in cjpeg.prj and djpeg.prj.  Or if you don't have a
-coprocessor, you may prefer to remove the float DCT code by undefining
-DCT_FLOAT_SUPPORTED in jmorecfg.h (since without a coprocessor, the float
-code will be too slow to be useful).  In that case, you can delete
-pcfltlib.lib from the project files.
-
-Note that you must make libjpeg.lib before making cjpeg.ttp, djpeg.ttp,
-or jpegtran.ttp.  You'll have to perform the self-test by hand.
-
-We haven't bothered to include project files for rdjpgcom and wrjpgcom.
-Those source files should just be compiled by themselves; they don't
-depend on the JPEG library.
-
-There is a bug in some older versions of the Turbo C library which causes the
-space used by temporary files created with "tmpfile()" not to be freed after
-an abnormal program exit.  If you check your disk afterwards, you will find
-cluster chains that are allocated but not used by a file.  This should not
-happen in cjpeg/djpeg/jpegtran, since we enable a signal catcher to explicitly
-close temp files before exiting.  But if you use the JPEG library with your
-own code, be sure to supply a signal catcher, or else use a different
-system-dependent memory manager.
-
-
-Cray:
-
-Should you be so fortunate as to be running JPEG on a Cray YMP, there is a
-compiler bug in old versions of Cray's Standard C (prior to 3.1).  If you
-still have an old compiler, you'll need to insert a line reading
-"#pragma novector" just before the loop	
-    for (i = 1; i <= (int) htbl->bits[l]; i++)
-      huffsize[p++] = (char) l;
-in fix_huff_tbl (in V5beta1, line 204 of jchuff.c and line 176 of jdhuff.c).
-[This bug may or may not still occur with the current IJG code, but it's
-probably a dead issue anyway...]
-
-
-HP-UX:
-
-If you have HP-UX 7.05 or later with the "software development" C compiler,
-you should run the compiler in ANSI mode.  If using the configure script,
-say
-	./configure CC='cc -Aa'
-(or -Ae if you prefer).  If configuring by hand, use makefile.ansi and add
-"-Aa" to the CFLAGS line in the makefile.
-
-If you have a pre-7.05 system, or if you are using the non-ANSI C compiler
-delivered with a minimum HP-UX system, then you must use makefile.unix
-(and do NOT add -Aa); or just run configure without the CC option.
-
-On HP 9000 series 800 machines, the HP C compiler is buggy in revisions prior
-to A.08.07.  If you get complaints about "not a typedef name", you'll have to
-use makefile.unix, or run configure without the CC option.
-
-
-Macintosh, generic comments:
-
-The supplied user-interface files (cjpeg.c, djpeg.c, etc) are set up to
-provide a Unix-style command line interface.  You can use this interface on
-the Mac by means of the ccommand() library routine provided by Metrowerks
-CodeWarrior or Think C.  This is only appropriate for testing the library,
-however; to make a user-friendly equivalent of cjpeg/djpeg you'd really want
-to develop a Mac-style user interface.  There isn't a complete example
-available at the moment, but there are some helpful starting points:
-1. Sam Bushell's free "To JPEG" applet provides drag-and-drop conversion to
-JPEG under System 7 and later.  This only illustrates how to use the
-compression half of the library, but it does a very nice job of that part.
-The CodeWarrior source code is available from http://www.pobox.com/~jsam.
-2. Jim Brunner prepared a Mac-style user interface for both compression and
-decompression.  Unfortunately, it hasn't been updated since IJG v4, and
-the library's API has changed considerably since then.  Still it may be of
-some help, particularly as a guide to compiling the IJG code under Think C.
-Jim's code is available from the Info-Mac archives, at sumex-aim.stanford.edu
-or mirrors thereof; see file /info-mac/dev/src/jpeg-convert-c.hqx.
-
-jmemmac.c is the recommended memory manager back end for Macintosh.  It uses
-NewPtr/DisposePtr instead of malloc/free, and has a Mac-specific
-implementation of jpeg_mem_available().  It also creates temporary files that
-follow Mac conventions.  (That part of the code relies on System-7-or-later OS
-functions.  See the comments in jmemmac.c if you need to run it on System 6.)
-NOTE that USE_MAC_MEMMGR must be defined in jconfig.h to use jmemmac.c.
-
-You can also use jmemnobs.c, if you don't care about handling images larger
-than available memory.  If you use any memory manager back end other than
-jmemmac.c, we recommend replacing "malloc" and "free" by "NewPtr" and
-"DisposePtr", because Mac C libraries often have peculiar implementations of
-malloc/free.  (For instance, free() may not return the freed space to the
-Mac Memory Manager.  This is undesirable for the IJG code because jmemmgr.c
-already clumps space requests.)
-
-
-Macintosh, Metrowerks CodeWarrior:
-
-The Unix-command-line-style interface can be used by defining USE_CCOMMAND.
-You'll also need to define TWO_FILE_COMMANDLINE to avoid stdin/stdout.
-This means that when using the cjpeg/djpeg programs, you'll have to type the
-input and output file names in the "Arguments" text-edit box, rather than
-using the file radio buttons.  (Perhaps USE_FDOPEN or USE_SETMODE would
-eliminate the problem, but I haven't heard from anyone who's tried it.)
-
-On 680x0 Macs, Metrowerks defines type "double" as a 10-byte IEEE extended
-float.  jmemmgr.c won't like this: it wants sizeof(ALIGN_TYPE) to be a power
-of 2.  Add "#define ALIGN_TYPE long" to jconfig.h to eliminate the complaint.
-
-The supplied configuration file jconfig.mac can be used for your jconfig.h;
-it includes all the recommended symbol definitions.  If you have AppleScript
-installed, you can run the supplied script makeproj.mac to create CodeWarrior
-project files for the library and the testbed applications, then build the
-library and applications.  (Thanks to Dan Sears and Don Agro for this nifty
-hack, which saves us from trying to maintain CodeWarrior project files as part
-of the IJG distribution...)
-
-
-Macintosh, Think C:
-
-The documentation in Jim Brunner's "JPEG Convert" source code (see above)
-includes detailed build instructions for Think C; it's probably somewhat
-out of date for the current release, but may be helpful.
-
-If you want to build the minimal command line version, proceed as follows.
-You'll have to prepare project files for the programs; we don't include any
-in the distribution since they are not text files.  Use the file lists in
-any of the supplied makefiles as a guide.  Also add the ANSI and Unix C
-libraries in a separate segment.  You may need to divide the JPEG files into
-more than one segment; we recommend dividing compression and decompression
-modules.  Define USE_CCOMMAND in jconfig.h so that the ccommand() routine is
-called.  You must also define TWO_FILE_COMMANDLINE because stdin/stdout
-don't handle binary data correctly.
-
-On 680x0 Macs, Think C defines type "double" as a 12-byte IEEE extended float.
-jmemmgr.c won't like this: it wants sizeof(ALIGN_TYPE) to be a power of 2.
-Add "#define ALIGN_TYPE long" to jconfig.h to eliminate the complaint.
-
-jconfig.mac should work as a jconfig.h configuration file for Think C,
-but the makeproj.mac AppleScript script is specific to CodeWarrior.  Sorry.
-
-
-MIPS R3000:
-
-MIPS's cc version 1.31 has a rather nasty optimization bug.  Don't use -O
-if you have that compiler version.  (Use "cc -V" to check the version.)
-Note that the R3000 chip is found in workstations from DEC and others.
-
-
-MS-DOS, generic comments for 16-bit compilers:
-
-The IJG code is designed to work well in 80x86 "small" or "medium" memory
-models (i.e., data pointers are 16 bits unless explicitly declared "far";
-code pointers can be either size).  You may be able to use small model to
-compile cjpeg or djpeg by itself, but you will probably have to use medium
-model for any larger application.  This won't make much difference in
-performance.  You *will* take a noticeable performance hit if you use a
-large-data memory model, and you should avoid "huge" model if at all
-possible.  Be sure that NEED_FAR_POINTERS is defined in jconfig.h if you use
-a small-data memory model; be sure it is NOT defined if you use a large-data
-model.  (The supplied makefiles and jconfig files for Borland and Microsoft C
-compile in medium model and define NEED_FAR_POINTERS.)
-
-The DOS-specific memory manager, jmemdos.c, should be used if possible.
-It needs some assembly-code routines which are in jmemdosa.asm; make sure
-your makefile assembles that file and includes it in the library.  If you
-don't have a suitable assembler, you can get pre-assembled object files for
-jmemdosa by FTP from ftp.uu.net:/graphics/jpeg/jdosaobj.zip.  (DOS-oriented
-distributions of the IJG source code often include these object files.)
-
-When using jmemdos.c, jconfig.h must define USE_MSDOS_MEMMGR and must set
-MAX_ALLOC_CHUNK to less than 64K (65520L is a typical value).  If your
-C library's far-heap malloc() can't allocate blocks that large, reduce
-MAX_ALLOC_CHUNK to whatever it can handle.
-
-If you can't use jmemdos.c for some reason --- for example, because you
-don't have an assembler to assemble jmemdosa.asm --- you'll have to fall
-back to jmemansi.c or jmemname.c.  You'll probably still need to set
-MAX_ALLOC_CHUNK in jconfig.h, because most DOS C libraries won't malloc()
-more than 64K at a time.  IMPORTANT: if you use jmemansi.c or jmemname.c,
-you will have to compile in a large-data memory model in order to get the
-right stdio library.  Too bad.
-
-wrjpgcom needs to be compiled in large model, because it malloc()s a 64KB
-work area to hold the comment text.  If your C library's malloc can't
-handle that, reduce MAX_COM_LENGTH as necessary in wrjpgcom.c.
-
-Most MS-DOS compilers treat stdin/stdout as text files, so you must use
-two-file command line style.  But if your compiler has either fdopen() or
-setmode(), you can use one-file style if you like.  To do this, define
-USE_SETMODE or USE_FDOPEN so that stdin/stdout will be set to binary mode.
-(USE_SETMODE seems to work with more DOS compilers than USE_FDOPEN.)  You
-should test that I/O through stdin/stdout produces the same results as I/O
-to explicitly named files... the "make test" procedures in the supplied
-makefiles do NOT use stdin/stdout.
-
-
-MS-DOS, generic comments for 32-bit compilers:
-
-None of the above comments about memory models apply if you are using a
-32-bit flat-memory-space environment, such as DJGPP or Watcom C.  (And you
-should use one if you have it, as performance will be much better than
-8086-compatible code!)  For flat-memory-space compilers, do NOT define
-NEED_FAR_POINTERS, and do NOT use jmemdos.c.  Use jmemnobs.c if the
-environment supplies adequate virtual memory, otherwise use jmemansi.c or
-jmemname.c.
-
-You'll still need to be careful about binary I/O through stdin/stdout.
-See the last paragraph of the previous section.
-
-
-MS-DOS, Borland C:
-
-Be sure to convert all the source files to DOS text format (CR/LF newlines).
-Although Borland C will often work OK with unmodified Unix (LF newlines)
-source files, sometimes it will give bogus compile errors.
-"Illegal character '#'" is the most common such error.  (This is true with
-Borland C 3.1, but perhaps is fixed in newer releases.)
-
-If you want one-file command line style, just undefine TWO_FILE_COMMANDLINE.
-jconfig.bcc already includes #define USE_SETMODE to make this work.
-(fdopen does not work correctly.)
-
-
-MS-DOS, Microsoft C:
-
-makefile.mc6 works with Microsoft C, DOS Visual C++, etc.  It should only
-be used if you want to build a 16-bit (small or medium memory model) program.
-
-If you want one-file command line style, just undefine TWO_FILE_COMMANDLINE.
-jconfig.mc6 already includes #define USE_SETMODE to make this work.
-(fdopen does not work correctly.)
-
-Note that this makefile assumes that the working copy of itself is called
-"makefile".  If you want to call it something else, say "makefile.mak",
-be sure to adjust the dependency line that reads "$(RFILE) : makefile".
-Otherwise the make will fail because it doesn't know how to create "makefile".
-Worse, some releases of Microsoft's make utilities give an incorrect error
-message in this situation.
-
-Old versions of MS C fail with an "out of macro expansion space" error
-because they can't cope with the macro TRACEMS8 (defined in jerror.h).
-If this happens to you, the easiest solution is to change TRACEMS8 to
-expand to nothing.  You'll lose the ability to dump out JPEG coefficient
-tables with djpeg -debug -debug, but at least you can compile.
-
-Original MS C 6.0 is very buggy; it compiles incorrect code unless you turn
-off optimization entirely (remove -O from CFLAGS).  6.00A is better, but it
-still generates bad code if you enable loop optimizations (-Ol or -Ox).
-
-MS C 8.0 crashes when compiling jquant1.c with optimization switch /Oo ...
-which is on by default.  To work around this bug, compile that one file
-with /Oo-.
-
-
-Microsoft Windows (all versions), generic comments:
-
-Some Windows system include files define typedef boolean as "unsigned char".
-The IJG code also defines typedef boolean, but we make it "int" by default.
-This doesn't affect the IJG programs because we don't import those Windows
-include files.  But if you use the JPEG library in your own program, and some
-of your program's files import one definition of boolean while some import the
-other, you can get all sorts of mysterious problems.  A good preventive step
-is to make the IJG library use "unsigned char" for boolean.  To do that,
-add something like this to your jconfig.h file:
-	/* Define "boolean" as unsigned char, not int, per Windows custom */
-	#ifndef __RPCNDR_H__	/* don't conflict if rpcndr.h already read */
-	typedef unsigned char boolean;
-	#endif
-	#define HAVE_BOOLEAN	/* prevent jmorecfg.h from redefining it */
-(This is already in jconfig.vc, by the way.)
-
-windef.h contains the declarations
-	#define far
-	#define FAR far
-Since jmorecfg.h tries to define FAR as empty, you may get a compiler
-warning if you include both jpeglib.h and windef.h (which windows.h
-includes).  To suppress the warning, you can put "#ifndef FAR"/"#endif"
-around the line "#define FAR" in jmorecfg.h.
-
-When using the library in a Windows application, you will almost certainly
-want to modify or replace the error handler module jerror.c, since our
-default error handler does a couple of inappropriate things:
-  1. it tries to write error and warning messages on stderr;
-  2. in event of a fatal error, it exits by calling exit().
-
-A simple stopgap solution for problem 1 is to replace the line
-	fprintf(stderr, "%s\n", buffer);
-(in output_message in jerror.c) with
-	MessageBox(GetActiveWindow(),buffer,"JPEG Error",MB_OK|MB_ICONERROR);
-It's highly recommended that you at least do that much, since otherwise
-error messages will disappear into nowhere.  (Beginning with IJG v6b, this
-code is already present in jerror.c; just define USE_WINDOWS_MESSAGEBOX in
-jconfig.h to enable it.)
-
-The proper solution for problem 2 is to return control to your calling
-application after a library error.  This can be done with the setjmp/longjmp
-technique discussed in libjpeg.doc and illustrated in example.c.  (NOTE:
-some older Windows C compilers provide versions of setjmp/longjmp that
-don't actually work under Windows.  You may need to use the Windows system
-functions Catch and Throw instead.)
-
-The recommended memory manager under Windows is jmemnobs.c; in other words,
-let Windows do any virtual memory management needed.  You should NOT use
-jmemdos.c nor jmemdosa.asm under Windows.
-
-For Windows 3.1, we recommend compiling in medium or large memory model;
-for newer Windows versions, use a 32-bit flat memory model.  (See the MS-DOS
-sections above for more info about memory models.)  In the 16-bit memory
-models only, you'll need to put
-	#define MAX_ALLOC_CHUNK 65520L	/* Maximum request to malloc() */
-into jconfig.h to limit allocation chunks to 64Kb.  (Without that, you'd
-have to use huge memory model, which slows things down unnecessarily.)
-jmemnobs.c works without modification in large or flat memory models, but to
-use medium model, you need to modify its jpeg_get_large and jpeg_free_large
-routines to allocate far memory.  In any case, you might like to replace
-its calls to malloc and free with direct calls on Windows memory allocation
-functions.
-
-You may also want to modify jdatasrc.c and jdatadst.c to use Windows file
-operations rather than fread/fwrite.  This is only necessary if your C
-compiler doesn't provide a competent implementation of C stdio functions.
-
-You might want to tweak the RGB_xxx macros in jmorecfg.h so that the library
-will accept or deliver color pixels in BGR sample order, not RGB; BGR order
-is usually more convenient under Windows.  Note that this change will break
-the sample applications cjpeg/djpeg, but the library itself works fine.
-
-
-Many people want to convert the IJG library into a DLL.  This is reasonably
-straightforward, but watch out for the following:
-
-  1. Don't try to compile as a DLL in small or medium memory model; use
-large model, or even better, 32-bit flat model.  Many places in the IJG code
-assume the address of a local variable is an ordinary (not FAR) pointer;
-that isn't true in a medium-model DLL.
-
-  2. Microsoft C cannot pass file pointers between applications and DLLs.
-(See Microsoft Knowledge Base, PSS ID Number Q50336.)  So jdatasrc.c and
-jdatadst.c don't work if you open a file in your application and then pass
-the pointer to the DLL.  One workaround is to make jdatasrc.c/jdatadst.c
-part of your main application rather than part of the DLL.
-
-  3. You'll probably need to modify the macros GLOBAL() and EXTERN() to
-attach suitable linkage keywords to the exported routine names.  Similarly,
-you'll want to modify METHODDEF() and JMETHOD() to ensure function pointers
-are declared in a way that lets application routines be called back through
-the function pointers.  These macros are in jmorecfg.h.  Typical definitions
-for a 16-bit DLL are:
-	#define GLOBAL(type)		type _far _pascal _loadds _export
-	#define EXTERN(type)		extern type _far _pascal _loadds
-	#define METHODDEF(type)		static type _far _pascal
-	#define JMETHOD(type,methodname,arglist)  \
-		type (_far _pascal *methodname) arglist
-For a 32-bit DLL you may want something like
-	#define GLOBAL(type)		__declspec(dllexport) type
-	#define EXTERN(type)		extern __declspec(dllexport) type
-Although not all the GLOBAL routines are actually intended to be called by
-the application, the performance cost of making them all DLL entry points is
-negligible.
-
-The unmodified IJG library presents a very C-specific application interface,
-so the resulting DLL is only usable from C or C++ applications.  There has
-been some talk of writing wrapper code that would present a simpler interface
-usable from other languages, such as Visual Basic.  This is on our to-do list
-but hasn't been very high priority --- any volunteers out there?
-
-
-Microsoft Windows, Borland C:
-
-The provided jconfig.bcc should work OK in a 32-bit Windows environment,
-but you'll need to tweak it in a 16-bit environment (you'd need to define
-NEED_FAR_POINTERS and MAX_ALLOC_CHUNK).  Beware that makefile.bcc will need
-alteration if you want to use it for Windows --- in particular, you should
-use jmemnobs.c not jmemdos.c under Windows.
-
-Borland C++ 4.5 fails with an internal compiler error when trying to compile
-jdmerge.c in 32-bit mode.  If enough people complain, perhaps Borland will fix
-it.  In the meantime, the simplest known workaround is to add a redundant
-definition of the variable range_limit in h2v1_merged_upsample(), at the head
-of the block that handles odd image width (about line 268 in v6 jdmerge.c):
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    register JSAMPLE * range_limit = cinfo->sample_range_limit; /* ADD THIS */
-    cb = GETJSAMPLE(*inptr1);
-Pretty bizarre, especially since the very similar routine h2v2_merged_upsample
-doesn't trigger the bug.
-Recent reports suggest that this bug does not occur with "bcc32a" (the
-Pentium-optimized version of the compiler).
-
-Another report from a user of Borland C 4.5 was that incorrect code (leading
-to a color shift in processed images) was produced if any of the following
-optimization switch combinations were used: 
-	-Ot -Og
-	-Ot -Op
-	-Ot -Om
-So try backing off on optimization if you see such a problem.  (Are there
-several different releases all numbered "4.5"??)
-
-
-Microsoft Windows, Microsoft Visual C++:
-
-jconfig.vc should work OK with any Microsoft compiler for a 32-bit memory
-model.  makefile.vc is intended for command-line use.  (If you are using
-the Developer Studio environment, you may prefer the DevStudio project
-files; see below.)
-
-Some users feel that it's easier to call the library from C++ code if you
-force VC++ to treat the library as C++ code, which you can do by renaming
-all the *.c files to *.cpp (and adjusting the makefile to match).  This
-avoids the need to put extern "C" { ... } around #include "jpeglib.h" in
-your C++ application.
-
-
-Microsoft Windows, Microsoft Developer Studio:
-
-We include makefiles that should work as project files in DevStudio 4.2 or
-later.  There is a library makefile that builds the IJG library as a static
-Win32 library, and an application makefile that builds the sample applications
-as Win32 console applications.  (Even if you only want the library, we
-recommend building the applications so that you can run the self-test.)
-
-To use:
-1. Copy jconfig.vc to jconfig.h, makelib.ds to jpeg.mak, and
-   makeapps.ds to apps.mak.  (Note that the renaming is critical!)
-2. Click on the .mak files to construct project workspaces.
-   (If you are using DevStudio more recent than 4.2, you'll probably
-   get a message saying that the makefiles are being updated.)
-3. Build the library project, then the applications project.
-4. Move the application .exe files from `app`\Release to an
-   appropriate location on your path.
-5. To perform the self-test, execute the command line
-	NMAKE /f makefile.vc  test
-
-
-OS/2, Borland C++:
-
-Watch out for optimization bugs in older Borland compilers; you may need
-to back off the optimization switch settings.  See the comments in
-makefile.bcc.
-
-
-SGI:
-
-On some SGI systems, you may need to set "AR2= ar -ts" in the Makefile.
-If you are using configure, you can do this by saying
-	./configure RANLIB='ar -ts'
-This change is not needed on all SGIs.  Use it only if the make fails at the
-stage of linking the completed programs.
-
-On the MIPS R4000 architecture (Indy, etc.), the compiler option "-mips2"
-reportedly speeds up the float DCT method substantially, enough to make it
-faster than the default int method (but still slower than the fast int
-method).  If you use -mips2, you may want to alter the default DCT method to
-be float.  To do this, put "#define JDCT_DEFAULT JDCT_FLOAT" in jconfig.h.
-
-
-VMS:
-
-On an Alpha/VMS system with MMS, be sure to use the "/Marco=Alpha=1"
-qualifier with MMS when building the JPEG package.
-
-VAX/VMS v5.5-1 may have problems with the test step of the build procedure
-reporting differences when it compares the original and test images.  If the
-error points to the last block of the files, it is most likely bogus and may
-be safely ignored.  It seems to be because the files are Stream_LF and
-Backup/Compare has difficulty with the (presumably) null padded files.
-This problem was not observed on VAX/VMS v6.1 or AXP/VMS v6.1.
diff --git a/jaricom.c b/jaricom.c
new file mode 100644
index 0000000..f43e2ea
--- /dev/null
+++ b/jaricom.c
@@ -0,0 +1,153 @@
+/*
+ * jaricom.c
+ *
+ * Developed 1997-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains probability estimation tables for common use in
+ * arithmetic entropy encoding and decoding routines.
+ *
+ * This data represents Table D.2 in the JPEG spec (ISO/IEC IS 10918-1
+ * and CCITT Recommendation ITU-T T.81) and Table 24 in the JBIG spec
+ * (ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* The following #define specifies the packing of the four components
+ * into the compact INT32 representation:
+ *   Qe_Value << 16 | Next_Index_MPS << 8 | Switch_MPS << 7 | Next_Index_LPS
+ * The index argument (i) is not used; it only labels the table rows.
+ * This formula must match the actual arithmetic encoder and decoder
+ * implementation, which has to be changed if this formula is changed.
+ * The organization is based on Markus Kuhn's JBIG implementation (jbig_tab.c).
+ */
+
+#define V(i,a,b,c,d) (((INT32)(a) << 16) | ((INT32)(c) << 8) | ((INT32)(d) << 7) | (b))
+
+const INT32 jpeg_aritab[113+1] = {
+/* Columns: Index (ignored by V(), kept only to label the rows), Qe_Value,
+ * Next_Index_LPS, Next_Index_MPS, Switch_MPS
+ */
+  V(   0, 0x5a1d,   1,   1, 1 ),
+  V(   1, 0x2586,  14,   2, 0 ),
+  V(   2, 0x1114,  16,   3, 0 ),
+  V(   3, 0x080b,  18,   4, 0 ),
+  V(   4, 0x03d8,  20,   5, 0 ),
+  V(   5, 0x01da,  23,   6, 0 ),
+  V(   6, 0x00e5,  25,   7, 0 ),
+  V(   7, 0x006f,  28,   8, 0 ),
+  V(   8, 0x0036,  30,   9, 0 ),
+  V(   9, 0x001a,  33,  10, 0 ),
+  V(  10, 0x000d,  35,  11, 0 ),
+  V(  11, 0x0006,   9,  12, 0 ),
+  V(  12, 0x0003,  10,  13, 0 ),
+  V(  13, 0x0001,  12,  13, 0 ),
+  V(  14, 0x5a7f,  15,  15, 1 ),
+  V(  15, 0x3f25,  36,  16, 0 ),
+  V(  16, 0x2cf2,  38,  17, 0 ),
+  V(  17, 0x207c,  39,  18, 0 ),
+  V(  18, 0x17b9,  40,  19, 0 ),
+  V(  19, 0x1182,  42,  20, 0 ),
+  V(  20, 0x0cef,  43,  21, 0 ),
+  V(  21, 0x09a1,  45,  22, 0 ),
+  V(  22, 0x072f,  46,  23, 0 ),
+  V(  23, 0x055c,  48,  24, 0 ),
+  V(  24, 0x0406,  49,  25, 0 ),
+  V(  25, 0x0303,  51,  26, 0 ),
+  V(  26, 0x0240,  52,  27, 0 ),
+  V(  27, 0x01b1,  54,  28, 0 ),
+  V(  28, 0x0144,  56,  29, 0 ),
+  V(  29, 0x00f5,  57,  30, 0 ),
+  V(  30, 0x00b7,  59,  31, 0 ),
+  V(  31, 0x008a,  60,  32, 0 ),
+  V(  32, 0x0068,  62,  33, 0 ),
+  V(  33, 0x004e,  63,  34, 0 ),
+  V(  34, 0x003b,  32,  35, 0 ),
+  V(  35, 0x002c,  33,   9, 0 ),
+  V(  36, 0x5ae1,  37,  37, 1 ),
+  V(  37, 0x484c,  64,  38, 0 ),
+  V(  38, 0x3a0d,  65,  39, 0 ),
+  V(  39, 0x2ef1,  67,  40, 0 ),
+  V(  40, 0x261f,  68,  41, 0 ),
+  V(  41, 0x1f33,  69,  42, 0 ),
+  V(  42, 0x19a8,  70,  43, 0 ),
+  V(  43, 0x1518,  72,  44, 0 ),
+  V(  44, 0x1177,  73,  45, 0 ),
+  V(  45, 0x0e74,  74,  46, 0 ),
+  V(  46, 0x0bfb,  75,  47, 0 ),
+  V(  47, 0x09f8,  77,  48, 0 ),
+  V(  48, 0x0861,  78,  49, 0 ),
+  V(  49, 0x0706,  79,  50, 0 ),
+  V(  50, 0x05cd,  48,  51, 0 ),
+  V(  51, 0x04de,  50,  52, 0 ),
+  V(  52, 0x040f,  50,  53, 0 ),
+  V(  53, 0x0363,  51,  54, 0 ),
+  V(  54, 0x02d4,  52,  55, 0 ),
+  V(  55, 0x025c,  53,  56, 0 ),
+  V(  56, 0x01f8,  54,  57, 0 ),
+  V(  57, 0x01a4,  55,  58, 0 ),
+  V(  58, 0x0160,  56,  59, 0 ),
+  V(  59, 0x0125,  57,  60, 0 ),
+  V(  60, 0x00f6,  58,  61, 0 ),
+  V(  61, 0x00cb,  59,  62, 0 ),
+  V(  62, 0x00ab,  61,  63, 0 ),
+  V(  63, 0x008f,  61,  32, 0 ),
+  V(  64, 0x5b12,  65,  65, 1 ),
+  V(  65, 0x4d04,  80,  66, 0 ),
+  V(  66, 0x412c,  81,  67, 0 ),
+  V(  67, 0x37d8,  82,  68, 0 ),
+  V(  68, 0x2fe8,  83,  69, 0 ),
+  V(  69, 0x293c,  84,  70, 0 ),
+  V(  70, 0x2379,  86,  71, 0 ),
+  V(  71, 0x1edf,  87,  72, 0 ),
+  V(  72, 0x1aa9,  87,  73, 0 ),
+  V(  73, 0x174e,  72,  74, 0 ),
+  V(  74, 0x1424,  72,  75, 0 ),
+  V(  75, 0x119c,  74,  76, 0 ),
+  V(  76, 0x0f6b,  74,  77, 0 ),
+  V(  77, 0x0d51,  75,  78, 0 ),
+  V(  78, 0x0bb6,  77,  79, 0 ),
+  V(  79, 0x0a40,  77,  48, 0 ),
+  V(  80, 0x5832,  80,  81, 1 ),
+  V(  81, 0x4d1c,  88,  82, 0 ),
+  V(  82, 0x438e,  89,  83, 0 ),
+  V(  83, 0x3bdd,  90,  84, 0 ),
+  V(  84, 0x34ee,  91,  85, 0 ),
+  V(  85, 0x2eae,  92,  86, 0 ),
+  V(  86, 0x299a,  93,  87, 0 ),
+  V(  87, 0x2516,  86,  71, 0 ),
+  V(  88, 0x5570,  88,  89, 1 ),
+  V(  89, 0x4ca9,  95,  90, 0 ),
+  V(  90, 0x44d9,  96,  91, 0 ),
+  V(  91, 0x3e22,  97,  92, 0 ),
+  V(  92, 0x3824,  99,  93, 0 ),
+  V(  93, 0x32b4,  99,  94, 0 ),
+  V(  94, 0x2e17,  93,  86, 0 ),
+  V(  95, 0x56a8,  95,  96, 1 ),
+  V(  96, 0x4f46, 101,  97, 0 ),
+  V(  97, 0x47e5, 102,  98, 0 ),
+  V(  98, 0x41cf, 103,  99, 0 ),
+  V(  99, 0x3c3d, 104, 100, 0 ),
+  V( 100, 0x375e,  99,  93, 0 ),
+  V( 101, 0x5231, 105, 102, 0 ),
+  V( 102, 0x4c0f, 106, 103, 0 ),
+  V( 103, 0x4639, 107, 104, 0 ),
+  V( 104, 0x415e, 103,  99, 0 ),
+  V( 105, 0x5627, 105, 106, 1 ),
+  V( 106, 0x50e7, 108, 107, 0 ),
+  V( 107, 0x4b85, 109, 103, 0 ),
+  V( 108, 0x5597, 110, 109, 0 ),
+  V( 109, 0x504f, 111, 107, 0 ),
+  V( 110, 0x5a10, 110, 111, 1 ),
+  V( 111, 0x5522, 112, 109, 0 ),
+  V( 112, 0x59eb, 112, 111, 1 ),
+/*
+ * This last entry is used for fixed probability estimate of 0.5
+ * as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851.
+ */
+  V( 113, 0x5a1d, 113, 113, 0 )
+};
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
new file mode 100644
index 0000000..f9f9e57
--- /dev/null
+++ b/java/CMakeLists.txt
@@ -0,0 +1,56 @@
+set(JAR_FILE turbojpeg.jar)
+set(MANIFEST_FILE ${CMAKE_CURRENT_SOURCE_DIR}/MANIFEST.MF)
+
+set(JAVA_CLASSNAMES org/libjpegturbo/turbojpeg/TJ
+  org/libjpegturbo/turbojpeg/TJCompressor
+  org/libjpegturbo/turbojpeg/TJCustomFilter
+  org/libjpegturbo/turbojpeg/TJDecompressor
+  org/libjpegturbo/turbojpeg/TJScalingFactor
+  org/libjpegturbo/turbojpeg/TJTransform
+  org/libjpegturbo/turbojpeg/TJTransformer
+  org/libjpegturbo/turbojpeg/YUVImage
+  TJUnitTest
+  TJExample
+  TJBench)
+
+if(MSVC_IDE)
+  set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
+else()
+  set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR})
+endif()
+
+set(TURBOJPEG_DLL_NAME "turbojpeg")
+if(MINGW)
+  set(TURBOJPEG_DLL_NAME "libturbojpeg")
+endif()
+configure_file(org/libjpegturbo/turbojpeg/TJLoader.java.in
+  ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java)
+
+set(JAVA_SOURCES "")
+set(JAVA_CLASSES "")
+set(JAVA_CLASSES_FULL "")
+foreach(class ${JAVA_CLASSNAMES})
+  set(JAVA_SOURCES ${JAVA_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/${class}.java)
+  set(JAVA_CLASSES ${JAVA_CLASSES} ${class}.class)
+  set(JAVA_CLASSES_FULL ${JAVA_CLASSES_FULL} ${OBJDIR}/${class}.class)
+endforeach()
+
+set(JAVA_SOURCES ${JAVA_SOURCES}
+  ${CMAKE_CURRENT_BINARY_DIR}/org/libjpegturbo/turbojpeg/TJLoader.java)
+set(JAVA_CLASSES ${JAVA_CLASSES}
+  org/libjpegturbo/turbojpeg/TJLoader.class)
+set(JAVA_CLASSES_FULL ${JAVA_CLASSES_FULL}
+  ${OBJDIR}/org/libjpegturbo/turbojpeg/TJLoader.class)
+
+string(REGEX REPLACE " " ";" JAVACFLAGS "${JAVACFLAGS}")
+add_custom_command(OUTPUT ${JAVA_CLASSES_FULL} DEPENDS ${JAVA_SOURCES}
+  COMMAND ${JAVA_COMPILE} ARGS ${JAVACFLAGS} -d ${OBJDIR} ${JAVA_SOURCES})
+
+add_custom_command(OUTPUT ${JAR_FILE} DEPENDS ${JAVA_CLASSES_FULL}
+  ${MANIFEST_FILE}
+  COMMAND ${JAVA_ARCHIVE} cfm ${JAR_FILE} ${MANIFEST_FILE} ${JAVA_CLASSES}
+  WORKING_DIRECTORY ${OBJDIR})
+
+add_custom_target(java ALL DEPENDS ${JAR_FILE})
+
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${JAR_FILE} DESTINATION classes)
diff --git a/java/MANIFEST.MF b/java/MANIFEST.MF
new file mode 100644
index 0000000..723bc51
--- /dev/null
+++ b/java/MANIFEST.MF
@@ -0,0 +1,2 @@
+Manifest-Version: 1.0
+Main-Class: TJExample
diff --git a/java/Makefile.am b/java/Makefile.am
new file mode 100644
index 0000000..23e3412
--- /dev/null
+++ b/java/Makefile.am
@@ -0,0 +1,73 @@
+JAVAROOT = .
+
+org/libjpegturbo/turbojpeg/TJLoader.java: $(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
+	mkdir -p org/libjpegturbo/turbojpeg; \
+	cat $(srcdir)/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl | \
+		sed s@%{__libdir}@$(libdir)@g > org/libjpegturbo/turbojpeg/TJLoader.java
+
+
+JAVASOURCES = org/libjpegturbo/turbojpeg/TJ.java \
+	org/libjpegturbo/turbojpeg/TJCompressor.java \
+	org/libjpegturbo/turbojpeg/TJCustomFilter.java \
+	org/libjpegturbo/turbojpeg/TJDecompressor.java \
+	org/libjpegturbo/turbojpeg/TJScalingFactor.java \
+	org/libjpegturbo/turbojpeg/TJTransform.java \
+	org/libjpegturbo/turbojpeg/TJTransformer.java \
+	org/libjpegturbo/turbojpeg/YUVImage.java \
+	TJExample.java \
+	TJUnitTest.java \
+	TJBench.java
+
+JNIHEADERS = org_libjpegturbo_turbojpeg_TJ.h \
+	org_libjpegturbo_turbojpeg_TJCompressor.h \
+	org_libjpegturbo_turbojpeg_TJDecompressor.h \
+	org_libjpegturbo_turbojpeg_TJTransformer.h
+
+if WITH_JAVA
+
+nodist_noinst_JAVA = ${JAVASOURCES} org/libjpegturbo/turbojpeg/TJLoader.java
+
+JAVA_CLASSES = org/libjpegturbo/turbojpeg/TJ.class \
+	org/libjpegturbo/turbojpeg/TJCompressor.class \
+	org/libjpegturbo/turbojpeg/TJCustomFilter.class \
+	org/libjpegturbo/turbojpeg/TJDecompressor.class \
+	org/libjpegturbo/turbojpeg/TJLoader.class \
+	org/libjpegturbo/turbojpeg/TJScalingFactor.class \
+	org/libjpegturbo/turbojpeg/TJTransform.class \
+	org/libjpegturbo/turbojpeg/TJTransformer.class \
+	org/libjpegturbo/turbojpeg/YUVImage.class \
+	TJExample.class \
+	TJUnitTest.class \
+	TJBench.class
+
+all: all-am turbojpeg.jar
+
+turbojpeg.jar: $(JAVA_CLASSES) ${srcdir}/MANIFEST.MF
+	$(JAR) cfm turbojpeg.jar ${srcdir}/MANIFEST.MF $(JAVA_CLASSES)
+
+clean-local:
+	rm -f turbojpeg.jar
+
+install-exec-local: turbojpeg.jar
+	mkdir -p $(DESTDIR)$(datadir)/classes
+	$(INSTALL) -m 644 turbojpeg.jar $(DESTDIR)$(datadir)/classes/
+
+uninstall-local:
+	rm -f $(DESTDIR)$(datadir)/classes/turbojpeg.jar
+	if [ -d $(DESTDIR)$(datadir)/classes ]; then rmdir $(DESTDIR)$(datadir)/classes; fi
+
+headers: all
+	javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJ; \
+	javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJCompressor; \
+	javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJDecompressor; \
+	javah -d ${srcdir} org.libjpegturbo.turbojpeg.TJTransformer
+
+docs: all
+	mkdir -p ${srcdir}/doc; \
+	javadoc -notimestamp -d ${srcdir}/doc -sourcepath ${srcdir} org.libjpegturbo.turbojpeg
+
+endif
+
+EXTRA_DIST = MANIFEST.MF ${JAVASOURCES} ${JNIHEADERS} doc CMakeLists.txt \
+	org/libjpegturbo/turbojpeg/TJLoader.java.tmpl \
+	org/libjpegturbo/turbojpeg/TJLoader.java.in
diff --git a/java/README b/java/README
new file mode 100644
index 0000000..88ddc3b
--- /dev/null
+++ b/java/README
@@ -0,0 +1,52 @@
+TurboJPEG Java Wrapper
+======================
+
+The TurboJPEG shared library can optionally be built with a Java Native
+Interface wrapper, which allows the library to be loaded and used directly from
+Java applications.  The Java front end for this is defined in several classes
+located under org/libjpegturbo/turbojpeg.  The source code for these Java
+classes is licensed under a BSD-style license, so the files can be incorporated
+directly into both open source and proprietary projects without restriction.  A
+Java archive (JAR) file containing these classes is also shipped with the
+"official" distribution packages of libjpeg-turbo.
+
+TJExample.java, which should also be located in the same directory as this
+README file, demonstrates how to use the TurboJPEG Java API to compress and
+decompress JPEG images in memory.
+
+
+Performance Pitfalls
+--------------------
+
+The TurboJPEG Java API defines several convenience methods that can allocate
+image buffers or instantiate classes to hold the result of compress,
+decompress, or transform operations.  However, if you use these methods, then
+be mindful of the amount of new data you are creating on the heap.  It may be
+necessary to manually invoke the garbage collector to prevent heap exhaustion
+or to prevent performance degradation.  Background garbage collection can kill
+performance, particularly in a multi-threaded environment (Java pauses all
+threads when the GC runs.)
+
+The TurboJPEG Java API always gives you the option of pre-allocating your own
+source and destination buffers, which allows you to re-use those buffers for
+compressing/decompressing multiple images.  If the image sequence you are
+compressing or decompressing consists of images of the same size, then
+pre-allocating the buffers is recommended.
+
+
+Installation Directory
+----------------------
+
+The TurboJPEG Java Wrapper will look for the TurboJPEG JNI library
+(libturbojpeg.so, libturbojpeg.jnilib, or turbojpeg.dll) in the system library
+paths or in any paths specified in LD_LIBRARY_PATH (Un*x), DYLD_LIBRARY_PATH
+(Mac), or PATH (Windows.)  Failing this, on Un*x and Mac systems, the wrapper
+will look for the JNI library under the library directory configured when
+libjpeg-turbo was built.  If that library directory is
+/opt/libjpeg-turbo/lib32, then /opt/libjpeg-turbo/lib64 is also searched, and
+vice versa.
+
+If you installed the JNI library into another directory, then you will need
+to pass an argument of -Djava.library.path={path_to_JNI_library} to java, or
+manipulate LD_LIBRARY_PATH, DYLD_LIBRARY_PATH, or PATH to include the directory
+containing the JNI library.
diff --git a/java/TJBench.java b/java/TJBench.java
new file mode 100644
index 0000000..617d312
--- /dev/null
+++ b/java/TJBench.java
@@ -0,0 +1,910 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import java.io.*;
+import java.awt.image.*;
+import javax.imageio.*;
+import java.util.*;
+import org.libjpegturbo.turbojpeg.*;
+
+class TJBench {
+
+  static int flags = 0, quiet = 0, pf = TJ.PF_BGR, yuvpad = 1, warmup = 1;
+  static boolean compOnly, decompOnly, doTile, doYUV;
+
+  static final String[] pixFormatStr = {
+    "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY"
+  };
+
+  static final String[] subNameLong = {
+    "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
+  };
+
+  static final String[] subName = {
+    "444", "422", "420", "GRAY", "440", "411"
+  };
+
+  static final String[] csName = {
+    "RGB", "YCbCr", "GRAY", "CMYK", "YCCK"
+  };
+
+  static TJScalingFactor sf;
+  static int xformOp = TJTransform.OP_NONE, xformOpt = 0;
+  static double benchTime = 5.0;
+
+
+  static final double getTime() {
+    return System.nanoTime() / 1.0e9;  /* nanoseconds -> seconds */
+  }
+
+
+  static String formatName(int subsamp, int cs) {
+    if (cs == TJ.CS_YCbCr)
+      return subNameLong[subsamp];
+    else if (cs == TJ.CS_YCCK)
+      return csName[cs] + " " + subNameLong[subsamp];
+    else
+      return csName[cs];
+  }
+
+
+  static String sigFig(double val, int figs) {
+    /* The number of digits left of the decimal point is taken to be
+       ceil(log10(|val|)); whatever remains of figs goes to the right. */
+    int intDigits = (int)Math.ceil(Math.log10(Math.abs(val)));
+    int fracDigits = figs - intDigits;
+    /* With no figures left over, print no fractional digits at all. */
+    String pattern = fracDigits < 1 ? "%.0f" : "%." + fracDigits + "f";
+    return String.format(pattern, val);
+  }
+
+
+  static byte[] loadImage(String fileName, int[] w, int[] h, int pixelFormat)
+                          throws Exception {
+    BufferedImage img = ImageIO.read(new File(fileName));
+    if (img == null)
+      throw new Exception("Could not read " + fileName);
+    w[0] = img.getWidth();
+    h[0] = img.getHeight();
+    int[] rgb = img.getRGB(0, 0, w[0], h[0], null, 0, w[0]);
+    int ps = TJ.getPixelSize(pixelFormat);
+    int rindex = TJ.getRedOffset(pixelFormat);
+    int gindex = TJ.getGreenOffset(pixelFormat);
+    int bindex = TJ.getBlueOffset(pixelFormat);
+    byte[] dstBuf = new byte[w[0] * h[0] * ps];
+    int pixels = w[0] * h[0], dstPtr = 0, rgbPtr = 0;
+    while (pixels-- > 0) {
+      dstBuf[dstPtr + rindex] = (byte)((rgb[rgbPtr] >> 16) & 0xff);
+      dstBuf[dstPtr + gindex] = (byte)((rgb[rgbPtr] >> 8) & 0xff);
+      dstBuf[dstPtr + bindex] = (byte)(rgb[rgbPtr] & 0xff);
+      dstPtr += ps;
+      rgbPtr++;
+    }
+    return dstBuf;
+  }
+
+
+  static void saveImage(String fileName, byte[] srcBuf, int w, int h,
+                        int pixelFormat) throws Exception {
+    /* Write srcBuf (w x h pixels laid out per pixelFormat) to a BMP file. */
+    BufferedImage img = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);
+    int ps = TJ.getPixelSize(pixelFormat);
+    int rindex = TJ.getRedOffset(pixelFormat);
+    int gindex = TJ.getGreenOffset(pixelFormat);
+    int bindex = TJ.getBlueOffset(pixelFormat);
+    for (int y = 0, srcPtr = 0; y < h; y++) {
+      for (int x = 0; x < w; x++, srcPtr += ps) {
+        int pixel = (srcBuf[srcPtr + rindex] & 0xff) << 16 |
+                    (srcBuf[srcPtr + gindex] & 0xff) << 8 |
+                    (srcBuf[srcPtr + bindex] & 0xff);
+        img.setRGB(x, y, pixel);
+      }
+    }
+    ImageIO.write(img, "bmp", new File(fileName));
+  }
+
+
+  /* Decompression benchmark: decode the JPEG tile(s), report, save BMP(s) */
+  static void decomp(byte[] srcBuf, byte[][] jpegBuf, int[] jpegSize,
+                     byte[] dstBuf, int w, int h, int subsamp, int jpegQual,
+                     String fileName, int tilew, int tileh) throws Exception {
+    String qualStr = "", sizeStr, tempStr;
+    TJDecompressor tjd;
+    double elapsed, elapsedDecode;
+    int ps = TJ.getPixelSize(pf), i, iter = 0;
+    int scaledw = sf.getScaled(w);
+    int scaledh = sf.getScaled(h);
+    int pitch = scaledw * ps;
+    YUVImage yuvImage = null;
+
+    if (jpegQual > 0)
+      qualStr = "_Q" + jpegQual;
+
+    tjd = new TJDecompressor();
+
+    if (dstBuf == null)
+      dstBuf = new byte[pitch * scaledh];
+
+    /* Set the destination buffer to gray so we know whether the decompressor
+       attempted to write to it */
+    Arrays.fill(dstBuf, (byte)127);
+
+    if (doYUV) {
+      int width = doTile ? tilew : scaledw;
+      int height = doTile ? tileh : scaledh;
+      yuvImage = new YUVImage(width, yuvpad, height, subsamp);
+      Arrays.fill(yuvImage.getBuf(), (byte)127);
+    }
+
+    /* Benchmark (iterations with iter < 0 are untimed warmup runs) */
+    iter -= warmup;
+    elapsed = elapsedDecode = 0.0;
+    while (true) {
+      int tile = 0;
+      double start = getTime();
+      for (int y = 0; y < h; y += tileh) {
+        for (int x = 0; x < w; x += tilew, tile++) {
+          int width = doTile ? Math.min(tilew, w - x) : scaledw;
+          int height = doTile ? Math.min(tileh, h - y) : scaledh;
+          tjd.setSourceImage(jpegBuf[tile], jpegSize[tile]);
+          if (doYUV) {
+            yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height, subsamp);
+            tjd.decompressToYUV(yuvImage, flags);
+            double startDecode = getTime();
+            tjd.setSourceImage(yuvImage);
+            tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags);
+            if (iter >= 0)
+              elapsedDecode += getTime() - startDecode;
+          } else
+            tjd.decompress(dstBuf, x, y, width, pitch, height, pf, flags);
+        }
+      }
+      iter++;
+      if (iter >= 1) {
+        elapsed += getTime() - start;
+        if (elapsed >= benchTime)
+          break;
+      }
+    }
+    if (doYUV)
+      elapsed -= elapsedDecode;  /* exclude the time spent decoding from YUV */
+
+    tjd = null;
+    for (i = 0; i < jpegBuf.length; i++)
+      jpegBuf[i] = null;
+    jpegBuf = null;  jpegSize = null;
+    System.gc();
+
+    if (quiet != 0) {
+      System.out.format("%-6s%s",
+        sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4),
+        quiet == 2 ? "\n" : "  ");
+      if (doYUV)
+        System.out.format("%s\n",
+          sigFig((double)(w * h) / 1000000. * (double)iter / elapsedDecode, 4));
+      else if (quiet != 2)
+        System.out.print("\n");
+    } else {
+      System.out.format("%s --> Frame rate:         %f fps\n",
+                        (doYUV ? "Decomp to YUV":"Decompress   "),
+                        (double)iter / elapsed);
+      System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                        (double)(w * h) / 1000000. * (double)iter / elapsed);
+      if (doYUV) {
+        System.out.format("YUV Decode    --> Frame rate:         %f fps\n",
+                          (double)iter / elapsedDecode);
+        System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                          (double)(w * h) / 1000000. * (double)iter / elapsedDecode);
+      }
+    }
+
+    if (sf.getNum() != 1 || sf.getDenom() != 1)
+      sizeStr = sf.getNum() + "_" + sf.getDenom();
+    else if (tilew != w || tileh != h)
+      sizeStr = tilew + "x" + tileh;
+    else
+      sizeStr = "full";
+    if (decompOnly)
+      tempStr = fileName + "_" + sizeStr + ".bmp";
+    else
+      tempStr = fileName + "_" + subName[subsamp] + qualStr +
+                "_" + sizeStr + ".bmp";
+
+    saveImage(tempStr, dstBuf, scaledw, scaledh, pf);
+    int ndx = tempStr.lastIndexOf('.');  /* the ".bmp" suffix, not a path dot */
+    tempStr = tempStr.substring(0, ndx) + "-err.bmp";
+    if (srcBuf != null && sf.getNum() == 1 && sf.getDenom() == 1) {
+      if (quiet == 0)
+        System.out.println("Compression error written to " + tempStr + ".");
+      if (subsamp == TJ.SAMP_GRAY) {  /* compare against source luminance */
+        for (int y = 0, index = 0; y < h; y++, index += pitch) {
+          for (int x = 0, index2 = index; x < w; x++, index2 += ps) {
+            int rindex = index2 + TJ.getRedOffset(pf);
+            int gindex = index2 + TJ.getGreenOffset(pf);
+            int bindex = index2 + TJ.getBlueOffset(pf);
+            int lum = (int)((double)(srcBuf[rindex] & 0xff) * 0.299 +
+                            (double)(srcBuf[gindex] & 0xff) * 0.587 +
+                            (double)(srcBuf[bindex] & 0xff) * 0.114 + 0.5);
+            if (lum > 255) lum = 255;
+            if (lum < 0) lum = 0;
+            dstBuf[rindex] = (byte)Math.abs((dstBuf[rindex] & 0xff) - lum);
+            dstBuf[gindex] = (byte)Math.abs((dstBuf[gindex] & 0xff) - lum);
+            dstBuf[bindex] = (byte)Math.abs((dstBuf[bindex] & 0xff) - lum);
+          }
+        }
+      } else {
+        for (int y = 0; y < h; y++)
+          for (int x = 0; x < w * ps; x++)
+            dstBuf[pitch * y + x] =
+              (byte)Math.abs((dstBuf[pitch * y + x] & 0xff) -
+                             (srcBuf[pitch * y + x] & 0xff));
+      }
+      saveImage(tempStr, dstBuf, w, h, pf);
+    }
+  }
+
+  /* Compression benchmark (followed by decomp() unless compOnly is set) */
+  static void fullTest(byte[] srcBuf, int w, int h, int subsamp, int jpegQual,
+                       String fileName) throws Exception {
+    TJCompressor tjc;
+    byte[] tmpBuf;
+    byte[][] jpegBuf;
+    int[] jpegSize;
+    double start, elapsed, elapsedEncode;
+    int totalJpegSize = 0, tilew, tileh, i, iter;
+    int ps = TJ.getPixelSize(pf);
+    int ntilesw = 1, ntilesh = 1, pitch = w * ps;
+    String pfStr = pixFormatStr[pf];
+    YUVImage yuvImage = null;
+
+    tmpBuf = new byte[pitch * h];  /* later passed to decomp() as dstBuf */
+
+    if (quiet == 0)
+      System.out.format(">>>>>  %s (%s) <--> JPEG %s Q%d  <<<<<\n", pfStr,
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down",
+        subNameLong[subsamp], jpegQual);
+
+    tjc = new TJCompressor();
+
+    for (tilew = doTile ? 8 : w, tileh = doTile ? 8 : h; ;
+         tilew *= 2, tileh *= 2) {  /* until one tile covers the image */
+      if (tilew > w)
+        tilew = w;
+      if (tileh > h)
+        tileh = h;
+      ntilesw = (w + tilew - 1) / tilew;
+      ntilesh = (h + tileh - 1) / tileh;
+
+      jpegBuf = new byte[ntilesw * ntilesh][TJ.bufSize(tilew, tileh, subsamp)];
+      jpegSize = new int[ntilesw * ntilesh];
+
+      /* Compression test (one JPEG buffer and one size entry per tile) */
+      if (quiet == 1)
+        System.out.format("%-4s (%s)  %-5s    %-3d   ", pfStr,
+                          (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
+                          subNameLong[subsamp], jpegQual);
+      for (i = 0; i < h; i++)
+        System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
+      tjc.setJPEGQuality(jpegQual);
+      tjc.setSubsamp(subsamp);
+
+      if (doYUV) {
+        yuvImage = new YUVImage(tilew, yuvpad, tileh, subsamp);
+        Arrays.fill(yuvImage.getBuf(), (byte)127);
+      }
+
+      /* Benchmark (iterations with iter < 0 are untimed warmup runs) */
+      iter = -warmup;
+      elapsed = elapsedEncode = 0.0;
+      while (true) {
+        int tile = 0;
+        totalJpegSize = 0;
+        start = getTime();
+        for (int y = 0; y < h; y += tileh) {
+          for (int x = 0; x < w; x += tilew, tile++) {
+            int width = Math.min(tilew, w - x);
+            int height = Math.min(tileh, h - y);
+            tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
+            if (doYUV) {
+              double startEncode = getTime();
+              yuvImage.setBuf(yuvImage.getBuf(), width, yuvpad, height,
+                              subsamp);
+              tjc.encodeYUV(yuvImage, flags);
+              if (iter >= 0)
+                elapsedEncode += getTime() - startEncode;
+              tjc.setSourceImage(yuvImage);
+            }
+            tjc.compress(jpegBuf[tile], flags);
+            jpegSize[tile] = tjc.getCompressedSize();
+            totalJpegSize += jpegSize[tile];
+          }
+        }
+        iter++;
+        if (iter >= 1) {
+          elapsed += getTime() - start;
+          if (elapsed >= benchTime)
+            break;
+        }
+      }
+      if (doYUV)
+        elapsed -= elapsedEncode;  /* YUV encoding is reported separately */
+
+      if (quiet == 1)
+        System.out.format("%-5d  %-5d   ", tilew, tileh);
+      if (quiet != 0) {
+        if (doYUV)
+          System.out.format("%-6s%s",
+            sigFig((double)(w * h) / 1000000. * (double)iter / elapsedEncode, 4),
+            quiet == 2 ? "\n" : "  ");
+        System.out.format("%-6s%s",
+          sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4),
+          quiet == 2 ? "\n" : "  ");
+        System.out.format("%-6s%s",
+          sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
+          quiet == 2 ? "\n" : "  ");
+      } else {
+        System.out.format("\n%s size: %d x %d\n", doTile ? "Tile" : "Image",
+                          tilew, tileh);
+        if (doYUV) {
+          System.out.format("Encode YUV    --> Frame rate:         %f fps\n",
+                            (double)iter / elapsedEncode);
+          System.out.format("                  Output image size:  %d bytes\n",
+                            yuvImage.getSize());
+          System.out.format("                  Compression ratio:  %f:1\n",
+                            (double)(w * h * ps) / (double)yuvImage.getSize());
+          System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                            (double)(w * h) / 1000000. * (double)iter / elapsedEncode);
+          System.out.format("                  Output bit stream:  %f Megabits/sec\n",
+            (double)yuvImage.getSize() * 8. / 1000000. * (double)iter / elapsedEncode);
+        }
+        System.out.format("%s --> Frame rate:         %f fps\n",
+                          doYUV ? "Comp from YUV" : "Compress     ",
+                          (double)iter / elapsed);
+        System.out.format("                  Output image size:  %d bytes\n",
+                          totalJpegSize);
+        System.out.format("                  Compression ratio:  %f:1\n",
+                          (double)(w * h * ps) / (double)totalJpegSize);
+        System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                          (double)(w * h) / 1000000. * (double)iter / elapsed);
+        System.out.format("                  Output bit stream:  %f Megabits/sec\n",
+          (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed);
+      }
+      if (tilew == w && tileh == h) {  /* only the untiled JPEG is saved */
+        String tempStr = fileName + "_" + subName[subsamp] + "_" + "Q" +
+                         jpegQual + ".jpg";
+        FileOutputStream fos = new FileOutputStream(tempStr);
+        fos.write(jpegBuf[0], 0, jpegSize[0]);
+        fos.close();
+        if (quiet == 0)
+          System.out.println("Reference image written to " + tempStr);
+      }
+
+      /* Decompression test (tmpBuf serves as the destination buffer) */
+      if (!compOnly)
+        decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
+               fileName, tilew, tileh);
+
+      if (tilew == w && tileh == h) break;
+    }
+  }
+
+  /* Benchmark lossless transform and decompression of an existing JPEG file */
+  static void decompTest(String fileName) throws Exception {
+    TJTransformer tjt;
+    byte[][] jpegBuf = null;
+    byte[] srcBuf;
+    int[] jpegSize = null;
+    int totalJpegSize;
+    int w = 0, h = 0, subsamp = -1, cs = -1, _w, _h, _tilew, _tileh,
+      _ntilesw, _ntilesh, _subsamp, x, y, iter;
+    int ntilesw = 1, ntilesh = 1;
+    double start, elapsed;
+    int ps = TJ.getPixelSize(pf), tile;
+
+    FileInputStream fis = new FileInputStream(fileName);
+    int srcSize = (int)fis.getChannel().size();
+    srcBuf = new byte[srcSize];
+    fis.read(srcBuf, 0, srcSize);  /* NOTE(review): a short read goes unnoticed */
+    fis.close();
+
+    int index = fileName.lastIndexOf('.');  /* strip only the extension */
+    if (index >= 0)
+      fileName = fileName.substring(0, index);
+
+    tjt = new TJTransformer();
+
+    tjt.setSourceImage(srcBuf, srcSize);
+    w = tjt.getWidth();
+    h = tjt.getHeight();
+    subsamp = tjt.getSubsamp();
+    cs = tjt.getColorspace();
+
+    if (quiet == 1) {
+      System.out.println("All performance values in Mpixels/sec\n");
+      System.out.format("Bitmap     JPEG   JPEG     %s  %s   Xform   Comp    Decomp  ",
+                        (doTile ? "Tile " : "Image"),
+                        (doTile ? "Tile " : "Image"));
+      if (doYUV)
+        System.out.print("Decode");
+      System.out.print("\n");
+      System.out.print("Format     CS     Subsamp  Width  Height  Perf    Ratio   Perf    ");
+      if (doYUV)
+        System.out.print("Perf");
+      System.out.println("\n");
+    } else if (quiet == 0)
+      System.out.format(">>>>>  JPEG %s --> %s (%s)  <<<<<\n",
+        formatName(subsamp, cs), pixFormatStr[pf],
+        (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
+
+    for (int tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ;
+         tilew *= 2, tileh *= 2) {  /* until one tile covers the image */
+      if (tilew > w)
+        tilew = w;
+      if (tileh > h)
+        tileh = h;
+      ntilesw = (w + tilew - 1) / tilew;
+      ntilesh = (h + tileh - 1) / tileh;
+
+      _w = w;  _h = h;  _tilew = tilew;  _tileh = tileh;
+      if (quiet == 0) {
+        System.out.format("\n%s size: %d x %d", (doTile ? "Tile" : "Image"),
+                          _tilew, _tileh);
+        if (sf.getNum() != 1 || sf.getDenom() != 1)
+          System.out.format(" --> %d x %d", sf.getScaled(_w),
+                            sf.getScaled(_h));
+        System.out.println("");
+      } else if (quiet == 1) {
+        System.out.format("%-4s (%s)  %-5s  %-5s    ", pixFormatStr[pf],
+                          (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
+                          csName[cs], subNameLong[subsamp]);
+        System.out.format("%-5d  %-5d   ", tilew, tileh);
+      }
+
+      _subsamp = subsamp;
+      if (doTile || xformOp != TJTransform.OP_NONE || xformOpt != 0) {
+        if (xformOp == TJTransform.OP_TRANSPOSE ||
+            xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90 ||
+            xformOp == TJTransform.OP_ROT270) {
+          _w = h;  _h = w;  _tilew = tileh;  _tileh = tilew;
+        }
+
+        if ((xformOpt & TJTransform.OPT_GRAY) != 0)
+          _subsamp = TJ.SAMP_GRAY;
+        if (xformOp == TJTransform.OP_HFLIP ||
+            xformOp == TJTransform.OP_ROT180)
+          _w = _w - (_w % TJ.getMCUWidth(_subsamp));
+        if (xformOp == TJTransform.OP_VFLIP ||
+            xformOp == TJTransform.OP_ROT180)
+          _h = _h - (_h % TJ.getMCUHeight(_subsamp));
+        if (xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90)
+          _w = _w - (_w % TJ.getMCUHeight(_subsamp));
+        if (xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT270)
+          _h = _h - (_h % TJ.getMCUWidth(_subsamp));
+        _ntilesw = (_w + _tilew - 1) / _tilew;
+        _ntilesh = (_h + _tileh - 1) / _tileh;
+
+        if (xformOp == TJTransform.OP_TRANSPOSE ||
+            xformOp == TJTransform.OP_TRANSVERSE ||
+            xformOp == TJTransform.OP_ROT90 ||
+            xformOp == TJTransform.OP_ROT270) {
+          if (_subsamp == TJ.SAMP_422)
+            _subsamp = TJ.SAMP_440;
+          else if (_subsamp == TJ.SAMP_440)
+            _subsamp = TJ.SAMP_422;
+        }
+
+        TJTransform[] t = new TJTransform[_ntilesw * _ntilesh];
+        jpegBuf = new byte[_ntilesw * _ntilesh][TJ.bufSize(_tilew, _tileh, subsamp)];
+
+        for (y = 0, tile = 0; y < _h; y += _tileh) {
+          for (x = 0; x < _w; x += _tilew, tile++) {
+            t[tile] = new TJTransform();
+            t[tile].width = Math.min(_tilew, _w - x);
+            t[tile].height = Math.min(_tileh, _h - y);
+            t[tile].x = x;
+            t[tile].y = y;
+            t[tile].op = xformOp;
+            t[tile].options = xformOpt | TJTransform.OPT_TRIM;
+            if ((t[tile].options & TJTransform.OPT_NOOUTPUT) != 0 &&
+                jpegBuf[tile] != null)
+              jpegBuf[tile] = null;
+          }
+        }
+
+        iter = -warmup;  /* iterations with iter < 0 are untimed warmup runs */
+        elapsed = 0.;
+        while (true) {
+          start = getTime();
+          tjt.transform(jpegBuf, t, flags);
+          jpegSize = tjt.getTransformedSizes();
+          iter++;
+          if (iter >= 1) {
+            elapsed += getTime() - start;
+            if (elapsed >= benchTime)
+              break;
+          }
+        }
+        t = null;
+
+        for (tile = 0, totalJpegSize = 0; tile < _ntilesw * _ntilesh; tile++)
+          totalJpegSize += jpegSize[tile];
+
+        if (quiet != 0) {  /* elapsed covers iter timed transforms */
+          System.out.format("%-6s%s%-6s%s",
+            sigFig((double)(w * h) / 1000000. * (double)iter / elapsed, 4),
+            quiet == 2 ? "\n" : "  ",
+            sigFig((double)(w * h * ps) / (double)totalJpegSize, 4),
+            quiet == 2 ? "\n" : "  ");
+        } else {
+          System.out.format("Transform     --> Frame rate:         %f fps\n",
+                            (double)iter / elapsed);
+          System.out.format("                  Output image size:  %d bytes\n",
+                            totalJpegSize);
+          System.out.format("                  Compression ratio:  %f:1\n",
+                            (double)(w * h * ps) / (double)totalJpegSize);
+          System.out.format("                  Throughput:         %f Megapixels/sec\n",
+                            (double)(w * h) / 1000000. * (double)iter / elapsed);
+          System.out.format("                  Output bit stream:  %f Megabits/sec\n",
+                            (double)totalJpegSize * 8. / 1000000. * (double)iter / elapsed);
+        }
+      } else {
+        if (quiet == 1)
+          System.out.print("N/A     N/A     ");
+        /* The source file may be larger than TJ.bufSize() (e.g. big metadata) */
+        jpegBuf = new byte[1][Math.max(srcSize, TJ.bufSize(_tilew, _tileh, subsamp))];
+        jpegSize = new int[1];
+        jpegSize[0] = srcSize;
+        System.arraycopy(srcBuf, 0, jpegBuf[0], 0, srcSize);
+      }
+      if (w == tilew)
+        _tilew = _w;
+      if (h == tileh)
+        _tileh = _h;
+      if ((xformOpt & TJTransform.OPT_NOOUTPUT) == 0)
+        decomp(null, jpegBuf, jpegSize, null, _w, _h, _subsamp, 0,
+               fileName, _tilew, _tileh);
+      else if (quiet == 1)
+        System.out.println("N/A");
+
+      jpegBuf = null;
+      jpegSize = null;
+
+      if (tilew == w && tileh == h) break;
+    }
+  }
+
+  /* Print the command-line help (options as parsed by main()), then exit(1) */
+  static void usage() throws Exception {
+    TJScalingFactor[] scalingFactors = TJ.getScalingFactors();
+    int nsf = scalingFactors.length;
+    String className = TJBench.class.getName();
+
+    System.out.println("\nUSAGE: java " + className);
+    System.out.println("       <Inputfile (BMP)> <Quality> [options]\n");
+    System.out.println("       java " + className);
+    System.out.println("       <Inputfile (JPG)> [options]\n");
+    System.out.println("Options:\n");
+    System.out.println("-bottomup = Test bottom-up compression/decompression");
+    System.out.println("-tile = Test performance of the codec when the image is encoded as separate");
+    System.out.println("     tiles of varying sizes.");
+    System.out.println("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =");
+    System.out.println("     Test the specified color conversion path in the codec (default = BGR)");
+    System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in");
+    System.out.println("     the underlying codec");
+    System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying");
+    System.out.println("     codec");
+    System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the");
+    System.out.println("     underlying codec");
+    System.out.println("-subsamp <s> = When testing JPEG compression, this option specifies the level");
+    System.out.println("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, 411, or");
+    System.out.println("     GRAY).  The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in");
+    System.out.println("     sequence.");
+    System.out.println("-quiet = Output results in tabular rather than verbose format");
+    System.out.println("-qq = Output only the measured values, one per line (no table headers)");
+    System.out.println("-yuv = Test YUV encoding/decoding functions");
+    System.out.println("-yuvpad <p> = If testing YUV encoding/decoding, this specifies the number of");
+    System.out.println("     bytes to which each row of each plane in the intermediate YUV image is");
+    System.out.println("     padded (default = 1)");
+    System.out.println("-scale M/N = Scale down the width/height of the decompressed JPEG image by a");
+    System.out.print  ("     factor of M/N (M/N = ");
+    for (int i = 0; i < nsf; i++) {
+      System.out.format("%d/%d", scalingFactors[i].getNum(),
+                        scalingFactors[i].getDenom());
+      if (nsf == 2 && i != nsf - 1)
+        System.out.print(" or ");
+      else if (nsf > 2) {
+        if (i != nsf - 1)
+          System.out.print(", ");
+        if (i == nsf - 2)
+          System.out.print("or ");
+      }
+      if (i % 8 == 0 && i != 0)  /* wrap the list of scaling factors */
+        System.out.print("\n     ");
+    }
+    System.out.println(")");
+    System.out.println("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =");
+    System.out.println("     Perform the corresponding lossless transform prior to");
+    System.out.println("     decompression (these options are mutually exclusive)");
+    System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression");
+    System.out.println("     test (can be combined with the other transforms above)");
+    System.out.println("-nooutput = Discard the lossless transform output and skip the decompression test");
+    System.out.println("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)");
+    System.out.println("-warmup <w> = Execute each benchmark <w> times to prime the cache before");
+    System.out.println("     taking performance measurements (default = 1)");
+    System.out.println("-componly = Stop after running compression tests.  Do not test decompression.\n");
+    System.out.println("NOTE:  If the quality is specified as a range (e.g. 90-100), a separate");
+    System.out.println("test will be performed for all quality values in the range.\n");
+    System.exit(1);
+  }
+
+  /* Entry point: parse the command line, then run the selected benchmark(s) */
+  public static void main(String[] argv) {
+    byte[] srcBuf = null;  int w = 0, h = 0;
+    int minQual = -1, maxQual = -1, minArg = 1, retval = 0, subsamp = -1;
+
+    try {
+
+      if (argv.length < minArg)
+        usage();
+
+      String tempStr = argv[0].toLowerCase();
+      if (tempStr.endsWith(".jpg") || tempStr.endsWith(".jpeg"))
+        decompOnly = true;
+
+      System.out.println("");
+
+      if (!decompOnly) {
+        minArg = 2;
+        if (argv.length < minArg)
+          usage();
+        /* "90-100" is not parseable as a whole; split the range at the dash */
+        int dashIndex = argv[1].indexOf('-');
+        String minStr = dashIndex > 0 ? argv[1].substring(0, dashIndex) : argv[1];
+        try {
+          minQual = Integer.parseInt(minStr);
+        } catch (NumberFormatException e) {}
+        if (minQual < 1 || minQual > 100)
+          throw new Exception("Quality must be between 1 and 100.");
+        if (dashIndex > 0 && argv[1].length() > dashIndex + 1) {
+          try {
+            maxQual = Integer.parseInt(argv[1].substring(dashIndex + 1));
+          } catch (NumberFormatException e) {}
+        }
+        if (maxQual < 1 || maxQual > 100)
+          maxQual = minQual;
+      }
+
+      if (argv.length > minArg) {
+        for (int i = minArg; i < argv.length; i++) {
+          if (argv[i].equalsIgnoreCase("-tile")) {
+            doTile = true;  xformOpt |= TJTransform.OPT_CROP;
+          }
+          if (argv[i].equalsIgnoreCase("-fastupsample")) {
+            System.out.println("Using fast upsampling code\n");
+            flags |= TJ.FLAG_FASTUPSAMPLE;
+          }
+          if (argv[i].equalsIgnoreCase("-fastdct")) {
+            System.out.println("Using fastest DCT/IDCT algorithm\n");
+            flags |= TJ.FLAG_FASTDCT;
+          }
+          if (argv[i].equalsIgnoreCase("-accuratedct")) {
+            System.out.println("Using most accurate DCT/IDCT algorithm\n");
+            flags |= TJ.FLAG_ACCURATEDCT;
+          }
+          if (argv[i].equalsIgnoreCase("-rgb"))
+            pf = TJ.PF_RGB;
+          if (argv[i].equalsIgnoreCase("-rgbx"))
+            pf = TJ.PF_RGBX;
+          if (argv[i].equalsIgnoreCase("-bgr"))
+            pf = TJ.PF_BGR;
+          if (argv[i].equalsIgnoreCase("-bgrx"))
+            pf = TJ.PF_BGRX;
+          if (argv[i].equalsIgnoreCase("-xbgr"))
+            pf = TJ.PF_XBGR;
+          if (argv[i].equalsIgnoreCase("-xrgb"))
+            pf = TJ.PF_XRGB;
+          if (argv[i].equalsIgnoreCase("-bottomup"))
+            flags |= TJ.FLAG_BOTTOMUP;
+          if (argv[i].equalsIgnoreCase("-quiet"))
+            quiet = 1;
+          if (argv[i].equalsIgnoreCase("-qq"))
+            quiet = 2;
+          if (argv[i].equalsIgnoreCase("-scale") && i < argv.length - 1) {
+            int temp1 = 0, temp2 = 0;
+            boolean match = false;
+            Scanner scanner = new Scanner(argv[++i]).useDelimiter("/");
+            try {
+              temp1 = scanner.nextInt();
+              temp2 = scanner.nextInt();
+            } catch(Exception e) {}
+            if (temp2 <= 0) temp2 = 1;  /* a bare "M" is treated as M/1 */
+            if (temp1 > 0) {
+              TJScalingFactor[] scalingFactors = TJ.getScalingFactors();
+              for (int j = 0; j < scalingFactors.length; j++) {
+                if ((double)temp1 / (double)temp2 ==
+                    (double)scalingFactors[j].getNum() /
+                    (double)scalingFactors[j].getDenom()) {
+                  sf = scalingFactors[j];
+                  match = true;   break;
+                }
+              }
+              if (!match) usage();
+            } else
+              usage();
+          }
+          if (argv[i].equalsIgnoreCase("-hflip"))
+            xformOp = TJTransform.OP_HFLIP;
+          if (argv[i].equalsIgnoreCase("-vflip"))
+            xformOp = TJTransform.OP_VFLIP;
+          if (argv[i].equalsIgnoreCase("-transpose"))
+            xformOp = TJTransform.OP_TRANSPOSE;
+          if (argv[i].equalsIgnoreCase("-transverse"))
+            xformOp = TJTransform.OP_TRANSVERSE;
+          if (argv[i].equalsIgnoreCase("-rot90"))
+            xformOp = TJTransform.OP_ROT90;
+          if (argv[i].equalsIgnoreCase("-rot180"))
+            xformOp = TJTransform.OP_ROT180;
+          if (argv[i].equalsIgnoreCase("-rot270"))
+            xformOp = TJTransform.OP_ROT270;
+          if (argv[i].equalsIgnoreCase("-grayscale"))
+            xformOpt |= TJTransform.OPT_GRAY;
+          if (argv[i].equalsIgnoreCase("-nooutput"))
+            xformOpt |= TJTransform.OPT_NOOUTPUT;
+          if (argv[i].equalsIgnoreCase("-benchtime") && i < argv.length - 1) {
+            double temp = -1;
+            try {
+              temp = Double.parseDouble(argv[++i]);
+            } catch (NumberFormatException e) {}
+            if (temp > 0.0)
+              benchTime = temp;
+            else
+              usage();
+          }
+          if (argv[i].equalsIgnoreCase("-yuv")) {
+            System.out.println("Testing YUV planar encoding/decoding\n");
+            doYUV = true;
+          }
+          if (argv[i].equalsIgnoreCase("-yuvpad") && i < argv.length - 1) {
+            int temp = 0;
+            try {
+             temp = Integer.parseInt(argv[++i]);
+            } catch (NumberFormatException e) {}
+            if (temp >= 1)
+              yuvpad = temp;
+          }
+          if (argv[i].equalsIgnoreCase("-subsamp") && i < argv.length - 1) {
+            i++;
+            if (argv[i].toUpperCase().startsWith("G"))
+              subsamp = TJ.SAMP_GRAY;
+            else if (argv[i].equals("444"))
+              subsamp = TJ.SAMP_444;
+            else if (argv[i].equals("422"))
+              subsamp = TJ.SAMP_422;
+            else if (argv[i].equals("440"))
+              subsamp = TJ.SAMP_440;
+            else if (argv[i].equals("420"))
+              subsamp = TJ.SAMP_420;
+            else if (argv[i].equals("411"))
+              subsamp = TJ.SAMP_411;
+          }
+          if (argv[i].equalsIgnoreCase("-componly"))
+            compOnly = true;
+          if (argv[i].equalsIgnoreCase("-warmup") && i < argv.length - 1) {
+            int temp = -1;
+            try {
+             temp = Integer.parseInt(argv[++i]);
+            } catch (NumberFormatException e) {}
+            if (temp >= 0) {
+              warmup = temp;
+              System.out.format("Warmup runs = %d\n\n", warmup);
+            }
+          }
+          if (argv[i].equalsIgnoreCase("-?"))
+            usage();
+        }
+      }
+
+      if (sf == null)
+        sf = new TJScalingFactor(1, 1);
+
+      if ((sf.getNum() != 1 || sf.getDenom() != 1) && doTile) {
+        System.out.println("Disabling tiled compression/decompression tests, because those tests do not");
+        System.out.println("work when scaled decompression is enabled.");
+        doTile = false;
+      }
+
+      if (!decompOnly) {
+        int[] width = new int[1], height = new int[1];
+        srcBuf = loadImage(argv[0], width, height, pf);
+        w = width[0];  h = height[0];
+        int index = argv[0].lastIndexOf('.');  /* strip only the extension */
+        if (index >= 0)
+          argv[0] = argv[0].substring(0, index);
+      }
+
+      if (quiet == 1 && !decompOnly) {
+        System.out.println("All performance values in Mpixels/sec\n");
+        System.out.format("Bitmap     JPEG     JPEG  %s  %s   ",
+          (doTile ? "Tile " : "Image"), (doTile ? "Tile " : "Image"));
+        if (doYUV)
+          System.out.print("Encode  ");
+        System.out.print("Comp    Comp    Decomp  ");
+        if (doYUV)
+          System.out.print("Decode");
+        System.out.print("\n");
+        System.out.print("Format     Subsamp  Qual  Width  Height  ");
+        if (doYUV)
+          System.out.print("Perf    ");
+        System.out.print("Perf    Ratio   Perf    ");
+        if (doYUV)
+          System.out.print("Perf");
+        System.out.println("\n");
+      }
+
+      if (decompOnly) {
+        decompTest(argv[0]);
+        System.out.println("");
+        System.exit(retval);
+      }
+
+      System.gc();
+      if (subsamp >= 0 && subsamp < TJ.NUMSAMP) {
+        for (int i = maxQual; i >= minQual; i--)
+          fullTest(srcBuf, w, h, subsamp, i, argv[0]);
+        System.out.println("");
+      } else {
+        for (int i = maxQual; i >= minQual; i--)
+          fullTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          fullTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          fullTest(srcBuf, w, h, TJ.SAMP_422, i, argv[0]);
+        System.out.println("");
+        System.gc();
+        for (int i = maxQual; i >= minQual; i--)
+          fullTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
+        System.out.println("");
+      }
+
+    } catch (Exception e) {
+      System.out.println("ERROR: " + e.getMessage());
+      e.printStackTrace();
+      retval = -1;
+    }
+
+    System.exit(retval);
+  }
+
+}
diff --git a/java/TJExample.java b/java/TJExample.java
new file mode 100644
index 0000000..2c6324d
--- /dev/null
+++ b/java/TJExample.java
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C)2011-2012, 2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This program demonstrates how to compress and decompress JPEG files using
+ * the TurboJPEG JNI wrapper
+ */
+
+import java.io.*;
+import java.awt.*;
+import java.awt.image.*;
+import java.nio.*;
+import javax.imageio.*;
+import javax.swing.*;
+import org.libjpegturbo.turbojpeg.*;
+
+public class TJExample implements TJCustomFilter {
+
+  public static final String classname = new TJExample().getClass().getName();
+
+  private static void usage() throws Exception {  // print help, then exit(1)
+    System.out.println("\nUSAGE: java " + classname + " <Input file> <Output file> [options]\n");
+    System.out.println("Input and output files can be any image format that the Java Image I/O");
+    System.out.println("extensions understand.  If either filename ends in a .jpg extension, then");
+    System.out.println("TurboJPEG will be used to compress or decompress the file.\n");
+    System.out.println("Options:\n");
+    System.out.println("-scale M/N = if the input image is a JPEG file, scale the width/height of the");
+    System.out.print("             output image by a factor of M/N (M/N = ");
+    for (int i = 0; i < sf.length; i++) {  // "a or b" for two factors, else "a, b, or c"
+      System.out.print(sf[i].getNum() + "/" + sf[i].getDenom());
+      if (sf.length == 2 && i != sf.length - 1)
+        System.out.print(" or ");
+      else if (sf.length > 2) {
+        if (i != sf.length - 1)
+          System.out.print(", ");
+        if (i == sf.length - 2)
+          System.out.print("or ");
+      }
+    }
+    System.out.println(")\n");
+    System.out.println("-samp <444|422|420|gray> = If the output image is a JPEG file, this specifies");
+    System.out.println("                           the level of chrominance subsampling to use when");
+    System.out.println("                           recompressing it.  Default is to use the same level");
+    System.out.println("                           of subsampling as the input, if the input is a JPEG");
+    System.out.println("                           file, or 4:4:4 otherwise.\n");
+    System.out.println("-q <1-100> = If the output image is a JPEG file, this specifies the JPEG");
+    System.out.println("             quality to use when recompressing it (default = 95).\n");
+    System.out.println("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =");
+    System.out.println("     If the input image is a JPEG file, perform the corresponding lossless");
+    System.out.println("     transform prior to decompression (these options are mutually exclusive)\n");
+    System.out.println("-grayscale = If the input image is a JPEG file, perform lossless grayscale");
+    System.out.println("     conversion prior to decompression (can be combined with the other");
+    System.out.println("     transforms above)\n");
+    System.out.println("-crop X,Y,WxH = If the input image is a JPEG file, perform lossless cropping");
+    System.out.println("     prior to decompression.  X,Y specifies the upper left corner of the");
+    System.out.println("     cropping region, and WxH specifies its width and height.  X,Y must be");
+    System.out.println("     evenly divisible by the MCU block size (8x8 if the source image was");
+    System.out.println("     compressed using no subsampling or grayscale, or 16x8 for 4:2:2 or 16x16");
+    System.out.println("     for 4:2:0.)\n");
+    System.out.println("-display = Display output image (Output file need not be specified in this");
+    System.out.println("     case.)\n");
+    System.out.println("-fastupsample = Use the fastest chrominance upsampling algorithm available in");
+    System.out.println("     the underlying codec\n");
+    System.out.println("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying");
+    System.out.println("     codec\n");
+    System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the");
+    System.out.println("     underlying codec\n");
+    System.exit(1);
+  }
+
+  private static final String[] sampName = {  // indexed by subsampling code
+    "4:4:4", "4:2:2", "4:2:0", "Grayscale", "4:4:0", "4:1:1"
+  };
+
+  public static void main(String[] argv) {
+    // Parse the options, load/transform the input, then save or display it.
+    BufferedImage img = null;
+    byte[] bmpBuf = null;
+    TJTransform xform = new TJTransform();
+    int flags = 0;
+
+    try {
+
+      sf = TJ.getScalingFactors();
+
+      if (argv.length < 2)
+        usage();
+
+      TJScalingFactor scaleFactor = new TJScalingFactor(1, 1);
+      String inFormat = "jpg", outFormat = "jpg";
+      int outSubsamp = -1, outQual = 95;
+      boolean display = false;
+
+      if (argv.length > 1) {
+        // Value-taking options advance i; the tests after them then see the value.
+        for (int i = 1; i < argv.length; i++) {
+          if (argv[i].length() < 2)
+            continue;
+          if (argv[i].length() > 2 &&
+              argv[i].substring(0, 3).equalsIgnoreCase("-sc")) {
+            int match = 0;
+            if (i < argv.length - 1) {
+              String[] scaleArg = argv[++i].split("/");
+              if (scaleArg.length == 2) {
+                TJScalingFactor tempsf =
+                  new TJScalingFactor(Integer.parseInt(scaleArg[0]),
+                                      Integer.parseInt(scaleArg[1]));
+                for (int j = 0; j < sf.length; j++) {
+                  if (tempsf.equals(sf[j])) {
+                    scaleFactor = sf[j];
+                    match = 1;
+                    break;
+                  }
+                }
+              }
+            }
+            if (match != 1) usage();
+          }
+          if (argv[i].equalsIgnoreCase("-h") || argv[i].equalsIgnoreCase("-?"))
+            usage();
+          if (argv[i].length() > 2 &&
+              argv[i].substring(0, 3).equalsIgnoreCase("-sa")) {
+            if (i < argv.length - 1) {
+              i++;
+              if (argv[i].regionMatches(true, 0, "g", 0, 1))  // false (no throw) if empty
+                outSubsamp = TJ.SAMP_GRAY;
+              else if (argv[i].equals("444"))
+                outSubsamp = TJ.SAMP_444;
+              else if (argv[i].equals("422"))
+                outSubsamp = TJ.SAMP_422;
+              else if (argv[i].equals("420"))
+                outSubsamp = TJ.SAMP_420;
+              else
+                usage();
+            } else
+              usage();
+          }
+          if (argv[i].regionMatches(true, 0, "-q", 0, 2)) {  // safe on 1-char values
+            if (i < argv.length - 1) {
+              int qual = Integer.parseInt(argv[++i]);
+              if (qual >= 1 && qual <= 100)
+                outQual = qual;
+              else
+                usage();
+            } else
+              usage();
+          }
+          if (argv[i].regionMatches(true, 0, "-g", 0, 2))
+            xform.options |= TJTransform.OPT_GRAY;
+          if (argv[i].equalsIgnoreCase("-hflip"))
+            xform.op = TJTransform.OP_HFLIP;
+          if (argv[i].equalsIgnoreCase("-vflip"))
+            xform.op = TJTransform.OP_VFLIP;
+          if (argv[i].equalsIgnoreCase("-transpose"))
+            xform.op = TJTransform.OP_TRANSPOSE;
+          if (argv[i].equalsIgnoreCase("-transverse"))
+            xform.op = TJTransform.OP_TRANSVERSE;
+          if (argv[i].equalsIgnoreCase("-rot90"))
+            xform.op = TJTransform.OP_ROT90;
+          if (argv[i].equalsIgnoreCase("-rot180"))
+            xform.op = TJTransform.OP_ROT180;
+          if (argv[i].equalsIgnoreCase("-rot270"))
+            xform.op = TJTransform.OP_ROT270;
+          if (argv[i].equalsIgnoreCase("-custom"))
+            xform.cf = new TJExample();
+          else if (argv[i].length() > 2 &&
+                   argv[i].substring(0, 2).equalsIgnoreCase("-c")) {
+            if (i >= argv.length - 1)
+              usage();
+            String[] cropArg = argv[++i].split(",");
+            if (cropArg.length != 3)
+              usage();
+            String[] dimArg = cropArg[2].split("[xX]");
+            if (dimArg.length != 2)
+              usage();
+            int tempx = Integer.parseInt(cropArg[0]);
+            int tempy = Integer.parseInt(cropArg[1]);
+            int tempw = Integer.parseInt(dimArg[0]);
+            int temph = Integer.parseInt(dimArg[1]);
+            if (tempx < 0 || tempy < 0 || tempw < 0 || temph < 0)
+              usage();
+            xform.x = tempx;
+            xform.y = tempy;
+            xform.width = tempw;
+            xform.height = temph;
+            xform.options |= TJTransform.OPT_CROP;
+          }
+          if (argv[i].regionMatches(true, 0, "-d", 0, 2))
+            display = true;
+          if (argv[i].equalsIgnoreCase("-fastupsample")) {
+            System.out.println("Using fast upsampling code");
+            flags |= TJ.FLAG_FASTUPSAMPLE;
+          }
+          if (argv[i].equalsIgnoreCase("-fastdct")) {
+            System.out.println("Using fastest DCT/IDCT algorithm");
+            flags |= TJ.FLAG_FASTDCT;
+          }
+          if (argv[i].equalsIgnoreCase("-accuratedct")) {
+            System.out.println("Using most accurate DCT/IDCT algorithm");
+            flags |= TJ.FLAG_ACCURATEDCT;
+          }
+        }
+      }
+      String[] inFileTokens = argv[0].split("\\.");
+      if (inFileTokens.length > 1)
+        inFormat = inFileTokens[inFileTokens.length - 1];
+      String[] outFileTokens;
+      if (display)
+        outFormat = "bmp";
+      else {
+        outFileTokens = argv[1].split("\\.");
+        if (outFileTokens.length > 1)
+          outFormat = outFileTokens[outFileTokens.length - 1];
+      }
+
+      File file = new File(argv[0]);
+      int width, height;
+
+      if (inFormat.equalsIgnoreCase("jpg")) {
+        FileInputStream fis = new FileInputStream(file);
+        int inputSize = fis.available();
+        if (inputSize < 1) {
+          System.out.println("Input file contains no data");
+          System.exit(1);
+        }
+        byte[] inputBuf = new byte[inputSize];
+        // read() may return early; readFully() fills the buffer or throws.
+        new DataInputStream(fis).readFully(inputBuf);
+        fis.close();
+
+        TJDecompressor tjd;
+        if (xform.op != TJTransform.OP_NONE || xform.options != 0 ||
+            xform.cf != null) {
+          TJTransformer tjt = new TJTransformer(inputBuf);
+          TJTransform[] t = new TJTransform[1];
+          t[0] = xform;
+          t[0].options |= TJTransform.OPT_TRIM;
+          TJDecompressor[] tjdx = tjt.transform(t, 0);
+          tjd = tjdx[0];
+        } else
+          tjd = new TJDecompressor(inputBuf);
+
+        width = tjd.getWidth();
+        height = tjd.getHeight();
+        int inSubsamp = tjd.getSubsamp();
+        System.out.println("Source Image: " + width + " x " + height +
+                           " pixels, " + sampName[inSubsamp] + " subsampling");
+        if (outSubsamp < 0)
+          outSubsamp = inSubsamp;
+        // Unscaled transform to a JPEG: save the transformer's buffer as-is.
+        if (outFormat.equalsIgnoreCase("jpg") &&
+            (xform.op != TJTransform.OP_NONE || xform.options != 0) &&
+            scaleFactor.isOne()) {
+          file = new File(argv[1]);
+          FileOutputStream fos = new FileOutputStream(file);
+          fos.write(tjd.getSourceBuf(), 0, tjd.getSourceSize());
+          fos.close();
+          System.exit(0);
+        }
+
+        width = scaleFactor.getScaled(width);
+        height = scaleFactor.getScaled(height);
+
+        if (!outFormat.equalsIgnoreCase("jpg"))
+          img = tjd.decompress(width, height, BufferedImage.TYPE_INT_RGB,
+                               flags);
+        else
+          bmpBuf = tjd.decompress(width, 0, height, TJ.PF_BGRX, flags);
+        tjd.close();
+      } else {
+        img = ImageIO.read(file);
+        if (img == null)
+          throw new Exception("Input image type not supported.");
+        width = img.getWidth();
+        height = img.getHeight();
+        if (outSubsamp < 0) {
+          if (img.getType() == BufferedImage.TYPE_BYTE_GRAY)
+            outSubsamp = TJ.SAMP_GRAY;
+          else
+            outSubsamp = TJ.SAMP_444;
+        }
+      }
+      System.gc();
+      if (!display)
+        System.out.print("Dest. Image (" + outFormat + "):  " + width + " x " +
+                         height + " pixels");
+
+      if (display) {
+        ImageIcon icon = new ImageIcon(img);
+        JLabel label = new JLabel(icon, JLabel.CENTER);
+        JOptionPane.showMessageDialog(null, label, "Output Image",
+                                      JOptionPane.PLAIN_MESSAGE);
+      } else if (outFormat.equalsIgnoreCase("jpg")) {
+        System.out.println(", " + sampName[outSubsamp] +
+                           " subsampling, quality = " + outQual);
+        TJCompressor tjc = new TJCompressor();
+        int jpegSize;
+        byte[] jpegBuf;
+
+        tjc.setSubsamp(outSubsamp);
+        tjc.setJPEGQuality(outQual);
+        if (img != null)
+          tjc.setSourceImage(img, 0, 0, 0, 0);
+        else
+          tjc.setSourceImage(bmpBuf, 0, 0, width, 0, height, TJ.PF_BGRX);
+        jpegBuf = tjc.compress(flags);
+        jpegSize = tjc.getCompressedSize();
+        tjc.close();
+
+        file = new File(argv[1]);
+        FileOutputStream fos = new FileOutputStream(file);
+        fos.write(jpegBuf, 0, jpegSize);
+        fos.close();
+      } else {
+        System.out.print("\n");
+        file = new File(argv[1]);
+        ImageIO.write(img, outFormat, file);
+      }
+
+    } catch(Exception e) {
+      e.printStackTrace();
+      System.exit(-1);
+    }
+  }
+
+  /* Filter installed by the -custom option: negates each buffered coefficient. */
+  public void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion,
+                           Rectangle planeRegion, int componentIndex,
+                           int transformIndex, TJTransform transform)
+                           throws Exception {
+    for (int pos = 0; pos < bufferRegion.width * bufferRegion.height; pos++)
+      coeffBuffer.put(pos, (short)(-coeffBuffer.get(pos)));
+  }
+
+  static TJScalingFactor[] sf = null;  // set in main() from TJ.getScalingFactors()
+};
diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java
new file mode 100644
index 0000000..ab539c4
--- /dev/null
+++ b/java/TJUnitTest.java
@@ -0,0 +1,967 @@
+/*
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This program tests the various code paths in the TurboJPEG JNI Wrapper
+ */
+
+import java.io.*;
+import java.util.*;
+import java.awt.image.*;
+import javax.imageio.*;
+import java.nio.*;
+import org.libjpegturbo.turbojpeg.*;
+
+public class TJUnitTest {
+
+  private static final String classname =
+    new TJUnitTest().getClass().getName();
+
+  private static void usage() {  // print the option summary, then exit(1)
+    System.out.println("\nUSAGE: java " + classname + " [options]\n");
+    System.out.println("Options:\n");
+    System.out.println("-yuv = test YUV encoding/decoding support\n");
+    System.out.println("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest");
+    System.out.println("            4-byte boundary\n");
+    System.out.println("-bi = test BufferedImage support\n");
+    System.exit(1);
+  }
+
+  private static final String[] subNameLong = {  // presumably indexed by TJ.SAMP_* -- confirm
+    "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
+  };
+  private static final String[] subName = {  // short forms, same order as subNameLong
+    "444", "422", "420", "GRAY", "440", "411"
+  };
+
+  private static final String[] pixFormatStr = {
+    "RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale",
+    "RGBA", "BGRA", "ABGR", "ARGB", "CMYK"
+  };
+  // Indexed by pixel format (alphaOffset[pf]); -1 means no alpha byte is written.
+  private static final int[] alphaOffset = {
+    -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
+  };
+
+  private static final int[] _3byteFormats = {
+    TJ.PF_RGB, TJ.PF_BGR
+  };
+  private static final int[] _3byteFormatsBI = {
+    BufferedImage.TYPE_3BYTE_BGR
+  };
+  private static final int[] _4byteFormats = {
+    TJ.PF_RGBX, TJ.PF_BGRX, TJ.PF_XBGR, TJ.PF_XRGB, TJ.PF_CMYK
+  };
+  private static final int[] _4byteFormatsBI = {
+    BufferedImage.TYPE_INT_BGR, BufferedImage.TYPE_INT_RGB,
+    BufferedImage.TYPE_4BYTE_ABGR, BufferedImage.TYPE_4BYTE_ABGR_PRE,
+    BufferedImage.TYPE_INT_ARGB, BufferedImage.TYPE_INT_ARGB_PRE
+  };
+  private static final int[] onlyGray = {
+    TJ.PF_GRAY
+  };
+  private static final int[] onlyGrayBI = {
+    BufferedImage.TYPE_BYTE_GRAY
+  };
+  private static final int[] onlyRGB = {
+    TJ.PF_RGB
+  };
+  // NOTE(review): presumably set from the -yuv, -noyuvpad and -bi options -- confirm.
+  private static boolean doYUV = false;
+  private static int pad = 4;
+  private static boolean bi = false;
+
+  private static int exitStatus = 0;
+
+  /*
+   * Maps a BufferedImage type constant to a TurboJPEG pixel format constant.
+   * The byte-array types map to a fixed format; the packed-int types
+   * (INT_BGR, INT_RGB, INT_ARGB, INT_ARGB_PRE) map to one of two formats
+   * depending on the native byte order of the machine.  Any type not listed
+   * below yields 0.
+   */
+  private static int biTypePF(int biType) {
+    boolean bigEndian = ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN;
+    switch(biType) {
+      case BufferedImage.TYPE_3BYTE_BGR:
+        return TJ.PF_BGR;
+      case BufferedImage.TYPE_4BYTE_ABGR:
+      case BufferedImage.TYPE_4BYTE_ABGR_PRE:
+        return TJ.PF_XBGR;
+      case BufferedImage.TYPE_BYTE_GRAY:
+        return TJ.PF_GRAY;
+      case BufferedImage.TYPE_INT_BGR:
+        return bigEndian ? TJ.PF_XBGR : TJ.PF_RGBX;
+      case BufferedImage.TYPE_INT_RGB:
+        return bigEndian ? TJ.PF_XRGB : TJ.PF_BGRX;
+      case BufferedImage.TYPE_INT_ARGB:
+      case BufferedImage.TYPE_INT_ARGB_PRE:
+        return bigEndian ? TJ.PF_ARGB : TJ.PF_BGRA;
+      default:
+        // Unrecognized BufferedImage type.
+        return 0;
+    }
+  }
+
+  private static String biTypeStr(int biType) {
+    switch(biType) {
+      case BufferedImage.TYPE_3BYTE_BGR:
+        return "3BYTE_BGR";
+      case BufferedImage.TYPE_4BYTE_ABGR:
+        return "4BYTE_ABGR";
+      case BufferedImage.TYPE_4BYTE_ABGR_PRE:
+        return "4BYTE_ABGR_PRE";
+      case BufferedImage.TYPE_BYTE_GRAY:
+        return "BYTE_GRAY";
+      case BufferedImage.TYPE_INT_BGR:
+        return "INT_BGR";
+      case BufferedImage.TYPE_INT_RGB:
+        return "INT_RGB";
+      case BufferedImage.TYPE_INT_ARGB:
+        return "INT_ARGB";
+      case BufferedImage.TYPE_INT_ARGB_PRE:
+        return "INT_ARGB_PRE";
+    }
+    return "Unknown";
+  }
+
+  private static void initBuf(byte[] buf, int w, int pitch, int h, int pf,
+                              int flags) throws Exception {  // fill buf with the test pattern
+    int roffset = TJ.getRedOffset(pf);
+    int goffset = TJ.getGreenOffset(pf);
+    int boffset = TJ.getBlueOffset(pf);
+    int aoffset = alphaOffset[pf];
+    int ps = TJ.getPixelSize(pf);
+    int index, row, col, halfway = 16;
+    // Pattern: 8x8 checkerboard whose two colors change at row 16 (halfway).
+    if (pf == TJ.PF_GRAY) {  // one byte per pixel: 255/76 above halfway, 0/226 below
+      Arrays.fill(buf, (byte)0);
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+            index = pitch * (h - row - 1) + col;
+          else
+            index = pitch * row + col;
+          if (((row / 8) + (col / 8)) % 2 == 0)
+            buf[index] = (row < halfway) ? (byte)255 : 0;
+          else
+            buf[index] = (row < halfway) ? 76 : (byte)226;
+        }
+      }
+      return;
+    }
+    if (pf == TJ.PF_CMYK) {  // index counts pixels and uses w, not pitch, per row
+      Arrays.fill(buf, (byte)255);
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+            index = (h - row - 1) * w + col;
+          else
+            index = row * w + col;
+          if (((row / 8) + (col / 8)) % 2 == 0) {
+            if (row >= halfway) buf[index * ps + 3] = 0;
+          } else {
+            buf[index * ps + 2] = 0;
+            if (row < halfway)
+              buf[index * ps + 1] = 0;
+          }
+        }
+      }
+      return;
+    }
+    // Other formats: white/red blocks above halfway, black/yellow below.
+    Arrays.fill(buf, (byte)0);
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+          index = pitch * (h - row - 1) + col * ps;
+        else
+          index = pitch * row + col * ps;
+        if (((row / 8) + (col / 8)) % 2 == 0) {
+          if (row < halfway) {
+            buf[index + roffset] = (byte)255;
+            buf[index + goffset] = (byte)255;
+            buf[index + boffset] = (byte)255;
+          }
+        } else {
+          buf[index + roffset] = (byte)255;
+          if (row >= halfway)
+            buf[index + goffset] = (byte)255;
+        }
+        if (aoffset >= 0)  // -1 means this format has no alpha byte to set
+          buf[index + aoffset] = (byte)255;
+      }
+    }
+  }
+
+  private static void initIntBuf(int[] buf, int w, int pitch, int h, int pf,
+                                 int flags) throws Exception {
+    int rshift = TJ.getRedOffset(pf) * 8;
+    int gshift = TJ.getGreenOffset(pf) * 8;
+    int bshift = TJ.getBlueOffset(pf) * 8;
+    int ashift = alphaOffset[pf] * 8;
+    int index, row, col, halfway = 16;
+
+    Arrays.fill(buf, 0);
+    for (row = 0; row < h; row++) {
+      for (col = 0; col < w; col++) {
+        if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+          index = pitch * (h - row - 1) + col;
+        else
+          index = pitch * row + col;
+        if (((row / 8) + (col / 8)) % 2 == 0) {
+          if (row < halfway) {
+            buf[index] |= (255 << rshift);
+            buf[index] |= (255 << gshift);
+            buf[index] |= (255 << bshift);
+          }
+        } else {
+          buf[index] |= (255 << rshift);
+          if (row >= halfway)
+            buf[index] |= (255 << gshift);
+        }
+        if (ashift >= 0)
+          buf[index] |= (255 << ashift);
+      }
+    }
+  }
+
+  /* Draws the test pattern straight into the image's backing array, using
+   * initIntBuf() for packed-int image types and initBuf() for all others. */
+  private static void initImg(BufferedImage img, int pf, int flags)
+                              throws Exception {
+    WritableRaster raster = img.getRaster();
+    int type = img.getType();
+    boolean packedInt = type == BufferedImage.TYPE_INT_RGB ||
+                        type == BufferedImage.TYPE_INT_BGR ||
+                        type == BufferedImage.TYPE_INT_ARGB ||
+                        type == BufferedImage.TYPE_INT_ARGB_PRE;
+    if (packedInt) {
+      int intStride = ((SinglePixelPackedSampleModel)img.getSampleModel())
+                        .getScanlineStride();
+      int[] intPixels = ((DataBufferInt)raster.getDataBuffer()).getData();
+      initIntBuf(intPixels, img.getWidth(), intStride, img.getHeight(), pf, flags);
+      return;
+    }
+    int byteStride = ((ComponentSampleModel)img.getSampleModel())
+                       .getScanlineStride();
+    byte[] bytePixels = ((DataBufferByte)raster.getDataBuffer()).getData();
+    initBuf(bytePixels, img.getWidth(), byteStride, img.getHeight(), pf, flags);
+  }
+
+  /* Throws unless v (negative values are offset by 256) is within 1 of cv. */
+  private static void checkVal(int row, int col, int v, String vname, int cv)
+                               throws Exception {
+    int actual = (v < 0) ? v + 256 : v;
+    if (!(actual >= cv - 1 && actual <= cv + 1))
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be " + cv + ", not " + actual);
+  }
+
+  /* Throws if v (negative values are offset by 256) is greater than 1. */
+  private static void checkVal0(int row, int col, int v, String vname)
+                                throws Exception {
+    int actual = (v < 0) ? v + 256 : v;
+    if (actual > 1)
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be 0, not " + actual);
+  }
+
+  /* Throws if v (negative values are offset by 256) is less than 254. */
+  private static void checkVal255(int row, int col, int v, String vname)
+                                  throws Exception {
+    int actual = (v < 0) ? v + 256 : v;
+    if (actual < 254)
+      throw new Exception("Comp. " + vname + " at " + row + "," + col +
+                          " should be 255, not " + actual);
+  }
+
+  private static int checkBuf(byte[] buf, int w, int pitch, int h, int pf,
+                              int subsamp, TJScalingFactor sf, int flags)
+                              throws Exception {  // returns 1 if the pattern matches, else 0
+    int roffset = TJ.getRedOffset(pf);
+    int goffset = TJ.getGreenOffset(pf);
+    int boffset = TJ.getBlueOffset(pf);
+    int aoffset = alphaOffset[pf];
+    int ps = TJ.getPixelSize(pf);
+    int index, row, col, retval = 1;
+    int halfway = 16 * sf.getNum() / sf.getDenom();
+    int blockSize = 8 * sf.getNum() / sf.getDenom();
+    // halfway/blockSize: row 16 and the 8-pixel block size, scaled by sf.
+    try {
+
+      if (pf == TJ.PF_CMYK) {
+        for (row = 0; row < h; row++) {
+          for (col = 0; col < w; col++) {
+            if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+              index = (h - row - 1) * w + col;
+            else
+              index = row * w + col;
+            byte c = buf[index * ps];
+            byte m = buf[index * ps + 1];
+            byte y = buf[index * ps + 2];
+            byte k = buf[index * ps + 3];
+            checkVal255(row, col, c, "C");
+            if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+              checkVal255(row, col, m, "M");
+              checkVal255(row, col, y, "Y");
+              if (row < halfway)
+                checkVal255(row, col, k, "K");
+              else
+                checkVal0(row, col, k, "K");
+            } else {
+              checkVal0(row, col, y, "Y");
+              checkVal255(row, col, k, "K");
+              if (row < halfway)
+                checkVal0(row, col, m, "M");
+              else
+                checkVal255(row, col, m, "M");
+            }
+          }
+        }
+        return 1;
+      }
+      // NOTE(review): rows >= halfway are never visited here (CMYK checks all h).
+      for (row = 0; row < halfway; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+            index = pitch * (h - row - 1) + col * ps;
+          else
+            index = pitch * row + col * ps;
+          byte r = buf[index + roffset];
+          byte g = buf[index + goffset];
+          byte b = buf[index + boffset];
+          byte a = aoffset >= 0 ? buf[index + aoffset] : (byte)255;
+          if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+            if (row < halfway) {
+              checkVal255(row, col, r, "R");
+              checkVal255(row, col, g, "G");
+              checkVal255(row, col, b, "B");
+            } else {
+              checkVal0(row, col, r, "R");
+              checkVal0(row, col, g, "G");
+              checkVal0(row, col, b, "B");
+            }
+          } else {
+            if (subsamp == TJ.SAMP_GRAY) {  // grayscale: all three channels carry one level
+              if (row < halfway) {
+                checkVal(row, col, r, "R", 76);
+                checkVal(row, col, g, "G", 76);
+                checkVal(row, col, b, "B", 76);
+              } else {
+                checkVal(row, col, r, "R", 226);
+                checkVal(row, col, g, "G", 226);
+                checkVal(row, col, b, "B", 226);
+              }
+            } else {
+              checkVal255(row, col, r, "R");
+              if (row < halfway) {
+                checkVal0(row, col, g, "G");
+              } else {
+                checkVal255(row, col, g, "G");
+              }
+              checkVal0(row, col, b, "B");
+            }
+          }
+          checkVal255(row, col, a, "A");
+        }
+      }
+    } catch(Exception e) {  // first mismatch (or any other exception) fails the check
+      System.out.println("\n" + e.getMessage());
+      retval = 0;
+    }
+    // On failure, dump every pixel in buffer order (FLAG_BOTTOMUP is ignored).
+    if (retval == 0) {
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
+          if (pf == TJ.PF_CMYK) {
+            int c = buf[pitch * row + col * ps];
+            int m = buf[pitch * row + col * ps + 1];
+            int y = buf[pitch * row + col * ps + 2];
+            int k = buf[pitch * row + col * ps + 3];
+            if (c < 0) c += 256;
+            if (m < 0) m += 256;
+            if (y < 0) y += 256;
+            if (k < 0) k += 256;
+            System.out.format("%3d/%3d/%3d/%3d ", c, m, y, k);
+          } else {
+            int r = buf[pitch * row + col * ps + roffset];
+            int g = buf[pitch * row + col * ps + goffset];
+            int b = buf[pitch * row + col * ps + boffset];
+            if (r < 0) r += 256;
+            if (g < 0) g += 256;
+            if (b < 0) b += 256;
+            System.out.format("%3d/%3d/%3d ", r, g, b);
+          }
+        }
+        System.out.print("\n");
+      }
+    }
+    return retval;
+  }
+
+  private static int checkIntBuf(int[] buf, int w, int pitch, int h, int pf,
+                                 int subsamp, TJScalingFactor sf, int flags)
+                                 throws Exception {  // returns 1 if the pattern matches, else 0
+    int rshift = TJ.getRedOffset(pf) * 8;
+    int gshift = TJ.getGreenOffset(pf) * 8;
+    int bshift = TJ.getBlueOffset(pf) * 8;
+    int ashift = alphaOffset[pf] * 8;
+    int index, row, col, retval = 1;
+    int halfway = 16 * sf.getNum() / sf.getDenom();
+    int blockSize = 8 * sf.getNum() / sf.getDenom();
+    // NOTE(review): the loop below stops at halfway, so rows >= halfway go unchecked.
+    try {
+      for (row = 0; row < halfway; row++) {
+        for (col = 0; col < w; col++) {
+          if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+            index = pitch * (h - row - 1) + col;
+          else
+            index = pitch * row + col;
+          int r = (buf[index] >> rshift) & 0xFF;
+          int g = (buf[index] >> gshift) & 0xFF;
+          int b = (buf[index] >> bshift) & 0xFF;
+          int a = ashift >= 0 ? (buf[index] >> ashift) & 0xFF : 255;
+          if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+            if (row < halfway) {
+              checkVal255(row, col, r, "R");
+              checkVal255(row, col, g, "G");
+              checkVal255(row, col, b, "B");
+            } else {
+              checkVal0(row, col, r, "R");
+              checkVal0(row, col, g, "G");
+              checkVal0(row, col, b, "B");
+            }
+          } else {
+            if (subsamp == TJ.SAMP_GRAY) {  // grayscale: all three channels carry one level
+              if (row < halfway) {
+                checkVal(row, col, r, "R", 76);
+                checkVal(row, col, g, "G", 76);
+                checkVal(row, col, b, "B", 76);
+              } else {
+                checkVal(row, col, r, "R", 226);
+                checkVal(row, col, g, "G", 226);
+                checkVal(row, col, b, "B", 226);
+              }
+            } else {
+              checkVal255(row, col, r, "R");
+              if (row < halfway) {
+                checkVal0(row, col, g, "G");
+              } else {
+                checkVal255(row, col, g, "G");
+              }
+              checkVal0(row, col, b, "B");
+            }
+          }
+          checkVal255(row, col, a, "A");
+        }
+      }
+    } catch(Exception e) {  // first mismatch (or any other exception) fails the check
+      System.out.println("\n" + e.getMessage());
+      retval = 0;
+    }
+    // On failure, dump every pixel in buffer order (FLAG_BOTTOMUP is ignored).
+    if (retval == 0) {
+      for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++) {
+          int r = (buf[pitch * row + col] >> rshift) & 0xFF;
+          int g = (buf[pitch * row + col] >> gshift) & 0xFF;
+          int b = (buf[pitch * row + col] >> bshift) & 0xFF;
+          if (r < 0) r += 256;  // cannot be negative after "& 0xFF"
+          if (g < 0) g += 256;
+          if (b < 0) b += 256;
+          System.out.format("%3d/%3d/%3d ", r, g, b);
+        }
+        System.out.print("\n");
+      }
+    }
+    return retval;
+  }
+
+  /* Check a decompressed BufferedImage against the unit-test pattern by
+     passing its backing array and scanline stride to checkIntBuf() (the four
+     TYPE_INT_* image types below) or to checkBuf() (every other type). */
+  private static int checkImg(BufferedImage img, int pf, int subsamp,
+                              TJScalingFactor sf, int flags) throws Exception {
+    WritableRaster raster = img.getRaster();
+    switch (img.getType()) {
+    case BufferedImage.TYPE_INT_RGB:
+    case BufferedImage.TYPE_INT_BGR:
+    case BufferedImage.TYPE_INT_ARGB:
+    case BufferedImage.TYPE_INT_ARGB_PRE:
+      int intPitch = ((SinglePixelPackedSampleModel)img.getSampleModel())
+                     .getScanlineStride();
+      int[] pixels = ((DataBufferInt)raster.getDataBuffer()).getData();
+      return checkIntBuf(pixels, img.getWidth(), intPitch, img.getHeight(), pf,
+                         subsamp, sf, flags);
+    default:
+      int bytePitch = ((ComponentSampleModel)img.getSampleModel())
+                      .getScanlineStride();
+      byte[] samples = ((DataBufferByte)raster.getDataBuffer()).getData();
+      return checkBuf(samples, img.getWidth(), bytePitch, img.getHeight(), pf,
+                      subsamp, sf, flags);
+    }
+  }
+
+  private static int PAD(int v, int p) {  // round v up to a multiple of p
+    return (v + p - 1) & -p;  // -p == ~(p - 1); NOTE(review): assumes p = 2^k
+  }
+
+  /**
+   * Verify a planar YUV buffer (Y plane, then U, then V) against the unit-test
+   * pattern.  Plane dimensions and pitches are derived from w, h, subsamp and
+   * the static "pad" setting.
+   *
+   * @return 1 on success; 0 if size is wrong or a sample was rejected (the
+   *         planes present in buf are then printed)
+   */
+  private static int checkBufYUV(byte[] buf, int size, int w, int h,
+                                 int subsamp, TJScalingFactor sf)
+                                 throws Exception {
+    int row, col;
+    int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8;
+    int pw = PAD(w, hsf), ph = PAD(h, vsf);
+    int cw = pw / hsf, ch = ph / vsf;
+    int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad);
+    int retval = 1;
+    boolean hasChroma = subsamp != TJ.SAMP_GRAY;
+    int correctsize = ypitch * ph +
+                      (hasChroma ? uvpitch * ch * 2 : 0);
+    int halfway = 16 * sf.getNum() / sf.getDenom();
+    int blockSize = 8 * sf.getNum() / sf.getDenom();
+
+    try {
+      if (size != correctsize)
+        throw new Exception("Incorrect size " + size + ".  Should be " +
+                            correctsize);
+
+      for (row = 0; row < ph; row++) {
+        for (col = 0; col < pw; col++) {
+          byte y = buf[ypitch * row + col];
+          if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+            if (row < halfway)
+              checkVal255(row, col, y, "Y");
+            else
+              checkVal0(row, col, y, "Y");
+          } else {
+            if (row < halfway)
+              checkVal(row, col, y, "Y", 76);
+            else
+              checkVal(row, col, y, "Y", 226);
+          }
+        }
+      }
+      if (hasChroma) {
+        // Chroma rows are indexed in subsampled units, so scale the boundary.
+        halfway = 16 / vsf * sf.getNum() / sf.getDenom();
+        for (row = 0; row < ch; row++) {
+          for (col = 0; col < cw; col++) {
+            byte u = buf[ypitch * ph + (uvpitch * row + col)],
+                 v = buf[ypitch * ph + uvpitch * ch + (uvpitch * row + col)];
+            if (((row * vsf / blockSize) + (col * hsf / blockSize)) % 2 == 0) {
+              checkVal(row, col, u, "U", 128);
+              checkVal(row, col, v, "V", 128);
+            } else {
+              if (row < halfway) {
+                checkVal(row, col, u, "U", 85);
+                checkVal255(row, col, v, "V");
+              } else {
+                checkVal0(row, col, u, "U");
+                checkVal(row, col, v, "V", 149);
+              }
+            }
+          }
+        }
+      }
+    } catch(Exception e) {
+      System.out.println("\n" + e.getMessage());
+      retval = 0;
+    }
+
+    // Dump only what buf can hold: a short buffer or a grayscale image (which
+    // has no U/V planes) would otherwise overrun the array here.
+    if (retval == 0 && buf.length >= correctsize) {
+      for (row = 0; row < ph; row++) {
+        for (col = 0; col < pw; col++)
+          System.out.format("%3d ", buf[ypitch * row + col] & 0xFF);
+        System.out.print("\n");
+      }
+      for (int plane = 0; hasChroma && plane < 2; plane++) {
+        int base = ypitch * ph + plane * uvpitch * ch;
+        System.out.print("\n");
+        for (row = 0; row < ch; row++) {
+          for (col = 0; col < cw; col++)
+            System.out.format("%3d ", buf[base + uvpitch * row + col] & 0xFF);
+          System.out.print("\n");
+        }
+      }
+    }
+
+    return retval;
+  }
+
+  /* Write the first jpegBufSize bytes of jpegBuf to the file "filename". */
+  private static void writeJPEG(byte[] jpegBuf, int jpegBufSize,
+                                String filename) throws Exception {
+    FileOutputStream fos = new FileOutputStream(new File(filename));
+    try { fos.write(jpegBuf, 0, jpegBufSize); }
+    finally { fos.close(); }  // close even if the write fails
+  }
+
+  private static int compTest(TJCompressor tjc, byte[] dstBuf, int w,
+                              int h, int pf, String baseName, int subsamp,
+                              int jpegQual, int flags) throws Exception {
+    String tempStr;
+    byte[] srcBuf = null;
+    BufferedImage img = null;
+    String pfStr, pfStrLong;
+    String buStr = (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD";
+    String buStrLong = (flags & TJ.FLAG_BOTTOMUP) != 0 ?
+                       "Bottom-Up" : "Top-Down ";
+    int size = 0, ps, imgType = pf;
+
+    if (bi) {
+      pf = biTypePF(imgType);
+      pfStr = biTypeStr(imgType);
+      pfStrLong = pfStr + " (" + pixFormatStr[pf] + ")";
+    } else {
+      pfStr = pixFormatStr[pf];
+      pfStrLong = pfStr;
+    }
+    ps =  TJ.getPixelSize(pf);
+
+    if (bi) {
+      img = new BufferedImage(w, h, imgType);
+      initImg(img, pf, flags);
+      tempStr = baseName + "_enc_" + pfStr + "_" + buStr + "_" +
+                subName[subsamp] + "_Q" + jpegQual + ".png";
+      File file = new File(tempStr);
+      ImageIO.write(img, "png", file);
+      tjc.setSourceImage(img, 0, 0, 0, 0);
+    } else {
+      srcBuf = new byte[w * h * ps + 1];
+      initBuf(srcBuf, w, w * ps, h, pf, flags);
+      tjc.setSourceImage(srcBuf, 0, 0, w, 0, h, pf);
+    }
+    Arrays.fill(dstBuf, (byte)0);
+
+    tjc.setSubsamp(subsamp);
+    tjc.setJPEGQuality(jpegQual);
+    if (doYUV) {
+      System.out.format("%s %s -> YUV %s ... ", pfStrLong, buStrLong,
+                        subNameLong[subsamp]);
+      YUVImage yuvImage = tjc.encodeYUV(pad, flags);
+      if (checkBufYUV(yuvImage.getBuf(), yuvImage.getSize(), w, h, subsamp,
+          new TJScalingFactor(1, 1)) == 1)
+        System.out.print("Passed.\n");
+      else {
+        System.out.print("FAILED!\n");
+        exitStatus = -1;
+      }
+
+      System.out.format("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp],
+                        buStrLong, jpegQual);
+      tjc.setSourceImage(yuvImage);
+    } else {
+      System.out.format("%s %s -> %s Q%d ... ", pfStrLong, buStrLong,
+                        subNameLong[subsamp], jpegQual);
+    }
+    tjc.compress(dstBuf, flags);
+    size = tjc.getCompressedSize();
+
+    tempStr = baseName + "_enc_" + pfStr + "_" + buStr + "_" +
+              subName[subsamp] + "_Q" + jpegQual + ".jpg";
+    writeJPEG(dstBuf, size, tempStr);
+    System.out.println("Done.\n  Result in " + tempStr);
+
+    return size;
+  }
+
+  private static void decompTest(TJDecompressor tjd, byte[] jpegBuf,
+                                 int jpegSize, int w, int h, int pf,
+                                 String baseName, int subsamp, int flags,
+                                 TJScalingFactor sf) throws Exception {
+    String pfStr, pfStrLong, tempStr;
+    String buStrLong = (flags & TJ.FLAG_BOTTOMUP) != 0 ?
+                       "Bottom-Up" : "Top-Down ";
+    int scaledWidth = sf.getScaled(w);
+    int scaledHeight = sf.getScaled(h);
+    int temp1, temp2, imgType = pf;
+    BufferedImage img = null;
+    byte[] dstBuf = null;
+
+    if (bi) {
+      pf = biTypePF(imgType);
+      pfStr = biTypeStr(imgType);
+      pfStrLong = pfStr + " (" + pixFormatStr[pf] + ")";
+    } else {
+      pfStr = pixFormatStr[pf];
+      pfStrLong = pfStr;
+    }
+
+    tjd.setSourceImage(jpegBuf, jpegSize);
+    if (tjd.getWidth() != w || tjd.getHeight() != h ||
+        tjd.getSubsamp() != subsamp)
+      throw new Exception("Incorrect JPEG header");
+
+    temp1 = scaledWidth;
+    temp2 = scaledHeight;
+    temp1 = tjd.getScaledWidth(temp1, temp2);
+    temp2 = tjd.getScaledHeight(temp1, temp2);
+    if (temp1 != scaledWidth || temp2 != scaledHeight)
+      throw new Exception("Scaled size mismatch");
+
+    if (doYUV) {
+      System.out.format("JPEG -> YUV %s ", subNameLong[subsamp]);
+      if(!sf.isOne())
+        System.out.format("%d/%d ... ", sf.getNum(), sf.getDenom());
+      else System.out.print("... ");
+      YUVImage yuvImage = tjd.decompressToYUV(scaledWidth, pad, scaledHeight,
+                                              flags);
+      if (checkBufYUV(yuvImage.getBuf(), yuvImage.getSize(), scaledWidth,
+                      scaledHeight, subsamp, sf) == 1)
+        System.out.print("Passed.\n");
+      else {
+        System.out.print("FAILED!\n");  exitStatus = -1;
+      }
+
+      System.out.format("YUV %s -> %s %s ... ", subNameLong[subsamp],
+                        pfStrLong, buStrLong);
+      tjd.setSourceImage(yuvImage);
+    } else {
+      System.out.format("JPEG -> %s %s ", pfStrLong, buStrLong);
+      if(!sf.isOne())
+        System.out.format("%d/%d ... ", sf.getNum(), sf.getDenom());
+      else System.out.print("... ");
+    }
+    if (bi)
+      img = tjd.decompress(scaledWidth, scaledHeight, imgType, flags);
+    else
+      dstBuf = tjd.decompress(scaledWidth, 0, scaledHeight, pf, flags);
+
+    if (bi) {
+      tempStr = baseName + "_dec_" + pfStr + "_" +
+                (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+                subName[subsamp] + "_" +
+                (double)sf.getNum() / (double)sf.getDenom() + "x" + ".png";
+      File file = new File(tempStr);
+      ImageIO.write(img, "png", file);
+    }
+
+    if ((bi && checkImg(img, pf, subsamp, sf, flags) == 1) ||
+        (!bi && checkBuf(dstBuf, scaledWidth,
+                         scaledWidth * TJ.getPixelSize(pf), scaledHeight, pf,
+                         subsamp, sf, flags) == 1))
+      System.out.print("Passed.\n");
+    else {
+      System.out.print("FAILED!\n");
+      exitStatus = -1;
+    }
+  }
+
+  /* Run the decompression test at every scaling factor that is exercised for
+     subsamp: all factors for 4:4:4 and grayscale; 1/1 and 1/2 for 4:1:1; and
+     1/1, 1/2, and 1/4 for the remaining subsampling types. */
+  private static void decompTest(TJDecompressor tjd, byte[] jpegBuf,
+                                 int jpegSize, int w, int h, int pf,
+                                 String baseName, int subsamp,
+                                 int flags) throws Exception {
+    boolean anyScale = subsamp == TJ.SAMP_444 || subsamp == TJ.SAMP_GRAY;
+    for (TJScalingFactor factor : TJ.getScalingFactors()) {
+      int num = factor.getNum();
+      int denom = factor.getDenom();
+      boolean unitScale = num == 1 &&
+        (denom == 1 || denom == 2 || (denom == 4 && subsamp != TJ.SAMP_411));
+      if (anyScale || unitScale)
+        decompTest(tjd, jpegBuf, jpegSize, w, h, pf, baseName, subsamp,
+                   flags, factor);
+    }
+  }
+
+  /* Run the compress/decompress round trip on a w x h image with the given
+     subsampling, once top-down and once bottom-up, for every pixel format in
+     "formats" (negative entries are skipped).  The compressor and
+     decompressor are always closed, even if a test throws. */
+  private static void doTest(int w, int h, int[] formats, int subsamp,
+                             String baseName) throws Exception {
+    TJCompressor tjc = null;
+    TJDecompressor tjd = null;
+    byte[] dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
+
+    try {
+      tjc = new TJCompressor();
+      tjd = new TJDecompressor();
+
+      for (int pf : formats) {
+        if (pf < 0) continue;
+        for (int i = 0; i < 2; i++) {
+          int flags = 0;
+          // Request fast upsampling for the chroma-subsampled formats.
+          if (subsamp == TJ.SAMP_422 || subsamp == TJ.SAMP_420 ||
+              subsamp == TJ.SAMP_440 || subsamp == TJ.SAMP_411)
+            flags |= TJ.FLAG_FASTUPSAMPLE;
+          if (i == 1)
+            flags |= TJ.FLAG_BOTTOMUP;
+          int size = compTest(tjc, dstBuf, w, h, pf, baseName, subsamp, 100,
+                              flags);
+          decompTest(tjd, dstBuf, size, w, h, pf, baseName, subsamp, flags);
+          if (pf >= TJ.PF_RGBX && pf <= TJ.PF_XRGB && !bi) {
+            System.out.print("\n");
+            // Decode again into the alpha-bearing counterpart of pf.
+            decompTest(tjd, dstBuf, size, w, h, pf + (TJ.PF_RGBA - TJ.PF_RGBX),
+                       baseName, subsamp, flags);
+          }
+          System.out.print("\n");
+        }
+      }
+      System.out.print("--------------------\n\n");
+    } finally {
+      if (tjc != null) tjc.close();
+      if (tjd != null) tjd.close();
+    }
+  }
+
+  /* Buffer size regression test: compress images whose bytes are randomly 0
+     or 255, at many small sizes and in both orientations, into buffers of
+     exactly TJ.bufSize() bytes (or into YUVImage instances in YUV mode). */
+  private static void bufSizeTest() throws Exception {
+    int w, h, i, subsamp;
+    byte[] srcBuf, dstBuf = null;
+    YUVImage dstImage = null;
+    TJCompressor tjc = null;
+    Random r = new Random();
+
+    try {
+      tjc = new TJCompressor();
+      System.out.println("Buffer size regression test");
+      for (subsamp = 0; subsamp < TJ.NUMSAMP; subsamp++) {
+        for (w = 1; w < 48; w++) {
+          int maxh = (w == 1) ? 2048 : 48;
+          for (h = 1; h < maxh; h++) {
+            if (h % 100 == 0)
+              System.out.format("%04d x %04d\b\b\b\b\b\b\b\b\b\b\b", w, h);
+            srcBuf = new byte[w * h * 4];
+            if (doYUV)
+              dstImage = new YUVImage(w, pad, h, subsamp);
+            else
+              dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
+            for (i = 0; i < w * h * 4; i++)
+              srcBuf[i] = (byte)(r.nextInt(2) * 255);
+            tjc.setSourceImage(srcBuf, 0, 0, w, 0, h, TJ.PF_BGRX);
+            tjc.setSubsamp(subsamp);
+            tjc.setJPEGQuality(100);
+            if (doYUV)
+              tjc.encodeYUV(dstImage, 0);
+            else
+              tjc.compress(dstBuf, 0);
+
+            // Repeat with width and height exchanged.
+            srcBuf = new byte[h * w * 4];
+            if (doYUV)
+              dstImage = new YUVImage(h, pad, w, subsamp);
+            else
+              dstBuf = new byte[TJ.bufSize(h, w, subsamp)];
+            for (i = 0; i < h * w * 4; i++)
+              srcBuf[i] = (byte)(r.nextInt(2) * 255);
+            tjc.setSourceImage(srcBuf, 0, 0, h, 0, w, TJ.PF_BGRX);
+            if (doYUV)
+              tjc.encodeYUV(dstImage, 0);
+            else
+              tjc.compress(dstBuf, 0);
+          }
+        }
+      }
+      System.out.println("Done.      ");
+    } finally {
+      if (tjc != null) tjc.close();
+    }
+  }
+
+  public static void main(String[] argv) {
+    try {
+      String testName = "javatest";
+      for (int i = 0; i < argv.length; i++) {
+        if (argv[i].equalsIgnoreCase("-yuv"))
+          doYUV = true;
+        if (argv[i].equalsIgnoreCase("-noyuvpad"))
+          pad = 1;
+        if (argv[i].substring(0, 1).equalsIgnoreCase("-h") ||
+            argv[i].equalsIgnoreCase("-?"))
+          usage();
+        if (argv[i].equalsIgnoreCase("-bi")) {
+          bi = true;
+          testName = "javabitest";
+        }
+      }
+      if (doYUV)
+        _4byteFormats[4] = -1;
+      doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444,
+             testName);
+      doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_444,
+             testName);
+      doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_422,
+             testName);
+      doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_422,
+             testName);
+      doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_420,
+             testName);
+      doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_420,
+             testName);
+      doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_440,
+             testName);
+      doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440,
+             testName);
+      doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_411,
+             testName);
+      doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_411,
+             testName);
+      doTest(39, 41, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName);
+      doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY,
+             testName);
+      _4byteFormats[4] = -1;
+      doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_GRAY,
+             testName);
+      if (!bi)
+        bufSizeTest();
+      if (doYUV && !bi) {
+        System.out.print("\n--------------------\n\n");
+        doTest(48, 48, onlyRGB, TJ.SAMP_444, "javatest_yuv0");
+        doTest(48, 48, onlyRGB, TJ.SAMP_422, "javatest_yuv0");
+        doTest(48, 48, onlyRGB, TJ.SAMP_420, "javatest_yuv0");
+        doTest(48, 48, onlyRGB, TJ.SAMP_440, "javatest_yuv0");
+        doTest(48, 48, onlyRGB, TJ.SAMP_411, "javatest_yuv0");
+        doTest(48, 48, onlyRGB, TJ.SAMP_GRAY, "javatest_yuv0");
+        doTest(48, 48, onlyGray, TJ.SAMP_GRAY, "javatest_yuv0");
+      }
+    } catch(Exception e) {
+      e.printStackTrace();
+      exitStatus = -1;
+    }
+    System.exit(exitStatus);
+  }
+}
diff --git a/java/doc/allclasses-frame.html b/java/doc/allclasses-frame.html
new file mode 100644
index 0000000..509ea50
--- /dev/null
+++ b/java/doc/allclasses-frame.html
@@ -0,0 +1,23 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>All Classes</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<h1 class="bar">All Classes</h1>
+<div class="indexContainer">
+<ul>
+<li><a href="org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJ</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJCompressor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg" target="classFrame"><i>TJCustomFilter</i></a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJDecompressor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJScalingFactor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJTransform</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJTransformer</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">YUVImage</a></li>
+</ul>
+</div>
+</body>
+</html>
diff --git a/java/doc/allclasses-noframe.html b/java/doc/allclasses-noframe.html
new file mode 100644
index 0000000..3eac18f
--- /dev/null
+++ b/java/doc/allclasses-noframe.html
@@ -0,0 +1,23 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>All Classes</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<h1 class="bar">All Classes</h1>
+<div class="indexContainer">
+<ul>
+<li><a href="org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><i>TJCustomFilter</i></a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></li>
+<li><a href="org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></li>
+</ul>
+</div>
+</body>
+</html>
diff --git a/java/doc/constant-values.html b/java/doc/constant-values.html
new file mode 100644
index 0000000..ec1b21d
--- /dev/null
+++ b/java/doc/constant-values.html
@@ -0,0 +1,479 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Constant Field Values</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="Constant Field Values";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?constant-values.html" target="_top">Frames</a></li>
+<li><a href="constant-values.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 title="Constant Field Values" class="title">Constant Field Values</h1>
+<h2 title="Contents">Contents</h2>
+<ul>
+<li><a href="#org.libjpegturbo">org.libjpegturbo.*</a></li>
+</ul>
+</div>
+<div class="constantValuesContainer"><a name="org.libjpegturbo">
+<!--   -->
+</a>
+<h2 title="org.libjpegturbo">org.libjpegturbo.*</h2>
+<ul class="blockList">
+<li class="blockList">
+<table border="0" cellpadding="3" cellspacing="0" summary="Constant Field Values table, listing constant fields, and values">
+<caption><span>org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th scope="col">Constant Field</th>
+<th class="colLast" scope="col">Value</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.CS_CMYK">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK">CS_CMYK</a></code></td>
+<td class="colLast"><code>3</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.CS_GRAY">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY">CS_GRAY</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.CS_RGB">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#CS_RGB">CS_RGB</a></code></td>
+<td class="colLast"><code>0</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.CS_YCbCr">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr">CS_YCbCr</a></code></td>
+<td class="colLast"><code>1</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.CS_YCCK">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK">CS_YCCK</a></code></td>
+<td class="colLast"><code>4</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_ACCURATEDCT">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_ACCURATEDCT">FLAG_ACCURATEDCT</a></code></td>
+<td class="colLast"><code>4096</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_BOTTOMUP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP">FLAG_BOTTOMUP</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FASTDCT">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTDCT">FLAG_FASTDCT</a></code></td>
+<td class="colLast"><code>2048</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FASTUPSAMPLE">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTUPSAMPLE">FLAG_FASTUPSAMPLE</a></code></td>
+<td class="colLast"><code>256</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FORCEMMX">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">FLAG_FORCEMMX</a></code></td>
+<td class="colLast"><code>8</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">FLAG_FORCESSE</a></code></td>
+<td class="colLast"><code>16</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE2">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">FLAG_FORCESSE2</a></code></td>
+<td class="colLast"><code>32</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE3">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">FLAG_FORCESSE3</a></code></td>
+<td class="colLast"><code>128</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.NUMCS">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#NUMCS">NUMCS</a></code></td>
+<td class="colLast"><code>5</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.NUMPF">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#NUMPF">NUMPF</a></code></td>
+<td class="colLast"><code>12</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.NUMSAMP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#NUMSAMP">NUMSAMP</a></code></td>
+<td class="colLast"><code>6</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_ABGR">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_ABGR">PF_ABGR</a></code></td>
+<td class="colLast"><code>9</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_ARGB">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_ARGB">PF_ARGB</a></code></td>
+<td class="colLast"><code>10</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_BGR">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_BGR">PF_BGR</a></code></td>
+<td class="colLast"><code>1</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_BGRA">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_BGRA">PF_BGRA</a></code></td>
+<td class="colLast"><code>8</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_BGRX">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_BGRX">PF_BGRX</a></code></td>
+<td class="colLast"><code>3</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_CMYK">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK">PF_CMYK</a></code></td>
+<td class="colLast"><code>11</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_GRAY">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY">PF_GRAY</a></code></td>
+<td class="colLast"><code>6</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_RGB">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_RGB">PF_RGB</a></code></td>
+<td class="colLast"><code>0</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_RGBA">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_RGBA">PF_RGBA</a></code></td>
+<td class="colLast"><code>7</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_RGBX">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_RGBX">PF_RGBX</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_XBGR">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_XBGR">PF_XBGR</a></code></td>
+<td class="colLast"><code>4</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.PF_XRGB">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#PF_XRGB">PF_XRGB</a></code></td>
+<td class="colLast"><code>5</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_411">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_411">SAMP_411</a></code></td>
+<td class="colLast"><code>5</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_420">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_420">SAMP_420</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_422">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_422">SAMP_422</a></code></td>
+<td class="colLast"><code>1</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_440">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_440">SAMP_440</a></code></td>
+<td class="colLast"><code>4</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_444">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_444">SAMP_444</a></code></td>
+<td class="colLast"><code>0</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJ.SAMP_GRAY">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJ.html#SAMP_GRAY">SAMP_GRAY</a></code></td>
+<td class="colLast"><code>3</code></td>
+</tr>
+</tbody>
+</table>
+</li>
+<li class="blockList">
+<table border="0" cellpadding="3" cellspacing="0" summary="Constant Field Values table, listing constant fields, and values">
+<caption><span>org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th scope="col">Constant Field</th>
+<th class="colLast" scope="col">Value</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.NUMOP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#NUMOP">NUMOP</a></code></td>
+<td class="colLast"><code>8</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_HFLIP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_HFLIP">OP_HFLIP</a></code></td>
+<td class="colLast"><code>1</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_NONE">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_NONE">OP_NONE</a></code></td>
+<td class="colLast"><code>0</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_ROT180">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT180">OP_ROT180</a></code></td>
+<td class="colLast"><code>6</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_ROT270">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT270">OP_ROT270</a></code></td>
+<td class="colLast"><code>7</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_ROT90">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT90">OP_ROT90</a></code></td>
+<td class="colLast"><code>5</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_TRANSPOSE">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSPOSE">OP_TRANSPOSE</a></code></td>
+<td class="colLast"><code>3</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_TRANSVERSE">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSVERSE">OP_TRANSVERSE</a></code></td>
+<td class="colLast"><code>4</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OP_VFLIP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OP_VFLIP">OP_VFLIP</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OPT_CROP">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OPT_CROP">OPT_CROP</a></code></td>
+<td class="colLast"><code>4</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OPT_GRAY">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OPT_GRAY">OPT_GRAY</a></code></td>
+<td class="colLast"><code>8</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OPT_NOOUTPUT">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OPT_NOOUTPUT">OPT_NOOUTPUT</a></code></td>
+<td class="colLast"><code>16</code></td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OPT_PERFECT">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT">OPT_PERFECT</a></code></td>
+<td class="colLast"><code>1</code></td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a name="org.libjpegturbo.turbojpeg.TJTransform.OPT_TRIM">
+<!--   -->
+</a><code>public&nbsp;static&nbsp;final&nbsp;int</code></td>
+<td><code><a href="org/libjpegturbo/turbojpeg/TJTransform.html#OPT_TRIM">OPT_TRIM</a></code></td>
+<td class="colLast"><code>2</code></td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?constant-values.html" target="_top">Frames</a></li>
+<li><a href="constant-values.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/deprecated-list.html b/java/doc/deprecated-list.html
new file mode 100644
index 0000000..79fd7ed
--- /dev/null
+++ b/java/doc/deprecated-list.html
@@ -0,0 +1,258 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Deprecated List</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="Deprecated List";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li class="navBarCell1Rev">Deprecated</li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?deprecated-list.html" target="_top">Frames</a></li>
+<li><a href="deprecated-list.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 title="Deprecated API" class="title">Deprecated API</h1>
+<h2 title="Contents">Contents</h2>
+<ul>
+<li><a href="#field">Deprecated Fields</a></li>
+<li><a href="#method">Deprecated Methods</a></li>
+<li><a href="#constructor">Deprecated Constructors</a></li>
+</ul>
+</div>
+<div class="contentContainer"><a name="field">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<table border="0" cellpadding="3" cellspacing="0" summary="Deprecated Fields table, listing deprecated fields, and an explanation">
+<caption><span>Deprecated Fields</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Field and Description</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCEMMX</a></td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE</a></td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE2</a></td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE3</a></td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+<a name="method">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<table border="0" cellpadding="3" cellspacing="0" summary="Deprecated Methods table, listing deprecated methods, and an explanation">
+<caption><span>Deprecated Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Method and Description</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)">org.libjpegturbo.turbojpeg.TJ.bufSizeYUV(int, int, int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, byte[], int)">org.libjpegturbo.turbojpeg.TJCompressor.compress(BufferedImage, byte[], int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)"><code>TJCompressor.compress(byte[], int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, int)">org.libjpegturbo.turbojpeg.TJCompressor.compress(BufferedImage, int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)"><code>TJCompressor.compress(int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompress(byte[], int, int, int, int, int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>TJDecompressor.decompress(byte[], int, int, int, int, int, int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompressToYUV(byte[], int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>TJDecompressor.decompressToYUV(YUVImage, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompressToYUV(int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><code>TJDecompressor.decompressToYUV(int, int, int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, byte[], int)">org.libjpegturbo.turbojpeg.TJCompressor.encodeYUV(BufferedImage, byte[], int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><code>TJCompressor.encodeYUV(byte[], int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, int)">org.libjpegturbo.turbojpeg.TJCompressor.encodeYUV(BufferedImage, int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)"><code>TJCompressor.encodeYUV(int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)">org.libjpegturbo.turbojpeg.TJCompressor.encodeYUV(byte[], int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>TJCompressor.encodeYUV(YUVImage, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)">org.libjpegturbo.turbojpeg.TJCompressor.encodeYUV(int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)"><code>TJCompressor.encodeYUV(int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">org.libjpegturbo.turbojpeg.TJDecompressor.getJPEGBuf()</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()"><code>TJDecompressor.getSourceBuf()</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">org.libjpegturbo.turbojpeg.TJDecompressor.getJPEGSize()</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()"><code>TJDecompressor.getSourceSize()</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">org.libjpegturbo.turbojpeg.TJDecompressor.setJPEGImage(byte[], int)</a>
+<div class="block"><i>Use <a href="org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)"><code>TJDecompressor.setSourceImage(byte[], int)</code></a> instead.</i></div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)">org.libjpegturbo.turbojpeg.TJCompressor.setSourceImage(byte[], int, int, int, int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>TJCompressor.setSourceImage(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+<a name="constructor">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<table border="0" cellpadding="3" cellspacing="0" summary="Deprecated Constructors table, listing deprecated constructors, and an explanation">
+<caption><span>Deprecated Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colOne"><a href="org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)">org.libjpegturbo.turbojpeg.TJCompressor(byte[], int, int, int, int)</a>
+<div class="block"><i>Use
+ <a href="org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><code>TJCompressor.TJCompressor(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li class="navBarCell1Rev">Deprecated</li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?deprecated-list.html" target="_top">Frames</a></li>
+<li><a href="deprecated-list.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/help-doc.html b/java/doc/help-doc.html
new file mode 100644
index 0000000..ce749a9
--- /dev/null
+++ b/java/doc/help-doc.html
@@ -0,0 +1,206 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>API Help</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="API Help";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li class="navBarCell1Rev">Help</li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?help-doc.html" target="_top">Frames</a></li>
+<li><a href="help-doc.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 class="title">How This API Document Is Organized</h1>
+<div class="subTitle">This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.</div>
+</div>
+<div class="contentContainer">
+<ul class="blockList">
+<li class="blockList">
+<h2>Package</h2>
+<p>Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:</p>
+<ul>
+<li>Interfaces (italic)</li>
+<li>Classes</li>
+<li>Enums</li>
+<li>Exceptions</li>
+<li>Errors</li>
+<li>Annotation Types</li>
+</ul>
+</li>
+<li class="blockList">
+<h2>Class/Interface</h2>
+<p>Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:</p>
+<ul>
+<li>Class inheritance diagram</li>
+<li>Direct Subclasses</li>
+<li>All Known Subinterfaces</li>
+<li>All Known Implementing Classes</li>
+<li>Class/interface declaration</li>
+<li>Class/interface description</li>
+</ul>
+<ul>
+<li>Nested Class Summary</li>
+<li>Field Summary</li>
+<li>Constructor Summary</li>
+<li>Method Summary</li>
+</ul>
+<ul>
+<li>Field Detail</li>
+<li>Constructor Detail</li>
+<li>Method Detail</li>
+</ul>
+<p>Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.</p>
+</li>
+<li class="blockList">
+<h2>Annotation Type</h2>
+<p>Each annotation type has its own separate page with the following sections:</p>
+<ul>
+<li>Annotation Type declaration</li>
+<li>Annotation Type description</li>
+<li>Required Element Summary</li>
+<li>Optional Element Summary</li>
+<li>Element Detail</li>
+</ul>
+</li>
+<li class="blockList">
+<h2>Enum</h2>
+<p>Each enum has its own separate page with the following sections:</p>
+<ul>
+<li>Enum declaration</li>
+<li>Enum description</li>
+<li>Enum Constant Summary</li>
+<li>Enum Constant Detail</li>
+</ul>
+</li>
+<li class="blockList">
+<h2>Tree (Class Hierarchy)</h2>
+<p>There is a <a href="overview-tree.html">Class Hierarchy</a> page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with <code>java.lang.Object</code>. The interfaces do not inherit from <code>java.lang.Object</code>.</p>
+<ul>
+<li>When viewing the Overview page, clicking "Tree" displays the hierarchy for all packages.</li>
+<li>When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.</li>
+</ul>
+</li>
+<li class="blockList">
+<h2>Deprecated API</h2>
+<p>The <a href="deprecated-list.html">Deprecated API</a> page lists all of the APIs that have been deprecated. A deprecated API is not recommended for use, generally because an improved alternative exists, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.</p>
+</li>
+<li class="blockList">
+<h2>Index</h2>
+<p>The <a href="index-all.html">Index</a> contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.</p>
+</li>
+<li class="blockList">
+<h2>Prev/Next</h2>
+<p>These links take you to the next or previous class, interface, package, or related page.</p>
+</li>
+<li class="blockList">
+<h2>Frames/No Frames</h2>
+<p>These links show and hide the HTML frames.  All pages are available with or without frames.</p>
+</li>
+<li class="blockList">
+<h2>All Classes</h2>
+<p>The <a href="allclasses-noframe.html">All Classes</a> link shows all classes and interfaces except non-static nested types.</p>
+</li>
+<li class="blockList">
+<h2>Serialized Form</h2>
+<p>Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.</p>
+</li>
+<li class="blockList">
+<h2>Constant Field Values</h2>
+<p>The <a href="constant-values.html">Constant Field Values</a> page lists the static final fields and their values.</p>
+</li>
+</ul>
+<em>This help file applies to API documentation generated using the standard doclet.</em></div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li class="navBarCell1Rev">Help</li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?help-doc.html" target="_top">Frames</a></li>
+<li><a href="help-doc.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/index-all.html b/java/doc/index-all.html
new file mode 100644
index 0000000..fa92e3c
--- /dev/null
+++ b/java/doc/index-all.html
@@ -0,0 +1,924 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Index</title>
+<link rel="stylesheet" type="text/css" href="./stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="Index";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="./org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="./org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="./deprecated-list.html">Deprecated</a></li>
+<li class="navBarCell1Rev">Index</li>
+<li><a href="./help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="./index.html?index-all.html" target="_top">Frames</a></li>
+<li><a href="index-all.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="./allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="contentContainer"><a href="#_B_">B</a>&nbsp;<a href="#_C_">C</a>&nbsp;<a href="#_D_">D</a>&nbsp;<a href="#_E_">E</a>&nbsp;<a href="#_F_">F</a>&nbsp;<a href="#_G_">G</a>&nbsp;<a href="#_H_">H</a>&nbsp;<a href="#_I_">I</a>&nbsp;<a href="#_J_">J</a>&nbsp;<a href="#_N_">N</a>&nbsp;<a href="#_O_">O</a>&nbsp;<a href="#_P_">P</a>&nbsp;<a href="#_S_">S</a>&nbsp;<a href="#_T_">T</a>&nbsp;<a href="#_Y_">Y</a>&nbsp;<a name="_B_">
+<!--   -->
+</a>
+<h2 class="title">B</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)">bufSize(int, int, int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns the maximum size of the buffer (in bytes) required to hold a JPEG
+ image with the given width, height, and level of chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)">bufSizeYUV(int, int, int, int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns the size of the buffer (in bytes) required to hold a YUV planar
+ image with the given width, height, and level of chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)">bufSizeYUV(int, int, int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+</dl>
+<a name="_C_">
+<!--   -->
+</a>
+<h2 class="title">C</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#cf">cf</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Custom filter instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#close()">close()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Free the native structures associated with this compressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Free the native structures associated with this decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)">compress(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and output a JPEG image to the given destination buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)">compress(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and return a buffer containing a JPEG image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, byte[], int)">compress(BufferedImage, byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)"><code>TJCompressor.compress(byte[], int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, int)">compress(BufferedImage, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)"><code>TJCompressor.compress(int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK">CS_CMYK</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">CMYK colorspace.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY">CS_GRAY</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Grayscale colorspace.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#CS_RGB">CS_RGB</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">RGB colorspace.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr">CS_YCbCr</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">YCbCr colorspace.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK">CS_YCCK</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">YCCK colorspace.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCustomFilter.html#customFilter(java.nio.ShortBuffer, java.awt.Rectangle, java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJTransform)">customFilter(ShortBuffer, Rectangle, Rectangle, int, int, TJTransform)</a></span> - Method in interface org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a></dt>
+<dd>
+<div class="block">A callback function that can be used to modify the DCT coefficients after
+ they are losslessly transformed but before they are transcoded to a new
+ JPEG image.</div>
+</dd>
+</dl>
+<a name="_D_">
+<!--   -->
+</a>
+<h2 class="title">D</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress(byte[], int, int, int, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress(byte[], int, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>TJDecompressor.decompress(byte[], int, int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress(int, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance and return a buffer containing the decompressed image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress(int[], int, int, int, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress(BufferedImage, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a decompressed/decoded image to
+ the given <code>BufferedImage</code> instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress(int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and return a <code>BufferedImage</code>
+ instance containing the decompressed/decoded image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">decompressToYUV(YUVImage, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>TJDecompressor.decompressToYUV(YUVImage, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)">decompressToYUV(int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the decompressed image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><code>TJDecompressor.decompressToYUV(int, int, int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+</dl>
+<a name="_E_">
+<!--   -->
+</a>
+<h2 class="title">E</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">encodeYUV(YUVImage, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)">encodeYUV(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>TJCompressor.encodeYUV(YUVImage, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)">encodeYUV(int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the encoded image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)">encodeYUV(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)"><code>TJCompressor.encodeYUV(int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, byte[], int)">encodeYUV(BufferedImage, byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><code>TJCompressor.encodeYUV(byte[], int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, int)">encodeYUV(BufferedImage, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>TJCompressor.setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)"><code>TJCompressor.encodeYUV(int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#equals(org.libjpegturbo.turbojpeg.TJScalingFactor)">equals(TJScalingFactor)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>
+<div class="block">Returns true or false, depending on whether this instance and
+ <code>other</code> have the same numerator and denominator.</div>
+</dd>
+</dl>
+<a name="_F_">
+<!--   -->
+</a>
+<h2 class="title">F</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#finalize()">finalize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_ACCURATEDCT">FLAG_ACCURATEDCT</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Use the most accurate DCT/IDCT algorithm available in the underlying
+ codec.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP">FLAG_BOTTOMUP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">The uncompressed source/destination image is stored in bottom-up (Windows,
+ OpenGL) order, not top-down (X11) order.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTDCT">FLAG_FASTDCT</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Use the fastest DCT/IDCT algorithm available in the underlying codec.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTUPSAMPLE">FLAG_FASTUPSAMPLE</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">FLAG_FORCEMMX</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span></div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">FLAG_FORCESSE</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span></div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">FLAG_FORCESSE2</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span></div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">FLAG_FORCESSE3</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span></div>
+</dd>
+</dl>
+<a name="_G_">
+<!--   -->
+</a>
+<h2 class="title">G</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getBlueOffset(int)">getBlueOffset(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">For the given pixel format, returns the number of bytes that the blue
+ component is offset from the start of the pixel.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getBuf()">getBuf()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the YUV image buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()">getColorspace()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the colorspace used in the source image (JPEG or YUV) associated
+ with this decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()">getCompressedSize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Returns the size of the image (in bytes) generated by the most recent
+ compress/encode operation.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#getDenom()">getDenom()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>
+<div class="block">Returns the denominator.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getGreenOffset(int)">getGreenOffset(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">For the given pixel format, returns the number of bytes that the green
+ component is offset from the start of the pixel.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the height of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getHeight()">getHeight()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the height of the YUV image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()"><code>TJDecompressor.getSourceBuf()</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()"><code>TJDecompressor.getSourceSize()</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)">getMCUHeight(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns the MCU block height for the given level of chrominance
+ subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)">getMCUWidth(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns the MCU block width for the given level of chrominance
+ subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#getNum()">getNum()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>
+<div class="block">Returns the numerator.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getPad()">getPad()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the line padding used in the YUV image buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getPixelSize(int)">getPixelSize(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns the pixel size (in bytes) for the given pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getRedOffset(int)">getRedOffset(int)</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">For the given pixel format, returns the number of bytes that the red
+ component is offset from the start of the pixel.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)">getScaled(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>
+<div class="block">Returns the scaled value of <code>dimension</code>.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight(int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the height of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth(int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the width of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()">getScalingFactors()</a></span> - Static method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Returns a list of fractional scaling factors that the JPEG decompressor in
+ this implementation of TurboJPEG supports.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getSize()">getSize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the size (in bytes) of the YUV image buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()">getSourceBuf()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the source image buffer associated with this decompressor
+ instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()">getSourceSize()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the size of the source image (in bytes) associated with this
+ decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the level of chrominance subsampling used in the source image
+ (JPEG or YUV) associated with this decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getSubsamp()">getSubsamp()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the level of chrominance subsampling used in the YUV image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#getTransformedSizes()">getTransformedSizes()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Returns the width of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#getWidth()">getWidth()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Returns the width of the YUV image.</div>
+</dd>
+</dl>
+<a name="_H_">
+<!--   -->
+</a>
+<h2 class="title">H</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#handle">handle</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#handle">handle</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+</dl>
+<a name="_I_">
+<!--   -->
+</a>
+<h2 class="title">I</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#isOne()">isOne()</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>
+<div class="block">Returns true or false, depending on whether this instance is equal to
+ 1/1.</div>
+</dd>
+</dl>
+<a name="_J_">
+<!--   -->
+</a>
+<h2 class="title">J</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBuf">jpegBuf</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize">jpegBufSize</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+</dl>
+<a name="_N_">
+<!--   -->
+</a>
+<h2 class="title">N</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#NUMCS">NUMCS</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">The number of JPEG colorspaces</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#NUMOP">NUMOP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">The number of lossless transform operations</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#NUMPF">NUMPF</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">The number of pixel formats</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#NUMSAMP">NUMSAMP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">The number of chrominance subsampling options</div>
+</dd>
+</dl>
+<a name="_O_">
+<!--   -->
+</a>
+<h2 class="title">O</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#op">op</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Transform operation (one of <code>OP_*</code>)</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_HFLIP">OP_HFLIP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Flip (mirror) image horizontally.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_NONE">OP_NONE</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Do not transform the position of the image pixels.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT180">OP_ROT180</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Rotate image 180 degrees.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT270">OP_ROT270</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Rotate image counter-clockwise by 90 degrees.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT90">OP_ROT90</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Rotate image clockwise by 90 degrees.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSPOSE">OP_TRANSPOSE</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Transpose image (flip/mirror along upper left to lower right axis).</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSVERSE">OP_TRANSVERSE</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Transverse transpose image (flip/mirror along upper right to lower left
+ axis).</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OP_VFLIP">OP_VFLIP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Flip (mirror) image vertically.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OPT_CROP">OPT_CROP</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">This option will enable lossless cropping.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OPT_GRAY">OPT_GRAY</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">This option will discard the color data in the input image and produce
+ a grayscale output image.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OPT_NOOUTPUT">OPT_NOOUTPUT</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">This option will prevent <a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> from outputting a JPEG image for this
+ particular transform.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT">OPT_PERFECT</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">This option will cause <a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> to throw an exception if the transform is not
+ perfect.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#OPT_TRIM">OPT_TRIM</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">This option will discard any partial MCU blocks that cannot be
+ transformed.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#options">options</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Transform options (bitwise OR of one or more of <code>OPT_*</code>)</div>
+</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a> - package org.libjpegturbo.turbojpeg</dt>
+<dd>&nbsp;</dd>
+</dl>
+<a name="_P_">
+<!--   -->
+</a>
+<h2 class="title">P</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_ABGR">PF_ABGR</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">ABGR pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_ARGB">PF_ARGB</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">ARGB pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_BGR">PF_BGR</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">BGR pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_BGRA">PF_BGRA</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">BGRA pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_BGRX">PF_BGRX</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">BGRX pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK">PF_CMYK</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">CMYK pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY">PF_GRAY</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Grayscale pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_RGB">PF_RGB</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">RGB pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_RGBA">PF_RGBA</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">RGBA pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_RGBX">PF_RGBX</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">RGBX pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_XBGR">PF_XBGR</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">XBGR pixel format.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#PF_XRGB">PF_XRGB</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">XRGB pixel format.</div>
+</dd>
+</dl>
+<a name="_S_">
+<!--   -->
+</a>
+<h2 class="title">S</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_411">SAMP_411</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">4:1:1 chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_420">SAMP_420</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">4:2:0 chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_422">SAMP_422</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">4:2:2 chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_440">SAMP_440</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">4:4:0 chrominance subsampling.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_444">SAMP_444</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">4:4:4 chrominance subsampling (no chrominance subsampling).</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_GRAY">SAMP_GRAY</a></span> - Static variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>
+<div class="block">Grayscale.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#setBuf(byte[], int, int, int, int)">setBuf(byte[], int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)"><code>TJDecompressor.setSourceImage(byte[], int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setJPEGQuality(int)">setJPEGQuality(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Set the JPEG image quality level for subsequent compress operations.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)">setSourceImage(byte[], int, int, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Associate an uncompressed RGB, grayscale, or CMYK source image with this
+ compressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)">setSourceImage(byte[], int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>TJCompressor.setSourceImage(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)">setSourceImage(BufferedImage, int, int, int, int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Associate an uncompressed RGB or grayscale source image with this
+ compressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">setSourceImage(YUVImage)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Associate an uncompressed YUV planar source image with this compressor
+ instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)">setSourceImage(byte[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Associate the JPEG image of length <code>imageSize</code> bytes stored in
+ <code>srcImage</code> with this decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">setSourceImage(YUVImage)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Associate the specified YUV planar source image with this decompressor
+ instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)">setSubsamp(int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Set the level of chrominance subsampling for subsequent compress/encode
+ operations.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#srcColorspace">srcColorspace</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#srcHeight">srcHeight</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#srcSubsamp">srcSubsamp</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#srcWidth">srcWidth</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+</dl>
+<a name="_T_">
+<!--   -->
+</a>
+<h2 class="title">T</h2>
+<dl>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJ</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">TurboJPEG utility class (cannot be instantiated)</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJ.html#TJ()">TJ()</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></dt>
+<dd>&nbsp;</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJCompressor</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">TurboJPEG compressor</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor()">TJCompressor()</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG compressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)">TJCompressor(byte[], int, int, int, int, int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)">TJCompressor(byte[], int, int, int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block"><span class="strong">Deprecated.</span>
+<div class="block"><i>Use
+ <a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><code>TJCompressor.TJCompressor(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(java.awt.image.BufferedImage, int, int, int, int)">TJCompressor(BufferedImage, int, int, int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">TJCustomFilter</span></a> - Interface in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">Custom filter callback interface</div>
+</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJDecompressor</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">TurboJPEG decompressor</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor()">TJDecompressor()</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG decompressor instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[])">TJDecompressor(byte[])</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[], int)">TJDecompressor(byte[], int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(org.libjpegturbo.turbojpeg.YUVImage)">TJDecompressor(YUVImage)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG decompressor instance and associate the YUV planar
+ source image stored in <code>yuvImage</code> with the newly created
+ instance.</div>
+</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJScalingFactor</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">Fractional scaling factor</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html#TJScalingFactor(int, int)">TJScalingFactor(int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></dt>
+<dd>&nbsp;</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransform</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">Lossless transform parameters</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform()">TJTransform()</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Create a new lossless transform instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform(int, int, int, int, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">TJTransform(int, int, int, int, int, int, TJCustomFilter)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform(java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">TJTransform(Rectangle, int, int, TJCustomFilter)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></dt>
+<dd>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransformer</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">TurboJPEG lossless transformer</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer()">TJTransformer()</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG lossless transformer instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[])">TJTransformer(byte[])</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[], int)">TJTransformer(byte[], int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)">transform(byte[][], TJTransform[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance into one or more JPEG images stored in the given destination
+ buffers.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJTransformer.html#transform(org.libjpegturbo.turbojpeg.TJTransform[], int)">transform(TJTransform[], int)</a></span> - Method in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dt>
+<dd>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance and return an array of <a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><code>TJDecompressor</code></a> instances, each of
+ which has a transformed JPEG image associated with it.</div>
+</dd>
+</dl>
+<a name="_Y_">
+<!--   -->
+</a>
+<h2 class="title">Y</h2>
+<dl>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#yuvBuf">yuvBuf</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#yuvHeight">yuvHeight</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html#yuvImage">yuvImage</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></dt>
+<dd>&nbsp;</dd>
+<dt><a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">YUVImage</span></a> - Class in <a href="./org/libjpegturbo/turbojpeg/package-summary.html">org.libjpegturbo.turbojpeg</a></dt>
+<dd>
+<div class="block">This class encapsulates a YUV planar image buffer and the metadata
+ associated with it.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#YUVImage(int, int, int, int)">YUVImage(int, int, int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Create a <code>YUVImage</code> instance with a new image buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#YUVImage(byte[], int, int, int, int)">YUVImage(byte[], int, int, int, int)</a></span> - Constructor for class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>
+<div class="block">Create a <code>YUVImage</code> instance from an existing YUV planar image
+ buffer.</div>
+</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#yuvPad">yuvPad</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#yuvSubsamp">yuvSubsamp</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+<dt><span class="strong"><a href="./org/libjpegturbo/turbojpeg/YUVImage.html#yuvWidth">yuvWidth</a></span> - Variable in class org.libjpegturbo.turbojpeg.<a href="./org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></dt>
+<dd>&nbsp;</dd>
+</dl>
+<a href="#_B_">B</a>&nbsp;<a href="#_C_">C</a>&nbsp;<a href="#_D_">D</a>&nbsp;<a href="#_E_">E</a>&nbsp;<a href="#_F_">F</a>&nbsp;<a href="#_G_">G</a>&nbsp;<a href="#_H_">H</a>&nbsp;<a href="#_I_">I</a>&nbsp;<a href="#_J_">J</a>&nbsp;<a href="#_N_">N</a>&nbsp;<a href="#_O_">O</a>&nbsp;<a href="#_P_">P</a>&nbsp;<a href="#_S_">S</a>&nbsp;<a href="#_T_">T</a>&nbsp;<a href="#_Y_">Y</a>&nbsp;</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="./org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="./org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="./deprecated-list.html">Deprecated</a></li>
+<li class="navBarCell1Rev">Index</li>
+<li><a href="./help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="./index.html?index-all.html" target="_top">Frames</a></li>
+<li><a href="index-all.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="./allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");  // the bottom-navbar list that holds the "All Classes" link
+  if(window==top) {  // this page is the top-level window, i.e. not displayed inside a frame
+    allClassesLink.style.display = "block";  // show the list
+  }
+  else {
+    allClassesLink.style.display = "none";  // hide the list
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/index.html b/java/doc/index.html
new file mode 100644
index 0000000..25a639d
--- /dev/null
+++ b/java/doc/index.html
@@ -0,0 +1,63 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Generated Documentation (Untitled)</title>
+<script type="text/javascript">
+    targetPage = "" + window.location.search;  // query string of this page, coerced to a string
+    if (targetPage != "" && targetPage != "undefined")
+        targetPage = targetPage.substring(1);  // drop the first character (the "?" that starts a non-empty query string)
+    if (targetPage.indexOf(":") != -1 || (targetPage != "" && !validURL(targetPage)))  // contains ":" or fails validURL()
+        targetPage = "undefined";  // sentinel value; loadFrames() does nothing when it sees it
+    function validURL(url) {  // returns true only if url passes every check below; used to vet the query-string target
+        var pos = url.indexOf(".html");  // index of the first occurrence of ".html"
+        if (pos == -1 || pos != url.length - 5)  // ".html" must occur, and its first occurrence must be the final 5 characters
+            return false;
+        var allowNumber = false;  // digits and '-' are accepted only while this is true
+        var allowSep = false;  // '/' and '.' are accepted only while this is true
+        var seenDot = false;  // set once a '.' has been scanned; any later '/' is rejected
+        for (var i = 0; i < url.length - 5; i++) {  // scan every character that precedes the ".html" suffix
+            var ch = url.charAt(i);
+            if ('a' <= ch && ch <= 'z' ||
+                    'A' <= ch && ch <= 'Z' ||
+                    ch == '$' ||
+                    ch == '_') {  // ASCII letter, '$' or '_'
+                allowNumber = true;
+                allowSep = true;
+            } else if ('0' <= ch && ch <= '9'
+                    || ch == '-') {  // digit or '-'
+                if (!allowNumber)  // no letter, '$' or '_' since the start or since the last separator
+                     return false;
+            } else if (ch == '/' || ch == '.') {  // separator
+                if (!allowSep)  // separator at the very start or directly after another separator
+                    return false;
+                allowNumber = false;  // the next scanned character must again be a letter, '$' or '_'
+                allowSep = false;
+                if (ch == '.')
+                     seenDot = true;
+                if (ch == '/' && seenDot)  // '/' appearing after an earlier '.'
+                     return false;
+            } else {  // any other character is rejected
+                return false;
+            }
+        }
+        return true;
+    }
+    function loadFrames() {  // called from the frameset's onload attribute as top.loadFrames()
+        if (targetPage != "" && targetPage != "undefined")  // skip when no target was given or it was rejected above
+             top.classFrame.location = top.targetPage;  // load the requested page into the frame named classFrame
+    }
+</script>
+</head>
+<frameset cols="20%,80%" title="Documentation frame" onload="top.loadFrames()">
+<frame src="allclasses-frame.html" name="packageFrame" title="All classes and interfaces (except non-static nested types)">
+<frame src="org/libjpegturbo/turbojpeg/package-summary.html" name="classFrame" title="Package, class and interface descriptions" scrolling="yes">
+<noframes>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<h2>Frame Alert</h2>
+<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="org/libjpegturbo/turbojpeg/package-summary.html">Non-frame version</a>.</p>
+</noframes>
+</frameset>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJ.html b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
new file mode 100644
index 0000000..ee22e76
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
@@ -0,0 +1,1201 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJ</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {  // only when this page's URL does not contain is-external=true
+        parent.document.title="TJ";  // set the title of the parent window's document
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Class</li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJ.html" target="_top">Frames</a></li>
+<li><a href="TJ.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");  // the top-navbar list that holds the "All Classes" link
+  if(window==top) {  // this page is the top-level window, i.e. not displayed inside a frame
+    allClassesLink.style.display = "block";  // show the list
+  }
+  else {
+    allClassesLink.style.display = "none";  // hide the list
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJ" class="title">Class TJ</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJ</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public final class <span class="strong">TJ</span>
+extends java.lang.Object</pre>
+<div class="block">TurboJPEG utility class (cannot be instantiated)</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
+<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Field and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK">CS_CMYK</a></strong></code>
+<div class="block">CMYK colorspace.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY">CS_GRAY</a></strong></code>
+<div class="block">Grayscale colorspace.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_RGB">CS_RGB</a></strong></code>
+<div class="block">RGB colorspace.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr">CS_YCbCr</a></strong></code>
+<div class="block">YCbCr colorspace.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK">CS_YCCK</a></strong></code>
+<div class="block">YCCK colorspace.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_ACCURATEDCT">FLAG_ACCURATEDCT</a></strong></code>
+<div class="block">Use the most accurate DCT/IDCT algorithm available in the underlying
+ codec.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP">FLAG_BOTTOMUP</a></strong></code>
+<div class="block">The uncompressed source/destination image is stored in bottom-up (Windows,
+ OpenGL) order, not top-down (X11) order.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTDCT">FLAG_FASTDCT</a></strong></code>
+<div class="block">Use the fastest DCT/IDCT algorithm available in the underlying codec.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FASTUPSAMPLE">FLAG_FASTUPSAMPLE</a></strong></code>
+<div class="block">When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">FLAG_FORCEMMX</a></strong></code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">FLAG_FORCESSE</a></strong></code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">FLAG_FORCESSE2</a></strong></code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">FLAG_FORCESSE3</a></strong></code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#NUMCS">NUMCS</a></strong></code>
+<div class="block">The number of JPEG colorspaces</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#NUMPF">NUMPF</a></strong></code>
+<div class="block">The number of pixel formats</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#NUMSAMP">NUMSAMP</a></strong></code>
+<div class="block">The number of chrominance subsampling options</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_ABGR">PF_ABGR</a></strong></code>
+<div class="block">ABGR pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_ARGB">PF_ARGB</a></strong></code>
+<div class="block">ARGB pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_BGR">PF_BGR</a></strong></code>
+<div class="block">BGR pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_BGRA">PF_BGRA</a></strong></code>
+<div class="block">BGRA pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_BGRX">PF_BGRX</a></strong></code>
+<div class="block">BGRX pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK">PF_CMYK</a></strong></code>
+<div class="block">CMYK pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY">PF_GRAY</a></strong></code>
+<div class="block">Grayscale pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB">PF_RGB</a></strong></code>
+<div class="block">RGB pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGBA">PF_RGBA</a></strong></code>
+<div class="block">RGBA pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGBX">PF_RGBX</a></strong></code>
+<div class="block">RGBX pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_XBGR">PF_XBGR</a></strong></code>
+<div class="block">XBGR pixel format.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_XRGB">PF_XRGB</a></strong></code>
+<div class="block">XRGB pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_411">SAMP_411</a></strong></code>
+<div class="block">4:1:1 chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_420">SAMP_420</a></strong></code>
+<div class="block">4:2:0 chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_422">SAMP_422</a></strong></code>
+<div class="block">4:2:2 chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_440">SAMP_440</a></strong></code>
+<div class="block">4:4:0 chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444">SAMP_444</a></strong></code>
+<div class="block">4:4:4 chrominance subsampling (no chrominance subsampling).</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_GRAY">SAMP_GRAY</a></strong></code>
+<div class="block">Grayscale.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#TJ()">TJ</a></strong>()</code>&nbsp;</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)">bufSize</a></strong>(int&nbsp;width,
+       int&nbsp;height,
+       int&nbsp;jpegSubsamp)</code>
+<div class="block">Returns the maximum size of the buffer (in bytes) required to hold a JPEG
+ image with the given width, height, and level of chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)">bufSizeYUV</a></strong>(int&nbsp;width,
+          int&nbsp;height,
+          int&nbsp;subsamp)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>bufSizeYUV(int, int, int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)">bufSizeYUV</a></strong>(int&nbsp;width,
+          int&nbsp;pad,
+          int&nbsp;height,
+          int&nbsp;subsamp)</code>
+<div class="block">Returns the size of the buffer (in bytes) required to hold a YUV planar
+ image with the given width, height, and level of chrominance subsampling.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getBlueOffset(int)">getBlueOffset</a></strong>(int&nbsp;pixelFormat)</code>
+<div class="block">For the given pixel format, returns the number of bytes that the blue
+ component is offset from the start of the pixel.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getGreenOffset(int)">getGreenOffset</a></strong>(int&nbsp;pixelFormat)</code>
+<div class="block">For the given pixel format, returns the number of bytes that the green
+ component is offset from the start of the pixel.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)">getMCUHeight</a></strong>(int&nbsp;subsamp)</code>
+<div class="block">Returns the MCU block height for the given level of chrominance
+ subsampling.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)">getMCUWidth</a></strong>(int&nbsp;subsamp)</code>
+<div class="block">Returns the MCU block width for the given level of chrominance
+ subsampling.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getPixelSize(int)">getPixelSize</a></strong>(int&nbsp;pixelFormat)</code>
+<div class="block">Returns the pixel size (in bytes) for the given pixel format.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getRedOffset(int)">getRedOffset</a></strong>(int&nbsp;pixelFormat)</code>
+<div class="block">For the given pixel format, returns the number of bytes that the red
+ component is offset from the start of the pixel.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static <a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a>[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()">getScalingFactors</a></strong>()</code>
+<div class="block">Returns a list of fractional scaling factors that the JPEG decompressor in
+ this implementation of TurboJPEG supports.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ============ FIELD DETAIL =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_detail">
+<!--   -->
+</a>
+<h3>Field Detail</h3>
+<a name="NUMSAMP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>NUMSAMP</h4>
+<pre>public static final&nbsp;int NUMSAMP</pre>
+<div class="block">The number of chrominance subsampling options</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.NUMSAMP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_444">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_444</h4>
+<pre>public static final&nbsp;int SAMP_444</pre>
+<div class="block">4:4:4 chrominance subsampling (no chrominance subsampling).  The JPEG
+ or YUV image will contain one chrominance component for every pixel in the
+ source image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_444">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_422">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_422</h4>
+<pre>public static final&nbsp;int SAMP_422</pre>
+<div class="block">4:2:2 chrominance subsampling.  The JPEG or YUV image will contain one
+ chrominance component for every 2x1 block of pixels in the source image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_422">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_420">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_420</h4>
+<pre>public static final&nbsp;int SAMP_420</pre>
+<div class="block">4:2:0 chrominance subsampling.  The JPEG or YUV image will contain one
+ chrominance component for every 2x2 block of pixels in the source image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_420">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_GRAY">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_GRAY</h4>
+<pre>public static final&nbsp;int SAMP_GRAY</pre>
+<div class="block">Grayscale.  The JPEG or YUV image will contain no chrominance components.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_GRAY">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_440">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_440</h4>
+<pre>public static final&nbsp;int SAMP_440</pre>
+<div class="block">4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
+ chrominance component for every 1x2 block of pixels in the source image.
+ Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_440">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="SAMP_411">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>SAMP_411</h4>
+<pre>public static final&nbsp;int SAMP_411</pre>
+<div class="block">4:1:1 chrominance subsampling.  The JPEG or YUV image will contain one
+ chrominance component for every 4x1 block of pixels in the source image.
+ JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+ same size as those compressed with 4:2:0 subsampling, and in the
+ aggregate, both subsampling methods produce approximately the same
+ perceptual quality.  However, 4:1:1 is better able to reproduce sharp
+ horizontal features.  Note that 4:1:1 subsampling is not fully accelerated
+ in libjpeg-turbo.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_411">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="NUMPF">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>NUMPF</h4>
+<pre>public static final&nbsp;int NUMPF</pre>
+<div class="block">The number of pixel formats</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.NUMPF">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_RGB">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_RGB</h4>
+<pre>public static final&nbsp;int PF_RGB</pre>
+<div class="block">RGB pixel format.  The red, green, and blue components in the image are
+ stored in 3-byte pixels in the order R, G, B from lowest to highest byte
+ address within each pixel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_RGB">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_BGR">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_BGR</h4>
+<pre>public static final&nbsp;int PF_BGR</pre>
+<div class="block">BGR pixel format.  The red, green, and blue components in the image are
+ stored in 3-byte pixels in the order B, G, R from lowest to highest byte
+ address within each pixel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_BGR">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_RGBX">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_RGBX</h4>
+<pre>public static final&nbsp;int PF_RGBX</pre>
+<div class="block">RGBX pixel format.  The red, green, and blue components in the image are
+ stored in 4-byte pixels in the order R, G, B from lowest to highest byte
+ address within each pixel.  The X component is ignored when compressing
+ and undefined when decompressing.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_RGBX">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_BGRX">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_BGRX</h4>
+<pre>public static final&nbsp;int PF_BGRX</pre>
+<div class="block">BGRX pixel format.  The red, green, and blue components in the image are
+ stored in 4-byte pixels in the order B, G, R from lowest to highest byte
+ address within each pixel.  The X component is ignored when compressing
+ and undefined when decompressing.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_BGRX">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_XBGR">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_XBGR</h4>
+<pre>public static final&nbsp;int PF_XBGR</pre>
+<div class="block">XBGR pixel format.  The red, green, and blue components in the image are
+ stored in 4-byte pixels in the order R, G, B from highest to lowest byte
+ address within each pixel.  The X component is ignored when compressing
+ and undefined when decompressing.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_XBGR">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_XRGB">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_XRGB</h4>
+<pre>public static final&nbsp;int PF_XRGB</pre>
+<div class="block">XRGB pixel format.  The red, green, and blue components in the image are
+ stored in 4-byte pixels in the order B, G, R from highest to lowest byte
+ address within each pixel.  The X component is ignored when compressing
+ and undefined when decompressing.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_XRGB">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_GRAY">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_GRAY</h4>
+<pre>public static final&nbsp;int PF_GRAY</pre>
+<div class="block">Grayscale pixel format.  Each 1-byte pixel represents a luminance
+ (brightness) level from 0 to 255.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_GRAY">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_RGBA">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_RGBA</h4>
+<pre>public static final&nbsp;int PF_RGBA</pre>
+<div class="block">RGBA pixel format.  This is the same as <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGBX"><code>PF_RGBX</code></a>, except that when
+ decompressing, the X byte is guaranteed to be 0xFF, which can be
+ interpreted as an opaque alpha channel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_RGBA">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_BGRA">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_BGRA</h4>
+<pre>public static final&nbsp;int PF_BGRA</pre>
+<div class="block">BGRA pixel format.  This is the same as <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_BGRX"><code>PF_BGRX</code></a>, except that when
+ decompressing, the X byte is guaranteed to be 0xFF, which can be
+ interpreted as an opaque alpha channel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_BGRA">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_ABGR">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_ABGR</h4>
+<pre>public static final&nbsp;int PF_ABGR</pre>
+<div class="block">ABGR pixel format.  This is the same as <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_XBGR"><code>PF_XBGR</code></a>, except that when
+ decompressing, the X byte is guaranteed to be 0xFF, which can be
+ interpreted as an opaque alpha channel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_ABGR">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_ARGB">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_ARGB</h4>
+<pre>public static final&nbsp;int PF_ARGB</pre>
+<div class="block">ARGB pixel format.  This is the same as <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_XRGB"><code>PF_XRGB</code></a>, except that when
+ decompressing, the X byte is guaranteed to be 0xFF, which can be
+ interpreted as an opaque alpha channel.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_ARGB">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="PF_CMYK">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>PF_CMYK</h4>
+<pre>public static final&nbsp;int PF_CMYK</pre>
+<div class="block">CMYK pixel format.  Unlike RGB, which is an additive color model used
+ primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+ color model used primarily for printing.  In the CMYK color model, the
+ value of each color component typically corresponds to an amount of cyan,
+ magenta, yellow, or black ink that is applied to a white background.  In
+ order to convert between CMYK and RGB, it is necessary to use a color
+ management system (CMS).  A CMS will attempt to map colors within the
+ printer's gamut to perceptually similar colors in the display's gamut and
+ vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+ be defined with a simple formula.  Thus, such a conversion is out of scope
+ for a codec library.  However, the TurboJPEG API allows for compressing
+ CMYK pixels into a YCCK JPEG image (see <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK"><code>CS_YCCK</code></a>) and
+ decompressing YCCK JPEG images into CMYK pixels.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_CMYK">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="NUMCS">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>NUMCS</h4>
+<pre>public static final&nbsp;int NUMCS</pre>
+<div class="block">The number of JPEG colorspaces</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.NUMCS">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="CS_RGB">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CS_RGB</h4>
+<pre>public static final&nbsp;int CS_RGB</pre>
+<div class="block">RGB colorspace.  When compressing the JPEG image, the R, G, and B
+ components in the source image are reordered into image planes, but no
+ colorspace conversion or subsampling is performed.  RGB JPEG images can be
+ decompressed to any of the extended RGB pixel formats or grayscale, but
+ they cannot be decompressed to YUV images.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_RGB">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="CS_YCbCr">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CS_YCbCr</h4>
+<pre>public static final&nbsp;int CS_YCbCr</pre>
+<div class="block">YCbCr colorspace.  YCbCr is not an absolute colorspace but rather a
+ mathematical transformation of RGB designed solely for storage and
+ transmission.  YCbCr images must be converted to RGB before they can
+ actually be displayed.  In the YCbCr colorspace, the Y (luminance)
+ component represents the black &amp; white portion of the original image, and
+ the Cb and Cr (chrominance) components represent the color portion of the
+ original image.  Originally, the analog equivalent of this transformation
+ allowed the same signal to drive both black &amp; white and color televisions,
+ but JPEG images use YCbCr primarily because it allows the color data to be
+ optionally subsampled for the purposes of reducing bandwidth or disk
+ space.  YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+ can be compressed from and decompressed to any of the extended RGB pixel
+ formats or grayscale, or they can be decompressed to YUV planar images.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_YCbCr">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="CS_GRAY">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CS_GRAY</h4>
+<pre>public static final&nbsp;int CS_GRAY</pre>
+<div class="block">Grayscale colorspace.  The JPEG image retains only the luminance data (Y
+ component), and any color data from the source image is discarded.
+ Grayscale JPEG images can be compressed from and decompressed to any of
+ the extended RGB pixel formats or grayscale, or they can be decompressed
+ to YUV planar images.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_GRAY">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="CS_CMYK">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CS_CMYK</h4>
+<pre>public static final&nbsp;int CS_CMYK</pre>
+<div class="block">CMYK colorspace.  When compressing the JPEG image, the C, M, Y, and K
+ components in the source image are reordered into image planes, but no
+ colorspace conversion or subsampling is performed.  CMYK JPEG images can
+ only be decompressed to CMYK pixels.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_CMYK">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="CS_YCCK">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>CS_YCCK</h4>
+<pre>public static final&nbsp;int CS_YCCK</pre>
+<div class="block">YCCK colorspace.  YCCK (AKA "YCbCrK") is not an absolute colorspace but
+ rather a mathematical transformation of CMYK designed solely for storage
+ and transmission.  It is to CMYK as YCbCr is to RGB.  CMYK pixels can be
+ reversibly transformed into YCCK, and as with YCbCr, the chrominance
+ components in the YCCK pixels can be subsampled without incurring major
+ perceptual loss.  YCCK JPEG images can only be compressed from and
+ decompressed to CMYK pixels.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_YCCK">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_BOTTOMUP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_BOTTOMUP</h4>
+<pre>public static final&nbsp;int FLAG_BOTTOMUP</pre>
+<div class="block">The uncompressed source/destination image is stored in bottom-up (Windows,
+ OpenGL) order, not top-down (X11) order.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_BOTTOMUP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FORCEMMX">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FORCEMMX</h4>
+<pre>@Deprecated
+public static final&nbsp;int FLAG_FORCEMMX</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCEMMX">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FORCESSE">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FORCESSE</h4>
+<pre>@Deprecated
+public static final&nbsp;int FLAG_FORCESSE</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FORCESSE2">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FORCESSE2</h4>
+<pre>@Deprecated
+public static final&nbsp;int FLAG_FORCESSE2</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE2">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FORCESSE3">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FORCESSE3</h4>
+<pre>@Deprecated
+public static final&nbsp;int FLAG_FORCESSE3</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE3">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FASTUPSAMPLE">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FASTUPSAMPLE</h4>
+<pre>public static final&nbsp;int FLAG_FASTUPSAMPLE</pre>
+<div class="block">When decompressing an image that was compressed using chrominance
+ subsampling, use the fastest chrominance upsampling algorithm available in
+ the underlying codec.  The default is to use smooth upsampling, which
+ creates a smooth transition between neighboring chrominance components in
+ order to reduce upsampling artifacts in the decompressed image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FASTUPSAMPLE">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_FASTDCT">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>FLAG_FASTDCT</h4>
+<pre>public static final&nbsp;int FLAG_FASTDCT</pre>
+<div class="block">Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
+ default if this flag is not specified is implementation-specific.  For
+ example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ algorithm by default when compressing, because this has been shown to have
+ only a very slight effect on accuracy, but it uses the accurate algorithm
+ when decompressing, because this has been shown to have a larger effect.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FASTDCT">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="FLAG_ACCURATEDCT">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>FLAG_ACCURATEDCT</h4>
+<pre>public static final&nbsp;int FLAG_ACCURATEDCT</pre>
+<div class="block">Use the most accurate DCT/IDCT algorithm available in the underlying
+ codec.  The default if this flag is not specified is
+ implementation-specific.  For example, the implementation of TurboJPEG for
+ libjpeg[-turbo] uses the fast algorithm by default when compressing,
+ because this has been shown to have only a very slight effect on accuracy,
+ but it uses the accurate algorithm when decompressing, because this has
+ been shown to have a larger effect.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_ACCURATEDCT">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJ()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJ</h4>
+<pre>public&nbsp;TJ()</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="getMCUWidth(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getMCUWidth</h4>
+<pre>public static&nbsp;int&nbsp;getMCUWidth(int&nbsp;subsamp)
+                       throws java.lang.Exception</pre>
+<div class="block">Returns the MCU block width for the given level of chrominance
+ subsampling.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>subsamp</code> - the level of chrominance subsampling (one of
+ <code>SAMP_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the MCU block width for the given level of chrominance subsampling</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getMCUHeight(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getMCUHeight</h4>
+<pre>public static&nbsp;int&nbsp;getMCUHeight(int&nbsp;subsamp)
+                        throws java.lang.Exception</pre>
+<div class="block">Returns the MCU block height for the given level of chrominance
+ subsampling.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>subsamp</code> - the level of chrominance subsampling (one of
+ <code>SAMP_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the MCU block height for the given level of chrominance
+ subsampling</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getPixelSize(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getPixelSize</h4>
+<pre>public static&nbsp;int&nbsp;getPixelSize(int&nbsp;pixelFormat)
+                        throws java.lang.Exception</pre>
+<div class="block">Returns the pixel size (in bytes) for the given pixel format.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>pixelFormat</code> - the pixel format (one of <code>PF_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the pixel size (in bytes) for the given pixel format</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getRedOffset(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getRedOffset</h4>
+<pre>public static&nbsp;int&nbsp;getRedOffset(int&nbsp;pixelFormat)
+                        throws java.lang.Exception</pre>
+<div class="block">For the given pixel format, returns the number of bytes that the red
+ component is offset from the start of the pixel.  For instance, if a pixel
+ of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+ then the red component will be
+ <code>pixel[TJ.getRedOffset(TJ.PF_BGRX)]</code>.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>pixelFormat</code> - the pixel format (one of <code>PF_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the red offset for the given pixel format</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getGreenOffset(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getGreenOffset</h4>
+<pre>public static&nbsp;int&nbsp;getGreenOffset(int&nbsp;pixelFormat)
+                          throws java.lang.Exception</pre>
+<div class="block">For the given pixel format, returns the number of bytes that the green
+ component is offset from the start of the pixel.  For instance, if a pixel
+ of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+ then the green component will be
+ <code>pixel[TJ.getGreenOffset(TJ.PF_BGRX)]</code>.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>pixelFormat</code> - the pixel format (one of <code>PF_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the green offset for the given pixel format</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getBlueOffset(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getBlueOffset</h4>
+<pre>public static&nbsp;int&nbsp;getBlueOffset(int&nbsp;pixelFormat)
+                         throws java.lang.Exception</pre>
+<div class="block">For the given pixel format, returns the number of bytes that the blue
+ component is offset from the start of the pixel.  For instance, if a pixel
+ of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+ then the blue component will be
+ <code>pixel[TJ.getBlueOffset(TJ.PF_BGRX)]</code>.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>pixelFormat</code> - the pixel format (one of <code>PF_*</code>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the blue offset for the given pixel format</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="bufSize(int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>bufSize</h4>
+<pre>public static&nbsp;int&nbsp;bufSize(int&nbsp;width,
+          int&nbsp;height,
+          int&nbsp;jpegSubsamp)
+                   throws java.lang.Exception</pre>
+<div class="block">Returns the maximum size of the buffer (in bytes) required to hold a JPEG
+ image with the given width, height, and level of chrominance subsampling.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>width</code> - the width (in pixels) of the JPEG image</dd><dd><code>height</code> - the height (in pixels) of the JPEG image</dd><dd><code>jpegSubsamp</code> - the level of chrominance subsampling to be used when
+ generating the JPEG image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the maximum size of the buffer (in bytes) required to hold a JPEG
+ image with the given width, height, and level of chrominance subsampling</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="bufSizeYUV(int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>bufSizeYUV</h4>
+<pre>public static&nbsp;int&nbsp;bufSizeYUV(int&nbsp;width,
+             int&nbsp;pad,
+             int&nbsp;height,
+             int&nbsp;subsamp)
+                      throws java.lang.Exception</pre>
+<div class="block">Returns the size of the buffer (in bytes) required to hold a YUV planar
+ image with the given width, height, and level of chrominance subsampling.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>width</code> - the width (in pixels) of the YUV image</dd><dd><code>pad</code> - the width of each line in each plane of the image is padded to
+        the nearest multiple of this number of bytes (must be a power of
+        2.)</dd><dd><code>height</code> - the height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling used in the YUV
+ image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the size of the buffer (in bytes) required to hold a YUV planar
+ image with the given width, height, and level of chrominance subsampling</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="bufSizeYUV(int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>bufSizeYUV</h4>
+<pre>@Deprecated
+public static&nbsp;int&nbsp;bufSizeYUV(int&nbsp;width,
+                        int&nbsp;height,
+                        int&nbsp;subsamp)
+                      throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>bufSizeYUV(int, int, int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getScalingFactors()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getScalingFactors</h4>
+<pre>public static&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a>[]&nbsp;getScalingFactors()
+                                           throws java.lang.Exception</pre>
+<div class="block">Returns a list of fractional scaling factors that the JPEG decompressor in
+ this implementation of TurboJPEG supports.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>a list of fractional scaling factors that the JPEG decompressor in
+ this implementation of TurboJPEG supports</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Class</li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJ.html" target="_top">Frames</a></li>
+<li><a href="TJ.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
new file mode 100644
index 0000000..6c367a7
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
@@ -0,0 +1,880 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJCompressor</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJCompressor";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJCompressor.html" target="_top">Frames</a></li>
+<li><a href="TJCompressor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJCompressor" class="title">Class TJCompressor</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJCompressor</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">TJCompressor</span>
+extends java.lang.Object</pre>
+<div class="block">TurboJPEG compressor</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor()">TJCompressor</a></strong>()</code>
+<div class="block">Create a TurboJPEG compressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(java.awt.image.BufferedImage, int, int, int, int)">TJCompressor</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+            int&nbsp;x,
+            int&nbsp;y,
+            int&nbsp;width,
+            int&nbsp;height)</code>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int)">TJCompressor</a></strong>(byte[]&nbsp;srcImage,
+            int&nbsp;width,
+            int&nbsp;pitch,
+            int&nbsp;height,
+            int&nbsp;pixelFormat)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><code>TJCompressor(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)">TJCompressor</a></strong>(byte[]&nbsp;srcImage,
+            int&nbsp;x,
+            int&nbsp;y,
+            int&nbsp;width,
+            int&nbsp;pitch,
+            int&nbsp;height,
+            int&nbsp;pixelFormat)</code>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#close()">close</a></strong>()</code>
+<div class="block">Free the native structures associated with this compressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, byte[], int)">compress</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+        byte[]&nbsp;dstBuf,
+        int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)"><code>compress(byte[], int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(java.awt.image.BufferedImage, int)">compress</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+        int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)"><code>compress(int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)">compress</a></strong>(byte[]&nbsp;dstBuf,
+        int&nbsp;flags)</code>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and output a JPEG image to the given destination buffer.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)">compress</a></strong>(int&nbsp;flags)</code>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and return a buffer containing a JPEG image.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, byte[], int)">encodeYUV</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+         byte[]&nbsp;dstBuf,
+         int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><code>encodeYUV(byte[], int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(java.awt.image.BufferedImage, int)">encodeYUV</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+         int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)"><code>encodeYUV(int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)">encodeYUV</a></strong>(byte[]&nbsp;dstBuf,
+         int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>encodeYUV(YUVImage, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)">encodeYUV</a></strong>(int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)"><code>encodeYUV(int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)">encodeYUV</a></strong>(int&nbsp;pad,
+         int&nbsp;flags)</code>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the encoded image.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">encodeYUV</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;dstImage,
+         int&nbsp;flags)</code>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#finalize()">finalize</a></strong>()</code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()">getCompressedSize</a></strong>()</code>
+<div class="block">Returns the size of the image (in bytes) generated by the most recent
+ compress/encode operation.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setJPEGQuality(int)">setJPEGQuality</a></strong>(int&nbsp;quality)</code>
+<div class="block">Set the JPEG image quality level for subsequent compress operations.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)">setSourceImage</a></strong>(java.awt.image.BufferedImage&nbsp;srcImage,
+              int&nbsp;x,
+              int&nbsp;y,
+              int&nbsp;width,
+              int&nbsp;height)</code>
+<div class="block">Associate an uncompressed RGB or grayscale source image with this
+ compressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)">setSourceImage</a></strong>(byte[]&nbsp;srcImage,
+              int&nbsp;width,
+              int&nbsp;pitch,
+              int&nbsp;height,
+              int&nbsp;pixelFormat)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)">setSourceImage</a></strong>(byte[]&nbsp;srcImage,
+              int&nbsp;x,
+              int&nbsp;y,
+              int&nbsp;width,
+              int&nbsp;pitch,
+              int&nbsp;height,
+              int&nbsp;pixelFormat)</code>
+<div class="block">Associate an uncompressed RGB, grayscale, or CMYK source image with this
+ compressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">setSourceImage</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;srcImage)</code>
+<div class="block">Associate an uncompressed YUV planar source image with this compressor
+ instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)">setSubsamp</a></strong>(int&nbsp;newSubsamp)</code>
+<div class="block">Set the level of chrominance subsampling for subsequent compress/encode
+ operations.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJCompressor()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJCompressor</h4>
+<pre>public&nbsp;TJCompressor()
+             throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG compressor instance.</div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJCompressor(byte[], int, int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJCompressor</h4>
+<pre>public&nbsp;TJCompressor(byte[]&nbsp;srcImage,
+            int&nbsp;x,
+            int&nbsp;y,
+            int&nbsp;width,
+            int&nbsp;pitch,
+            int&nbsp;height,
+            int&nbsp;pixelFormat)
+             throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>x</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>y</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>width</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>pitch</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>height</code> - see <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> for description</dd><dd><code>pixelFormat</code> - pixel format of the source image (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><code>TJ.PF_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJCompressor(byte[], int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJCompressor</h4>
+<pre>@Deprecated
+public&nbsp;TJCompressor(byte[]&nbsp;srcImage,
+                       int&nbsp;width,
+                       int&nbsp;pitch,
+                       int&nbsp;height,
+                       int&nbsp;pixelFormat)
+             throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#TJCompressor(byte[], int, int, int, int, int, int)"><code>TJCompressor(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJCompressor(java.awt.image.BufferedImage, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJCompressor</h4>
+<pre>public&nbsp;TJCompressor(java.awt.image.BufferedImage&nbsp;srcImage,
+            int&nbsp;x,
+            int&nbsp;y,
+            int&nbsp;width,
+            int&nbsp;height)
+             throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG compressor instance and associate the uncompressed
+ source image stored in <code>srcImage</code> with the newly created
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> for description</dd><dd><code>x</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> for description</dd><dd><code>y</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> for description</dd><dd><code>width</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> for description</dd><dd><code>height</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> for description</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="setSourceImage(byte[], int, int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>public&nbsp;void&nbsp;setSourceImage(byte[]&nbsp;srcImage,
+                  int&nbsp;x,
+                  int&nbsp;y,
+                  int&nbsp;width,
+                  int&nbsp;pitch,
+                  int&nbsp;height,
+                  int&nbsp;pixelFormat)
+                    throws java.lang.Exception</pre>
+<div class="block">Associate an uncompressed RGB, grayscale, or CMYK source image with this
+ compressor instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - image buffer containing RGB, grayscale, or CMYK pixels to
+ be compressed or encoded</dd><dd><code>x</code> - x offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded</dd><dd><code>y</code> - y offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded</dd><dd><code>width</code> - width (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded</dd><dd><code>pitch</code> - bytes per line of the source image.  Normally, this should be
+ <code>width * TJ.pixelSize(pixelFormat)</code> if the source image is
+ unpadded, but you can use this parameter to, for instance, specify that
+ the scanlines in the source image are padded to a 4-byte boundary or to
+ compress/encode a JPEG or YUV image from a region of a larger source
+ image.  You can also be clever and use this parameter to skip lines, etc.
+ Setting this parameter to 0 is the equivalent of setting it to
+ <code>width * TJ.pixelSize(pixelFormat)</code>.</dd><dd><code>height</code> - height (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded</dd><dd><code>pixelFormat</code> - pixel format of the source image (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><code>TJ.PF_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setSourceImage(byte[], int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;setSourceImage(byte[]&nbsp;srcImage,
+                             int&nbsp;width,
+                             int&nbsp;pitch,
+                             int&nbsp;height,
+                             int&nbsp;pixelFormat)
+                    throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><code>setSourceImage(byte[], int, int, int, int, int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setSourceImage(java.awt.image.BufferedImage, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>public&nbsp;void&nbsp;setSourceImage(java.awt.image.BufferedImage&nbsp;srcImage,
+                  int&nbsp;x,
+                  int&nbsp;y,
+                  int&nbsp;width,
+                  int&nbsp;height)
+                    throws java.lang.Exception</pre>
+<div class="block">Associate an uncompressed RGB or grayscale source image with this
+ compressor instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - a <code>BufferedImage</code> instance containing RGB or
+ grayscale pixels to be compressed or encoded</dd><dd><code>x</code> - x offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded</dd><dd><code>y</code> - y offset (in pixels) of the region in the source image from which
+ the JPEG or YUV image should be compressed/encoded</dd><dd><code>width</code> - width (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded (0 = use the
+ width of the source image)</dd><dd><code>height</code> - height (in pixels) of the region in the source image from
+ which the JPEG or YUV image should be compressed/encoded (0 = use the
+ height of the source image)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>public&nbsp;void&nbsp;setSourceImage(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;srcImage)
+                    throws java.lang.Exception</pre>
+<div class="block">Associate an uncompressed YUV planar source image with this compressor
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - YUV planar image to be compressed</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setSubsamp(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSubsamp</h4>
+<pre>public&nbsp;void&nbsp;setSubsamp(int&nbsp;newSubsamp)
+                throws java.lang.Exception</pre>
+<div class="block">Set the level of chrominance subsampling for subsequent compress/encode
+ operations.  When pixels are converted from RGB to YCbCr (see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr"><code>TJ.CS_YCbCr</code></a>) or from CMYK to YCCK (see <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK"><code>TJ.CS_YCCK</code></a>) as part
+ of the JPEG compression process, some of the Cb and Cr (chrominance)
+ components can be discarded or averaged together to produce a smaller
+ image with little perceptible loss of image clarity (the human eye is more
+ sensitive to small changes in brightness than to small changes in color).
+ This is called "chrominance subsampling".
+ <p>
+ NOTE: This method has no effect when compressing a JPEG image from a YUV
+ planar source.  In that case, the level of chrominance subsampling in
+ the JPEG image is determined by the source.  Further, this method has no
+ effect when encoding to a pre-allocated <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>YUVImage</code></a> instance.  In
+ that case, the level of chrominance subsampling is determined by the
+ destination.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>newSubsamp</code> - the level of chrominance subsampling to use in
+ subsequent compress/encode operations (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setJPEGQuality(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setJPEGQuality</h4>
+<pre>public&nbsp;void&nbsp;setJPEGQuality(int&nbsp;quality)
+                    throws java.lang.Exception</pre>
+<div class="block">Set the JPEG image quality level for subsequent compress operations.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>quality</code> - the new JPEG image quality level (1 to 100, 1 = worst,
+ 100 = best)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="compress(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>compress</h4>
+<pre>public&nbsp;void&nbsp;compress(byte[]&nbsp;dstBuf,
+            int&nbsp;flags)
+              throws java.lang.Exception</pre>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and output a JPEG image to the given destination buffer.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstBuf</code> - buffer that will receive the JPEG image.  Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><code>TJ.bufSize(int, int, int)</code></a> to determine the maximum size for this buffer based on
+ the source image's width and height and the desired level of chrominance
+ subsampling.</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="compress(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>compress</h4>
+<pre>public&nbsp;byte[]&nbsp;compress(int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block">Compress the uncompressed source image associated with this compressor
+ instance and return a buffer containing a JPEG image.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>a buffer containing a JPEG image.  The length of this buffer will
+ not be equal to the size of the JPEG image.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()"><code>getCompressedSize()</code></a> to obtain the size of the JPEG image.</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="compress(java.awt.image.BufferedImage, byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>compress</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;compress(java.awt.image.BufferedImage&nbsp;srcImage,
+                       byte[]&nbsp;dstBuf,
+                       int&nbsp;flags)
+              throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(byte[], int)"><code>compress(byte[], int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="compress(java.awt.image.BufferedImage, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>compress</h4>
+<pre>@Deprecated
+public&nbsp;byte[]&nbsp;compress(java.awt.image.BufferedImage&nbsp;srcImage,
+                         int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#compress(int)"><code>compress(int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>public&nbsp;void&nbsp;encodeYUV(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;dstImage,
+             int&nbsp;flags)
+               throws java.lang.Exception</pre>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.  This method uses the accelerated color
+ conversion routines in TurboJPEG's underlying codec but does not execute
+ any of the other steps in the JPEG compression process.  Encoding
+ CMYK source images to YUV is not supported.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstImage</code> - <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>YUVImage</code></a> instance that will receive the YUV planar
+ image</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;encodeYUV(byte[]&nbsp;dstBuf,
+                        int&nbsp;flags)
+               throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>encodeYUV(YUVImage, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>public&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;encodeYUV(int&nbsp;pad,
+                 int&nbsp;flags)
+                   throws java.lang.Exception</pre>
+<div class="block">Encode the uncompressed source image associated with this compressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the encoded image.  This method uses the accelerated
+ color conversion routines in TurboJPEG's underlying codec but does not
+ execute any of the other steps in the JPEG compression process.  Encoding
+ CMYK source images to YUV is not supported.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>pad</code> - the width of each line in each plane of the YUV image will be
+ padded to the nearest multiple of this number of bytes (must be a power of
+ 2.)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>a YUV planar image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>@Deprecated
+public&nbsp;byte[]&nbsp;encodeYUV(int&nbsp;flags)
+                 throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int, int)"><code>encodeYUV(int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(java.awt.image.BufferedImage, byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;encodeYUV(java.awt.image.BufferedImage&nbsp;srcImage,
+                        byte[]&nbsp;dstBuf,
+                        int&nbsp;flags)
+               throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(byte[], int)"><code>encodeYUV(byte[], int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="encodeYUV(java.awt.image.BufferedImage, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>encodeYUV</h4>
+<pre>@Deprecated
+public&nbsp;byte[]&nbsp;encodeYUV(java.awt.image.BufferedImage&nbsp;srcImage,
+                          int&nbsp;flags)
+                 throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(java.awt.image.BufferedImage, int, int, int, int)"><code>setSourceImage(BufferedImage, int, int, int, int)</code></a> and
+ <a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#encodeYUV(int)"><code>encodeYUV(int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getCompressedSize()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getCompressedSize</h4>
+<pre>public&nbsp;int&nbsp;getCompressedSize()</pre>
+<div class="block">Returns the size of the image (in bytes) generated by the most recent
+ compress/encode operation.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the size of the image (in bytes) generated by the most recent
+ compress/encode operation</dd></dl>
+</li>
+</ul>
+<a name="close()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>close</h4>
+<pre>public&nbsp;void&nbsp;close()
+           throws java.lang.Exception</pre>
+<div class="block">Free the native structures associated with this compressor instance.</div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="finalize()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>finalize</h4>
+<pre>protected&nbsp;void&nbsp;finalize()
+                 throws java.lang.Throwable</pre>
+<dl>
+<dt><strong>Overrides:</strong></dt>
+<dd><code>finalize</code>&nbsp;in class&nbsp;<code>java.lang.Object</code></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Throwable</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJCompressor.html" target="_top">Frames</a></li>
+<li><a href="TJCompressor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
new file mode 100644
index 0000000..bac519b
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
@@ -0,0 +1,237 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJCustomFilter</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJCustomFilter";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJCustomFilter.html" target="_top">Frames</a></li>
+<li><a href="TJCustomFilter.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li>Constr&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li>Constr&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Interface TJCustomFilter" class="title">Interface TJCustomFilter</h2>
+</div>
+<div class="contentContainer">
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public interface <span class="strong">TJCustomFilter</span></pre>
+<div class="block">Custom filter callback interface</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html#customFilter(java.nio.ShortBuffer, java.awt.Rectangle, java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJTransform)">customFilter</a></strong>(java.nio.ShortBuffer&nbsp;coeffBuffer,
+            java.awt.Rectangle&nbsp;bufferRegion,
+            java.awt.Rectangle&nbsp;planeRegion,
+            int&nbsp;componentID,
+            int&nbsp;transformID,
+            <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>&nbsp;transform)</code>
+<div class="block">A callback function that can be used to modify the DCT coefficients after
+ they are losslessly transformed but before they are transcoded to a new
+ JPEG image.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="customFilter(java.nio.ShortBuffer, java.awt.Rectangle, java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJTransform)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>customFilter</h4>
+<pre>void&nbsp;customFilter(java.nio.ShortBuffer&nbsp;coeffBuffer,
+                java.awt.Rectangle&nbsp;bufferRegion,
+                java.awt.Rectangle&nbsp;planeRegion,
+                int&nbsp;componentID,
+                int&nbsp;transformID,
+                <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>&nbsp;transform)
+                  throws java.lang.Exception</pre>
+<div class="block">A callback function that can be used to modify the DCT coefficients after
+ they are losslessly transformed but before they are transcoded to a new
+ JPEG image.  This allows for custom filters or other transformations to be
+ applied in the frequency domain.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>coeffBuffer</code> - a buffer containing transformed DCT coefficients.
+ (NOTE: this buffer is not guaranteed to be valid once the callback
+ returns, so applications wishing to hand off the DCT coefficients to
+ another function or library should make a copy of them within the body of
+ the callback.)</dd><dd><code>bufferRegion</code> - rectangle containing the width and height of
+ <code>coeffBuffer</code> as well as its offset relative to the component
+ plane.  TurboJPEG implementations may choose to split each component plane
+ into multiple DCT coefficient buffers and call the callback function once
+ for each buffer.</dd><dd><code>planeRegion</code> - rectangle containing the width and height of the
+ component plane to which <code>coeffBuffer</code> belongs</dd><dd><code>componentID</code> - ID number of the component plane to which
+ <code>coeffBuffer</code> belongs (Y, Cb, and Cr have, respectively, IDs
+ of 0, 1, and 2 in typical JPEG images.)</dd><dd><code>transformID</code> - ID number of the transformed image to which
+ <code>coeffBuffer</code> belongs.  This is the same as the index of the
+ transform in the <code>transforms</code> array that was passed to <a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a>.</dd><dd><code>transform</code> - a <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><code>TJTransform</code></a> instance that specifies the
+ parameters and/or cropping region for this transform</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJCustomFilter.html" target="_top">Frames</a></li>
+<li><a href="TJCustomFilter.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li>Constr&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li>Constr&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
new file mode 100644
index 0000000..af6a902
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
@@ -0,0 +1,1244 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJDecompressor</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJDecompressor";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJDecompressor.html" target="_top">Frames</a></li>
+<li><a href="TJDecompressor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJDecompressor" class="title">Class TJDecompressor</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJDecompressor</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<dl>
+<dt>Direct Known Subclasses:</dt>
+<dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></dd>
+</dl>
+<hr>
+<br>
+<pre>public class <span class="strong">TJDecompressor</span>
+extends java.lang.Object</pre>
+<div class="block">TurboJPEG decompressor</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
+<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Field and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected long</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#handle">handle</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBuf">jpegBuf</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize">jpegBufSize</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcColorspace">srcColorspace</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcHeight">srcHeight</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcSubsamp">srcSubsamp</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcWidth">srcWidth</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#yuvImage">yuvImage</a></strong></code>&nbsp;</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor()">TJDecompressor</a></strong>()</code>
+<div class="block">Create a TurboJPEG decompressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[])">TJDecompressor</a></strong>(byte[]&nbsp;jpegImage)</code>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(byte[], int)">TJDecompressor</a></strong>(byte[]&nbsp;jpegImage,
+              int&nbsp;imageSize)</code>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#TJDecompressor(org.libjpegturbo.turbojpeg.YUVImage)">TJDecompressor</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;yuvImage)</code>
+<div class="block">Create a TurboJPEG decompressor instance and associate the YUV planar
+ source image stored in <code>yuvImage</code> with the newly created
+ instance.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</a></strong>()</code>
+<div class="block">Free the native structures associated with this decompressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</a></strong>(java.awt.image.BufferedImage&nbsp;dstImage,
+          int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a decompressed/decoded image to
+ the given <code>BufferedImage</code> instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</a></strong>(byte[]&nbsp;dstBuf,
+          int&nbsp;desiredWidth,
+          int&nbsp;pitch,
+          int&nbsp;desiredHeight,
+          int&nbsp;pixelFormat,
+          int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</a></strong>(byte[]&nbsp;dstBuf,
+          int&nbsp;x,
+          int&nbsp;y,
+          int&nbsp;desiredWidth,
+          int&nbsp;pitch,
+          int&nbsp;desiredHeight,
+          int&nbsp;pixelFormat,
+          int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</a></strong>(int[]&nbsp;dstBuf,
+          int&nbsp;x,
+          int&nbsp;y,
+          int&nbsp;desiredWidth,
+          int&nbsp;stride,
+          int&nbsp;desiredHeight,
+          int&nbsp;pixelFormat,
+          int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>java.awt.image.BufferedImage</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</a></strong>(int&nbsp;desiredWidth,
+          int&nbsp;desiredHeight,
+          int&nbsp;bufferedImageType,
+          int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and return a <code>BufferedImage</code>
+ instance containing the decompressed/decoded image.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</a></strong>(int&nbsp;desiredWidth,
+          int&nbsp;pitch,
+          int&nbsp;desiredHeight,
+          int&nbsp;pixelFormat,
+          int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance and return a buffer containing the decompressed image.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</a></strong>(byte[]&nbsp;dstBuf,
+               int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>decompressToYUV(YUVImage, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</a></strong>(int&nbsp;flags)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><code>decompressToYUV(int, int, int, int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)">decompressToYUV</a></strong>(int&nbsp;desiredWidth,
+               int&nbsp;pad,
+               int&nbsp;desiredHeight,
+               int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the decompressed image.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">decompressToYUV</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;dstImage,
+               int&nbsp;flags)</code>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</a></strong>()</code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()">getColorspace</a></strong>()</code>
+<div class="block">Returns the colorspace used in the source image (JPEG or YUV) associated
+ with this decompressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</a></strong>()</code>
+<div class="block">Returns the height of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</a></strong>()</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()"><code>getSourceBuf()</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</a></strong>()</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()"><code>getSourceSize()</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</a></strong>(int&nbsp;desiredWidth,
+               int&nbsp;desiredHeight)</code>
+<div class="block">Returns the height of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</a></strong>(int&nbsp;desiredWidth,
+              int&nbsp;desiredHeight)</code>
+<div class="block">Returns the width of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()">getSourceBuf</a></strong>()</code>
+<div class="block">Returns the source image buffer associated with this decompressor
+ instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()">getSourceSize</a></strong>()</code>
+<div class="block">Returns the size of the source image (in bytes) associated with this
+ decompressor instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</a></strong>()</code>
+<div class="block">Returns the level of chrominance subsampling used in the source image
+ (JPEG or YUV) associated with this decompressor instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</a></strong>()</code>
+<div class="block">Returns the width of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</a></strong>(byte[]&nbsp;jpegImage,
+            int&nbsp;imageSize)</code>
+<div class="block"><strong>Deprecated.</strong>&nbsp;
+<div class="block"><i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)"><code>setSourceImage(byte[], int)</code></a> instead.</i></div>
+</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)">setSourceImage</a></strong>(byte[]&nbsp;srcImage,
+              int&nbsp;imageSize)</code>
+<div class="block">Associate the JPEG image of length <code>imageSize</code> bytes stored in
+ <code>srcImage</code> with this decompressor instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">setSourceImage</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;srcImage)</code>
+<div class="block">Associate the specified YUV planar source image with this decompressor
+ instance.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ============ FIELD DETAIL =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_detail">
+<!--   -->
+</a>
+<h3>Field Detail</h3>
+<a name="handle">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>handle</h4>
+<pre>protected&nbsp;long handle</pre>
+</li>
+</ul>
+<a name="jpegBuf">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>jpegBuf</h4>
+<pre>protected&nbsp;byte[] jpegBuf</pre>
+</li>
+</ul>
+<a name="jpegBufSize">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>jpegBufSize</h4>
+<pre>protected&nbsp;int jpegBufSize</pre>
+</li>
+</ul>
+<a name="yuvImage">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>yuvImage</h4>
+<pre>protected&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a> yuvImage</pre>
+</li>
+</ul>
+<a name="srcWidth">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>srcWidth</h4>
+<pre>protected&nbsp;int srcWidth</pre>
+</li>
+</ul>
+<a name="srcHeight">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>srcHeight</h4>
+<pre>protected&nbsp;int srcHeight</pre>
+</li>
+</ul>
+<a name="srcSubsamp">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>srcSubsamp</h4>
+<pre>protected&nbsp;int srcSubsamp</pre>
+</li>
+</ul>
+<a name="srcColorspace">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>srcColorspace</h4>
+<pre>protected&nbsp;int srcColorspace</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJDecompressor()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJDecompressor</h4>
+<pre>public&nbsp;TJDecompressor()
+               throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG decompressor instance.</div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJDecompressor(byte[])">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJDecompressor</h4>
+<pre>public&nbsp;TJDecompressor(byte[]&nbsp;jpegImage)
+               throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>jpegImage</code> - JPEG image buffer (size of the JPEG image is assumed to
+ be the length of the array)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJDecompressor(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJDecompressor</h4>
+<pre>public&nbsp;TJDecompressor(byte[]&nbsp;jpegImage,
+              int&nbsp;imageSize)
+               throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG decompressor instance and associate the JPEG source
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>jpegImage</code> - JPEG image buffer</dd><dd><code>imageSize</code> - size of the JPEG image (in bytes)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJDecompressor(org.libjpegturbo.turbojpeg.YUVImage)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJDecompressor</h4>
+<pre>public&nbsp;TJDecompressor(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;yuvImage)
+               throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG decompressor instance and associate the YUV planar
+ source image stored in <code>yuvImage</code> with the newly created
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>yuvImage</code> - <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>YUVImage</code></a> instance containing a YUV planar
+ image to be decoded</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="setSourceImage(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>public&nbsp;void&nbsp;setSourceImage(byte[]&nbsp;srcImage,
+                  int&nbsp;imageSize)
+                    throws java.lang.Exception</pre>
+<div class="block">Associate the JPEG image of length <code>imageSize</code> bytes stored in
+ <code>srcImage</code> with this decompressor instance.  This image will
+ be used as the source image for subsequent decompress operations.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - JPEG image buffer</dd><dd><code>imageSize</code> - size of the JPEG image (in bytes)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setJPEGImage(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setJPEGImage</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;setJPEGImage(byte[]&nbsp;jpegImage,
+                           int&nbsp;imageSize)
+                  throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)"><code>setSourceImage(byte[], int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setSourceImage</h4>
+<pre>public&nbsp;void&nbsp;setSourceImage(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;srcImage)
+                    throws java.lang.Exception</pre>
+<div class="block">Associate the specified YUV planar source image with this decompressor
+ instance.  Subsequent decompress operations will decode this image into an
+ RGB or grayscale destination image.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>srcImage</code> - <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>YUVImage</code></a> instance containing a YUV planar image to
+ be decoded</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getWidth()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getWidth</h4>
+<pre>public&nbsp;int&nbsp;getWidth()
+             throws java.lang.Exception</pre>
+<div class="block">Returns the width of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the width of the source image (JPEG or YUV) associated with this
+ decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getHeight()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getHeight</h4>
+<pre>public&nbsp;int&nbsp;getHeight()
+              throws java.lang.Exception</pre>
+<div class="block">Returns the height of the source image (JPEG or YUV) associated with this
+ decompressor instance.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the height of the source image (JPEG or YUV) associated with this
+ decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getSubsamp()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getSubsamp</h4>
+<pre>public&nbsp;int&nbsp;getSubsamp()
+               throws java.lang.Exception</pre>
+<div class="block">Returns the level of chrominance subsampling used in the source image
+ (JPEG or YUV) associated with this decompressor instance.  See
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the level of chrominance subsampling used in the source image
+ (JPEG or YUV) associated with this decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getColorspace()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getColorspace</h4>
+<pre>public&nbsp;int&nbsp;getColorspace()
+                  throws java.lang.Exception</pre>
+<div class="block">Returns the colorspace used in the source image (JPEG or YUV) associated
+ with this decompressor instance.  See <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_RGB"><code>TJ.CS_*</code></a>.  If the
+ source image is YUV, then this always returns <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr"><code>TJ.CS_YCbCr</code></a>.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the colorspace used in the source image (JPEG or YUV) associated
+ with this decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getSourceBuf()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getSourceBuf</h4>
+<pre>public&nbsp;byte[]&nbsp;getSourceBuf()
+                    throws java.lang.Exception</pre>
+<div class="block">Returns the source image buffer associated with this decompressor
+ instance.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the source image buffer associated with this decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getJPEGBuf()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getJPEGBuf</h4>
+<pre>@Deprecated
+public&nbsp;byte[]&nbsp;getJPEGBuf()
+                  throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()"><code>getSourceBuf()</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getSourceSize()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getSourceSize</h4>
+<pre>public&nbsp;int&nbsp;getSourceSize()
+                  throws java.lang.Exception</pre>
+<div class="block">Returns the size of the source image (in bytes) associated with this
+ decompressor instance.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the size of the source image (in bytes) associated with this
+ decompressor instance</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getJPEGSize()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getJPEGSize</h4>
+<pre>@Deprecated
+public&nbsp;int&nbsp;getJPEGSize()
+                throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()"><code>getSourceSize()</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getScaledWidth(int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getScaledWidth</h4>
+<pre>public&nbsp;int&nbsp;getScaledWidth(int&nbsp;desiredWidth,
+                 int&nbsp;desiredHeight)
+                   throws java.lang.Exception</pre>
+<div class="block">Returns the width of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>desiredWidth</code> - desired width (in pixels) of the decompressed image.
+ Setting this to 0 is the same as setting it to the width of the JPEG image
+ (in other words, the width will not be considered when determining the
+ scaled image size.)</dd><dd><code>desiredHeight</code> - desired height (in pixels) of the decompressed image.
+ Setting this to 0 is the same as setting it to the height of the JPEG
+ image (in other words, the height will not be considered when determining
+ the scaled image size.)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the width of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getScaledHeight(int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getScaledHeight</h4>
+<pre>public&nbsp;int&nbsp;getScaledHeight(int&nbsp;desiredWidth,
+                  int&nbsp;desiredHeight)
+                    throws java.lang.Exception</pre>
+<div class="block">Returns the height of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>desiredWidth</code> - desired width (in pixels) of the decompressed image.
+ Setting this to 0 is the same as setting it to the width of the JPEG image
+ (in other words, the width will not be considered when determining the
+ scaled image size.)</dd><dd><code>desiredHeight</code> - desired height (in pixels) of the decompressed image.
+ Setting this to 0 is the same as setting it to the height of the JPEG
+ image (in other words, the height will not be considered when determining
+ the scaled image size.)</dd>
+<dt><span class="strong">Returns:</span></dt><dd>the height of the largest scaled-down image that the TurboJPEG
+ decompressor can generate without exceeding the desired image width and
+ height</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(byte[], int, int, int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>public&nbsp;void&nbsp;decompress(byte[]&nbsp;dstBuf,
+              int&nbsp;x,
+              int&nbsp;y,
+              int&nbsp;desiredWidth,
+              int&nbsp;pitch,
+              int&nbsp;desiredHeight,
+              int&nbsp;pixelFormat,
+              int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstBuf</code> - buffer that will receive the decompressed/decoded image.
+ If the source image is a JPEG image, then this buffer should normally be
+ <code>pitch * scaledHeight</code> bytes in size, where
+ <code>scaledHeight</code> can be determined by calling <code>
+ scalingFactor.<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><code>getScaled</code></a>(jpegHeight)
+ </code> with one of the scaling factors returned from <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><code>TJ.getScalingFactors()</code></a> or by calling <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><code>getScaledHeight(int, int)</code></a>.  If the
+ source image is a YUV image, then this buffer should normally be
+ <code>pitch * height</code> bytes in size, where <code>height</code> is
+ the height of the YUV image.  However, the buffer may also be larger than
+ the dimensions of the source image, in which case the <code>x</code>,
+ <code>y</code>, and <code>pitch</code> parameters can be used to specify
+ the region into which the source image should be decompressed/decoded.</dd><dd><code>x</code> - x offset (in pixels) of the region in the destination image into
+ which the source image should be decompressed/decoded</dd><dd><code>y</code> - y offset (in pixels) of the region in the destination image into
+ which the source image should be decompressed/decoded</dd><dd><code>desiredWidth</code> - If the source image is a JPEG image, then this
+ specifies the desired width (in pixels) of the decompressed image (or
+ image region.)  If the desired destination image dimensions are different
+ than the source image dimensions, then TurboJPEG will use scaling in the
+ JPEG decompressor to generate the largest possible image that will fit
+ within the desired dimensions.  Setting this to 0 is the same as setting
+ it to the width of the JPEG image (in other words, the width will not be
+ considered when determining the scaled image size.)  This parameter is
+ ignored if the source image is a YUV image.</dd><dd><code>pitch</code> - bytes per line of the destination image.  Normally, this
+ should be set to <code>scaledWidth * TJ.pixelSize(pixelFormat)</code> if
+ the destination image is unpadded, but you can use this to, for instance,
+ pad each line of the destination image to a 4-byte boundary or to
+ decompress/decode the source image into a region of a larger image.  NOTE:
+ if the source image is a JPEG image, then <code>scaledWidth</code> can be
+ determined by calling <code>
+ scalingFactor.<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><code>getScaled</code></a>(jpegWidth)
+ </code> or by calling <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)"><code>getScaledWidth(int, int)</code></a>.  If the source image is a
+ YUV image, then <code>scaledWidth</code> is the width of the YUV image.
+ Setting this parameter to 0 is the equivalent of setting it to
+ <code>scaledWidth * TJ.pixelSize(pixelFormat)</code>.</dd><dd><code>desiredHeight</code> - If the source image is a JPEG image, then this
+ specifies the desired height (in pixels) of the decompressed image (or
+ image region.)  If the desired destination image dimensions are different
+ than the source image dimensions, then TurboJPEG will use scaling in the
+ JPEG decompressor to generate the largest possible image that will fit
+ within the desired dimensions.  Setting this to 0 is the same as setting
+ it to the height of the JPEG image (in other words, the height will not be
+ considered when determining the scaled image size.)  This parameter is
+ ignored if the source image is a YUV image.</dd><dd><code>pixelFormat</code> - pixel format of the decompressed/decoded image (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><code>TJ.PF_*</code></a>)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(byte[], int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;decompress(byte[]&nbsp;dstBuf,
+                         int&nbsp;desiredWidth,
+                         int&nbsp;pitch,
+                         int&nbsp;desiredHeight,
+                         int&nbsp;pixelFormat,
+                         int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>public&nbsp;byte[]&nbsp;decompress(int&nbsp;desiredWidth,
+                int&nbsp;pitch,
+                int&nbsp;desiredHeight,
+                int&nbsp;pixelFormat,
+                int&nbsp;flags)
+                  throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance and return a buffer containing the decompressed image.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>desiredWidth</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a>
+ for description</dd><dd><code>pitch</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a>
+ for description</dd><dd><code>desiredHeight</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a>
+ for description</dd><dd><code>pixelFormat</code> - pixel format of the decompressed image (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><code>TJ.PF_*</code></a>)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>a buffer containing the decompressed image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompressToYUV</h4>
+<pre>public&nbsp;void&nbsp;decompressToYUV(<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;dstImage,
+                   int&nbsp;flags)
+                     throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and store it in the given
+ <code>YUVImage</code> instance.  This method performs JPEG decompression
+ but leaves out the color conversion step, so a planar YUV image is
+ generated instead of an RGB or grayscale image.  This method cannot be
+ used to decompress JPEG source images with the CMYK or YCCK colorspace.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstImage</code> - <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>YUVImage</code></a> instance that will receive the YUV planar
+ image.  The level of subsampling specified in this <code>YUVImage</code>
+ instance must match that of the JPEG image, and the width and height
+ specified in the <code>YUVImage</code> instance must match one of the
+ scaled image sizes that TurboJPEG is capable of generating from the JPEG
+ source image.</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompressToYUV(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompressToYUV</h4>
+<pre>@Deprecated
+public&nbsp;void&nbsp;decompressToYUV(byte[]&nbsp;dstBuf,
+                              int&nbsp;flags)
+                     throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)"><code>decompressToYUV(YUVImage, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompressToYUV(int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompressToYUV</h4>
+<pre>public&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a>&nbsp;decompressToYUV(int&nbsp;desiredWidth,
+                       int&nbsp;pad,
+                       int&nbsp;desiredHeight,
+                       int&nbsp;flags)
+                         throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image associated with this decompressor
+ instance into a YUV planar image and return a <code>YUVImage</code>
+ instance containing the decompressed image.  This method performs JPEG
+ decompression but leaves out the color conversion step, so a planar YUV
+ image is generated instead of an RGB or grayscale image.  This method
+ cannot be used to decompress JPEG source images with the CMYK or YCCK
+ colorspace.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>desiredWidth</code> - desired width (in pixels) of the YUV image.  If the
+ desired image dimensions are different than the dimensions of the JPEG
+ image being decompressed, then TurboJPEG will use scaling in the JPEG
+ decompressor to generate the largest possible image that will fit within
+ the desired dimensions.  Setting this to 0 is the same as setting it to
+ the width of the JPEG image (in other words, the width will not be
+ considered when determining the scaled image size.)</dd><dd><code>pad</code> - the width of each line in each plane of the YUV image will be
+ padded to the nearest multiple of this number of bytes (must be a power of
+ 2.)</dd><dd><code>desiredHeight</code> - desired height (in pixels) of the YUV image.  If the
+ desired image dimensions are different than the dimensions of the JPEG
+ image being decompressed, then TurboJPEG will use scaling in the JPEG
+ decompressor to generate the largest possible image that will fit within
+ the desired dimensions.  Setting this to 0 is the same as setting it to
+ the height of the JPEG image (in other words, the height will not be
+ considered when determining the scaled image size.)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>a YUV planar image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompressToYUV(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompressToYUV</h4>
+<pre>@Deprecated
+public&nbsp;byte[]&nbsp;decompressToYUV(int&nbsp;flags)
+                       throws java.lang.Exception</pre>
+<div class="block"><span class="strong">Deprecated.</span>&nbsp;<i>Use <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><code>decompressToYUV(int, int, int, int)</code></a> instead.</i></div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(int[], int, int, int, int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>public&nbsp;void&nbsp;decompress(int[]&nbsp;dstBuf,
+              int&nbsp;x,
+              int&nbsp;y,
+              int&nbsp;desiredWidth,
+              int&nbsp;stride,
+              int&nbsp;desiredHeight,
+              int&nbsp;pixelFormat,
+              int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a grayscale, RGB, or CMYK image
+ to the given destination buffer.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstBuf</code> - buffer that will receive the decompressed/decoded image.
+ If the source image is a JPEG image, then this buffer should normally be
+ <code>stride * scaledHeight</code> pixels in size, where
+ <code>scaledHeight</code> can be determined by calling <code>
+ scalingFactor.<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><code>getScaled</code></a>(jpegHeight)
+ </code> with one of the scaling factors returned from <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getScalingFactors()"><code>TJ.getScalingFactors()</code></a> or by calling <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)"><code>getScaledHeight(int, int)</code></a>.  If the
+ source image is a YUV image, then this buffer should normally be
+ <code>stride * height</code> pixels in size, where <code>height</code> is
+ the height of the YUV image.  However, the buffer may also be larger than
+ the dimensions of the source image, in which case the <code>x</code>,
+ <code>y</code>, and <code>stride</code> parameters can be used to specify
+ the region into which the source image should be decompressed/decoded.</dd><dd><code>x</code> - x offset (in pixels) of the region in the destination image into
+ which the source image should be decompressed/decoded</dd><dd><code>y</code> - y offset (in pixels) of the region in the destination image into
+ which the source image should be decompressed/decoded</dd><dd><code>desiredWidth</code> - If the source image is a JPEG image, then this
+ specifies the desired width (in pixels) of the decompressed image (or
+ image region.)  If the desired destination image dimensions are different
+ than the source image dimensions, then TurboJPEG will use scaling in the
+ JPEG decompressor to generate the largest possible image that will fit
+ within the desired dimensions.  Setting this to 0 is the same as setting
+ it to the width of the JPEG image (in other words, the width will not be
+ considered when determining the scaled image size.)  This parameter is
+ ignored if the source image is a YUV image.</dd><dd><code>stride</code> - pixels per line of the destination image.  Normally, this
+ should be set to <code>scaledWidth</code>, but you can use this to, for
+ instance, decompress the JPEG image into a region of a larger image.
+ NOTE: if the source image is a JPEG image, then <code>scaledWidth</code>
+ can be determined by calling <code>
+ scalingFactor.<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)"><code>getScaled</code></a>(jpegWidth)
+ </code> or by calling <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)"><code>getScaledWidth(int, int)</code></a>.  If the source image is a
+ YUV image, then <code>scaledWidth</code> is the width of the YUV image.
+ Setting this parameter to 0 is the equivalent of setting it to
+ <code>scaledWidth</code>.</dd><dd><code>desiredHeight</code> - If the source image is a JPEG image, then this
+ specifies the desired height (in pixels) of the decompressed image (or
+ image region.)  If the desired destination image dimensions are different
+ than the source image dimensions, then TurboJPEG will use scaling in the
+ JPEG decompressor to generate the largest possible image that will fit
+ within the desired dimensions.  Setting this to 0 is the same as setting
+ it to the height of the JPEG image (in other words, the height will not be
+ considered when determining the scaled image size.)  This parameter is
+ ignored if the source image is a YUV image.</dd><dd><code>pixelFormat</code> - pixel format of the decompressed/decoded image (one of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_RGB"><code>TJ.PF_*</code></a>)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(java.awt.image.BufferedImage, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>public&nbsp;void&nbsp;decompress(java.awt.image.BufferedImage&nbsp;dstImage,
+              int&nbsp;flags)
+                throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and output a decompressed/decoded image to
+ the given <code>BufferedImage</code> instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstImage</code> - a <code>BufferedImage</code> instance that will receive
+ the decompressed/decoded image.  If the source image is a JPEG image, then
+ the width and height of the <code>BufferedImage</code> instance must match
+ one of the scaled image sizes that TurboJPEG is capable of generating from
+ the JPEG image.  If the source image is a YUV image, then the width and
+ height of the <code>BufferedImage</code> instance must match the width and
+ height of the YUV image.</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="decompress(int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>decompress</h4>
+<pre>public&nbsp;java.awt.image.BufferedImage&nbsp;decompress(int&nbsp;desiredWidth,
+                                      int&nbsp;desiredHeight,
+                                      int&nbsp;bufferedImageType,
+                                      int&nbsp;flags)
+                                        throws java.lang.Exception</pre>
+<div class="block">Decompress the JPEG source image or decode the YUV source image associated
+ with this decompressor instance and return a <code>BufferedImage</code>
+ instance containing the decompressed/decoded image.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>desiredWidth</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a> for
+ description</dd><dd><code>desiredHeight</code> - see
+ <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><code>decompress(byte[], int, int, int, int, int, int, int)</code></a> for
+ description</dd><dd><code>bufferedImageType</code> - the image type of the <code>BufferedImage</code>
+ instance that will be created (for instance,
+ <code>BufferedImage.TYPE_INT_RGB</code>)</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>a <code>BufferedImage</code> instance containing the
+ decompressed/decoded image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="close()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>close</h4>
+<pre>public&nbsp;void&nbsp;close()
+           throws java.lang.Exception</pre>
+<div class="block">Free the native structures associated with this decompressor instance.</div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="finalize()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>finalize</h4>
+<pre>protected&nbsp;void&nbsp;finalize()
+                 throws java.lang.Throwable</pre>
+<dl>
+<dt><strong>Overrides:</strong></dt>
+<dd><code>finalize</code>&nbsp;in class&nbsp;<code>java.lang.Object</code></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Throwable</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJDecompressor.html" target="_top">Frames</a></li>
+<li><a href="TJDecompressor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html
new file mode 100644
index 0000000..1b90147
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html
@@ -0,0 +1,336 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJScalingFactor</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJScalingFactor";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJScalingFactor.html" target="_top">Frames</a></li>
+<li><a href="TJScalingFactor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJScalingFactor" class="title">Class TJScalingFactor</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJScalingFactor</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">TJScalingFactor</span>
+extends java.lang.Object</pre>
+<div class="block">Fractional scaling factor</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#TJScalingFactor(int, int)">TJScalingFactor</a></strong>(int&nbsp;num,
+               int&nbsp;denom)</code>&nbsp;</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>boolean</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#equals(org.libjpegturbo.turbojpeg.TJScalingFactor)">equals</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a>&nbsp;other)</code>
+<div class="block">Returns true or false, depending on whether this instance and
+ <code>other</code> have the same numerator and denominator.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getDenom()">getDenom</a></strong>()</code>
+<div class="block">Returns denominator</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getNum()">getNum</a></strong>()</code>
+<div class="block">Returns numerator</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#getScaled(int)">getScaled</a></strong>(int&nbsp;dimension)</code>
+<div class="block">Returns the scaled value of <code>dimension</code>.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>boolean</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html#isOne()">isOne</a></strong>()</code>
+<div class="block">Returns true or false, depending on whether this instance is equal to
+ 1/1.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJScalingFactor(int, int)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJScalingFactor</h4>
+<pre>public&nbsp;TJScalingFactor(int&nbsp;num,
+               int&nbsp;denom)
+                throws java.lang.Exception</pre>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="getNum()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getNum</h4>
+<pre>public&nbsp;int&nbsp;getNum()</pre>
+<div class="block">Returns numerator</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>numerator</dd></dl>
+</li>
+</ul>
+<a name="getDenom()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getDenom</h4>
+<pre>public&nbsp;int&nbsp;getDenom()</pre>
+<div class="block">Returns denominator</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>denominator</dd></dl>
+</li>
+</ul>
+<a name="getScaled(int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getScaled</h4>
+<pre>public&nbsp;int&nbsp;getScaled(int&nbsp;dimension)</pre>
+<div class="block">Returns the scaled value of <code>dimension</code>.  This function
+ performs the integer equivalent of
+ <code>ceil(dimension * scalingFactor)</code>.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the scaled value of <code>dimension</code></dd></dl>
+</li>
+</ul>
+<a name="equals(org.libjpegturbo.turbojpeg.TJScalingFactor)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>equals</h4>
+<pre>public&nbsp;boolean&nbsp;equals(<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a>&nbsp;other)</pre>
+<div class="block">Returns true or false, depending on whether this instance and
+ <code>other</code> have the same numerator and denominator.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>true or false, depending on whether this instance and
+ <code>other</code> have the same numerator and denominator</dd></dl>
+</li>
+</ul>
+<a name="isOne()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>isOne</h4>
+<pre>public&nbsp;boolean&nbsp;isOne()</pre>
+<div class="block">Returns true or false, depending on whether this instance is equal to
+ 1/1.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>true or false, depending on whether this instance is equal to
+ 1/1</dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJScalingFactor.html" target="_top">Frames</a></li>
+<li><a href="TJScalingFactor.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html
new file mode 100644
index 0000000..b4bd5cc
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html
@@ -0,0 +1,712 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJTransform</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJTransform";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJTransform.html" target="_top">Frames</a></li>
+<li><a href="TJTransform.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_java.awt.geom.Rectangle2D">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#methods_inherited_from_class_java.awt.Rectangle">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li>Method</li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJTransform" class="title">Class TJTransform</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>java.awt.geom.RectangularShape</li>
+<li>
+<ul class="inheritance">
+<li>java.awt.geom.Rectangle2D</li>
+<li>
+<ul class="inheritance">
+<li>java.awt.Rectangle</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJTransform</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<dl>
+<dt>All Implemented Interfaces:</dt>
+<dd>java.awt.Shape, java.io.Serializable, java.lang.Cloneable</dd>
+</dl>
+<hr>
+<br>
+<pre>public class <span class="strong">TJTransform</span>
+extends java.awt.Rectangle</pre>
+<div class="block">Lossless transform parameters</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../serialized-form.html#org.libjpegturbo.turbojpeg.TJTransform">Serialized Form</a></dd></dl>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- ======== NESTED CLASS SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="nested_class_summary">
+<!--   -->
+</a>
+<h3>Nested Class Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="nested_classes_inherited_from_class_java.awt.geom.Rectangle2D">
+<!--   -->
+</a>
+<h3>Nested classes/interfaces inherited from class&nbsp;java.awt.geom.Rectangle2D</h3>
+<code>java.awt.geom.Rectangle2D.Double, java.awt.geom.Rectangle2D.Float</code></li>
+</ul>
+</li>
+</ul>
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
+<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Field and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a></code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#cf">cf</a></strong></code>
+<div class="block">Custom filter instance</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#NUMOP">NUMOP</a></strong></code>
+<div class="block">The number of lossless transform operations</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#op">op</a></strong></code>
+<div class="block">Transform operation (one of <code>OP_*</code>)</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_HFLIP">OP_HFLIP</a></strong></code>
+<div class="block">Flip (mirror) image horizontally.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_NONE">OP_NONE</a></strong></code>
+<div class="block">Do not transform the position of the image pixels.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT180">OP_ROT180</a></strong></code>
+<div class="block">Rotate image 180 degrees.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT270">OP_ROT270</a></strong></code>
+<div class="block">Rotate image counter-clockwise by 90 degrees.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_ROT90">OP_ROT90</a></strong></code>
+<div class="block">Rotate image clockwise by 90 degrees.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSPOSE">OP_TRANSPOSE</a></strong></code>
+<div class="block">Transpose image (flip/mirror along upper left to lower right axis).</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_TRANSVERSE">OP_TRANSVERSE</a></strong></code>
+<div class="block">Transverse transpose image (flip/mirror along upper right to lower left
+ axis).</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OP_VFLIP">OP_VFLIP</a></strong></code>
+<div class="block">Flip (mirror) image vertically.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_CROP">OPT_CROP</a></strong></code>
+<div class="block">This option will enable lossless cropping.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_GRAY">OPT_GRAY</a></strong></code>
+<div class="block">This option will discard the color data in the input image and produce
+ a grayscale output image.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_NOOUTPUT">OPT_NOOUTPUT</a></strong></code>
+<div class="block">This option will prevent <a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> from outputting a JPEG image for this
+ particular transform.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT">OPT_PERFECT</a></strong></code>
+<div class="block">This option will cause <a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> to throw an exception if the transform is not
+ perfect.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>static int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_TRIM">OPT_TRIM</a></strong></code>
+<div class="block">This option will discard any partial MCU blocks that cannot be
+ transformed.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#options">options</a></strong></code>
+<div class="block">Transform options (bitwise OR of one or more of <code>OPT_*</code>)</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_java.awt.Rectangle">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;java.awt.Rectangle</h3>
+<code>height, width, x, y</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_java.awt.geom.Rectangle2D">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;java.awt.geom.Rectangle2D</h3>
+<code>OUT_BOTTOM, OUT_LEFT, OUT_RIGHT, OUT_TOP</code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform()">TJTransform</a></strong>()</code>
+<div class="block">Create a new lossless transform instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform(int, int, int, int, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">TJTransform</a></strong>(int&nbsp;x,
+           int&nbsp;y,
+           int&nbsp;w,
+           int&nbsp;h,
+           int&nbsp;op,
+           int&nbsp;options,
+           <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a>&nbsp;cf)</code>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform(java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">TJTransform</a></strong>(java.awt.Rectangle&nbsp;r,
+           int&nbsp;op,
+           int&nbsp;options,
+           <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a>&nbsp;cf)</code>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.awt.Rectangle">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.awt.Rectangle</h3>
+<code>add, add, add, contains, contains, contains, contains, createIntersection, createUnion, equals, getBounds, getBounds2D, getHeight, getLocation, getSize, getWidth, getX, getY, grow, inside, intersection, intersects, isEmpty, move, outcode, reshape, resize, setBounds, setBounds, setLocation, setLocation, setRect, setSize, setSize, toString, translate, union</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.awt.geom.Rectangle2D">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.awt.geom.Rectangle2D</h3>
+<code>add, add, add, contains, contains, getPathIterator, getPathIterator, hashCode, intersect, intersects, intersectsLine, intersectsLine, outcode, setFrame, setRect, union</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.awt.geom.RectangularShape">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.awt.geom.RectangularShape</h3>
+<code>clone, contains, contains, getCenterX, getCenterY, getFrame, getMaxX, getMaxY, getMinX, getMinY, intersects, setFrame, setFrame, setFrameFromCenter, setFrameFromCenter, setFrameFromDiagonal, setFrameFromDiagonal</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>finalize, getClass, notify, notifyAll, wait, wait, wait</code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.awt.Shape">
+<!--   -->
+</a>
+<h3>Methods inherited from interface&nbsp;java.awt.Shape</h3>
+<code>contains, contains, contains, contains, getPathIterator, getPathIterator, intersects, intersects</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ============ FIELD DETAIL =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_detail">
+<!--   -->
+</a>
+<h3>Field Detail</h3>
+<a name="NUMOP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>NUMOP</h4>
+<pre>public static final&nbsp;int NUMOP</pre>
+<div class="block">The number of lossless transform operations</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.NUMOP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_NONE">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_NONE</h4>
+<pre>public static final&nbsp;int OP_NONE</pre>
+<div class="block">Do not transform the position of the image pixels.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_NONE">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_HFLIP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_HFLIP</h4>
+<pre>public static final&nbsp;int OP_HFLIP</pre>
+<div class="block">Flip (mirror) image horizontally.  This transform is imperfect if there
+ are any partial MCU blocks on the right edge.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_HFLIP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_VFLIP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_VFLIP</h4>
+<pre>public static final&nbsp;int OP_VFLIP</pre>
+<div class="block">Flip (mirror) image vertically.  This transform is imperfect if there are
+ any partial MCU blocks on the bottom edge.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_VFLIP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_TRANSPOSE">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_TRANSPOSE</h4>
+<pre>public static final&nbsp;int OP_TRANSPOSE</pre>
+<div class="block">Transpose image (flip/mirror along upper left to lower right axis).  This
+ transform is always perfect.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_TRANSPOSE">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_TRANSVERSE">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_TRANSVERSE</h4>
+<pre>public static final&nbsp;int OP_TRANSVERSE</pre>
+<div class="block">Transverse transpose image (flip/mirror along upper right to lower left
+ axis).  This transform is imperfect if there are any partial MCU blocks in
+ the image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_TRANSVERSE">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_ROT90">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_ROT90</h4>
+<pre>public static final&nbsp;int OP_ROT90</pre>
+<div class="block">Rotate image clockwise by 90 degrees.  This transform is imperfect if
+ there are any partial MCU blocks on the bottom edge.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_ROT90">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_ROT180">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_ROT180</h4>
+<pre>public static final&nbsp;int OP_ROT180</pre>
+<div class="block">Rotate image 180 degrees.  This transform is imperfect if there are any
+ partial MCU blocks in the image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_ROT180">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OP_ROT270">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OP_ROT270</h4>
+<pre>public static final&nbsp;int OP_ROT270</pre>
+<div class="block">Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
+ if there are any partial MCU blocks on the right edge.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#OPT_PERFECT"><code>OPT_PERFECT</code></a>, 
+<a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OP_ROT270">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OPT_PERFECT">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OPT_PERFECT</h4>
+<pre>public static final&nbsp;int OPT_PERFECT</pre>
+<div class="block">This option will cause <a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> to throw an exception if the transform is not
+ perfect.  Lossless transforms operate on MCU blocks, whose size depends on
+ the level of chrominance subsampling used.  If the image's width or height
+ is not evenly divisible by the MCU block size (see <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)"><code>TJ.getMCUWidth(int)</code></a>
+ and <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)"><code>TJ.getMCUHeight(int)</code></a>), then there will be partial MCU blocks on the
+ right and/or bottom edges.  It is not possible to move these partial MCU
+ blocks to the top or left of the image, so any transform that would
+ require that is "imperfect."  If this option is not specified, then any
+ partial MCU blocks that cannot be transformed will be left in place, which
+ will create odd-looking strips on the right or bottom edge of the image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OPT_PERFECT">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OPT_TRIM">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OPT_TRIM</h4>
+<pre>public static final&nbsp;int OPT_TRIM</pre>
+<div class="block">This option will discard any partial MCU blocks that cannot be
+ transformed.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OPT_TRIM">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OPT_CROP">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OPT_CROP</h4>
+<pre>public static final&nbsp;int OPT_CROP</pre>
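+<div class="block">This option will enable lossless cropping.  The cropping region is specified by the <code>x</code>, <code>y</code>, <code>width</code>, and <code>height</code> fields that this class inherits from <code>java.awt.Rectangle</code>.</div>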
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OPT_CROP">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OPT_GRAY">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OPT_GRAY</h4>
+<pre>public static final&nbsp;int OPT_GRAY</pre>
+<div class="block">This option will discard the color data in the input image and produce
+ a grayscale output image.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OPT_GRAY">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="OPT_NOOUTPUT">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>OPT_NOOUTPUT</h4>
+<pre>public static final&nbsp;int OPT_NOOUTPUT</pre>
+<div class="block">This option will prevent <a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)"><code>TJTransformer.transform()</code></a> from outputting a JPEG image for this
+ particular transform.  This can be used in conjunction with a custom
+ filter to capture the transformed DCT coefficients without transcoding
+ them.</div>
+<dl><dt><span class="strong">See Also:</span></dt><dd><a href="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJTransform.OPT_NOOUTPUT">Constant Field Values</a></dd></dl>
+</li>
+</ul>
+<a name="op">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>op</h4>
+<pre>public&nbsp;int op</pre>
+<div class="block">Transform operation (one of <code>OP_*</code>)</div>
+</li>
+</ul>
+<a name="options">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>options</h4>
+<pre>public&nbsp;int options</pre>
+<div class="block">Transform options (bitwise OR of one or more of <code>OPT_*</code>)</div>
+</li>
+</ul>
+<a name="cf">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>cf</h4>
+<pre>public&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a> cf</pre>
+<div class="block">Custom filter instance</div>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJTransform()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJTransform</h4>
+<pre>public&nbsp;TJTransform()</pre>
+<div class="block">Create a new lossless transform instance.</div>
+</li>
+</ul>
+<a name="TJTransform(int, int, int, int, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJTransform</h4>
+<pre>public&nbsp;TJTransform(int&nbsp;x,
+           int&nbsp;y,
+           int&nbsp;w,
+           int&nbsp;h,
+           int&nbsp;op,
+           int&nbsp;options,
+           <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a>&nbsp;cf)
+            throws java.lang.Exception</pre>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>x</code> - the left boundary of the cropping region.  This must be evenly
+ divisible by the MCU block width (see <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUWidth(int)"><code>TJ.getMCUWidth(int)</code></a>)</dd><dd><code>y</code> - the upper boundary of the cropping region.  This must be evenly
+ divisible by the MCU block height (see <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#getMCUHeight(int)"><code>TJ.getMCUHeight(int)</code></a>)</dd><dd><code>w</code> - the width of the cropping region.  Setting this to 0 is the
+ equivalent of setting it to (width of the source JPEG image -
+ <code>x</code>).</dd><dd><code>h</code> - the height of the cropping region.  Setting this to 0 is the
+ equivalent of setting it to (height of the source JPEG image -
+ <code>y</code>).</dd><dd><code>op</code> - one of the transform operations (<code>OP_*</code>)</dd><dd><code>options</code> - the bitwise OR of one or more of the transform options
+ (<code>OPT_*</code>)</dd><dd><code>cf</code> - an instance of an object that implements the <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><code>TJCustomFilter</code></a> interface, or null if no custom filter is needed</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJTransform(java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJTransform</h4>
+<pre>public&nbsp;TJTransform(java.awt.Rectangle&nbsp;r,
+           int&nbsp;op,
+           int&nbsp;options,
+           <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a>&nbsp;cf)
+            throws java.lang.Exception</pre>
+<div class="block">Create a new lossless transform instance with the given parameters.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>r</code> - a <code>Rectangle</code> instance that specifies the cropping
+ region.  See <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html#TJTransform(int, int, int, int, int, int, org.libjpegturbo.turbojpeg.TJCustomFilter)"><code>TJTransform(int, int, int, int, int, int, TJCustomFilter)</code></a> for more
+ detail.</dd><dd><code>op</code> - one of the transform operations (<code>OP_*</code>)</dd><dd><code>options</code> - the bitwise OR of one or more of the transform options
+ (<code>OPT_*</code>)</dd><dd><code>cf</code> - an instance of an object that implements the <a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><code>TJCustomFilter</code></a> interface, or null if no custom filter is needed</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJTransform.html" target="_top">Frames</a></li>
+<li><a href="TJTransform.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li><a href="#nested_classes_inherited_from_class_java.awt.geom.Rectangle2D">Nested</a>&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#methods_inherited_from_class_java.awt.Rectangle">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li>Method</li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
new file mode 100644
index 0000000..3be18ec
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
@@ -0,0 +1,416 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>TJTransformer</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="TJTransformer";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJTransformer.html" target="_top">Frames</a></li>
+<li><a href="TJTransformer.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_org.libjpegturbo.turbojpeg.TJDecompressor">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class TJTransformer" class="title">Class TJTransformer</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">org.libjpegturbo.turbojpeg.TJDecompressor</a></li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.TJTransformer</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">TJTransformer</span>
+extends <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></pre>
+<div class="block">TurboJPEG lossless transformer</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<ul class="blockList">
+<li class="blockList"><a name="fields_inherited_from_class_org.libjpegturbo.turbojpeg.TJDecompressor">
+<!--   -->
+</a>
+<h3>Fields inherited from class&nbsp;org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></h3>
+<code><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#handle">handle</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBuf">jpegBuf</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize">jpegBufSize</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcColorspace">srcColorspace</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcHeight">srcHeight</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcSubsamp">srcSubsamp</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#srcWidth">srcWidth</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#yuvImage">yuvImage</a></code></li>
+</ul>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer()">TJTransformer</a></strong>()</code>
+<div class="block">Create a TurboJPEG lossless transformer instance.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[])">TJTransformer</a></strong>(byte[]&nbsp;jpegImage)</code>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#TJTransformer(byte[], int)">TJTransformer</a></strong>(byte[]&nbsp;jpegImage,
+             int&nbsp;imageSize)</code>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#getTransformedSizes()">getTransformedSizes</a></strong>()</code>
+<div class="block">Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)">transform</a></strong>(byte[][]&nbsp;dstBufs,
+         <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>[]&nbsp;transforms,
+         int&nbsp;flags)</code>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance into one or more JPEG images stored in the given destination
+ buffers.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a>[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html#transform(org.libjpegturbo.turbojpeg.TJTransform[], int)">transform</a></strong>(<a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>[]&nbsp;transforms,
+         int&nbsp;flags)</code>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance and return an array of <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><code>TJDecompressor</code></a> instances, each of
+ which has a transformed JPEG image associated with it.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_org.libjpegturbo.turbojpeg.TJDecompressor">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></h3>
+<code><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)">decompressToYUV</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(org.libjpegturbo.turbojpeg.YUVImage, int)">decompressToYUV</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()">getColorspace</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</a>, <a 
href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceBuf()">getSourceBuf</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSourceSize()">getSourceSize</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(byte[], int)">setSourceImage</a>, <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setSourceImage(org.libjpegturbo.turbojpeg.YUVImage)">setSourceImage</a></code></li>
+</ul>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="TJTransformer()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJTransformer</h4>
+<pre>public&nbsp;TJTransformer()
+              throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG lossless transformer instance.</div>
+<dl><dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJTransformer(byte[])">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>TJTransformer</h4>
+<pre>public&nbsp;TJTransformer(byte[]&nbsp;jpegImage)
+              throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image stored in <code>jpegImage</code> with the newly created instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>jpegImage</code> - JPEG image buffer (size of the JPEG image is assumed to
+ be the length of the array)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="TJTransformer(byte[], int)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>TJTransformer</h4>
+<pre>public&nbsp;TJTransformer(byte[]&nbsp;jpegImage,
+             int&nbsp;imageSize)
+              throws java.lang.Exception</pre>
+<div class="block">Create a TurboJPEG lossless transformer instance and associate the JPEG
+ image of length <code>imageSize</code> bytes stored in
+ <code>jpegImage</code> with the newly created instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>jpegImage</code> - JPEG image buffer</dd><dd><code>imageSize</code> - size of the JPEG image (in bytes)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="transform(byte[][], org.libjpegturbo.turbojpeg.TJTransform[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>transform</h4>
+<pre>public&nbsp;void&nbsp;transform(byte[][]&nbsp;dstBufs,
+             <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>[]&nbsp;transforms,
+             int&nbsp;flags)
+               throws java.lang.Exception</pre>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance into one or more JPEG images stored in the given destination
+ buffers.  Lossless transforms work by moving the raw coefficients from one
+ JPEG image structure to another without altering the values of the
+ coefficients.  While this is typically faster than decompressing the
+ image, transforming it, and re-compressing it, lossless transforms are not
+ free.  Each lossless transform requires reading and performing Huffman
+ decoding on all of the coefficients in the source image, regardless of the
+ size of the destination image.  Thus, this method provides a means of
+ generating multiple transformed images from the same source or of applying
+ multiple transformations simultaneously, in order to eliminate the need to
+ read the source coefficients multiple times.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>dstBufs</code> - an array of image buffers.  <code>dstBufs[i]</code> will
+ receive a JPEG image that has been transformed using the parameters in
+ <code>transforms[i]</code>.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSize(int, int, int)"><code>TJ.bufSize(int, int, int)</code></a> to determine the
+ maximum size for each buffer based on the transformed or cropped width and
+ height and the level of subsampling used in the source image.</dd><dd><code>transforms</code> - an array of <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><code>TJTransform</code></a> instances, each of
+ which specifies the transform parameters and/or cropping region for the
+ corresponding transformed output image</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="transform(org.libjpegturbo.turbojpeg.TJTransform[], int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>transform</h4>
+<pre>public&nbsp;<a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a>[]&nbsp;transform(<a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a>[]&nbsp;transforms,
+                         int&nbsp;flags)
+                           throws java.lang.Exception</pre>
+<div class="block">Losslessly transform the JPEG image associated with this transformer
+ instance and return an array of <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><code>TJDecompressor</code></a> instances, each of
+ which has a transformed JPEG image associated with it.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>transforms</code> - an array of <a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><code>TJTransform</code></a> instances, each of
+ which specifies the transform parameters and/or cropping region for the
+ corresponding transformed output image</dd><dd><code>flags</code> - the bitwise OR of one or more of
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_BOTTOMUP"><code>TJ.FLAG_*</code></a></dd>
+<dt><span class="strong">Returns:</span></dt><dd>an array of <a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><code>TJDecompressor</code></a> instances, each of
+ which has a transformed JPEG image associated with it</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getTransformedSizes()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getTransformedSizes</h4>
+<pre>public&nbsp;int[]&nbsp;getTransformedSizes()
+                          throws java.lang.Exception</pre>
+<div class="block">Returns an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>an array containing the sizes of the transformed JPEG images
+ generated by the most recent transform operation</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Next Class</span></a></li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/TJTransformer.html" target="_top">Frames</a></li>
+<li><a href="TJTransformer.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#fields_inherited_from_class_org.libjpegturbo.turbojpeg.TJDecompressor">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li>Field&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html
new file mode 100644
index 0000000..13e6160
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html
@@ -0,0 +1,569 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>YUVImage</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="YUVImage";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li>Next Class</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/YUVImage.html" target="_top">Frames</a></li>
+<li><a href="YUVImage.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<!-- ======== START OF CLASS DATA ======== -->
+<div class="header">
+<div class="subTitle">org.libjpegturbo.turbojpeg</div>
+<h2 title="Class YUVImage" class="title">Class YUVImage</h2>
+</div>
+<div class="contentContainer">
+<ul class="inheritance">
+<li>java.lang.Object</li>
+<li>
+<ul class="inheritance">
+<li>org.libjpegturbo.turbojpeg.YUVImage</li>
+</ul>
+</li>
+</ul>
+<div class="description">
+<ul class="blockList">
+<li class="blockList">
+<hr>
+<br>
+<pre>public class <span class="strong">YUVImage</span>
+extends java.lang.Object</pre>
+<div class="block">This class encapsulates a YUV planar image buffer and the metadata
+ associated with it.  The TurboJPEG API allows both the JPEG compression and
+ decompression pipelines to be split into stages:  YUV encode, compress from
+ YUV, decompress to YUV, and YUV decode.  A <code>YUVImage</code> instance
+ serves as the destination image for YUV encode and decompress-to-YUV
+ operations and as the source image for compress-from-YUV and YUV decode
+ operations.
+ <p>
+ Technically, the JPEG format uses the YCbCr colorspace (which, strictly
+ speaking, is not a "colorspace" but rather a "color transform"), but per the convention
+ of the digital video community, the TurboJPEG API uses "YUV" to refer to an
+ image format consisting of Y, Cb, and Cr image planes.  In this image
+ format, the Y, Cb (U), and Cr (V) planes are stored sequentially in the same
+ image buffer, and the size of each plane is determined by the image width,
+ height, line padding, and level of chrominance subsampling.  If the
+ chrominance components are subsampled along the horizontal dimension, then
+ the width of the luminance plane would be padded to the nearest multiple of
+ 2 (the same goes for the height of the luminance plane, if the chrominance
+ components are subsampled along the vertical dimension).  For instance, if
+ the source image is 35 x 35 pixels and 4:2:2 subsampling is used, then the
+ luminance plane would be 36 x 35 bytes, and each of the chrominance planes
+ would be 18 x 35 bytes.  If you specify, for instance, a line padding of 4
+ bytes on top of this, then the luminance plane would be 36 x 35 bytes, and
+ each of the chrominance planes would be 20 x 35 bytes.</div>
+</li>
+</ul>
+</div>
+<div class="summary">
+<ul class="blockList">
+<li class="blockList">
+<!-- =========== FIELD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_summary">
+<!--   -->
+</a>
+<h3>Field Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation">
+<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Field and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected long</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#handle">handle</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#yuvBuf">yuvBuf</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#yuvHeight">yuvHeight</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#yuvPad">yuvPad</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#yuvSubsamp">yuvSubsamp</a></strong></code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>protected int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#yuvWidth">yuvWidth</a></strong></code>&nbsp;</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ======== CONSTRUCTOR SUMMARY ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_summary">
+<!--   -->
+</a>
+<h3>Constructor Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
+<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colOne" scope="col">Constructor and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#YUVImage(byte[], int, int, int, int)">YUVImage</a></strong>(byte[]&nbsp;yuvImage,
+        int&nbsp;width,
+        int&nbsp;pad,
+        int&nbsp;height,
+        int&nbsp;subsamp)</code>
+<div class="block">Create a <code>YUVImage</code> instance from an existing YUV planar image
+ buffer.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colOne"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#YUVImage(int, int, int, int)">YUVImage</a></strong>(int&nbsp;width,
+        int&nbsp;pad,
+        int&nbsp;height,
+        int&nbsp;subsamp)</code>
+<div class="block">Create a <code>YUVImage</code> instance with a new image buffer.</div>
+</td>
+</tr>
+</table>
+</li>
+</ul>
+<!-- ========== METHOD SUMMARY =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_summary">
+<!--   -->
+</a>
+<h3>Method Summary</h3>
+<table class="overviewSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
+<caption><span>Methods</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Modifier and Type</th>
+<th class="colLast" scope="col">Method and Description</th>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>byte[]</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getBuf()">getBuf</a></strong>()</code>
+<div class="block">Returns the YUV image buffer.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getHeight()">getHeight</a></strong>()</code>
+<div class="block">Returns the height of the YUV image.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getPad()">getPad</a></strong>()</code>
+<div class="block">Returns the line padding used in the YUV image buffer.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getSize()">getSize</a></strong>()</code>
+<div class="block">Returns the size (in bytes) of the YUV image buffer.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getSubsamp()">getSubsamp</a></strong>()</code>
+<div class="block">Returns the level of chrominance subsampling used in the YUV image.</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><code>int</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#getWidth()">getWidth</a></strong>()</code>
+<div class="block">Returns the width of the YUV image.</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><code>void</code></td>
+<td class="colLast"><code><strong><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html#setBuf(byte[], int, int, int, int)">setBuf</a></strong>(byte[]&nbsp;yuvImage,
+      int&nbsp;width,
+      int&nbsp;pad,
+      int&nbsp;height,
+      int&nbsp;subsamp)</code>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+</td>
+</tr>
+</table>
+<ul class="blockList">
+<li class="blockList"><a name="methods_inherited_from_class_java.lang.Object">
+<!--   -->
+</a>
+<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
+<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="details">
+<ul class="blockList">
+<li class="blockList">
+<!-- ============ FIELD DETAIL =========== -->
+<ul class="blockList">
+<li class="blockList"><a name="field_detail">
+<!--   -->
+</a>
+<h3>Field Detail</h3>
+<a name="handle">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>handle</h4>
+<pre>protected&nbsp;long handle</pre>
+</li>
+</ul>
+<a name="yuvBuf">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>yuvBuf</h4>
+<pre>protected&nbsp;byte[] yuvBuf</pre>
+</li>
+</ul>
+<a name="yuvPad">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>yuvPad</h4>
+<pre>protected&nbsp;int yuvPad</pre>
+</li>
+</ul>
+<a name="yuvWidth">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>yuvWidth</h4>
+<pre>protected&nbsp;int yuvWidth</pre>
+</li>
+</ul>
+<a name="yuvHeight">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>yuvHeight</h4>
+<pre>protected&nbsp;int yuvHeight</pre>
+</li>
+</ul>
+<a name="yuvSubsamp">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>yuvSubsamp</h4>
+<pre>protected&nbsp;int yuvSubsamp</pre>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ========= CONSTRUCTOR DETAIL ======== -->
+<ul class="blockList">
+<li class="blockList"><a name="constructor_detail">
+<!--   -->
+</a>
+<h3>Constructor Detail</h3>
+<a name="YUVImage(int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>YUVImage</h4>
+<pre>public&nbsp;YUVImage(int&nbsp;width,
+        int&nbsp;pad,
+        int&nbsp;height,
+        int&nbsp;subsamp)
+         throws java.lang.Exception</pre>
+<div class="block">Create a <code>YUVImage</code> instance with a new image buffer.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - Each line of each plane in the YUV image buffer will be padded
+ to this number of bytes (must be a power of 2).</dd><dd><code>height</code> - height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling to be used in the YUV
+ image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="YUVImage(byte[], int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>YUVImage</h4>
+<pre>public&nbsp;YUVImage(byte[]&nbsp;yuvImage,
+        int&nbsp;width,
+        int&nbsp;pad,
+        int&nbsp;height,
+        int&nbsp;subsamp)
+         throws java.lang.Exception</pre>
+<div class="block">Create a <code>YUVImage</code> instance from an existing YUV planar image
+ buffer.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>yuvImage</code> - image buffer that contains or will contain YUV planar
+ image data.  See <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>above</code></a> for a description of the image
+ format.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> to determine the minimum size for this
+ buffer.</dd><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - the line padding used in the YUV image buffer.  For
+ instance, if each line in each plane of the buffer is padded to the
+ nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.</dd><dd><code>height</code> - height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling used in the YUV
+ image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+<!-- ============ METHOD DETAIL ========== -->
+<ul class="blockList">
+<li class="blockList"><a name="method_detail">
+<!--   -->
+</a>
+<h3>Method Detail</h3>
+<a name="setBuf(byte[], int, int, int, int)">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>setBuf</h4>
+<pre>public&nbsp;void&nbsp;setBuf(byte[]&nbsp;yuvImage,
+          int&nbsp;width,
+          int&nbsp;pad,
+          int&nbsp;height,
+          int&nbsp;subsamp)
+            throws java.lang.Exception</pre>
+<div class="block">Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+ instance.</div>
+<dl><dt><span class="strong">Parameters:</span></dt><dd><code>yuvImage</code> - image buffer that contains or will contain YUV planar
+ image data.  See <a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><code>above</code></a> for a description of the image
+ format.  Use <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><code>TJ.bufSizeYUV(int, int, int, int)</code></a> to determine the minimum size for this
+ buffer.</dd><dd><code>width</code> - width (in pixels) of the YUV image</dd><dd><code>pad</code> - the line padding used in the YUV image buffer.  For
+ instance, if each line in each plane of the buffer is padded to the
+ nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.</dd><dd><code>height</code> - height (in pixels) of the YUV image</dd><dd><code>subsamp</code> - the level of chrominance subsampling used in the YUV
+ image (one of <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>)</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getWidth()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getWidth</h4>
+<pre>public&nbsp;int&nbsp;getWidth()
+             throws java.lang.Exception</pre>
+<div class="block">Returns the width of the YUV image.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the width of the YUV image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getHeight()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getHeight</h4>
+<pre>public&nbsp;int&nbsp;getHeight()
+              throws java.lang.Exception</pre>
+<div class="block">Returns the height of the YUV image.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the height of the YUV image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getPad()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getPad</h4>
+<pre>public&nbsp;int&nbsp;getPad()
+           throws java.lang.Exception</pre>
+<div class="block">Returns the line padding used in the YUV image buffer.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the line padding used in the YUV image buffer</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getSubsamp()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getSubsamp</h4>
+<pre>public&nbsp;int&nbsp;getSubsamp()
+               throws java.lang.Exception</pre>
+<div class="block">Returns the level of chrominance subsampling used in the YUV image.  See
+ <a href="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_444"><code>TJ.SAMP_*</code></a>.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the level of chrominance subsampling used in the YUV image</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getBuf()">
+<!--   -->
+</a>
+<ul class="blockList">
+<li class="blockList">
+<h4>getBuf</h4>
+<pre>public&nbsp;byte[]&nbsp;getBuf()
+              throws java.lang.Exception</pre>
+<div class="block">Returns the YUV image buffer.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the YUV image buffer</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+<a name="getSize()">
+<!--   -->
+</a>
+<ul class="blockListLast">
+<li class="blockList">
+<h4>getSize</h4>
+<pre>public&nbsp;int&nbsp;getSize()
+            throws java.lang.Exception</pre>
+<div class="block">Returns the size (in bytes) of the YUV image buffer.</div>
+<dl><dt><span class="strong">Returns:</span></dt><dd>the size (in bytes) of the YUV image buffer</dd>
+<dt><span class="strong">Throws:</span></dt>
+<dd><code>java.lang.Exception</code></dd></dl>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+</div>
+<!-- ========= END OF CLASS DATA ========= -->
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li class="navBarCell1Rev">Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">Prev Class</span></a></li>
+<li>Next Class</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/YUVImage.html" target="_top">Frames</a></li>
+<li><a href="YUVImage.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<div>
+<ul class="subNavList">
+<li>Summary:&nbsp;</li>
+<li>Nested&nbsp;|&nbsp;</li>
+<li><a href="#field_summary">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_summary">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_summary">Method</a></li>
+</ul>
+<ul class="subNavList">
+<li>Detail:&nbsp;</li>
+<li><a href="#field_detail">Field</a>&nbsp;|&nbsp;</li>
+<li><a href="#constructor_detail">Constr</a>&nbsp;|&nbsp;</li>
+<li><a href="#method_detail">Method</a></li>
+</ul>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-frame.html b/java/doc/org/libjpegturbo/turbojpeg/package-frame.html
new file mode 100644
index 0000000..7cb8fa0
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-frame.html
@@ -0,0 +1,27 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>org.libjpegturbo.turbojpeg</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<h1 class="bar"><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html" target="classFrame">org.libjpegturbo.turbojpeg</a></h1>
+<div class="indexContainer">
+<h2 title="Interfaces">Interfaces</h2>
+<ul title="Interfaces">
+<li><a href="TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg" target="classFrame"><i>TJCustomFilter</i></a></li>
+</ul>
+<h2 title="Classes">Classes</h2>
+<ul title="Classes">
+<li><a href="TJ.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJ</a></li>
+<li><a href="TJCompressor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJCompressor</a></li>
+<li><a href="TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJDecompressor</a></li>
+<li><a href="TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJScalingFactor</a></li>
+<li><a href="TJTransform.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJTransform</a></li>
+<li><a href="TJTransformer.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">TJTransformer</a></li>
+<li><a href="YUVImage.html" title="class in org.libjpegturbo.turbojpeg" target="classFrame">YUVImage</a></li>
+</ul>
+</div>
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-summary.html b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html
new file mode 100644
index 0000000..c5e2f7d
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-summary.html
@@ -0,0 +1,183 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>org.libjpegturbo.turbojpeg</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="org.libjpegturbo.turbojpeg";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Package</li>
+<li>Next Package</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/package-summary.html" target="_top">Frames</a></li>
+<li><a href="package-summary.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 title="Package" class="title">Package&nbsp;org.libjpegturbo.turbojpeg</h1>
+</div>
+<div class="contentContainer">
+<ul class="blockList">
+<li class="blockList">
+<table class="packageSummary" border="0" cellpadding="3" cellspacing="0" summary="Interface Summary table, listing interfaces, and an explanation">
+<caption><span>Interface Summary</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Interface</th>
+<th class="colLast" scope="col">Description</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a></td>
+<td class="colLast">
+<div class="block">Custom filter callback interface</div>
+</td>
+</tr>
+</tbody>
+</table>
+</li>
+<li class="blockList">
+<table class="packageSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
+<caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
+<tr>
+<th class="colFirst" scope="col">Class</th>
+<th class="colLast" scope="col">Description</th>
+</tr>
+<tbody>
+<tr class="altColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</a></td>
+<td class="colLast">
+<div class="block">TurboJPEG utility class (cannot be instantiated)</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</a></td>
+<td class="colLast">
+<div class="block">TurboJPEG compressor</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</a></td>
+<td class="colLast">
+<div class="block">TurboJPEG decompressor</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg">TJScalingFactor</a></td>
+<td class="colLast">
+<div class="block">Fractional scaling factor</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</a></td>
+<td class="colLast">
+<div class="block">Lossless transform parameters</div>
+</td>
+</tr>
+<tr class="rowColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg">TJTransformer</a></td>
+<td class="colLast">
+<div class="block">TurboJPEG lossless transformer</div>
+</td>
+</tr>
+<tr class="altColor">
+<td class="colFirst"><a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg">YUVImage</a></td>
+<td class="colLast">
+<div class="block">This class encapsulates a YUV planar image buffer and the metadata
+ associated with it.</div>
+</td>
+</tr>
+</tbody>
+</table>
+</li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="package-tree.html">Tree</a></li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev Package</li>
+<li>Next Package</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/package-summary.html" target="_top">Frames</a></li>
+<li><a href="package-summary.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/package-tree.html b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html
new file mode 100644
index 0000000..1033ee5
--- /dev/null
+++ b/java/doc/org/libjpegturbo/turbojpeg/package-tree.html
@@ -0,0 +1,143 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>org.libjpegturbo.turbojpeg Class Hierarchy</title>
+<link rel="stylesheet" type="text/css" href="../../../stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="org.libjpegturbo.turbojpeg Class Hierarchy";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li class="navBarCell1Rev">Tree</li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/package-tree.html" target="_top">Frames</a></li>
+<li><a href="package-tree.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 class="title">Hierarchy For Package org.libjpegturbo.turbojpeg</h1>
+</div>
+<div class="contentContainer">
+<h2 title="Class Hierarchy">Class Hierarchy</h2>
+<ul>
+<li type="circle">java.lang.Object
+<ul>
+<li type="circle">java.awt.geom.RectangularShape (implements java.lang.Cloneable, java.awt.Shape)
+<ul>
+<li type="circle">java.awt.geom.Rectangle2D
+<ul>
+<li type="circle">java.awt.Rectangle (implements java.io.Serializable, java.awt.Shape)
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransform</span></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJ</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJCompressor</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJDecompressor</span></a>
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransformer</span></a></li>
+</ul>
+</li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJScalingFactor</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">YUVImage</span></a></li>
+</ul>
+</li>
+</ul>
+<h2 title="Interface Hierarchy">Interface Hierarchy</h2>
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="../../../org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">TJCustomFilter</span></a></li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="../../../org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li class="navBarCell1Rev">Tree</li>
+<li><a href="../../../deprecated-list.html">Deprecated</a></li>
+<li><a href="../../../index-all.html">Index</a></li>
+<li><a href="../../../help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="../../../index.html?org/libjpegturbo/turbojpeg/package-tree.html" target="_top">Frames</a></li>
+<li><a href="package-tree.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="../../../allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/overview-tree.html b/java/doc/overview-tree.html
new file mode 100644
index 0000000..eae18a1
--- /dev/null
+++ b/java/doc/overview-tree.html
@@ -0,0 +1,147 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Class Hierarchy</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="Class Hierarchy";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li class="navBarCell1Rev">Tree</li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?overview-tree.html" target="_top">Frames</a></li>
+<li><a href="overview-tree.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 class="title">Hierarchy For All Packages</h1>
+<span class="strong">Package Hierarchies:</span>
+<ul class="horizontal">
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">org.libjpegturbo.turbojpeg</a></li>
+</ul>
+</div>
+<div class="contentContainer">
+<h2 title="Class Hierarchy">Class Hierarchy</h2>
+<ul>
+<li type="circle">java.lang.Object
+<ul>
+<li type="circle">java.awt.geom.RectangularShape (implements java.lang.Cloneable, java.awt.Shape)
+<ul>
+<li type="circle">java.awt.geom.Rectangle2D
+<ul>
+<li type="circle">java.awt.Rectangle (implements java.io.Serializable, java.awt.Shape)
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransform</span></a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJ</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJCompressor</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJDecompressor</span></a>
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJTransformer.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJTransformer</span></a></li>
+</ul>
+</li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJScalingFactor.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">TJScalingFactor</span></a></li>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/YUVImage.html" title="class in org.libjpegturbo.turbojpeg"><span class="strong">YUVImage</span></a></li>
+</ul>
+</li>
+</ul>
+<h2 title="Interface Hierarchy">Interface Hierarchy</h2>
+<ul>
+<li type="circle">org.libjpegturbo.turbojpeg.<a href="org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg"><span class="strong">TJCustomFilter</span></a></li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li class="navBarCell1Rev">Tree</li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?overview-tree.html" target="_top">Frames</a></li>
+<li><a href="overview-tree.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/package-list b/java/doc/package-list
new file mode 100644
index 0000000..918d936
--- /dev/null
+++ b/java/doc/package-list
@@ -0,0 +1 @@
+org.libjpegturbo.turbojpeg
diff --git a/java/doc/resources/background.gif b/java/doc/resources/background.gif
new file mode 100644
index 0000000..f471940
--- /dev/null
+++ b/java/doc/resources/background.gif
Binary files differ
diff --git a/java/doc/resources/tab.gif b/java/doc/resources/tab.gif
new file mode 100644
index 0000000..1a73a83
--- /dev/null
+++ b/java/doc/resources/tab.gif
Binary files differ
diff --git a/java/doc/resources/titlebar.gif b/java/doc/resources/titlebar.gif
new file mode 100644
index 0000000..17443b3
--- /dev/null
+++ b/java/doc/resources/titlebar.gif
Binary files differ
diff --git a/java/doc/resources/titlebar_end.gif b/java/doc/resources/titlebar_end.gif
new file mode 100644
index 0000000..3ad78d4
--- /dev/null
+++ b/java/doc/resources/titlebar_end.gif
Binary files differ
diff --git a/java/doc/serialized-form.html b/java/doc/serialized-form.html
new file mode 100644
index 0000000..bbe1805
--- /dev/null
+++ b/java/doc/serialized-form.html
@@ -0,0 +1,150 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<!-- NewPage -->
+<html lang="en">
+<head>
+<title>Serialized Form</title>
+<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
+</head>
+<body>
+<script type="text/javascript"><!--
+    if (location.href.indexOf('is-external=true') == -1) {
+        parent.document.title="Serialized Form";
+    }
+//-->
+</script>
+<noscript>
+<div>JavaScript is disabled on your browser.</div>
+</noscript>
+<!-- ========= START OF TOP NAVBAR ======= -->
+<div class="topNav"><a name="navbar_top">
+<!--   -->
+</a><a href="#skip-navbar_top" title="Skip navigation links"></a><a name="navbar_top_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?serialized-form.html" target="_top">Frames</a></li>
+<li><a href="serialized-form.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_top">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_top");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_top">
+<!--   -->
+</a></div>
+<!-- ========= END OF TOP NAVBAR ========= -->
+<div class="header">
+<h1 title="Serialized Form" class="title">Serialized Form</h1>
+</div>
+<div class="serializedFormContainer">
+<ul class="blockList">
+<li class="blockList">
+<h2 title="Package">Package&nbsp;org.libjpegturbo.turbojpeg</h2>
+<ul class="blockList">
+<li class="blockList"><a name="org.libjpegturbo.turbojpeg.TJTransform">
+<!--   -->
+</a>
+<h3>Class <a href="org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">org.libjpegturbo.turbojpeg.TJTransform</a> extends java.awt.Rectangle implements Serializable</h3>
+<dl class="nameValue">
+<dt>serialVersionUID:</dt>
+<dd>-127367705761430371L</dd>
+</dl>
+<ul class="blockList">
+<li class="blockList"><a name="serializedForm">
+<!--   -->
+</a>
+<h3>Serialized Fields</h3>
+<ul class="blockList">
+<li class="blockList">
+<h4>op</h4>
+<pre>int op</pre>
+<div class="block">Transform operation (one of <code>OP_*</code>)</div>
+</li>
+<li class="blockList">
+<h4>options</h4>
+<pre>int options</pre>
+<div class="block">Transform options (bitwise OR of one or more of <code>OPT_*</code>)</div>
+</li>
+<li class="blockListLast">
+<h4>cf</h4>
+<pre><a href="org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</a> cf</pre>
+<div class="block">Custom filter instance</div>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<!-- ======= START OF BOTTOM NAVBAR ====== -->
+<div class="bottomNav"><a name="navbar_bottom">
+<!--   -->
+</a><a href="#skip-navbar_bottom" title="Skip navigation links"></a><a name="navbar_bottom_firstrow">
+<!--   -->
+</a>
+<ul class="navList" title="Navigation">
+<li><a href="org/libjpegturbo/turbojpeg/package-summary.html">Package</a></li>
+<li>Class</li>
+<li><a href="org/libjpegturbo/turbojpeg/package-tree.html">Tree</a></li>
+<li><a href="deprecated-list.html">Deprecated</a></li>
+<li><a href="index-all.html">Index</a></li>
+<li><a href="help-doc.html">Help</a></li>
+</ul>
+</div>
+<div class="subNav">
+<ul class="navList">
+<li>Prev</li>
+<li>Next</li>
+</ul>
+<ul class="navList">
+<li><a href="index.html?serialized-form.html" target="_top">Frames</a></li>
+<li><a href="serialized-form.html" target="_top">No Frames</a></li>
+</ul>
+<ul class="navList" id="allclasses_navbar_bottom">
+<li><a href="allclasses-noframe.html">All Classes</a></li>
+</ul>
+<div>
+<script type="text/javascript"><!--
+  allClassesLink = document.getElementById("allclasses_navbar_bottom");
+  if(window==top) {
+    allClassesLink.style.display = "block";
+  }
+  else {
+    allClassesLink.style.display = "none";
+  }
+  //-->
+</script>
+</div>
+<a name="skip-navbar_bottom">
+<!--   -->
+</a></div>
+<!-- ======== END OF BOTTOM NAVBAR ======= -->
+</body>
+</html>
diff --git a/java/doc/stylesheet.css b/java/doc/stylesheet.css
new file mode 100644
index 0000000..0aeaa97
--- /dev/null
+++ b/java/doc/stylesheet.css
@@ -0,0 +1,474 @@
+/* Javadoc style sheet */
+/*
+Overall document style
+*/
+body {
+    background-color:#ffffff;
+    color:#353833;
+    font-family:Arial, Helvetica, sans-serif;
+    font-size:76%;
+    margin:0;
+}
+a:link, a:visited {
+    text-decoration:none;
+    color:#4c6b87;
+}
+a:hover, a:focus {
+    text-decoration:none;
+    color:#bb7a2a;
+}
+a:active {
+    text-decoration:none;
+    color:#4c6b87;
+}
+a[name] {
+    color:#353833;
+}
+a[name]:hover {
+    text-decoration:none;
+    color:#353833;
+}
+pre {
+    font-size:1.3em;
+}
+h1 {
+    font-size:1.8em;
+}
+h2 {
+    font-size:1.5em;
+}
+h3 {
+    font-size:1.4em;
+}
+h4 {
+    font-size:1.3em;
+}
+h5 {
+    font-size:1.2em;
+}
+h6 {
+    font-size:1.1em;
+}
+ul {
+    list-style-type:disc;
+}
+code, tt {
+    font-size:1.2em;
+}
+dt code {
+    font-size:1.2em;
+}
+table tr td dt code {
+    font-size:1.2em;
+    vertical-align:top;
+}
+sup {
+    font-size:.6em;
+}
+/*
+Document title and Copyright styles
+*/
+.clear {
+    clear:both;
+    height:0px;
+    overflow:hidden;
+}
+.aboutLanguage {
+    float:right;
+    padding:0px 21px;
+    font-size:.8em;
+    z-index:200;
+    margin-top:-7px;
+}
+.legalCopy {
+    margin-left:.5em;
+}
+.bar a, .bar a:link, .bar a:visited, .bar a:active {
+    color:#FFFFFF;
+    text-decoration:none;
+}
+.bar a:hover, .bar a:focus {
+    color:#bb7a2a;
+}
+.tab {
+    background-color:#0066FF;
+    background-image:url(resources/titlebar.gif);
+    background-position:left top;
+    background-repeat:no-repeat;
+    color:#ffffff;
+    padding:8px;
+    width:5em;
+    font-weight:bold;
+}
+/*
+Navigation bar styles
+*/
+.bar {
+    background-image:url(resources/background.gif);
+    background-repeat:repeat-x;
+    color:#FFFFFF;
+    padding:.8em .5em .4em .8em;
+    height:auto;/*height:1.8em;*/
+    font-size:1em;
+    margin:0;
+}
+.topNav {
+    background-image:url(resources/background.gif);
+    background-repeat:repeat-x;
+    color:#FFFFFF;
+    float:left;
+    padding:0;
+    width:100%;
+    clear:right;
+    height:2.8em;
+    padding-top:10px;
+    overflow:hidden;
+}
+.bottomNav {
+    margin-top:10px;
+    background-image:url(resources/background.gif);
+    background-repeat:repeat-x;
+    color:#FFFFFF;
+    float:left;
+    padding:0;
+    width:100%;
+    clear:right;
+    height:2.8em;
+    padding-top:10px;
+    overflow:hidden;
+}
+.subNav {
+    background-color:#dee3e9;
+    border-bottom:1px solid #9eadc0;
+    float:left;
+    width:100%;
+    overflow:hidden;
+}
+.subNav div {
+    clear:left;
+    float:left;
+    padding:0 0 5px 6px;
+}
+ul.navList, ul.subNavList {
+    float:left;
+    margin:0 25px 0 0;
+    padding:0;
+}
+ul.navList li{
+    list-style:none;
+    float:left;
+    padding:3px 6px;
+}
+ul.subNavList li{
+    list-style:none;
+    float:left;
+    font-size:90%;
+}
+.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited {
+    color:#FFFFFF;
+    text-decoration:none;
+}
+.topNav a:hover, .bottomNav a:hover {
+    text-decoration:none;
+    color:#bb7a2a;
+}
+.navBarCell1Rev {
+    background-image:url(resources/tab.gif);
+    background-color:#a88834;
+    color:#FFFFFF;
+    margin: auto 5px;
+    border:1px solid #c9aa44;
+}
+/*
+Page header and footer styles
+*/
+.header, .footer {
+    clear:both;
+    margin:0 20px;
+    padding:5px 0 0 0;
+}
+.indexHeader {
+    margin:10px;
+    position:relative;
+}
+.indexHeader h1 {
+    font-size:1.3em;
+}
+.title {
+    color:#2c4557;
+    margin:10px 0;
+}
+.subTitle {
+    margin:5px 0 0 0;
+}
+.header ul {
+    margin:0 0 25px 0;
+    padding:0;
+}
+.footer ul {
+    margin:20px 0 5px 0;
+}
+.header ul li, .footer ul li {
+    list-style:none;
+    font-size:1.2em;
+}
+/*
+Heading styles
+*/
+div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 {
+    background-color:#dee3e9;
+    border-top:1px solid #9eadc0;
+    border-bottom:1px solid #9eadc0;
+    margin:0 0 6px -8px;
+    padding:2px 5px;
+}
+ul.blockList ul.blockList ul.blockList li.blockList h3 {
+    background-color:#dee3e9;
+    border-top:1px solid #9eadc0;
+    border-bottom:1px solid #9eadc0;
+    margin:0 0 6px -8px;
+    padding:2px 5px;
+}
+ul.blockList ul.blockList li.blockList h3 {
+    padding:0;
+    margin:15px 0;
+}
+ul.blockList li.blockList h2 {
+    padding:0px 0 20px 0;
+}
+/*
+Page layout container styles
+*/
+.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer {
+    clear:both;
+    padding:10px 20px;
+    position:relative;
+}
+.indexContainer {
+    margin:10px;
+    position:relative;
+    font-size:1.0em;
+}
+.indexContainer h2 {
+    font-size:1.1em;
+    padding:0 0 3px 0;
+}
+.indexContainer ul {
+    margin:0;
+    padding:0;
+}
+.indexContainer ul li {
+    list-style:none;
+}
+.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt {
+    font-size:1.1em;
+    font-weight:bold;
+    margin:10px 0 0 0;
+    color:#4E4E4E;
+}
+.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd {
+    margin:10px 0 10px 20px;
+}
+.serializedFormContainer dl.nameValue dt {
+    margin-left:1px;
+    font-size:1.1em;
+    display:inline;
+    font-weight:bold;
+}
+.serializedFormContainer dl.nameValue dd {
+    margin:0 0 0 1px;
+    font-size:1.1em;
+    display:inline;
+}
+/*
+List styles
+*/
+ul.horizontal li {
+    display:inline;
+    font-size:0.9em;
+}
+ul.inheritance {
+    margin:0;
+    padding:0;
+}
+ul.inheritance li {
+    display:inline;
+    list-style:none;
+}
+ul.inheritance li ul.inheritance {
+    margin-left:15px;
+    padding-left:15px;
+    padding-top:1px;
+}
+ul.blockList, ul.blockListLast {
+    margin:10px 0 10px 0;
+    padding:0;
+}
+ul.blockList li.blockList, ul.blockListLast li.blockList {
+    list-style:none;
+    margin-bottom:25px;
+}
+ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList {
+    padding:0px 20px 5px 10px;
+    border:1px solid #9eadc0;
+    background-color:#f9f9f9;
+}
+ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList {
+    padding:0 0 5px 8px;
+    background-color:#ffffff;
+    border:1px solid #9eadc0;
+    border-top:none;
+}
+ul.blockList ul.blockList ul.blockList ul.blockList li.blockList {
+    margin-left:0;
+    padding-left:0;
+    padding-bottom:15px;
+    border:none;
+    border-bottom:1px solid #9eadc0;
+}
+ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast {
+    list-style:none;
+    border-bottom:none;
+    padding-bottom:0;
+}
+table tr td dl, table tr td dl dt, table tr td dl dd {
+    margin-top:0;
+    margin-bottom:1px;
+}
+/*
+Table styles
+*/
+.contentContainer table, .classUseContainer table, .constantValuesContainer table {
+    border-bottom:1px solid #9eadc0;
+    width:100%;
+}
+.contentContainer ul li table, .classUseContainer ul li table, .constantValuesContainer ul li table {
+    width:100%;
+}
+.contentContainer .description table, .contentContainer .details table {
+    border-bottom:none;
+}
+.contentContainer ul li table th.colOne, .contentContainer ul li table th.colFirst, .contentContainer ul li table th.colLast, .classUseContainer ul li table th, .constantValuesContainer ul li table th, .contentContainer ul li table td.colOne, .contentContainer ul li table td.colFirst, .contentContainer ul li table td.colLast, .classUseContainer ul li table td, .constantValuesContainer ul li table td{
+    vertical-align:top;
+    padding-right:20px;
+}
+.contentContainer ul li table th.colLast, .classUseContainer ul li table th.colLast,.constantValuesContainer ul li table th.colLast,
+.contentContainer ul li table td.colLast, .classUseContainer ul li table td.colLast,.constantValuesContainer ul li table td.colLast,
+.contentContainer ul li table th.colOne, .classUseContainer ul li table th.colOne,
+.contentContainer ul li table td.colOne, .classUseContainer ul li table td.colOne {
+    padding-right:3px;
+}
+.overviewSummary caption, .packageSummary caption, .contentContainer ul.blockList li.blockList caption, .summary caption, .classUseContainer caption, .constantValuesContainer caption {
+    position:relative;
+    text-align:left;
+    background-repeat:no-repeat;
+    color:#FFFFFF;
+    font-weight:bold;
+    clear:none;
+    overflow:hidden;
+    padding:0px;
+    margin:0px;
+}
+caption a:link, caption a:hover, caption a:active, caption a:visited {
+    color:#FFFFFF;
+}
+.overviewSummary caption span, .packageSummary caption span, .contentContainer ul.blockList li.blockList caption span, .summary caption span, .classUseContainer caption span, .constantValuesContainer caption span {
+    white-space:nowrap;
+    padding-top:8px;
+    padding-left:8px;
+    display:block;
+    float:left;
+    background-image:url(resources/titlebar.gif);
+    height:18px;
+}
+.overviewSummary .tabEnd, .packageSummary .tabEnd, .contentContainer ul.blockList li.blockList .tabEnd, .summary .tabEnd, .classUseContainer .tabEnd, .constantValuesContainer .tabEnd {
+    width:10px;
+    background-image:url(resources/titlebar_end.gif);
+    background-repeat:no-repeat;
+    background-position:top right;
+    position:relative;
+    float:left;
+}
+ul.blockList ul.blockList li.blockList table {
+    margin:0 0 12px 0px;
+    width:100%;
+}
+.tableSubHeadingColor {
+    background-color: #EEEEFF;
+}
+.altColor {
+    background-color:#eeeeef;
+}
+.rowColor {
+    background-color:#ffffff;
+}
+.overviewSummary td, .packageSummary td, .contentContainer ul.blockList li.blockList td, .summary td, .classUseContainer td, .constantValuesContainer td {
+    text-align:left;
+    padding:3px 3px 3px 7px;
+}
+th.colFirst, th.colLast, th.colOne, .constantValuesContainer th {
+    background:#dee3e9;
+    border-top:1px solid #9eadc0;
+    border-bottom:1px solid #9eadc0;
+    text-align:left;
+    padding:3px 3px 3px 7px;
+}
+td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover {
+    font-weight:bold;
+}
+td.colFirst, th.colFirst {
+    border-left:1px solid #9eadc0;
+    white-space:nowrap;
+}
+td.colLast, th.colLast {
+    border-right:1px solid #9eadc0;
+}
+td.colOne, th.colOne {
+    border-right:1px solid #9eadc0;
+    border-left:1px solid #9eadc0;
+}
+table.overviewSummary  {
+    padding:0px;
+    margin-left:0px;
+}
+table.overviewSummary td.colFirst, table.overviewSummary th.colFirst,
+table.overviewSummary td.colOne, table.overviewSummary th.colOne {
+    width:25%;
+    vertical-align:middle;
+}
+table.packageSummary td.colFirst, table.overviewSummary th.colFirst {
+    width:25%;
+    vertical-align:middle;
+}
+/*
+Content styles
+*/
+.description pre {
+    margin-top:0;
+}
+.deprecatedContent {
+    margin:0;
+    padding:10px 0;
+}
+.docSummary {
+    padding:0;
+}
+/*
+Formatting effect styles
+*/
+.sourceLineNo {
+    color:green;
+    padding:0 30px 0 0;
+}
+h1.hidden {
+    visibility:hidden;
+    overflow:hidden;
+    font-size:.9em;
+}
+.block {
+    display:block;
+    margin:3px 0 0 0;
+}
+.strong {
+    font-weight:bold;
+}
diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java
new file mode 100644
index 0000000..ac4a4dd
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJ.java
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C)2011-2013 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * TurboJPEG utility class (cannot be instantiated)
+ */
+public final class TJ {
+
+
+  /**
+   * The number of chrominance subsampling options
+   */
+  public static final int NUMSAMP   = 6;
+  /**
+   * 4:4:4 chrominance subsampling (no chrominance subsampling).  The JPEG
+   * or YUV image will contain one chrominance component for every pixel in the
+   * source image.
+   */
+  public static final int SAMP_444  = 0;
+  /**
+   * 4:2:2 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x1 block of pixels in the source image.
+   */
+  public static final int SAMP_422  = 1;
+  /**
+   * 4:2:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x2 block of pixels in the source image.
+   */
+  public static final int SAMP_420  = 2;
+  /**
+   * Grayscale.  The JPEG or YUV image will contain no chrominance components.
+   */
+  public static final int SAMP_GRAY = 3;
+  /**
+   * 4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 1x2 block of pixels in the source image.
+   * Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  public static final int SAMP_440  = 4;
+  /**
+   * 4:1:1 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 4x1 block of pixels in the source image.
+   * JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+   * same size as those compressed with 4:2:0 subsampling, and in the
+   * aggregate, both subsampling methods produce approximately the same
+   * perceptual quality.  However, 4:1:1 is better able to reproduce sharp
+   * horizontal features.  Note that 4:1:1 subsampling is not fully accelerated
+   * in libjpeg-turbo.
+   */
+  public static final int SAMP_411  = 5;
+
+
+  /**
+   * Returns the MCU block width for the given level of chrominance
+   * subsampling.
+   *
+   * @param subsamp the level of chrominance subsampling (one of
+   * <code>SAMP_*</code>)
+   * @return the MCU block width for the given level of chrominance subsampling
+   * @throws Exception if subsamp is outside the range 0 to NUMSAMP - 1
+   */
+  public static int getMCUWidth(int subsamp) throws Exception {
+    if (subsamp < 0 || subsamp >= NUMSAMP)
+      throw new Exception("Invalid subsampling type");
+    return mcuWidth[subsamp];
+  }
+
+  private static final int[] mcuWidth = {  // indexed by SAMP_* constant
+    8, 16, 16, 8, 8, 32  // 444, 422, 420, GRAY, 440, 411
+  };
+
+
+  /**
+   * Returns the MCU block height for the given level of chrominance
+   * subsampling.
+   *
+   * @param subsamp the level of chrominance subsampling (one of
+   * <code>SAMP_*</code>)
+   * @return the MCU block height for the given level of chrominance
+   * subsampling
+   * @throws Exception if subsamp is outside the range 0 to NUMSAMP - 1
+   */
+  public static int getMCUHeight(int subsamp) throws Exception {
+    if (subsamp < 0 || subsamp >= NUMSAMP)
+      throw new Exception("Invalid subsampling type");
+    return mcuHeight[subsamp];
+  }
+
+  private static final int[] mcuHeight = {  // indexed by SAMP_* constant
+    8, 8, 16, 8, 16, 8  // 444, 422, 420, GRAY, 440, 411
+  };
+
+
+  /**
+   * The number of pixel formats
+   */
+  public static final int NUMPF   = 12;
+  /**
+   * RGB pixel format.  The red, green, and blue components in the image are
+   * stored in 3-byte pixels in the order R, G, B from lowest to highest byte
+   * address within each pixel.
+   */
+  public static final int PF_RGB  = 0;
+  /**
+   * BGR pixel format.  The red, green, and blue components in the image are
+   * stored in 3-byte pixels in the order B, G, R from lowest to highest byte
+   * address within each pixel.
+   */
+  public static final int PF_BGR  = 1;
+  /**
+   * RGBX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order R, G, B from lowest to highest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  public static final int PF_RGBX = 2;
+  /**
+   * BGRX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order B, G, R from lowest to highest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  public static final int PF_BGRX = 3;
+  /**
+   * XBGR pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order R, G, B from highest to lowest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  public static final int PF_XBGR = 4;
+  /**
+   * XRGB pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order B, G, R from highest to lowest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  public static final int PF_XRGB = 5;
+  /**
+   * Grayscale pixel format.  Each 1-byte pixel represents a luminance
+   * (brightness) level from 0 to 255.
+   */
+  public static final int PF_GRAY = 6;
+  /**
+   * RGBA pixel format.  This is the same as {@link #PF_RGBX}, except that when
+   * decompressing, the X byte is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  public static final int PF_RGBA = 7;
+  /**
+   * BGRA pixel format.  This is the same as {@link #PF_BGRX}, except that when
+   * decompressing, the X byte is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  public static final int PF_BGRA = 8;
+  /**
+   * ABGR pixel format.  This is the same as {@link #PF_XBGR}, except that when
+   * decompressing, the X byte is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  public static final int PF_ABGR = 9;
+  /**
+   * ARGB pixel format.  This is the same as {@link #PF_XRGB}, except that when
+   * decompressing, the X byte is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  public static final int PF_ARGB = 10;
+  /**
+   * CMYK pixel format.  Unlike RGB, which is an additive color model used
+   * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+   * color model used primarily for printing.  In the CMYK color model, the
+   * value of each color component typically corresponds to an amount of cyan,
+   * magenta, yellow, or black ink that is applied to a white background.  In
+   * order to convert between CMYK and RGB, it is necessary to use a color
+   * management system (CMS.)  A CMS will attempt to map colors within the
+   * printer's gamut to perceptually similar colors in the display's gamut and
+   * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+   * be defined with a simple formula.  Thus, such a conversion is out of scope
+   * for a codec library.  However, the TurboJPEG API allows for compressing
+   * CMYK pixels into a YCCK JPEG image (see {@link #CS_YCCK}) and
+   * decompressing YCCK JPEG images into CMYK pixels.
+   */
+  public static final int PF_CMYK = 11;
+
+
+  /**
+   * Returns the pixel size (in bytes) for the given pixel format.
+   *
+   * @param pixelFormat the pixel format (one of <code>PF_*</code>)
+   * @return the pixel size (in bytes) for the given pixel format
+   * @throws Exception if pixelFormat is outside the range 0 to NUMPF - 1
+   */
+  public static int getPixelSize(int pixelFormat) throws Exception {
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
+      throw new Exception("Invalid pixel format");
+    return pixelSize[pixelFormat];
+  }
+
+  private static final int[] pixelSize = {  // indexed by PF_* constant
+    3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4  // PF_RGB (0) through PF_CMYK (11)
+  };
+
+
+  /**
+   * For the given pixel format, returns the number of bytes that the red
+   * component is offset from the start of the pixel.  For instance, if a pixel
+   * of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+   * then the red component will be
+   * <code>pixel[TJ.getRedOffset(TJ.PF_BGRX)]</code>.
+   *
+   * @param pixelFormat the pixel format (one of <code>PF_*</code>)
+   * @return the red offset for the given pixel format (-1 for PF_CMYK)
+   * @throws Exception if pixelFormat is outside the range 0 to NUMPF - 1
+   */
+  public static int getRedOffset(int pixelFormat) throws Exception {
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
+      throw new Exception("Invalid pixel format");
+    return redOffset[pixelFormat];
+  }
+
+  private static final int[] redOffset = {  // indexed by PF_* constant
+    0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1  // last entry (PF_CMYK) is -1
+  };
+
+
+  /**
+   * For the given pixel format, returns the number of bytes that the green
+   * component is offset from the start of the pixel.  For instance, if a pixel
+   * of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+   * then the green component will be
+   * <code>pixel[TJ.getGreenOffset(TJ.PF_BGRX)]</code>.
+   *
+   * @param pixelFormat the pixel format (one of <code>PF_*</code>)
+   * @return the green offset for the given pixel format (-1 for PF_CMYK)
+   * @throws Exception if pixelFormat is outside the range 0 to NUMPF - 1
+   */
+  public static int getGreenOffset(int pixelFormat) throws Exception {
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
+      throw new Exception("Invalid pixel format");
+    return greenOffset[pixelFormat];
+  }
+
+  private static final int[] greenOffset = {  // indexed by PF_* constant
+    1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1  // last entry (PF_CMYK) is -1
+  };
+
+
+  /**
+   * For the given pixel format, returns the number of bytes that the blue
+   * component is offset from the start of the pixel.  For instance, if a pixel
+   * of format <code>TJ.PF_BGRX</code> is stored in <code>char pixel[]</code>,
+   * then the blue component will be
+   * <code>pixel[TJ.getBlueOffset(TJ.PF_BGRX)]</code>.
+   *
+   * @param pixelFormat the pixel format (one of <code>PF_*</code>)
+   * @return the blue offset for the given pixel format (-1 for PF_CMYK)
+   * @throws Exception if pixelFormat is outside the range 0 to NUMPF - 1
+   */
+  public static int getBlueOffset(int pixelFormat) throws Exception {
+    if (pixelFormat < 0 || pixelFormat >= NUMPF)
+      throw new Exception("Invalid pixel format");
+    return blueOffset[pixelFormat];
+  }
+
+  private static final int[] blueOffset = {  // indexed by PF_* constant
+    2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1  // last entry (PF_CMYK) is -1
+  };
+
+
+  /**
+   * The number of JPEG colorspaces
+   */
+  public static final int NUMCS = 5;
+  /**
+   * RGB colorspace.  When compressing the JPEG image, the R, G, and B
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  RGB JPEG images can be
+   * decompressed to any of the extended RGB pixel formats or grayscale, but
+   * they cannot be decompressed to YUV images.
+   */
+  public static final int CS_RGB = 0;
+  /**
+   * YCbCr colorspace.  YCbCr is not an absolute colorspace but rather a
+   * mathematical transformation of RGB designed solely for storage and
+   * transmission.  YCbCr images must be converted to RGB before they can
+   * actually be displayed.  In the YCbCr colorspace, the Y (luminance)
+   * component represents the black &amp; white portion of the original image, and
+   * the Cb and Cr (chrominance) components represent the color portion of the
+   * original image.  Originally, the analog equivalent of this transformation
+   * allowed the same signal to drive both black &amp; white and color televisions,
+   * but JPEG images use YCbCr primarily because it allows the color data to be
+   * optionally subsampled for the purposes of reducing bandwidth or disk
+   * space.  YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+   * can be compressed from and decompressed to any of the extended RGB pixel
+   * formats or grayscale, or they can be decompressed to YUV planar images.
+   */
+  public static final int CS_YCbCr = 1;
+  /**
+   * Grayscale colorspace.  The JPEG image retains only the luminance data (Y
+   * component), and any color data from the source image is discarded.
+   * Grayscale JPEG images can be compressed from and decompressed to any of
+   * the extended RGB pixel formats or grayscale, or they can be decompressed
+   * to YUV planar images.
+   */
+  public static final int CS_GRAY = 2;
+  /**
+   * CMYK colorspace.  When compressing the JPEG image, the C, M, Y, and K
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  CMYK JPEG images can
+   * only be decompressed to CMYK pixels.
+   */
+  public static final int CS_CMYK = 3;
+  /**
+   * YCCK colorspace.  YCCK (AKA "YCbCrK") is not an absolute colorspace but
+   * rather a mathematical transformation of CMYK designed solely for storage
+   * and transmission.  It is to CMYK as YCbCr is to RGB.  CMYK pixels can be
+   * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+   * components in the YCCK pixels can be subsampled without incurring major
+   * perceptual loss.  YCCK JPEG images can only be compressed from and
+   * decompressed to CMYK pixels.
+   */
+  public static final int CS_YCCK = 4;
+
+
+  /**
+   * The uncompressed source/destination image is stored in bottom-up (Windows,
+   * OpenGL) order, not top-down (X11) order.
+   */
+  public static final int FLAG_BOTTOMUP     = 2;
+
+  @Deprecated
+  public static final int FLAG_FORCEMMX     = 8;
+  @Deprecated
+  public static final int FLAG_FORCESSE     = 16;
+  @Deprecated
+  public static final int FLAG_FORCESSE2    = 32;
+  @Deprecated
+  public static final int FLAG_FORCESSE3    = 128;
+
+  /**
+   * When decompressing an image that was compressed using chrominance
+   * subsampling, use the fastest chrominance upsampling algorithm available in
+   * the underlying codec.  The default is to use smooth upsampling, which
+   * creates a smooth transition between neighboring chrominance components in
+   * order to reduce upsampling artifacts in the decompressed image.
+   */
+  public static final int FLAG_FASTUPSAMPLE = 256;
+  /**
+   * Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
+   * default if this flag is not specified is implementation-specific.  For
+   * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+   * algorithm by default when compressing, because this has been shown to have
+   * only a very slight effect on accuracy, but it uses the accurate algorithm
+   * when decompressing, because this has been shown to have a larger effect.
+   */
+  public static final int FLAG_FASTDCT      =  2048;
+  /**
+   * Use the most accurate DCT/IDCT algorithm available in the underlying
+   * codec.  The default if this flag is not specified is
+   * implementation-specific.  For example, the implementation of TurboJPEG for
+   * libjpeg[-turbo] uses the fast algorithm by default when compressing,
+   * because this has been shown to have only a very slight effect on accuracy,
+   * but it uses the accurate algorithm when decompressing, because this has
+   * been shown to have a larger effect.
+   */
+  public static final int FLAG_ACCURATEDCT  =  4096;
+
+
+  /**
+   * Returns the maximum size of the buffer (in bytes) required to hold a JPEG
+   * image with the given width, height, and level of chrominance subsampling.
+   *
+   * @param width the width (in pixels) of the JPEG image
+   *
+   * @param height the height (in pixels) of the JPEG image
+   *
+   * @param jpegSubsamp the level of chrominance subsampling to be used when
+   * generating the JPEG image (one of {@link TJ TJ.SAMP_*})
+   *
+   * @return the maximum size of the buffer (in bytes) required to hold a JPEG
+   * image with the given width, height, and level of chrominance subsampling
+   */
+  public static native int bufSize(int width, int height, int jpegSubsamp)
+    throws Exception;
+
+  /**
+   * Returns the size of the buffer (in bytes) required to hold a YUV planar
+   * image with the given width, height, and level of chrominance subsampling.
+   *
+   * @param width the width (in pixels) of the YUV image
+   *
+   * @param pad the width of each line in each plane of the image is padded to
+   *        the nearest multiple of this number of bytes (must be a power of
+   *        2.)
+   *
+   * @param height the height (in pixels) of the YUV image
+   *
+   * @param subsamp the level of chrominance subsampling used in the YUV
+   * image (one of {@link TJ TJ.SAMP_*})
+   *
+   * @return the size of the buffer (in bytes) required to hold a YUV planar
+   * image with the given width, height, and level of chrominance subsampling
+   */
+  public static native int bufSizeYUV(int width, int pad, int height,
+                                      int subsamp)
+    throws Exception;
+
+  /**
+   * @deprecated Use {@link #bufSizeYUV(int, int, int, int)} instead.
+   */
+  @Deprecated
+  public static native int bufSizeYUV(int width, int height, int subsamp)
+    throws Exception;
+
+  /**
+   * Returns a list of fractional scaling factors that the JPEG decompressor in
+   * this implementation of TurboJPEG supports.
+   *
+   * @return a list of fractional scaling factors that the JPEG decompressor in
+   * this implementation of TurboJPEG supports
+   */
+  public static native TJScalingFactor[] getScalingFactors()
+    throws Exception;
+
+  static {
+    TJLoader.load();
+  }
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
new file mode 100644
index 0000000..0debf53
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
@@ -0,0 +1,611 @@
+/*
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+import java.awt.image.*;
+import java.nio.*;
+
+/**
+ * TurboJPEG compressor
+ */
+public class TJCompressor {
+
+  private static final String NO_ASSOC_ERROR =
+    "No source image is associated with this instance";
+
+  /**
+   * Create a TurboJPEG compressor instance.
+   */
+  public TJCompressor() throws Exception {
+    init();
+  }
+
+  /**
+   * Create a TurboJPEG compressor instance and associate the uncompressed
+   * source image stored in <code>srcImage</code> with the newly created
+   * instance.
+   *
+   * @param srcImage see {@link #setSourceImage} for description
+   *
+   * @param x see {@link #setSourceImage} for description
+   *
+   * @param y see {@link #setSourceImage} for description
+   *
+   * @param width see {@link #setSourceImage} for description
+   *
+   * @param pitch see {@link #setSourceImage} for description
+   *
+   * @param height see {@link #setSourceImage} for description
+   *
+   * @param pixelFormat pixel format of the source image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   */
+  public TJCompressor(byte[] srcImage, int x, int y, int width, int pitch,
+                      int height, int pixelFormat) throws Exception {
+    setSourceImage(srcImage, x, y, width, pitch, height, pixelFormat);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #TJCompressor(byte[], int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
+  public TJCompressor(byte[] srcImage, int width, int pitch, int height,
+                      int pixelFormat) throws Exception {
+    setSourceImage(srcImage, width, pitch, height, pixelFormat);
+  }
+
+  /**
+   * Create a TurboJPEG compressor instance and associate the uncompressed
+   * source image stored in <code>srcImage</code> with the newly created
+   * instance.
+   *
+   * @param srcImage see
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} for description
+   *
+   * @param x see
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} for description
+   *
+   * @param y see
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} for description
+   *
+   * @param width see
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} for description
+   *
+   * @param height see
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} for description
+   */
+  public TJCompressor(BufferedImage srcImage, int x, int y, int width,
+                      int height) throws Exception {
+    setSourceImage(srcImage, x, y, width, height);
+  }
+
+  /**
+   * Associate an uncompressed RGB, grayscale, or CMYK source image with this
+   * compressor instance.
+   *
+   * @param srcImage image buffer containing RGB, grayscale, or CMYK pixels to
+   * be compressed or encoded
+   *
+   * @param x x offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param y y offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param width width (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded
+   *
+   * @param pitch bytes per line of the source image.  Normally, this should be
+   * <code>width * TJ.pixelSize(pixelFormat)</code> if the source image is
+   * unpadded, but you can use this parameter to, for instance, specify that
+   * the scanlines in the source image are padded to a 4-byte boundary or to
+   * compress/encode a JPEG or YUV image from a region of a larger source
+   * image.  You can also be clever and use this parameter to skip lines, etc.
+   * Setting this parameter to 0 is the equivalent of setting it to
+   * <code>width * TJ.pixelSize(pixelFormat)</code>.
+   *
+   * @param height height (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded
+   *
+   * @param pixelFormat pixel format of the source image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   */
+  public void setSourceImage(byte[] srcImage, int x, int y, int width,
+                             int pitch, int height, int pixelFormat)
+                             throws Exception {
+    if (handle == 0)
+      init();
+    boolean badRegion = (x < 0 || y < 0 || width < 1 || height < 1);
+    boolean badFormat = (pixelFormat < 0 || pixelFormat >= TJ.NUMPF);
+    if (srcImage == null || badRegion || pitch < 0 || badFormat)
+      throw new Exception("Invalid argument in setSourceImage()");
+    srcBuf = srcImage;
+    srcWidth = width;
+    // A pitch of 0 is shorthand for unpadded scanlines.
+    srcPitch = (pitch != 0) ? pitch : width * TJ.getPixelSize(pixelFormat);
+    srcHeight = height;
+    srcPixelFormat = pixelFormat;
+    srcX = x;
+    srcY = y;
+    srcBufInt = null;
+    srcYUVImage = null;
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #setSourceImage(byte[], int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
+  public void setSourceImage(byte[] srcImage, int width, int pitch,
+                             int height, int pixelFormat) throws Exception {
+    setSourceImage(srcImage, 0, 0, width, pitch, height, pixelFormat);
+    srcX = srcY = -1;  // negative offsets make compress() pick the native overload without x/y
+  }
+
+  /**
+   * Associate an uncompressed RGB or grayscale source image with this
+   * compressor instance.
+   *
+   * @param srcImage a <code>BufferedImage</code> instance containing RGB or
+   * grayscale pixels to be compressed or encoded
+   *
+   * @param x x offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param y y offset (in pixels) of the region in the source image from which
+   * the JPEG or YUV image should be compressed/encoded
+   *
+   * @param width width (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded (0 = use the
+   * width of the source image)
+   *
+   * @param height height (in pixels) of the region in the source image from
+   * which the JPEG or YUV image should be compressed/encoded (0 = use the
+   * height of the source image)
+   */
+  public void setSourceImage(BufferedImage srcImage, int x, int y, int width,
+                             int height) throws Exception {
+    if (handle == 0) init();
+    if (srcImage == null || x < 0 || y < 0 || width < 0 || height < 0)
+      throw new Exception("Invalid argument in setSourceImage()");
+    srcX = x;  srcY = y;
+    srcWidth = (width == 0) ? srcImage.getWidth() : width;
+    srcHeight = (height == 0) ? srcImage.getHeight() : height;
+    // Validate the effective region (0 = full dimension), avoiding overflow.
+    if (srcWidth > srcImage.getWidth() - x || srcHeight > srcImage.getHeight() - y)
+      throw new Exception("Compression region exceeds the bounds of the source image");
+
+    int pixelFormat;
+    boolean intPixels = false;
+    if (byteOrder == null)
+      byteOrder = ByteOrder.nativeOrder();
+    switch(srcImage.getType()) {
+      case BufferedImage.TYPE_3BYTE_BGR:
+        pixelFormat = TJ.PF_BGR;  break;
+      case BufferedImage.TYPE_4BYTE_ABGR:
+      case BufferedImage.TYPE_4BYTE_ABGR_PRE:
+        pixelFormat = TJ.PF_XBGR;  break;
+      case BufferedImage.TYPE_BYTE_GRAY:
+        pixelFormat = TJ.PF_GRAY;  break;
+      case BufferedImage.TYPE_INT_BGR:
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
+          pixelFormat = TJ.PF_XBGR;
+        else
+          pixelFormat = TJ.PF_RGBX;
+        intPixels = true;  break;
+      case BufferedImage.TYPE_INT_RGB:
+      case BufferedImage.TYPE_INT_ARGB:
+      case BufferedImage.TYPE_INT_ARGB_PRE:
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
+          pixelFormat = TJ.PF_XRGB;
+        else
+          pixelFormat = TJ.PF_BGRX;
+        intPixels = true;  break;
+      default:
+        throw new Exception("Unsupported BufferedImage format");
+    }
+    srcPixelFormat = pixelFormat;
+
+    WritableRaster wr = srcImage.getRaster();
+    if (intPixels) {
+      SinglePixelPackedSampleModel sm =
+        (SinglePixelPackedSampleModel)srcImage.getSampleModel();
+      srcStride = sm.getScanlineStride();
+      DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
+      srcBufInt = db.getData();
+      srcBuf = null;
+    } else {
+      ComponentSampleModel sm =
+        (ComponentSampleModel)srcImage.getSampleModel();
+      int pixelSize = sm.getPixelStride();
+      if (pixelSize != TJ.getPixelSize(pixelFormat))
+        throw new Exception("Inconsistency between pixel format and pixel size in BufferedImage");
+      srcPitch = sm.getScanlineStride();
+      DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
+      srcBuf = db.getData();
+      srcBufInt = null;
+    }
+    srcYUVImage = null;
+  }
+
+  /**
+   * Associate an uncompressed YUV planar source image with this compressor
+   * instance.
+   *
+   * @param srcImage YUV planar image to be compressed
+   */
+  public void setSourceImage(YUVImage srcImage) throws Exception {
+    if (handle == 0)
+      init();
+    if (null == srcImage)
+      throw new Exception("Invalid argument in setSourceImage()");
+    srcBuf = null;  srcBufInt = null;
+    srcYUVImage = srcImage;
+  }
+
+  /**
+   * Set the level of chrominance subsampling for subsequent compress/encode
+   * operations.  When pixels are converted from RGB to YCbCr (see
+   * {@link TJ#CS_YCbCr}) or from CMYK to YCCK (see {@link TJ#CS_YCCK}) as part
+   * of the JPEG compression process, some of the Cb and Cr (chrominance)
+   * components can be discarded or averaged together to produce a smaller
+   * image with little perceptible loss of image clarity (the human eye is more
+   * sensitive to small changes in brightness than to small changes in color.)
+   * This is called "chrominance subsampling".
+   * <p>
+   * NOTE: This method has no effect when compressing a JPEG image from a YUV
+   * planar source.  In that case, the level of chrominance subsampling in
+   * the JPEG image is determined by the source.  Further, this method has no
+   * effect when encoding to a pre-allocated {@link YUVImage} instance.  In
+   * that case, the level of chrominance subsampling is determined by the
+   * destination.
+   *
+   * @param newSubsamp the level of chrominance subsampling to use in
+   * subsequent compress/encode operations (one of
+   * {@link TJ#SAMP_444 TJ.SAMP_*})
+   */
+  public void setSubsamp(int newSubsamp) throws Exception {
+    boolean inRange = (newSubsamp >= 0 && newSubsamp < TJ.NUMSAMP);
+    if (!inRange) throw new Exception("Invalid argument in setSubsamp()");
+    subsamp = newSubsamp;
+  }
+
+  /**
+   * Set the JPEG image quality level for subsequent compress operations.
+   *
+   * @param quality the new JPEG image quality level (1 to 100, 1 = worst,
+   * 100 = best)
+   */
+  public void setJPEGQuality(int quality) throws Exception {
+    boolean inRange = (quality >= 1 && quality <= 100);
+    if (!inRange) throw new Exception("Invalid argument in setJPEGQuality()");
+    jpegQuality = quality;
+  }
+
+  /**
+   * Compress the uncompressed source image associated with this compressor
+   * instance and output a JPEG image to the given destination buffer.
+   *
+   * @param dstBuf buffer that will receive the JPEG image.  Use
+   * {@link TJ#bufSize} to determine the maximum size for this buffer based on
+   * the source image's width and height and the desired level of chrominance
+   * subsampling.
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void compress(byte[] dstBuf, int flags) throws Exception {
+    if (dstBuf == null || flags < 0)
+      throw new Exception("Invalid argument in compress()");
+    if (srcBuf == null && srcBufInt == null && srcYUVImage == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (jpegQuality < 0)
+      throw new Exception("JPEG Quality not set");
+    if (subsamp < 0 && srcYUVImage == null)
+      throw new Exception("Subsampling level not set");
+
+    if (srcYUVImage != null)
+      compressedSize = compressFromYUV(srcYUVImage.getBuf(),
+                                       srcYUVImage.getWidth(),
+                                       srcYUVImage.getPad(),
+                                       srcYUVImage.getHeight(),
+                                       srcYUVImage.getSubsamp(),
+                                       dstBuf, jpegQuality, flags);
+    else if (srcBuf != null) {
+      if (srcX >= 0 && srcY >= 0)
+        compressedSize = compress(srcBuf, srcX, srcY, srcWidth, srcPitch,
+                                  srcHeight, srcPixelFormat, dstBuf, subsamp,
+                                  jpegQuality, flags);
+      else
+        compressedSize = compress(srcBuf, srcWidth, srcPitch, srcHeight,
+                                  srcPixelFormat, dstBuf, subsamp, jpegQuality,
+                                  flags);
+    } else if (srcBufInt != null) {
+      if (srcX >= 0 && srcY >= 0)
+        compressedSize = compress(srcBufInt, srcX, srcY, srcWidth, srcStride,
+                                  srcHeight, srcPixelFormat, dstBuf, subsamp,
+                                  jpegQuality, flags);
+      else
+        compressedSize = compress(srcBufInt, srcWidth, srcStride, srcHeight,
+                                  srcPixelFormat, dstBuf, subsamp, jpegQuality,
+                                  flags);
+    }
+  }
+
+  /**
+   * Compress the source image associated with this compressor instance and
+   * return a buffer containing a JPEG image.  For a YUV planar source, the
+   * buffer is sized from that image's dimensions and subsampling level.
+   * @param flags bitwise OR of one or more {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   * @return a buffer containing a JPEG image.  The length of this buffer will
+   * not be equal to the size of the JPEG image.  Use {@link
+   * #getCompressedSize} to obtain the size of the JPEG image.
+   */
+  public byte[] compress(int flags) throws Exception {
+    YUVImage yuv = srcYUVImage;
+    if (yuv == null && (srcWidth < 1 || srcHeight < 1))
+      throw new Exception(NO_ASSOC_ERROR);
+    byte[] buf = new byte[(yuv == null) ? TJ.bufSize(srcWidth, srcHeight, subsamp) :
+      TJ.bufSize(yuv.getWidth(), yuv.getHeight(), yuv.getSubsamp())];
+    compress(buf, flags);
+    return buf;
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} and
+   * {@link #compress(byte[], int)} instead.
+   */
+  @Deprecated
+  public void compress(BufferedImage srcImage, byte[] dstBuf, int flags)
+                       throws Exception {
+    setSourceImage(srcImage, 0, 0, 0, 0);
+    compress(dstBuf, flags);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} and
+   * {@link #compress(int)} instead.
+   */
+  @Deprecated
+  public byte[] compress(BufferedImage srcImage, int flags) throws Exception {
+    setSourceImage(srcImage, 0, 0, 0, 0);
+    return compress(flags);
+  }
+
+  /**
+   * Encode the uncompressed source image associated with this compressor
+   * instance into a YUV planar image and store it in the given
+   * <code>YUVImage</code> instance.   This method uses the accelerated color
+   * conversion routines in TurboJPEG's underlying codec but does not execute
+   * any of the other steps in the JPEG compression process.  Encoding
+   * CMYK source images to YUV is not supported.
+   *
+   * @param dstImage {@link YUVImage} instance that will receive the YUV planar
+   * image
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void encodeYUV(YUVImage dstImage, int flags) throws Exception {
+    if (dstImage == null || flags < 0)
+      throw new Exception("Invalid argument in encodeYUV()");
+    if (srcBuf == null && srcBufInt == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (srcYUVImage != null)
+      throw new Exception("Source image is not correct type");
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
+    if (srcWidth != dstImage.getWidth() || srcHeight != dstImage.getHeight())
+      throw new Exception("Destination image is the wrong size");
+
+    if (srcBufInt != null) {
+      encodeYUV(srcBufInt, srcX, srcY, srcWidth, srcStride, srcHeight,
+                srcPixelFormat, dstImage.getBuf(), dstImage.getPad(),
+                dstImage.getSubsamp(), flags);
+    } else {
+      encodeYUV(srcBuf, srcX, srcY, srcWidth, srcPitch, srcHeight,
+                srcPixelFormat, dstImage.getBuf(), dstImage.getPad(),
+                dstImage.getSubsamp(), flags);
+    }
+    compressedSize = dstImage.getSize();
+  }
+
+  /**
+   * @deprecated Use {@link #encodeYUV(YUVImage, int)} instead.
+   */
+  @Deprecated
+  public void encodeYUV(byte[] dstBuf, int flags) throws Exception {
+    if(dstBuf == null)
+      throw new Exception("Invalid argument in encodeYUV()");
+    if (srcWidth < 1 || srcHeight < 1)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
+    YUVImage yuvImage = new YUVImage(dstBuf, srcWidth, 4, srcHeight, subsamp);
+    encodeYUV(yuvImage, flags);
+  }
+
+  /**
+   * Encode the uncompressed source image associated with this compressor
+   * instance into a YUV planar image and return a <code>YUVImage</code>
+   * instance containing the encoded image.  This method uses the accelerated
+   * color conversion routines in TurboJPEG's underlying codec but does not
+   * execute any of the other steps in the JPEG compression process.  Encoding
+   * CMYK source images to YUV is not supported.
+   *
+   * @param pad the width of each line in each plane of the YUV image will be
+   * padded to the nearest multiple of this number of bytes (must be a power of
+   * 2.)
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   *
+   * @return a YUV planar image
+   */
+  public YUVImage encodeYUV(int pad, int flags) throws Exception {
+    boolean haveSource = (srcWidth >= 1 && srcHeight >= 1);
+    if (!haveSource) throw new Exception(NO_ASSOC_ERROR);
+    if (subsamp < 0) throw new Exception("Subsampling level not set");
+    // pad must be a positive power of 2, i.e. have exactly one bit set.
+    boolean padIsPowerOf2 = (pad >= 1 && (pad & (pad - 1)) == 0);
+    if (!padIsPowerOf2) throw new Exception("Invalid argument in encodeYUV()");
+    YUVImage dst = new YUVImage(srcWidth, pad, srcHeight, subsamp);
+    encodeYUV(dst, flags);
+    return dst;
+  }
+
+  /**
+   * @deprecated Use {@link #encodeYUV(int, int)} instead.
+   */
+  @Deprecated
+  public byte[] encodeYUV(int flags) throws Exception {
+    if (srcWidth < 1 || srcHeight < 1)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (subsamp < 0)
+      throw new Exception("Subsampling level not set");
+    YUVImage yuvImage = new YUVImage(srcWidth, 4, srcHeight, subsamp);
+    encodeYUV(yuvImage, flags);
+    return yuvImage.getBuf();
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} and
+   * {@link #encodeYUV(byte[], int)} instead.
+   */
+  @Deprecated
+  public void encodeYUV(BufferedImage srcImage, byte[] dstBuf, int flags)
+    throws Exception {
+    setSourceImage(srcImage, 0, 0, 0, 0);
+    encodeYUV(dstBuf, flags);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #setSourceImage(BufferedImage, int, int, int, int)} and
+   * {@link #encodeYUV(int)} instead.
+   */
+  @Deprecated
+  public byte[] encodeYUV(BufferedImage srcImage, int flags) throws Exception {
+    setSourceImage(srcImage, 0, 0, 0, 0);
+    return encodeYUV(flags);
+  }
+
+  /**
+   * Returns the size of the image (in bytes) generated by the most recent
+   * compress/encode operation.
+   *
+   * @return the size of the image (in bytes) generated by the most recent
+   * compress/encode operation
+   */
+  public int getCompressedSize() {
+    return compressedSize;
+  }
+
+  /**
+   * Free the native structures associated with this compressor instance.
+   */
+  public void close() throws Exception {
+    destroy();
+  }
+
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } catch(Exception e) {  // any failure in close() is swallowed here
+    } finally {
+      super.finalize();  // always chain to the superclass finalizer
+    }
+  };
+
+  private native void init() throws Exception;
+
+  private native void destroy() throws Exception;
+
+  // JPEG size in bytes is returned
+  private native int compress(byte[] srcBuf, int width, int pitch,
+    int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception; // deprecated
+
+  private native int compress(byte[] srcBuf, int x, int y, int width,
+    int pitch, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp,
+    int jpegQual, int flags) throws Exception;
+
+  private native int compress(int[] srcBuf, int width, int stride,
+    int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp, int jpegQual,
+    int flags) throws Exception; // deprecated
+
+  private native int compress(int[] srcBuf, int x, int y, int width,
+    int stride, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp,
+    int jpegQual, int flags) throws Exception;
+
+  private native int compressFromYUV(byte[] srcBuf, int width, int pad,
+    int height, int subsamp, byte[] dstBuf, int jpegQual, int flags)
+    throws Exception;
+
+  private native void encodeYUV(byte[] srcBuf, int width, int pitch,
+    int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
+    throws Exception; // deprecated
+
+  private native void encodeYUV(byte[] srcBuf, int x, int y, int width,
+    int pitch, int height, int pixelFormat, byte[] dstBuf, int pad,
+    int subsamp, int flags) throws Exception;
+
+  private native void encodeYUV(int[] srcBuf, int width, int stride,
+    int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
+    throws Exception; // deprecated
+
+  private native void encodeYUV(int[] srcBuf, int x, int y, int width,
+    int pitch, int height, int pixelFormat, byte[] dstBuf, int pad,
+    int subsamp, int flags) throws Exception;
+
+  static {
+    TJLoader.load();
+  }
+
+  private long handle = 0;
+  private byte[] srcBuf = null;
+  private int[] srcBufInt = null;
+  private int srcWidth = 0;
+  private int srcHeight = 0;
+  private int srcX = -1;
+  private int srcY = -1;
+  private int srcPitch = 0;
+  private int srcStride = 0;
+  private int srcPixelFormat = -1;
+  private YUVImage srcYUVImage = null;
+  private int subsamp = -1;
+  private int jpegQuality = -1;
+  private int compressedSize = 0;
+  private int yuvPad = 4;
+  private ByteOrder byteOrder = null;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
new file mode 100644
index 0000000..bf78f2e
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C)2011, 2013 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+import java.awt.*;
+import java.nio.*;
+
+/**
+ * Custom filter callback interface
+ */
+public interface TJCustomFilter {
+
+  /**
+   * A callback function that can be used to modify the DCT coefficients after
+   * they are losslessly transformed but before they are transcoded to a new
+   * JPEG image.  This allows for custom filters or other transformations to be
+   * applied in the frequency domain.
+   *
+   * @param coeffBuffer a buffer containing transformed DCT coefficients.
+   * (NOTE: this buffer is not guaranteed to be valid once the callback
+   * returns, so applications wishing to hand off the DCT coefficients to
+   * another function or library should make a copy of them within the body of
+   * the callback.)
+   *
+   * @param bufferRegion rectangle containing the width and height of
+   * <code>coeffBuffer</code> as well as its offset relative to the component
+   * plane.  TurboJPEG implementations may choose to split each component plane
+   * into multiple DCT coefficient buffers and call the callback function once
+   * for each buffer.
+   *
+   * @param planeRegion rectangle containing the width and height of the
+   * component plane to which <code>coeffBuffer</code> belongs
+   *
+   * @param componentID ID number of the component plane to which
+   * <code>coeffBuffer</code> belongs (Y, Cb, and Cr have, respectively, ID's
+   * of 0, 1, and 2 in typical JPEG images.)
+   *
+   * @param transformID ID number of the transformed image to which
+   * <code>coeffBuffer</code> belongs.  This is the same as the index of the
+   * transform in the <code>transforms</code> array that was passed to {@link
+   * TJTransformer#transform TJTransformer.transform()}.
+   *
+   * @param transform a {@link TJTransform} instance that specifies the
+   * parameters and/or cropping region for this transform
+   */
+  void customFilter(ShortBuffer coeffBuffer, Rectangle bufferRegion,
+                    Rectangle planeRegion, int componentID, int transformID,
+                    TJTransform transform)
+    throws Exception;
+}
diff --git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
new file mode 100644
index 0000000..8305721
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
@@ -0,0 +1,863 @@
+/*
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+import java.awt.image.*;
+import java.nio.*;
+
+/**
+ * TurboJPEG decompressor
+ */
+public class TJDecompressor {
+
+  private static final String NO_ASSOC_ERROR =
+    "No source image is associated with this instance";
+
+  /**
+   * Create a TurboJPEG decompressor instance.
+   */
+  public TJDecompressor() throws Exception {
+    init();  // init() only; this constructor associates no source image
+  }
+
+  /**
+   * Create a TurboJPEG decompressor instance and associate the JPEG source
+   * image stored in <code>jpegImage</code> with the newly created instance.
+   *
+   * @param jpegImage JPEG image buffer (size of the JPEG image is assumed to
+   * be the length of the array)
+   */
+  public TJDecompressor(byte[] jpegImage) throws Exception {
+    init();
+    setSourceImage(jpegImage, jpegImage.length);  // NPE if jpegImage is null
+  }
+
+  /**
+   * Create a TurboJPEG decompressor instance and associate the JPEG source
+   * image of length <code>imageSize</code> bytes stored in
+   * <code>jpegImage</code> with the newly created instance.
+   *
+   * @param jpegImage JPEG image buffer
+   *
+   * @param imageSize size of the JPEG image (in bytes)
+   */
+  public TJDecompressor(byte[] jpegImage, int imageSize) throws Exception {
+    init();
+    setSourceImage(jpegImage, imageSize);  // validates, then reads the header
+  }
+
+  /**
+   * Create a TurboJPEG decompressor instance and associate the YUV planar
+   * source image stored in <code>yuvImage</code> with the newly created
+   * instance.
+   *
+   * @param yuvImage {@link YUVImage} instance containing a YUV planar
+   * image to be decoded
+   */
+  public TJDecompressor(YUVImage yuvImage) throws Exception {
+    init();
+    setSourceImage(yuvImage);  // parameter shadows the yuvImage field here
+  }
+
+  /**
+   * Associate the JPEG image of length <code>imageSize</code> bytes stored in
+   * <code>srcImage</code> with this decompressor instance.  This image will
+   * be used as the source image for subsequent decompress operations.
+   *
+   * @param srcImage JPEG image buffer
+   *
+   * @param imageSize size of the JPEG image (in bytes)
+   */
+  public void setSourceImage(byte[] srcImage, int imageSize)
+    throws Exception {
+    if (srcImage == null || imageSize < 1)
+      throw new Exception("Invalid argument in setSourceImage()");
+    jpegBuf = srcImage;
+    jpegBufSize = imageSize;
+    decompressHeader(jpegBuf, jpegBufSize);
+    yuvImage = null;
+  }
+
+  /**
+   * @deprecated Use {@link #setSourceImage(byte[], int)} instead.
+   */
+  @Deprecated
+  public void setJPEGImage(byte[] jpegImage, int imageSize) throws Exception {
+    setSourceImage(jpegImage, imageSize);  // forwards both arguments unchanged
+  }
+
+  /**
+   * Associate the specified YUV planar source image with this decompressor
+   * instance.  Subsequent decompress operations will decode this image into an
+   * RGB or grayscale destination image.
+   *
+   * @param srcImage {@link YUVImage} instance containing a YUV planar image to
+   * be decoded
+   */
+  public void setSourceImage(YUVImage srcImage) throws Exception {
+    if (srcImage == null)
+      throw new Exception("Invalid argument in setSourceImage()");
+    // Drop any JPEG source; the src* header fields are not touched here.
+    jpegBuf = null;  jpegBufSize = 0;
+    yuvImage = srcImage;
+  }
+
+
+  /**
+   * Returns the width of the source image (JPEG or YUV) associated with this
+   * decompressor instance.
+   *
+   * @return the width of the source image (JPEG or YUV) associated with this
+   * decompressor instance
+   */
+  public int getWidth() throws Exception {
+    if (yuvImage == null) {  // JPEG path: srcWidth must be positive
+      if (srcWidth >= 1) return srcWidth;
+      throw new Exception(NO_ASSOC_ERROR);
+    }
+    return yuvImage.getWidth();
+  }
+
+  /**
+   * Returns the height of the source image (JPEG or YUV) associated with this
+   * decompressor instance.
+   *
+   * @return the height of the source image (JPEG or YUV) associated with this
+   * decompressor instance
+   */
+  public int getHeight() throws Exception {
+    if (yuvImage == null) {  // JPEG path: srcHeight must be positive
+      if (srcHeight >= 1) return srcHeight;
+      throw new Exception(NO_ASSOC_ERROR);
+    }
+    return yuvImage.getHeight();
+  }
+
+  /**
+   * Returns the level of chrominance subsampling used in the source image
+   * (JPEG or YUV) associated with this decompressor instance.  See
+   * {@link TJ#SAMP_444 TJ.SAMP_*}.
+   *
+   * @return the level of chrominance subsampling used in the source image
+   * (JPEG or YUV) associated with this decompressor instance
+   */
+  public int getSubsamp() throws Exception {
+    if (yuvImage == null) {  // JPEG path: validate the stored value
+      if (srcSubsamp < 0)
+        throw new Exception(NO_ASSOC_ERROR);
+      if (srcSubsamp < TJ.NUMSAMP) return srcSubsamp;
+      throw new Exception("JPEG header information is invalid");
+    }
+    return yuvImage.getSubsamp();
+  }
+
+  /**
+   * Returns the colorspace used in the source image (JPEG or YUV) associated
+   * with this decompressor instance.  See {@link TJ#CS_RGB TJ.CS_*}.  If the
+   * source image is YUV, then this always returns {@link TJ#CS_YCbCr}.
+   *
+   * @return the colorspace used in the source image (JPEG or YUV) associated
+   * with this decompressor instance
+   */
+  public int getColorspace() throws Exception {
+    if (yuvImage == null) {  // JPEG path: validate the stored value
+      if (srcColorspace < 0)
+        throw new Exception(NO_ASSOC_ERROR);
+      if (srcColorspace < TJ.NUMCS) return srcColorspace;
+      throw new Exception("JPEG header information is invalid");
+    }
+    return TJ.CS_YCbCr;  // a YUV source is always reported as YCbCr
+  }
+
+  /**
+   * Returns the source image buffer associated with this decompressor
+   * instance.
+   *
+   * @return the source image buffer associated with this decompressor instance
+   */
+  public byte[] getSourceBuf() throws Exception {
+    if (yuvImage == null) {  // JPEG path: hand back the JPEG buffer, if any
+      if (jpegBuf != null) return jpegBuf;
+      throw new Exception(NO_ASSOC_ERROR);
+    }
+    return yuvImage.getBuf();
+  }
+
+  /**
+   * @deprecated Use {@link #getSourceBuf} instead.
+   */
+  @Deprecated
+  public byte[] getJPEGBuf() throws Exception {
+    // Unlike getSourceBuf(), this never consults a YUV source image.
+    if (jpegBuf != null) return jpegBuf;
+    throw new Exception(NO_ASSOC_ERROR);
+  }
+
+  /**
+   * Returns the size of the source image (in bytes) associated with this
+   * decompressor instance.
+   *
+   * @return the size of the source image (in bytes) associated with this
+   * decompressor instance
+   */
+  public int getSourceSize() throws Exception {
+    if (yuvImage == null) {  // JPEG path: a size below 1 means "no image"
+      if (jpegBufSize >= 1) return jpegBufSize;
+      throw new Exception(NO_ASSOC_ERROR);
+    }
+    return yuvImage.getSize();
+  }
+
+  /**
+   * @deprecated Use {@link #getSourceSize} instead.
+   */
+  @Deprecated
+  public int getJPEGSize() throws Exception {
+    // Unlike getSourceSize(), this never consults a YUV source image.
+    if (jpegBufSize >= 1) return jpegBufSize;
+    throw new Exception(NO_ASSOC_ERROR);
+  }
+
+  /**
+   * Returns the width of the largest scaled-down image that the TurboJPEG
+   * decompressor can generate without exceeding the desired image width and
+   * height.
+   *
+   * @param desiredWidth desired width (in pixels) of the decompressed image.
+   * Setting this to 0 is the same as setting it to the width of the JPEG image
+   * (in other words, the width will not be considered when determining the
+   * scaled image size.)
+   *
+   * @param desiredHeight desired height (in pixels) of the decompressed image.
+   * Setting this to 0 is the same as setting it to the height of the JPEG
+   * image (in other words, the height will not be considered when determining
+   * the scaled image size.)
+   *
+   * @return the width of the largest scaled-down image that the TurboJPEG
+   * decompressor can generate without exceeding the desired image width and
+   * height
+   */
+  public int getScaledWidth(int desiredWidth, int desiredHeight)
+                            throws Exception {
+    if (srcWidth < 1 || srcHeight < 1)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (desiredWidth < 0 || desiredHeight < 0)
+      throw new Exception("Invalid argument in getScaledWidth()");
+    TJScalingFactor[] factors = TJ.getScalingFactors();
+    // 0 means "no limit" for that dimension: fall back to the source size.
+    int maxW = (desiredWidth == 0) ? srcWidth : desiredWidth;
+    int maxH = (desiredHeight == 0) ? srcHeight : desiredHeight;
+    int w = srcWidth, h = srcHeight;
+    boolean fits = false;
+    // Keep the first factor, in the order TJ reports them, that fits both.
+    for (int k = 0; k < factors.length && !fits; k++) {
+      w = factors[k].getScaled(srcWidth);
+      h = factors[k].getScaled(srcHeight);
+      fits = (w <= maxW && h <= maxH);
+    }
+    if (w > maxW || h > maxH)
+      throw new Exception("Could not scale down to desired image dimensions");
+    return w;
+  }
+
+  /**
+   * Returns the height of the largest scaled-down image that the TurboJPEG
+   * decompressor can generate without exceeding the desired image width and
+   * height.
+   *
+   * @param desiredWidth desired width (in pixels) of the decompressed image.
+   * Setting this to 0 is the same as setting it to the width of the JPEG image
+   * (in other words, the width will not be considered when determining the
+   * scaled image size.)
+   *
+   * @param desiredHeight desired height (in pixels) of the decompressed image.
+   * Setting this to 0 is the same as setting it to the height of the JPEG
+   * image (in other words, the height will not be considered when determining
+   * the scaled image size.)
+   *
+   * @return the height of the largest scaled-down image that the TurboJPEG
+   * decompressor can generate without exceeding the desired image width and
+   * height
+   */
+  public int getScaledHeight(int desiredWidth, int desiredHeight)
+                             throws Exception {
+    if (srcWidth < 1 || srcHeight < 1)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (desiredWidth < 0 || desiredHeight < 0)
+      throw new Exception("Invalid argument in getScaledHeight()");
+    TJScalingFactor[] factors = TJ.getScalingFactors();
+    // 0 means "no limit" for that dimension: fall back to the source size.
+    int maxW = (desiredWidth == 0) ? srcWidth : desiredWidth;
+    int maxH = (desiredHeight == 0) ? srcHeight : desiredHeight;
+    int w = srcWidth, h = srcHeight;
+    boolean fits = false;
+    // Keep the first factor, in the order TJ reports them, that fits both.
+    for (int k = 0; k < factors.length && !fits; k++) {
+      w = factors[k].getScaled(srcWidth);
+      h = factors[k].getScaled(srcHeight);
+      fits = (w <= maxW && h <= maxH);
+    }
+    if (w > maxW || h > maxH)
+      throw new Exception("Could not scale down to desired image dimensions");
+    return h;
+  }
+
+  /**
+   * Decompress the JPEG source image or decode the YUV source image associated
+   * with this decompressor instance and output a grayscale, RGB, or CMYK image
+   * to the given destination buffer.
+   *
+   * @param dstBuf buffer that will receive the decompressed/decoded image.
+   * If the source image is a JPEG image, then this buffer should normally be
+   * <code>pitch * scaledHeight</code> bytes in size, where
+   * <code>scaledHeight</code> can be determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight)
+   * </code> with one of the scaling factors returned from {@link
+   * TJ#getScalingFactors} or by calling {@link #getScaledHeight}.  If the
+   * source image is a YUV image, then this buffer should normally be
+   * <code>pitch * height</code> bytes in size, where <code>height</code> is
+   * the height of the YUV image.  However, the buffer may also be larger than
+   * the dimensions of the source image, in which case the <code>x</code>,
+   * <code>y</code>, and <code>pitch</code> parameters can be used to specify
+   * the region into which the source image should be decompressed/decoded.
+   *
+   * @param x x offset (in pixels) of the region in the destination image into
+   * which the source image should be decompressed/decoded
+   *
+   * @param y y offset (in pixels) of the region in the destination image into
+   * which the source image should be decompressed/decoded
+   *
+   * @param desiredWidth If the source image is a JPEG image, then this
+   * specifies the desired width (in pixels) of the decompressed image (or
+   * image region.)  If the desired destination image dimensions are different
+   * than the source image dimensions, then TurboJPEG will use scaling in the
+   * JPEG decompressor to generate the largest possible image that will fit
+   * within the desired dimensions.  Setting this to 0 is the same as setting
+   * it to the width of the JPEG image (in other words, the width will not be
+   * considered when determining the scaled image size.)  This parameter is
+   * ignored if the source image is a YUV image.
+   *
+   * @param pitch bytes per line of the destination image.  Normally, this
+   * should be <code>scaledWidth * TJ.getPixelSize(pixelFormat)</code> if
+   * the destination image is unpadded, but you can use this to, for instance,
+   * pad each line of the destination image to a 4-byte boundary or to
+   * decompress/decode the source image into a region of a larger image.  NOTE:
+   * if the source image is a JPEG image, then <code>scaledWidth</code> can be
+   * determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth)
+   * </code> or by calling {@link #getScaledWidth}.  If the source image is a
+   * YUV image, then <code>scaledWidth</code> is the width of the YUV image.
+   * Setting this parameter to 0 is the equivalent of setting it to
+   * <code>scaledWidth * TJ.getPixelSize(pixelFormat)</code>.
+   *
+   * @param desiredHeight If the source image is a JPEG image, then this
+   * specifies the desired height (in pixels) of the decompressed image (or
+   * image region.)  If the desired destination image dimensions are different
+   * than the source image dimensions, then TurboJPEG will use scaling in the
+   * JPEG decompressor to generate the largest possible image that will fit
+   * within the desired dimensions.  Setting this to 0 is the same as setting
+   * it to the height of the JPEG image (in other words, the height will not be
+   * considered when determining the scaled image size.)  This parameter is
+   * ignored if the source image is a YUV image.
+   *
+   * @param pixelFormat pixel format of the decompressed/decoded image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void decompress(byte[] dstBuf, int x, int y, int desiredWidth,
+                         int pitch, int desiredHeight, int pixelFormat,
+                         int flags) throws Exception {
+    if (jpegBuf == null && yuvImage == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    // desiredWidth/desiredHeight are validated only for a JPEG source,
+    // because they are ignored when the source image is a YUV image.
+    if (dstBuf == null || x < 0 || y < 0 || pitch < 0 ||
+        (yuvImage == null && (desiredWidth < 0 || desiredHeight < 0)) ||
+        pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0)
+      throw new Exception("Invalid argument in decompress()");
+    if (yuvImage != null)
+      decodeYUV(yuvImage.getBuf(), yuvImage.getPad(), yuvImage.getSubsamp(),
+                dstBuf, x, y, yuvImage.getWidth(), pitch, yuvImage.getHeight(),
+                pixelFormat, flags);
+    else if (x > 0 || y > 0)  // a nonzero offset needs the x/y overload
+      decompress(jpegBuf, jpegBufSize, dstBuf, x, y, desiredWidth, pitch,
+                 desiredHeight, pixelFormat, flags);
+    else
+      decompress(jpegBuf, jpegBufSize, dstBuf, desiredWidth, pitch,
+                 desiredHeight, pixelFormat, flags);
+  }
+
+  /**
+   * @deprecated Use
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} instead.
+   */
+  @Deprecated
+  public void decompress(byte[] dstBuf, int desiredWidth, int pitch,
+                         int desiredHeight, int pixelFormat, int flags)
+                         throws Exception {
+    decompress(dstBuf, 0, 0, desiredWidth, pitch, desiredHeight, pixelFormat,
+               flags);  // x = y = 0: no offset into dstBuf
+  }
+
+  /**
+   * Decompress the JPEG source image associated with this decompressor
+   * instance and return a buffer containing the decompressed image.
+   *
+   * @param desiredWidth see
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
+   *
+   * @param pitch see
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
+   *
+   * @param desiredHeight see
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)}
+   * for description
+   *
+   * @param pixelFormat pixel format of the decompressed image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   *
+   * @return a buffer containing the decompressed image
+   */
+  public byte[] decompress(int desiredWidth, int pitch, int desiredHeight,
+                           int pixelFormat, int flags) throws Exception {
+    if (flags < 0 || pixelFormat < 0 || pixelFormat >= TJ.NUMPF || pitch < 0 ||
+        (yuvImage == null && (desiredWidth < 0 || desiredHeight < 0)))
+      throw new Exception("Invalid argument in decompress()");
+    int bytesPerPixel = TJ.getPixelSize(pixelFormat);
+    int outWidth = getScaledWidth(desiredWidth, desiredHeight);
+    int outHeight = getScaledHeight(desiredWidth, desiredHeight);
+    // A pitch of 0 selects the unpadded row size for the scaled width.
+    int rowBytes = (pitch == 0) ? outWidth * bytesPerPixel : pitch;
+    byte[] result = new byte[rowBytes * outHeight];
+    decompress(result, desiredWidth, rowBytes, desiredHeight, pixelFormat,
+               flags);
+    return result;
+  }
+
+  /**
+   * Decompress the JPEG source image associated with this decompressor
+   * instance into a YUV planar image and store it in the given
+   * <code>YUVImage</code> instance.  This method performs JPEG decompression
+   * but leaves out the color conversion step, so a planar YUV image is
+   * generated instead of an RGB or grayscale image.  This method cannot be
+   * used to decompress JPEG source images with the CMYK or YCCK colorspace.
+   *
+   * @param dstImage {@link YUVImage} instance that will receive the YUV planar
+   * image.  The level of subsampling specified in this <code>YUVImage</code>
+   * instance must match that of the JPEG image, and the width and height
+   * specified in the <code>YUVImage</code> instance must match one of the
+   * scaled image sizes that TurboJPEG is capable of generating from the JPEG
+   * source image.
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void decompressToYUV(YUVImage dstImage, int flags) throws Exception {
+    if (jpegBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (dstImage == null || flags < 0)
+      throw new Exception("Invalid argument in decompressToYUV()");
+    int scaledWidth = getScaledWidth(dstImage.getWidth(),
+                                     dstImage.getHeight());
+    int scaledHeight = getScaledHeight(dstImage.getWidth(),
+                                       dstImage.getHeight());
+    if (scaledWidth != dstImage.getWidth() ||
+        scaledHeight != dstImage.getHeight())
+      throw new Exception("YUVImage dimensions do not match one of the scaled image sizes that TurboJPEG is capable of generating.");
+    if (srcSubsamp != dstImage.getSubsamp())
+      throw new Exception("YUVImage subsampling level does not match that of the JPEG image");
+
+    decompressToYUV(jpegBuf, jpegBufSize, dstImage.getBuf(),
+                    dstImage.getWidth(), dstImage.getPad(),
+                    dstImage.getHeight(), flags);
+  }
+
+  /**
+   * @deprecated Use {@link #decompressToYUV(YUVImage, int)} instead.
+   */
+  @Deprecated
+  public void decompressToYUV(byte[] dstBuf, int flags) throws Exception {
+    YUVImage dstImage = new YUVImage(dstBuf, srcWidth, 4, srcHeight,
+                                     srcSubsamp);  // JPEG-sized: no scaling
+    decompressToYUV(dstImage, flags);
+  }
+
+  /**
+   * Decompress the JPEG source image associated with this decompressor
+   * instance into a YUV planar image and return a <code>YUVImage</code>
+   * instance containing the decompressed image.  This method performs JPEG
+   * decompression but leaves out the color conversion step, so a planar YUV
+   * image is generated instead of an RGB or grayscale image.  This method
+   * cannot be used to decompress JPEG source images with the CMYK or YCCK
+   * colorspace.
+   *
+   * @param desiredWidth desired width (in pixels) of the YUV image.  If the
+   * desired image dimensions are different than the dimensions of the JPEG
+   * image being decompressed, then TurboJPEG will use scaling in the JPEG
+   * decompressor to generate the largest possible image that will fit within
+   * the desired dimensions.  Setting this to 0 is the same as setting it to
+   * the width of the JPEG image (in other words, the width will not be
+   * considered when determining the scaled image size.)
+   *
+   * @param pad the width of each line in each plane of the YUV image will be
+   * padded to the nearest multiple of this number of bytes (must be a power of
+   * 2.)
+   *
+   * @param desiredHeight desired height (in pixels) of the YUV image.  If the
+   * desired image dimensions are different than the dimensions of the JPEG
+   * image being decompressed, then TurboJPEG will use scaling in the JPEG
+   * decompressor to generate the largest possible image that will fit within
+   * the desired dimensions.  Setting this to 0 is the same as setting it to
+   * the height of the JPEG image (in other words, the height will not be
+   * considered when determining the scaled image size.)
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   *
+   * @return a YUV planar image
+   */
+  public YUVImage decompressToYUV(int desiredWidth, int pad, int desiredHeight,
+                                  int flags) throws Exception {
+    if (flags < 0)
+      throw new Exception("Invalid argument in decompressToYUV()");
+    if (srcSubsamp < 0 || srcHeight < 1 || srcWidth < 1)
+      throw new Exception(NO_ASSOC_ERROR);
+    if (srcSubsamp >= TJ.NUMSAMP)
+      throw new Exception("JPEG header information is invalid");
+    if (yuvImage != null)  // a YUV source cannot be decompressed to YUV
+      throw new Exception("Source image is the wrong type");
+
+    // Named "result" so that it does not shadow the yuvImage source field.
+    int outWidth = getScaledWidth(desiredWidth, desiredHeight);
+    int outHeight = getScaledHeight(desiredWidth, desiredHeight);
+    YUVImage result = new YUVImage(outWidth, pad, outHeight, srcSubsamp);
+    decompressToYUV(result, flags);
+    return result;
+  }
+
+  /**
+   * @deprecated Use {@link #decompressToYUV(int, int, int, int)} instead.
+   */
+  @Deprecated
+  public byte[] decompressToYUV(int flags) throws Exception {
+    YUVImage dstImage = new YUVImage(srcWidth, 4, srcHeight, srcSubsamp);
+    decompressToYUV(dstImage, flags);  // JPEG-sized output, i.e. no scaling
+    return dstImage.getBuf();  // buffer of the image built with pad = 4
+  }
+
+  /**
+   * Decompress the JPEG source image or decode the YUV source image associated
+   * with this decompressor instance and output a grayscale, RGB, or CMYK image
+   * to the given destination buffer.
+   *
+   * @param dstBuf buffer that will receive the decompressed/decoded image.
+   * If the source image is a JPEG image, then this buffer should normally be
+   * <code>stride * scaledHeight</code> pixels in size, where
+   * <code>scaledHeight</code> can be determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegHeight)
+   * </code> with one of the scaling factors returned from {@link
+   * TJ#getScalingFactors} or by calling {@link #getScaledHeight}.  If the
+   * source image is a YUV image, then this buffer should normally be
+   * <code>stride * height</code> pixels in size, where <code>height</code> is
+   * the height of the YUV image.  However, the buffer may also be larger than
+   * the dimensions of the source image, in which case the <code>x</code>,
+   * <code>y</code>, and <code>stride</code> parameters can be used to specify
+   * the region into which the source image should be decompressed/decoded.
+   *
+   * @param x x offset (in pixels) of the region in the destination image into
+   * which the source image should be decompressed/decoded
+   *
+   * @param y y offset (in pixels) of the region in the destination image into
+   * which the source image should be decompressed/decoded
+   *
+   * @param desiredWidth If the source image is a JPEG image, then this
+   * specifies the desired width (in pixels) of the decompressed image (or
+   * image region.)  If the desired destination image dimensions are different
+   * than the source image dimensions, then TurboJPEG will use scaling in the
+   * JPEG decompressor to generate the largest possible image that will fit
+   * within the desired dimensions.  Setting this to 0 is the same as setting
+   * it to the width of the JPEG image (in other words, the width will not be
+   * considered when determining the scaled image size.)  This parameter is
+   * ignored if the source image is a YUV image.
+   *
+   * @param stride pixels per line of the destination image.  Normally, this
+   * should be set to <code>scaledWidth</code>, but you can use this to, for
+   * instance, decompress the JPEG image into a region of a larger image.
+   * NOTE: if the source image is a JPEG image, then <code>scaledWidth</code>
+   * can be determined by calling <code>
+   * scalingFactor.{@link TJScalingFactor#getScaled getScaled}(jpegWidth)
+   * </code> or by calling {@link #getScaledWidth}.  If the source image is a
+   * YUV image, then <code>scaledWidth</code> is the width of the YUV image.
+   * Setting this parameter to 0 is the equivalent of setting it to
+   * <code>scaledWidth</code>.
+   *
+   * @param desiredHeight If the source image is a JPEG image, then this
+   * specifies the desired height (in pixels) of the decompressed image (or
+   * image region.)  If the desired destination image dimensions are different
+   * than the source image dimensions, then TurboJPEG will use scaling in the
+   * JPEG decompressor to generate the largest possible image that will fit
+   * within the desired dimensions.  Setting this to 0 is the same as setting
+   * it to the height of the JPEG image (in other words, the height will not be
+   * considered when determining the scaled image size.)  This parameter is
+   * ignored if the source image is a YUV image.
+   *
+   * @param pixelFormat pixel format of the decompressed image (one of
+   * {@link TJ#PF_RGB TJ.PF_*})
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void decompress(int[] dstBuf, int x, int y, int desiredWidth,
+                         int stride, int desiredHeight, int pixelFormat,
+                         int flags) throws Exception {
+    if (jpegBuf == null && yuvImage == null)  // no source of either kind
+      throw new Exception(NO_ASSOC_ERROR);
+    if (dstBuf == null || x < 0 || y < 0 || stride < 0 ||
+        (yuvImage == null && (desiredWidth < 0 || desiredHeight < 0)) ||
+        pixelFormat < 0 || pixelFormat >= TJ.NUMPF || flags < 0)
+      throw new Exception("Invalid argument in decompress()");
+    if (yuvImage != null)  // YUV source: desiredWidth/Height are ignored
+      decodeYUV(yuvImage.getBuf(), yuvImage.getPad(), yuvImage.getSubsamp(),
+                dstBuf, x, y, yuvImage.getWidth(), stride,
+                yuvImage.getHeight(), pixelFormat, flags);
+    else  // JPEG source: desiredWidth/Height were range-checked above
+      decompress(jpegBuf, jpegBufSize, dstBuf, x, y, desiredWidth, stride,
+                 desiredHeight, pixelFormat, flags);
+  }
+
+  /**
+   * Decompress the JPEG source image or decode the YUV source image associated
+   * with this decompressor instance and output a decompressed/decoded image to
+   * the given <code>BufferedImage</code> instance.
+   *
+   * @param dstImage a <code>BufferedImage</code> instance that will receive
+   * the decompressed/decoded image.  If the source image is a JPEG image, then
+   * the width and height of the <code>BufferedImage</code> instance must match
+   * one of the scaled image sizes that TurboJPEG is capable of generating from
+   * the JPEG image.  If the source image is a YUV image, then the width and
+   * height of the <code>BufferedImage</code> instance must match the width and
+   * height of the YUV image.
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void decompress(BufferedImage dstImage, int flags) throws Exception {
+    if (dstImage == null || flags < 0)
+      throw new Exception("Invalid argument in decompress()");
+    int desiredWidth = dstImage.getWidth();  // the image size is the target
+    int desiredHeight = dstImage.getHeight();
+    int scaledWidth, scaledHeight;
+
+    if (yuvImage != null) {  // YUV source: dimensions must match exactly
+      if (desiredWidth != yuvImage.getWidth() ||
+          desiredHeight != yuvImage.getHeight())
+        throw new Exception("BufferedImage dimensions do not match the dimensions of the source image.");
+      scaledWidth = yuvImage.getWidth();
+      scaledHeight = yuvImage.getHeight();
+    } else {  // JPEG source: dimensions must be a size scaling can produce
+      scaledWidth = getScaledWidth(desiredWidth, desiredHeight);
+      scaledHeight = getScaledHeight(desiredWidth, desiredHeight);
+      if (scaledWidth != desiredWidth || scaledHeight != desiredHeight)
+        throw new Exception("BufferedImage dimensions do not match one of the scaled image sizes that TurboJPEG is capable of generating.");
+    }
+    int pixelFormat;  boolean intPixels = false;  // int[]- or byte[]-backed?
+    if (byteOrder == null)  // default to the platform byte order
+      byteOrder = ByteOrder.nativeOrder();
+    switch(dstImage.getType()) {  // map the image type to a TJ pixel format
+      case BufferedImage.TYPE_3BYTE_BGR:
+        pixelFormat = TJ.PF_BGR;  break;
+      case BufferedImage.TYPE_4BYTE_ABGR:
+      case BufferedImage.TYPE_4BYTE_ABGR_PRE:
+        pixelFormat = TJ.PF_XBGR;  break;  // NOTE(review): alpha byte unset?
+      case BufferedImage.TYPE_BYTE_GRAY:
+        pixelFormat = TJ.PF_GRAY;  break;
+      case BufferedImage.TYPE_INT_BGR:  // int layouts vary with byteOrder
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
+          pixelFormat = TJ.PF_XBGR;
+        else
+          pixelFormat = TJ.PF_RGBX;
+        intPixels = true;  break;
+      case BufferedImage.TYPE_INT_RGB:
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
+          pixelFormat = TJ.PF_XRGB;
+        else
+          pixelFormat = TJ.PF_BGRX;
+        intPixels = true;  break;
+      case BufferedImage.TYPE_INT_ARGB:
+      case BufferedImage.TYPE_INT_ARGB_PRE:
+        if (byteOrder == ByteOrder.BIG_ENDIAN)
+          pixelFormat = TJ.PF_ARGB;
+        else
+          pixelFormat = TJ.PF_BGRA;
+        intPixels = true;  break;
+      default:
+        throw new Exception("Unsupported BufferedImage format");
+    }
+    WritableRaster wr = dstImage.getRaster();
+    if (intPixels) {  // decode directly into the image's int[] data
+      SinglePixelPackedSampleModel sm =
+        (SinglePixelPackedSampleModel)dstImage.getSampleModel();
+      int stride = sm.getScanlineStride();
+      DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
+      int[] buf = db.getData();
+      if (yuvImage != null)
+        decodeYUV(yuvImage.getBuf(), yuvImage.getPad(), yuvImage.getSubsamp(),
+                  buf, 0, 0, yuvImage.getWidth(), stride, yuvImage.getHeight(),
+                  pixelFormat, flags);
+      else {
+        if (jpegBuf == null)  // no YUV source either: nothing to decode
+          throw new Exception(NO_ASSOC_ERROR);
+        decompress(jpegBuf, jpegBufSize, buf, 0, 0, scaledWidth, stride,
+                   scaledHeight, pixelFormat, flags);
+      }
+    } else {  // byte-backed image: go through the byte[] decompress()
+      ComponentSampleModel sm =
+        (ComponentSampleModel)dstImage.getSampleModel();
+      int pixelSize = sm.getPixelStride();
+      if (pixelSize != TJ.getPixelSize(pixelFormat))  // sanity check
+        throw new Exception("Inconsistency between pixel format and pixel size in BufferedImage");
+      int pitch = sm.getScanlineStride();
+      DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
+      byte[] buf = db.getData();
+      decompress(buf, 0, 0, scaledWidth, pitch, scaledHeight, pixelFormat,
+                 flags);
+    }
+  }
+
+  /**
+   * Decompress the JPEG source image or decode the YUV source image associated
+   * with this decompressor instance and return a <code>BufferedImage</code>
+   * instance containing the decompressed/decoded image.
+   *
+   * @param desiredWidth see
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} for
+   * description
+   *
+   * @param desiredHeight see
+   * {@link #decompress(byte[], int, int, int, int, int, int, int)} for
+   * description
+   *
+   * @param bufferedImageType the image type of the <code>BufferedImage</code>
+   * instance that will be created (for instance,
+   * <code>BufferedImage.TYPE_INT_RGB</code>)
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   *
+   * @return a <code>BufferedImage</code> instance containing the
+   * decompressed/decoded image
+   */
+  public BufferedImage decompress(int desiredWidth, int desiredHeight,
+                                  int bufferedImageType, int flags)
+                                  throws Exception {
+    boolean badSize = yuvImage == null &&
+                      (desiredWidth < 0 || desiredHeight < 0);
+    if (badSize || flags < 0)  // sizes are only checked without a YUV source
+      throw new Exception("Invalid argument in decompress()");
+    int w = getScaledWidth(desiredWidth, desiredHeight);
+    int h = getScaledHeight(desiredWidth, desiredHeight);
+    BufferedImage out = new BufferedImage(w, h, bufferedImageType);
+    decompress(out, flags);
+    return out;
+  }
+
+  /**
+   * Free the native structures associated with this decompressor instance.
+   */
+  public void close() throws Exception {
+    destroy();  // native call; does the actual freeing
+  }
+
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } catch(Exception e) {  // best effort: a failed close() is ignored here
+    } finally {
+      super.finalize();  // always chain to the superclass finalizer
+    }
+  };
+
+  private native void init() throws Exception;
+
+  private native void destroy() throws Exception;  // invoked by close()
+
+  private native void decompressHeader(byte[] srcBuf, int size)
+    throws Exception;
+
+  private native void decompress(byte[] srcBuf, int size, byte[] dstBuf,
+    int desiredWidth, int pitch, int desiredHeight, int pixelFormat, int flags)
+    throws Exception; // deprecated (no x/y offset)
+
+  private native void decompress(byte[] srcBuf, int size, byte[] dstBuf, int x,
+    int y, int desiredWidth, int pitch, int desiredHeight, int pixelFormat,
+    int flags) throws Exception;
+
+  private native void decompress(byte[] srcBuf, int size, int[] dstBuf,
+    int desiredWidth, int stride, int desiredHeight, int pixelFormat,
+    int flags) throws Exception; // deprecated (no x/y offset)
+
+  private native void decompress(byte[] srcBuf, int size, int[] dstBuf, int x,
+    int y, int desiredWidth, int stride, int desiredHeight, int pixelFormat,
+    int flags) throws Exception;
+
+  private native void decompressToYUV(byte[] srcBuf, int size, byte[] dstBuf,
+    int flags) throws Exception; // deprecated (no size or pad arguments)
+
+  private native void decompressToYUV(byte[] srcBuf, int size, byte[] dstBuf,
+    int desiredWidth, int pad, int desiredHeight, int flags) throws Exception;
+
+  private native void decodeYUV(byte[] srcBuf, int pad, int subsamp,
+    byte[] dstBuf, int x, int y, int width, int pitch, int height,
+    int pixelFormat, int flags) throws Exception;
+
+  private native void decodeYUV(byte[] srcBuf, int pad, int subsamp,
+    int[] dstBuf, int x, int y, int width, int stride, int height,
+    int pixelFormat, int flags) throws Exception;
+
+  static {
+    TJLoader.load();  // load the native library when this class initializes
+  }
+
+  protected long handle = 0;  // presumably the native handle (TODO confirm)
+  protected byte[] jpegBuf = null;  // JPEG source handed to native decompress()
+  protected int jpegBufSize = 0;  // passed along as the size of jpegBuf
+  protected YUVImage yuvImage = null;  // YUV source; tested before jpegBuf
+  protected int srcWidth = 0;  // TJTransformer treats < 1 as "no source"
+  protected int srcHeight = 0;  // TJTransformer treats < 1 as "no source"
+  protected int srcSubsamp = -1;  // passed to TJ.bufSize() by TJTransformer
+  protected int srcColorspace = -1;
+  private ByteOrder byteOrder = null;  // set lazily in decompress(BufferedImage)
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java.in b/java/org/libjpegturbo/turbojpeg/TJLoader.java.in
new file mode 100644
index 0000000..22353a5
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJLoader.java.in
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+final class TJLoader {
+  static void load() {
+    System.loadLibrary("@TURBOJPEG_DLL_NAME@");  // presumably a build-time placeholder
+  }
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl b/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
new file mode 100644
index 0000000..a4f1c87
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJLoader.java.tmpl
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C)2011-2013 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+final class TJLoader {
+  static void load() {
+    try {
+      System.loadLibrary("turbojpeg");  // first choice: the JVM's library path
+    } catch (java.lang.UnsatisfiedLinkError e) {
+      String os = System.getProperty("os.name").toLowerCase();
+      if (os.indexOf("mac") >= 0) {  // OS name contains "mac": try .jnilib
+        try {
+          System.load("%{__libdir}/libturbojpeg.jnilib");
+        } catch (java.lang.UnsatisfiedLinkError e2) {  // fall back to /usr/lib
+          System.load("/usr/lib/libturbojpeg.jnilib");
+        }
+      } else {  // every other OS: try the .so in the packaged lib directory
+        try {
+          System.load("%{__libdir}/libturbojpeg.so");
+        } catch (java.lang.UnsatisfiedLinkError e3) {
+          String libdir = "%{__libdir}";  // then try the sibling lib32/lib64
+          if (libdir.equals("/opt/libjpeg-turbo/lib64")) {
+            System.load("/opt/libjpeg-turbo/lib32/libturbojpeg.so");
+          } else if (libdir.equals("/opt/libjpeg-turbo/lib32")) {
+            System.load("/opt/libjpeg-turbo/lib64/libturbojpeg.so");
+          } else {
+            throw e3;  // no known alternative location: rethrow
+          }
+        }
+      }
+    }
+  }
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java
new file mode 100644
index 0000000..4e7363f
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * Fractional scaling factor
+ */
+public class TJScalingFactor {
+  // Both terms must be >= 1; anything smaller makes the constructor throw.
+  public TJScalingFactor(int num, int denom) throws Exception {
+    if (num < 1 || denom < 1)
+      throw new Exception("Numerator and denominator must be >= 1");
+    this.num = num;
+    this.denom = denom;
+  }
+
+  /**
+   * Returns the numerator of this scaling factor.
+   * @return numerator
+   */
+  public int getNum() {
+    return num;
+  }
+
+  /**
+   * Returns the denominator of this scaling factor.
+   * @return denominator
+   */
+  public int getDenom() {
+    return denom;
+  }
+
+  /**
+   * Returns <code>ceil(dimension * num / denom)</code>, using a 64-bit
+   * intermediate product so that large dimensions cannot wrap around.
+   * @param dimension the value to scale
+   * @return the scaled value of <code>dimension</code>, narrowed to int
+   */
+  public int getScaled(int dimension) {
+    return (int)(((long)dimension * num + denom - 1) / denom);
+  }
+
+  /**
+   * Compares the numerator and denominator of this instance with those of
+   * <code>other</code>.  The terms are compared as stored, not reduced.
+   * @param other the scaling factor to compare against (may be null)
+   * @return true if both terms match; false otherwise or if other is null
+   */
+  public boolean equals(TJScalingFactor other) {
+    return (other != null && num == other.num && denom == other.denom);
+  }
+
+  /**
+   * Returns true or false, depending on whether this instance is equal to
+   * 1/1.
+   * @return true or false, depending on whether this instance is equal to
+   * 1/1
+   */
+  public boolean isOne() {
+    return (num == 1 && denom == 1);
+  }
+
+  /**
+   * Numerator
+   */
+  private int num = 1;
+
+  /**
+   * Denominator
+   */
+  private int denom = 1;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/TJTransform.java b/java/org/libjpegturbo/turbojpeg/TJTransform.java
new file mode 100644
index 0000000..b464ffd
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJTransform.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C)2011, 2013 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+import java.awt.*;
+
+/**
+ * Lossless transform parameters
+ */
+public class TJTransform extends Rectangle {
+  // Rectangle is Serializable, so this subclass pins its serialVersionUID.
+  private static final long serialVersionUID = -127367705761430371L;
+
+  /**
+   * The number of lossless transform operations
+   */
+  public static final int NUMOP         = 8;
+  /**
+   * Do not transform the position of the image pixels.
+   */
+  public static final int OP_NONE       = 0;
+  /**
+   * Flip (mirror) image horizontally.  This transform is imperfect if there
+   * are any partial MCU blocks on the right edge.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_HFLIP      = 1;
+  /**
+   * Flip (mirror) image vertically.  This transform is imperfect if there are
+   * any partial MCU blocks on the bottom edge.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_VFLIP      = 2;
+  /**
+   * Transpose image (flip/mirror along upper left to lower right axis).  This
+   * transform is always perfect.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_TRANSPOSE  = 3;
+  /**
+   * Transverse transpose image (flip/mirror along upper right to lower left
+   * axis).  This transform is imperfect if there are any partial MCU blocks in
+   * the image.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_TRANSVERSE = 4;
+  /**
+   * Rotate image clockwise by 90 degrees.  This transform is imperfect if
+   * there are any partial MCU blocks on the bottom edge.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_ROT90      = 5;
+  /**
+   * Rotate image 180 degrees.  This transform is imperfect if there are any
+   * partial MCU blocks in the image.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_ROT180     = 6;
+  /**
+   * Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
+   * if there are any partial MCU blocks on the right edge.
+   * @see #OPT_PERFECT
+   */
+  public static final int OP_ROT270     = 7;
+
+
+  /**
+   * This option will cause {@link TJTransformer#transform
+   * TJTransformer.transform()} to throw an exception if the transform is not
+   * perfect.  Lossless transforms operate on MCU blocks, whose size depends on
+   * the level of chrominance subsampling used.  If the image's width or height
+   * is not evenly divisible by the MCU block size (see {@link TJ#getMCUWidth}
+   * and {@link TJ#getMCUHeight}), then there will be partial MCU blocks on the
+   * right and/or bottom edges.  It is not possible to move these partial MCU
+   * blocks to the top or left of the image, so any transform that would
+   * require that is "imperfect."  If this option is not specified, then any
+   * partial MCU blocks that cannot be transformed will be left in place, which
+   * will create odd-looking strips on the right or bottom edge of the image.
+   */
+  public static final int OPT_PERFECT  = 1;
+  /**
+   * This option will discard any partial MCU blocks that cannot be
+   * transformed.
+   */
+  public static final int OPT_TRIM     = 2;
+  /**
+   * This option will enable lossless cropping to this instance's rectangle.
+   */
+  public static final int OPT_CROP     = 4;
+  /**
+   * This option will discard the color data in the input image and produce
+   * a grayscale output image.
+   */
+  public static final int OPT_GRAY     = 8;
+  /**
+   * This option will prevent {@link TJTransformer#transform
+   * TJTransformer.transform()} from outputting a JPEG image for this
+   * particular transform.  This can be used in conjunction with a custom
+   * filter to capture the transformed DCT coefficients without transcoding
+   * them.
+   */
+  public static final int OPT_NOOUTPUT = 16;
+
+
+  /**
+   * Create a new lossless transform instance (OP_NONE, no options, no filter).
+   */
+  public TJTransform() {
+  }
+
+  /**
+   * Create a new lossless transform instance with the given parameters.
+   *
+   * @param x the left boundary of the cropping region.  This must be evenly
+   * divisible by the MCU block width (see {@link TJ#getMCUWidth})
+   *
+   * @param y the upper boundary of the cropping region.  This must be evenly
+   * divisible by the MCU block height (see {@link TJ#getMCUHeight})
+   *
+   * @param w the width of the cropping region.  Setting this to 0 is the
+   * equivalent of setting it to (width of the source JPEG image -
+   * <code>x</code>).
+   *
+   * @param h the height of the cropping region.  Setting this to 0 is the
+   * equivalent of setting it to (height of the source JPEG image -
+   * <code>y</code>).
+   *
+   * @param op one of the transform operations (<code>OP_*</code>)
+   *
+   * @param options the bitwise OR of one or more of the transform options
+   * (<code>OPT_*</code>)
+   *
+   * @param cf an instance of an object that implements the {@link
+   * TJCustomFilter} interface, or null if no custom filter is needed
+   */
+  public TJTransform(int x, int y, int w, int h, int op, int options,
+                     TJCustomFilter cf) throws Exception {
+    super(x, y, w, h);
+    this.op = op;
+    this.options = options;
+    this.cf = cf;
+  }
+
+  /**
+   * Create a new lossless transform instance with the given parameters.
+   *
+   * @param r a <code>Rectangle</code> instance that specifies the cropping
+   * region.  See {@link
+   * #TJTransform(int, int, int, int, int, int, TJCustomFilter)} for more
+   * detail.
+   *
+   * @param op one of the transform operations (<code>OP_*</code>)
+   *
+   * @param options the bitwise OR of one or more of the transform options
+   * (<code>OPT_*</code>)
+   *
+   * @param cf an instance of an object that implements the {@link
+   * TJCustomFilter} interface, or null if no custom filter is needed
+   */
+  public TJTransform(Rectangle r, int op, int options,
+                     TJCustomFilter cf) throws Exception {
+    super(r);
+    this.op = op;
+    this.options = options;
+    this.cf = cf;
+  }
+
+  /**
+   * Transform operation (one of <code>OP_*</code>); defaults to OP_NONE
+   */
+  public int op = 0;
+
+  /**
+   * Transform options (bitwise OR of one or more of <code>OPT_*</code>)
+   */
+  public int options = 0;
+
+  /**
+   * Custom filter instance, or null if no custom filter is needed
+   */
+  public TJCustomFilter cf = null;
+}
diff --git a/java/org/libjpegturbo/turbojpeg/TJTransformer.java b/java/org/libjpegturbo/turbojpeg/TJTransformer.java
new file mode 100644
index 0000000..ee1f607
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/TJTransformer.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C)2011, 2013-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * TurboJPEG lossless transformer
+ */
+public class TJTransformer extends TJDecompressor {
+
+  /**
+   * Create a TurboJPEG lossless transformer with no source image attached.
+   */
+  public TJTransformer() throws Exception {
+    init();
+  }
+
+  /**
+   * Create a TurboJPEG lossless transformer instance and use the whole of
+   * <code>jpegImage</code> as the JPEG source image of the new instance.
+   *
+   * @param jpegImage JPEG image buffer (the array length is passed as the size
+   * of the JPEG image)
+   */
+  public TJTransformer(byte[] jpegImage) throws Exception {
+    init();
+    setSourceImage(jpegImage, jpegImage.length);
+  }
+
+  /**
+   * Create a TurboJPEG lossless transformer instance and use the first
+   * <code>imageSize</code> bytes of <code>jpegImage</code> as the JPEG source
+   * image of the new instance.
+   *
+   * @param jpegImage JPEG image buffer
+   *
+   * @param imageSize size of the JPEG image (in bytes)
+   */
+  public TJTransformer(byte[] jpegImage, int imageSize) throws Exception {
+    init();
+    setSourceImage(jpegImage, imageSize);
+  }
+
+  /**
+   * Apply one or more lossless transforms to the JPEG image associated with
+   * this transformer instance, writing each result into the corresponding
+   * destination buffer.  A lossless transform moves the raw coefficients
+   * from one JPEG image structure to another without changing their values.
+   * This is typically faster than decompressing, transforming, and
+   * re-compressing the image, but it is not free:  every transform must read
+   * and Huffman-decode all of the coefficients in the source image,
+   * regardless of the size of the destination image.  Passing several
+   * transforms in a single call therefore generates multiple transformed
+   * images, or applies multiple transformations, without reading the source
+   * coefficients more than once.  The sizes of the generated images can be
+   * retrieved afterward with {@link #getTransformedSizes}.
+   *
+   * @param dstBufs an array of image buffers.  <code>dstBufs[i]</code> will
+   * receive a JPEG image that has been transformed using the parameters in
+   * <code>transforms[i]</code>.  Use {@link TJ#bufSize} to determine the
+   * maximum size for each buffer based on the transformed or cropped width and
+   * height and the level of subsampling used in the source image.
+   *
+   * @param transforms an array of {@link TJTransform} instances, each of
+   * which specifies the transform parameters and/or cropping region for the
+   * corresponding transformed output image
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   */
+  public void transform(byte[][] dstBufs, TJTransform[] transforms,
+                        int flags) throws Exception {
+    if (jpegBuf == null)
+      throw new Exception("JPEG buffer not initialized");
+    int[] sizes = transform(jpegBuf, jpegBufSize, dstBufs, transforms, flags);
+    transformedSizes = sizes;
+  }
+
+  /**
+   * Apply one or more lossless transforms to the JPEG image associated with
+   * this transformer instance, allocating the destination buffers internally,
+   * and wrap each transformed JPEG image in a {@link TJDecompressor}.
+   *
+   * @param transforms an array of {@link TJTransform} instances, each of
+   * which specifies the transform parameters and/or cropping region for the
+   * corresponding transformed output image
+   *
+   * @param flags the bitwise OR of one or more of
+   * {@link TJ#FLAG_BOTTOMUP TJ.FLAG_*}
+   *
+   * @return an array of {@link TJDecompressor} instances, each of
+   * which has a transformed JPEG image associated with it
+   */
+  public TJDecompressor[] transform(TJTransform[] transforms, int flags)
+    throws Exception {
+    final int count = transforms.length;
+    byte[][] outBufs = new byte[count][];
+    if (srcWidth < 1 || srcHeight < 1)
+      throw new Exception("JPEG buffer not initialized");
+    for (int n = 0; n < count; n++) {
+      TJTransform t = transforms[n];
+      boolean crop = (t.options & TJTransform.OPT_CROP) != 0;
+      int w = (crop && t.width != 0) ? t.width : srcWidth;
+      int h = (crop && t.height != 0) ? t.height : srcHeight;
+      outBufs[n] = new byte[TJ.bufSize(w, h, srcSubsamp)];
+    }
+    TJDecompressor[] result = new TJDecompressor[count];
+    transform(outBufs, transforms, flags);
+    for (int n = 0; n < count; n++)
+      result[n] = new TJDecompressor(outBufs[n], transformedSizes[n]);
+    return result;
+  }
+
+  /**
+   * Returns the sizes of the transformed JPEG images produced by the most
+   * recent transform operation; throws if nothing has been transformed yet.
+   *
+   * @return an array containing the sizes of the transformed JPEG images
+   * generated by the most recent transform operation
+   */
+  public int[] getTransformedSizes() throws Exception {
+    int[] sizes = transformedSizes;
+    if (sizes != null) return sizes;
+    throw new Exception("No image has been transformed yet");
+  }
+
+  private native void init() throws Exception;
+
+  private native int[] transform(byte[] srcBuf, int srcSize, byte[][] dstBufs,
+    TJTransform[] transforms, int flags) throws Exception;
+
+  static {
+    TJLoader.load();
+  }
+
+  private int[] transformedSizes = null;
+};
diff --git a/java/org/libjpegturbo/turbojpeg/YUVImage.java b/java/org/libjpegturbo/turbojpeg/YUVImage.java
new file mode 100644
index 0000000..619b0c3
--- /dev/null
+++ b/java/org/libjpegturbo/turbojpeg/YUVImage.java
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C)2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.libjpegturbo.turbojpeg;
+
+/**
+ * This class encapsulates a YUV planar image buffer and the metadata
+ * associated with it.  The TurboJPEG API allows both the JPEG compression and
+ * decompression pipelines to be split into stages:  YUV encode, compress from
+ * YUV, decompress to YUV, and YUV decode.  A <code>YUVImage</code> instance
+ * serves as the destination image for YUV encode and decompress-to-YUV
+ * operations and as the source image for compress-from-YUV and YUV decode
+ * operations.
+ * <p>
+ * The JPEG format uses the YCbCr colorspace (which, technically speaking, is
+ * not a "colorspace" but rather a "color transform"), but per the convention
+ * of the digital video community, the TurboJPEG API uses "YUV" to refer to an
+ * image format consisting of Y, Cb, and Cr image planes.  In this image
+ * format, the Y, Cb (U), and Cr (V) planes are stored sequentially in the same
+ * image buffer, and the size of each plane is determined by the image width,
+ * height, line padding, and level of chrominance subsampling.  If the
+ * chrominance components are subsampled along the horizontal dimension, then
+ * the width of the luminance plane would be padded to the nearest multiple of
+ * 2 (same goes for the height of the luminance plane, if the chrominance
+ * components are subsampled along the vertical dimension.)  For instance, if
+ * the source image is 35 x 35 pixels and 4:2:2 subsampling is used, then the
+ * luminance plane would be 36 x 35 bytes, and each of the chrominance planes
+ * would be 18 x 35 bytes.  If you specify, for instance, a line padding of 4
+ * bytes on top of this, then the luminance plane would be 36 x 35 bytes, and
+ * each of the chrominance planes would be 20 x 35 bytes.
+ */
+public class YUVImage {
+
+  private static final String NO_ASSOC_ERROR =
+    "No YUV buffer is associated with this instance";
+
+  /**
+   * Create a <code>YUVImage</code> instance with a new image buffer.
+   *
+   * @param width width (in pixels) of the YUV image
+   *
+   * @param pad Each line of each plane in the YUV image buffer will be padded
+   * to the nearest multiple of this number of bytes (must be a power of 2.)
+   *
+   * @param height height (in pixels) of the YUV image
+   *
+   * @param subsamp the level of chrominance subsampling to be used in the YUV
+   * image (one of {@link TJ#SAMP_444 TJ.SAMP_*})
+   */
+  public YUVImage(int width, int pad, int height, int subsamp)
+                    throws Exception {
+    setBuf(new byte[TJ.bufSizeYUV(width, pad, height, subsamp)], width, pad,
+           height, subsamp);
+  }
+
+  /**
+   * Create a <code>YUVImage</code> instance from an existing YUV planar image
+   * buffer.
+   *
+   * @param yuvImage image buffer that contains or will contain YUV planar
+   * image data.  See {@link YUVImage above} for a description of the image
+   * format.  Use {@link TJ#bufSizeYUV} to determine the minimum size for this
+   * buffer.
+   *
+   * @param width width (in pixels) of the YUV image
+   *
+   * @param pad the line padding used in the YUV image buffer (must be a power
+   * of 2.)  For instance, if each line in each plane of the buffer is padded
+   * to the nearest multiple of 4 bytes, then <code>pad</code> should be 4.
+   *
+   * @param height height (in pixels) of the YUV image
+   *
+   * @param subsamp the level of chrominance subsampling used in the YUV
+   * image (one of {@link TJ#SAMP_444 TJ.SAMP_*})
+   */
+  public YUVImage(byte[] yuvImage, int width, int pad, int height,
+                  int subsamp) throws Exception {
+    setBuf(yuvImage, width, pad, height, subsamp);
+  }
+
+  /**
+   * Assign an existing YUV planar image buffer to this <code>YUVImage</code>
+   * instance.  Throws an exception if any argument fails validation.
+   *
+   * @param yuvImage image buffer that contains or will contain YUV planar
+   * image data.  See {@link YUVImage above} for a description of the image
+   * format.  Use {@link TJ#bufSizeYUV} to determine the minimum size for this
+   * buffer.
+   *
+   * @param width width (in pixels) of the YUV image
+   *
+   * @param pad the line padding used in the YUV image buffer (must be a power
+   * of 2.)  For instance, if each line in each plane of the buffer is padded
+   * to the nearest multiple of 4 bytes, then <code>pad</code> should be 4.
+   *
+   * @param height height (in pixels) of the YUV image
+   *
+   * @param subsamp the level of chrominance subsampling used in the YUV
+   * image (one of {@link TJ#SAMP_444 TJ.SAMP_*})
+   */
+  public void setBuf(byte[] yuvImage, int width, int pad, int height,
+                     int subsamp) throws Exception {
+    if (yuvImage == null || width < 1 || pad < 1 || ((pad & (pad - 1)) != 0) ||
+        height < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP)
+      throw new Exception("Invalid argument in YUVImage()");
+    if (yuvImage.length < TJ.bufSizeYUV(width, pad, height, subsamp))
+      throw new Exception("YUV image buffer is not large enough");
+    yuvBuf = yuvImage;  // the caller's array is kept by reference, not copied
+    yuvWidth = width;
+    yuvPad = pad;
+    yuvHeight = height;
+    yuvSubsamp = subsamp;
+  }
+
+  /**
+   * Returns the width of the YUV image.
+   *
+   * @return the width of the YUV image
+   */
+  public int getWidth() throws Exception {
+    if (yuvWidth < 1)  // setBuf() only ever stores widths >= 1
+      throw new Exception(NO_ASSOC_ERROR);
+    return yuvWidth;
+  }
+
+  /**
+   * Returns the height of the YUV image.
+   *
+   * @return the height of the YUV image
+   */
+  public int getHeight() throws Exception {
+    if (yuvHeight < 1)  // setBuf() only ever stores heights >= 1
+      throw new Exception(NO_ASSOC_ERROR);
+    return yuvHeight;
+  }
+
+  /**
+   * Returns the line padding used in the YUV image buffer.
+   *
+   * @return the line padding used in the YUV image buffer
+   */
+  public int getPad() throws Exception {
+    if (yuvPad < 1 || ((yuvPad & (yuvPad - 1)) != 0))  // not a power of 2
+      throw new Exception(NO_ASSOC_ERROR);
+    return yuvPad;
+  }
+
+  /**
+   * Returns the level of chrominance subsampling used in the YUV image.  See
+   * {@link TJ#SAMP_444 TJ.SAMP_*}.
+   *
+   * @return the level of chrominance subsampling used in the YUV image
+   */
+  public int getSubsamp() throws Exception {
+    if (yuvSubsamp < 0 || yuvSubsamp >= TJ.NUMSAMP)  // outside setBuf() range
+      throw new Exception(NO_ASSOC_ERROR);
+    return yuvSubsamp;
+  }
+
+  /**
+   * Returns the YUV image buffer.
+   *
+   * @return the YUV image buffer
+   */
+  public byte[] getBuf() throws Exception {
+    if (yuvBuf == null)  // setBuf() never stores null
+      throw new Exception(NO_ASSOC_ERROR);
+    return yuvBuf;
+  }
+
+  /**
+   * Returns the size (in bytes) of the YUV image, per {@link TJ#bufSizeYUV}.
+   *
+   * @return the computed image size, which the buffer may exceed
+   */
+  public int getSize() throws Exception {
+    if (yuvBuf == null)
+      throw new Exception(NO_ASSOC_ERROR);
+    return TJ.bufSizeYUV(yuvWidth, yuvPad, yuvHeight, yuvSubsamp);
+  }
+
+  protected long handle = 0;        // not referenced within this class
+  protected byte[] yuvBuf = null;   // this and the fields below: see setBuf()
+  protected int yuvPad = 0;
+  protected int yuvWidth = 0;
+  protected int yuvHeight = 0;
+  protected int yuvSubsamp = -1;    // -1 is outside the range setBuf() accepts
+};
diff --git a/java/org_libjpegturbo_turbojpeg_TJ.h b/java/org_libjpegturbo_turbojpeg_TJ.h
new file mode 100644
index 0000000..b00a128
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJ.h
@@ -0,0 +1,105 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJ */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJ
+#define _Included_org_libjpegturbo_turbojpeg_TJ
+#ifdef __cplusplus
+extern "C" {
+#endif
+#undef org_libjpegturbo_turbojpeg_TJ_NUMSAMP
+#define org_libjpegturbo_turbojpeg_TJ_NUMSAMP 6L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_444
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_444 0L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_422
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_422 1L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_420
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_420 2L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY 3L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_440
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_440 4L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_411
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_411 5L
+#undef org_libjpegturbo_turbojpeg_TJ_NUMPF
+#define org_libjpegturbo_turbojpeg_TJ_NUMPF 12L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_RGB
+#define org_libjpegturbo_turbojpeg_TJ_PF_RGB 0L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_BGR
+#define org_libjpegturbo_turbojpeg_TJ_PF_BGR 1L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_RGBX
+#define org_libjpegturbo_turbojpeg_TJ_PF_RGBX 2L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_BGRX
+#define org_libjpegturbo_turbojpeg_TJ_PF_BGRX 3L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_XBGR
+#define org_libjpegturbo_turbojpeg_TJ_PF_XBGR 4L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_XRGB
+#define org_libjpegturbo_turbojpeg_TJ_PF_XRGB 5L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_GRAY
+#define org_libjpegturbo_turbojpeg_TJ_PF_GRAY 6L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_RGBA
+#define org_libjpegturbo_turbojpeg_TJ_PF_RGBA 7L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_BGRA
+#define org_libjpegturbo_turbojpeg_TJ_PF_BGRA 8L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_ABGR
+#define org_libjpegturbo_turbojpeg_TJ_PF_ABGR 9L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_ARGB
+#define org_libjpegturbo_turbojpeg_TJ_PF_ARGB 10L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_CMYK
+#define org_libjpegturbo_turbojpeg_TJ_PF_CMYK 11L
+#undef org_libjpegturbo_turbojpeg_TJ_NUMCS
+#define org_libjpegturbo_turbojpeg_TJ_NUMCS 5L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_RGB
+#define org_libjpegturbo_turbojpeg_TJ_CS_RGB 0L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_YCbCr
+#define org_libjpegturbo_turbojpeg_TJ_CS_YCbCr 1L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_GRAY
+#define org_libjpegturbo_turbojpeg_TJ_CS_GRAY 2L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_CMYK
+#define org_libjpegturbo_turbojpeg_TJ_CS_CMYK 3L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_YCCK
+#define org_libjpegturbo_turbojpeg_TJ_CS_YCCK 4L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_BOTTOMUP
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_BOTTOMUP 2L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE 256L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTDCT
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_FASTDCT 2048L
+#undef org_libjpegturbo_turbojpeg_TJ_FLAG_ACCURATEDCT
+#define org_libjpegturbo_turbojpeg_TJ_FLAG_ACCURATEDCT 4096L
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJ
+ * Method:    bufSize
+ * Signature: (III)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
+  (JNIEnv *, jclass, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJ
+ * Method:    bufSizeYUV
+ * Signature: (IIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
+  (JNIEnv *, jclass, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJ
+ * Method:    bufSizeYUV
+ * Signature: (III)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III
+  (JNIEnv *, jclass, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJ
+ * Method:    getScalingFactors
+ * Signature: ()[Lorg/libjpegturbo/turbojpeg/TJScalingFactor;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors
+  (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJCompressor.h b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
new file mode 100644
index 0000000..edb23b4
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
@@ -0,0 +1,101 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJCompressor */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJCompressor
+#define _Included_org_libjpegturbo_turbojpeg_TJCompressor
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    destroy
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
+ * Signature: ([BIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
+ * Signature: ([BIIIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
+ * Signature: ([IIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII
+  (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compress
+ * Signature: ([IIIIIII[BIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII
+  (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    compressFromYUV
+ * Signature: ([BIIII[BII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    encodeYUV
+ * Signature: ([BIIII[BII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    encodeYUV
+ * Signature: ([BIIIIII[BIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3BIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    encodeYUV
+ * Signature: ([IIIII[BII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
+  (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jbyteArray, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJCompressor
+ * Method:    encodeYUV
+ * Signature: ([IIIIIII[BIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3BIII
+  (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
new file mode 100644
index 0000000..1d8205c
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
@@ -0,0 +1,101 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJDecompressor */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJDecompressor
+#define _Included_org_libjpegturbo_turbojpeg_TJDecompressor
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    destroy
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompressHeader
+ * Signature: ([BI)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
+  (JNIEnv *, jobject, jbyteArray, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
+ * Signature: ([BI[BIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
+ * Signature: ([BI[BIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint, jint, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
+ * Signature: ([BI[IIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jintArray, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompress
+ * Signature: ([BI[IIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jintArray, jint, jint, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompressToYUV
+ * Signature: ([BI[BI)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI
+  (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decompressToYUV
+ * Signature: ([BI[BIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decodeYUV
+ * Signature: ([BII[BIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3BIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jbyteArray, jint, jint, jint, jint, jint, jint, jint);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method:    decodeYUV
+ * Signature: ([BII[IIIIIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3IIIIIIII
+  (JNIEnv *, jobject, jbyteArray, jint, jint, jintArray, jint, jint, jint, jint, jint, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJTransformer.h b/java/org_libjpegturbo_turbojpeg_TJTransformer.h
new file mode 100644
index 0000000..a9dad4d
--- /dev/null
+++ b/java/org_libjpegturbo_turbojpeg_TJTransformer.h
@@ -0,0 +1,29 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_libjpegturbo_turbojpeg_TJTransformer */
+
+#ifndef _Included_org_libjpegturbo_turbojpeg_TJTransformer
+#define _Included_org_libjpegturbo_turbojpeg_TJTransformer
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJTransformer
+ * Method:    init
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_libjpegturbo_turbojpeg_TJTransformer
+ * Method:    transform
+ * Signature: ([BI[[B[Lorg/libjpegturbo/turbojpeg/TJTransform;I)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform
+  (JNIEnv *, jobject, jbyteArray, jint, jobjectArray, jobjectArray, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/jcapimin.c b/jcapimin.c
index 54fb8c5..3b005d3 100644
--- a/jcapimin.c
+++ b/jcapimin.c
@@ -1,8 +1,11 @@
 /*
  * jcapimin.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains application interface code for the compression half
@@ -32,12 +35,12 @@
   int i;
 
   /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
   if (version != JPEG_LIB_VERSION)
     ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_compress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+  if (structsize != sizeof(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int) sizeof(struct jpeg_compress_struct), (int) structsize);
 
   /* For debugging purposes, we zero the whole master structure.
    * But the application has already set the err pointer, and may have set
@@ -48,7 +51,7 @@
   {
     struct jpeg_error_mgr * err = cinfo->err;
     void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    MEMZERO(cinfo, sizeof(struct jpeg_compress_struct));
     cinfo->err = err;
     cinfo->client_data = client_data;
   }
@@ -63,17 +66,28 @@
 
   cinfo->comp_info = NULL;
 
-  for (i = 0; i < NUM_QUANT_TBLS; i++)
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
     cinfo->quant_tbl_ptrs[i] = NULL;
+#if JPEG_LIB_VERSION >= 70
+    cinfo->q_scale_factor[i] = 100;
+#endif
+  }
 
   for (i = 0; i < NUM_HUFF_TBLS; i++) {
     cinfo->dc_huff_tbl_ptrs[i] = NULL;
     cinfo->ac_huff_tbl_ptrs[i] = NULL;
   }
 
+#if JPEG_LIB_VERSION >= 80
+  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
+  cinfo->block_size = DCTSIZE;
+  cinfo->natural_order = jpeg_natural_order;
+  cinfo->lim_Se = DCTSIZE2-1;
+#endif
+
   cinfo->script_space = NULL;
 
-  cinfo->input_gamma = 1.0;	/* in case application forgets */
+  cinfo->input_gamma = 1.0;     /* in case application forgets */
 
   /* OK, I'm ready */
   cinfo->global_state = CSTATE_START;
@@ -161,15 +175,15 @@
     (*cinfo->master->prepare_for_pass) (cinfo);
     for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
       if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) iMCU_row;
-	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        cinfo->progress->pass_counter = (long) iMCU_row;
+        cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       }
       /* We bypass the main controller and invoke coef controller directly;
        * all work is being done from the coefficient buffer.
        */
       if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
-	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+        ERREXIT(cinfo, JERR_CANT_SUSPEND);
     }
     (*cinfo->master->finish_pass) (cinfo);
   }
@@ -190,9 +204,9 @@
 
 GLOBAL(void)
 jpeg_write_marker (j_compress_ptr cinfo, int marker,
-		   const JOCTET *dataptr, unsigned int datalen)
+                   const JOCTET *dataptr, unsigned int datalen)
 {
-  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
+  void (*write_marker_byte) (j_compress_ptr info, int val);
 
   if (cinfo->next_scanline != 0 ||
       (cinfo->global_state != CSTATE_SCANNING &&
@@ -201,7 +215,7 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
-  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */
   while (datalen--) {
     (*write_marker_byte) (cinfo, *dataptr);
     dataptr++;
@@ -236,14 +250,14 @@
  * To produce a pair of files containing abbreviated tables and abbreviated
  * image data, one would proceed as follows:
  *
- *		initialize JPEG object
- *		set JPEG parameters
- *		set destination to table file
- *		jpeg_write_tables(cinfo);
- *		set destination to image file
- *		jpeg_start_compress(cinfo, FALSE);
- *		write data...
- *		jpeg_finish_compress(cinfo);
+ *              initialize JPEG object
+ *              set JPEG parameters
+ *              set destination to table file
+ *              jpeg_write_tables(cinfo);
+ *              set destination to image file
+ *              jpeg_start_compress(cinfo, FALSE);
+ *              write data...
+ *              jpeg_finish_compress(cinfo);
  *
  * jpeg_write_tables has the side effect of marking all tables written
  * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
diff --git a/jcapistd.c b/jcapistd.c
index c0320b1..167f020 100644
--- a/jcapistd.c
+++ b/jcapistd.c
@@ -41,7 +41,7 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   if (write_all_tables)
-    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+    jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */
 
   /* (Re)initialize error mgr and destination modules */
   (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
@@ -75,7 +75,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
-		      JDIMENSION num_lines)
+                      JDIMENSION num_lines)
 {
   JDIMENSION row_ctr, rows_left;
 
@@ -118,7 +118,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
-		     JDIMENSION num_lines)
+                     JDIMENSION num_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
diff --git a/jcarith.c b/jcarith.c
new file mode 100644
index 0000000..71a84dd
--- /dev/null
+++ b/jcarith.c
@@ -0,0 +1,927 @@
+/*
+ * jcarith.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy encoder object for arithmetic encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */
+  INT32 a;               /* A register, normalized size of coding interval */
+  INT32 sc;        /* counter for stacked 0xFF values which might overflow */
+  INT32 zc;          /* counter for pending 0x00 output values which might *
+                          * be discarded at the end ("Pacman" termination) */
+  int ct;  /* bit shift counter, determines when next byte will be written */
+  int buffer;     /* most recent output byte != 0xFF; -1 while it is empty */
+
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS]; /* DC_STAT_BINS bytes each; NULL until start_pass allocates */
+  unsigned char * ac_stats[NUM_ARITH_TBLS]; /* AC_STAT_BINS bytes each; NULL until start_pass allocates */
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];   /* this module only sets and passes element [0] */
+} arith_entropy_encoder;
+
+typedef arith_entropy_encoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64   /* bytes per DC statistics area (at least 49 needed) */
+#define AC_STAT_BINS 256  /* bytes per AC statistics area (at least 245 needed) */
+
+/* NOTE: Uncomment the following #define if you want to use the
+ * given formula for calculating the AC conditioning parameter Kx
+ * for spectral selection progressive coding in section G.1.3.2
+ * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
+ * Although the spec and P&M authors claim that this "has proven
+ * to give good results for 8 bit precision samples", I'm not
+ * convinced yet that this is really beneficial.
+ * Early tests gave only very marginal compression enhancements
+ * (a few - around 5 or so - bytes even for very large files),
+ * which would turn out rather negative if we'd suppress the
+ * DAC (Define Arithmetic Conditioning) marker segments for
+ * the default parameters in the future.
+ * Note that currently the marker writing module emits 12-byte
+ * DAC segments for a full-component scan in a color image.
+ * This is not worth worrying about IMHO. However, since the
+ * spec defines the default values to be used if the tables
+ * are omitted (unlike Huffman tables, which are required
+ * anyway), one might optimize this behaviour in the future,
+ * and then it would be disadvantageous to use custom tables if
+ * they don't provide sufficient gain to exceed the DAC size.
+ *
+ * On the other hand, I'd consider it as a reasonable result
+ * that the conditioning has no significant influence on the
+ * compression performance. This means that the basic
+ * statistical model is already rather stable.
+ *
+ * Thus, at the moment, we use the default conditioning values
+ * anyway, and do not use the custom formula.
+ *
+#define CALCULATE_SPECTRAL_CONDITIONING
+ */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED  /* negative x: OR in one bits from bit 16-(shft) upward */
+#define ISHIFT_TEMPS    int ishift_temp;  /* scratch copy so that x is evaluated only once */
+#define IRIGHT_SHIFT(x,shft)  \
+        ((ishift_temp = (x)) < 0 ? \
+         (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+         (ishift_temp >> (shft)))
+#else  /* the plain >> operator is used unchanged */
+#define ISHIFT_TEMPS  /* expands to nothing: no scratch variable is needed */
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
+#endif /* RIGHT_SHIFT_IS_UNSIGNED */
+
+
+LOCAL(void)
+emit_byte (int val, j_compress_ptr cinfo)
+/* Append val (as a JOCTET) to the destination buffer; suspension is not supported. */
+{
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  *dest->next_output_byte++ = (JOCTET) val;
+  if (--dest->free_in_buffer == 0)  /* that byte used the last free slot */
+    if (! (*dest->empty_output_buffer) (cinfo))  /* a false return is treated as fatal */
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_compress_ptr cinfo)  /* also called directly by emit_restart */
+{
+  arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  INT32 temp;
+
+  /* Section D.1.8: Termination of encoding */
+
+  /* Find the e->c in the coding interval with the largest
+   * number of trailing zero bits */
+  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c)
+    e->c = temp + 0x8000L;  /* masked value fell below c: step up by 0x8000 */
+  else
+    e->c = temp;
+  /* Send remaining bytes to output */
+  e->c <<= e->ct;  /* apply the shifts still owed before the next byte is due */
+  if (e->c & 0xF8000000L) {  /* any of bits 27..31 set, i.e. (c >> 19) > 0xFF */
+    /* One final overflow has to be handled */
+    if (e->buffer >= 0) {  /* -1 means no byte is buffered */
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      emit_byte(e->buffer + 1, cinfo);  /* buffered byte plus the carry */
+      if (e->buffer + 1 == 0xFF)
+        emit_byte(0x00, cinfo);  /* 0x00 follows every emitted 0xFF */
+    }
+    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+    e->sc = 0;
+  } else {
+    if (e->buffer == 0)
+      ++e->zc;  /* count the zero byte as pending instead of writing it */
+    else if (e->buffer >= 0) {
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      emit_byte(e->buffer, cinfo);
+    }
+    if (e->sc) {
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      do {
+        emit_byte(0xFF, cinfo);
+        emit_byte(0x00, cinfo);  /* 0x00 follows every emitted 0xFF */
+      } while (--e->sc);
+    }
+  }
+  /* Output final bytes only if they are not 0x00 */
+  if (e->c & 0x7FFF800L) {  /* bits 11..26: the two bytes read below */
+    if (e->zc)  /* output final pending zero bytes */
+      do emit_byte(0x00, cinfo);
+      while (--e->zc);
+    emit_byte((e->c >> 19) & 0xFF, cinfo);
+    if (((e->c >> 19) & 0xFF) == 0xFF)
+      emit_byte(0x00, cinfo);
+    if (e->c & 0x7F800L) {  /* bits 11..18: second byte, written only if nonzero */
+      emit_byte((e->c >> 11) & 0xFF, cinfo);
+      if (((e->c >> 11) & 0xFF) == 0xFF)
+        emit_byte(0x00, cinfo);
+    }
+  }
+}
+
+
+/*
+ * The core arithmetic encoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
+ *
+ * Note: I've added full "Pacman" termination support to the
+ * byte output routines, which is equivalent to the optional
+ * Discard_final_zeros procedure (Figure D.15) in the spec.
+ * Thus, we always produce the shortest possible output
+ * stream compliant with the spec (no trailing zero bytes,
+ * except for FF stuffing).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(void)
+arith_encode (j_compress_ptr cinfo, unsigned char *st, int val)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;
+  register INT32 qe, temp;
+  register int sv;
+
+  /* Fetch values from our compact representation of Table D.2:
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;                     /* bit 7 = MPS value, bits 0..6 = table index */
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;     /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;     /* Next_Index_MPS */
+
+  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
+  e->a -= qe;                   /* a is now the MPS sub-interval size */
+  if (val != (sv >> 7)) {
+    /* Encode the less probable symbol */
+    if (e->a >= qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency, otherwise code the LPS
+       * as usual: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nl;     /* Estimate_after_LPS */
+  } else {
+    /* Encode the more probable symbol */
+    if (e->a >= 0x8000L)
+      return;  /* A >= 0x8000 -> ready, no renormalization required */
+    if (e->a < qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nm;     /* Estimate_after_MPS */
+  }
+
+  /* Renormalization & data output per section D.1.6 */
+  do {
+    e->a <<= 1;
+    e->c <<= 1;
+    if (--e->ct == 0) {
+      /* Another byte is ready for output */
+      temp = e->c >> 19;        /* candidate byte plus any carry above bit 7 */
+      if (temp > 0xFF) {
+        /* Handle overflow over all stacked 0xFF bytes */
+        if (e->buffer >= 0) {   /* -1 means no byte is buffered */
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer + 1, cinfo);  /* buffered byte plus the carry */
+          if (e->buffer + 1 == 0xFF)
+            emit_byte(0x00, cinfo);         /* 0x00 follows every emitted 0xFF */
+        }
+        e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+        e->sc = 0;
+        /* Note: The 3 spacer bits in the C register guarantee
+         * that the new buffer byte can't be 0xFF here
+         * (see page 160 in the P&M JPEG book). */
+        e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
+      } else if (temp == 0xFF) {
+        ++e->sc;  /* stack 0xFF byte (which might overflow later) */
+      } else {
+        /* Output all stacked 0xFF bytes, they will not overflow any more */
+        if (e->buffer == 0)
+          ++e->zc;              /* count the zero byte as pending instead of writing it */
+        else if (e->buffer >= 0) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer, cinfo);
+        }
+        if (e->sc) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          do {
+            emit_byte(0xFF, cinfo);
+            emit_byte(0x00, cinfo);         /* 0x00 follows every emitted 0xFF */
+          } while (--e->sc);
+        }
+        e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
+      }
+      e->c &= 0x7FFFFL;         /* keep only the 19 bits below the byte just taken */
+      e->ct += 8;               /* the next byte is due after 8 more shifts */
+    }
+  } while (e->a < 0x8000L);
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart (j_compress_ptr cinfo, int restart_num)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci;
+  jpeg_component_info * compptr;
+
+  finish_pass(cinfo);           /* terminate and flush the code stream written so far */
+
+  emit_byte(0xFF, cinfo);       /* restart marker: 0xFF, then JPEG_RST0 + number */
+  emit_byte(JPEG_RST0 + restart_num, cinfo);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->progressive_mode == 0 || cinfo->Se) {
+      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic encoding variables (same values as set in start_pass) */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl;
+  int v, v2, m;
+  ISHIFT_TEMPS                  /* scratch declaration for IRIGHT_SHIFT; may be empty */
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;   /* wrap the restart number to 0..7 */
+    }
+    entropy->restarts_to_go--;          /* account for the MCU encoded below */
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = m - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;      /* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = m;     /* the shifted value becomes the new prediction */
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
+      } else {
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {                     /* v now holds magnitude - 1 */
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {              /* one decision per further halving of v */
+          arith_encode(cinfo, st, 1);
+          m <<= 1;                      /* m ends up as the leading set bit of v */
+          st += 1;
+        }
+      }
+      arith_encode(cinfo, st, 0);       /* terminates the category decisions */
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;    /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] += 8;   /* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;                         /* one shared bin, 14 past the last category bin */
+      while (m >>= 1)                   /* bits of v below the leading one, high to low */
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke;
+  int v, v2, m;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;   /* wrap the restart number to 0..7 */
+    }
+    entropy->restarts_to_go--;          /* account for the MCU encoded below */
+  }
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];                  /* only block 0 and component 0 are read here */
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  for (ke = cinfo->Se; ke > 0; ke--)
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+
+  /* Figure F.5: Encode_AC_Coefficients */
+  for (k = cinfo->Ss; k <= ke; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);  /* three bins per index k */
+    arith_encode(cinfo, st, 0);         /* EOB decision */
+    for (;;) {                          /* left via break at the next nonzero shifted coef */
+      if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 0);  /* sign decision: non-negative */
+          break;
+        }
+      } else {
+        v = -v;
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 1);  /* sign decision: negative */
+          break;
+        }
+      }
+      arith_encode(cinfo, st + 1, 0); st += 3; k++;  /* zero: advance k and its bins */
+    }
+    st += 2;
+    /* Figure F.8: Encoding the magnitude category of v */
+    m = 0;
+    if (v -= 1) {                       /* v now holds magnitude - 1 */
+      arith_encode(cinfo, st, 1);
+      m = 1;
+      v2 = v;
+      if (v2 >>= 1) {
+        arith_encode(cinfo, st, 1);
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;                      /* m ends up as the leading set bit of v */
+          st += 1;
+        }
+      }
+    }
+    arith_encode(cinfo, st, 0);         /* terminates the category decisions */
+    /* Figure F.9: Encoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)                     /* bits of v below the leading one, high to low */
+      arith_encode(cinfo, st, (m & v) ? 1 : 0);
+  }
+  /* Encode EOB decision only if k <= cinfo->Se */
+  if (k <= cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  unsigned char *st;
+  int Al, blkn;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;   /* wrap the restart number to 0..7 */
+    }
+    entropy->restarts_to_go--;          /* account for the MCU encoded below */
+  }
+
+  st = entropy->fixed_bin;      /* use fixed probability estimation */
+  Al = cinfo->Al;               /* bit position refined by this scan */
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke, kex;
+  int v;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;   /* wrap the restart number to 0..7 */
+    }
+    entropy->restarts_to_go--;          /* account for the MCU encoded below */
+  }
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];                  /* only block 0 and component 0 are read here */
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Section G.1.3.3: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  for (ke = cinfo->Se; ke > 0; ke--)
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = ke; kex > 0; kex--)        /* same scan as above, but shifting by Ah */
+    if ((v = (*block)[jpeg_natural_order[kex]]) >= 0) {
+      if (v >>= cinfo->Ah) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Ah) break;
+    }
+
+  /* Figure G.10: Encode_AC_Coefficients_SA */
+  for (k = cinfo->Ss; k <= ke; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);  /* three bins per index k */
+    if (k > kex)                        /* no EOB decision is coded up to index kex */
+      arith_encode(cinfo, st, 0);       /* EOB decision */
+    for (;;) {                          /* left via break at the next nonzero shifted coef */
+      if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));  /* code only the new low bit */
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 0);  /* sign decision: non-negative */
+          }
+          break;
+        }
+      } else {
+        v = -v;
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));  /* code only the new low bit */
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 1);  /* sign decision: negative */
+          }
+          break;
+        }
+      }
+      arith_encode(cinfo, st + 1, 0); st += 3; k++;  /* zero: advance k and its bins */
+    }
+  }
+  /* Encode EOB decision only if k <= cinfo->Se */
+  if (k <= cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, k, ke;
+  int v, v2, m;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;   /* wrap the restart number to 0..7 */
+    }
+    entropy->restarts_to_go--;          /* account for the MCU encoded below */
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;      /* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = (*block)[0];  /* DC value becomes the new prediction */
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
+      } else {
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {                     /* v now holds magnitude - 1 */
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {              /* one decision per further halving of v */
+          arith_encode(cinfo, st, 1);
+          m <<= 1;                      /* m ends up as the leading set bit of v */
+          st += 1;
+        }
+      }
+      arith_encode(cinfo, st, 0);       /* terminates the category decisions */
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;    /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] += 8;   /* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;                         /* one shared bin, 14 past the last category bin */
+      while (m >>= 1)                   /* bits of v below the leading one, high to low */
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+
+    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+    tbl = compptr->ac_tbl_no;
+
+    /* Establish EOB (end-of-block) index */
+    for (ke = DCTSIZE2 - 1; ke > 0; ke--)  /* ke stays 0 if every AC coef is zero */
+      if ((*block)[jpeg_natural_order[ke]]) break;
+
+    /* Figure F.5: Encode_AC_Coefficients */
+    for (k = 1; k <= ke; k++) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);  /* three bins per index k */
+      arith_encode(cinfo, st, 0);       /* EOB decision */
+      while ((v = (*block)[jpeg_natural_order[k]]) == 0) {
+        arith_encode(cinfo, st + 1, 0); st += 3; k++;  /* zero: advance k and its bins */
+      }
+      arith_encode(cinfo, st + 1, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, entropy->fixed_bin, 0);
+      } else {
+        v = -v;
+        arith_encode(cinfo, entropy->fixed_bin, 1);
+      }
+      st += 2;
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {                     /* v now holds magnitude - 1 */
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        if (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (v2 >>= 1) {
+            arith_encode(cinfo, st, 1);
+            m <<= 1;                    /* m ends up as the leading set bit of v */
+            st += 1;
+          }
+        }
+      }
+      arith_encode(cinfo, st, 0);       /* terminates the category decisions */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)                   /* bits of v below the leading one, high to low */
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+    /* Encode EOB decision only if k <= DCTSIZE2 - 1 */
+    if (k <= DCTSIZE2 - 1) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);
+      arith_encode(cinfo, st, 1);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (gather_statistics)
+    /* Make sure to avoid that in the master control logic!
+     * We are fully adaptive here and need no extra
+     * statistics gathering pass!
+     */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+
+  /* We assume jcmaster.c already validated the progressive scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->progressive_mode) {
+    if (cinfo->Ah == 0) {               /* Ah == 0 selects the "first" encoders */
+      if (cinfo->Ss == 0)               /* Ss == 0 selects the DC encoders */
+        entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+        entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+        entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else
+        entropy->pub.encode_mcu = encode_mcu_AC_refine;
+    }
+  } else
+    entropy->pub.encode_mcu = encode_mcu;
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)  /* allocate once, then reuse on later passes */
+        entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->progressive_mode == 0 || cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)  /* allocate once, then reuse on later passes */
+        entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+#ifdef CALCULATE_SPECTRAL_CONDITIONING
+      if (cinfo->progressive_mode)
+        /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
+        cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4);
+#endif
+    }
+  }
+
+  /* Initialize arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_arith_encoder (j_compress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                sizeof(arith_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;  /* install as the entropy module */
+  entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;  /* pub.encode_mcu is chosen later, in start_pass */
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;  /* arith_encode reads this byte as MPS bit plus jpeg_aritab index */
+}
diff --git a/jccoefct.c b/jccoefct.c
index 1963ddb..a4acce5 100644
--- a/jccoefct.c
+++ b/jccoefct.c
@@ -1,8 +1,10 @@
 /*
  * jccoefct.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the coefficient buffer controller for compression.
@@ -34,19 +36,16 @@
 typedef struct {
   struct jpeg_c_coef_controller pub; /* public fields */
 
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* For single-pass compression, it's sufficient to buffer just one MCU
    * (although this may prove a bit slow in practice).  We allocate a
    * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
-   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
-   * it's not really very big; this is to keep the module interfaces unchanged
-   * when a large coefficient buffer is necessary.)
-   * In multi-pass modes, this array points to the current MCU's blocks
-   * within the virtual arrays.
+   * MCU constructed and sent.  In multi-pass modes, this array points to the
+   * current MCU's blocks within the virtual arrays.
    */
   JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
 
@@ -59,12 +58,12 @@
 
 /* Forward declarations */
 METHODDEF(boolean) compress_data
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
 #ifdef FULL_COEF_BUFFER_SUPPORTED
 METHODDEF(boolean) compress_first_pass
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
 METHODDEF(boolean) compress_output
-    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+        (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
 #endif
 
 
@@ -143,7 +142,7 @@
 compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, bi, ci, yindex, yoffset, blockcnt;
@@ -154,7 +153,7 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Determine where data comes from in input_buf and do the DCT thing.
        * Each call on forward_DCT processes a horizontal row of DCT blocks
        * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
@@ -166,46 +165,46 @@
        */
       blkn = 0;
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	xpos = MCU_col_num * compptr->MCU_sample_width;
-	ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-					 input_buf[compptr->component_index],
-					 coef->MCU_buffer[blkn],
-					 ypos, xpos, (JDIMENSION) blockcnt);
-	    if (blockcnt < compptr->MCU_width) {
-	      /* Create some dummy blocks at the right edge of the image. */
-	      jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
-			(compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
-	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
-		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
-	      }
-	    }
-	  } else {
-	    /* Create a row of dummy blocks at the bottom of the image. */
-	    jzero_far((void FAR *) coef->MCU_buffer[blkn],
-		      compptr->MCU_width * SIZEOF(JBLOCK));
-	    for (bi = 0; bi < compptr->MCU_width; bi++) {
-	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  ypos += DCTSIZE;
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                : compptr->last_col_width;
+        xpos = MCU_col_num * compptr->MCU_sample_width;
+        ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yoffset+yindex < compptr->last_row_height) {
+            (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+                                         input_buf[compptr->component_index],
+                                         coef->MCU_buffer[blkn],
+                                         ypos, xpos, (JDIMENSION) blockcnt);
+            if (blockcnt < compptr->MCU_width) {
+              /* Create some dummy blocks at the right edge of the image. */
+              jzero_far((void *) coef->MCU_buffer[blkn + blockcnt],
+                        (compptr->MCU_width - blockcnt) * sizeof(JBLOCK));
+              for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+                coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+              }
+            }
+          } else {
+            /* Create a row of dummy blocks at the bottom of the image. */
+            jzero_far((void *) coef->MCU_buffer[blkn],
+                      compptr->MCU_width * sizeof(JBLOCK));
+            for (bi = 0; bi < compptr->MCU_width; bi++) {
+              coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+            }
+          }
+          blkn += compptr->MCU_width;
+          ypos += DCTSIZE;
+        }
       }
       /* Try to write the MCU.  In event of a suspension failure, we will
        * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
        */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -280,17 +279,17 @@
     for (block_row = 0; block_row < block_rows; block_row++) {
       thisblockrow = buffer[block_row];
       (*cinfo->fdct->forward_DCT) (cinfo, compptr,
-				   input_buf[ci], thisblockrow,
-				   (JDIMENSION) (block_row * DCTSIZE),
-				   (JDIMENSION) 0, blocks_across);
+                                   input_buf[ci], thisblockrow,
+                                   (JDIMENSION) (block_row * DCTSIZE),
+                                   (JDIMENSION) 0, blocks_across);
       if (ndummy > 0) {
-	/* Create dummy blocks at the right edge of the image. */
-	thisblockrow += blocks_across; /* => first dummy block */
-	jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
-	lastDC = thisblockrow[-1][0];
-	for (bi = 0; bi < ndummy; bi++) {
-	  thisblockrow[bi][0] = lastDC;
-	}
+        /* Create dummy blocks at the right edge of the image. */
+        thisblockrow += blocks_across; /* => first dummy block */
+        jzero_far((void *) thisblockrow, ndummy * sizeof(JBLOCK));
+        lastDC = thisblockrow[-1][0];
+        for (bi = 0; bi < ndummy; bi++) {
+          thisblockrow[bi][0] = lastDC;
+        }
       }
     }
     /* If at end of image, create dummy block rows as needed.
@@ -299,22 +298,22 @@
      * This squeezes a few more bytes out of the resulting file...
      */
     if (coef->iMCU_row_num == last_iMCU_row) {
-      blocks_across += ndummy;	/* include lower right corner */
+      blocks_across += ndummy;  /* include lower right corner */
       MCUs_across = blocks_across / h_samp_factor;
       for (block_row = block_rows; block_row < compptr->v_samp_factor;
-	   block_row++) {
-	thisblockrow = buffer[block_row];
-	lastblockrow = buffer[block_row-1];
-	jzero_far((void FAR *) thisblockrow,
-		  (size_t) (blocks_across * SIZEOF(JBLOCK)));
-	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
-	  lastDC = lastblockrow[h_samp_factor-1][0];
-	  for (bi = 0; bi < h_samp_factor; bi++) {
-	    thisblockrow[bi][0] = lastDC;
-	  }
-	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
-	  lastblockrow += h_samp_factor;
-	}
+           block_row++) {
+        thisblockrow = buffer[block_row];
+        lastblockrow = buffer[block_row-1];
+        jzero_far((void *) thisblockrow,
+                  (size_t) (blocks_across * sizeof(JBLOCK)));
+        for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+          lastDC = lastblockrow[h_samp_factor-1][0];
+          for (bi = 0; bi < h_samp_factor; bi++) {
+            thisblockrow[bi][0] = lastDC;
+          }
+          thisblockrow += h_samp_factor; /* advance to next MCU in row */
+          lastblockrow += h_samp_factor;
+        }
       }
     }
   }
@@ -341,7 +340,7 @@
 compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   int blkn, ci, xindex, yindex, yoffset;
   JDIMENSION start_col;
   JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
@@ -364,25 +363,25 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
       }
       /* Try to write the MCU. */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -408,7 +407,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                sizeof(my_coef_controller));
   cinfo->coef = (struct jpeg_c_coef_controller *) coef;
   coef->pub.start_pass = start_pass_coef;
 
@@ -421,14 +420,14 @@
     jpeg_component_info *compptr;
 
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+                                (long) compptr->h_samp_factor),
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor),
+         (JDIMENSION) compptr->v_samp_factor);
     }
 #else
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@@ -440,7 +439,7 @@
 
     buffer = (JBLOCKROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                  C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
     for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
diff --git a/jccolext.c b/jccolext.c
new file mode 100644
index 0000000..2c6b7ac
--- /dev/null
+++ b/jccolext.c
@@ -0,0 +1,147 @@
+/*
+ * jccolext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2012, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+
+/* This file is included by jccolor.c */
+
+
+/*
+ * Convert num_rows rows of interleaved RGB samples to planar YCbCr, using the
+ * lookup table in cconvert->rgb_ycc_tab.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.  Each input
+ * row therefore holds RGB_PIXELSIZE samples for every output sample.
+ *
+ * A starting row offset (output_row) is provided only for the output buffer.
+ * The caller can adjust the passed input_buf value for any input row offset.
+ */
+
+INLINE
+LOCAL(void)
+rgb_ycc_convert_internal (j_compress_ptr cinfo,
+                          JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                          JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;  /* RGB->YCbCr table */
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;       /* pixels per row */
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;                 /* next interleaved input row */
+    outptr0 = output_buf[0][output_row];  /* Y plane */
+    outptr1 = output_buf[1][output_row];  /* Cb plane */
+    outptr2 = output_buf[2][output_row];  /* Cr plane */
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);     /* RGB_* come from the includer */
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;             /* step to the next input pixel */
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y: sum of the three per-channel table entries, scaled back down */
+      outptr0[col] = (JSAMPLE)
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
+      /* Cb: same pattern, using the *_CB_OFF sections of the table */
+      outptr1[col] = (JSAMPLE)
+                ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+                 >> SCALEBITS);
+      /* Cr: same pattern, using the *_CR_OFF sections of the table */
+      outptr2[col] = (JSAMPLE)
+                ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+                 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert num_rows rows of interleaved RGB samples to a single gray plane.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr; only output_buf[0] is written.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+INLINE
+LOCAL(void)
+rgb_gray_convert_internal (j_compress_ptr cinfo,
+                           JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                           JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;  /* only *_Y_OFF is read */
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;       /* pixels per row */
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;                 /* next interleaved input row */
+    outptr = output_buf[0][output_row];   /* sole output plane */
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);     /* RGB_* come from the includer */
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;             /* step to the next input pixel */
+      /* Y: sum of the three per-channel table entries, scaled back down */
+      outptr[col] = (JSAMPLE)
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert num_rows rows of interleaved extended-RGB samples to plain RGB:
+ * each pixel is split into separate R, G, B planes with no arithmetic.
+ */
+
+INLINE
+LOCAL(void)
+rgb_rgb_convert_internal (j_compress_ptr cinfo,
+                          JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                          JDIMENSION output_row, int num_rows)
+{
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;       /* pixels per row */
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;                 /* next interleaved input row */
+    outptr0 = output_buf[0][output_row];  /* R plane */
+    outptr1 = output_buf[1][output_row];  /* G plane */
+    outptr2 = output_buf[2][output_row];  /* B plane */
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      outptr0[col] = GETJSAMPLE(inptr[RGB_RED]);    /* RGB_* offsets are */
+      outptr1[col] = GETJSAMPLE(inptr[RGB_GREEN]);  /* supplied by the   */
+      outptr2[col] = GETJSAMPLE(inptr[RGB_BLUE]);   /* including file    */
+      inptr += RGB_PIXELSIZE;             /* step to the next input pixel */
+    }
+  }
+}
diff --git a/jccolor.c b/jccolor.c
index 0a8a4b5..4be75f7 100644
--- a/jccolor.c
+++ b/jccolor.c
@@ -1,8 +1,12 @@
 /*
  * jccolor.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2009-2012, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains input colorspace conversion routines.
@@ -11,6 +15,8 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsimd.h"
+#include "jconfigint.h"
 
 
 /* Private subobject */
@@ -19,7 +25,7 @@
   struct jpeg_color_converter pub; /* public fields */
 
   /* Private state for RGB->YCC conversion */
-  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion */
+  INT32 * rgb_ycc_tab;          /* => table for RGB to YCbCr conversion */
 } my_color_converter;
 
 typedef my_color_converter * my_cconvert_ptr;
@@ -31,9 +37,9 @@
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
  * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
- *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ *      Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
  * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
@@ -55,10 +61,10 @@
  * in the tables to save adding them separately in the inner loop.
  */
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define CBCR_OFFSET     ((INT32) CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
 
 /* We allocate one big table and divide it up into eight parts, instead of
  * doing eight alloc_small requests.  This lets us use a single table base
@@ -66,16 +72,121 @@
  * machines (more than can hold all eight addresses, anyway).
  */
 
-#define R_Y_OFF		0			/* offset to R => Y section */
-#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
-#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* etc. */
-#define R_CB_OFF	(3*(MAXJSAMPLE+1))
-#define G_CB_OFF	(4*(MAXJSAMPLE+1))
-#define B_CB_OFF	(5*(MAXJSAMPLE+1))
-#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
-#define G_CR_OFF	(6*(MAXJSAMPLE+1))
-#define B_CR_OFF	(7*(MAXJSAMPLE+1))
-#define TABLE_SIZE	(8*(MAXJSAMPLE+1))
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1*(MAXJSAMPLE+1))      /* offset to G => Y section */
+#define B_Y_OFF         (2*(MAXJSAMPLE+1))      /* etc. */
+#define R_CB_OFF        (3*(MAXJSAMPLE+1))
+#define G_CB_OFF        (4*(MAXJSAMPLE+1))
+#define B_CB_OFF        (5*(MAXJSAMPLE+1))
+#define R_CR_OFF        B_CB_OFF                /* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF        (6*(MAXJSAMPLE+1))
+#define B_CR_OFF        (7*(MAXJSAMPLE+1))
+#define TABLE_SIZE      (8*(MAXJSAMPLE+1))
+
+
+/* Include inline routines for colorspace extensions.  jccolext.c is included
+ * once per pixel layout, with RGB_* and function names remapped each time. */
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define rgb_ycc_convert_internal extrgb_ycc_convert_internal
+#define rgb_gray_convert_internal extrgb_gray_convert_internal
+#define rgb_rgb_convert_internal extrgb_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal
+#define rgb_gray_convert_internal extrgbx_gray_convert_internal
+#define rgb_rgb_convert_internal extrgbx_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define rgb_ycc_convert_internal extbgr_ycc_convert_internal
+#define rgb_gray_convert_internal extbgr_gray_convert_internal
+#define rgb_rgb_convert_internal extbgr_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal
+#define rgb_gray_convert_internal extbgrx_gray_convert_internal
+#define rgb_rgb_convert_internal extbgrx_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal
+#define rgb_gray_convert_internal extxbgr_gray_convert_internal
+#define rgb_rgb_convert_internal extxbgr_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal
+#define rgb_gray_convert_internal extxrgb_gray_convert_internal
+#define rgb_rgb_convert_internal extxrgb_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
 
 
 /*
@@ -92,7 +203,7 @@
   /* Allocate and fill in the conversion tables. */
   cconvert->rgb_ycc_tab = rgb_ycc_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(TABLE_SIZE * SIZEOF(INT32)));
+                                (TABLE_SIZE * sizeof(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
     rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
@@ -116,58 +227,46 @@
 
 /*
  * Convert some rows of samples to the JPEG colorspace.
- *
- * Note that we change from the application's interleaved-pixel format
- * to our internal noninterleaved, one-plane-per-component format.
- * The input buffer is therefore three times as wide as the output buffer.
- *
- * A starting row offset is provided only for the output buffer.  The caller
- * can easily adjust the passed input_buf value to accommodate any row
- * offset required on that side.
  */
 
 METHODDEF(void)
 rgb_ycc_convert (j_compress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		 JDIMENSION output_row, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                 JDIMENSION output_row, int num_rows)
 {
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    default:
+      rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                               num_rows);
+      break;
   }
 }
 
@@ -177,38 +276,92 @@
 
 /*
  * Convert some rows of samples to the JPEG colorspace.
- * This version handles RGB->grayscale conversion, which is the same
- * as the RGB->Y portion of RGB->YCbCr.
- * We assume rgb_ycc_start has been called (we only use the Y tables).
  */
 
 METHODDEF(void)
 rgb_gray_convert (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		  JDIMENSION output_row, int num_rows)
+                  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                  JDIMENSION output_row, int num_rows)
 {
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    default:
+      rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+      break;
+  }
+}
 
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[RGB_RED]);
-      g = GETJSAMPLE(inptr[RGB_GREEN]);
-      b = GETJSAMPLE(inptr[RGB_BLUE]);
-      inptr += RGB_PIXELSIZE;
-      /* Y */
-      outptr[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-    }
+
+/*
+ * Extended RGB to plain RGB conversion; dispatches on cinfo->in_color_space.
+ */
+
+METHODDEF(void)
+rgb_rgb_convert (j_compress_ptr cinfo,
+                  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                  JDIMENSION output_row, int num_rows)
+{
+  switch (cinfo->in_color_space) {  /* pick the variant for this layout */
+    case JCS_EXT_RGB:
+      extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:              /* RGBA shares the RGBX variant */
+      extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:              /* BGRA shares the BGRX variant */
+      extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:              /* ABGR shares the XBGR variant */
+      extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:              /* ARGB shares the XRGB variant */
+      extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    default:                        /* any other value: unprefixed variant */
+      rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                               num_rows);
+      break;
   }
 }
 
@@ -223,8 +376,8 @@
 
 METHODDEF(void)
 cmyk_ycck_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
+                   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                   JDIMENSION output_row, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int r, g, b;
@@ -246,7 +399,7 @@
       g = MAXJSAMPLE - GETJSAMPLE(inptr[1]);
       b = MAXJSAMPLE - GETJSAMPLE(inptr[2]);
       /* K passes through as-is */
-      outptr3[col] = inptr[3];	/* don't need GETJSAMPLE here */
+      outptr3[col] = inptr[3];  /* don't need GETJSAMPLE here */
       inptr += 4;
       /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
        * must be too; we do not need an explicit range-limiting operation.
@@ -255,16 +408,16 @@
        */
       /* Y */
       outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
       /* Cb */
       outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+                 >> SCALEBITS);
       /* Cr */
       outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
+                ((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+                 >> SCALEBITS);
     }
   }
 }
@@ -278,8 +431,8 @@
 
 METHODDEF(void)
 grayscale_convert (j_compress_ptr cinfo,
-		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-		   JDIMENSION output_row, int num_rows)
+                   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                   JDIMENSION output_row, int num_rows)
 {
   register JSAMPROW inptr;
   register JSAMPROW outptr;
@@ -292,7 +445,7 @@
     outptr = output_buf[0][output_row];
     output_row++;
     for (col = 0; col < num_cols; col++) {
-      outptr[col] = inptr[0];	/* don't need GETJSAMPLE() here */
+      outptr[col] = inptr[0];   /* don't need GETJSAMPLE() here */
       inptr += instride;
     }
   }
@@ -307,8 +460,8 @@
 
 METHODDEF(void)
 null_convert (j_compress_ptr cinfo,
-	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-	      JDIMENSION output_row, int num_rows)
+              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+              JDIMENSION output_row, int num_rows)
 {
   register JSAMPROW inptr;
   register JSAMPROW outptr;
@@ -323,8 +476,8 @@
       inptr = *input_buf;
       outptr = output_buf[ci][output_row];
       for (col = 0; col < num_cols; col++) {
-	outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
-	inptr += nc;
+        outptr[col] = inptr[ci]; /* don't need GETJSAMPLE() here */
+        inptr += nc;
       }
     }
     input_buf++;
@@ -355,7 +508,7 @@
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_converter));
+                                sizeof(my_color_converter));
   cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
   /* set start_pass to null method until we find out differently */
   cconvert->pub.start_pass = null_method;
@@ -368,11 +521,19 @@
     break;
 
   case JCS_RGB:
-#if RGB_PIXELSIZE != 3
-    if (cinfo->input_components != RGB_PIXELSIZE)
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+    if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
-#endif /* else share code with YCbCr */
 
   case JCS_YCbCr:
     if (cinfo->input_components != 3)
@@ -385,7 +546,7 @@
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
 
-  default:			/* JCS_UNKNOWN can be anything */
+  default:                      /* JCS_UNKNOWN can be anything */
     if (cinfo->input_components < 1)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
@@ -398,9 +559,23 @@
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     if (cinfo->in_color_space == JCS_GRAYSCALE)
       cconvert->pub.color_convert = grayscale_convert;
-    else if (cinfo->in_color_space == JCS_RGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_gray_convert;
+    else if (cinfo->in_color_space == JCS_RGB ||
+             cinfo->in_color_space == JCS_EXT_RGB ||
+             cinfo->in_color_space == JCS_EXT_RGBX ||
+             cinfo->in_color_space == JCS_EXT_BGR ||
+             cinfo->in_color_space == JCS_EXT_BGRX ||
+             cinfo->in_color_space == JCS_EXT_XBGR ||
+             cinfo->in_color_space == JCS_EXT_XRGB ||
+             cinfo->in_color_space == JCS_EXT_RGBA ||
+             cinfo->in_color_space == JCS_EXT_BGRA ||
+             cinfo->in_color_space == JCS_EXT_ABGR ||
+             cinfo->in_color_space == JCS_EXT_ARGB) {
+      if (jsimd_can_rgb_gray())
+        cconvert->pub.color_convert = jsimd_rgb_gray_convert;
+      else {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub.color_convert = rgb_gray_convert;
+      }
     } else if (cinfo->in_color_space == JCS_YCbCr)
       cconvert->pub.color_convert = grayscale_convert;
     else
@@ -410,8 +585,28 @@
   case JCS_RGB:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)
-      cconvert->pub.color_convert = null_convert;
+    if (rgb_red[cinfo->in_color_space] == 0 &&
+        rgb_green[cinfo->in_color_space] == 1 &&
+        rgb_blue[cinfo->in_color_space] == 2 &&
+        rgb_pixelsize[cinfo->in_color_space] == 3) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else if (cinfo->in_color_space == JCS_RGB ||
+               cinfo->in_color_space == JCS_EXT_RGB ||
+               cinfo->in_color_space == JCS_EXT_RGBX ||
+               cinfo->in_color_space == JCS_EXT_BGR ||
+               cinfo->in_color_space == JCS_EXT_BGRX ||
+               cinfo->in_color_space == JCS_EXT_XBGR ||
+               cinfo->in_color_space == JCS_EXT_XRGB ||
+               cinfo->in_color_space == JCS_EXT_RGBA ||
+               cinfo->in_color_space == JCS_EXT_BGRA ||
+               cinfo->in_color_space == JCS_EXT_ABGR ||
+               cinfo->in_color_space == JCS_EXT_ARGB)
+      cconvert->pub.color_convert = rgb_rgb_convert;
     else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
@@ -419,21 +614,45 @@
   case JCS_YCbCr:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_ycc_convert;
-    } else if (cinfo->in_color_space == JCS_YCbCr)
-      cconvert->pub.color_convert = null_convert;
-    else
+    if (cinfo->in_color_space == JCS_RGB ||
+        cinfo->in_color_space == JCS_EXT_RGB ||
+        cinfo->in_color_space == JCS_EXT_RGBX ||
+        cinfo->in_color_space == JCS_EXT_BGR ||
+        cinfo->in_color_space == JCS_EXT_BGRX ||
+        cinfo->in_color_space == JCS_EXT_XBGR ||
+        cinfo->in_color_space == JCS_EXT_XRGB ||
+        cinfo->in_color_space == JCS_EXT_RGBA ||
+        cinfo->in_color_space == JCS_EXT_BGRA ||
+        cinfo->in_color_space == JCS_EXT_ABGR ||
+        cinfo->in_color_space == JCS_EXT_ARGB) {
+      if (jsimd_can_rgb_ycc())
+        cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
+      else {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub.color_convert = rgb_ycc_convert;
+      }
+    } else if (cinfo->in_color_space == JCS_YCbCr) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_CMYK:
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK)
-      cconvert->pub.color_convert = null_convert;
-    else
+    if (cinfo->in_color_space == JCS_CMYK) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
@@ -443,17 +662,27 @@
     if (cinfo->in_color_space == JCS_CMYK) {
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = cmyk_ycck_convert;
-    } else if (cinfo->in_color_space == JCS_YCCK)
-      cconvert->pub.color_convert = null_convert;
-    else
+    } else if (cinfo->in_color_space == JCS_YCCK) {
+#if defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub.color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub.color_convert = null_convert;
+    } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
-  default:			/* allow null conversion of JCS_UNKNOWN */
+  default:                      /* allow null conversion of JCS_UNKNOWN */
     if (cinfo->jpeg_color_space != cinfo->in_color_space ||
-	cinfo->num_components != cinfo->input_components)
+        cinfo->num_components != cinfo->input_components)
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-    cconvert->pub.color_convert = null_convert;
+#if defined(__mips__)
+    if (jsimd_c_can_null_convert())
+      cconvert->pub.color_convert = jsimd_c_null_convert;
+    else
+#endif
+      cconvert->pub.color_convert = null_convert;
     break;
   }
 }
diff --git a/jcdctmgr.c b/jcdctmgr.c
index 61fa79b..7893572 100644
--- a/jcdctmgr.c
+++ b/jcdctmgr.c
@@ -1,8 +1,12 @@
 /*
  * jcdctmgr.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2011, 2014 D. R. Commander
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the forward-DCT management logic.
@@ -14,16 +18,37 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+#include "jsimddct.h"
 
 
 /* Private subobject for this module */
 
+typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
+typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
+
+typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
+                                     JDIMENSION start_col,
+                                     DCTELEM * workspace);
+typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
+                                           JDIMENSION start_col,
+                                           FAST_FLOAT *workspace);
+
+typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM * divisors,
+                                     DCTELEM * workspace);
+typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
+                                           FAST_FLOAT * divisors,
+                                           FAST_FLOAT * workspace);
+
+METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
+
 typedef struct {
-  struct jpeg_forward_dct pub;	/* public fields */
+  struct jpeg_forward_dct pub;  /* public fields */
 
   /* Pointer to the DCT routine actually in use */
-  forward_DCT_method_ptr do_dct;
+  forward_DCT_method_ptr dct;
+  convsamp_method_ptr convsamp;
+  quantize_method_ptr quantize;
 
   /* The actual post-DCT divisors --- not identical to the quant table
    * entries, because of scaling (especially for an unnormalized DCT).
@@ -31,16 +56,155 @@
    */
   DCTELEM * divisors[NUM_QUANT_TBLS];
 
+  /* work area for FDCT subroutine */
+  DCTELEM * workspace;
+
 #ifdef DCT_FLOAT_SUPPORTED
   /* Same as above for the floating-point case. */
-  float_DCT_method_ptr do_float_dct;
+  float_DCT_method_ptr float_dct;
+  float_convsamp_method_ptr float_convsamp;
+  float_quantize_method_ptr float_quantize;
   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+  FAST_FLOAT * float_workspace;
 #endif
 } my_fdct_controller;
 
 typedef my_fdct_controller * my_fdct_ptr;
 
 
+#if BITS_IN_JSAMPLE == 8
+
+/*
+ * Find the highest bit in an integer through binary search.
+ */
+
+LOCAL(int)
+flss (UINT16 val)
+{
+  int bit;
+
+  bit = 16;
+
+  if (!val)
+    return 0;
+
+  if (!(val & 0xff00)) {
+    bit -= 8;
+    val <<= 8;
+  }
+  if (!(val & 0xf000)) {
+    bit -= 4;
+    val <<= 4;
+  }
+  if (!(val & 0xc000)) {
+    bit -= 2;
+    val <<= 2;
+  }
+  if (!(val & 0x8000)) {
+    bit -= 1;
+    val <<= 1;
+  }
+
+  return bit;
+}
+
+
+/*
+ * Compute values to do a division using reciprocal.
+ *
+ * This implementation is based on an algorithm described in
+ *   "How to optimize for the Pentium family of microprocessors"
+ *   (http://www.agner.org/assem/).
+ * More information about the basic algorithm can be found in
+ * the paper "Integer Division Using Reciprocals" by Robert Alverson.
+ *
+ * The basic idea is to replace x/d by x * d^-1. In order to store
+ * d^-1 with enough precision we shift it left a few places. It turns
+ * out that this algorithm gives just enough precision, and also fits
+ * into DCTELEM:
+ *
+ *   b = (the number of significant bits in divisor) - 1
+ *   r = (word size) + b
+ *   f = 2^r / divisor
+ *
+ * f will not be an integer for most cases, so we need to compensate
+ * for the rounding error introduced:
+ *
+ *   no fractional part:
+ *
+ *       result = input >> b
+ *
+ *   fractional part of f < 0.5:
+ *
+ *       round f down to nearest integer
+ *       result = ((input + 1) * f) >> r
+ *
+ *   fractional part of f > 0.5:
+ *
+ *       round f up to nearest integer
+ *       result = (input * f) >> r
+ *
+ * This is the original algorithm that gives truncated results. But we
+ * want properly rounded results, so we replace "input" with
+ * "input + divisor/2".
+ *
+ * In order to allow SIMD implementations we also tweak the values to
+ * allow the same calculation to be made at all times:
+ *
+ *   dctbl[0] = f rounded to nearest integer
+ *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
+ *   dctbl[2] = 1 << ((word size) * 2 - r)
+ *   dctbl[3] = r - (word size)
+ *
+ * dctbl[2] is for stupid instruction sets where the shift operation
+ * isn't member-wise (e.g. MMX).
+ *
+ * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
+ * is that most SIMD implementations have a "multiply and store top
+ * half" operation.
+ *
+ * Lastly, we store each of the values in their own table instead
+ * of in a consecutive manner, yet again in order to allow SIMD
+ * routines.
+ */
+
+LOCAL(int)
+compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
+{
+  UDCTELEM2 fq, fr;
+  UDCTELEM c;
+  int b, r;
+
+  b = flss(divisor) - 1;
+  r  = sizeof(DCTELEM) * 8 + b;
+
+  fq = ((UDCTELEM2)1 << r) / divisor;
+  fr = ((UDCTELEM2)1 << r) % divisor;
+
+  c = divisor / 2; /* for rounding */
+
+  if (fr == 0) { /* divisor is power of two */
+    /* fq will be one bit too large to fit in DCTELEM, so adjust */
+    fq >>= 1;
+    r--;
+  } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
+    c++;
+  } else { /* fractional part is > 0.5 */
+    fq++;
+  }
+
+  dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
+  dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
+  dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
+  dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
+
+  if(r <= 16) return 0;
+  else return 1;
+}
+
+#endif
+
+
 /*
  * Initialize for a processing pass.
  * Verify that all referenced Q-tables are present, and set up
@@ -64,7 +228,7 @@
     qtblno = compptr->quant_tbl_no;
     /* Make sure specified quantization table is present */
     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     qtbl = cinfo->quant_tbl_ptrs[qtblno];
     /* Compute divisors for this quant table */
@@ -76,87 +240,102 @@
        * coefficients multiplied by 8 (to counteract scaling).
        */
       if (fdct->divisors[qtblno] == NULL) {
-	fdct->divisors[qtblno] = (DCTELEM *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      DCTSIZE2 * SIZEOF(DCTELEM));
+        fdct->divisors[qtblno] = (DCTELEM *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
       }
       dtbl = fdct->divisors[qtblno];
       for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+#if BITS_IN_JSAMPLE == 8
+        if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
+          && fdct->quantize == jsimd_quantize)
+          fdct->quantize = quantize;
+#else
+        dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+#endif
       }
       break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
     case JDCT_IFAST:
       {
-	/* For AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 */
+        /* For AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         */
 #define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
 
-	if (fdct->divisors[qtblno] == NULL) {
-	  fdct->divisors[qtblno] = (DCTELEM *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(DCTELEM));
-	}
-	dtbl = fdct->divisors[qtblno];
-	for (i = 0; i < DCTSIZE2; i++) {
-	  dtbl[i] = (DCTELEM)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-3);
-	}
+        if (fdct->divisors[qtblno] == NULL) {
+          fdct->divisors[qtblno] = (DCTELEM *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
+        }
+        dtbl = fdct->divisors[qtblno];
+        for (i = 0; i < DCTSIZE2; i++) {
+#if BITS_IN_JSAMPLE == 8
+          if(!compute_reciprocal(
+            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                  (INT32) aanscales[i]),
+                    CONST_BITS-3), &dtbl[i])
+            && fdct->quantize == jsimd_quantize)
+            fdct->quantize = quantize;
+#else
+           dtbl[i] = (DCTELEM)
+             DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                   (INT32) aanscales[i]),
+                     CONST_BITS-3);
+#endif
+        }
       }
       break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
     case JDCT_FLOAT:
       {
-	/* For float AA&N IDCT method, divisors are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * We apply a further scale factor of 8.
-	 * What's actually stored is 1/divisor so that the inner loop can
-	 * use a multiplication rather than a division.
-	 */
-	FAST_FLOAT * fdtbl;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
+        /* For float AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         * What's actually stored is 1/divisor so that the inner loop can
+         * use a multiplication rather than a division.
+         */
+        FAST_FLOAT * fdtbl;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
 
-	if (fdct->float_divisors[qtblno] == NULL) {
-	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(FAST_FLOAT));
-	}
-	fdtbl = fdct->float_divisors[qtblno];
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / (((double) qtbl->quantval[i] *
-		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
-	    i++;
-	  }
-	}
+        if (fdct->float_divisors[qtblno] == NULL) {
+          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                        DCTSIZE2 * sizeof(FAST_FLOAT));
+        }
+        fdtbl = fdct->float_divisors[qtblno];
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fdtbl[i] = (FAST_FLOAT)
+              (1.0 / (((double) qtbl->quantval[i] *
+                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+            i++;
+          }
+        }
       }
       break;
 #endif
@@ -169,6 +348,118 @@
 
 
 /*
+ * Load data into workspace, applying unsigned->signed conversion.
+ */
+
+METHODDEF(void)
+convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
+{
+  register DCTELEM *workspaceptr;
+  register JSAMPROW elemptr;
+  register int elemr;
+
+  workspaceptr = workspace;
+  for (elemr = 0; elemr < DCTSIZE; elemr++) {
+    elemptr = sample_data[elemr] + start_col;
+
+#if DCTSIZE == 8                /* unroll the inner loop */
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+#else
+    {
+      register int elemc;
+      for (elemc = DCTSIZE; elemc > 0; elemc--)
+        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+    }
+#endif
+  }
+}
+
+
+/*
+ * Quantize/descale the coefficients, and store into coef_blocks[].
+ */
+
+METHODDEF(void)
+quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
+{
+  int i;
+  DCTELEM temp;
+  JCOEFPTR output_ptr = coef_block;
+
+#if BITS_IN_JSAMPLE == 8
+
+  UDCTELEM recip, corr, shift;
+  UDCTELEM2 product;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = workspace[i];
+    recip = divisors[i + DCTSIZE2 * 0];
+    corr =  divisors[i + DCTSIZE2 * 1];
+    shift = divisors[i + DCTSIZE2 * 3];
+
+    if (temp < 0) {
+      temp = -temp;
+      product = (UDCTELEM2)(temp + corr) * recip;
+      product >>= shift + sizeof(DCTELEM)*8;
+      temp = product;
+      temp = -temp;
+    } else {
+      product = (UDCTELEM2)(temp + corr) * recip;
+      product >>= shift + sizeof(DCTELEM)*8;
+      temp = product;
+    }
+    output_ptr[i] = (JCOEF) temp;
+  }
+
+#else
+
+  register DCTELEM qval;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    qval = divisors[i];
+    temp = workspace[i];
+    /* Divide the coefficient value by qval, ensuring proper rounding.
+     * Since C does not specify the direction of rounding for negative
+     * quotients, we have to force the dividend positive for portability.
+     *
+     * In most files, at least half of the output values will be zero
+     * (at default quantization settings, more like three-quarters...)
+     * so we should ensure that this case is fast.  On many machines,
+     * a comparison is enough cheaper than a divide to make a special test
+     * a win.  Since both inputs will be nonnegative, we need only test
+     * for a < b to discover whether a/b is 0.
+     * If your machine's division is fast enough, define FAST_DIVIDE.
+     */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)  a /= b
+#else
+#define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
+#endif
+    if (temp < 0) {
+      temp = -temp;
+      temp += qval>>1;  /* for rounding */
+      DIVIDE_BY(temp, qval);
+      temp = -temp;    
+    } else {
+      temp += qval>>1;  /* for rounding */
+      DIVIDE_BY(temp, qval);
+    }
+    output_ptr[i] = (JCOEF) temp;
+  }
+
+#endif
+
+}
+
+
+/*
  * Perform forward DCT on one or more blocks of a component.
  *
  * The input samples are taken from the sample_data[] array starting at
@@ -178,159 +469,125 @@
 
 METHODDEF(void)
 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
-	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-	     JDIMENSION start_row, JDIMENSION start_col,
-	     JDIMENSION num_blocks)
+             JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+             JDIMENSION start_row, JDIMENSION start_col,
+             JDIMENSION num_blocks)
 /* This version is used for integer DCT implementations. */
 {
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  forward_DCT_method_ptr do_dct = fdct->do_dct;
   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
-  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  DCTELEM * workspace;
   JDIMENSION bi;
 
-  sample_data += start_row;	/* fold in the vertical offset once */
+  /* Make sure the compiler doesn't look up these every pass */
+  forward_DCT_method_ptr do_dct = fdct->dct;
+  convsamp_method_ptr do_convsamp = fdct->convsamp;
+  quantize_method_ptr do_quantize = fdct->quantize;
+  workspace = fdct->workspace;
+
+  sample_data += start_row;     /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
-    { register DCTELEM *workspaceptr;
-      register JSAMPROW elemptr;
-      register int elemr;
-
-      workspaceptr = workspace;
-      for (elemr = 0; elemr < DCTSIZE; elemr++) {
-	elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8		/* unroll the inner loop */
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-#else
-	{ register int elemc;
-	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
-	    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
-	  }
-	}
-#endif
-      }
-    }
+    (*do_convsamp) (sample_data, start_col, workspace);
 
     /* Perform the DCT */
     (*do_dct) (workspace);
 
     /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register DCTELEM temp, qval;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	qval = divisors[i];
-	temp = workspace[i];
-	/* Divide the coefficient value by qval, ensuring proper rounding.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 *
-	 * In most files, at least half of the output values will be zero
-	 * (at default quantization settings, more like three-quarters...)
-	 * so we should ensure that this case is fast.  On many machines,
-	 * a comparison is enough cheaper than a divide to make a special test
-	 * a win.  Since both inputs will be nonnegative, we need only test
-	 * for a < b to discover whether a/b is 0.
-	 * If your machine's division is fast enough, define FAST_DIVIDE.
-	 */
-#ifdef FAST_DIVIDE
-#define DIVIDE_BY(a,b)	a /= b
-#else
-#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
-#endif
-	if (temp < 0) {
-	  temp = -temp;
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	  temp = -temp;
-	} else {
-	  temp += qval>>1;	/* for rounding */
-	  DIVIDE_BY(temp, qval);
-	}
-	output_ptr[i] = (JCOEF) temp;
-      }
-    }
+    (*do_quantize) (coef_blocks[bi], divisors, workspace);
   }
 }
 
 
 #ifdef DCT_FLOAT_SUPPORTED
 
+
+METHODDEF(void)
+convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
+{
+  register FAST_FLOAT *workspaceptr;
+  register JSAMPROW elemptr;
+  register int elemr;
+
+  workspaceptr = workspace;
+  for (elemr = 0; elemr < DCTSIZE; elemr++) {
+    elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8                /* unroll the inner loop */
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+#else
+    {
+      register int elemc;
+      for (elemc = DCTSIZE; elemc > 0; elemc--)
+        *workspaceptr++ = (FAST_FLOAT)
+                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+    }
+#endif
+  }
+}
+
+
+METHODDEF(void)
+quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
+{
+  register FAST_FLOAT temp;
+  register int i;
+  register JCOEFPTR output_ptr = coef_block;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    /* Apply the quantization and scaling factor */
+    temp = workspace[i] * divisors[i];
+
+    /* Round to nearest integer.
+     * Since C does not specify the direction of rounding for negative
+     * quotients, we have to force the dividend positive for portability.
+     * The maximum coefficient size is +-16K (for 12-bit data), so this
+     * code should work for either 16-bit or 32-bit ints.
+     */
+    output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+  }
+}
+
+
 METHODDEF(void)
 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-		   JDIMENSION start_row, JDIMENSION start_col,
-		   JDIMENSION num_blocks)
+                   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                   JDIMENSION start_row, JDIMENSION start_col,
+                   JDIMENSION num_blocks)
 /* This version is used for floating-point DCT implementations. */
 {
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
-  float_DCT_method_ptr do_dct = fdct->do_float_dct;
   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
-  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  FAST_FLOAT * workspace;
   JDIMENSION bi;
 
-  sample_data += start_row;	/* fold in the vertical offset once */
+
+  /* Make sure the compiler doesn't look up these every pass */
+  float_DCT_method_ptr do_dct = fdct->float_dct;
+  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
+  float_quantize_method_ptr do_quantize = fdct->float_quantize;
+  workspace = fdct->float_workspace;
+
+  sample_data += start_row;     /* fold in the vertical offset once */
 
   for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
     /* Load data into workspace, applying unsigned->signed conversion */
-    { register FAST_FLOAT *workspaceptr;
-      register JSAMPROW elemptr;
-      register int elemr;
-
-      workspaceptr = workspace;
-      for (elemr = 0; elemr < DCTSIZE; elemr++) {
-	elemptr = sample_data[elemr] + start_col;
-#if DCTSIZE == 8		/* unroll the inner loop */
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-#else
-	{ register int elemc;
-	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
-	    *workspaceptr++ = (FAST_FLOAT)
-	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
-	  }
-	}
-#endif
-      }
-    }
+    (*do_convsamp) (sample_data, start_col, workspace);
 
     /* Perform the DCT */
     (*do_dct) (workspace);
 
     /* Quantize/descale the coefficients, and store into coef_blocks[] */
-    { register FAST_FLOAT temp;
-      register int i;
-      register JCOEFPTR output_ptr = coef_blocks[bi];
-
-      for (i = 0; i < DCTSIZE2; i++) {
-	/* Apply the quantization and scaling factor */
-	temp = workspace[i] * divisors[i];
-	/* Round to nearest integer.
-	 * Since C does not specify the direction of rounding for negative
-	 * quotients, we have to force the dividend positive for portability.
-	 * The maximum coefficient size is +-16K (for 12-bit data), so this
-	 * code should work for either 16-bit or 32-bit ints.
-	 */
-	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
-      }
-    }
+    (*do_quantize) (coef_blocks[bi], divisors, workspace);
   }
 }
 
@@ -349,27 +606,37 @@
 
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_fdct_controller));
+                                sizeof(my_fdct_controller));
   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
   fdct->pub.start_pass = start_pass_fdctmgr;
 
+  /* First determine the DCT... */
   switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
   case JDCT_ISLOW:
     fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_islow;
+    if (jsimd_can_fdct_islow())
+      fdct->dct = jsimd_fdct_islow;
+    else
+      fdct->dct = jpeg_fdct_islow;
     break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
   case JDCT_IFAST:
     fdct->pub.forward_DCT = forward_DCT;
-    fdct->do_dct = jpeg_fdct_ifast;
+    if (jsimd_can_fdct_ifast())
+      fdct->dct = jsimd_fdct_ifast;
+    else
+      fdct->dct = jpeg_fdct_ifast;
     break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
   case JDCT_FLOAT:
     fdct->pub.forward_DCT = forward_DCT_float;
-    fdct->do_float_dct = jpeg_fdct_float;
+    if (jsimd_can_fdct_float())
+      fdct->float_dct = jsimd_fdct_float;
+    else
+      fdct->float_dct = jpeg_fdct_float;
     break;
 #endif
   default:
@@ -377,6 +644,54 @@
     break;
   }
 
+  /* ...then the supporting stages. */
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+#endif
+#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
+    if (jsimd_can_convsamp())
+      fdct->convsamp = jsimd_convsamp;
+    else
+      fdct->convsamp = convsamp;
+    if (jsimd_can_quantize())
+      fdct->quantize = jsimd_quantize;
+    else
+      fdct->quantize = quantize;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+    if (jsimd_can_convsamp_float())
+      fdct->float_convsamp = jsimd_convsamp_float;
+    else
+      fdct->float_convsamp = convsamp_float;
+    if (jsimd_can_quantize_float())
+      fdct->float_quantize = jsimd_quantize_float;
+    else
+      fdct->float_quantize = quantize_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* Allocate workspace memory */
+#ifdef DCT_FLOAT_SUPPORTED
+  if (cinfo->dct_method == JDCT_FLOAT)
+    fdct->float_workspace = (FAST_FLOAT *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                  sizeof(FAST_FLOAT) * DCTSIZE2);
+  else
+#endif
+    fdct->workspace = (DCTELEM *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                  sizeof(DCTELEM) * DCTSIZE2);
+
   /* Mark divisor tables unallocated */
   for (i = 0; i < NUM_QUANT_TBLS; i++) {
     fdct->divisors[i] = NULL;
diff --git a/jchuff.c b/jchuff.c
index f235250..5f6288a 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -1,8 +1,10 @@
 /*
  * jchuff.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy encoding routines.
@@ -17,7 +19,42 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jchuff.h"		/* Declarations shared with jcphuff.c */
+#include "jchuff.h"             /* Declarations shared with jcphuff.c */
+#include <limits.h>
+
+/*
+ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
+ * used for bit counting rather than the lookup table.  This will reduce the
+ * memory footprint by 64k, which is important for some mobile applications
+ * that create many isolated instances of libjpeg-turbo (web browsers, for
+ * instance.)  This may improve performance on some mobile platforms as well.
+ * This feature is enabled by default only on ARM processors, because some x86
+ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
+ * shown to have a significant performance impact even on the x86 chips that
+ * have a fast implementation of it.  When building for ARMv6, you can
+ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
+ * flags (this defines __thumb__).
+ */
+
+/* NOTE: Both GCC and Clang define __GNUC__ */
+#if defined __GNUC__ && defined __arm__
+#if !defined __thumb__ || defined __thumb2__
+#define USE_CLZ_INTRINSIC
+#endif
+#endif
+
+#ifdef USE_CLZ_INTRINSIC
+#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))  /* undefined for 0 */
+#define JPEG_NBITS(x) ((x) ? JPEG_NBITS_NONZERO(x) : 0)
+#else
+#include "jpeg_nbits_table.h"
+#define JPEG_NBITS(x) (jpeg_nbits_table[x])  /* table lookup, valid for 0 */
+#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
+#endif
+
+#ifndef min
+ #define min(a,b) ((a)<(b)?(a):(b))
+#endif
 
 
 /* Expanded entropy encoder object for Huffman encoding.
@@ -27,8 +64,8 @@
  */
 
 typedef struct {
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
+  size_t put_buffer;            /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 } savable_state;
 
@@ -42,12 +79,12 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).put_buffer = (src).put_buffer, \
-	 (dest).put_bits = (src).put_bits, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).put_buffer = (src).put_buffer, \
+         (dest).put_bits = (src).put_bits, \
+         (dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -55,17 +92,17 @@
 typedef struct {
   struct jpeg_entropy_encoder pub; /* public fields */
 
-  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+  savable_state saved;          /* Bit buffer & DC state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
   c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
 
-#ifdef ENTROPY_OPT_SUPPORTED	/* Statistics tables for optimization */
+#ifdef ENTROPY_OPT_SUPPORTED    /* Statistics tables for optimization */
   long * dc_count_ptrs[NUM_HUFF_TBLS];
   long * ac_count_ptrs[NUM_HUFF_TBLS];
 #endif
@@ -78,21 +115,20 @@
  */
 
 typedef struct {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  savable_state cur;		/* Current bit buffer & DC state */
-  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  savable_state cur;            /* Current bit buffer & DC state */
+  j_compress_ptr cinfo;         /* dump_buffer needs access to this */
 } working_state;
 
 
 /* Forward declarations */
-METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
-					JBLOCKROW *MCU_data));
-METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
+METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo);
 #ifdef ENTROPY_OPT_SUPPORTED
-METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
-					  JBLOCKROW *MCU_data));
-METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
+METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo,
+                                      JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo);
 #endif
 
 
@@ -130,29 +166,29 @@
       /* Check for invalid table indexes */
       /* (make_c_derived_tbl does this in the other path) */
       if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
-	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
       if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
-	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
       /* Allocate and zero the statistics tables */
       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
       if (entropy->dc_count_ptrs[dctbl] == NULL)
-	entropy->dc_count_ptrs[dctbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
-      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long));
+        entropy->dc_count_ptrs[dctbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long));
       if (entropy->ac_count_ptrs[actbl] == NULL)
-	entropy->ac_count_ptrs[actbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
-      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long));
+        entropy->ac_count_ptrs[actbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long));
 #endif
     } else {
       /* Compute derived values for Huffman tables */
       /* We may do this more than once for a table, but it's not expensive */
       jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
-			      & entropy->dc_derived_tbls[dctbl]);
+                              & entropy->dc_derived_tbls[dctbl]);
       jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
-			      & entropy->ac_derived_tbls[actbl]);
+                              & entropy->ac_derived_tbls[actbl]);
     }
     /* Initialize DC predictions to 0 */
     entropy->saved.last_dc_val[ci] = 0;
@@ -177,7 +213,7 @@
 
 GLOBAL(void)
 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
-			 c_derived_tbl ** pdtbl)
+                         c_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *htbl;
   c_derived_tbl *dtbl;
@@ -202,22 +238,22 @@
   if (*pdtbl == NULL)
     *pdtbl = (c_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(c_derived_tbl));
+                                  sizeof(c_derived_tbl));
   dtbl = *pdtbl;
-  
+
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
   for (l = 1; l <= 16; l++) {
     i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (i--)
       huffsize[p++] = (char) l;
   }
   huffsize[p] = 0;
   lastp = p;
-  
+
   /* Figure C.2: generate the codes themselves */
   /* We also validate that the counts represent a legal Huffman code tree. */
 
@@ -237,7 +273,7 @@
     code <<= 1;
     si++;
   }
-  
+
   /* Figure C.3: generate encoding tables */
   /* These are code and size indexed by symbol value */
 
@@ -245,7 +281,7 @@
    * this lets us detect duplicate VAL entries here, and later
    * allows emit_bits to detect any attempt to emit such symbols.
    */
-  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+  MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
 
   /* This is also a convenient place to check for out-of-range
    * and duplicated VAL entries.  We allow 0..255 for AC symbols
@@ -268,10 +304,10 @@
 
 /* Emit a byte, taking 'action' if must suspend. */
 #define emit_byte(state,val,action)  \
-	{ *(state)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(state)->free_in_buffer == 0)  \
-	    if (! dump_buffer(state))  \
-	      { action; } }
+        { *(state)->next_output_byte++ = (JOCTET) (val);  \
+          if (--(state)->free_in_buffer == 0)  \
+            if (! dump_buffer(state))  \
+              { action; } }
 
 
 LOCAL(boolean)
@@ -291,58 +327,138 @@
 
 /* Outputting bits to the file */
 
-/* Only the right 24 bits of put_buffer are used; the valid bits are
- * left-justified in this part.  At most 16 bits can be passed to emit_bits
- * in one call, and we never retain more than 7 bits in put_buffer
- * between calls, so 24 bits are sufficient.
+/* These macros perform the same task as the emit_bits() function in the
+ * original libjpeg code.  In addition to reducing overhead by explicitly
+ * inlining the code, additional performance is achieved by taking into
+ * account the size of the bit buffer and waiting until it is almost full
+ * before emptying it.  This mostly benefits 64-bit platforms, since 6
+ * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
  */
 
-INLINE
-LOCAL(boolean)
-emit_bits (working_state * state, unsigned int code, int size)
-/* Emit some bits; return TRUE if successful, FALSE if must suspend */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = state->cur.put_bits;
+#define EMIT_BYTE() { \
+  JOCTET c; \
+  put_bits -= 8; \
+  c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
+  *buffer++ = c; \
+  if (c == 0xFF)  /* need to stuff a zero byte? */ \
+    *buffer++ = 0; \
+ }
 
-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
-
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
-  
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-    
-    emit_byte(state, c, return FALSE);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte(state, 0, return FALSE);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  state->cur.put_buffer = put_buffer; /* update state variables */
-  state->cur.put_bits = put_bits;
-
-  return TRUE;
+#define PUT_BITS(code, size) { \
+  put_bits += size; \
+  put_buffer = (put_buffer << size) | code; \
 }
 
+#define CHECKBUF15() { \
+  if (put_bits > 15) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#define CHECKBUF31() { \
+  if (put_bits > 31) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#define CHECKBUF47() { \
+  if (put_bits > 47) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#if __WORDSIZE==64 || defined(_WIN64)
+
+#define EMIT_BITS(code, size) { \
+  CHECKBUF47() \
+  PUT_BITS(code, size) \
+}
+
+#define EMIT_CODE(code, size) { \
+  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  CHECKBUF31() \
+  PUT_BITS(code, size) \
+  PUT_BITS(temp2, nbits) \
+ }
+
+#else
+
+#define EMIT_BITS(code, size) { \
+  PUT_BITS(code, size) \
+  CHECKBUF15() \
+}
+
+#define EMIT_CODE(code, size) { \
+  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  PUT_BITS(code, size) \
+  CHECKBUF15() \
+  PUT_BITS(temp2, nbits) \
+  CHECKBUF15() \
+ }
+
+#endif
+
+/* Worst case: 64 coefs x 2 PUT_BITS x 2 bytes, doubled by 0xFF stuffing */
+#define BUFSIZE (DCTSIZE2 * 8)
+
+#define LOAD_BUFFER() { \
+  if (state->free_in_buffer < BUFSIZE) { \
+    localbuf = 1;  /* too little room left: stage output in _buffer */ \
+    buffer = _buffer; \
+  } \
+  else buffer = state->next_output_byte;  /* write in place */ \
+ }
+
+#define STORE_BUFFER() { \
+  if (localbuf) { \
+    bytes = buffer - _buffer;  /* number of staged bytes */ \
+    buffer = _buffer; \
+    while (bytes > 0) { \
+      bytestocopy = min(bytes, state->free_in_buffer); \
+      MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
+      state->next_output_byte += bytestocopy; \
+      buffer += bytestocopy; \
+      state->free_in_buffer -= bytestocopy; \
+      if (state->free_in_buffer == 0)  /* output full: hand it off */ \
+        if (! dump_buffer(state)) return FALSE; \
+      bytes -= bytestocopy; \
+    } \
+  } \
+  else {  /* bytes were written in place; just advance the bookkeeping */ \
+    state->free_in_buffer -= (buffer - state->next_output_byte); \
+    state->next_output_byte = buffer; \
+  } \
+ }
+
 
 LOCAL(boolean)
 flush_bits (working_state * state)
 {
-  if (! emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
-    return FALSE;
-  state->cur.put_buffer = 0;	/* and reset bit-buffer to empty */
+  JOCTET _buffer[BUFSIZE], *buffer;
+  size_t put_buffer;  int put_bits;
+  size_t bytes, bytestocopy;  int localbuf = 0;
+
+  put_buffer = state->cur.put_buffer;
+  put_bits = state->cur.put_bits;
+  LOAD_BUFFER()
+  /* NOTE(review): PUT_BITS does not drain first; assumes 7 more bits fit */
+  /* Pad with seven 1-bits, then write out every complete byte */
+  PUT_BITS(0x7F, 7)
+  while (put_bits >= 8) EMIT_BYTE()
+
+  state->cur.put_buffer = 0;    /* leftover pad bits (< 8) are discarded */
   state->cur.put_bits = 0;
+  STORE_BUFFER()
+
   return TRUE;
 }
 
@@ -351,93 +467,110 @@
 
 LOCAL(boolean)
 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
-		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+                  c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
-  register int temp, temp2;
-  register int nbits;
-  register int k, r, i;
-  
+  int temp, temp2, temp3;
+  int nbits;
+  int r, code, size;
+  JOCTET _buffer[BUFSIZE], *buffer;
+  size_t put_buffer;  int put_bits;
+  int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
+  size_t bytes, bytestocopy;  int localbuf = 0;
+
+  put_buffer = state->cur.put_buffer;
+  put_bits = state->cur.put_bits;
+  LOAD_BUFFER()
+
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = temp2 = block[0] - last_dc_val;
 
-  if (temp < 0) {
-    temp = -temp;		/* temp is abs value of input */
-    /* For a negative input, want temp2 = bitwise complement of abs(input) */
-    /* This code assumes we are on a two's complement machine */
-    temp2--;
-  }
-  
+  /* This is a well-known technique for obtaining the absolute value without a
+   * branch.  It is derived from an assembly language technique presented in
+   * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
+   * Agner Fog.
+   */
+  temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
+  temp ^= temp3;
+  temp -= temp3;
+
+  /* For a negative input, want temp2 = bitwise complement of abs(input) */
+  /* This code assumes we are on a two's complement machine */
+  temp2 += temp3;
+
   /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = 0;
-  while (temp) {
-    nbits++;
-    temp >>= 1;
-  }
-  /* Check for out-of-range coefficient values.
-   * Since we're encoding a difference, the range limit is twice as much.
-   */
-  if (nbits > MAX_COEF_BITS+1)
-    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-  
+  nbits = JPEG_NBITS(temp);
+
   /* Emit the Huffman-coded symbol for the number of bits */
-  if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
-    return FALSE;
+  code = dctbl->ehufco[nbits];
+  size = dctbl->ehufsi[nbits];
+  /* EMIT_BITS keeps bits left over from the previous block from overflowing */
+  EMIT_BITS(code, size)
+
+  /* Mask off any extra bits in code */
+  temp2 &= (((INT32) 1)<<nbits) - 1;
 
   /* Emit that number of bits of the value, if positive, */
   /* or the complement of its magnitude, if negative. */
-  if (nbits)			/* emit_bits rejects calls with size 0 */
-    if (! emit_bits(state, (unsigned int) temp2, nbits))
-      return FALSE;
+  /* (when nbits is 0, temp2 was masked to 0 and nothing is appended) */
+  EMIT_BITS(temp2, nbits)
 
   /* Encode the AC coefficients per section F.1.2.2 */
-  
-  r = 0;			/* r = run length of zeros */
-  
-  for (k = 1; k < DCTSIZE2; k++) {
-    if ((temp = block[jpeg_natural_order[k]]) == 0) {
-      r++;
-    } else {
-      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-      while (r > 15) {
-	if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
-	  return FALSE;
-	r -= 16;
-      }
 
-      temp2 = temp;
-      if (temp < 0) {
-	temp = -temp;		/* temp is abs value of input */
-	/* This code assumes we are on a two's complement machine */
-	temp2--;
-      }
-      
-      /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
-      while ((temp >>= 1))
-	nbits++;
-      /* Check for out-of-range coefficient values */
-      if (nbits > MAX_COEF_BITS)
-	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-      
-      /* Emit Huffman symbol for run length / number of bits */
-      i = (r << 4) + nbits;
-      if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
-	return FALSE;
+  r = 0;                        /* r = run length of zeros */
 
-      /* Emit that number of bits of the value, if positive, */
-      /* or the complement of its magnitude, if negative. */
-      if (! emit_bits(state, (unsigned int) temp2, nbits))
-	return FALSE;
-      
-      r = 0;
-    }
-  }
+/* Manually unroll the k loop to eliminate the counter variable.  This
+ * improves performance greatly on systems with a limited number of
+ * registers (such as x86.)
+ */
+#define kloop(jpeg_natural_order_of_k) {  \
+  if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
+    r++; \
+  } else { \
+    temp2 = temp; \
+    /* Branch-less absolute value, bitwise complement, etc., same as above */ \
+    temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp ^= temp3; \
+    temp -= temp3; \
+    temp2 += temp3; \
+    nbits = JPEG_NBITS_NONZERO(temp); \
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
+    while (r > 15) { \
+      EMIT_BITS(code_0xf0, size_0xf0) \
+      r -= 16; \
+    } \
+    /* Emit Huffman symbol for run length / number of bits */ \
+    temp3 = (r << 4) + nbits;  \
+    code = actbl->ehufco[temp3]; \
+    size = actbl->ehufsi[temp3]; \
+    EMIT_CODE(code, size) \
+    r = 0;  \
+  } \
+}
+
+  /* One iteration for each value in jpeg_natural_order[] */
+  kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
+  kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
+  kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
+  kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
+  kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
+  kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
+  kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
+  kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
+  kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
+  kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
+  kloop(55);  kloop(62);  kloop(63);
 
   /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
-      return FALSE;
+  if (r > 0) {
+    code = actbl->ehufco[0];
+    size = actbl->ehufsi[0];
+    EMIT_BITS(code, size)
+  }
+
+  state->cur.put_buffer = put_buffer;
+  state->cur.put_bits = put_bits;
+  STORE_BUFFER()
 
   return TRUE;
 }
@@ -490,7 +623,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! emit_restart(&state, entropy->next_restart_num))
-	return FALSE;
+        return FALSE;
   }
 
   /* Encode the MCU data blocks */
@@ -498,9 +631,9 @@
     ci = cinfo->MCU_membership[blkn];
     compptr = cinfo->cur_comp_info[ci];
     if (! encode_one_block(&state,
-			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
-			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
-			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+                           MCU_data[blkn][0], state.cur.last_dc_val[ci],
+                           entropy->dc_derived_tbls[compptr->dc_tbl_no],
+                           entropy->ac_derived_tbls[compptr->ac_tbl_no]))
       return FALSE;
     /* Update last_dc_val */
     state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
@@ -570,18 +703,18 @@
 
 LOCAL(void)
 htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
-		 long dc_counts[], long ac_counts[])
+                 long dc_counts[], long ac_counts[])
 {
   register int temp;
   register int nbits;
   register int k, r;
-  
+
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = block[0] - last_dc_val;
   if (temp < 0)
     temp = -temp;
-  
+
   /* Find the number of bits needed for the magnitude of the coefficient */
   nbits = 0;
   while (temp) {
@@ -596,36 +729,36 @@
 
   /* Count the Huffman symbol for the number of bits */
   dc_counts[nbits]++;
-  
+
   /* Encode the AC coefficients per section F.1.2.2 */
-  
-  r = 0;			/* r = run length of zeros */
-  
+
+  r = 0;                        /* r = run length of zeros */
+
   for (k = 1; k < DCTSIZE2; k++) {
     if ((temp = block[jpeg_natural_order[k]]) == 0) {
       r++;
     } else {
       /* if run length > 15, must emit special run-length-16 codes (0xF0) */
       while (r > 15) {
-	ac_counts[0xF0]++;
-	r -= 16;
+        ac_counts[0xF0]++;
+        r -= 16;
       }
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       if (temp < 0)
-	temp = -temp;
-      
+        temp = -temp;
+
       /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
+      nbits = 1;                /* there must be at least one 1 bit */
       while ((temp >>= 1))
-	nbits++;
+        nbits++;
       /* Check for out-of-range coefficient values */
       if (nbits > MAX_COEF_BITS)
-	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-      
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
       /* Count Huffman symbol for run length / number of bits */
       ac_counts[(r << 4) + nbits]++;
-      
+
       r = 0;
     }
   }
@@ -653,7 +786,7 @@
     if (entropy->restarts_to_go == 0) {
       /* Re-initialize DC predictions to 0 */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++)
-	entropy->saved.last_dc_val[ci] = 0;
+        entropy->saved.last_dc_val[ci] = 0;
       /* Update restart state */
       entropy->restarts_to_go = cinfo->restart_interval;
     }
@@ -664,8 +797,8 @@
     ci = cinfo->MCU_membership[blkn];
     compptr = cinfo->cur_comp_info[ci];
     htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
-		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
-		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+                    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+                    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
     entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
   }
 
@@ -704,22 +837,22 @@
 GLOBAL(void)
 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
 {
-#define MAX_CLEN 32		/* assumed maximum initial code length */
-  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
-  int codesize[257];		/* codesize[k] = code length of symbol k */
-  int others[257];		/* next symbol in current branch of tree */
+#define MAX_CLEN 32             /* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];       /* bits[k] = # of symbols with code length k */
+  int codesize[257];            /* codesize[k] = code length of symbol k */
+  int others[257];              /* next symbol in current branch of tree */
   int c1, c2;
   int p, i, j;
   long v;
 
   /* This algorithm is explained in section K.2 of the JPEG standard */
 
-  MEMZERO(bits, SIZEOF(bits));
-  MEMZERO(codesize, SIZEOF(codesize));
+  MEMZERO(bits, sizeof(bits));
+  MEMZERO(codesize, sizeof(codesize));
   for (i = 0; i < 257; i++)
-    others[i] = -1;		/* init links to empty */
-  
-  freq[256] = 1;		/* make sure 256 has a nonzero count */
+    others[i] = -1;             /* init links to empty */
+
+  freq[256] = 1;                /* make sure 256 has a nonzero count */
   /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
    * that no real symbol is given code-value of all ones, because 256
    * will be placed last in the largest codeword category.
@@ -734,8 +867,8 @@
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v) {
-	v = freq[i];
-	c1 = i;
+        v = freq[i];
+        c1 = i;
       }
     }
 
@@ -745,15 +878,15 @@
     v = 1000000000L;
     for (i = 0; i <= 256; i++) {
       if (freq[i] && freq[i] <= v && i != c1) {
-	v = freq[i];
-	c2 = i;
+        v = freq[i];
+        c2 = i;
       }
     }
 
     /* Done if we've merged everything into one frequency */
     if (c2 < 0)
       break;
-    
+
     /* Else merge the two counts/trees */
     freq[c1] += freq[c2];
     freq[c2] = 0;
@@ -764,9 +897,9 @@
       c1 = others[c1];
       codesize[c1]++;
     }
-    
-    others[c1] = c2;		/* chain c2 onto c1's tree branch */
-    
+
+    others[c1] = c2;            /* chain c2 onto c1's tree branch */
+
     /* Increment the codesize of everything in c2's tree branch */
     codesize[c2]++;
     while (others[c2] >= 0) {
@@ -781,7 +914,7 @@
       /* The JPEG standard seems to think that this can't happen, */
       /* but I'm paranoid... */
       if (codesize[i] > MAX_CLEN)
-	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+        ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
 
       bits[codesize[i]]++;
     }
@@ -797,28 +930,28 @@
    * shortest nonzero BITS entry is converted into a prefix for two code words
    * one bit longer.
    */
-  
+
   for (i = MAX_CLEN; i > 16; i--) {
     while (bits[i] > 0) {
-      j = i - 2;		/* find length of new prefix to be used */
+      j = i - 2;                /* find length of new prefix to be used */
       while (bits[j] == 0)
-	j--;
-      
-      bits[i] -= 2;		/* remove two symbols */
-      bits[i-1]++;		/* one goes in this length */
-      bits[j+1] += 2;		/* two new symbols in this length */
-      bits[j]--;		/* symbol of this length is now a prefix */
+        j--;
+
+      bits[i] -= 2;             /* remove two symbols */
+      bits[i-1]++;              /* one goes in this length */
+      bits[j+1] += 2;           /* two new symbols in this length */
+      bits[j]--;                /* symbol of this length is now a prefix */
     }
   }
 
   /* Remove the count for the pseudo-symbol 256 from the largest codelength */
-  while (bits[i] == 0)		/* find largest codelength still in use */
+  while (bits[i] == 0)          /* find largest codelength still in use */
     i--;
   bits[i]--;
-  
+
   /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
-  
+  MEMCOPY(htbl->bits, bits, sizeof(htbl->bits));
+
   /* Return a list of the symbols sorted by code length */
   /* It's not real clear to me why we don't need to consider the codelength
    * changes made above, but the JPEG spec seems to think this works.
@@ -827,8 +960,8 @@
   for (i = 1; i <= MAX_CLEN; i++) {
     for (j = 0; j <= 255; j++) {
       if (codesize[j] == i) {
-	htbl->huffval[p] = (UINT8) j;
-	p++;
+        htbl->huffval[p] = (UINT8) j;
+        p++;
       }
     }
   }
@@ -855,8 +988,8 @@
   /* It's important not to apply jpeg_gen_optimal_table more than once
    * per table, because it clobbers the input frequency counts!
    */
-  MEMZERO(did_dc, SIZEOF(did_dc));
-  MEMZERO(did_ac, SIZEOF(did_ac));
+  MEMZERO(did_dc, sizeof(did_dc));
+  MEMZERO(did_ac, sizeof(did_ac));
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
@@ -865,14 +998,14 @@
     if (! did_dc[dctbl]) {
       htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl];
       if (*htblptr == NULL)
-	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
       did_dc[dctbl] = TRUE;
     }
     if (! did_ac[actbl]) {
       htblptr = & cinfo->ac_huff_tbl_ptrs[actbl];
       if (*htblptr == NULL)
-	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
       did_ac[actbl] = TRUE;
     }
@@ -895,7 +1028,7 @@
 
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_encoder));
+                                sizeof(huff_entropy_encoder));
   cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
   entropy->pub.start_pass = start_pass_huff;
 
diff --git a/jchuff.h b/jchuff.h
index a9599fc..d49a992 100644
--- a/jchuff.h
+++ b/jchuff.h
@@ -1,8 +1,10 @@
 /*
  * jchuff.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains declarations for Huffman entropy encoding routines
@@ -25,23 +27,16 @@
 /* Derived data constructed for each Huffman table */
 
 typedef struct {
-  unsigned int ehufco[256];	/* code for each symbol */
-  char ehufsi[256];		/* length of code for each symbol */
+  unsigned int ehufco[256];     /* code for each symbol */
+  char ehufsi[256];             /* length of code for each symbol */
   /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
 } c_derived_tbl;
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_make_c_derived_tbl	jMkCDerived
-#define jpeg_gen_optimal_table	jGenOptTbl
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
 /* Expand a Huffman table definition into the derived format */
 EXTERN(void) jpeg_make_c_derived_tbl
-	JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
-	     c_derived_tbl ** pdtbl));
+        (j_compress_ptr cinfo, boolean isDC, int tblno,
+         c_derived_tbl ** pdtbl);
 
 /* Generate an optimal table definition given the specified counts */
 EXTERN(void) jpeg_gen_optimal_table
-	JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
+        (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]);
diff --git a/jcinit.c b/jcinit.c
index 5efffe3..347cf6d 100644
--- a/jcinit.c
+++ b/jcinit.c
@@ -42,7 +42,11 @@
   jinit_forward_dct(cinfo);
   /* Entropy encoding: either Huffman or arithmetic coding. */
   if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+    jinit_arith_encoder(cinfo);
+#else
     ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
   } else {
     if (cinfo->progressive_mode) {
 #ifdef C_PROGRESSIVE_SUPPORTED
@@ -56,7 +60,7 @@
 
   /* Need a full-image coefficient buffer in any multi-pass mode. */
   jinit_c_coef_controller(cinfo,
-		(boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
+                (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding));
   jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
 
   jinit_marker_writer(cinfo);
diff --git a/jcmainct.c b/jcmainct.c
index e0279a7..fc4014b 100644
--- a/jcmainct.c
+++ b/jcmainct.c
@@ -1,8 +1,10 @@
 /*
  * jcmainct.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the main buffer controller for compression.
@@ -28,10 +30,10 @@
 typedef struct {
   struct jpeg_c_main_controller pub; /* public fields */
 
-  JDIMENSION cur_iMCU_row;	/* number of current iMCU row */
-  JDIMENSION rowgroup_ctr;	/* counts row groups received in iMCU row */
-  boolean suspended;		/* remember if we suspended output */
-  J_BUF_MODE pass_mode;		/* current operating mode */
+  JDIMENSION cur_iMCU_row;      /* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;      /* counts row groups received in iMCU row */
+  boolean suspended;            /* remember if we suspended output */
+  J_BUF_MODE pass_mode;         /* current operating mode */
 
   /* If using just a strip buffer, this points to the entire set of buffers
    * (we allocate one for each component).  In the full-image case, this
@@ -52,12 +54,12 @@
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+        (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+         JDIMENSION in_rows_avail);
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
 METHODDEF(void) process_data_buffer_main
-	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
-	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+        (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+         JDIMENSION in_rows_avail);
 #endif
 
 
@@ -68,32 +70,32 @@
 METHODDEF(void)
 start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
   /* Do nothing in raw-data mode. */
   if (cinfo->raw_data_in)
     return;
 
-  main->cur_iMCU_row = 0;	/* initialize counters */
-  main->rowgroup_ctr = 0;
-  main->suspended = FALSE;
-  main->pass_mode = pass_mode;	/* save mode for use by process_data */
+  main_ptr->cur_iMCU_row = 0;   /* initialize counters */
+  main_ptr->rowgroup_ctr = 0;
+  main_ptr->suspended = FALSE;
+  main_ptr->pass_mode = pass_mode;      /* save mode for use by process_data */
 
   switch (pass_mode) {
   case JBUF_PASS_THRU:
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
-    if (main->whole_image[0] != NULL)
+    if (main_ptr->whole_image[0] != NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 #endif
-    main->pub.process_data = process_data_simple_main;
+    main_ptr->pub.process_data = process_data_simple_main;
     break;
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
   case JBUF_SAVE_SOURCE:
   case JBUF_CRANK_DEST:
   case JBUF_SAVE_AND_PASS:
-    if (main->whole_image[0] == NULL)
+    if (main_ptr->whole_image[0] == NULL)
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    main->pub.process_data = process_data_buffer_main;
+    main_ptr->pub.process_data = process_data_buffer_main;
     break;
 #endif
   default:
@@ -111,49 +113,49 @@
 
 METHODDEF(void)
 process_data_simple_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
+                          JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                          JDIMENSION in_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+  while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
     /* Read input data if we haven't filled the main buffer yet */
-    if (main->rowgroup_ctr < DCTSIZE)
+    if (main_ptr->rowgroup_ctr < DCTSIZE)
       (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
-					(JDIMENSION) DCTSIZE);
+                                        input_buf, in_row_ctr, in_rows_avail,
+                                        main_ptr->buffer, &main_ptr->rowgroup_ctr,
+                                        (JDIMENSION) DCTSIZE);
 
     /* If we don't have a full iMCU row buffered, return to application for
      * more data.  Note that preprocessor will always pad to fill the iMCU row
      * at the bottom of the image.
      */
-    if (main->rowgroup_ctr != DCTSIZE)
+    if (main_ptr->rowgroup_ctr != DCTSIZE)
       return;
 
     /* Send the completed row to the compressor */
-    if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
+    if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
       /* If compressor did not consume the whole row, then we must need to
        * suspend processing and return to the application.  In this situation
        * we pretend we didn't yet consume the last input row; otherwise, if
        * it happened to be the last row of the image, the application would
        * think we were done.
        */
-      if (! main->suspended) {
-	(*in_row_ctr)--;
-	main->suspended = TRUE;
+      if (! main_ptr->suspended) {
+        (*in_row_ctr)--;
+        main_ptr->suspended = TRUE;
       }
       return;
     }
     /* We did finish the row.  Undo our little suspension hack if a previous
      * call suspended; then mark the main buffer empty.
      */
-    if (main->suspended) {
+    if (main_ptr->suspended) {
       (*in_row_ctr)++;
-      main->suspended = FALSE;
+      main_ptr->suspended = FALSE;
     }
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->cur_iMCU_row++;
   }
 }
 
@@ -167,28 +169,28 @@
 
 METHODDEF(void)
 process_data_buffer_main (j_compress_ptr cinfo,
-			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			  JDIMENSION in_rows_avail)
+                          JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                          JDIMENSION in_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci;
   jpeg_component_info *compptr;
-  boolean writing = (main->pass_mode != JBUF_CRANK_DEST);
+  boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST);
 
-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+  while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
     /* Realign the virtual buffers if at the start of an iMCU row. */
-    if (main->rowgroup_ctr == 0) {
+    if (main_ptr->rowgroup_ctr == 0) {
       for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	main->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, main->whole_image[ci],
-	   main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
-	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
+           ci++, compptr++) {
+        main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr) cinfo, main_ptr->whole_image[ci],
+           main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
+           (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
       }
       /* In a read pass, pretend we just read some source data. */
       if (! writing) {
-	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
-	main->rowgroup_ctr = DCTSIZE;
+        *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
+        main_ptr->rowgroup_ctr = DCTSIZE;
       }
     }
 
@@ -196,41 +198,41 @@
     /* Note: preprocessor will pad if necessary to fill the last iMCU row. */
     if (writing) {
       (*cinfo->prep->pre_process_data) (cinfo,
-					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
-					(JDIMENSION) DCTSIZE);
+                                        input_buf, in_row_ctr, in_rows_avail,
+                                        main_ptr->buffer, &main_ptr->rowgroup_ctr,
+                                        (JDIMENSION) DCTSIZE);
       /* Return to application if we need more data to fill the iMCU row. */
-      if (main->rowgroup_ctr < DCTSIZE)
-	return;
+      if (main_ptr->rowgroup_ctr < DCTSIZE)
+        return;
     }
 
     /* Emit data, unless this is a sink-only pass. */
-    if (main->pass_mode != JBUF_SAVE_SOURCE) {
-      if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
-	/* If compressor did not consume the whole row, then we must need to
-	 * suspend processing and return to the application.  In this situation
-	 * we pretend we didn't yet consume the last input row; otherwise, if
-	 * it happened to be the last row of the image, the application would
-	 * think we were done.
-	 */
-	if (! main->suspended) {
-	  (*in_row_ctr)--;
-	  main->suspended = TRUE;
-	}
-	return;
+    if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
+        /* If compressor did not consume the whole row, then we must need to
+         * suspend processing and return to the application.  In this situation
+         * we pretend we didn't yet consume the last input row; otherwise, if
+         * it happened to be the last row of the image, the application would
+         * think we were done.
+         */
+        if (! main_ptr->suspended) {
+          (*in_row_ctr)--;
+          main_ptr->suspended = TRUE;
+        }
+        return;
       }
       /* We did finish the row.  Undo our little suspension hack if a previous
        * call suspended; then mark the main buffer empty.
        */
-      if (main->suspended) {
-	(*in_row_ctr)++;
-	main->suspended = FALSE;
+      if (main_ptr->suspended) {
+        (*in_row_ctr)++;
+        main_ptr->suspended = FALSE;
       }
     }
 
     /* If get here, we are done with this iMCU row.  Mark buffer empty. */
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->cur_iMCU_row++;
   }
 }
 
@@ -244,15 +246,15 @@
 GLOBAL(void)
 jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
 {
-  my_main_ptr main;
+  my_main_ptr main_ptr;
   int ci;
   jpeg_component_info *compptr;
 
-  main = (my_main_ptr)
+  main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_c_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
+                                sizeof(my_main_controller));
+  cinfo->main = (struct jpeg_c_main_controller *) main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;
 
   /* We don't need to create a buffer in raw-data mode. */
   if (cinfo->raw_data_in)
@@ -266,28 +268,28 @@
     /* Allocate a full-image virtual array for each component */
     /* Note we pad the bottom to a multiple of the iMCU height */
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 compptr->width_in_blocks * DCTSIZE,
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor) * DCTSIZE,
-	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+         ci++, compptr++) {
+      main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         compptr->width_in_blocks * DCTSIZE,
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor) * DCTSIZE,
+         (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
     }
 #else
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 #endif
   } else {
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
-    main->whole_image[0] = NULL; /* flag for no virtual arrays */
+    main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */
 #endif
     /* Allocate a strip buffer for each component */
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      main->buffer[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 compptr->width_in_blocks * DCTSIZE,
-	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+         ci++, compptr++) {
+      main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         compptr->width_in_blocks * DCTSIZE,
+         (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
     }
   }
 }
diff --git a/jcmarker.c b/jcmarker.c
index 3d1e6c6..aac7dbd 100644
--- a/jcmarker.c
+++ b/jcmarker.c
@@ -1,8 +1,11 @@
 /*
  * jcmarker.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write JPEG datastream markers.
@@ -11,31 +14,32 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
-typedef enum {			/* JPEG marker codes */
+typedef enum {                  /* JPEG marker codes */
   M_SOF0  = 0xc0,
   M_SOF1  = 0xc1,
   M_SOF2  = 0xc2,
   M_SOF3  = 0xc3,
-  
+
   M_SOF5  = 0xc5,
   M_SOF6  = 0xc6,
   M_SOF7  = 0xc7,
-  
+
   M_JPG   = 0xc8,
   M_SOF9  = 0xc9,
   M_SOF10 = 0xca,
   M_SOF11 = 0xcb,
-  
+
   M_SOF13 = 0xcd,
   M_SOF14 = 0xce,
   M_SOF15 = 0xcf,
-  
+
   M_DHT   = 0xc4,
-  
+
   M_DAC   = 0xcc,
-  
+
   M_RST0  = 0xd0,
   M_RST1  = 0xd1,
   M_RST2  = 0xd2,
@@ -44,7 +48,7 @@
   M_RST5  = 0xd5,
   M_RST6  = 0xd6,
   M_RST7  = 0xd7,
-  
+
   M_SOI   = 0xd8,
   M_EOI   = 0xd9,
   M_SOS   = 0xda,
@@ -53,7 +57,7 @@
   M_DRI   = 0xdd,
   M_DHP   = 0xde,
   M_EXP   = 0xdf,
-  
+
   M_APP0  = 0xe0,
   M_APP1  = 0xe1,
   M_APP2  = 0xe2,
@@ -70,13 +74,13 @@
   M_APP13 = 0xed,
   M_APP14 = 0xee,
   M_APP15 = 0xef,
-  
+
   M_JPG0  = 0xf0,
   M_JPG13 = 0xfd,
   M_COM   = 0xfe,
-  
+
   M_TEM   = 0x01,
-  
+
   M_ERROR = 0x100
 } JPEG_MARKER;
 
@@ -169,7 +173,7 @@
       /* The table entries must be emitted in zigzag order. */
       unsigned int qval = qtbl->quantval[jpeg_natural_order[i]];
       if (prec)
-	emit_byte(cinfo, (int) (qval >> 8));
+        emit_byte(cinfo, (int) (qval >> 8));
       emit_byte(cinfo, (int) (qval & 0xFF));
     }
 
@@ -186,33 +190,33 @@
 {
   JHUFF_TBL * htbl;
   int length, i;
-  
+
   if (is_ac) {
     htbl = cinfo->ac_huff_tbl_ptrs[index];
-    index += 0x10;		/* output index has AC bit set */
+    index += 0x10;              /* output index has AC bit set */
   } else {
     htbl = cinfo->dc_huff_tbl_ptrs[index];
   }
 
   if (htbl == NULL)
     ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
-  
+
   if (! htbl->sent_table) {
     emit_marker(cinfo, M_DHT);
-    
+
     length = 0;
     for (i = 1; i <= 16; i++)
       length += htbl->bits[i];
-    
+
     emit_2bytes(cinfo, length + 2 + 1 + 16);
     emit_byte(cinfo, index);
-    
+
     for (i = 1; i <= 16; i++)
       emit_byte(cinfo, htbl->bits[i]);
-    
+
     for (i = 0; i < length; i++)
       emit_byte(cinfo, htbl->huffval[i]);
-    
+
     htbl->sent_table = TRUE;
   }
 }
@@ -229,32 +233,38 @@
   char ac_in_use[NUM_ARITH_TBLS];
   int length, i;
   jpeg_component_info *compptr;
-  
+
   for (i = 0; i < NUM_ARITH_TBLS; i++)
     dc_in_use[i] = ac_in_use[i] = 0;
-  
+
   for (i = 0; i < cinfo->comps_in_scan; i++) {
     compptr = cinfo->cur_comp_info[i];
-    dc_in_use[compptr->dc_tbl_no] = 1;
-    ac_in_use[compptr->ac_tbl_no] = 1;
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0)
+      dc_in_use[compptr->dc_tbl_no] = 1;
+    /* AC needs no table when not present */
+    if (cinfo->Se)
+      ac_in_use[compptr->ac_tbl_no] = 1;
   }
-  
+
   length = 0;
   for (i = 0; i < NUM_ARITH_TBLS; i++)
     length += dc_in_use[i] + ac_in_use[i];
-  
-  emit_marker(cinfo, M_DAC);
-  
-  emit_2bytes(cinfo, length*2 + 2);
-  
-  for (i = 0; i < NUM_ARITH_TBLS; i++) {
-    if (dc_in_use[i]) {
-      emit_byte(cinfo, i);
-      emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
-    }
-    if (ac_in_use[i]) {
-      emit_byte(cinfo, i + 0x10);
-      emit_byte(cinfo, cinfo->arith_ac_K[i]);
+
+  if (length) {
+    emit_marker(cinfo, M_DAC);
+
+    emit_2bytes(cinfo, length*2 + 2);
+
+    for (i = 0; i < NUM_ARITH_TBLS; i++) {
+      if (dc_in_use[i]) {
+        emit_byte(cinfo, i);
+        emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+      }
+      if (ac_in_use[i]) {
+        emit_byte(cinfo, i + 0x10);
+        emit_byte(cinfo, cinfo->arith_ac_K[i]);
+      }
     }
   }
 #endif /* C_ARITH_CODING_SUPPORTED */
@@ -266,8 +276,8 @@
 /* Emit a DRI marker */
 {
   emit_marker(cinfo, M_DRI);
-  
-  emit_2bytes(cinfo, 4);	/* fixed length */
+
+  emit_2bytes(cinfo, 4);        /* fixed length */
 
   emit_2bytes(cinfo, (int) cinfo->restart_interval);
 }
@@ -279,19 +289,19 @@
 {
   int ci;
   jpeg_component_info *compptr;
-  
+
   emit_marker(cinfo, code);
-  
+
   emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
 
   /* Make sure image isn't bigger than SOF field can handle */
-  if ((long) cinfo->image_height > 65535L ||
-      (long) cinfo->image_width > 65535L)
+  if ((long) cinfo->_jpeg_height > 65535L ||
+      (long) cinfo->_jpeg_width > 65535L)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
 
   emit_byte(cinfo, cinfo->data_precision);
-  emit_2bytes(cinfo, (int) cinfo->image_height);
-  emit_2bytes(cinfo, (int) cinfo->image_width);
+  emit_2bytes(cinfo, (int) cinfo->_jpeg_height);
+  emit_2bytes(cinfo, (int) cinfo->_jpeg_width);
 
   emit_byte(cinfo, cinfo->num_components);
 
@@ -310,32 +320,26 @@
 {
   int i, td, ta;
   jpeg_component_info *compptr;
-  
+
   emit_marker(cinfo, M_SOS);
-  
+
   emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
-  
+
   emit_byte(cinfo, cinfo->comps_in_scan);
-  
+
   for (i = 0; i < cinfo->comps_in_scan; i++) {
     compptr = cinfo->cur_comp_info[i];
     emit_byte(cinfo, compptr->component_id);
-    td = compptr->dc_tbl_no;
-    ta = compptr->ac_tbl_no;
-    if (cinfo->progressive_mode) {
-      /* Progressive mode: only DC or only AC tables are used in one scan;
-       * furthermore, Huffman coding of DC refinement uses no table at all.
-       * We emit 0 for unused field(s); this is recommended by the P&M text
-       * but does not seem to be specified in the standard.
-       */
-      if (cinfo->Ss == 0) {
-	ta = 0;			/* DC scan */
-	if (cinfo->Ah != 0 && !cinfo->arith_code)
-	  td = 0;		/* no DC table either */
-      } else {
-	td = 0;			/* AC scan */
-      }
-    }
+
+    /* We emit 0 for unused field(s); this is recommended by the P&M text
+     * but does not seem to be specified in the standard.
+     */
+
+    /* DC needs no table for refinement scan */
+    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
+    /* AC needs no table when not present */
+    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
+
     emit_byte(cinfo, (td << 4) + ta);
   }
 
@@ -350,22 +354,22 @@
 /* Emit a JFIF-compliant APP0 marker */
 {
   /*
-   * Length of APP0 block	(2 bytes)
-   * Block ID			(4 bytes - ASCII "JFIF")
-   * Zero byte			(1 byte to terminate the ID string)
-   * Version Major, Minor	(2 bytes - major first)
-   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
-   * Xdpu			(2 bytes - dots per unit horizontal)
-   * Ydpu			(2 bytes - dots per unit vertical)
-   * Thumbnail X size		(1 byte)
-   * Thumbnail Y size		(1 byte)
+   * Length of APP0 block       (2 bytes)
+   * Block ID                   (4 bytes - ASCII "JFIF")
+   * Zero byte                  (1 byte to terminate the ID string)
+   * Version Major, Minor       (2 bytes - major first)
+   * Units                      (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu                       (2 bytes - dots per unit horizontal)
+   * Ydpu                       (2 bytes - dots per unit vertical)
+   * Thumbnail X size           (1 byte)
+   * Thumbnail Y size           (1 byte)
    */
-  
+
   emit_marker(cinfo, M_APP0);
-  
+
   emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
 
-  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x4A);       /* Identifier: ASCII "JFIF" */
   emit_byte(cinfo, 0x46);
   emit_byte(cinfo, 0x49);
   emit_byte(cinfo, 0x46);
@@ -375,7 +379,7 @@
   emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
   emit_2bytes(cinfo, (int) cinfo->X_density);
   emit_2bytes(cinfo, (int) cinfo->Y_density);
-  emit_byte(cinfo, 0);		/* No thumbnail image */
+  emit_byte(cinfo, 0);          /* No thumbnail image */
   emit_byte(cinfo, 0);
 }
 
@@ -385,12 +389,12 @@
 /* Emit an Adobe APP14 marker */
 {
   /*
-   * Length of APP14 block	(2 bytes)
-   * Block ID			(5 bytes - ASCII "Adobe")
-   * Version Number		(2 bytes - currently 100)
-   * Flags0			(2 bytes - currently 0)
-   * Flags1			(2 bytes - currently 0)
-   * Color transform		(1 byte)
+   * Length of APP14 block      (2 bytes)
+   * Block ID                   (5 bytes - ASCII "Adobe")
+   * Version Number             (2 bytes - currently 100)
+   * Flags0                     (2 bytes - currently 0)
+   * Flags1                     (2 bytes - currently 0)
+   * Color transform            (1 byte)
    *
    * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
    * now in circulation seem to use Version = 100, so that's what we write.
@@ -399,28 +403,28 @@
    * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
    * whether the encoder performed a transformation, which is pretty useless.
    */
-  
+
   emit_marker(cinfo, M_APP14);
-  
+
   emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
 
-  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x41);       /* Identifier: ASCII "Adobe" */
   emit_byte(cinfo, 0x64);
   emit_byte(cinfo, 0x6F);
   emit_byte(cinfo, 0x62);
   emit_byte(cinfo, 0x65);
-  emit_2bytes(cinfo, 100);	/* Version */
-  emit_2bytes(cinfo, 0);	/* Flags0 */
-  emit_2bytes(cinfo, 0);	/* Flags1 */
+  emit_2bytes(cinfo, 100);      /* Version */
+  emit_2bytes(cinfo, 0);        /* Flags0 */
+  emit_2bytes(cinfo, 0);        /* Flags1 */
   switch (cinfo->jpeg_color_space) {
   case JCS_YCbCr:
-    emit_byte(cinfo, 1);	/* Color transform = 1 */
+    emit_byte(cinfo, 1);        /* Color transform = 1 */
     break;
   case JCS_YCCK:
-    emit_byte(cinfo, 2);	/* Color transform = 2 */
+    emit_byte(cinfo, 2);        /* Color transform = 2 */
     break;
   default:
-    emit_byte(cinfo, 0);	/* Color transform = 0 */
+    emit_byte(cinfo, 0);        /* Color transform = 0 */
     break;
   }
 }
@@ -438,12 +442,12 @@
 write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
 /* Emit an arbitrary marker header */
 {
-  if (datalen > (unsigned int) 65533)		/* safety check */
+  if (datalen > (unsigned int) 65533)           /* safety check */
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
   emit_marker(cinfo, (JPEG_MARKER) marker);
 
-  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length */
+  emit_2bytes(cinfo, (int) (datalen + 2));      /* total length */
 }
 
 METHODDEF(void)
@@ -470,12 +474,12 @@
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
-  emit_marker(cinfo, M_SOI);	/* first the SOI */
+  emit_marker(cinfo, M_SOI);    /* first the SOI */
 
   /* SOI is defined to reset restart interval to 0 */
   marker->last_restart_interval = 0;
 
-  if (cinfo->write_JFIF_header)	/* next an optional JFIF APP0 */
+  if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */
     emit_jfif_app0(cinfo);
   if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
     emit_adobe_app14(cinfo);
@@ -496,7 +500,7 @@
   int ci, prec;
   boolean is_baseline;
   jpeg_component_info *compptr;
-  
+
   /* Emit DQT for each quantization table.
    * Note that emit_dqt() suppresses any duplicate tables.
    */
@@ -516,9 +520,9 @@
   } else {
     is_baseline = TRUE;
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
-	is_baseline = FALSE;
+        is_baseline = FALSE;
     }
     if (prec && is_baseline) {
       is_baseline = FALSE;
@@ -529,14 +533,17 @@
 
   /* Emit the proper SOF marker */
   if (cinfo->arith_code) {
-    emit_sof(cinfo, M_SOF9);	/* SOF code for arithmetic coding */
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
+    else
+      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
   } else {
     if (cinfo->progressive_mode)
-      emit_sof(cinfo, M_SOF2);	/* SOF code for progressive Huffman */
+      emit_sof(cinfo, M_SOF2);  /* SOF code for progressive Huffman */
     else if (is_baseline)
-      emit_sof(cinfo, M_SOF0);	/* SOF code for baseline implementation */
+      emit_sof(cinfo, M_SOF0);  /* SOF code for baseline implementation */
     else
-      emit_sof(cinfo, M_SOF1);	/* SOF code for non-baseline Huffman file */
+      emit_sof(cinfo, M_SOF1);  /* SOF code for non-baseline Huffman file */
   }
 }
 
@@ -566,19 +573,12 @@
      */
     for (i = 0; i < cinfo->comps_in_scan; i++) {
       compptr = cinfo->cur_comp_info[i];
-      if (cinfo->progressive_mode) {
-	/* Progressive mode: only DC or only AC tables are used in one scan */
-	if (cinfo->Ss == 0) {
-	  if (cinfo->Ah == 0)	/* DC needs no table for refinement scan */
-	    emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-	} else {
-	  emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
-	}
-      } else {
-	/* Sequential mode: need both DC and AC tables */
-	emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
-	emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
-      }
+      /* DC needs no table for refinement scan */
+      if (cinfo->Ss == 0 && cinfo->Ah == 0)
+        emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+      /* AC needs no table when not present */
+      if (cinfo->Se)
+        emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
     }
   }
 
@@ -627,9 +627,9 @@
   if (! cinfo->arith_code) {
     for (i = 0; i < NUM_HUFF_TBLS; i++) {
       if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, FALSE);
+        emit_dht(cinfo, i, FALSE);
       if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
-	emit_dht(cinfo, i, TRUE);
+        emit_dht(cinfo, i, TRUE);
     }
   }
 
@@ -649,7 +649,7 @@
   /* Create the subobject */
   marker = (my_marker_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_marker_writer));
+                                sizeof(my_marker_writer));
   cinfo->marker = (struct jpeg_marker_writer *) marker;
   /* Initialize method pointers */
   marker->pub.write_file_header = write_file_header;
diff --git a/jcmaster.c b/jcmaster.c
index aab4020..ea13d0d 100644
--- a/jcmaster.c
+++ b/jcmaster.c
@@ -1,38 +1,42 @@
 /*
  * jcmaster.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains master control logic for the JPEG compressor.
  * These routines are concerned with parameter validation, initial setup,
- * and inter-pass control (determining the number of passes and the work 
+ * and inter-pass control (determining the number of passes and the work
  * to be done in each pass).
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Private state */
 
 typedef enum {
-	main_pass,		/* input data, also do first output step */
-	huff_opt_pass,		/* Huffman code optimization pass */
-	output_pass		/* data output pass */
+        main_pass,              /* input data, also do first output step */
+        huff_opt_pass,          /* Huffman code optimization pass */
+        output_pass             /* data output pass */
 } c_pass_type;
 
 typedef struct {
-  struct jpeg_comp_master pub;	/* public fields */
+  struct jpeg_comp_master pub;  /* public fields */
 
-  c_pass_type pass_type;	/* the type of the current pass */
+  c_pass_type pass_type;        /* the type of the current pass */
 
-  int pass_number;		/* # of passes completed */
-  int total_passes;		/* total # of passes needed */
+  int pass_number;              /* # of passes completed */
+  int total_passes;             /* total # of passes needed */
 
-  int scan_number;		/* current index in scan_info[] */
+  int scan_number;              /* current index in scan_info[] */
 } my_comp_master;
 
 typedef my_comp_master * my_master_ptr;
@@ -42,8 +46,28 @@
  * Support routines that do various essential calculations.
  */
 
+#if JPEG_LIB_VERSION >= 70
+/*
+ * Compute JPEG image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  /* Hardwire it to "no scaling" */
+  cinfo->jpeg_width = cinfo->image_width;
+  cinfo->jpeg_height = cinfo->image_height;
+  cinfo->min_DCT_h_scaled_size = DCTSIZE;
+  cinfo->min_DCT_v_scaled_size = DCTSIZE;
+}
+#endif
+
+
 LOCAL(void)
-initial_setup (j_compress_ptr cinfo)
+initial_setup (j_compress_ptr cinfo, boolean transcode_only)
 /* Do computations that are needed before master selection phase */
 {
   int ci;
@@ -51,14 +75,21 @@
   long samplesperrow;
   JDIMENSION jd_samplesperrow;
 
+#if JPEG_LIB_VERSION >= 70
+#if JPEG_LIB_VERSION >= 80
+  if (!transcode_only)
+#endif
+    jpeg_calc_jpeg_dimensions(cinfo);
+#endif
+
   /* Sanity check on image dimensions */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+  if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0
       || cinfo->num_components <= 0 || cinfo->input_components <= 0)
     ERREXIT(cinfo, JERR_EMPTY_IMAGE);
 
   /* Make sure image isn't bigger than I can handle */
-  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
-      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+  if ((long) cinfo->_jpeg_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->_jpeg_width > (long) JPEG_MAX_DIMENSION)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
 
   /* Width of an input scanline must be representable as JDIMENSION. */
@@ -74,7 +105,7 @@
   /* Check that number of components won't exceed internal array sizes */
   if (cinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
 
   /* Compute maximum sampling factors; check factor validity */
   cinfo->max_h_samp_factor = 1;
@@ -82,12 +113,12 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+        compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
       ERREXIT(cinfo, JERR_BAD_SAMPLING);
     cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
+                                   compptr->h_samp_factor);
     cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
+                                   compptr->v_samp_factor);
   }
 
   /* Compute dimensions of components */
@@ -96,21 +127,25 @@
     /* Fill in the correct component_index value; don't rely on application */
     compptr->component_index = ci;
     /* For compression, we never do DCT scaling. */
+#if JPEG_LIB_VERSION >= 70
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
+#else
     compptr->DCT_scaled_size = DCTSIZE;
+#endif
     /* Size in DCT blocks */
     compptr->width_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+      jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->height_in_blocks = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+      jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
     /* Size in samples */
     compptr->downsampled_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
+      jdiv_round_up((long) cinfo->_jpeg_width * (long) compptr->h_samp_factor,
+                    (long) cinfo->max_h_samp_factor);
     compptr->downsampled_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
+      jdiv_round_up((long) cinfo->_jpeg_height * (long) compptr->v_samp_factor,
+                    (long) cinfo->max_v_samp_factor);
     /* Mark component needed (this flag isn't actually used for compression) */
     compptr->component_needed = TRUE;
   }
@@ -119,8 +154,8 @@
    * main controller will call coefficient controller).
    */
   cinfo->total_iMCU_rows = (JDIMENSION)
-    jdiv_round_up((long) cinfo->image_height,
-		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+    jdiv_round_up((long) cinfo->_jpeg_height,
+                  (long) (cinfo->max_v_samp_factor*DCTSIZE));
 }
 
 
@@ -153,15 +188,15 @@
 #ifdef C_PROGRESSIVE_SUPPORTED
     cinfo->progressive_mode = TRUE;
     last_bitpos_ptr = & last_bitpos[0][0];
-    for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (ci = 0; ci < cinfo->num_components; ci++)
       for (coefi = 0; coefi < DCTSIZE2; coefi++)
-	*last_bitpos_ptr++ = -1;
+        *last_bitpos_ptr++ = -1;
 #else
     ERREXIT(cinfo, JERR_NOT_COMPILED);
 #endif
   } else {
     cinfo->progressive_mode = FALSE;
-    for (ci = 0; ci < cinfo->num_components; ci++) 
+    for (ci = 0; ci < cinfo->num_components; ci++)
       component_sent[ci] = FALSE;
   }
 
@@ -173,10 +208,10 @@
     for (ci = 0; ci < ncomps; ci++) {
       thisi = scanptr->component_index[ci];
       if (thisi < 0 || thisi >= cinfo->num_components)
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
       /* Components must appear in SOF order within each scan */
       if (ci > 0 && thisi <= scanptr->component_index[ci-1])
-	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
     }
     /* Validate progression parameters */
     Ss = scanptr->Ss;
@@ -198,43 +233,43 @@
 #define MAX_AH_AL 13
 #endif
       if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
-	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       if (Ss == 0) {
-	if (Se != 0)		/* DC and AC together not OK */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        if (Se != 0)            /* DC and AC together not OK */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       } else {
-	if (ncomps != 1)	/* AC scans must be for only one component */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        if (ncomps != 1)        /* AC scans must be for only one component */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       }
       for (ci = 0; ci < ncomps; ci++) {
-	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
-	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
-	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	for (coefi = Ss; coefi <= Se; coefi++) {
-	  if (last_bitpos_ptr[coefi] < 0) {
-	    /* first scan of this coefficient */
-	    if (Ah != 0)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  } else {
-	    /* not first scan */
-	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
-	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
-	  }
-	  last_bitpos_ptr[coefi] = Al;
-	}
+        last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+        if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        for (coefi = Ss; coefi <= Se; coefi++) {
+          if (last_bitpos_ptr[coefi] < 0) {
+            /* first scan of this coefficient */
+            if (Ah != 0)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          } else {
+            /* not first scan */
+            if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          }
+          last_bitpos_ptr[coefi] = Al;
+        }
       }
 #endif
     } else {
       /* For sequential JPEG, all progression parameters must be these: */
       if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
-	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
       /* Make sure components are not sent twice */
       for (ci = 0; ci < ncomps; ci++) {
-	thisi = scanptr->component_index[ci];
-	if (component_sent[thisi])
-	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
-	component_sent[thisi] = TRUE;
+        thisi = scanptr->component_index[ci];
+        if (component_sent[thisi])
+          ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        component_sent[thisi] = TRUE;
       }
     }
   }
@@ -249,13 +284,13 @@
      */
     for (ci = 0; ci < cinfo->num_components; ci++) {
       if (last_bitpos[ci][0] < 0)
-	ERREXIT(cinfo, JERR_MISSING_DATA);
+        ERREXIT(cinfo, JERR_MISSING_DATA);
     }
 #endif
   } else {
     for (ci = 0; ci < cinfo->num_components; ci++) {
       if (! component_sent[ci])
-	ERREXIT(cinfo, JERR_MISSING_DATA);
+        ERREXIT(cinfo, JERR_MISSING_DATA);
     }
   }
 }
@@ -278,7 +313,7 @@
     cinfo->comps_in_scan = scanptr->comps_in_scan;
     for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
       cinfo->cur_comp_info[ci] =
-	&cinfo->comp_info[scanptr->component_index[ci]];
+        &cinfo->comp_info[scanptr->component_index[ci]];
     }
     cinfo->Ss = scanptr->Ss;
     cinfo->Se = scanptr->Se;
@@ -291,7 +326,7 @@
     /* Prepare for single sequential-JPEG scan containing all components */
     if (cinfo->num_components > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPS_IN_SCAN);
+               MAX_COMPS_IN_SCAN);
     cinfo->comps_in_scan = cinfo->num_components;
     for (ci = 0; ci < cinfo->num_components; ci++) {
       cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
@@ -311,16 +346,16 @@
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
-  
+
   if (cinfo->comps_in_scan == 1) {
-    
+
     /* Noninterleaved (single-component) scan */
     compptr = cinfo->cur_comp_info[0];
-    
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = compptr->width_in_blocks;
     cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
+
     /* For noninterleaved scan, always one block per MCU */
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
@@ -333,28 +368,28 @@
     tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
     if (tmp == 0) tmp = compptr->v_samp_factor;
     compptr->last_row_height = tmp;
-    
+
     /* Prepare array describing MCU composition */
     cinfo->blocks_in_MCU = 1;
     cinfo->MCU_membership[0] = 0;
-    
+
   } else {
-    
+
     /* Interleaved (multi-component) scan */
     if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
+               MAX_COMPS_IN_SCAN);
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+      jdiv_round_up((long) cinfo->_jpeg_width,
+                    (long) (cinfo->max_h_samp_factor*DCTSIZE));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height,
-		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
-    
+      jdiv_round_up((long) cinfo->_jpeg_height,
+                    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
     cinfo->blocks_in_MCU = 0;
-    
+
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       compptr = cinfo->cur_comp_info[ci];
       /* Sampling factors give # of blocks of component in each MCU */
@@ -372,12 +407,12 @@
       /* Prepare array describing MCU composition */
       mcublks = compptr->MCU_blocks;
       if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
       while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
       }
     }
-    
+
   }
 
   /* Convert restart specified in rows to actual MCU count. */
@@ -417,8 +452,8 @@
     (*cinfo->fdct->start_pass) (cinfo);
     (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
     (*cinfo->coef->start_pass) (cinfo,
-				(master->total_passes > 1 ?
-				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+                                (master->total_passes > 1 ?
+                                 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
     (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
     if (cinfo->optimize_coding) {
       /* No immediate data output; postpone writing frame/scan headers */
@@ -546,7 +581,7 @@
 
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_comp_master));
+                                  sizeof(my_comp_master));
   cinfo->master = (struct jpeg_comp_master *) master;
   master->pub.prepare_for_pass = prepare_for_pass;
   master->pub.pass_startup = pass_startup;
@@ -554,7 +589,7 @@
   master->pub.is_last_pass = FALSE;
 
   /* Validate parameters, determine derived values */
-  initial_setup(cinfo);
+  initial_setup(cinfo, transcode_only);
 
   if (cinfo->scan_info != NULL) {
 #ifdef C_MULTISCAN_FILES_SUPPORTED
@@ -567,7 +602,7 @@
     cinfo->num_scans = 1;
   }
 
-  if (cinfo->progressive_mode)	/*  TEMPORARY HACK ??? */
+  if (cinfo->progressive_mode && !cinfo->arith_code)    /*  TEMPORARY HACK ??? */
     cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
 
   /* Initialize my private state */
diff --git a/jcomapi.c b/jcomapi.c
index 9b1fa75..d8f396d 100644
--- a/jcomapi.c
+++ b/jcomapi.c
@@ -1,8 +1,10 @@
 /*
  * jcomapi.c
  *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains application interface routines that are used for both
@@ -72,8 +74,8 @@
   /* NB: mem pointer is NULL if memory mgr failed to initialize. */
   if (cinfo->mem != NULL)
     (*cinfo->mem->self_destruct) (cinfo);
-  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
-  cinfo->global_state = 0;	/* mark it destroyed */
+  cinfo->mem = NULL;            /* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;      /* mark it destroyed */
 }
 
 
@@ -88,8 +90,8 @@
   JQUANT_TBL *tbl;
 
   tbl = (JQUANT_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JQUANT_TBL));
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
   return tbl;
 }
 
@@ -100,7 +102,7 @@
   JHUFF_TBL *tbl;
 
   tbl = (JHUFF_TBL *)
-    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
-  tbl->sent_table = FALSE;	/* make sure this is false in any new table */
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JHUFF_TBL));
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
   return tbl;
 }
diff --git a/jconfig.bcc b/jconfig.bcc
deleted file mode 100644
index c6c53ff..0000000
--- a/jconfig.bcc
+++ /dev/null
@@ -1,48 +0,0 @@
-/* jconfig.bcc --- jconfig.h for Borland C (Turbo C) on MS-DOS or OS/2. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#ifdef __MSDOS__
-#define NEED_FAR_POINTERS	/* for small or medium memory model */
-#endif
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN	/* this assumes you have -w-stu in CFLAGS */
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#ifdef __MSDOS__
-#define USE_MSDOS_MEMMGR	/* Define this if you use jmemdos.c */
-#define MAX_ALLOC_CHUNK 65520L	/* Maximum request to malloc() */
-#define USE_FMEM		/* Borland has _fmemcpy() and _fmemset() */
-#endif
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE
-#define USE_SETMODE		/* Borland has setmode() */
-#ifdef __MSDOS__
-#define NEED_SIGNAL_CATCHER	/* Define this if you use jmemdos.c */
-#endif
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.cfg b/jconfig.cfg
deleted file mode 100644
index 36a04fa..0000000
--- a/jconfig.cfg
+++ /dev/null
@@ -1,44 +0,0 @@
-/* jconfig.cfg --- source file edited by configure script */
-/* see jconfig.doc for explanations */
-
-#undef HAVE_PROTOTYPES
-#undef HAVE_UNSIGNED_CHAR
-#undef HAVE_UNSIGNED_SHORT
-#undef void
-#undef const
-#undef CHAR_IS_UNSIGNED
-#undef HAVE_STDDEF_H
-#undef HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-/* Define this if you get warnings about undefined structures. */
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-#undef INLINE
-/* These are for configuring the JPEG memory manager. */
-#undef DEFAULT_MAX_MEM
-#undef NO_MKTEMP
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#undef TWO_FILE_COMMANDLINE
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-
-/* Define this if you want percent-done progress reports from cjpeg/djpeg. */
-#undef PROGRESS_REPORT
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.dj b/jconfig.dj
deleted file mode 100644
index f759a9d..0000000
--- a/jconfig.dj
+++ /dev/null
@@ -1,38 +0,0 @@
-/* jconfig.dj --- jconfig.h for DJGPP (Delorie's GNU C port) on MS-DOS. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS	/* DJGPP uses flat 32-bit addressing */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#undef TWO_FILE_COMMANDLINE	/* optional */
-#define USE_SETMODE		/* Needed to make one-file style work in DJGPP */
-#undef NEED_SIGNAL_CATCHER	/* Define this if you use jmemname.c */
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.h.in b/jconfig.h.in
new file mode 100644
index 0000000..42d86f2
--- /dev/null
+++ b/jconfig.h.in
@@ -0,0 +1,70 @@
+/* Version ID for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
+ */
+#define JPEG_LIB_VERSION  62	/* Version 6b */
+
+/* libjpeg-turbo version */
+#define LIBJPEG_TURBO_VERSION 0
+
+/* Support arithmetic encoding */
+#undef C_ARITH_CODING_SUPPORTED
+
+/* Support arithmetic decoding */
+#undef D_ARITH_CODING_SUPPORTED
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
+
+/* Define to 1 if you have the <locale.h> header file. */
+#undef HAVE_LOCALE_H
+
+/* Define to 1 if you have the <stddef.h> header file. */
+#undef HAVE_STDDEF_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if the system has the type `unsigned char'. */
+#undef HAVE_UNSIGNED_CHAR
+
+/* Define to 1 if the system has the type `unsigned short'. */
+#undef HAVE_UNSIGNED_SHORT
+
+/* Compiler does not support pointers to undefined structures. */
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Support in-memory source/destination managers */
+#undef MEM_SRCDST_SUPPORTED
+
+/* Define if you have BSD-like bzero and bcopy in <strings.h> rather than
+   memset/memcpy in <string.h>. */
+#undef NEED_BSD_STRINGS
+
+/* Define if you need to include <sys/types.h> to get size_t. */
+#undef NEED_SYS_TYPES_H
+
+/* Define if your (broken) compiler shifts signed values as if they were
+   unsigned. */
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+/* Use accelerated SIMD routines. */
+#undef WITH_SIMD
+
+/* Define to 1 if type `char' is unsigned and you are not using gcc.  */
+#ifndef __CHAR_UNSIGNED__
+# undef __CHAR_UNSIGNED__
+#endif
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
diff --git a/jconfig.mac b/jconfig.mac
deleted file mode 100644
index 0de3efe..0000000
--- a/jconfig.mac
+++ /dev/null
@@ -1,43 +0,0 @@
-/* jconfig.mac --- jconfig.h for CodeWarrior on Apple Macintosh */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#define USE_MAC_MEMMGR		/* Define this if you use jmemmac.c */
-
-#define ALIGN_TYPE long		/* Needed for 680x0 Macs */
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define USE_CCOMMAND		/* Command line reader for Macintosh */
-#define TWO_FILE_COMMANDLINE	/* Binary I/O thru stdin/stdout doesn't work */
-
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.manx b/jconfig.manx
deleted file mode 100644
index 6dd0d00..0000000
--- a/jconfig.manx
+++ /dev/null
@@ -1,43 +0,0 @@
-/* jconfig.manx --- jconfig.h for Amiga systems using Manx Aztec C ver 5.x. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#define TEMP_DIRECTORY "JPEGTMP:"	/* recommended setting for Amiga */
-
-#define SHORTxSHORT_32		/* produces better DCT code with Aztec C */
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE
-#define NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#define signal_catcher _abort	/* hack for Aztec C naming requirements */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.mc6 b/jconfig.mc6
deleted file mode 100644
index c55082d..0000000
--- a/jconfig.mc6
+++ /dev/null
@@ -1,52 +0,0 @@
-/* jconfig.mc6 --- jconfig.h for Microsoft C on MS-DOS, version 6.00A & up. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#define NEED_FAR_POINTERS	/* for small or medium memory model */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#define USE_MSDOS_MEMMGR	/* Define this if you use jmemdos.c */
-
-#define MAX_ALLOC_CHUNK 65520L	/* Maximum request to malloc() */
-
-#define USE_FMEM		/* Microsoft has _fmemcpy() and _fmemset() */
-
-#define NEED_FHEAPMIN		/* far heap management routines are broken */
-
-#define SHORTxLCONST_32		/* enable compiler-specific DCT optimization */
-/* Note: the above define is known to improve the code with Microsoft C 6.00A.
- * I do not know whether it is good for later compiler versions.
- * Please report any info on this point to jpeg-info@uunet.uu.net.
- */
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE
-#define USE_SETMODE		/* Microsoft has setmode() */
-#define NEED_SIGNAL_CATCHER	/* Define this if you use jmemdos.c */
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.sas b/jconfig.sas
deleted file mode 100644
index efdac22..0000000
--- a/jconfig.sas
+++ /dev/null
@@ -1,43 +0,0 @@
-/* jconfig.sas --- jconfig.h for Amiga systems using SAS C 6.0 and up. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#define TEMP_DIRECTORY "JPEGTMP:"	/* recommended setting for Amiga */
-
-#define NO_MKTEMP		/* SAS C doesn't have mktemp() */
-
-#define SHORTxSHORT_32		/* produces better DCT code with SAS C */
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE
-#define NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.st b/jconfig.st
deleted file mode 100644
index 4421b7a..0000000
--- a/jconfig.st
+++ /dev/null
@@ -1,42 +0,0 @@
-/* jconfig.st --- jconfig.h for Atari ST/STE/TT using Pure C or Turbo C. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-#define INCOMPLETE_TYPES_BROKEN	/* suppress undefined-structure warnings */
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#define ALIGN_TYPE  long	/* apparently double is a weird size? */
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE	/* optional -- undef if you like Unix style */
-/* Note: if you undef TWO_FILE_COMMANDLINE, you may need to define
- * USE_SETMODE.  Some Atari compilers require it, some do not.
- */
-#define NEED_SIGNAL_CATCHER	/* needed if you use jmemname.c */
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.doc b/jconfig.txt
similarity index 75%
rename from jconfig.doc
rename to jconfig.txt
index c18d1c0..8acd8dd 100644
--- a/jconfig.doc
+++ b/jconfig.txt
@@ -1,8 +1,10 @@
 /*
- * jconfig.doc
+ * jconfig.txt
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1994, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file documents the configuration options that are required to
@@ -23,11 +25,6 @@
  * #define the symbol if yes, #undef it if no.
  */
 
-/* Does your compiler support function prototypes?
- * (If not, you also need to use ansi2knr, see install.doc)
- */
-#define HAVE_PROTOTYPES
-
 /* Does your compiler support the declaration "unsigned char" ?
  * How about "unsigned short" ?
  */
@@ -48,7 +45,7 @@
  * If you're not sure, leaving it undefined will work at some cost in speed.
  * If you defined HAVE_UNSIGNED_CHAR then the speed difference is minimal.
  */
-#undef CHAR_IS_UNSIGNED
+#undef __CHAR_UNSIGNED__
 
 /* Define this if your system has an ANSI-conforming <stddef.h> file.
  */
@@ -69,19 +66,6 @@
  */
 #undef NEED_SYS_TYPES_H
 
-/* For 80x86 machines, you need to define NEED_FAR_POINTERS,
- * unless you are using a large-data memory model or 80386 flat-memory mode.
- * On less brain-damaged CPUs this symbol must not be defined.
- * (Defining this symbol causes large data structures to be referenced through
- * "far" pointers and to be allocated with a special version of malloc.)
- */
-#undef NEED_FAR_POINTERS
-
-/* Define this if your linker needs global names to be unique in less
- * than the first 15 characters.
- */
-#undef NEED_SHORT_EXTERNAL_NAMES
-
 /* Although a real ANSI C compiler can deal perfectly well with pointers to
  * unspecified structures (see "incomplete types" in the spec), a few pre-ANSI
  * and pseudo-ANSI compilers get confused.  To keep one of these bozos happy,
@@ -91,6 +75,15 @@
  */
 #undef INCOMPLETE_TYPES_BROKEN
 
+/* Define "boolean" as unsigned char, not int, on Windows systems.
+ */
+#ifdef _WIN32
+#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
+#endif
+
 
 /*
  * The following options affect code selection within the JPEG library,
@@ -121,11 +114,11 @@
 
 /* These defines indicate which image (non-JPEG) file formats are allowed. */
 
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
+#define BMP_SUPPORTED           /* BMP image file format */
+#define GIF_SUPPORTED           /* GIF image file format */
+#define PPM_SUPPORTED           /* PBMPLUS PPM/PGM image file format */
+#undef RLE_SUPPORTED            /* Utah RLE image file format */
+#define TARGA_SUPPORTED         /* Targa image file format */
 
 /* Define this if you want to name both input and output files on the command
  * line, rather than using stdout and optionally stdin.  You MUST do this if
@@ -134,12 +127,6 @@
  */
 #undef TWO_FILE_COMMANDLINE
 
-/* Define this if your system needs explicit cleanup of temporary files.
- * This is crucial under MS-DOS, where the temporary "files" may be areas
- * of extended memory; on most other systems it's not as important.
- */
-#undef NEED_SIGNAL_CATCHER
-
 /* By default, we open image files with fopen(...,"rb") or fopen(...,"wb").
  * This is necessary on systems that distinguish text files from binary files,
  * and is harmless on most systems that don't.  If you have one of the rare
diff --git a/jconfig.vc b/jconfig.vc
deleted file mode 100644
index 7e291c7..0000000
--- a/jconfig.vc
+++ /dev/null
@@ -1,45 +0,0 @@
-/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-/* Define "boolean" as unsigned char, not int, per Windows custom */
-#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
-typedef unsigned char boolean;
-#endif
-#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining it */
-
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE	/* optional */
-#define USE_SETMODE		/* Microsoft has setmode() */
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.vms b/jconfig.vms
deleted file mode 100644
index 55a6ffb..0000000
--- a/jconfig.vms
+++ /dev/null
@@ -1,37 +0,0 @@
-/* jconfig.vms --- jconfig.h for use on Digital VMS. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#undef CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#define TWO_FILE_COMMANDLINE	/* Needed on VMS */
-#undef NEED_SIGNAL_CATCHER
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfig.wat b/jconfig.wat
deleted file mode 100644
index 6cc545b..0000000
--- a/jconfig.wat
+++ /dev/null
@@ -1,38 +0,0 @@
-/* jconfig.wat --- jconfig.h for Watcom C/C++ on MS-DOS or OS/2. */
-/* see jconfig.doc for explanations */
-
-#define HAVE_PROTOTYPES
-#define HAVE_UNSIGNED_CHAR
-#define HAVE_UNSIGNED_SHORT
-/* #define void char */
-/* #define const */
-#define CHAR_IS_UNSIGNED
-#define HAVE_STDDEF_H
-#define HAVE_STDLIB_H
-#undef NEED_BSD_STRINGS
-#undef NEED_SYS_TYPES_H
-#undef NEED_FAR_POINTERS	/* Watcom uses flat 32-bit addressing */
-#undef NEED_SHORT_EXTERNAL_NAMES
-#undef INCOMPLETE_TYPES_BROKEN
-
-#ifdef JPEG_INTERNALS
-
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-#endif /* JPEG_INTERNALS */
-
-#ifdef JPEG_CJPEG_DJPEG
-
-#define BMP_SUPPORTED		/* BMP image file format */
-#define GIF_SUPPORTED		/* GIF image file format */
-#define PPM_SUPPORTED		/* PBMPLUS PPM/PGM image file format */
-#undef RLE_SUPPORTED		/* Utah RLE image file format */
-#define TARGA_SUPPORTED		/* Targa image file format */
-
-#undef TWO_FILE_COMMANDLINE	/* optional */
-#define USE_SETMODE		/* Needed to make one-file style work in Watcom */
-#undef NEED_SIGNAL_CATCHER	/* Define this if you use jmemname.c */
-#undef DONT_USE_B_MODE
-#undef PROGRESS_REPORT		/* optional */
-
-#endif /* JPEG_CJPEG_DJPEG */
diff --git a/jconfigint.h.in b/jconfigint.h.in
new file mode 100644
index 0000000..8f216eb
--- /dev/null
+++ b/jconfigint.h.in
@@ -0,0 +1,11 @@
+/* libjpeg-turbo build number */
+#undef BUILD
+
+/* How to obtain function inlining. */
+#undef INLINE
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Version number of package */
+#undef VERSION
diff --git a/jcparam.c b/jcparam.c
index 6fc48f5..3194c9d 100644
--- a/jcparam.c
+++ b/jcparam.c
@@ -1,8 +1,11 @@
 /*
  * jcparam.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2003-2008 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains optional default-setting code for the JPEG compressor.
@@ -13,6 +16,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jstdhuff.c"
 
 
 /*
@@ -21,8 +25,8 @@
 
 GLOBAL(void)
 jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
+                      const unsigned int *basic_table,
+                      int scale_factor, boolean force_baseline)
 /* Define a quantization table equal to the basic_table times
  * a scale factor (given as a percentage).
  * If force_baseline is TRUE, the computed quantization table entries
@@ -51,7 +55,7 @@
     if (temp <= 0L) temp = 1L;
     if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
     if (force_baseline && temp > 255L)
-      temp = 255L;		/* limit to baseline range if requested */
+      temp = 255L;              /* limit to baseline range if requested */
     (*qtblptr)->quantval[i] = (UINT16) temp;
   }
 
@@ -60,45 +64,63 @@
 }
 
 
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  17,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+#if JPEG_LIB_VERSION >= 70
+GLOBAL(void)
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+/* Install the default quantization tables in slots 0 and 1, scaling each by
+ * the percentage scale factor read from cinfo->q_scale_factor[], so that
+ * luminance (index 0) and chrominance (index 1) can be scaled differently.
+ */
+{
+  /* Slot 0 gets the luminance table, slot 1 the chrominance table */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+                       cinfo->q_scale_factor[0], force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+                       cinfo->q_scale_factor[1], force_baseline);
+}
+#endif
+
+
 GLOBAL(void)
 jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
+                         boolean force_baseline)
 /* Set or change the 'quality' (quantization) setting, using default tables
  * and a straight percentage-scaling quality scale.  In most cases it's better
  * to use jpeg_set_quality (below); this entry point is provided for
  * applications that insist on a linear percentage scaling.
  */
 {
-  /* These are the sample quantization tables given in JPEG spec section K.1.
-   * The spec says that the values given produce "good" quality, and
-   * when divided by 2, "very good" quality.
-   */
-  static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
-    16,  11,  10,  16,  24,  40,  51,  61,
-    12,  12,  14,  19,  26,  58,  60,  55,
-    14,  13,  16,  24,  40,  57,  69,  56,
-    14,  17,  22,  29,  51,  87,  80,  62,
-    18,  22,  37,  56,  68, 109, 103,  77,
-    24,  35,  55,  64,  81, 104, 113,  92,
-    49,  64,  78,  87, 103, 121, 120, 101,
-    72,  92,  95,  98, 112, 100, 103,  99
-  };
-  static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
-    17,  18,  24,  47,  99,  99,  99,  99,
-    18,  21,  26,  66,  99,  99,  99,  99,
-    24,  26,  56,  99,  99,  99,  99,  99,
-    47,  66,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99,
-    99,  99,  99,  99,  99,  99,  99,  99
-  };
-
   /* Set up two quantization tables using the specified scaling */
   jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
-		       scale_factor, force_baseline);
+                       scale_factor, force_baseline);
   jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
-		       scale_factor, force_baseline);
+                       scale_factor, force_baseline);
 }
 
 
@@ -145,116 +167,6 @@
 
 
 /*
- * Huffman table setup routines
- */
-
-LOCAL(void)
-add_huff_table (j_compress_ptr cinfo,
-		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
-/* Define a Huffman table */
-{
-  int nsymbols, len;
-
-  if (*htblptr == NULL)
-    *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-
-  /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-
-  /* Validate the counts.  We do this here mainly so we can copy the right
-   * number of symbols from the val[] array, without risking marching off
-   * the end of memory.  jchuff.c will do a more thorough test later.
-   */
-  nsymbols = 0;
-  for (len = 1; len <= 16; len++)
-    nsymbols += bits[len];
-  if (nsymbols < 1 || nsymbols > 256)
-    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
-  MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
-
-  /* Initialize sent_table FALSE so table will be written to JPEG file. */
-  (*htblptr)->sent_table = FALSE;
-}
-
-
-LOCAL(void)
-std_huff_tables (j_compress_ptr cinfo)
-/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
-/* IMPORTANT: these are only valid for 8-bit data precision! */
-{
-  static const UINT8 bits_dc_luminance[17] =
-    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_luminance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-  
-  static const UINT8 bits_dc_chrominance[17] =
-    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
-  static const UINT8 val_dc_chrominance[] =
-    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-  
-  static const UINT8 bits_ac_luminance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
-  static const UINT8 val_ac_luminance[] =
-    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
-      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
-      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
-      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
-      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
-      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
-      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
-      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
-      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
-      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
-      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
-      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
-      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-  
-  static const UINT8 bits_ac_chrominance[17] =
-    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
-  static const UINT8 val_ac_chrominance[] =
-    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
-      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
-      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
-      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
-      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
-      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
-      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
-      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
-      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
-      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
-      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
-      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
-      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
-      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
-      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
-      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
-      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
-      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
-      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-      0xf9, 0xfa };
-  
-  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
-		 bits_dc_luminance, val_dc_luminance);
-  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
-		 bits_ac_luminance, val_ac_luminance);
-  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
-		 bits_dc_chrominance, val_dc_chrominance);
-  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
-		 bits_ac_chrominance, val_ac_chrominance);
-}
-
-
-/*
  * Default parameter setup for compression.
  *
  * Applications that don't choose to use this routine must do their
@@ -280,15 +192,19 @@
   if (cinfo->comp_info == NULL)
     cinfo->comp_info = (jpeg_component_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+                                  MAX_COMPONENTS * sizeof(jpeg_component_info));
 
   /* Initialize everything not dependent on the color space */
 
+#if JPEG_LIB_VERSION >= 70
+  cinfo->scale_num = 1;         /* 1:1 scaling */
+  cinfo->scale_denom = 1;
+#endif
   cinfo->data_precision = BITS_IN_JSAMPLE;
   /* Set up two quantization tables using default quality of 75 */
   jpeg_set_quality(cinfo, 75, TRUE);
   /* Set up two Huffman tables */
-  std_huff_tables(cinfo);
+  std_huff_tables((j_common_ptr) cinfo);
 
   /* Initialize default arithmetic coding conditioning */
   for (i = 0; i < NUM_ARITH_TBLS; i++) {
@@ -320,6 +236,11 @@
   /* By default, use the simpler non-cosited sampling alignment */
   cinfo->CCIR601_sampling = FALSE;
 
+#if JPEG_LIB_VERSION >= 70
+  /* By default, apply fancy downsampling */
+  cinfo->do_fancy_downsampling = TRUE;
+#endif
+
   /* No input smoothing */
   cinfo->smoothing_factor = 0;
 
@@ -341,8 +262,8 @@
    */
   cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
   cinfo->JFIF_minor_version = 1;
-  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
-  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->density_unit = 0;      /* Pixel size is unknown by default */
+  cinfo->X_density = 1;         /* Pixel aspect ratio is square by default */
   cinfo->Y_density = 1;
 
   /* Choose JPEG colorspace based on input space, set defaults accordingly */
@@ -363,6 +284,16 @@
     jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
     break;
   case JCS_RGB:
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
     jpeg_set_colorspace(cinfo, JCS_YCbCr);
     break;
   case JCS_YCbCr:
@@ -458,7 +389,7 @@
     cinfo->num_components = cinfo->input_components;
     if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
+               MAX_COMPONENTS);
     for (ci = 0; ci < cinfo->num_components; ci++) {
       SET_COMP(ci, ci, 1,1, 0, 0,0);
     }
@@ -473,7 +404,7 @@
 
 LOCAL(jpeg_scan_info *)
 fill_a_scan (jpeg_scan_info * scanptr, int ci,
-	     int Ss, int Se, int Ah, int Al)
+             int Ss, int Se, int Ah, int Al)
 /* Support routine: generate one scan for specified component */
 {
   scanptr->comps_in_scan = 1;
@@ -488,7 +419,7 @@
 
 LOCAL(jpeg_scan_info *)
 fill_scans (jpeg_scan_info * scanptr, int ncomps,
-	    int Ss, int Se, int Ah, int Al)
+            int Ss, int Se, int Ah, int Al)
 /* Support routine: generate one scan for each component */
 {
   int ci;
@@ -551,9 +482,9 @@
   } else {
     /* All-purpose script for other color spaces. */
     if (ncomps > MAX_COMPS_IN_SCAN)
-      nscans = 6 * ncomps;	/* 2 DC + 4 AC scans per component */
+      nscans = 6 * ncomps;      /* 2 DC + 4 AC scans per component */
     else
-      nscans = 2 + 4 * ncomps;	/* 2 DC scans; 4 AC scans per component */
+      nscans = 2 + 4 * ncomps;  /* 2 DC scans; 4 AC scans per component */
   }
 
   /* Allocate space for script.
@@ -567,7 +498,7 @@
     cinfo->script_space_size = MAX(nscans, 10);
     cinfo->script_space = (jpeg_scan_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+                        cinfo->script_space_size * sizeof(jpeg_scan_info));
   }
   scanptr = cinfo->script_space;
   cinfo->scan_info = scanptr;
diff --git a/jcphuff.c b/jcphuff.c
index 07f9178..5ce12b5 100644
--- a/jcphuff.c
+++ b/jcphuff.c
@@ -1,8 +1,10 @@
 /*
  * jcphuff.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy encoding routines for progressive JPEG.
@@ -15,7 +17,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jchuff.h"		/* Declarations shared with jchuff.c */
+#include "jchuff.h"             /* Declarations shared with jchuff.c */
 
 #ifdef C_PROGRESSIVE_SUPPORTED
 
@@ -30,24 +32,24 @@
   /* Bit-level coding status.
    * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
    */
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
-  INT32 put_buffer;		/* current bit-accumulation buffer */
-  int put_bits;			/* # of bits now in it */
-  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  INT32 put_buffer;             /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
+  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
 
   /* Coding status for DC components */
   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 
   /* Coding status for AC components */
-  int ac_tbl_no;		/* the table number of the single component */
-  unsigned int EOBRUN;		/* run length of EOBs */
-  unsigned int BE;		/* # of buffered correction bits before MCU */
-  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  int ac_tbl_no;                /* the table number of the single component */
+  unsigned int EOBRUN;          /* run length of EOBs */
+  unsigned int BE;              /* # of buffered correction bits before MCU */
+  char * bit_buffer;            /* buffer for correction bits (1 per char) */
   /* packing correction bits tightly would save some space but cost time... */
 
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
-  int next_restart_num;		/* next restart number to write (0-7) */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
 
   /* Pointers to derived tables (these workspaces have image lifespan).
    * Since any one scan codes only DC or only AC, we only need one set
@@ -67,7 +69,7 @@
  * The minimum safe size is 64 bits.
  */
 
-#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
 
 /* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
  * We assume that int right shift is unsigned if INT32 right shift is,
@@ -75,27 +77,27 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	int ishift_temp;
+#define ISHIFT_TEMPS    int ishift_temp;
 #define IRIGHT_SHIFT(x,shft)  \
-	((ishift_temp = (x)) < 0 ? \
-	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
-	 (ishift_temp >> (shft)))
+        ((ishift_temp = (x)) < 0 ? \
+         (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+         (ishift_temp >> (shft)))
 #else
 #define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
 #endif
 
 /* Forward declarations */
-METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
-METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
-METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
-METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
-METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo));
-METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo));
+METHODDEF(boolean) encode_mcu_DC_first (j_compress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(boolean) encode_mcu_AC_first (j_compress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(boolean) encode_mcu_DC_refine (j_compress_ptr cinfo,
+                                         JBLOCKROW *MCU_data);
+METHODDEF(boolean) encode_mcu_AC_refine (j_compress_ptr cinfo,
+                                         JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_phuff (j_compress_ptr cinfo);
+METHODDEF(void) finish_pass_gather_phuff (j_compress_ptr cinfo);
 
 
 /*
@@ -104,7 +106,7 @@
 
 METHODDEF(void)
 start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics)
-{  
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   boolean is_DC_band;
   int ci, tbl;
@@ -130,9 +132,9 @@
       entropy->pub.encode_mcu = encode_mcu_AC_refine;
       /* AC refinement needs a correction bit buffer */
       if (entropy->bit_buffer == NULL)
-	entropy->bit_buffer = (char *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      MAX_CORR_BITS * SIZEOF(char));
+        entropy->bit_buffer = (char *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      MAX_CORR_BITS * sizeof(char));
     }
   }
   if (gather_statistics)
@@ -149,8 +151,8 @@
     entropy->last_dc_val[ci] = 0;
     /* Get table index */
     if (is_DC_band) {
-      if (cinfo->Ah != 0)	/* DC refinement needs no table */
-	continue;
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
       tbl = compptr->dc_tbl_no;
     } else {
       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
@@ -163,15 +165,15 @@
       /* Allocate and zero the statistics tables */
       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
       if (entropy->count_ptrs[tbl] == NULL)
-	entropy->count_ptrs[tbl] = (long *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      257 * SIZEOF(long));
-      MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long));
+        entropy->count_ptrs[tbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
     } else {
       /* Compute derived values for Huffman table */
       /* We may do this more than once for a table, but it's not expensive */
       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
-			      & entropy->derived_tbls[tbl]);
+                              & entropy->derived_tbls[tbl]);
     }
   }
 
@@ -196,9 +198,9 @@
 
 /* Emit a byte */
 #define emit_byte(entropy,val)  \
-	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
-	  if (--(entropy)->free_in_buffer == 0)  \
-	    dump_buffer(entropy); }
+        { *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+          if (--(entropy)->free_in_buffer == 0)  \
+            dump_buffer(entropy); }
 
 
 LOCAL(void)
@@ -223,7 +225,6 @@
  * between calls, so 24 bits are sufficient.
  */
 
-INLINE
 LOCAL(void)
 emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size)
 /* Emit some bits, unless we are in gather mode */
@@ -237,21 +238,21 @@
     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
 
   if (entropy->gather_statistics)
-    return;			/* do nothing if we're only getting stats */
+    return;                     /* do nothing if we're only getting stats */
 
   put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
+
+  put_bits += size;             /* new number of bits in buffer */
+
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
 
   while (put_bits >= 8) {
     int c = (int) ((put_buffer >> 16) & 0xFF);
-    
+
     emit_byte(entropy, c);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
+    if (c == 0xFF) {            /* need to stuff a zero byte? */
       emit_byte(entropy, 0);
     }
     put_buffer <<= 8;
@@ -276,7 +277,6 @@
  * Emit (or just count) a Huffman symbol.
  */
 
-INLINE
 LOCAL(void)
 emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol)
 {
@@ -295,10 +295,10 @@
 
 LOCAL(void)
 emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart,
-		    unsigned int nbits)
+                    unsigned int nbits)
 {
   if (entropy->gather_statistics)
-    return;			/* no real work */
+    return;                     /* no real work */
 
   while (nbits > 0) {
     emit_bits(entropy, (unsigned int) (*bufstart), 1);
@@ -317,7 +317,7 @@
 {
   register int temp, nbits;
 
-  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+  if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
     temp = entropy->EOBRUN;
     nbits = 0;
     while ((temp >>= 1))
@@ -411,12 +411,12 @@
     /* Encode the DC coefficient difference per section G.1.2.1 */
     temp2 = temp;
     if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
+      temp = -temp;             /* temp is abs value of input */
       /* For a negative input, want temp2 = bitwise complement of abs(input) */
       /* This code assumes we are on a two's complement machine */
       temp2--;
     }
-    
+
     /* Find the number of bits needed for the magnitude of the coefficient */
     nbits = 0;
     while (temp) {
@@ -428,13 +428,13 @@
      */
     if (nbits > MAX_COEF_BITS+1)
       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-    
+
     /* Count/emit the Huffman-coded symbol for the number of bits */
     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
-    
+
     /* Emit that number of bits of the value, if positive, */
     /* or the complement of its magnitude, if negative. */
-    if (nbits)			/* emit_bits rejects calls with size 0 */
+    if (nbits)                  /* emit_bits rejects calls with size 0 */
       emit_bits(entropy, (unsigned int) temp2, nbits);
   }
 
@@ -483,9 +483,9 @@
   block = MCU_data[0];
 
   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
-  
-  r = 0;			/* r = run length of zeros */
-   
+
+  r = 0;                        /* r = run length of zeros */
+
   for (k = cinfo->Ss; k <= Se; k++) {
     if ((temp = (*block)[jpeg_natural_order[k]]) == 0) {
       r++;
@@ -497,12 +497,12 @@
      * interwoven with finding the abs value (temp) and output bits (temp2).
      */
     if (temp < 0) {
-      temp = -temp;		/* temp is abs value of input */
-      temp >>= Al;		/* apply the point transform */
+      temp = -temp;             /* temp is abs value of input */
+      temp >>= Al;              /* apply the point transform */
       /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
       temp2 = ~temp;
     } else {
-      temp >>= Al;		/* apply the point transform */
+      temp >>= Al;              /* apply the point transform */
       temp2 = temp;
     }
     /* Watch out for case that nonzero coef is zero after point transform */
@@ -521,7 +521,7 @@
     }
 
     /* Find the number of bits needed for the magnitude of the coefficient */
-    nbits = 1;			/* there must be at least one 1 bit */
+    nbits = 1;                  /* there must be at least one 1 bit */
     while ((temp >>= 1))
       nbits++;
     /* Check for out-of-range coefficient values */
@@ -535,13 +535,13 @@
     /* or the complement of its magnitude, if negative. */
     emit_bits(entropy, (unsigned int) temp2, nbits);
 
-    r = 0;			/* reset zero run length */
+    r = 0;                      /* reset zero run length */
   }
 
-  if (r > 0) {			/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
+  if (r > 0) {                  /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
     if (entropy->EOBRUN == 0x7FFF)
-      emit_eobrun(entropy);	/* force it out to avoid overflow */
+      emit_eobrun(entropy);     /* force it out to avoid overflow */
   }
 
   cinfo->dest->next_output_byte = entropy->next_output_byte;
@@ -650,17 +650,17 @@
      * in C, we shift after obtaining the absolute value.
      */
     if (temp < 0)
-      temp = -temp;		/* temp is abs value of input */
-    temp >>= Al;		/* apply the point transform */
-    absvalues[k] = temp;	/* save abs value for main pass */
+      temp = -temp;             /* temp is abs value of input */
+    temp >>= Al;                /* apply the point transform */
+    absvalues[k] = temp;        /* save abs value for main pass */
     if (temp == 1)
-      EOB = k;			/* EOB = index of last newly-nonzero coef */
+      EOB = k;                  /* EOB = index of last newly-nonzero coef */
   }
 
   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
-  
-  r = 0;			/* r = run length of zeros */
-  BR = 0;			/* BR = count of buffered bits added now */
+
+  r = 0;                        /* r = run length of zeros */
+  BR = 0;                       /* BR = count of buffered bits added now */
   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
 
   for (k = cinfo->Ss; k <= Se; k++) {
@@ -707,12 +707,12 @@
     emit_buffered_bits(entropy, BR_buffer, BR);
     BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
     BR = 0;
-    r = 0;			/* reset zero run length */
+    r = 0;                      /* reset zero run length */
   }
 
-  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
-    entropy->EOBRUN++;		/* count an EOB */
-    entropy->BE += BR;		/* concat my correction bits to older ones */
+  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
+    entropy->BE += BR;          /* concat my correction bits to older ones */
     /* We force out the EOB if we risk either:
      * 1. overflow of the EOB counter;
      * 2. overflow of the correction bit buffer during the next MCU.
@@ -744,7 +744,7 @@
 
 METHODDEF(void)
 finish_pass_phuff (j_compress_ptr cinfo)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -781,13 +781,13 @@
   /* It's important not to apply jpeg_gen_optimal_table more than once
    * per table, because it clobbers the input frequency counts!
    */
-  MEMZERO(did, SIZEOF(did));
+  MEMZERO(did, sizeof(did));
 
   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
     compptr = cinfo->cur_comp_info[ci];
     if (is_DC_band) {
-      if (cinfo->Ah != 0)	/* DC refinement needs no table */
-	continue;
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
       tbl = compptr->dc_tbl_no;
     } else {
       tbl = compptr->ac_tbl_no;
@@ -818,7 +818,7 @@
 
   entropy = (phuff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(phuff_entropy_encoder));
+                                sizeof(phuff_entropy_encoder));
   cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
   entropy->pub.start_pass = start_pass_phuff;
 
@@ -827,7 +827,7 @@
     entropy->derived_tbls[i] = NULL;
     entropy->count_ptrs[i] = NULL;
   }
-  entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+  entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
 }
 
 #endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/jcprepct.c b/jcprepct.c
index fa93333..3470de0 100644
--- a/jcprepct.c
+++ b/jcprepct.c
@@ -1,8 +1,10 @@
 /*
  * jcprepct.c
  *
+ * This file is part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the compression preprocessing controller.
@@ -58,12 +60,12 @@
    */
   JSAMPARRAY color_buf[MAX_COMPONENTS];
 
-  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
-  int next_buf_row;		/* index of next row to store in color_buf */
+  JDIMENSION rows_to_go;        /* counts rows remaining in source image */
+  int next_buf_row;             /* index of next row to store in color_buf */
 
-#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
-  int this_row_group;		/* starting row index of group to process */
-  int next_buf_stop;		/* downsample when we reach this index */
+#ifdef CONTEXT_ROWS_SUPPORTED   /* only needed for context case */
+  int this_row_group;           /* starting row index of group to process */
+  int next_buf_stop;            /* downsample when we reach this index */
 #endif
 } my_prep_controller;
 
@@ -104,13 +106,13 @@
 
 LOCAL(void)
 expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
-		    int input_rows, int output_rows)
+                    int input_rows, int output_rows)
 {
   register int row;
 
   for (row = input_rows; row < output_rows; row++) {
     jcopy_sample_rows(image_data, input_rows-1, image_data, row,
-		      1, num_cols);
+                      1, num_cols);
   }
 }
 
@@ -126,10 +128,10 @@
 
 METHODDEF(void)
 pre_process_data (j_compress_ptr cinfo,
-		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		  JDIMENSION in_rows_avail,
-		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		  JDIMENSION out_row_groups_avail)
+                  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                  JDIMENSION in_rows_avail,
+                  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                  JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
   int numrows, ci;
@@ -137,32 +139,32 @@
   jpeg_component_info * compptr;
 
   while (*in_row_ctr < in_rows_avail &&
-	 *out_row_group_ctr < out_row_groups_avail) {
+         *out_row_group_ctr < out_row_groups_avail) {
     /* Do color conversion to fill the conversion buffer. */
     inrows = in_rows_avail - *in_row_ctr;
     numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
     numrows = (int) MIN((JDIMENSION) numrows, inrows);
     (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-				       prep->color_buf,
-				       (JDIMENSION) prep->next_buf_row,
-				       numrows);
+                                       prep->color_buf,
+                                       (JDIMENSION) prep->next_buf_row,
+                                       numrows);
     *in_row_ctr += numrows;
     prep->next_buf_row += numrows;
     prep->rows_to_go -= numrows;
     /* If at bottom of image, pad to fill the conversion buffer. */
     if (prep->rows_to_go == 0 &&
-	prep->next_buf_row < cinfo->max_v_samp_factor) {
+        prep->next_buf_row < cinfo->max_v_samp_factor) {
       for (ci = 0; ci < cinfo->num_components; ci++) {
-	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			   prep->next_buf_row, cinfo->max_v_samp_factor);
+        expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                           prep->next_buf_row, cinfo->max_v_samp_factor);
       }
       prep->next_buf_row = cinfo->max_v_samp_factor;
     }
     /* If we've filled the conversion buffer, empty it. */
     if (prep->next_buf_row == cinfo->max_v_samp_factor) {
       (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf, (JDIMENSION) 0,
-					output_buf, *out_row_group_ctr);
+                                        prep->color_buf, (JDIMENSION) 0,
+                                        output_buf, *out_row_group_ctr);
       prep->next_buf_row = 0;
       (*out_row_group_ctr)++;
     }
@@ -170,16 +172,16 @@
      * Note we assume the caller is providing a one-iMCU-height output buffer!
      */
     if (prep->rows_to_go == 0 &&
-	*out_row_group_ctr < out_row_groups_avail) {
+        *out_row_group_ctr < out_row_groups_avail) {
       for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	   ci++, compptr++) {
-	expand_bottom_edge(output_buf[ci],
-			   compptr->width_in_blocks * DCTSIZE,
-			   (int) (*out_row_group_ctr * compptr->v_samp_factor),
-			   (int) (out_row_groups_avail * compptr->v_samp_factor));
+           ci++, compptr++) {
+        expand_bottom_edge(output_buf[ci],
+                           compptr->width_in_blocks * DCTSIZE,
+                           (int) (*out_row_group_ctr * compptr->v_samp_factor),
+                           (int) (out_row_groups_avail * compptr->v_samp_factor));
       }
       *out_row_group_ctr = out_row_groups_avail;
-      break;			/* can exit outer loop without test */
+      break;                    /* can exit outer loop without test */
     }
   }
 }
@@ -193,10 +195,10 @@
 
 METHODDEF(void)
 pre_process_context (j_compress_ptr cinfo,
-		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-		     JDIMENSION in_rows_avail,
-		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
-		     JDIMENSION out_row_groups_avail)
+                     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+                     JDIMENSION in_rows_avail,
+                     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                     JDIMENSION out_row_groups_avail)
 {
   my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
   int numrows, ci;
@@ -210,19 +212,19 @@
       numrows = prep->next_buf_stop - prep->next_buf_row;
       numrows = (int) MIN((JDIMENSION) numrows, inrows);
       (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
-					 prep->color_buf,
-					 (JDIMENSION) prep->next_buf_row,
-					 numrows);
+                                         prep->color_buf,
+                                         (JDIMENSION) prep->next_buf_row,
+                                         numrows);
       /* Pad at top of image, if first time through */
       if (prep->rows_to_go == cinfo->image_height) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  int row;
-	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
-	    jcopy_sample_rows(prep->color_buf[ci], 0,
-			      prep->color_buf[ci], -row,
-			      1, cinfo->image_width);
-	  }
-	}
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          int row;
+          for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+            jcopy_sample_rows(prep->color_buf[ci], 0,
+                              prep->color_buf[ci], -row,
+                              1, cinfo->image_width);
+          }
+        }
       }
       *in_row_ctr += numrows;
       prep->next_buf_row += numrows;
@@ -230,29 +232,29 @@
     } else {
       /* Return for more data, unless we are at the bottom of the image. */
       if (prep->rows_to_go != 0)
-	break;
+        break;
       /* When at bottom of image, pad to fill the conversion buffer. */
       if (prep->next_buf_row < prep->next_buf_stop) {
-	for (ci = 0; ci < cinfo->num_components; ci++) {
-	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
-			     prep->next_buf_row, prep->next_buf_stop);
-	}
-	prep->next_buf_row = prep->next_buf_stop;
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                             prep->next_buf_row, prep->next_buf_stop);
+        }
+        prep->next_buf_row = prep->next_buf_stop;
       }
     }
     /* If we've gotten enough data, downsample a row group. */
     if (prep->next_buf_row == prep->next_buf_stop) {
       (*cinfo->downsample->downsample) (cinfo,
-					prep->color_buf,
-					(JDIMENSION) prep->this_row_group,
-					output_buf, *out_row_group_ctr);
+                                        prep->color_buf,
+                                        (JDIMENSION) prep->this_row_group,
+                                        output_buf, *out_row_group_ctr);
       (*out_row_group_ctr)++;
       /* Advance pointers with wraparound as necessary. */
       prep->this_row_group += cinfo->max_v_samp_factor;
       if (prep->this_row_group >= buf_height)
-	prep->this_row_group = 0;
+        prep->this_row_group = 0;
       if (prep->next_buf_row >= buf_height)
-	prep->next_buf_row = 0;
+        prep->next_buf_row = 0;
       prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
     }
   }
@@ -277,8 +279,8 @@
    */
   fake_buffer = (JSAMPARRAY)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(cinfo->num_components * 5 * rgroup_height) *
-				SIZEOF(JSAMPROW));
+                                (cinfo->num_components * 5 * rgroup_height) *
+                                sizeof(JSAMPROW));
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
@@ -289,11 +291,11 @@
     true_buffer = (*cinfo->mem->alloc_sarray)
       ((j_common_ptr) cinfo, JPOOL_IMAGE,
        (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
-		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+                      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
        (JDIMENSION) (3 * rgroup_height));
     /* Copy true buffer row pointers into the middle of the fake row array */
     MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-	    3 * rgroup_height * SIZEOF(JSAMPROW));
+            3 * rgroup_height * sizeof(JSAMPROW));
     /* Fill in the above and below wraparound pointers */
     for (i = 0; i < rgroup_height; i++) {
       fake_buffer[i] = true_buffer[2 * rgroup_height + i];
@@ -318,12 +320,12 @@
   int ci;
   jpeg_component_info * compptr;
 
-  if (need_full_buffer)		/* safety check */
+  if (need_full_buffer)         /* safety check */
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 
   prep = (my_prep_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_prep_controller));
+                                sizeof(my_prep_controller));
   cinfo->prep = (struct jpeg_c_prep_controller *) prep;
   prep->pub.start_pass = start_pass_prep;
 
@@ -343,12 +345,12 @@
     /* No context, just make it tall enough for one row group */
     prep->pub.pre_process_data = pre_process_data;
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
-			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+                        cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+         (JDIMENSION) cinfo->max_v_samp_factor);
     }
   }
 }
diff --git a/jcsample.c b/jcsample.c
index 212ec87..286f758 100644
--- a/jcsample.c
+++ b/jcsample.c
@@ -1,8 +1,11 @@
 /*
  * jcsample.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains downsampling routines.
@@ -48,17 +51,19 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsimd.h"
 
 
 /* Pointer to routine to downsample a single component */
-typedef JMETHOD(void, downsample1_ptr,
-		(j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+typedef void (*downsample1_ptr) (j_compress_ptr cinfo,
+                                 jpeg_component_info * compptr,
+                                 JSAMPARRAY input_data,
+                                 JSAMPARRAY output_data);
 
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_downsampler pub;	/* public fields */
+  struct jpeg_downsampler pub;  /* public fields */
 
   /* Downsampling method pointers, one per component */
   downsample1_ptr methods[MAX_COMPONENTS];
@@ -85,7 +90,7 @@
 
 LOCAL(void)
 expand_right_edge (JSAMPARRAY image_data, int num_rows,
-		   JDIMENSION input_cols, JDIMENSION output_cols)
+                   JDIMENSION input_cols, JDIMENSION output_cols)
 {
   register JSAMPROW ptr;
   register JSAMPLE pixval;
@@ -96,9 +101,9 @@
   if (numcols > 0) {
     for (row = 0; row < num_rows; row++) {
       ptr = image_data[row] + input_cols;
-      pixval = ptr[-1];		/* don't need GETJSAMPLE() here */
+      pixval = ptr[-1];         /* don't need GETJSAMPLE() here */
       for (count = numcols; count > 0; count--)
-	*ptr++ = pixval;
+        *ptr++ = pixval;
     }
   }
 }
@@ -112,8 +117,8 @@
 
 METHODDEF(void)
 sep_downsample (j_compress_ptr cinfo,
-		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+                JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+                JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
 {
   my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample;
   int ci;
@@ -138,10 +143,10 @@
 
 METHODDEF(void)
 int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		JSAMPARRAY input_data, JSAMPARRAY output_data)
+                JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
-  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
   JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
   JSAMPROW inptr, outptr;
   INT32 outvalue;
@@ -156,19 +161,19 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * h_expand);
+                    cinfo->image_width, output_cols * h_expand);
 
   inrow = 0;
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     for (outcol = 0, outcol_h = 0; outcol < output_cols;
-	 outcol++, outcol_h += h_expand) {
+         outcol++, outcol_h += h_expand) {
       outvalue = 0;
       for (v = 0; v < v_expand; v++) {
-	inptr = input_data[inrow+v] + outcol_h;
-	for (h = 0; h < h_expand; h++) {
-	  outvalue += (INT32) GETJSAMPLE(*inptr++);
-	}
+        inptr = input_data[inrow+v] + outcol_h;
+        for (h = 0; h < h_expand; h++) {
+          outvalue += (INT32) GETJSAMPLE(*inptr++);
+        }
       }
       *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
     }
@@ -185,14 +190,14 @@
 
 METHODDEF(void)
 fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+                     JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   /* Copy the data */
   jcopy_sample_rows(input_data, 0, output_data, 0,
-		    cinfo->max_v_samp_factor, cinfo->image_width);
+                    cinfo->max_v_samp_factor, cinfo->image_width);
   /* Edge-expand */
   expand_right_edge(output_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
+                    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
 }
 
 
@@ -210,7 +215,7 @@
 
 METHODDEF(void)
 h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+                 JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION outcol;
@@ -223,16 +228,16 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     inptr = input_data[outrow];
-    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
       *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
-			      + bias) >> 1);
-      bias ^= 1;		/* 0=>1, 1=>0 */
+                              + bias) >> 1);
+      bias ^= 1;                /* 0=>1, 1=>0 */
       inptr += 2;
     }
   }
@@ -247,7 +252,7 @@
 
 METHODDEF(void)
 h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+                 JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION outcol;
@@ -260,19 +265,19 @@
    * efficient.
    */
   expand_right_edge(input_data, cinfo->max_v_samp_factor,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   inrow = 0;
   for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
     outptr = output_data[outrow];
     inptr0 = input_data[inrow];
     inptr1 = input_data[inrow+1];
-    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
     for (outcol = 0; outcol < output_cols; outcol++) {
       *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
-			      + bias) >> 2);
-      bias ^= 3;		/* 1=>2, 2=>1 */
+                              GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+                              + bias) >> 2);
+      bias ^= 3;                /* 1=>2, 2=>1 */
       inptr0 += 2; inptr1 += 2;
     }
     inrow += 2;
@@ -290,7 +295,7 @@
 
 METHODDEF(void)
 h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
-			JSAMPARRAY input_data, JSAMPARRAY output_data)
+                        JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int inrow, outrow;
   JDIMENSION colctr;
@@ -303,7 +308,7 @@
    * efficient.
    */
   expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols * 2);
+                    cinfo->image_width, output_cols * 2);
 
   /* We don't bother to form the individual "smoothed" input pixel values;
    * we can directly compute the output which is the average of the four
@@ -331,14 +336,14 @@
 
     /* Special case for first column: pretend column -1 is same as column 0 */
     membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
     neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
-	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+               GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+               GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
     neighsum += neighsum;
     neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
-		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+                GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
     inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
@@ -346,17 +351,17 @@
     for (colctr = output_cols - 2; colctr > 0; colctr--) {
       /* sum of pixels directly mapped to this output element */
       membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
       /* sum of edge-neighbor pixels */
       neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
-		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+                 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+                 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+                 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
       /* The edge-neighbors count twice as much as corner-neighbors */
       neighsum += neighsum;
       /* Add in the corner-neighbors */
       neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
-		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+                  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
       /* form final output scaled up by 2^16 */
       membersum = membersum * memberscale + neighsum * neighscale;
       /* round, descale and output it */
@@ -366,14 +371,14 @@
 
     /* Special case for last column */
     membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
-		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+                GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
     neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
-	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
-	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
-	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+               GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+               GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+               GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
     neighsum += neighsum;
     neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
-		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+                GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
 
@@ -390,7 +395,7 @@
 
 METHODDEF(void)
 fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
-			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+                            JSAMPARRAY input_data, JSAMPARRAY output_data)
 {
   int outrow;
   JDIMENSION colctr;
@@ -404,7 +409,7 @@
    * efficient.
    */
   expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
-		    cinfo->image_width, output_cols);
+                    cinfo->image_width, output_cols);
 
   /* Each of the eight neighbor pixels contributes a fraction SF to the
    * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
@@ -423,10 +428,10 @@
 
     /* Special case for first column */
     colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
-	     GETJSAMPLE(*inptr);
+             GETJSAMPLE(*inptr);
     membersum = GETJSAMPLE(*inptr++);
     nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		 GETJSAMPLE(*inptr);
+                 GETJSAMPLE(*inptr);
     neighsum = colsum + (colsum - membersum) + nextcolsum;
     membersum = membersum * memberscale + neighsum * neighscale;
     *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
@@ -436,7 +441,7 @@
       membersum = GETJSAMPLE(*inptr++);
       above_ptr++; below_ptr++;
       nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
-		   GETJSAMPLE(*inptr);
+                   GETJSAMPLE(*inptr);
       neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
       membersum = membersum * memberscale + neighsum * neighscale;
       *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
@@ -470,7 +475,7 @@
 
   downsample = (my_downsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_downsampler));
+                                sizeof(my_downsampler));
   cinfo->downsample = (struct jpeg_downsampler *) downsample;
   downsample->pub.start_pass = start_pass_downsample;
   downsample->pub.downsample = sep_downsample;
@@ -483,29 +488,42 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
-	compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+        compptr->v_samp_factor == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
       if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = fullsize_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
+        downsample->methods[ci] = fullsize_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
       } else
 #endif
-	downsample->methods[ci] = fullsize_downsample;
+        downsample->methods[ci] = fullsize_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
-	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+               compptr->v_samp_factor == cinfo->max_v_samp_factor) {
       smoothok = FALSE;
-      downsample->methods[ci] = h2v1_downsample;
+      if (jsimd_can_h2v1_downsample())
+        downsample->methods[ci] = jsimd_h2v1_downsample;
+      else
+        downsample->methods[ci] = h2v1_downsample;
     } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
-	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+               compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
 #ifdef INPUT_SMOOTHING_SUPPORTED
       if (cinfo->smoothing_factor) {
-	downsample->methods[ci] = h2v2_smooth_downsample;
-	downsample->pub.need_context_rows = TRUE;
-      } else
+#if defined(__mips__)
+        if (jsimd_can_h2v2_smooth_downsample())
+          downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
+        else
 #endif
-	downsample->methods[ci] = h2v2_downsample;
+          downsample->methods[ci] = h2v2_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+      {                         /* keep outside #ifdef so braces balance */
+        if (jsimd_can_h2v2_downsample())
+          downsample->methods[ci] = jsimd_h2v2_downsample;
+        else
+          downsample->methods[ci] = h2v2_downsample;
+      }
     } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
-	       (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+               (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
       smoothok = FALSE;
       downsample->methods[ci] = int_downsample;
     } else
diff --git a/jcstest.c b/jcstest.c
new file mode 100644
index 0000000..358ed25
--- /dev/null
+++ b/jcstest.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This program demonstrates how to check for the colorspace extension
+   capabilities of libjpeg-turbo at both compile time and run time. */
+
+#include <stdio.h>
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+
+#ifndef JCS_EXTENSIONS
+#define JCS_EXT_RGB 6
+#endif
+#if !defined(JCS_EXTENSIONS) || !defined(JCS_ALPHA_EXTENSIONS)
+#define JCS_EXT_RGBA 12
+#endif
+
+static char lasterror[JMSG_LENGTH_MAX] = "No error";
+
+typedef struct _error_mgr {
+  struct jpeg_error_mgr pub;  /* first member: cinfo->err casts to error_mgr* */
+  jmp_buf jb;                 /* setjmp() context that my_error_exit() jumps to */
+} error_mgr;
+
+static void my_error_exit(j_common_ptr cinfo)
+{
+  /* Run the output_message hook, then unwind to the saved setjmp() point */
+  cinfo->err->output_message(cinfo);
+  longjmp(((error_mgr *)cinfo->err)->jb, 1);
+}
+
+static void my_output_message(j_common_ptr cinfo)
+{
+  cinfo->err->format_message(cinfo, lasterror);  /* store, don't print */
+}
+
+int main(void)  /* report compile-time and run-time extension support */
+{
+  int jcs_valid = -1, jcs_alpha_valid = -1;  /* -1 untested, 0 failed, 1 OK */
+  struct jpeg_compress_struct cinfo;
+  error_mgr jerr;
+
+  printf("libjpeg-turbo colorspace extensions:\n");
+  #if JCS_EXTENSIONS
+  printf("  Present at compile time\n");
+  #else
+  printf("  Not present at compile time\n");
+  #endif
+
+  cinfo.err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = my_error_exit;          /* longjmp() instead of exit */
+  jerr.pub.output_message = my_output_message;  /* capture text in lasterror */
+
+  if(setjmp(jerr.jb)) {
+    /* reached via longjmp() from my_error_exit() on a libjpeg error */
+    jcs_valid = 0;
+    goto done;
+  }
+
+  jpeg_create_compress(&cinfo);
+  cinfo.input_components = 3;
+  jpeg_set_defaults(&cinfo);
+  cinfo.in_color_space = JCS_EXT_RGB;  /* extension colorspace under test */
+  jpeg_default_colorspace(&cinfo);     /* NOTE(review): presumably errors out if unsupported */
+  jcs_valid = 1;                       /* only reached if no error exit occurred */
+
+  done:
+  if (jcs_valid)
+    printf("  Working properly\n");
+  else
+    printf("  Not working properly.  Error returned was:\n    %s\n",
+           lasterror);
+
+  printf("libjpeg-turbo alpha colorspace extensions:\n");
+  #if JCS_ALPHA_EXTENSIONS
+  printf("  Present at compile time\n");
+  #else
+  printf("  Not present at compile time\n");
+  #endif
+
+  if(setjmp(jerr.jb)) {
+    /* reached via longjmp() from my_error_exit() on a libjpeg error */
+    jcs_alpha_valid = 0;
+    goto done2;
+  }
+
+  cinfo.in_color_space = JCS_EXT_RGBA; /* alpha extension colorspace under test */
+  jpeg_default_colorspace(&cinfo);     /* same cinfo as the first test is reused */
+  jcs_alpha_valid = 1;                 /* only reached if no error exit occurred */
+
+  done2:
+  if (jcs_alpha_valid)
+    printf("  Working properly\n");
+  else
+    printf("  Not working properly.  Error returned was:\n    %s\n",
+           lasterror);
+
+  jpeg_destroy_compress(&cinfo);
+  return 0;                            /* exit status is 0 whatever the results */
+}
diff --git a/jctrans.c b/jctrans.c
index 0e6d707..ccd7b34 100644
--- a/jctrans.c
+++ b/jctrans.c
@@ -1,8 +1,11 @@
 /*
  * jctrans.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2000-2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains library routines for transcoding compression,
@@ -17,9 +20,9 @@
 
 /* Forward declarations */
 LOCAL(void) transencode_master_selection
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+        (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays);
 LOCAL(void) transencode_coef_controller
-	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+        (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays);
 
 
 /*
@@ -47,7 +50,7 @@
   /* Perform master selection of active modules */
   transencode_master_selection(cinfo, coef_arrays);
   /* Wait for jpeg_finish_compress() call */
-  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->next_scanline = 0;     /* so jpeg_write_marker works */
   cinfo->global_state = CSTATE_WRCOEFS;
 }
 
@@ -61,7 +64,7 @@
 
 GLOBAL(void)
 jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
-			       j_compress_ptr dstinfo)
+                               j_compress_ptr dstinfo)
 {
   JQUANT_TBL ** qtblptr;
   jpeg_component_info *incomp, *outcomp;
@@ -76,6 +79,12 @@
   dstinfo->image_height = srcinfo->image_height;
   dstinfo->input_components = srcinfo->num_components;
   dstinfo->in_color_space = srcinfo->jpeg_color_space;
+#if JPEG_LIB_VERSION >= 70
+  dstinfo->jpeg_width = srcinfo->output_width;
+  dstinfo->jpeg_height = srcinfo->output_height;
+  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
+#endif
   /* Initialize all parameters to default values */
   jpeg_set_defaults(dstinfo);
   /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
@@ -89,10 +98,10 @@
     if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
       qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
       if (*qtblptr == NULL)
-	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+        *qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
       MEMCOPY((*qtblptr)->quantval,
-	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
-	      SIZEOF((*qtblptr)->quantval));
+              srcinfo->quant_tbl_ptrs[tblno]->quantval,
+              sizeof((*qtblptr)->quantval));
       (*qtblptr)->sent_table = FALSE;
     }
   }
@@ -102,7 +111,7 @@
   dstinfo->num_components = srcinfo->num_components;
   if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
   for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
        ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
     outcomp->component_id = incomp->component_id;
@@ -115,14 +124,14 @@
      */
     tblno = outcomp->quant_tbl_no;
     if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
-	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+        srcinfo->quant_tbl_ptrs[tblno] == NULL)
       ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
     slot_quant = srcinfo->quant_tbl_ptrs[tblno];
     c_quant = incomp->quant_table;
     if (c_quant != NULL) {
       for (coefi = 0; coefi < DCTSIZE2; coefi++) {
-	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
-	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+        if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+          ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
       }
     }
     /* Note: we do not copy the source's Huffman table assignments;
@@ -156,7 +165,7 @@
 
 LOCAL(void)
 transencode_master_selection (j_compress_ptr cinfo,
-			      jvirt_barray_ptr * coef_arrays)
+                              jvirt_barray_ptr * coef_arrays)
 {
   /* Although we don't actually use input_components for transcoding,
    * jcmaster.c's initial_setup will complain if input_components is 0.
@@ -167,7 +176,11 @@
 
   /* Entropy encoding: either Huffman or arithmetic coding. */
   if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+    jinit_arith_encoder(cinfo);
+#else
     ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
   } else {
     if (cinfo->progressive_mode) {
 #ifdef C_PROGRESSIVE_SUPPORTED
@@ -208,10 +221,10 @@
 typedef struct {
   struct jpeg_c_coef_controller pub; /* public fields */
 
-  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
-  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* Virtual block array for each component. */
   jvirt_barray_ptr * whole_image;
@@ -278,7 +291,7 @@
 compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, ci, xindex, yindex, yoffset, blockcnt;
@@ -301,44 +314,44 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						: compptr->last_col_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (coef->iMCU_row_num < last_iMCU_row ||
-	      yindex+yoffset < compptr->last_row_height) {
-	    /* Fill in pointers to real blocks in this row */
-	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	    for (xindex = 0; xindex < blockcnt; xindex++)
-	      MCU_buffer[blkn++] = buffer_ptr++;
-	  } else {
-	    /* At bottom of image, need a whole row of dummy blocks */
-	    xindex = 0;
-	  }
-	  /* Fill in any dummy blocks needed in this row.
-	   * Dummy blocks are filled in the same way as in jccoefct.c:
-	   * all zeroes in the AC entries, DC entries equal to previous
-	   * block's DC value.  The init routine has already zeroed the
-	   * AC entries, so we need only set the DC entries correctly.
-	   */
-	  for (; xindex < compptr->MCU_width; xindex++) {
-	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
-	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
-	    blkn++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                : compptr->last_col_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yindex+yoffset < compptr->last_row_height) {
+            /* Fill in pointers to real blocks in this row */
+            buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+            for (xindex = 0; xindex < blockcnt; xindex++)
+              MCU_buffer[blkn++] = buffer_ptr++;
+          } else {
+            /* At bottom of image, need a whole row of dummy blocks */
+            xindex = 0;
+          }
+          /* Fill in any dummy blocks needed in this row.
+           * Dummy blocks are filled in the same way as in jccoefct.c:
+           * all zeroes in the AC entries, DC entries equal to previous
+           * block's DC value.  The init routine has already zeroed the
+           * AC entries, so we need only set the DC entries correctly.
+           */
+          for (; xindex < compptr->MCU_width; xindex++) {
+            MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+            MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
+            blkn++;
+          }
+        }
       }
       /* Try to write the MCU. */
       if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->mcu_ctr = MCU_col_num;
-	return FALSE;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -361,7 +374,7 @@
 
 LOCAL(void)
 transencode_coef_controller (j_compress_ptr cinfo,
-			     jvirt_barray_ptr * coef_arrays)
+                             jvirt_barray_ptr * coef_arrays)
 {
   my_coef_ptr coef;
   JBLOCKROW buffer;
@@ -369,7 +382,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                sizeof(my_coef_controller));
   cinfo->coef = (struct jpeg_c_coef_controller *) coef;
   coef->pub.start_pass = start_pass_coef;
   coef->pub.compress_data = compress_output;
@@ -380,8 +393,8 @@
   /* Allocate and pre-zero space for dummy DCT blocks. */
   buffer = (JBLOCKROW)
     (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
-  jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
+  jzero_far((void *) buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
   for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
     coef->dummy_buffer[i] = buffer + i;
   }
diff --git a/jdapimin.c b/jdapimin.c
index cadb59f..fc8898f 100644
--- a/jdapimin.c
+++ b/jdapimin.c
@@ -1,8 +1,10 @@
 /*
  * jdapimin.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains application interface code for the decompression half
@@ -32,12 +34,12 @@
   int i;
 
   /* Guard against version mismatches between library and caller. */
-  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
   if (version != JPEG_LIB_VERSION)
     ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
-  if (structsize != SIZEOF(struct jpeg_decompress_struct))
-    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
-	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+  if (structsize != sizeof(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int) sizeof(struct jpeg_decompress_struct), (int) structsize);
 
   /* For debugging purposes, we zero the whole master structure.
    * But the application has already set the err pointer, and may have set
@@ -48,7 +50,7 @@
   {
     struct jpeg_error_mgr * err = cinfo->err;
     void * client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct));
     cinfo->err = err;
     cinfo->client_data = client_data;
   }
@@ -121,22 +123,22 @@
     cinfo->jpeg_color_space = JCS_GRAYSCALE;
     cinfo->out_color_space = JCS_GRAYSCALE;
     break;
-    
+
   case 3:
     if (cinfo->saw_JFIF_marker) {
       cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
     } else if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
-	cinfo->jpeg_color_space = JCS_RGB;
-	break;
+        cinfo->jpeg_color_space = JCS_RGB;
+        break;
       case 1:
-	cinfo->jpeg_color_space = JCS_YCbCr;
-	break;
+        cinfo->jpeg_color_space = JCS_YCbCr;
+        break;
       default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-	break;
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        break;
       }
     } else {
       /* Saw no special markers, try to guess from the component IDs */
@@ -145,31 +147,31 @@
       int cid2 = cinfo->comp_info[2].component_id;
 
       if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
       else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
-	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+        cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
       else {
-	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
       }
     }
     /* Always guess RGB is proper output colorspace. */
     cinfo->out_color_space = JCS_RGB;
     break;
-    
+
   case 4:
     if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
-	cinfo->jpeg_color_space = JCS_CMYK;
-	break;
+        cinfo->jpeg_color_space = JCS_CMYK;
+        break;
       case 2:
-	cinfo->jpeg_color_space = JCS_YCCK;
-	break;
+        cinfo->jpeg_color_space = JCS_YCCK;
+        break;
       default:
-	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
-	break;
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+        break;
       }
     } else {
       /* No special markers, assume straight CMYK. */
@@ -177,7 +179,7 @@
     }
     cinfo->out_color_space = JCS_CMYK;
     break;
-    
+
   default:
     cinfo->jpeg_color_space = JCS_UNKNOWN;
     cinfo->out_color_space = JCS_UNKNOWN;
@@ -185,7 +187,7 @@
   }
 
   /* Set defaults for other decompression parameters. */
-  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_num = 1;         /* 1:1 scaling */
   cinfo->scale_denom = 1;
   cinfo->output_gamma = 1.0;
   cinfo->buffered_image = FALSE;
@@ -253,7 +255,7 @@
     retcode = JPEG_HEADER_OK;
     break;
   case JPEG_REACHED_EOI:
-    if (require_image)		/* Complain if application wanted an image */
+    if (require_image)          /* Complain if application wanted an image */
       ERREXIT(cinfo, JERR_NO_IMAGE);
     /* Reset to start state; it would be safer to require the application to
      * call jpeg_abort, but we can't change it now for compatibility reasons.
@@ -385,7 +387,7 @@
   /* Read until EOI */
   while (! cinfo->inputctl->eoi_reached) {
     if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
+      return FALSE;             /* Suspend, come back later */
   }
   /* Do final cleanup */
   (*cinfo->src->term_source) (cinfo);
diff --git a/jdapistd.c b/jdapistd.c
index c8e3fa0..3be527c 100644
--- a/jdapistd.c
+++ b/jdapistd.c
@@ -1,8 +1,10 @@
 /*
  * jdapistd.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains application interface code for the decompression half
@@ -17,10 +19,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Forward declarations */
-LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+LOCAL(boolean) output_pass_setup (j_decompress_ptr cinfo);
 
 
 /*
@@ -52,24 +55,24 @@
     if (cinfo->inputctl->has_multiple_scans) {
 #ifdef D_MULTISCAN_FILES_SUPPORTED
       for (;;) {
-	int retcode;
-	/* Call progress monitor hook if present */
-	if (cinfo->progress != NULL)
-	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
-	/* Absorb some more input */
-	retcode = (*cinfo->inputctl->consume_input) (cinfo);
-	if (retcode == JPEG_SUSPENDED)
-	  return FALSE;
-	if (retcode == JPEG_REACHED_EOI)
-	  break;
-	/* Advance progress counter if appropriate */
-	if (cinfo->progress != NULL &&
-	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	    /* jdmaster underestimated number of scans; ratchet up one scan */
-	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	  }
-	}
+        int retcode;
+        /* Call progress monitor hook if present */
+        if (cinfo->progress != NULL)
+          (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        /* Absorb some more input */
+        retcode = (*cinfo->inputctl->consume_input) (cinfo);
+        if (retcode == JPEG_SUSPENDED)
+          return FALSE;
+        if (retcode == JPEG_REACHED_EOI)
+          break;
+        /* Advance progress counter if appropriate */
+        if (cinfo->progress != NULL &&
+            (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+          if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+            /* jdmaster underestimated number of scans; ratchet up one scan */
+            cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+          }
+        }
       }
 #else
       ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -108,16 +111,16 @@
       JDIMENSION last_scanline;
       /* Call progress monitor hook if present */
       if (cinfo->progress != NULL) {
-	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
-	cinfo->progress->pass_limit = (long) cinfo->output_height;
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+        cinfo->progress->pass_limit = (long) cinfo->output_height;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       }
       /* Process some data */
       last_scanline = cinfo->output_scanline;
       (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
-				    &cinfo->output_scanline, (JDIMENSION) 0);
+                                    &cinfo->output_scanline, (JDIMENSION) 0);
       if (cinfo->output_scanline == last_scanline)
-	return FALSE;		/* No progress made, must suspend */
+        return FALSE;           /* No progress made, must suspend */
     }
     /* Finish up dummy pass, and set up for another one */
     (*cinfo->master->finish_output_pass) (cinfo);
@@ -150,7 +153,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
-		     JDIMENSION max_lines)
+                     JDIMENSION max_lines)
 {
   JDIMENSION row_ctr;
 
@@ -183,7 +186,7 @@
 
 GLOBAL(JDIMENSION)
 jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
-		    JDIMENSION max_lines)
+                    JDIMENSION max_lines)
 {
   JDIMENSION lines_per_iMCU_row;
 
@@ -202,13 +205,13 @@
   }
 
   /* Verify that at least one iMCU row can be returned. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size;
   if (max_lines < lines_per_iMCU_row)
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
   /* Decompress directly into user's buffer. */
   if (! (*cinfo->coef->decompress_data) (cinfo, data))
-    return 0;			/* suspension forced, can do nothing more */
+    return 0;                   /* suspension forced, can do nothing more */
 
   /* OK, we processed one iMCU row. */
   cinfo->output_scanline += lines_per_iMCU_row;
@@ -264,9 +267,9 @@
   }
   /* Read markers looking for SOS or EOI */
   while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
+         ! cinfo->inputctl->eoi_reached) {
     if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
-      return FALSE;		/* Suspend, come back later */
+      return FALSE;             /* Suspend, come back later */
   }
   cinfo->global_state = DSTATE_BUFIMAGE;
   return TRUE;
diff --git a/jdarith.c b/jdarith.c
new file mode 100644
index 0000000..c6a1a99
--- /dev/null
+++ b/jdarith.c
@@ -0,0 +1,763 @@
+/*
+ * jdarith.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains portable arithmetic entropy decoding routines for JPEG
+ * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy decoder object for arithmetic decoding. */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  INT32 c;       /* C register, base of coding interval + input bit buffer */
+  INT32 a;               /* A register, normalized size of coding interval */
+  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
+                                                         /* init: ct = -16 */
+                                                         /* run: ct = 0..7 */
+                                                         /* error: ct = -1 */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef, per component in scan */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* DC conditioning context, per component */
+
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char * dc_stats[NUM_ARITH_TBLS];     /* indexed by dc_tbl_no */
+  unsigned char * ac_stats[NUM_ARITH_TBLS];     /* indexed by ac_tbl_no */
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_decoder;
+
+typedef arith_entropy_decoder * arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS 64
+#define AC_STAT_BINS 256
+
+
+LOCAL(int)
+get_byte (j_decompress_ptr cinfo)
+/* Fetch one input byte, refilling the source buffer first when it is empty.
+ * Suspension is unsupported: a failed refill raises JERR_CANT_SUSPEND. */
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+
+  if (src->bytes_in_buffer == 0 && ! (*src->fill_input_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  src->bytes_in_buffer--;
+  return GETJOCTET(*src->next_input_byte++);
+}
+
+
+/*
+ * The core arithmetic decoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Return value is 0 or 1 (binary decision).
+ *
+ * Note: I've changed the handling of the code base & bit
+ * buffer register C compared to other implementations
+ * based on the standards layout & procedures.
+ * While it also contains both the actual base of the
+ * coding interval (16 bits) and the next-bits buffer,
+ * the cut-point between these two parts is floating
+ * (instead of fixed) with the bit shift counter CT.
+ * Thus, we also need only one (variable instead of
+ * fixed size) shift for the LPS/MPS decision, and
+ * we can do without any renormalization update
+ * of C (except for new data insertion, of course).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(int)
+arith_decode (j_decompress_ptr cinfo, unsigned char *st)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy;
+  register unsigned char nl, nm;  /* next state after LPS (+ MPS switch) / MPS */
+  register INT32 qe, temp;        /* Qe table entry; scratch interval value */
+  register int sv, data;          /* copy of *st; input byte just read */
+
+  /* Renormalization & data input per section D.2.6 */
+  while (e->a < 0x8000L) {
+    if (--e->ct < 0) {
+      /* Need to fetch next data byte */
+      if (cinfo->unread_marker)
+        data = 0;               /* stuff zero data */
+      else {
+        data = get_byte(cinfo); /* read next input byte */
+        if (data == 0xFF) {     /* zero stuff or marker code */
+          do data = get_byte(cinfo);
+          while (data == 0xFF); /* swallow extra 0xFF bytes */
+          if (data == 0)
+            data = 0xFF;        /* discard stuffed zero byte */
+          else {
+            /* Note: Unlike in the Huffman decoder, hitting a marker
+             * while processing the compressed data segment is legal
+             * in arithmetic coding.  The convention is to record the
+             * marker and supply zero data from then on until decoding
+             * is complete.
+             */
+            cinfo->unread_marker = data;
+            data = 0;
+          }
+        }
+      }
+      e->c = (e->c << 8) | data; /* insert data into C register */
+      if ((e->ct += 8) < 0)      /* update bit shift counter */
+        /* Need more initial bytes */
+        if (++e->ct == 0)
+          /* Got 2 initial bytes -> re-init A and exit loop */
+          e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+    }
+    e->a <<= 1;
+  }
+
+  /* Fetch values from our compact representation of Table D.2:
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF; qe >>= 8;     /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF; qe >>= 8;     /* Next_Index_MPS */
+
+  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+  temp = e->a - qe;
+  e->a = temp;
+  temp <<= e->ct;               /* shift by CT to line up with the base in C */
+  if (e->c >= temp) {
+    e->c -= temp;
+    /* Conditional LPS (less probable symbol) exchange */
+    if (e->a < qe) {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
+    } else {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
+    }
+  } else if (e->a < 0x8000L) {
+    /* Conditional MPS (more probable symbol) exchange */
+    if (e->a < qe) {
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
+    } else {
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
+    }
+  }
+
+  return sv >> 7;               /* bit 7 of sv holds the decoded decision */
+}
+
+
+/*
+ * Consume the pending RSTn marker and return the decoder to its initial state.
+ */
+
+LOCAL(void)
+process_restart (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr state = (arith_entropy_ptr) cinfo->entropy;
+  boolean want_dc, want_ac;
+  int idx;
+
+  /* Advance past the RSTn marker; suspension is not supported here */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  want_dc = ! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0);
+  want_ac = ! cinfo->progressive_mode || cinfo->Ss;
+  /* Re-initialize the statistics areas this kind of scan decodes with */
+  for (idx = 0; idx < cinfo->comps_in_scan; idx++) {
+    jpeg_component_info * comp = cinfo->cur_comp_info[idx];
+    if (want_dc) {              /* also reset the DC predictions to 0 */
+      MEMZERO(state->dc_stats[comp->dc_tbl_no], DC_STAT_BINS);
+      state->last_dc_val[idx] = 0;
+      state->dc_context[idx] = 0;
+    }
+    if (want_ac) {
+      MEMZERO(state->ac_stats[comp->ac_tbl_no], AC_STAT_BINS);
+    }
+  }
+
+  /* Put the arithmetic decoding variables back to their initial values */
+  state->c = 0;
+  state->a = 0;
+  state->ct = -16;      /* force reading 2 initial bytes to fill C */
+
+  state->restarts_to_go = cinfo->restart_interval;  /* new restart interval */
+}
+
+
+/*
+ * Arithmetic MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * arithmetic-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection, or first pass
+ * of successive approximation).  Always returns TRUE; bad data only warns.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* ct == -1 flags an earlier bad code */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);       /* 1 => v is negative */
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;               /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;         /* the decoded magnitude was |v| - 1 */
+      entropy->last_dc_val[ci] += v;
+    }
+
+    /* Scale & store DC (jpeg_natural_order[0]=0); unsigned shift avoids UB */
+    (*block)[0] = (JCOEF) ((unsigned) entropy->last_dc_val[ci] << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection, or first pass
+ * of successive approximation).  Always returns TRUE; bad data only warns.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, sign, k;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* ct == -1 flags an earlier bad code */
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+  /* Figure F.20: Decode_AC_coefficients */
+  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    if (arith_decode(cinfo, st)) break;         /* EOB flag */
+    while (arith_decode(cinfo, st + 1) == 0) {  /* skip zero coefficients */
+      st += 3; k++;
+      if (k > cinfo->Se) {
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
+      }
+    }
+    /* Figure F.21: Decoding nonzero value v */
+    /* Figure F.22: Decoding the sign of v */
+    sign = arith_decode(cinfo, entropy->fixed_bin);     /* 1 => negative */
+    st += 2;
+    /* Figure F.23: Decoding the magnitude category of v */
+    if ((m = arith_decode(cinfo, st)) != 0) {
+      if (arith_decode(cinfo, st)) {
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+    }
+    v = m;
+    /* Figure F.24: Decoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      if (arith_decode(cinfo, st)) v |= m;
+    v += 1; if (sign) v = -v;           /* the decoded magnitude was |v| - 1 */
+    /* Scale (unsigned shift: v may be negative) & store in natural order */
+    (*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned) v << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU of a DC successive-approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr state = (arith_entropy_ptr) cinfo->entropy;
+  int bit, b;
+
+  /* Cross a restart boundary if one is due, then count this MCU */
+  if (cinfo->restart_interval != 0) {
+    if (! state->restarts_to_go)
+      process_restart(cinfo);
+    state->restarts_to_go -= 1;
+  }
+
+  bit = 1 << cinfo->Al;         /* 1 in the bit position being coded */
+
+  /* Every block in the MCU contributes one decision, decoded with the
+   * fixed probability estimate: the next bit of its two's-complement DC.
+   */
+  for (b = 0; b < cinfo->blocks_in_MCU; b++) {
+    JCOEF *dc = &MCU_data[b][0][0];
+
+    if (arith_decode(cinfo, state->fixed_bin))
+      *dc |= bit;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  unsigned char *st;
+  int tbl, k, kex;
+  int p1, m1;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* ct == -1 flags an earlier bad code */
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  p1 = 1 << cinfo->Al;          /* 1 in the bit position being coded */
+  m1 = -p1;     /* -1 in that bit position; (-1) << Al would be undefined */
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = cinfo->Se; kex > 0; kex--)
+    if ((*block)[jpeg_natural_order[kex]]) break;
+
+  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    if (k > kex)                /* EOB is only coded past the previous EOBx */
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
+    for (;;) {
+      thiscoef = *block + jpeg_natural_order[k];
+      if (*thiscoef) {                          /* previously nonzero coef */
+        if (arith_decode(cinfo, st + 2)) {      /* correction bit is set */
+          if (*thiscoef < 0)
+            *thiscoef += m1;
+          else
+            *thiscoef += p1;
+        }
+        break;
+      }
+      if (arith_decode(cinfo, st + 1)) {        /* newly nonzero coef */
+        if (arith_decode(cinfo, entropy->fixed_bin))    /* sign: 1 => negative */
+          *thiscoef = m1;
+        else
+          *thiscoef = p1;
+        break;
+      }
+      st += 3; k++;
+      if (k > cinfo->Se) {
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU of a sequential (non-progressive) scan: DC, then AC terms.
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign, k;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* ct == -1 flags an earlier bad code */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);       /* 1 => v is negative */
+      st += 2; st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;               /* zero diff category */
+      else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;         /* the decoded magnitude was |v| - 1 */
+      entropy->last_dc_val[ci] += v;    /* v is a difference from the last DC */
+    }
+
+    (*block)[0] = (JCOEF) entropy->last_dc_val[ci];     /* store DC, unscaled */
+
+    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+    tbl = compptr->ac_tbl_no;
+
+    /* Figure F.20: Decode_AC_coefficients */
+    for (k = 1; k <= DCTSIZE2 - 1; k++) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
+      while (arith_decode(cinfo, st + 1) == 0) {        /* skip zero coefs */
+        st += 3; k++;
+        if (k > DCTSIZE2 - 1) {
+          WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+          entropy->ct = -1;                     /* spectral overflow */
+          return TRUE;
+        }
+      }
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, entropy->fixed_bin);   /* 1 => negative */
+      st += 2;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        if (arith_decode(cinfo, st)) {
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (arith_decode(cinfo, st)) {
+            if ((m <<= 1) == 0x8000) {
+              WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+              entropy->ct = -1;                 /* magnitude overflow */
+              return TRUE;
+            }
+            st += 1;
+          }
+        }
+      }
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1; if (sign) v = -v;         /* the decoded magnitude was |v| - 1 */
+      (*block)[jpeg_natural_order[k]] = (JCOEF) v;      /* dezigzag on store */
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for a scan: check parameters, pick decode routine, reset state.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)       /* a DC scan may cover coefficient 0 only */
+        goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1)
+        goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+        goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah-1 != cinfo->Al)
+        goto bad;
+    }
+    if (cinfo->Al > 13) {       /* need not check for < 0 */
+      bad:                      /* every "goto bad" above lands here */
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+               cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+        int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+        if (cinfo->Ah != expected)      /* Ah must match the Al last stored */
+          WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+        coef_bit_ptr[coefi] = cinfo->Al;        /* record this scan's Al */
+      }
+    }
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {               /* first scan for these coefficients */
+      if (cinfo->Ss == 0)
+        entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+        entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {                            /* refinement scan */
+      if (cinfo->Ss == 0)
+        entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+        entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning.
+     */
+    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
+        (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+    /* Select MCU decoding routine */
+    entropy->pub.decode_mcu = decode_mcu;
+  }
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)       /* allocate on first use */
+        entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if (! cinfo->progressive_mode || cinfo->Ss) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)       /* allocate on first use */
+        entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+    }
+  }
+
+  /* Initialize arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Create the arithmetic entropy decoder object and attach it to cinfo.
+ */
+
+GLOBAL(void)
+jinit_arith_decoder (j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr dec;
+  int *bits;
+  int t, remaining;
+
+  dec = (arith_entropy_ptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, sizeof(arith_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) dec;
+  dec->pub.start_pass = start_pass;
+
+  /* No statistics area exists yet for any table slot */
+  for (t = 0; t < NUM_ARITH_TBLS; t++)
+    dec->dc_stats[t] = dec->ac_stats[t] = NULL;
+
+  /* Initialize index for fixed probability estimation */
+  dec->fixed_bin[0] = 113;
+
+  if (! cinfo->progressive_mode)
+    return;
+
+  /* Progressive mode also needs the progression status table:
+   * DCTSIZE2 ints per component, every entry starting out as -1.
+   */
+  cinfo->coef_bits = (int (*)[DCTSIZE2])
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                cinfo->num_components*DCTSIZE2*sizeof(int));
+  bits = & cinfo->coef_bits[0][0];
+  remaining = cinfo->num_components * DCTSIZE2;
+  while (remaining-- > 0)
+    *bits++ = -1;
+}
diff --git a/jdatadst-tj.c b/jdatadst-tj.c
new file mode 100644
index 0000000..bc4a35d
--- /dev/null
+++ b/jdatadst-tj.c
@@ -0,0 +1,190 @@
+/*
+ * jdatadst-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
+extern void * malloc (size_t size);
+extern void free (void *ptr);
+#endif
+
+
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char ** outbuffer;   /* caller's buffer pointer (target buffer) */
+  unsigned long * outsize;      /* caller's size; receives bytes written */
+  unsigned char * newbuffer;    /* latest buffer malloc'ed here, if any */
+  JOCTET * buffer;              /* start of buffer */
+  size_t bufsize;               /* current capacity of buffer, in bytes */
+  boolean alloc;                /* may the library (re)allocate the buffer? */
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr * my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_mem_destination (j_compress_ptr cinfo)
+{
+  /* no work necessary here: jpeg_mem_dest_tj() already set up the buffer */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_mem_output_buffer (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr mgr = (my_mem_dest_ptr) cinfo->dest;
+  size_t filled, grown;
+  JOCTET * bigger;
+
+  /* A buffer the library may not reallocate cannot grow: report overflow */
+  if (! mgr->alloc)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Treat the whole buffer as filled and move its contents into a newly
+   * malloc'ed buffer of twice the size.
+   */
+  filled = mgr->bufsize;
+  grown = filled * 2;
+  bigger = (JOCTET *) malloc(grown);
+  if (bigger == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+  MEMCOPY(bigger, mgr->buffer, filled);
+
+  /* Release the buffer obtained by an earlier allocation, if any */
+  if (mgr->newbuffer != NULL)
+    free(mgr->newbuffer);
+
+  mgr->newbuffer = mgr->buffer = bigger;
+  mgr->bufsize = grown;
+  mgr->pub.next_output_byte = bigger + filled;  /* resume after old data */
+  mgr->pub.free_in_buffer = filled;             /* the newly added half */
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress after all data
+ * has been written.  Reports the final buffer and byte count to the caller.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; the surrounding application
+ * must deal with any cleanup that should happen even for error exit.
+ */
+
+METHODDEF(void)
+term_mem_destination (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr mgr = (my_mem_dest_ptr) cinfo->dest;
+  size_t used = mgr->bufsize - mgr->pub.free_in_buffer; /* bytes written */
+  if (mgr->alloc)               /* buffer may have been reallocated */
+    *mgr->outbuffer = mgr->buffer;
+  *mgr->outsize = (unsigned long) used;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply its own initial buffer of an appropriate size.
+ * If it does not, or if the output outgrows the buffer, the library
+ * (re)allocates the buffer with malloc/free when alloc is TRUE; when alloc
+ * is FALSE, either situation is reported as JERR_BUFFER_SIZE instead.
+ * Because the standard malloc/free functions are used, the final buffer is
+ * available to the application after compression finishes, and the
+ * application is then responsible for freeing it.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest_tj (j_compress_ptr cinfo,
+               unsigned char ** outbuffer, unsigned long * outsize,
+               boolean alloc)
+{
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The permanent destination object is reused across images, but only if
+   * this module created it: another manager's object may be a smaller struct.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_mem_destination_mgr));
+    ((my_mem_dest_ptr) cinfo->dest)->newbuffer = NULL;
+  } else if (cinfo->dest->init_destination != init_mem_destination)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  dest = (my_mem_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->alloc = alloc;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    if (alloc) {
+      /* Allocate initial buffer */
+      dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
+      if (dest->newbuffer == NULL)
+        ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+      *outsize = OUTPUT_BUF_SIZE;
+    }
+    else ERREXIT(cinfo, JERR_BUFFER_SIZE);      /* nothing to write into */
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  dest->pub.free_in_buffer = dest->bufsize = *outsize;
+}
diff --git a/jdatadst.c b/jdatadst.c
index a8f6fb0..7a40e4f 100644
--- a/jdatadst.c
+++ b/jdatadst.c
@@ -1,14 +1,17 @@
 /*
  * jdatadst.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains compression data destination routines for the case of
- * emitting JPEG data to a file (or any stdio stream).  While these routines
- * are sufficient for most applications, some will want to use a different
- * destination manager.
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
  * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
  * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
  * than 8 bits on your machine, you may need to do some tweaking.
@@ -19,19 +22,41 @@
 #include "jpeglib.h"
 #include "jerror.h"
 
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
+extern void * malloc (size_t size);
+extern void free (void *ptr);
+#endif
+
 
 /* Expanded data destination object for stdio output */
 
 typedef struct {
   struct jpeg_destination_mgr pub; /* public fields */
 
-  FILE * outfile;		/* target stream */
-  JOCTET * buffer;		/* start of buffer */
+  FILE * outfile;               /* target stream */
+  JOCTET * buffer;              /* start of buffer */
 } my_destination_mgr;
 
 typedef my_destination_mgr * my_dest_ptr;
 
-#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
+
+
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char ** outbuffer;   /* caller's buffer pointer (target buffer) */
+  unsigned long * outsize;      /* caller's size; receives bytes written */
+  unsigned char * newbuffer;    /* newly allocated buffer */
+  JOCTET * buffer;              /* start of buffer */
+  size_t bufsize;               /* current capacity of buffer, in bytes */
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr * my_mem_dest_ptr;
+#endif
 
 
 /*
@@ -47,12 +72,20 @@
   /* Allocate the output buffer --- it will be released when done with image */
   dest->buffer = (JOCTET *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+                                  OUTPUT_BUF_SIZE * sizeof(JOCTET));
 
   dest->pub.next_output_byte = dest->buffer;
   dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
 }
 
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+METHODDEF(void)
+init_mem_destination (j_compress_ptr cinfo)
+{
+  /* no work necessary here: jpeg_mem_dest() already set up the buffer */
+}
+#endif
+
 
 /*
  * Empty the output buffer --- called whenever buffer fills up.
@@ -92,6 +125,38 @@
   return TRUE;
 }
 
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+METHODDEF(boolean)
+empty_mem_output_buffer (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr mgr = (my_mem_dest_ptr) cinfo->dest;
+  size_t filled = mgr->bufsize;         /* whole buffer is treated as full */
+  size_t grown = filled * 2;            /* try to double the capacity */
+  JOCTET * bigger = (JOCTET *) malloc(grown);
+
+  if (bigger == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+
+  /* Carry the data emitted so far over into the larger buffer */
+  MEMCOPY(bigger, mgr->buffer, filled);
+
+  /* Release the buffer obtained by an earlier allocation, if any */
+  if (mgr->newbuffer != NULL)
+    free(mgr->newbuffer);
+
+  /* Switch over to the new buffer */
+  mgr->newbuffer = bigger;
+  mgr->buffer = bigger;
+  mgr->bufsize = grown;
+
+  /* Output resumes right after the copied data, in the added half */
+  mgr->pub.next_output_byte = bigger + filled;
+  mgr->pub.free_in_buffer = filled;
+
+  return TRUE;
+}
+#endif
+
 
 /*
  * Terminate destination --- called by jpeg_finish_compress
@@ -119,6 +184,17 @@
     ERREXIT(cinfo, JERR_FILE_WRITE);
 }
 
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+METHODDEF(void)
+term_mem_destination (j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr mgr = (my_mem_dest_ptr) cinfo->dest;
+  size_t used = mgr->bufsize - mgr->pub.free_in_buffer; /* bytes written */
+  *mgr->outbuffer = mgr->buffer;        /* buffer may have been reallocated */
+  *mgr->outsize = (unsigned long) used;
+}
+#endif
+
 
 /*
  * Prepare for output to a stdio stream.
@@ -137,10 +213,10 @@
    * manager serially with the same JPEG object, because their private object
    * sizes may be different.  Caveat programmer.
    */
-  if (cinfo->dest == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
     cinfo->dest = (struct jpeg_destination_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_destination_mgr));
+                                  sizeof(my_destination_mgr));
   }
 
   dest = (my_dest_ptr) cinfo->dest;
@@ -149,3 +225,55 @@
   dest->pub.term_destination = term_destination;
   dest->outfile = outfile;
 }
+
+
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply its own initial buffer of an appropriate size.
+ * If it does not, or if the output outgrows that buffer, the library
+ * allocates a larger one with the standard malloc/free functions, so the
+ * final buffer is available to the application after compression finishes,
+ * and the application is then responsible for freeing it.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest (j_compress_ptr cinfo,
+               unsigned char ** outbuffer, unsigned long * outsize)
+{
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that it can serve several
+   * JPEG images.  An existing object is reused only if this module created
+   * it: another manager's object may be a smaller struct.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_mem_destination_mgr));
+  } else if (cinfo->dest->init_destination != init_mem_destination)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  dest = (my_mem_dest_ptr) cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->newbuffer = NULL;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    /* Allocate initial buffer */
+    dest->newbuffer = *outbuffer = (unsigned char *) malloc(OUTPUT_BUF_SIZE);
+    if (dest->newbuffer == NULL)
+      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+    *outsize = OUTPUT_BUF_SIZE;
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  dest->pub.free_in_buffer = dest->bufsize = *outsize;
+}
+#endif
diff --git a/jdatasrc-tj.c b/jdatasrc-tj.c
new file mode 100644
index 0000000..09f9596
--- /dev/null
+++ b/jdatasrc-tj.c
@@ -0,0 +1,185 @@
+/*
+ * jdatasrc-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from a memory buffer; it has no stdio source manager.
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_mem_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary: jpeg_mem_src_tj already set the pointer and count */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend decompression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_mem_input_buffer (j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * means the input ran out early; this is reported as a warning only.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;      /* just the 0xFF, JPEG_EOI pair */
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr * src = cinfo->src;
+
+  /* Consume num_bytes of input; non-positive counts are ignored.  When the
+   * skip extends past the data left in the buffer, the manager's
+   * fill_input_buffer method is called to obtain more.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long) src->bytes_in_buffer) {
+      num_bytes -= (long) src->bytes_in_buffer;
+      (void) (*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += (size_t) num_bytes;
+    src->bytes_in_buffer -= (size_t) num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary: the input buffer was supplied by the caller */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src_tj (j_decompress_ptr cinfo,
+              unsigned char * inbuffer, unsigned long insize)
+{
+  struct jpeg_source_mgr * src;
+
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src_tj only before
+   * the first one.  NOTE(review): a non-NULL src left by another manager is
+   * reused as-is; only sizeof(struct jpeg_source_mgr) is allocated here. */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+                                  sizeof(struct jpeg_source_mgr));
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = (size_t) insize;       /* whole image is available */
+  src->next_input_byte = (JOCTET *) inbuffer;   /* read in place, no copy */
+}
diff --git a/jdatasrc.c b/jdatasrc.c
index edc752b..bf70422 100644
--- a/jdatasrc.c
+++ b/jdatasrc.c
@@ -1,14 +1,17 @@
 /*
  * jdatasrc.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains decompression data source routines for the case of
- * reading JPEG data from a file (or any stdio stream).  While these routines
- * are sufficient for most applications, some will want to use a different
- * source manager.
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
  * IMPORTANT: we assume that fread() will correctly transcribe an array of
  * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
  * than 8 bits on your machine, you may need to do some tweaking.
@@ -23,16 +26,16 @@
 /* Expanded data source object for stdio input */
 
 typedef struct {
-  struct jpeg_source_mgr pub;	/* public fields */
+  struct jpeg_source_mgr pub;   /* public fields */
 
-  FILE * infile;		/* source stream */
-  JOCTET * buffer;		/* start of buffer */
-  boolean start_of_file;	/* have we gotten any data yet? */
+  FILE * infile;                /* source stream */
+  JOCTET * buffer;              /* start of buffer */
+  boolean start_of_file;        /* have we gotten any data yet? */
 } my_source_mgr;
 
 typedef my_source_mgr * my_src_ptr;
 
-#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+#define INPUT_BUF_SIZE  4096    /* choose an efficiently fread'able size */
 
 
 /*
@@ -52,6 +55,14 @@
   src->start_of_file = TRUE;
 }
 
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+METHODDEF(void)
+init_mem_source (j_decompress_ptr cinfo)
+{
+  /* no work necessary: jpeg_mem_src already set the pointer and count */
+}
+#endif
+
 
 /*
  * Fill the input buffer --- called whenever buffer is emptied.
@@ -95,7 +106,7 @@
   nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
 
   if (nbytes <= 0) {
-    if (src->start_of_file)	/* Treat empty input file as fatal error */
+    if (src->start_of_file)     /* Treat empty input file as fatal error */
       ERREXIT(cinfo, JERR_INPUT_EMPTY);
     WARNMS(cinfo, JWRN_JPEG_EOF);
     /* Insert a fake EOI marker */
@@ -111,6 +122,29 @@
   return TRUE;
 }
 
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+METHODDEF(boolean)
+fill_mem_input_buffer (j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET) 0xFF, (JOCTET) JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * means the input ran out early; this is reported as a warning only.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;      /* just the 0xFF, JPEG_EOI pair */
+
+  return TRUE;
+}
+#endif
+
 
 /*
  * Skip data --- used to skip over a potentially large amount of
@@ -127,22 +161,22 @@
 METHODDEF(void)
 skip_input_data (j_decompress_ptr cinfo, long num_bytes)
 {
-  my_src_ptr src = (my_src_ptr) cinfo->src;
+  struct jpeg_source_mgr * src = cinfo->src;
 
   /* Just a dumb implementation for now.  Could use fseek() except
    * it doesn't work on pipes.  Not clear that being smart is worth
    * any trouble anyway --- large skips are infrequent.
    */
   if (num_bytes > 0) {
-    while (num_bytes > (long) src->pub.bytes_in_buffer) {
-      num_bytes -= (long) src->pub.bytes_in_buffer;
-      (void) fill_input_buffer(cinfo);
+    while (num_bytes > (long) src->bytes_in_buffer) {
+      num_bytes -= (long) src->bytes_in_buffer;
+      (void) (*src->fill_input_buffer) (cinfo);
       /* note we assume that fill_input_buffer will never return FALSE,
        * so suspension need not be handled.
        */
     }
-    src->pub.next_input_byte += (size_t) num_bytes;
-    src->pub.bytes_in_buffer -= (size_t) num_bytes;
+    src->next_input_byte += (size_t) num_bytes;
+    src->bytes_in_buffer -= (size_t) num_bytes;
   }
 }
 
@@ -190,14 +224,14 @@
    * This makes it unsafe to use this manager and a different source
    * manager serially with the same JPEG object.  Caveat programmer.
    */
-  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
     cinfo->src = (struct jpeg_source_mgr *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  SIZEOF(my_source_mgr));
+                                  sizeof(my_source_mgr));
     src = (my_src_ptr) cinfo->src;
     src->buffer = (JOCTET *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+                                  INPUT_BUF_SIZE * sizeof(JOCTET));
   }
 
   src = (my_src_ptr) cinfo->src;
@@ -210,3 +244,40 @@
   src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
   src->pub.next_input_byte = NULL; /* until buffer loaded */
 }
+
+
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src (j_decompress_ptr cinfo,
+              unsigned char * inbuffer, unsigned long insize)
+{
+  struct jpeg_source_mgr * src;
+
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.  NOTE(review): a non-NULL src left by another manager is
+   * reused as-is; only sizeof(struct jpeg_source_mgr) is allocated here. */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+                                  sizeof(struct jpeg_source_mgr));
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = (size_t) insize;       /* whole image is available */
+  src->next_input_byte = (JOCTET *) inbuffer;   /* read in place, no copy */
+}
+#endif
diff --git a/jdcoefct.c b/jdcoefct.c
index 4938d20..199a628 100644
--- a/jdcoefct.c
+++ b/jdcoefct.c
@@ -1,8 +1,10 @@
 /*
  * jdcoefct.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the coefficient buffer controller for decompression.
@@ -17,6 +19,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 /* Block smoothing is only applicable for progressive JPEG, so: */
 #ifndef D_PROGRESSIVE_SUPPORTED
@@ -30,23 +33,23 @@
 
   /* These variables keep track of the current location of the input side. */
   /* cinfo->input_iMCU_row is also used for this. */
-  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row */
-  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
-  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
 
   /* The output side's location is represented by cinfo->output_iMCU_row. */
 
   /* In single-pass modes, it's sufficient to buffer just one MCU.
    * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
    * and let the entropy decoder write into that workspace each time.
-   * (On 80x86, the workspace is FAR even though it's not really very big;
-   * this is to keep the module interfaces unchanged when a large coefficient
-   * buffer is necessary.)
    * In multi-pass modes, this array points to the current MCU's blocks
    * within the virtual arrays; it is used only by the input side.
    */
   JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
 
+  /* Temporary workspace for one MCU */
+  JCOEF * workspace;
+
 #ifdef D_MULTISCAN_FILES_SUPPORTED
   /* In multi-pass modes, we need a virtual block array for each component. */
   jvirt_barray_ptr whole_image[MAX_COMPONENTS];
@@ -55,7 +58,7 @@
 #ifdef BLOCK_SMOOTHING_SUPPORTED
   /* When doing block smoothing, we latch coefficient Al values here */
   int * coef_bits_latch;
-#define SAVED_COEFS  6		/* we save coef_bits[0..5] */
+#define SAVED_COEFS  6          /* we save coef_bits[0..5] */
 #endif
 } my_coef_controller;
 
@@ -63,15 +66,15 @@
 
 /* Forward declarations */
 METHODDEF(int) decompress_onepass
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
 #ifdef D_MULTISCAN_FILES_SUPPORTED
 METHODDEF(int) decompress_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
 #endif
 #ifdef BLOCK_SMOOTHING_SUPPORTED
-LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));
+LOCAL(boolean) smoothing_ok (j_decompress_ptr cinfo);
 METHODDEF(int) decompress_smooth_data
-	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+        (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
 #endif
 
 
@@ -147,7 +150,7 @@
 decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
   JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
   int blkn, ci, xindex, yindex, yoffset, useful_width;
@@ -160,49 +163,49 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
-      jzero_far((void FAR *) coef->MCU_buffer[0],
-		(size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+      jzero_far((void *) coef->MCU_buffer[0],
+                (size_t) (cinfo->blocks_in_MCU * sizeof(JBLOCK)));
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
       }
       /* Determine where data should go in output_buf and do the IDCT thing.
        * We skip dummy blocks at the right and bottom edges (but blkn gets
        * incremented past them!).  Note the inner loop relies on having
        * allocated the MCU_buffer[] blocks sequentially.
        */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	/* Don't bother to IDCT an uninteresting component. */
-	if (! compptr->component_needed) {
-	  blkn += compptr->MCU_blocks;
-	  continue;
-	}
-	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
-	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
-						    : compptr->last_col_width;
-	output_ptr = output_buf[compptr->component_index] +
-	  yoffset * compptr->DCT_scaled_size;
-	start_col = MCU_col_num * compptr->MCU_sample_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  if (cinfo->input_iMCU_row < last_iMCU_row ||
-	      yoffset+yindex < compptr->last_row_height) {
-	    output_col = start_col;
-	    for (xindex = 0; xindex < useful_width; xindex++) {
-	      (*inverse_DCT) (cinfo, compptr,
-			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
-			      output_ptr, output_col);
-	      output_col += compptr->DCT_scaled_size;
-	    }
-	  }
-	  blkn += compptr->MCU_width;
-	  output_ptr += compptr->DCT_scaled_size;
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        /* Don't bother to IDCT an uninteresting component. */
+        if (! compptr->component_needed) {
+          blkn += compptr->MCU_blocks;
+          continue;
+        }
+        inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+        useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+                                                    : compptr->last_col_width;
+        output_ptr = output_buf[compptr->component_index] +
+          yoffset * compptr->_DCT_scaled_size;
+        start_col = MCU_col_num * compptr->MCU_sample_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (cinfo->input_iMCU_row < last_iMCU_row ||
+              yoffset+yindex < compptr->last_row_height) {
+            output_col = start_col;
+            for (xindex = 0; xindex < useful_width; xindex++) {
+              (*inverse_DCT) (cinfo, compptr,
+                              (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+                              output_ptr, output_col);
+              output_col += compptr->_DCT_scaled_size;
+            }
+          }
+          blkn += compptr->MCU_width;
+          output_ptr += compptr->_DCT_scaled_size;
+        }
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -227,7 +230,7 @@
 METHODDEF(int)
 dummy_consume_data (j_decompress_ptr cinfo)
 {
-  return JPEG_SUSPENDED;	/* Always indicate nothing was done */
+  return JPEG_SUSPENDED;        /* Always indicate nothing was done */
 }
 
 
@@ -244,7 +247,7 @@
 consume_data (j_decompress_ptr cinfo)
 {
   my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
-  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
   int blkn, ci, xindex, yindex, yoffset;
   JDIMENSION start_col;
   JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
@@ -268,25 +271,25 @@
   for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
        yoffset++) {
     for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
-	 MCU_col_num++) {
+         MCU_col_num++) {
       /* Construct list of pointers to DCT blocks belonging to this MCU */
-      blkn = 0;			/* index of current DCT block within MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
       for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
-	compptr = cinfo->cur_comp_info[ci];
-	start_col = MCU_col_num * compptr->MCU_width;
-	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
-	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
-	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
-	    coef->MCU_buffer[blkn++] = buffer_ptr++;
-	  }
-	}
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
       }
       /* Try to fetch the MCU. */
       if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
-	/* Suspension forced; update state counters and exit */
-	coef->MCU_vert_offset = yoffset;
-	coef->MCU_ctr = MCU_col_num;
-	return JPEG_SUSPENDED;
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
       }
     }
     /* Completed an MCU row, but perhaps not an iMCU row */
@@ -327,8 +330,8 @@
 
   /* Force some input to be done if we are getting ahead of the input. */
   while (cinfo->input_scan_number < cinfo->output_scan_number ||
-	 (cinfo->input_scan_number == cinfo->output_scan_number &&
-	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+         (cinfo->input_scan_number == cinfo->output_scan_number &&
+          cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
     if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
       return JPEG_SUSPENDED;
   }
@@ -359,12 +362,12 @@
       buffer_ptr = buffer[block_row];
       output_col = 0;
       for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
-			output_ptr, output_col);
-	buffer_ptr++;
-	output_col += compptr->DCT_scaled_size;
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+                        output_ptr, output_col);
+        buffer_ptr++;
+        output_col += compptr->_DCT_scaled_size;
       }
-      output_ptr += compptr->DCT_scaled_size;
+      output_ptr += compptr->_DCT_scaled_size;
     }
   }
 
@@ -419,8 +422,8 @@
   if (coef->coef_bits_latch == NULL)
     coef->coef_bits_latch = (int *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  cinfo->num_components *
-				  (SAVED_COEFS * SIZEOF(int)));
+                                  cinfo->num_components *
+                                  (SAVED_COEFS * sizeof(int)));
   coef_bits_latch = coef->coef_bits_latch;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
@@ -430,11 +433,11 @@
       return FALSE;
     /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
     if (qtable->quantval[0] == 0 ||
-	qtable->quantval[Q01_POS] == 0 ||
-	qtable->quantval[Q10_POS] == 0 ||
-	qtable->quantval[Q20_POS] == 0 ||
-	qtable->quantval[Q11_POS] == 0 ||
-	qtable->quantval[Q02_POS] == 0)
+        qtable->quantval[Q01_POS] == 0 ||
+        qtable->quantval[Q10_POS] == 0 ||
+        qtable->quantval[Q20_POS] == 0 ||
+        qtable->quantval[Q11_POS] == 0 ||
+        qtable->quantval[Q02_POS] == 0)
       return FALSE;
     /* DC values must be at least partly known for all components. */
     coef_bits = cinfo->coef_bits[ci];
@@ -444,7 +447,7 @@
     for (coefi = 1; coefi <= 5; coefi++) {
       coef_bits_latch[coefi] = coef_bits[coefi];
       if (coef_bits[coefi] != 0)
-	smoothing_useful = TRUE;
+        smoothing_useful = TRUE;
     }
     coef_bits_latch += SAVED_COEFS;
   }
@@ -471,16 +474,19 @@
   jpeg_component_info *compptr;
   inverse_DCT_method_ptr inverse_DCT;
   boolean first_row, last_row;
-  JBLOCK workspace;
+  JCOEF * workspace;
   int *coef_bits;
   JQUANT_TBL *quanttbl;
   INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
   int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;
   int Al, pred;
 
+  /* Keep a local variable to avoid looking it up more than once */
+  workspace = coef->workspace;
+
   /* Force some input to be done if we are getting ahead of the input. */
   while (cinfo->input_scan_number <= cinfo->output_scan_number &&
-	 ! cinfo->inputctl->eoi_reached) {
+         ! cinfo->inputctl->eoi_reached) {
     if (cinfo->input_scan_number == cinfo->output_scan_number) {
       /* If input is working on current scan, we ordinarily want it to
        * have completed the current row.  But if input scan is DC,
@@ -489,7 +495,7 @@
        */
       JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
       if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
-	break;
+        break;
     }
     if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
       return JPEG_SUSPENDED;
@@ -517,15 +523,15 @@
     if (cinfo->output_iMCU_row > 0) {
       access_rows += compptr->v_samp_factor; /* prior iMCU row too */
       buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
-	 (JDIMENSION) access_rows, FALSE);
-      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+        ((j_common_ptr) cinfo, coef->whole_image[ci],
+         (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+         (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor; /* point to current iMCU row */
       first_row = FALSE;
     } else {
       buffer = (*cinfo->mem->access_virt_barray)
-	((j_common_ptr) cinfo, coef->whole_image[ci],
-	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+        ((j_common_ptr) cinfo, coef->whole_image[ci],
+         (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
       first_row = TRUE;
     }
     /* Fetch component-dependent info */
@@ -543,13 +549,13 @@
     for (block_row = 0; block_row < block_rows; block_row++) {
       buffer_ptr = buffer[block_row];
       if (first_row && block_row == 0)
-	prev_block_row = buffer_ptr;
+        prev_block_row = buffer_ptr;
       else
-	prev_block_row = buffer[block_row-1];
+        prev_block_row = buffer[block_row-1];
       if (last_row && block_row == block_rows-1)
-	next_block_row = buffer_ptr;
+        next_block_row = buffer_ptr;
       else
-	next_block_row = buffer[block_row+1];
+        next_block_row = buffer[block_row+1];
       /* We fetch the surrounding DC values using a sliding-register approach.
        * Initialize all nine here so as to do the right thing on narrow pics.
        */
@@ -559,104 +565,104 @@
       output_col = 0;
       last_block_column = compptr->width_in_blocks - 1;
       for (block_num = 0; block_num <= last_block_column; block_num++) {
-	/* Fetch current DCT block into workspace so we can modify it. */
-	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
-	/* Update DC values */
-	if (block_num < last_block_column) {
-	  DC3 = (int) prev_block_row[1][0];
-	  DC6 = (int) buffer_ptr[1][0];
-	  DC9 = (int) next_block_row[1][0];
-	}
-	/* Compute coefficient estimates per K.8.
-	 * An estimate is applied only if coefficient is still zero,
-	 * and is not known to be fully accurate.
-	 */
-	/* AC01 */
-	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
-	  num = 36 * Q00 * (DC4 - DC6);
-	  if (num >= 0) {
-	    pred = (int) (((Q01<<7) + num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q01<<7) - num) / (Q01<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[1] = (JCOEF) pred;
-	}
-	/* AC10 */
-	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
-	  num = 36 * Q00 * (DC2 - DC8);
-	  if (num >= 0) {
-	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[8] = (JCOEF) pred;
-	}
-	/* AC20 */
-	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
-	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[16] = (JCOEF) pred;
-	}
-	/* AC11 */
-	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
-	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
-	  if (num >= 0) {
-	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[9] = (JCOEF) pred;
-	}
-	/* AC02 */
-	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
-	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
-	  if (num >= 0) {
-	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	  } else {
-	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
-	    if (Al > 0 && pred >= (1<<Al))
-	      pred = (1<<Al)-1;
-	    pred = -pred;
-	  }
-	  workspace[2] = (JCOEF) pred;
-	}
-	/* OK, do the IDCT */
-	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
-			output_ptr, output_col);
-	/* Advance for next column */
-	DC1 = DC2; DC2 = DC3;
-	DC4 = DC5; DC5 = DC6;
-	DC7 = DC8; DC8 = DC9;
-	buffer_ptr++, prev_block_row++, next_block_row++;
-	output_col += compptr->DCT_scaled_size;
+        /* Fetch current DCT block into workspace so we can modify it. */
+        jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+        /* Update DC values */
+        if (block_num < last_block_column) {
+          DC3 = (int) prev_block_row[1][0];
+          DC6 = (int) buffer_ptr[1][0];
+          DC9 = (int) next_block_row[1][0];
+        }
+        /* Compute coefficient estimates per K.8.
+         * An estimate is applied only if coefficient is still zero,
+         * and is not known to be fully accurate.
+         */
+        /* AC01 */
+        if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+          num = 36 * Q00 * (DC4 - DC6);
+          if (num >= 0) {
+            pred = (int) (((Q01<<7) + num) / (Q01<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q01<<7) - num) / (Q01<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[1] = (JCOEF) pred;
+        }
+        /* AC10 */
+        if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+          num = 36 * Q00 * (DC2 - DC8);
+          if (num >= 0) {
+            pred = (int) (((Q10<<7) + num) / (Q10<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q10<<7) - num) / (Q10<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[8] = (JCOEF) pred;
+        }
+        /* AC20 */
+        if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+          num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+          if (num >= 0) {
+            pred = (int) (((Q20<<7) + num) / (Q20<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q20<<7) - num) / (Q20<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[16] = (JCOEF) pred;
+        }
+        /* AC11 */
+        if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+          num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+          if (num >= 0) {
+            pred = (int) (((Q11<<7) + num) / (Q11<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q11<<7) - num) / (Q11<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[9] = (JCOEF) pred;
+        }
+        /* AC02 */
+        if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+          num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+          if (num >= 0) {
+            pred = (int) (((Q02<<7) + num) / (Q02<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+          } else {
+            pred = (int) (((Q02<<7) - num) / (Q02<<8));
+            if (Al > 0 && pred >= (1<<Al))
+              pred = (1<<Al)-1;
+            pred = -pred;
+          }
+          workspace[2] = (JCOEF) pred;
+        }
+        /* OK, do the IDCT */
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+                        output_ptr, output_col);
+        /* Advance for next column */
+        DC1 = DC2; DC2 = DC3;
+        DC4 = DC5; DC5 = DC6;
+        DC7 = DC8; DC8 = DC9;
+        buffer_ptr++, prev_block_row++, next_block_row++;
+        output_col += compptr->_DCT_scaled_size;
       }
-      output_ptr += compptr->DCT_scaled_size;
+      output_ptr += compptr->_DCT_scaled_size;
     }
   }
 
@@ -679,7 +685,7 @@
 
   coef = (my_coef_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_coef_controller));
+                                sizeof(my_coef_controller));
   cinfo->coef = (struct jpeg_d_coef_controller *) coef;
   coef->pub.start_input_pass = start_input_pass;
   coef->pub.start_output_pass = start_output_pass;
@@ -697,20 +703,20 @@
     jpeg_component_info *compptr;
 
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       access_rows = compptr->v_samp_factor;
 #ifdef BLOCK_SMOOTHING_SUPPORTED
       /* If block smoothing could be used, need a bigger window */
       if (cinfo->progressive_mode)
-	access_rows *= 3;
+        access_rows *= 3;
 #endif
       coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) access_rows);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+         (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+                                (long) compptr->h_samp_factor),
+         (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+                                (long) compptr->v_samp_factor),
+         (JDIMENSION) access_rows);
     }
     coef->pub.consume_data = consume_data;
     coef->pub.decompress_data = decompress_data;
@@ -725,7 +731,7 @@
 
     buffer = (JBLOCKROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+                                  D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
     for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
       coef->MCU_buffer[i] = buffer + i;
     }
@@ -733,4 +739,9 @@
     coef->pub.decompress_data = decompress_onepass;
     coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
   }
+
+  /* Allocate the workspace buffer */
+  coef->workspace = (JCOEF *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                sizeof(JCOEF) * DCTSIZE2);
 }
diff --git a/jdcol565.c b/jdcol565.c
new file mode 100644
index 0000000..a2c98f3
--- /dev/null
+++ b/jdcol565.c
@@ -0,0 +1,408 @@
+/*
+ * jdcol565.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modifications:
+ * Copyright (C) 2013, Linaro Limited.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+/* This file is included by jdcolor.c */
+
+
+#define PACK_SHORT_565(r, g, b)   ((((r) << 8) & 0xf800) |  \
+                                   (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_TWO_PIXELS(l, r)     (((r) << 16) | (l))
+#define PACK_NEED_ALIGNMENT(ptr)  (((size_t)(ptr)) & 3)
+/* Store two packed pixels as two 16-bit halves, low half first. */
+#define WRITE_TWO_PIXELS(addr, pixels) {  \
+  ((INT16*)(addr))[0] = (pixels);  \
+  ((INT16*)(addr))[1] = (pixels) >> 16;  \
+}
+#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(INT32 *)(addr)) = (pixels))
+/* Add the low byte of the dither word to a sample (halved for green). */
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3  /* low two bits of the scanline pick an entry */
+#define DITHER_ROTATE(x)  ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const INT32 dither_matrix[4] = {  /* one packed 32-bit row per entry */
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
+METHODDEF(void)
+ycc_rgb565_convert (j_decompress_ptr cinfo,
+                    JSAMPIMAGE input_buf, JDIMENSION input_row,
+                    JSAMPARRAY output_buf, int num_rows)
+{
+  /* Convert planar YCbCr rows to packed RGB565 pixels, without dithering. */
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr, inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      y  = GETJSAMPLE(*inptr0);
+      cb = GETJSAMPLE(*inptr1);
+      cr = GETJSAMPLE(*inptr2);
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+ycc_rgb565D_convert (j_decompress_ptr cinfo,
+                     JSAMPIMAGE input_buf, JDIMENSION input_row,
+                     JSAMPARRAY output_buf, int num_rows)
+{
+  /* Convert planar YCbCr rows to packed RGB565 pixels, with ordered dither. */
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr, inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r, g, b);
+
+      y  = GETJSAMPLE(*inptr0++);
+      cb = GETJSAMPLE(*inptr1++);
+      cr = GETJSAMPLE(*inptr2++);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      y  = GETJSAMPLE(*inptr0);
+      cb = GETJSAMPLE(*inptr1);
+      cr = GETJSAMPLE(*inptr2);
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+rgb_rgb565_convert (j_decompress_ptr cinfo,
+                    JSAMPIMAGE input_buf, JDIMENSION input_row,
+                    JSAMPARRAY output_buf, int num_rows)
+{
+  /* Pack three input planes into RGB565 pixels, without dithering. */
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      r = GETJSAMPLE(*inptr0++);
+      g = GETJSAMPLE(*inptr1++);
+      b = GETJSAMPLE(*inptr2++);
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      r = GETJSAMPLE(*inptr0++);
+      g = GETJSAMPLE(*inptr1++);
+      b = GETJSAMPLE(*inptr2++);
+      rgb = PACK_SHORT_565(r, g, b);
+
+      r = GETJSAMPLE(*inptr0++);
+      g = GETJSAMPLE(*inptr1++);
+      b = GETJSAMPLE(*inptr2++);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      r = GETJSAMPLE(*inptr0);
+      g = GETJSAMPLE(*inptr1);
+      b = GETJSAMPLE(*inptr2);
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+rgb_rgb565D_convert (j_decompress_ptr cinfo,
+                     JSAMPIMAGE input_buf, JDIMENSION input_row,
+                     JSAMPARRAY output_buf, int num_rows)
+{
+  /* Pack three input planes into RGB565 pixels, with ordered dithering. */
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int r, g, b;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r, g, b);
+
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0++), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1++), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2++), d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      r = range_limit[DITHER_565_R(GETJSAMPLE(*inptr0), d0)];
+      g = range_limit[DITHER_565_G(GETJSAMPLE(*inptr1), d0)];
+      b = range_limit[DITHER_565_B(GETJSAMPLE(*inptr2), d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+gray_rgb565_convert (j_decompress_ptr cinfo,
+                     JSAMPIMAGE input_buf, JDIMENSION input_row,
+                     JSAMPARRAY output_buf, int num_rows)
+{
+  /* Expand single-plane gray rows to packed RGB565 pixels, no dithering. */
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int g;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      g = *inptr++;
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      g = *inptr++;
+      rgb = PACK_SHORT_565(g, g, g);
+      g = *inptr++;
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      g = *inptr;
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
+
+
+METHODDEF(void)
+gray_rgb565D_convert (j_decompress_ptr cinfo,
+                      JSAMPIMAGE input_buf, JDIMENSION input_row,
+                      JSAMPARRAY output_buf, int num_rows)
+{
+  /* Expand single-plane gray rows to packed RGB565 pixels, with dithering. */
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+
+  while (--num_rows >= 0) {
+    INT32 rgb;
+    unsigned int g;
+    JDIMENSION num_cols = cinfo->output_width;  /* full width for every row */
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+      outptr += 2;
+      num_cols--;  /* one pixel of this row is done; later rows unaffected */
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      d0 = DITHER_ROTATE(d0);
+
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      d0 = DITHER_ROTATE(d0);
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      g = *inptr;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16*)outptr = rgb;
+    }
+  }
+}
diff --git a/jdcolext.c b/jdcolext.c
new file mode 100644
index 0000000..f72cab0
--- /dev/null
+++ b/jdcolext.c
@@ -0,0 +1,142 @@
+/*
+ * jdcolext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2011, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+
+/* This file is included by jdcolor.c */
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+INLINE
+LOCAL(void)
+ycc_rgb_convert_internal (j_decompress_ptr cinfo,
+                          JSAMPIMAGE input_buf, JDIMENSION input_row,
+                          JSAMPARRAY output_buf, int num_rows)
+{
+  /* Convert num_rows rows of planar Y/Cb/Cr samples, starting at input_row,
+   * into interleaved pixels laid out per the RGB_* macros in effect when this
+   * file is included.
+   */
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  /* Keep the lookup tables in locals so they can live in registers. */
+  register JSAMPLE * limit = cinfo->sample_range_limit;
+  register int * cr_r = cconvert->Cr_r_tab;
+  register int * cb_b = cconvert->Cb_b_tab;
+  register INT32 * cr_g = cconvert->Cr_g_tab;
+  register INT32 * cb_g = cconvert->Cb_g_tab;
+  register JSAMPROW src_y, src_cb, src_cr;
+  register JSAMPROW dst;
+  register JDIMENSION x;
+  register int luma, cb, cr;
+  JDIMENSION width = cinfo->output_width;
+  SHIFT_TEMPS
+
+  for (; num_rows > 0; num_rows--, input_row++) {
+    src_y  = input_buf[0][input_row];
+    src_cb = input_buf[1][input_row];
+    src_cr = input_buf[2][input_row];
+    dst = *output_buf++;
+    for (x = 0; x < width; x++, dst += RGB_PIXELSIZE) {
+      luma = GETJSAMPLE(src_y[x]);
+      cb = GETJSAMPLE(src_cb[x]);
+      cr = GETJSAMPLE(src_cr[x]);
+      /* DCT losses can push results out of range, so clamp via the table. */
+      dst[RGB_RED] = limit[luma + cr_r[cr]];
+      dst[RGB_GREEN] =
+        limit[luma + ((int) RIGHT_SHIFT(cb_g[cb] + cr_g[cr], SCALEBITS))];
+      dst[RGB_BLUE] = limit[luma + cb_b[cb]];
+#ifdef RGB_ALPHA
+      /* Fill the fourth byte with 0xFF so it reads as opaque alpha. */
+      dst[RGB_ALPHA] = 0xFF;
+#endif
+    }
+  }
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+INLINE
+LOCAL(void)
+gray_rgb_convert_internal (j_decompress_ptr cinfo,
+                           JSAMPIMAGE input_buf, JDIMENSION input_row,
+                           JSAMPARRAY output_buf, int num_rows)
+{
+  /* Expand each gray sample into one output pixel whose red, green and blue
+   * slots all hold that sample. */
+  register JSAMPROW src, dst;
+  register JDIMENSION x;
+  JDIMENSION width = cinfo->output_width;
+
+  for (; num_rows > 0; num_rows--) {
+    src = input_buf[0][input_row++];
+    dst = *output_buf++;
+    for (x = 0; x < width; x++, dst += RGB_PIXELSIZE) {
+      /* plain copy, so GETJSAMPLE() is not needed */
+      dst[RGB_RED] = dst[RGB_GREEN] = dst[RGB_BLUE] = src[x];
+#ifdef RGB_ALPHA
+      /* 0xFF in the spare byte lets it be read as an opaque alpha value */
+      dst[RGB_ALPHA] = 0xFF;
+#endif
+    }
+  }
+}
+
+
+/*
+ * Convert RGB to extended RGB: just swap the order of source pixels
+ */
+
+INLINE
+LOCAL(void)
+rgb_rgb_convert_internal (j_decompress_ptr cinfo,
+                          JSAMPIMAGE input_buf, JDIMENSION input_row,
+                          JSAMPARRAY output_buf, int num_rows)
+{
+  /* Interleave the three input planes into output pixels, storing the sample
+   * from plane 0, 1 and 2 at the RGB_RED, RGB_GREEN and RGB_BLUE offsets
+   * that are in effect when this file is included. */
+  register JSAMPROW src_r, src_g, src_b;
+  register JSAMPROW dst;
+  register JDIMENSION x;
+  JDIMENSION width = cinfo->output_width;
+
+  for (; num_rows > 0; num_rows--, input_row++) {
+    src_r = input_buf[0][input_row];
+    src_g = input_buf[1][input_row];
+    src_b = input_buf[2][input_row];
+    dst = *output_buf++;
+    for (x = 0; x < width; x++, dst += RGB_PIXELSIZE) {
+      /* plain copies, so GETJSAMPLE() is not needed */
+      dst[RGB_RED] = src_r[x];
+      dst[RGB_GREEN] = src_g[x];
+      dst[RGB_BLUE] = src_b[x];
+#ifdef RGB_ALPHA
+      /* 0xFF in the spare byte lets it be read as an opaque alpha value */
+      dst[RGB_ALPHA] = 0xFF;
+#endif
+    }
+  }
+}
diff --git a/jdcolor.c b/jdcolor.c
index 6c04dfe..6927e5e 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -1,8 +1,13 @@
 /*
  * jdcolor.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2009, 2011-2012, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains output colorspace conversion routines.
@@ -11,6 +16,8 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsimd.h"
+#include "jconfigint.h"
 
 
 /* Private subobject */
@@ -19,24 +26,32 @@
   struct jpeg_color_deconverter pub; /* public fields */
 
   /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+  int * Cr_r_tab;               /* => table for Cr to R conversion */
+  int * Cb_b_tab;               /* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
+
+  /* Private state for RGB->Y conversion */
+  INT32 * rgb_y_tab;            /* => table for RGB to Y conversion */
 } my_color_deconverter;
 
 typedef my_color_deconverter * my_cconvert_ptr;
 
 
 /**************** YCbCr -> RGB conversion: most common case **************/
+/****************   RGB -> Y   conversion: less common case **************/
 
 /*
  * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
  * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
  * The conversion equations to be implemented are therefore
- *	R = Y                + 1.40200 * Cr
- *	G = Y - 0.34414 * Cb - 0.71414 * Cr
- *	B = Y + 1.77200 * Cb
+ *
+ *      R = Y                + 1.40200 * Cr
+ *      G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *      B = Y + 1.77200 * Cb
+ *
+ *      Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *
  * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
  * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
  *
@@ -57,9 +72,134 @@
  * together before rounding.
  */
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+/* We allocate one big table for RGB->Y conversion and divide it up into
+ * three parts, instead of doing three alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all three addresses,
+ * anyway).
+ */
+
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1*(MAXJSAMPLE+1))      /* offset to G => Y section */
+#define B_Y_OFF         (2*(MAXJSAMPLE+1))      /* etc. */
+#define TABLE_SIZE      (3*(MAXJSAMPLE+1))
+
+
+/* Include inline routines for colorspace extensions */
+
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extrgb_convert_internal
+#define gray_rgb_convert_internal gray_extrgb_convert_internal
+#define rgb_rgb_convert_internal rgb_extrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal
+#define gray_rgb_convert_internal gray_extrgbx_convert_internal
+#define rgb_rgb_convert_internal rgb_extrgbx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extbgr_convert_internal
+#define gray_rgb_convert_internal gray_extbgr_convert_internal
+#define rgb_rgb_convert_internal rgb_extbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal
+#define gray_rgb_convert_internal gray_extbgrx_convert_internal
+#define rgb_rgb_convert_internal rgb_extbgrx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extxbgr_convert_internal
+#define gray_rgb_convert_internal gray_extxbgr_convert_internal
+#define rgb_rgb_convert_internal rgb_extxbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal
+#define gray_rgb_convert_internal gray_extxrgb_convert_internal
+#define rgb_rgb_convert_internal rgb_extxrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
 
 
 /*
@@ -76,26 +216,26 @@
 
   cconvert->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * sizeof(int));
   cconvert->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * sizeof(int));
   cconvert->Cr_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * sizeof(INT32));
   cconvert->Cb_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * sizeof(INT32));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     cconvert->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
     /* Cb=>B value is nearest int to 1.77200 * x */
     cconvert->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
     /* Cr=>G value is scaled-up -0.71414 * x */
     cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
     /* Cb=>G value is scaled-up -0.34414 * x */
@@ -107,33 +247,93 @@
 
 /*
  * Convert some rows of samples to the output colorspace.
- *
- * Note that we change from noninterleaved, one-plane-per-component format
- * to interleaved-pixel format.  The output buffer is therefore three times
- * as wide as the input buffer.
- * A starting row offset is provided only for the input buffer.  The caller
- * can easily adjust the passed output_buf value to accommodate any row
- * offset required on that side.
  */
 
 METHODDEF(void)
 ycc_rgb_convert (j_decompress_ptr cinfo,
-		 JSAMPIMAGE input_buf, JDIMENSION input_row,
-		 JSAMPARRAY output_buf, int num_rows)
+                 JSAMPIMAGE input_buf, JDIMENSION input_row,
+                 JSAMPARRAY output_buf, int num_rows)
+{
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    default:
+      ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                               num_rows);
+      break;
+  }
+}
+
+
+/**************** Cases other than YCbCr -> RGB **************/
+
+
+/*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table (j_decompress_ptr cinfo)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
+  INT32 * rgb_y_tab;
+  INT32 i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_y_tab = rgb_y_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                (TABLE_SIZE * sizeof(INT32)));
+
+  for (i = 0; i <= MAXJSAMPLE; i++) {
+    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
+    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
+    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_decompress_ptr cinfo,
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_y_tab;
   register JSAMPROW outptr;
   register JSAMPROW inptr0, inptr1, inptr2;
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
 
   while (--num_rows >= 0) {
     inptr0 = input_buf[0][input_row];
@@ -142,24 +342,18 @@
     input_row++;
     outptr = *output_buf++;
     for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
-      outptr[RGB_GREEN] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
-      outptr += RGB_PIXELSIZE;
+      r = GETJSAMPLE(inptr0[col]);
+      g = GETJSAMPLE(inptr1[col]);
+      b = GETJSAMPLE(inptr2[col]);
+      /* Y */
+      outptr[col] = (JSAMPLE)
+                ((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+                 >> SCALEBITS);
     }
   }
 }
 
 
-/**************** Cases other than YCbCr -> RGB **************/
-
-
 /*
  * Color conversion for no colorspace change: just copy the data,
  * converting from separate-planes to interleaved representation.
@@ -167,8 +361,8 @@
 
 METHODDEF(void)
 null_convert (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION input_row,
-	      JSAMPARRAY output_buf, int num_rows)
+              JSAMPIMAGE input_buf, JDIMENSION input_row,
+              JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW inptr, outptr;
   register JDIMENSION count;
@@ -181,8 +375,8 @@
       inptr = input_buf[ci][input_row];
       outptr = output_buf[0] + ci;
       for (count = num_cols; count > 0; count--) {
-	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-	outptr += num_components;
+        *outptr = *inptr++;     /* needn't bother with GETJSAMPLE() here */
+        outptr += num_components;
       }
     }
     input_row++;
@@ -199,37 +393,102 @@
 
 METHODDEF(void)
 grayscale_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
+                   JSAMPIMAGE input_buf, JDIMENSION input_row,
+                   JSAMPARRAY output_buf, int num_rows)
 {
   jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
-		    num_rows, cinfo->output_width);
+                    num_rows, cinfo->output_width);
 }
 
 
 /*
- * Convert grayscale to RGB: just duplicate the graylevel three times.
- * This is provided to support applications that don't want to cope
- * with grayscale as a separate case.
+ * Convert grayscale to RGB
  */
 
 METHODDEF(void)
 gray_rgb_convert (j_decompress_ptr cinfo,
-		  JSAMPIMAGE input_buf, JDIMENSION input_row,
-		  JSAMPARRAY output_buf, int num_rows)
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
 {
-  register JSAMPROW inptr, outptr;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_BGR:
+      gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    default:
+      gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+      break;
+  }
+}
 
-  while (--num_rows >= 0) {
-    inptr = input_buf[0][input_row++];
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
-      outptr += RGB_PIXELSIZE;
-    }
+
+/*
+ * Convert plain RGB to extended RGB
+ */
+
+METHODDEF(void)
+rgb_rgb_convert (j_decompress_ptr cinfo,
+                  JSAMPIMAGE input_buf, JDIMENSION input_row,
+                  JSAMPARRAY output_buf, int num_rows)
+{
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    default:
+      rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                               num_rows);
+      break;
   }
 }
 
@@ -243,8 +502,8 @@
 
 METHODDEF(void)
 ycck_cmyk_convert (j_decompress_ptr cinfo,
-		   JSAMPIMAGE input_buf, JDIMENSION input_row,
-		   JSAMPARRAY output_buf, int num_rows)
+                   JSAMPIMAGE input_buf, JDIMENSION input_row,
+                   JSAMPARRAY output_buf, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int y, cb, cr;
@@ -272,19 +531,22 @@
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
       /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
-      outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS)))];
-      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* blue */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];   /* red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +                 /* green */
+                              ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];   /* blue */
       /* K passes through unchanged */
-      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr[3] = inptr3[col];  /* don't need GETJSAMPLE here */
       outptr += 4;
     }
   }
 }
 
 
+#include "jdcol565.c"
+
+
 /*
  * Empty method for start_pass.
  */
@@ -308,7 +570,7 @@
 
   cconvert = (my_cconvert_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_color_deconverter));
+                                sizeof(my_color_deconverter));
   cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
   cconvert->pub.start_pass = start_pass_dcolor;
 
@@ -331,7 +593,7 @@
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
 
-  default:			/* JCS_UNKNOWN can be anything */
+  default:                      /* JCS_UNKNOWN can be anything */
     if (cinfo->num_components < 1)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
@@ -346,28 +608,77 @@
   case JCS_GRAYSCALE:
     cinfo->out_color_components = 1;
     if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
-	cinfo->jpeg_color_space == JCS_YCbCr) {
+        cinfo->jpeg_color_space == JCS_YCbCr) {
       cconvert->pub.color_convert = grayscale_convert;
       /* For color->grayscale conversion, only the Y (0) component is needed */
       for (ci = 1; ci < cinfo->num_components; ci++)
-	cinfo->comp_info[ci].component_needed = FALSE;
+        cinfo->comp_info[ci].component_needed = FALSE;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      cconvert->pub.color_convert = rgb_gray_convert;
+      build_rgb_y_table(cinfo);
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
   case JCS_RGB:
-    cinfo->out_color_components = RGB_PIXELSIZE;
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
     if (cinfo->jpeg_color_space == JCS_YCbCr) {
-      cconvert->pub.color_convert = ycc_rgb_convert;
-      build_ycc_rgb_table(cinfo);
+      if (jsimd_can_ycc_rgb())
+        cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
+      else {
+        cconvert->pub.color_convert = ycc_rgb_convert;
+        build_ycc_rgb_table(cinfo);
+      }
     } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
       cconvert->pub.color_convert = gray_rgb_convert;
-    } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
-      cconvert->pub.color_convert = null_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      if (rgb_red[cinfo->out_color_space] == 0 &&
+          rgb_green[cinfo->out_color_space] == 1 &&
+          rgb_blue[cinfo->out_color_space] == 2 &&
+          rgb_pixelsize[cinfo->out_color_space] == 3)
+        cconvert->pub.color_convert = null_convert;
+      else
+        cconvert->pub.color_convert = rgb_rgb_convert;
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
 
+  case JCS_RGB565:
+    cinfo->out_color_components = 3;
+    if (cinfo->dither_mode == JDITHER_NONE) {
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+        cconvert->pub.color_convert = ycc_rgb565_convert;
+        build_ycc_rgb_table(cinfo);
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub.color_convert = gray_rgb565_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub.color_convert = rgb_rgb565_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    } else {
+      /* only ordered dithering is supported */
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+        cconvert->pub.color_convert = ycc_rgb565D_convert;
+        build_ycc_rgb_table(cinfo);
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub.color_convert = gray_rgb565D_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub.color_convert = rgb_rgb565D_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
   case JCS_CMYK:
     cinfo->out_color_components = 4;
     if (cinfo->jpeg_color_space == JCS_YCCK) {
@@ -384,7 +695,7 @@
     if (cinfo->out_color_space == cinfo->jpeg_color_space) {
       cinfo->out_color_components = cinfo->num_components;
       cconvert->pub.color_convert = null_convert;
-    } else			/* unsupported non-null conversion */
+    } else                      /* unsupported non-null conversion */
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
     break;
   }
diff --git a/jdct.h b/jdct.h
index 04192a2..6f8b159 100644
--- a/jdct.h
+++ b/jdct.h
@@ -1,14 +1,16 @@
 /*
  * jdct.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This include file contains common declarations for the forward and
  * inverse DCT modules.  These declarations are private to the DCT managers
  * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
- * The individual DCT algorithms are kept in separate files to ease 
+ * The individual DCT algorithms are kept in separate files to ease
  * machine-dependent tuning (e.g., assembly coding).
  */
 
@@ -23,17 +25,24 @@
  * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
  * convention improves accuracy in integer implementations and saves some
  * work in floating-point ones.
- * Quantization of the output coefficients is done by jcdctmgr.c.
+ * Quantization of the output coefficients is done by jcdctmgr.c. This
+ * step requires an unsigned type and also one with twice the bits.
  */
 
 #if BITS_IN_JSAMPLE == 8
-typedef int DCTELEM;		/* 16 or 32 bits is fine */
+#ifndef WITH_SIMD
+typedef int DCTELEM;            /* 16 or 32 bits is fine */
+typedef unsigned int UDCTELEM;
+typedef unsigned long long UDCTELEM2;
 #else
-typedef INT32 DCTELEM;		/* must have 32 bits */
+typedef short DCTELEM;  /* prefer 16 bit with SIMD for parallelism */
+typedef unsigned short UDCTELEM;
+typedef unsigned int UDCTELEM2;
 #endif
-
-typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
-typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+#else
+typedef INT32 DCTELEM;          /* must have 32 bits */
+typedef unsigned long long UDCTELEM2;
+#endif
 
 
 /*
@@ -56,10 +65,10 @@
 typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
 #if BITS_IN_JSAMPLE == 8
 typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
-#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#define IFAST_SCALE_BITS  2     /* fractional bits in scale factors */
 #else
-typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
-#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+typedef INT32 IFAST_MULT_TYPE;  /* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13    /* fractional bits in scale factors */
 #endif
 typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
 
@@ -78,44 +87,66 @@
 #define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
 
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_fdct_islow		jFDislow
-#define jpeg_fdct_ifast		jFDifast
-#define jpeg_fdct_float		jFDfloat
-#define jpeg_idct_islow		jRDislow
-#define jpeg_idct_ifast		jRDifast
-#define jpeg_idct_float		jRDfloat
-#define jpeg_idct_4x4		jRD4x4
-#define jpeg_idct_2x2		jRD2x2
-#define jpeg_idct_1x1		jRD1x1
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
 /* Extern declarations for the forward and inverse DCT routines. */
 
-EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
-EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
-EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
+EXTERN(void) jpeg_fdct_islow (DCTELEM * data);
+EXTERN(void) jpeg_fdct_ifast (DCTELEM * data);
+EXTERN(void) jpeg_fdct_float (FAST_FLOAT * data);
 
 EXTERN(void) jpeg_idct_islow
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 EXTERN(void) jpeg_idct_ifast
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 EXTERN(void) jpeg_idct_float
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_7x7
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_6x6
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_5x5
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 EXTERN(void) jpeg_idct_4x4
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_3x3
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 EXTERN(void) jpeg_idct_2x2
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 EXTERN(void) jpeg_idct_1x1
-    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_9x9
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_10x10
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_11x11
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_12x12
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_13x13
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_14x14
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_15x15
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jpeg_idct_16x16
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col);
 
 
 /*
@@ -128,7 +159,7 @@
  * and may differ from one module to the next.
  */
 
-#define ONE	((INT32) 1)
+#define ONE     ((INT32) 1)
 #define CONST_SCALE (ONE << CONST_BITS)
 
 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
@@ -136,7 +167,7 @@
  * thus causing a lot of useless floating-point operations at run time.
  */
 
-#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+#define FIX(x)  ((INT32) ((x) * CONST_SCALE + 0.5))
 
 /* Descale and correctly round an INT32 value that's scaled by N bits.
  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
@@ -154,23 +185,23 @@
  * correct combination of casts.
  */
 
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
 #endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
 #define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
 #endif
 
-#ifndef MULTIPLY16C16		/* default definition */
+#ifndef MULTIPLY16C16           /* default definition */
 #define MULTIPLY16C16(var,const)  ((var) * (const))
 #endif
 
 /* Same except both inputs are variables. */
 
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
 #define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
 #endif
 
-#ifndef MULTIPLY16V16		/* default definition */
+#ifndef MULTIPLY16V16           /* default definition */
 #define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
 #endif
diff --git a/jddctmgr.c b/jddctmgr.c
index bbf8d0e..40e68f1 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -1,8 +1,13 @@
 /*
  * jddctmgr.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2002-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2013, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the inverse-DCT management logic.
@@ -18,7 +23,9 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+#include "jsimddct.h"
+#include "jpegcomp.h"
 
 
 /*
@@ -41,7 +48,7 @@
 /* Private subobject for this module */
 
 typedef struct {
-  struct jpeg_inverse_dct pub;	/* public fields */
+  struct jpeg_inverse_dct pub;  /* public fields */
 
   /* This array contains the IDCT method code that each multiplier table
    * is currently set up for, or -1 if it's not yet set up.
@@ -98,48 +105,121 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     /* Select the proper IDCT routine for this component's scaling */
-    switch (compptr->DCT_scaled_size) {
+    switch (compptr->_DCT_scaled_size) {
 #ifdef IDCT_SCALING_SUPPORTED
     case 1:
       method_ptr = jpeg_idct_1x1;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
       break;
     case 2:
-      method_ptr = jpeg_idct_2x2;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      if (jsimd_can_idct_2x2())
+        method_ptr = jsimd_idct_2x2;
+      else
+        method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 3:
+      method_ptr = jpeg_idct_3x3;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
     case 4:
-      method_ptr = jpeg_idct_4x4;
-      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      if (jsimd_can_idct_4x4())
+        method_ptr = jsimd_idct_4x4;
+      else
+        method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 5:
+      method_ptr = jpeg_idct_5x5;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 6:
+#if defined(__mips__)
+      if (jsimd_can_idct_6x6())
+        method_ptr = jsimd_idct_6x6;
+      else
+#endif
+      method_ptr = jpeg_idct_6x6;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 7:
+      method_ptr = jpeg_idct_7x7;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
       break;
 #endif
     case DCTSIZE:
       switch (cinfo->dct_method) {
 #ifdef DCT_ISLOW_SUPPORTED
       case JDCT_ISLOW:
-	method_ptr = jpeg_idct_islow;
-	method = JDCT_ISLOW;
-	break;
+        if (jsimd_can_idct_islow())
+          method_ptr = jsimd_idct_islow;
+        else
+          method_ptr = jpeg_idct_islow;
+        method = JDCT_ISLOW;
+        break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
       case JDCT_IFAST:
-	method_ptr = jpeg_idct_ifast;
-	method = JDCT_IFAST;
-	break;
+        if (jsimd_can_idct_ifast())
+          method_ptr = jsimd_idct_ifast;
+        else
+          method_ptr = jpeg_idct_ifast;
+        method = JDCT_IFAST;
+        break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
       case JDCT_FLOAT:
-	method_ptr = jpeg_idct_float;
-	method = JDCT_FLOAT;
-	break;
+        if (jsimd_can_idct_float())
+          method_ptr = jsimd_idct_float;
+        else
+          method_ptr = jpeg_idct_float;
+        method = JDCT_FLOAT;
+        break;
 #endif
       default:
-	ERREXIT(cinfo, JERR_NOT_COMPILED);
-	break;
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+        break;
       }
       break;
+    case 9:
+      method_ptr = jpeg_idct_9x9;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 10:
+      method_ptr = jpeg_idct_10x10;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 11:
+      method_ptr = jpeg_idct_11x11;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 12:
+#if defined(__mips__)
+      if (jsimd_can_idct_12x12())
+        method_ptr = jsimd_idct_12x12;
+      else
+#endif
+      method_ptr = jpeg_idct_12x12;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 13:
+      method_ptr = jpeg_idct_13x13;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 14:
+      method_ptr = jpeg_idct_14x14;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 15:
+      method_ptr = jpeg_idct_15x15;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 16:
+      method_ptr = jpeg_idct_16x16;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
     default:
-      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
+      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
       break;
     }
     idct->pub.inverse_DCT[ci] = method_ptr;
@@ -153,81 +233,81 @@
     if (! compptr->component_needed || idct->cur_method[ci] == method)
       continue;
     qtbl = compptr->quant_table;
-    if (qtbl == NULL)		/* happens if no data yet for component */
+    if (qtbl == NULL)           /* happens if no data yet for component */
       continue;
     idct->cur_method[ci] = method;
     switch (method) {
 #ifdef PROVIDE_ISLOW_TABLES
     case JDCT_ISLOW:
       {
-	/* For LL&M IDCT method, multipliers are equal to raw quantization
-	 * coefficients, but are stored as ints to ensure access efficiency.
-	 */
-	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
-	}
+        /* For LL&M IDCT method, multipliers are equal to raw quantization
+         * coefficients, but are stored as ints to ensure access efficiency.
+         */
+        ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+        for (i = 0; i < DCTSIZE2; i++) {
+          ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+        }
       }
       break;
 #endif
 #ifdef DCT_IFAST_SUPPORTED
     case JDCT_IFAST:
       {
-	/* For AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 * For integer operation, the multiplier table is to be scaled by
-	 * IFAST_SCALE_BITS.
-	 */
-	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+        /* For AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * For integer operation, the multiplier table is to be scaled by
+         * IFAST_SCALE_BITS.
+         */
+        IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
 #define CONST_BITS 14
-	static const INT16 aanscales[DCTSIZE2] = {
-	  /* precomputed values scaled up by 14 bits */
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
-	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
-	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
-	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
-	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
-	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
-	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
-	};
-	SHIFT_TEMPS
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
 
-	for (i = 0; i < DCTSIZE2; i++) {
-	  ifmtbl[i] = (IFAST_MULT_TYPE)
-	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
-				  (INT32) aanscales[i]),
-		    CONST_BITS-IFAST_SCALE_BITS);
-	}
+        for (i = 0; i < DCTSIZE2; i++) {
+          ifmtbl[i] = (IFAST_MULT_TYPE)
+            DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+                                  (INT32) aanscales[i]),
+                    CONST_BITS-IFAST_SCALE_BITS);
+        }
       }
       break;
 #endif
 #ifdef DCT_FLOAT_SUPPORTED
     case JDCT_FLOAT:
       {
-	/* For float AA&N IDCT method, multipliers are equal to quantization
-	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
-	 *   scalefactor[0] = 1
-	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
-	 */
-	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
-	int row, col;
-	static const double aanscalefactor[DCTSIZE] = {
-	  1.0, 1.387039845, 1.306562965, 1.175875602,
-	  1.0, 0.785694958, 0.541196100, 0.275899379
-	};
+        /* For float AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         */
+        FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
 
-	i = 0;
-	for (row = 0; row < DCTSIZE; row++) {
-	  for (col = 0; col < DCTSIZE; col++) {
-	    fmtbl[i] = (FLOAT_MULT_TYPE)
-	      ((double) qtbl->quantval[i] *
-	       aanscalefactor[row] * aanscalefactor[col]);
-	    i++;
-	  }
-	}
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fmtbl[i] = (FLOAT_MULT_TYPE)
+              ((double) qtbl->quantval[i] *
+               aanscalefactor[row] * aanscalefactor[col]);
+            i++;
+          }
+        }
       }
       break;
 #endif
@@ -252,7 +332,7 @@
 
   idct = (my_idct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_idct_controller));
+                                sizeof(my_idct_controller));
   cinfo->idct = (struct jpeg_inverse_dct *) idct;
   idct->pub.start_pass = start_pass;
 
@@ -261,8 +341,8 @@
     /* Allocate and pre-zero a multiplier table for each component */
     compptr->dct_table =
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(multiplier_table));
-    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
+                                  sizeof(multiplier_table));
+    MEMZERO(compptr->dct_table, sizeof(multiplier_table));
     /* Mark multiplier table not yet set up for any method */
     idct->cur_method[ci] = -1;
   }
diff --git a/jdhuff.c b/jdhuff.c
index b5ba39f..7dc1328 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -1,8 +1,10 @@
 /*
  * jdhuff.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy decoding routines.
@@ -17,7 +19,9 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+#include "jdhuff.h"             /* Declarations shared with jdphuff.c */
+#include "jpegcomp.h"
+#include "jstdhuff.c"
 
 
 /*
@@ -41,10 +45,10 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -55,11 +59,11 @@
   /* These fields are loaded into local variables at start of each MCU.
    * In case of suspension, we exit WITHOUT updating them.
    */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
@@ -104,9 +108,9 @@
     /* Compute derived values for Huffman tables */
     /* We may do this more than once for a table, but it's not expensive */
     jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
-			    & entropy->dc_derived_tbls[dctbl]);
+                            & entropy->dc_derived_tbls[dctbl]);
     jpeg_make_d_derived_tbl(cinfo, FALSE, actbl,
-			    & entropy->ac_derived_tbls[actbl]);
+                            & entropy->ac_derived_tbls[actbl]);
     /* Initialize DC predictions to 0 */
     entropy->saved.last_dc_val[ci] = 0;
   }
@@ -122,7 +126,7 @@
     if (compptr->component_needed) {
       entropy->dc_needed[blkn] = TRUE;
       /* we don't need the ACs if producing a 1/8th-size image */
-      entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
+      entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1);
     } else {
       entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
     }
@@ -147,7 +151,7 @@
 
 GLOBAL(void)
 jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
-			 d_derived_tbl ** pdtbl)
+                         d_derived_tbl ** pdtbl)
 {
   JHUFF_TBL *htbl;
   d_derived_tbl *dtbl;
@@ -173,26 +177,26 @@
   if (*pdtbl == NULL)
     *pdtbl = (d_derived_tbl *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(d_derived_tbl));
+                                  sizeof(d_derived_tbl));
   dtbl = *pdtbl;
-  dtbl->pub = htbl;		/* fill in back link */
-  
+  dtbl->pub = htbl;             /* fill in back link */
+
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
   for (l = 1; l <= 16; l++) {
     i = (int) htbl->bits[l];
-    if (i < 0 || p + i > 256)	/* protect against table overrun */
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     while (i--)
       huffsize[p++] = (char) l;
   }
   huffsize[p] = 0;
   numsymbols = p;
-  
+
   /* Figure C.2: generate the codes themselves */
   /* We also validate that the counts represent a legal Huffman code tree. */
-  
+
   code = 0;
   si = huffsize[0];
   p = 0;
@@ -222,9 +226,10 @@
       p += htbl->bits[l];
       dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
     } else {
-      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
     }
   }
+  dtbl->valoffset[17] = 0;
   dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
 
   /* Compute lookahead tables to speed up decoding.
@@ -234,7 +239,8 @@
    * with that code.
    */
 
-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+   for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
+     dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
 
   p = 0;
   for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
@@ -243,9 +249,8 @@
       /* Generate left-justified code followed by all possible bit sequences */
       lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
       for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->look_nbits[lookbits] = l;
-	dtbl->look_sym[lookbits] = htbl->huffval[p];
-	lookbits++;
+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+        lookbits++;
       }
     }
   }
@@ -260,7 +265,7 @@
     for (i = 0; i < numsymbols; i++) {
       int sym = htbl->huffval[i];
       if (sym < 0 || sym > 15)
-	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
     }
   }
 }
@@ -282,7 +287,7 @@
  */
 
 #ifdef SLOW_SHIFT_32
-#define MIN_GET_BITS  15	/* minimum allowable value */
+#define MIN_GET_BITS  15        /* minimum allowable value */
 #else
 #define MIN_GET_BITS  (BIT_BUF_SIZE-7)
 #endif
@@ -290,8 +295,8 @@
 
 GLOBAL(boolean)
 jpeg_fill_bit_buffer (bitread_working_state * state,
-		      register bit_buf_type get_buffer, register int bits_left,
-		      int nbits)
+                      register bit_buf_type get_buffer, register int bits_left,
+                      int nbits)
 /* Load up the bit buffer to a depth of at least nbits */
 {
   /* Copy heavily used state fields into locals (hopefully registers) */
@@ -303,54 +308,54 @@
   /* (It is assumed that no request will be for more than that many bits.) */
   /* We fail to do so only if we hit a marker or are forced to suspend. */
 
-  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+  if (cinfo->unread_marker == 0) {      /* cannot advance past a marker */
     while (bits_left < MIN_GET_BITS) {
       register int c;
 
       /* Attempt to read a byte */
       if (bytes_in_buffer == 0) {
-	if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	  return FALSE;
-	next_input_byte = cinfo->src->next_input_byte;
-	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+        if (! (*cinfo->src->fill_input_buffer) (cinfo))
+          return FALSE;
+        next_input_byte = cinfo->src->next_input_byte;
+        bytes_in_buffer = cinfo->src->bytes_in_buffer;
       }
       bytes_in_buffer--;
       c = GETJOCTET(*next_input_byte++);
 
       /* If it's 0xFF, check and discard stuffed zero byte */
       if (c == 0xFF) {
-	/* Loop here to discard any padding FF's on terminating marker,
-	 * so that we can save a valid unread_marker value.  NOTE: we will
-	 * accept multiple FF's followed by a 0 as meaning a single FF data
-	 * byte.  This data pattern is not valid according to the standard.
-	 */
-	do {
-	  if (bytes_in_buffer == 0) {
-	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
-	      return FALSE;
-	    next_input_byte = cinfo->src->next_input_byte;
-	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
-	  }
-	  bytes_in_buffer--;
-	  c = GETJOCTET(*next_input_byte++);
-	} while (c == 0xFF);
+        /* Loop here to discard any padding FF's on terminating marker,
+         * so that we can save a valid unread_marker value.  NOTE: we will
+         * accept multiple FF's followed by a 0 as meaning a single FF data
+         * byte.  This data pattern is not valid according to the standard.
+         */
+        do {
+          if (bytes_in_buffer == 0) {
+            if (! (*cinfo->src->fill_input_buffer) (cinfo))
+              return FALSE;
+            next_input_byte = cinfo->src->next_input_byte;
+            bytes_in_buffer = cinfo->src->bytes_in_buffer;
+          }
+          bytes_in_buffer--;
+          c = GETJOCTET(*next_input_byte++);
+        } while (c == 0xFF);
 
-	if (c == 0) {
-	  /* Found FF/00, which represents an FF data byte */
-	  c = 0xFF;
-	} else {
-	  /* Oops, it's actually a marker indicating end of compressed data.
-	   * Save the marker code for later use.
-	   * Fine point: it might appear that we should save the marker into
-	   * bitread working state, not straight into permanent state.  But
-	   * once we have hit a marker, we cannot need to suspend within the
-	   * current MCU, because we will read no more bytes from the data
-	   * source.  So it is OK to update permanent state right away.
-	   */
-	  cinfo->unread_marker = c;
-	  /* See if we need to insert some fake zero bits. */
-	  goto no_more_bytes;
-	}
+        if (c == 0) {
+          /* Found FF/00, which represents an FF data byte */
+          c = 0xFF;
+        } else {
+          /* Oops, it's actually a marker indicating end of compressed data.
+           * Save the marker code for later use.
+           * Fine point: it might appear that we should save the marker into
+           * bitread working state, not straight into permanent state.  But
+           * once we have hit a marker, we cannot need to suspend within the
+           * current MCU, because we will read no more bytes from the data
+           * source.  So it is OK to update permanent state right away.
+           */
+          cinfo->unread_marker = c;
+          /* See if we need to insert some fake zero bits. */
+          goto no_more_bytes;
+        }
       }
 
       /* OK, load c into get_buffer */
@@ -370,8 +375,8 @@
        * appears per data segment.
        */
       if (! cinfo->entropy->insufficient_data) {
-	WARNMS(cinfo, JWRN_HIT_MARKER);
-	cinfo->entropy->insufficient_data = TRUE;
+        WARNMS(cinfo, JWRN_HIT_MARKER);
+        cinfo->entropy->insufficient_data = TRUE;
       }
       /* Fill the buffer with zero bits */
       get_buffer <<= MIN_GET_BITS - bits_left;
@@ -389,6 +394,50 @@
 }
 
 
+/* Macro version of the above, which performs much better but does not
+   handle markers.  We have to hand off any blocks with markers to the
+   slower routines. */
+
+#define GET_BYTE \
+{ \
+  register int c0, c1; \
+  c0 = GETJOCTET(*buffer++); \
+  c1 = GETJOCTET(*buffer); \
+  /* Pre-execute most common case */ \
+  get_buffer = (get_buffer << 8) | c0; \
+  bits_left += 8; \
+  if (c0 == 0xFF) { \
+    /* Pre-execute case of FF/00, which represents an FF data byte */ \
+    buffer++; \
+    if (c1 != 0) { \
+      /* Oops, it's actually a marker indicating end of compressed data. */ \
+      cinfo->unread_marker = c1; \
+      /* Back out pre-execution and fill the buffer with zero bits */ \
+      buffer -= 2; \
+      get_buffer &= ~0xFF; \
+    } \
+  } \
+}
+
+#if __WORDSIZE == 64 || defined(_WIN64)
+
+/* Pre-fetch 48 bits (6 bytes), because the holding register is 64-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
+  }
+
+#else
+
+/* Pre-fetch 16 bits (2 bytes), because the holding register is 32-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE \
+  }
+
+#endif
+
+
 /*
  * Out-of-line code for Huffman code decoding.
  * See jdhuff.h for info about usage.
@@ -396,8 +445,8 @@
 
 GLOBAL(int)
 jpeg_huff_decode (bitread_working_state * state,
-		  register bit_buf_type get_buffer, register int bits_left,
-		  d_derived_tbl * htbl, int min_bits)
+                  register bit_buf_type get_buffer, register int bits_left,
+                  d_derived_tbl * htbl, int min_bits)
 {
   register int l = min_bits;
   register INT32 code;
@@ -426,7 +475,7 @@
 
   if (l > 16) {
     WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
-    return 0;			/* fake a zero as the safest result */
+    return 0;                   /* fake a zero as the safest result */
   }
 
   return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
@@ -438,9 +487,10 @@
  * On some machines, a shift and add will be faster than a table lookup.
  */
 
+#define AVOID_TABLES
 #ifdef AVOID_TABLES
 
-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
+#define HUFF_EXTEND(x,s)  ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1)))
 
 #else
 
@@ -498,6 +548,187 @@
 }
 
 
+LOCAL(boolean)
+decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  BITREAD_STATE_VARS;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(state, entropy->saved);
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data[blkn];
+    d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r;
+
+    /* Decode a single block's worth of coefficients */
+
+    /* Section F.2.2.1: decode the DC coefficient difference */
+    HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+    if (s) {
+      CHECK_BIT_BUFFER(br_state, s, return FALSE);
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      /* Convert DC difference to actual value, update last_dc_val */
+      int ci = cinfo->MCU_membership[blkn];
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+      (*block)[0] = (JCOEF) s;
+    }
+
+    if (entropy->ac_needed[blkn]) {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* Since zeroes are skipped, output area must be cleared beforehand */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Output coefficient in natural (dezigzagged) order.
+           * Note: the extra entries in jpeg_natural_order[] will save us
+           * if k >= DCTSIZE2, which could happen if the data is corrupted.
+           */
+          (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          DROP_BITS(s);
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(entropy->saved, state);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  BITREAD_STATE_VARS;
+  JOCTET *buffer;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+  buffer = (JOCTET *) br_state.next_input_byte;
+  ASSIGN_STATE(state, entropy->saved);
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data[blkn];
+    d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r, l;
+
+    HUFF_DECODE_FAST(s, l, dctbl);
+    if (s) {
+      FILL_BIT_BUFFER_FAST
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      int ci = cinfo->MCU_membership[blkn];
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      (*block)[0] = (JCOEF) s;
+    }
+
+    if (entropy->ac_needed[blkn]) {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          DROP_BITS(s);
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  if (cinfo->unread_marker != 0) {
+    cinfo->unread_marker = 0;
+    return FALSE;
+  }
+
+  br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
+  br_state.next_input_byte = buffer;
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(entropy->saved, state);
+  return TRUE;
+}
+
+
 /*
  * Decode and return one MCU's worth of Huffman-compressed coefficients.
  * The coefficients are reordered from zigzag order into natural array order,
@@ -513,111 +744,39 @@
  * this module, since we'll just re-assign them on the next call.)
  */
 
+#define BUFSIZE (DCTSIZE2 * 2)
+
 METHODDEF(boolean)
 decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
+  int usefast = 1;
 
   /* Process restart marker if needed; may have to suspend */
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
+    usefast = 0;
   }
 
+  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU
+    || cinfo->unread_marker != 0)
+    usefast = 0;
+
   /* If we've run out of data, just leave the MCU set to zeroes.
    * This way, we return uniform gray for the remainder of the segment.
    */
   if (! entropy->pub.insufficient_data) {
 
-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
-      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
-      register int s, k, r;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
-      if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
-      }
-
-      if (entropy->dc_needed[blkn]) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	int ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
-	(*block)[0] = (JCOEF) s;
-      }
-
-      if (entropy->ac_needed[blkn]) {
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (k = 1; k < DCTSIZE2; k++) {
-	  HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
-      
-	  r = s >> 4;
-	  s &= 15;
-      
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in jpeg_natural_order[] will save us
-	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
-	     */
-	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      break;
-	    k += 15;
-	  }
-	}
-
-      } else {
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* In this path we just discard the values */
-	for (k = 1; k < DCTSIZE2; k++) {
-	  HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
-      
-	  r = s >> 4;
-	  s &= 15;
-      
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    DROP_BITS(s);
-	  } else {
-	    if (r != 15)
-	      break;
-	    k += 15;
-	  }
-	}
-
-      }
+    if (usefast) {
+      if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow;
+    }
+    else {
+      use_slow:
+      if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
     }
 
-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
   }
 
   /* Account for restart interval (no-op if not using restarts) */
@@ -637,9 +796,15 @@
   huff_entropy_ptr entropy;
   int i;
 
+  /* Motion JPEG frames typically do not include the Huffman tables if they
+     are the default tables.  Thus, if the tables are not set by the time
+     the Huffman decoder is initialized (usually within the body of
+     jpeg_start_decompress()), we set them to default values. */
+  std_huff_tables((j_common_ptr) cinfo);
+
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(huff_entropy_decoder));
+                                sizeof(huff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_huff_decoder;
   entropy->pub.decode_mcu = decode_mcu;
diff --git a/jdhuff.h b/jdhuff.h
index ae19b6c..f2805e4 100644
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -1,8 +1,10 @@
 /*
  * jdhuff.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010-2011, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains declarations for Huffman entropy decoding routines
@@ -10,24 +12,16 @@
  * progressive decoder (jdphuff.c).  No other modules need to see these.
  */
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_make_d_derived_tbl	jMkDDerived
-#define jpeg_fill_bit_buffer	jFilBitBuf
-#define jpeg_huff_decode	jHufDecode
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
 
 /* Derived data constructed for each Huffman table */
 
-#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
 
 typedef struct {
   /* Basic tables: (element [0] of each array is unused) */
-  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  INT32 maxcode[18];            /* largest code of length k (-1 if none) */
   /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  INT32 valoffset[18];          /* huffval[] offset for codes of length k */
   /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
    * the smallest code of length k; so given a code of length k, the
    * corresponding symbol is huffval[code + valoffset[k]]
@@ -36,19 +30,23 @@
   /* Link to public Huffman table (needed only in jpeg_huff_decode) */
   JHUFF_TBL *pub;
 
-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+  /* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
    * the input data stream.  If the next Huffman code is no more
    * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
+   * the corresponding symbol directly from this table.
+   *
+   * The lower HUFF_LOOKAHEAD (8) bits of each table entry contain the
+   * symbol.  The bits above them contain the number of bits in the
+   * corresponding Huffman code, or HUFF_LOOKAHEAD + 1 if the code is
+   * longer than HUFF_LOOKAHEAD bits.
    */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+  int lookup[1<<HUFF_LOOKAHEAD];
 } d_derived_tbl;
 
 /* Expand a Huffman table definition into the derived format */
 EXTERN(void) jpeg_make_d_derived_tbl
-	JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
-	     d_derived_tbl ** pdtbl));
+        (j_decompress_ptr cinfo, boolean isDC, int tblno,
+         d_derived_tbl ** pdtbl);
 
 
 /*
@@ -69,8 +67,17 @@
  * necessary.
  */
 
-typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+#if __WORDSIZE == 64 || defined(_WIN64)
+
+typedef size_t bit_buf_type;    /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64                /* size of buffer in bits */
+
+#else
+
+typedef INT32 bit_buf_type;     /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32                /* size of buffer in bits */
+
+#endif
 
 /* If long is > 32 bits on your machine, and shifting/masking longs is
  * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
@@ -79,43 +86,43 @@
  * because not all machines measure sizeof in 8-bit bytes.
  */
 
-typedef struct {		/* Bitreading state saved across MCUs */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
+typedef struct {                /* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
 } bitread_perm_state;
 
-typedef struct {		/* Bitreading working state within an MCU */
+typedef struct {                /* Bitreading working state within an MCU */
   /* Current data source location */
   /* We need a copy, rather than munging the original, in case of suspension */
   const JOCTET * next_input_byte; /* => next byte to read from source */
-  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  size_t bytes_in_buffer;       /* # of bytes remaining in source buffer */
   /* Bit input buffer --- note these values are kept in register variables,
    * not in this struct, inside the inner loops.
    */
-  bit_buf_type get_buffer;	/* current bit-extraction buffer */
-  int bits_left;		/* # of unused bits in it */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
   /* Pointer needed by jpeg_fill_bit_buffer. */
-  j_decompress_ptr cinfo;	/* back link to decompress master record */
+  j_decompress_ptr cinfo;       /* back link to decompress master record */
 } bitread_working_state;
 
 /* Macros to declare and load/save bitread local variables. */
 #define BITREAD_STATE_VARS  \
-	register bit_buf_type get_buffer;  \
-	register int bits_left;  \
-	bitread_working_state br_state
+        register bit_buf_type get_buffer;  \
+        register int bits_left;  \
+        bitread_working_state br_state
 
 #define BITREAD_LOAD_STATE(cinfop,permstate)  \
-	br_state.cinfo = cinfop; \
-	br_state.next_input_byte = cinfop->src->next_input_byte; \
-	br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
-	get_buffer = permstate.get_buffer; \
-	bits_left = permstate.bits_left;
+        br_state.cinfo = cinfop; \
+        br_state.next_input_byte = cinfop->src->next_input_byte; \
+        br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+        get_buffer = permstate.get_buffer; \
+        bits_left = permstate.bits_left;
 
 #define BITREAD_SAVE_STATE(cinfop,permstate)  \
-	cinfop->src->next_input_byte = br_state.next_input_byte; \
-	cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
-	permstate.get_buffer = get_buffer; \
-	permstate.bits_left = bits_left
+        cinfop->src->next_input_byte = br_state.next_input_byte; \
+        cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+        permstate.get_buffer = get_buffer; \
+        permstate.bits_left = bits_left
 
 /*
  * These macros provide the in-line portion of bit fetching.
@@ -123,37 +130,37 @@
  * before using GET_BITS, PEEK_BITS, or DROP_BITS.
  * The variables get_buffer and bits_left are assumed to be locals,
  * but the state struct might not be (jpeg_huff_decode needs this).
- *	CHECK_BIT_BUFFER(state,n,action);
- *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      CHECK_BIT_BUFFER(state,n,action);
+ *              Ensure there are N bits in get_buffer; if suspend, take action.
  *      val = GET_BITS(n);
- *		Fetch next N bits.
+ *              Fetch next N bits.
  *      val = PEEK_BITS(n);
- *		Fetch next N bits without removing them from the buffer.
- *	DROP_BITS(n);
- *		Discard next N bits.
+ *              Fetch next N bits without removing them from the buffer.
+ *      DROP_BITS(n);
+ *              Discard next N bits.
  * The value N should be a simple variable, not an expression, because it
  * is evaluated multiple times.
  */
 
 #define CHECK_BIT_BUFFER(state,nbits,action) \
-	{ if (bits_left < (nbits)) {  \
-	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
-	      { action; }  \
-	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+        { if (bits_left < (nbits)) {  \
+            if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+              { action; }  \
+            get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
 
 #define GET_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
+        (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
 
 #define PEEK_BITS(nbits) \
-	(((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
+        (((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
 
 #define DROP_BITS(nbits) \
-	(bits_left -= (nbits))
+        (bits_left -= (nbits))
 
 /* Load up the bit buffer to a depth of at least nbits */
 EXTERN(boolean) jpeg_fill_bit_buffer
-	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
-	     register int bits_left, int nbits));
+        (bitread_working_state * state, register bit_buf_type get_buffer,
+         register int bits_left, int nbits);
 
 
 /*
@@ -183,19 +190,38 @@
     } \
   } \
   look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
+  if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
     DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
+    result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
   } else { \
-    nb = HUFF_LOOKAHEAD+1; \
 slowlabel: \
     if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
-	{ failaction; } \
+        { failaction; } \
     get_buffer = state.get_buffer; bits_left = state.bits_left; \
   } \
 }
 
+#define HUFF_DECODE_FAST(s,nb,htbl) \
+  FILL_BIT_BUFFER_FAST; \
+  s = PEEK_BITS(HUFF_LOOKAHEAD); \
+  s = htbl->lookup[s]; \
+  nb = s >> HUFF_LOOKAHEAD; \
+  /* Pre-execute the common case of nb <= HUFF_LOOKAHEAD */ \
+  DROP_BITS(nb); \
+  s = s & ((1 << HUFF_LOOKAHEAD) - 1); \
+  if (nb > HUFF_LOOKAHEAD) { \
+    /* Equivalent of jpeg_huff_decode() */ \
+    /* Don't use GET_BITS() here because we don't want to modify bits_left */ \
+    s = (get_buffer >> bits_left) & ((1 << (nb)) - 1); \
+    while (s > htbl->maxcode[nb]) { \
+      s <<= 1; \
+      s |= GET_BITS(1); \
+      nb++; \
+    } \
+    s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \
+  }
+
 /* Out-of-line case for Huffman code fetching */
 EXTERN(int) jpeg_huff_decode
-	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
-	     register int bits_left, d_derived_tbl * htbl, int min_bits));
+        (bitread_working_state * state, register bit_buf_type get_buffer,
+         register int bits_left, d_derived_tbl * htbl, int min_bits);
diff --git a/jdinput.c b/jdinput.c
index 0c2ac8f..6f4ea7b 100644
--- a/jdinput.c
+++ b/jdinput.c
@@ -1,8 +1,10 @@
 /*
  * jdinput.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains input control logic for the JPEG decompressor.
@@ -14,6 +16,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Private state */
@@ -21,14 +24,14 @@
 typedef struct {
   struct jpeg_input_controller pub; /* public fields */
 
-  boolean inheaders;		/* TRUE until first SOS is reached */
+  boolean inheaders;            /* TRUE until first SOS is reached */
 } my_input_controller;
 
 typedef my_input_controller * my_inputctl_ptr;
 
 
 /* Forward declarations */
-METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+METHODDEF(int) consume_markers (j_decompress_ptr cinfo);
 
 
 /*
@@ -54,7 +57,7 @@
   /* Check that number of components won't exceed internal array sizes */
   if (cinfo->num_components > MAX_COMPONENTS)
     ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	     MAX_COMPONENTS);
+             MAX_COMPONENTS);
 
   /* Compute maximum sampling factors; check factor validity */
   cinfo->max_h_samp_factor = 1;
@@ -62,31 +65,45 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
-	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+        compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
       ERREXIT(cinfo, JERR_BAD_SAMPLING);
     cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
-				   compptr->h_samp_factor);
+                                   compptr->h_samp_factor);
     cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
-				   compptr->v_samp_factor);
+                                   compptr->v_samp_factor);
   }
 
+#if JPEG_LIB_VERSION >=80
+    cinfo->block_size = DCTSIZE;
+    cinfo->natural_order = jpeg_natural_order;
+    cinfo->lim_Se = DCTSIZE2-1;
+#endif
+
   /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
    * In the full decompressor, this will be overridden by jdmaster.c;
    * but in the transcoder, jdmaster.c is not used, so we must do it here.
    */
+#if JPEG_LIB_VERSION >= 70
+  cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
+#else
   cinfo->min_DCT_scaled_size = DCTSIZE;
+#endif
 
   /* Compute dimensions of components */
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
+#if JPEG_LIB_VERSION >= 70
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
+#else
     compptr->DCT_scaled_size = DCTSIZE;
+#endif
     /* Size in DCT blocks */
     compptr->width_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->height_in_blocks = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
     /* downsampled_width and downsampled_height will also be overridden by
      * jdmaster.c if we are doing full decompression.  The transcoder library
      * doesn't use these values, but the calling application might.
@@ -94,10 +111,10 @@
     /* Size in samples */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
-		    (long) cinfo->max_h_samp_factor);
+                    (long) cinfo->max_h_samp_factor);
     compptr->downsampled_height = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
-		    (long) cinfo->max_v_samp_factor);
+                    (long) cinfo->max_v_samp_factor);
     /* Mark component needed, until color conversion says otherwise */
     compptr->component_needed = TRUE;
     /* Mark no quantization table yet saved for component */
@@ -107,7 +124,7 @@
   /* Compute number of fully interleaved MCU rows. */
   cinfo->total_iMCU_rows = (JDIMENSION)
     jdiv_round_up((long) cinfo->image_height,
-		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+                  (long) (cinfo->max_v_samp_factor*DCTSIZE));
 
   /* Decide whether file contains multiple scans */
   if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
@@ -124,21 +141,21 @@
 {
   int ci, mcublks, tmp;
   jpeg_component_info *compptr;
-  
+
   if (cinfo->comps_in_scan == 1) {
-    
+
     /* Noninterleaved (single-component) scan */
     compptr = cinfo->cur_comp_info[0];
-    
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = compptr->width_in_blocks;
     cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
-    
+
     /* For noninterleaved scan, always one block per MCU */
     compptr->MCU_width = 1;
     compptr->MCU_height = 1;
     compptr->MCU_blocks = 1;
-    compptr->MCU_sample_width = compptr->DCT_scaled_size;
+    compptr->MCU_sample_width = compptr->_DCT_scaled_size;
     compptr->last_col_width = 1;
     /* For noninterleaved scans, it is convenient to define last_row_height
      * as the number of block rows present in the last iMCU row.
@@ -146,35 +163,35 @@
     tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
     if (tmp == 0) tmp = compptr->v_samp_factor;
     compptr->last_row_height = tmp;
-    
+
     /* Prepare array describing MCU composition */
     cinfo->blocks_in_MCU = 1;
     cinfo->MCU_membership[0] = 0;
-    
+
   } else {
-    
+
     /* Interleaved (multi-component) scan */
     if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
       ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
-	       MAX_COMPS_IN_SCAN);
-    
+               MAX_COMPS_IN_SCAN);
+
     /* Overall image size in MCUs */
     cinfo->MCUs_per_row = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width,
-		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+                    (long) (cinfo->max_h_samp_factor*DCTSIZE));
     cinfo->MCU_rows_in_scan = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height,
-		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
-    
+                    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+
     cinfo->blocks_in_MCU = 0;
-    
+
     for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
       compptr = cinfo->cur_comp_info[ci];
       /* Sampling factors give # of blocks of component in each MCU */
       compptr->MCU_width = compptr->h_samp_factor;
       compptr->MCU_height = compptr->v_samp_factor;
       compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
-      compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size;
+      compptr->MCU_sample_width = compptr->MCU_width * compptr->_DCT_scaled_size;
       /* Figure number of non-dummy blocks in last MCU column & row */
       tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
       if (tmp == 0) tmp = compptr->MCU_width;
@@ -185,12 +202,12 @@
       /* Prepare array describing MCU composition */
       mcublks = compptr->MCU_blocks;
       if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
-	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
       while (mcublks-- > 0) {
-	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
       }
     }
-    
+
   }
 }
 
@@ -231,13 +248,13 @@
     /* Make sure specified quantization table is present */
     qtblno = compptr->quant_tbl_no;
     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
-	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     /* OK, save away the quantization table */
     qtbl = (JQUANT_TBL *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL));
+                                  sizeof(JQUANT_TBL));
+    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
     compptr->quant_table = qtbl;
   }
 }
@@ -296,31 +313,31 @@
   val = (*cinfo->marker->read_markers) (cinfo);
 
   switch (val) {
-  case JPEG_REACHED_SOS:	/* Found SOS */
-    if (inputctl->inheaders) {	/* 1st SOS */
+  case JPEG_REACHED_SOS:        /* Found SOS */
+    if (inputctl->inheaders) {  /* 1st SOS */
       initial_setup(cinfo);
       inputctl->inheaders = FALSE;
       /* Note: start_input_pass must be called by jdmaster.c
        * before any more input can be consumed.  jdapimin.c is
        * responsible for enforcing this sequencing.
        */
-    } else {			/* 2nd or later SOS marker */
+    } else {                    /* 2nd or later SOS marker */
       if (! inputctl->pub.has_multiple_scans)
-	ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+        ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
       start_input_pass(cinfo);
     }
     break;
-  case JPEG_REACHED_EOI:	/* Found EOI */
+  case JPEG_REACHED_EOI:        /* Found EOI */
     inputctl->pub.eoi_reached = TRUE;
-    if (inputctl->inheaders) {	/* Tables-only datastream, apparently */
+    if (inputctl->inheaders) {  /* Tables-only datastream, apparently */
       if (cinfo->marker->saw_SOF)
-	ERREXIT(cinfo, JERR_SOF_NO_SOS);
+        ERREXIT(cinfo, JERR_SOF_NO_SOS);
     } else {
       /* Prevent infinite loop in coef ctlr's decompress_data routine
        * if user set output_scan_number larger than number of scans.
        */
       if (cinfo->output_scan_number > cinfo->input_scan_number)
-	cinfo->output_scan_number = cinfo->input_scan_number;
+        cinfo->output_scan_number = cinfo->input_scan_number;
     }
     break;
   case JPEG_SUSPENDED:
@@ -365,7 +382,7 @@
   /* Create subobject in permanent pool */
   inputctl = (my_inputctl_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_input_controller));
+                                sizeof(my_input_controller));
   cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
   /* Initialize method pointers */
   inputctl->pub.consume_input = consume_markers;
diff --git a/jdmainct.c b/jdmainct.c
index 13c956f..7f7bd33 100644
--- a/jdmainct.c
+++ b/jdmainct.c
@@ -1,8 +1,10 @@
 /*
  * jdmainct.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the main buffer controller for decompression.
@@ -16,6 +18,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /*
@@ -117,39 +120,39 @@
   /* Pointer to allocated workspace (M or M+2 row groups). */
   JSAMPARRAY buffer[MAX_COMPONENTS];
 
-  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
-  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+  boolean buffer_full;          /* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;      /* counts row groups output to postprocessor */
 
   /* Remaining fields are only used in the context case. */
 
   /* These are the master pointers to the funny-order pointer lists. */
-  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+  JSAMPIMAGE xbuffer[2];        /* pointers to weird pointer lists */
 
-  int whichptr;			/* indicates which pointer set is now in use */
-  int context_state;		/* process_data state machine status */
-  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
-  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+  int whichptr;                 /* indicates which pointer set is now in use */
+  int context_state;            /* process_data state machine status */
+  JDIMENSION rowgroups_avail;   /* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;      /* counts iMCU rows to detect image top/bot */
 } my_main_controller;
 
 typedef my_main_controller * my_main_ptr;
 
 /* context_state values: */
-#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
-#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
-#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+#define CTX_PREPARE_FOR_IMCU    0       /* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU        1       /* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW       2       /* feeding postponed row group */
 
 
 /* Forward declarations */
 METHODDEF(void) process_data_simple_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
 METHODDEF(void) process_data_context_main
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
 #ifdef QUANT_2PASS_SUPPORTED
 METHODDEF(void) process_data_crank_post
-	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
-	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
 #endif
 
 
@@ -159,34 +162,34 @@
  * This is done only once, not once per pass.
  */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci, rgroup;
-  int M = cinfo->min_DCT_scaled_size;
+  int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
   JSAMPARRAY xbuf;
 
   /* Get top-level space for component array pointers.
    * We alloc both arrays with one call to save a few cycles.
    */
-  main->xbuffer[0] = (JSAMPIMAGE)
+  main_ptr->xbuffer[0] = (JSAMPIMAGE)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
-  main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components;
+                                cinfo->num_components * 2 * sizeof(JSAMPARRAY));
+  main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
-      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
     /* Get space for pointer lists --- M+4 row groups in each list.
      * We alloc both pointer lists with one call to save a few cycles.
      */
     xbuf = (JSAMPARRAY)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
-    xbuf += rgroup;		/* want one row group at negative offsets */
-    main->xbuffer[0][ci] = xbuf;
+                                  2 * (rgroup * (M + 4)) * sizeof(JSAMPROW));
+    xbuf += rgroup;             /* want one row group at negative offsets */
+    main_ptr->xbuffer[0][ci] = xbuf;
     xbuf += rgroup * (M + 4);
-    main->xbuffer[1][ci] = xbuf;
+    main_ptr->xbuffer[1][ci] = xbuf;
   }
 }
 
@@ -194,26 +197,26 @@
 LOCAL(void)
 make_funny_pointers (j_decompress_ptr cinfo)
 /* Create the funny pointer lists discussed in the comments above.
- * The actual workspace is already allocated (in main->buffer),
+ * The actual workspace is already allocated (in main_ptr->buffer),
  * and the space for the pointer lists is allocated too.
  * This routine just fills in the curiously ordered lists.
  * This will be repeated at the beginning of each pass.
  */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci, i, rgroup;
-  int M = cinfo->min_DCT_scaled_size;
+  int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
   JSAMPARRAY buf, xbuf0, xbuf1;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
-      cinfo->min_DCT_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
     /* First copy the workspace pointers as-is */
-    buf = main->buffer[ci];
+    buf = main_ptr->buffer[ci];
     for (i = 0; i < rgroup * (M + 2); i++) {
       xbuf0[i] = xbuf1[i] = buf[i];
     }
@@ -240,18 +243,18 @@
  * This changes the pointer list state from top-of-image to the normal state.
  */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci, i, rgroup;
-  int M = cinfo->min_DCT_scaled_size;
+  int M = cinfo->_min_DCT_scaled_size;
   jpeg_component_info *compptr;
   JSAMPARRAY xbuf0, xbuf1;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
-      cinfo->min_DCT_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
     for (i = 0; i < rgroup; i++) {
       xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
       xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
@@ -269,7 +272,7 @@
  * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
  */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   int ci, i, rgroup, iMCUheight, rows_left;
   jpeg_component_info *compptr;
   JSAMPARRAY xbuf;
@@ -277,8 +280,8 @@
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     /* Count sample rows in one iMCU row and in one row group */
-    iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
-    rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
+    iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size;
+    rgroup = iMCUheight / cinfo->_min_DCT_scaled_size;
     /* Count nondummy sample rows remaining for this component */
     rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
     if (rows_left == 0) rows_left = iMCUheight;
@@ -286,12 +289,12 @@
      * so we need only do it once.
      */
     if (ci == 0) {
-      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+      main_ptr->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
     }
     /* Duplicate the last real sample row rgroup*2 times; this pads out the
      * last partial rowgroup and ensures at least one full rowgroup of context.
      */
-    xbuf = main->xbuffer[main->whichptr][ci];
+    xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci];
     for (i = 0; i < rgroup * 2; i++) {
       xbuf[rows_left + i] = xbuf[rows_left-1];
     }
@@ -306,27 +309,27 @@
 METHODDEF(void)
 start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
   switch (pass_mode) {
   case JBUF_PASS_THRU:
     if (cinfo->upsample->need_context_rows) {
-      main->pub.process_data = process_data_context_main;
+      main_ptr->pub.process_data = process_data_context_main;
       make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      main->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
-      main->context_state = CTX_PREPARE_FOR_IMCU;
-      main->iMCU_row_ctr = 0;
+      main_ptr->whichptr = 0;   /* Read first iMCU row into xbuffer[0] */
+      main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
+      main_ptr->iMCU_row_ctr = 0;
     } else {
       /* Simple case with no context needed */
-      main->pub.process_data = process_data_simple_main;
+      main_ptr->pub.process_data = process_data_simple_main;
     }
-    main->buffer_full = FALSE;	/* Mark buffer empty */
-    main->rowgroup_ctr = 0;
+    main_ptr->buffer_full = FALSE;      /* Mark buffer empty */
+    main_ptr->rowgroup_ctr = 0;
     break;
 #ifdef QUANT_2PASS_SUPPORTED
   case JBUF_CRANK_DEST:
     /* For last pass of 2-pass quantization, just crank the postprocessor */
-    main->pub.process_data = process_data_crank_post;
+    main_ptr->pub.process_data = process_data_crank_post;
     break;
 #endif
   default:
@@ -343,35 +346,35 @@
 
 METHODDEF(void)
 process_data_simple_main (j_decompress_ptr cinfo,
-			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			  JDIMENSION out_rows_avail)
+                          JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                          JDIMENSION out_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
   JDIMENSION rowgroups_avail;
 
   /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer))
-      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+  if (! main_ptr->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
   }
 
   /* There are always min_DCT_scaled_size row groups in an iMCU row. */
-  rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size;
+  rowgroups_avail = (JDIMENSION) cinfo->_min_DCT_scaled_size;
   /* Note: at the bottom of the image, we may pass extra garbage row groups
    * to the postprocessor.  The postprocessor has to check for bottom
    * of image anyway (at row resolution), so no point in us doing it too.
    */
 
   /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, main->buffer,
-				     &main->rowgroup_ctr, rowgroups_avail,
-				     output_buf, out_row_ctr, out_rows_avail);
+  (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
+                                     &main_ptr->rowgroup_ctr, rowgroups_avail,
+                                     output_buf, out_row_ctr, out_rows_avail);
 
   /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
-  if (main->rowgroup_ctr >= rowgroups_avail) {
-    main->buffer_full = FALSE;
-    main->rowgroup_ctr = 0;
+  if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
+    main_ptr->buffer_full = FALSE;
+    main_ptr->rowgroup_ctr = 0;
   }
 }
 
@@ -383,18 +386,18 @@
 
 METHODDEF(void)
 process_data_context_main (j_decompress_ptr cinfo,
-			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail)
+                           JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                           JDIMENSION out_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
 
   /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
+  if (! main_ptr->buffer_full) {
     if (! (*cinfo->coef->decompress_data) (cinfo,
-					   main->xbuffer[main->whichptr]))
-      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    main->iMCU_row_ctr++;	/* count rows received */
+                                           main_ptr->xbuffer[main_ptr->whichptr]))
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
+    main_ptr->iMCU_row_ctr++;   /* count rows received */
   }
 
   /* Postprocessor typically will not swallow all the input data it is handed
@@ -402,47 +405,47 @@
    * to exit and restart.  This switch lets us keep track of how far we got.
    * Note that each case falls through to the next on successful completion.
    */
-  switch (main->context_state) {
+  switch (main_ptr->context_state) {
   case CTX_POSTPONED_ROW:
     /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
-      return;			/* Need to suspend */
-    main->context_state = CTX_PREPARE_FOR_IMCU;
+    (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
+                        &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
+                        output_buf, out_row_ctr, out_rows_avail);
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
+      return;                   /* Need to suspend */
+    main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
     if (*out_row_ctr >= out_rows_avail)
-      return;			/* Postprocessor exactly filled output buf */
+      return;                   /* Postprocessor exactly filled output buf */
     /*FALLTHROUGH*/
   case CTX_PREPARE_FOR_IMCU:
     /* Prepare to process first M-1 row groups of this iMCU row */
-    main->rowgroup_ctr = 0;
-    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1);
     /* Check for bottom of image: if so, tweak pointers to "duplicate"
      * the last sample row, and adjust rowgroups_avail to ignore padding rows.
      */
-    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
+    if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
       set_bottom_pointers(cinfo);
-    main->context_state = CTX_PROCESS_IMCU;
+    main_ptr->context_state = CTX_PROCESS_IMCU;
     /*FALLTHROUGH*/
   case CTX_PROCESS_IMCU:
     /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
-			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
-      return;			/* Need to suspend */
+    (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
+                        &main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
+                        output_buf, out_row_ctr, out_rows_avail);
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
+      return;                   /* Need to suspend */
     /* After the first iMCU, change wraparound pointers to normal state */
-    if (main->iMCU_row_ctr == 1)
+    if (main_ptr->iMCU_row_ctr == 1)
       set_wraparound_pointers(cinfo);
     /* Prepare to load new iMCU row using other xbuffer list */
-    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
-    main->buffer_full = FALSE;
+    main_ptr->whichptr ^= 1;    /* 0=>1 or 1=>0 */
+    main_ptr->buffer_full = FALSE;
     /* Still need to process last row group of this iMCU row, */
     /* which is saved at index M+1 of the other xbuffer */
-    main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
-    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
-    main->context_state = CTX_POSTPONED_ROW;
+    main_ptr->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1);
+    main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2);
+    main_ptr->context_state = CTX_POSTPONED_ROW;
   }
 }
 
@@ -457,12 +460,12 @@
 
 METHODDEF(void)
 process_data_crank_post (j_decompress_ptr cinfo,
-			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			 JDIMENSION out_rows_avail)
+                         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                         JDIMENSION out_rows_avail)
 {
   (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL,
-				     (JDIMENSION *) NULL, (JDIMENSION) 0,
-				     output_buf, out_row_ctr, out_rows_avail);
+                                     (JDIMENSION *) NULL, (JDIMENSION) 0,
+                                     output_buf, out_row_ctr, out_rows_avail);
 }
 
 #endif /* QUANT_2PASS_SUPPORTED */
@@ -475,38 +478,38 @@
 GLOBAL(void)
 jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
 {
-  my_main_ptr main;
+  my_main_ptr main_ptr;
   int ci, rgroup, ngroups;
   jpeg_component_info *compptr;
 
-  main = (my_main_ptr)
+  main_ptr = (my_main_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_d_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
+                                sizeof(my_main_controller));
+  cinfo->main = (struct jpeg_d_main_controller *) main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;
 
-  if (need_full_buffer)		/* shouldn't happen */
+  if (need_full_buffer)         /* shouldn't happen */
     ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 
   /* Allocate the workspace.
    * ngroups is the number of row groups we need.
    */
   if (cinfo->upsample->need_context_rows) {
-    if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */
+    if (cinfo->_min_DCT_scaled_size < 2) /* unsupported, see comments above */
       ERREXIT(cinfo, JERR_NOTIMPL);
     alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
-    ngroups = cinfo->min_DCT_scaled_size + 2;
+    ngroups = cinfo->_min_DCT_scaled_size + 2;
   } else {
-    ngroups = cinfo->min_DCT_scaled_size;
+    ngroups = cinfo->_min_DCT_scaled_size;
   }
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
-    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
-      cinfo->min_DCT_scaled_size; /* height of a row group of component */
-    main->buffer[ci] = (*cinfo->mem->alloc_sarray)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 compptr->width_in_blocks * compptr->DCT_scaled_size,
-			 (JDIMENSION) (rgroup * ngroups));
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
+                        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                         compptr->width_in_blocks * compptr->_DCT_scaled_size,
+                         (JDIMENSION) (rgroup * ngroups));
   }
 }
diff --git a/jdmarker.c b/jdmarker.c
index f4cca8c..d1357af 100644
--- a/jdmarker.c
+++ b/jdmarker.c
@@ -1,8 +1,10 @@
 /*
  * jdmarker.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2012, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to decode JPEG datastream markers.
@@ -17,29 +19,29 @@
 #include "jpeglib.h"
 
 
-typedef enum {			/* JPEG marker codes */
+typedef enum {                  /* JPEG marker codes */
   M_SOF0  = 0xc0,
   M_SOF1  = 0xc1,
   M_SOF2  = 0xc2,
   M_SOF3  = 0xc3,
-  
+
   M_SOF5  = 0xc5,
   M_SOF6  = 0xc6,
   M_SOF7  = 0xc7,
-  
+
   M_JPG   = 0xc8,
   M_SOF9  = 0xc9,
   M_SOF10 = 0xca,
   M_SOF11 = 0xcb,
-  
+
   M_SOF13 = 0xcd,
   M_SOF14 = 0xce,
   M_SOF15 = 0xcf,
-  
+
   M_DHT   = 0xc4,
-  
+
   M_DAC   = 0xcc,
-  
+
   M_RST0  = 0xd0,
   M_RST1  = 0xd1,
   M_RST2  = 0xd2,
@@ -48,7 +50,7 @@
   M_RST5  = 0xd5,
   M_RST6  = 0xd6,
   M_RST7  = 0xd7,
-  
+
   M_SOI   = 0xd8,
   M_EOI   = 0xd9,
   M_SOS   = 0xda,
@@ -57,7 +59,7 @@
   M_DRI   = 0xdd,
   M_DHP   = 0xde,
   M_EXP   = 0xdf,
-  
+
   M_APP0  = 0xe0,
   M_APP1  = 0xe1,
   M_APP2  = 0xe2,
@@ -74,13 +76,13 @@
   M_APP13 = 0xed,
   M_APP14 = 0xee,
   M_APP15 = 0xef,
-  
+
   M_JPG0  = 0xf0,
   M_JPG13 = 0xfd,
   M_COM   = 0xfe,
-  
+
   M_TEM   = 0x01,
-  
+
   M_ERROR = 0x100
 } JPEG_MARKER;
 
@@ -99,8 +101,8 @@
   unsigned int length_limit_APPn[16];
 
   /* Status of COM/APPn marker saving */
-  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
-  unsigned int bytes_read;		/* data bytes read so far in marker */
+  jpeg_saved_marker_ptr cur_marker;     /* NULL if not processing a marker */
+  unsigned int bytes_read;              /* data bytes read so far in marker */
   /* Note: cur_marker is not linked into marker_list until it's all read. */
 } my_marker_reader;
 
@@ -117,49 +119,49 @@
 
 /* Declare and initialize local copies of input pointer/count */
 #define INPUT_VARS(cinfo)  \
-	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
-	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
-	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+        struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+        const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+        size_t bytes_in_buffer = datasrc->bytes_in_buffer
 
 /* Unload the local copies --- do this only at a restart boundary */
 #define INPUT_SYNC(cinfo)  \
-	( datasrc->next_input_byte = next_input_byte,  \
-	  datasrc->bytes_in_buffer = bytes_in_buffer )
+        ( datasrc->next_input_byte = next_input_byte,  \
+          datasrc->bytes_in_buffer = bytes_in_buffer )
 
 /* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
 #define INPUT_RELOAD(cinfo)  \
-	( next_input_byte = datasrc->next_input_byte,  \
-	  bytes_in_buffer = datasrc->bytes_in_buffer )
+        ( next_input_byte = datasrc->next_input_byte,  \
+          bytes_in_buffer = datasrc->bytes_in_buffer )
 
 /* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
  * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
  * but we must reload the local copies after a successful fill.
  */
 #define MAKE_BYTE_AVAIL(cinfo,action)  \
-	if (bytes_in_buffer == 0) {  \
-	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
-	    { action; }  \
-	  INPUT_RELOAD(cinfo);  \
-	}
+        if (bytes_in_buffer == 0) {  \
+          if (! (*datasrc->fill_input_buffer) (cinfo))  \
+            { action; }  \
+          INPUT_RELOAD(cinfo);  \
+        }
 
 /* Read a byte into variable V.
  * If must suspend, take the specified action (typically "return FALSE").
  */
 #define INPUT_BYTE(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = GETJOCTET(*next_input_byte++); )
+        MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V = GETJOCTET(*next_input_byte++); )
 
 /* As above, but read two bytes interpreted as an unsigned 16-bit integer.
  * V should be declared unsigned int or perhaps INT32.
  */
 #define INPUT_2BYTES(cinfo,V,action)  \
-	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
-		  MAKE_BYTE_AVAIL(cinfo,action); \
-		  bytes_in_buffer--; \
-		  V += GETJOCTET(*next_input_byte++); )
+        MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+                  MAKE_BYTE_AVAIL(cinfo,action); \
+                  bytes_in_buffer--; \
+                  V += GETJOCTET(*next_input_byte++); )
 
 
 /*
@@ -198,7 +200,7 @@
 /* Process an SOI marker */
 {
   int i;
-  
+
   TRACEMS(cinfo, 1, JTRC_SOI);
 
   if (cinfo->marker->saw_SOI)
@@ -255,8 +257,8 @@
   length -= 8;
 
   TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
-	   (int) cinfo->image_width, (int) cinfo->image_height,
-	   cinfo->num_components);
+           (int) cinfo->image_width, (int) cinfo->image_height,
+           cinfo->num_components);
 
   if (cinfo->marker->saw_SOF)
     ERREXIT(cinfo, JERR_SOF_DUPLICATE);
@@ -271,11 +273,11 @@
   if (length != (cinfo->num_components * 3))
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
-  if (cinfo->comp_info == NULL)	/* do only once, even if suspend */
+  if (cinfo->comp_info == NULL) /* do only once, even if suspend */
     cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
-			((j_common_ptr) cinfo, JPOOL_IMAGE,
-			 cinfo->num_components * SIZEOF(jpeg_component_info));
-  
+                        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                         cinfo->num_components * sizeof(jpeg_component_info));
+
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
     compptr->component_index = ci;
@@ -286,8 +288,8 @@
     INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
 
     TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
-	     compptr->component_id, compptr->h_samp_factor,
-	     compptr->v_samp_factor, compptr->quant_tbl_no);
+             compptr->component_id, compptr->h_samp_factor,
+             compptr->v_samp_factor, compptr->quant_tbl_no);
   }
 
   cinfo->marker->saw_SOF = TRUE;
@@ -302,7 +304,7 @@
 /* Process a SOS marker */
 {
   INT32 length;
-  int i, ci, n, c, cc;
+  int i, ci, n, c, cc, pi;
   jpeg_component_info * compptr;
   INPUT_VARS(cinfo);
 
@@ -322,14 +324,18 @@
 
   /* Collect the component-spec parameters */
 
+  for (i = 0; i < MAX_COMPS_IN_SCAN; i++)
+    cinfo->cur_comp_info[i] = NULL;
+
   for (i = 0; i < n; i++) {
     INPUT_BYTE(cinfo, cc, return FALSE);
     INPUT_BYTE(cinfo, c, return FALSE);
-    
-    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
-      if (cc == compptr->component_id)
-	goto id_found;
+
+    for (ci = 0, compptr = cinfo->comp_info;
+         ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN;
+         ci++, compptr++) {
+      if (cc == compptr->component_id && !cinfo->cur_comp_info[ci])
+        goto id_found;
     }
 
     ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
@@ -339,9 +345,16 @@
     cinfo->cur_comp_info[i] = compptr;
     compptr->dc_tbl_no = (c >> 4) & 15;
     compptr->ac_tbl_no = (c     ) & 15;
-    
+
     TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
-	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+             compptr->dc_tbl_no, compptr->ac_tbl_no);
+
+    /* This CSi (cc) must differ from every CSi already seen in this scan */
+    for (pi = 0; pi < i; pi++) {
+      if (cinfo->cur_comp_info[pi] == compptr) {
+        ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+      }
+    }
   }
 
   /* Collect the additional scan parameters Ss, Se, Ah/Al. */
@@ -354,7 +367,7 @@
   cinfo->Al = (c     ) & 15;
 
   TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
-	   cinfo->Ah, cinfo->Al);
+           cinfo->Ah, cinfo->Al);
 
   /* Prepare to scan data & restart markers */
   cinfo->marker->next_restart_num = 0;
@@ -379,7 +392,7 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   while (length > 0) {
     INPUT_BYTE(cinfo, index, return FALSE);
     INPUT_BYTE(cinfo, val, return FALSE);
@@ -393,11 +406,11 @@
 
     if (index >= NUM_ARITH_TBLS) { /* define AC table */
       cinfo->arith_ac_K[index-NUM_ARITH_TBLS] = (UINT8) val;
-    } else {			/* define DC table */
+    } else {                    /* define DC table */
       cinfo->arith_dc_L[index] = (UINT8) (val & 0x0F);
       cinfo->arith_dc_U[index] = (UINT8) (val >> 4);
       if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
-	ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+        ERREXIT1(cinfo, JERR_DAC_VALUE, val);
     }
   }
 
@@ -428,12 +441,12 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   while (length > 16) {
     INPUT_BYTE(cinfo, index, return FALSE);
 
     TRACEMS1(cinfo, 1, JTRC_DHT, index);
-      
+
     bits[0] = 0;
     count = 0;
     for (i = 1; i <= 16; i++) {
@@ -444,11 +457,11 @@
     length -= 1 + 16;
 
     TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[1], bits[2], bits[3], bits[4],
-	     bits[5], bits[6], bits[7], bits[8]);
+             bits[1], bits[2], bits[3], bits[4],
+             bits[5], bits[6], bits[7], bits[8]);
     TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
-	     bits[9], bits[10], bits[11], bits[12],
-	     bits[13], bits[14], bits[15], bits[16]);
+             bits[9], bits[10], bits[11], bits[12],
+             bits[13], bits[14], bits[15], bits[16]);
 
     /* Here we just do minimal validation of the counts to avoid walking
      * off the end of our table space.  jdhuff.c will check more carefully.
@@ -459,23 +472,26 @@
     for (i = 0; i < count; i++)
       INPUT_BYTE(cinfo, huffval[i], return FALSE);
 
+    MEMZERO(&huffval[count], (256 - count) * sizeof(UINT8));
+
     length -= count;
 
-    if (index & 0x10) {		/* AC table definition */
+    if (index & 0x10) {         /* AC table definition */
       index -= 0x10;
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
       htblptr = &cinfo->ac_huff_tbl_ptrs[index];
-    } else {			/* DC table definition */
+    } else {                    /* DC table definition */
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
       htblptr = &cinfo->dc_huff_tbl_ptrs[index];
     }
 
-    if (index < 0 || index >= NUM_HUFF_TBLS)
-      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
-
     if (*htblptr == NULL)
       *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-  
-    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+
+    MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
   }
 
   if (length != 0)
@@ -508,27 +524,27 @@
 
     if (n >= NUM_QUANT_TBLS)
       ERREXIT1(cinfo, JERR_DQT_INDEX, n);
-      
+
     if (cinfo->quant_tbl_ptrs[n] == NULL)
       cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
     quant_ptr = cinfo->quant_tbl_ptrs[n];
 
     for (i = 0; i < DCTSIZE2; i++) {
       if (prec)
-	INPUT_2BYTES(cinfo, tmp, return FALSE);
+        INPUT_2BYTES(cinfo, tmp, return FALSE);
       else
-	INPUT_BYTE(cinfo, tmp, return FALSE);
+        INPUT_BYTE(cinfo, tmp, return FALSE);
       /* We convert the zigzag-order table to natural array order. */
       quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp;
     }
 
     if (cinfo->err->trace_level >= 2) {
       for (i = 0; i < DCTSIZE2; i += 8) {
-	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
-		 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
-		 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
-		 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
-		 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
+        TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+                 quant_ptr->quantval[i],   quant_ptr->quantval[i+1],
+                 quant_ptr->quantval[i+2], quant_ptr->quantval[i+3],
+                 quant_ptr->quantval[i+4], quant_ptr->quantval[i+5],
+                 quant_ptr->quantval[i+6], quant_ptr->quantval[i+7]);
       }
     }
 
@@ -553,7 +569,7 @@
   INPUT_VARS(cinfo);
 
   INPUT_2BYTES(cinfo, length, return FALSE);
-  
+
   if (length != 4)
     ERREXIT(cinfo, JERR_BAD_LENGTH);
 
@@ -575,14 +591,14 @@
  * JFIF and Adobe markers, respectively.
  */
 
-#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
-#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
-#define APPN_DATA_LEN	14	/* Must be the largest of the above!! */
+#define APP0_DATA_LEN   14      /* Length of interesting data in APP0 */
+#define APP14_DATA_LEN  12      /* Length of interesting data in APP14 */
+#define APPN_DATA_LEN   14      /* Must be the largest of the above!! */
 
 
 LOCAL(void)
-examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	      unsigned int datalen, INT32 remaining)
+examine_app0 (j_decompress_ptr cinfo, JOCTET * data,
+              unsigned int datalen, INT32 remaining)
 /* Examine first few bytes from an APP0.
  * Take appropriate action if it is a JFIF marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
@@ -611,18 +627,18 @@
      */
     if (cinfo->JFIF_major_version != 1)
       WARNMS2(cinfo, JWRN_JFIF_MAJOR,
-	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+              cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
     /* Generate trace messages */
     TRACEMS5(cinfo, 1, JTRC_JFIF,
-	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
-	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+             cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+             cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
     /* Validate thumbnail dimensions and issue appropriate messages */
     if (GETJOCTET(data[12]) | GETJOCTET(data[13]))
       TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL,
-	       GETJOCTET(data[12]), GETJOCTET(data[13]));
+               GETJOCTET(data[12]), GETJOCTET(data[13]));
     totallen -= APP0_DATA_LEN;
     if (totallen !=
-	((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
+        ((INT32)GETJOCTET(data[12]) * (INT32)GETJOCTET(data[13]) * (INT32) 3))
       TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
   } else if (datalen >= 6 &&
       GETJOCTET(data[0]) == 0x4A &&
@@ -646,7 +662,7 @@
       break;
     default:
       TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
-	       GETJOCTET(data[5]), (int) totallen);
+               GETJOCTET(data[5]), (int) totallen);
       break;
     }
   } else {
@@ -657,8 +673,8 @@
 
 
 LOCAL(void)
-examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
-	       unsigned int datalen, INT32 remaining)
+examine_app14 (j_decompress_ptr cinfo, JOCTET * data,
+               unsigned int datalen, INT32 remaining)
 /* Examine first few bytes from an APP14.
  * Take appropriate action if it is an Adobe marker.
  * datalen is # of bytes at data[], remaining is length of rest of marker data.
@@ -713,10 +729,10 @@
   /* process it */
   switch (cinfo->unread_marker) {
   case M_APP0:
-    examine_app0(cinfo, (JOCTET FAR *) b, numtoread, length);
+    examine_app0(cinfo, (JOCTET *) b, numtoread, length);
     break;
   case M_APP14:
-    examine_app14(cinfo, (JOCTET FAR *) b, numtoread, length);
+    examine_app14(cinfo, (JOCTET *) b, numtoread, length);
     break;
   default:
     /* can't get here unless jpeg_save_markers chooses wrong processor */
@@ -742,7 +758,7 @@
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
   jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
   unsigned int bytes_read, data_length;
-  JOCTET FAR * data;
+  JOCTET * data;
   INT32 length = 0;
   INPUT_VARS(cinfo);
 
@@ -750,25 +766,25 @@
     /* begin reading a marker */
     INPUT_2BYTES(cinfo, length, return FALSE);
     length -= 2;
-    if (length >= 0) {		/* watch out for bogus length word */
+    if (length >= 0) {          /* watch out for bogus length word */
       /* figure out how much we want to save */
       unsigned int limit;
       if (cinfo->unread_marker == (int) M_COM)
-	limit = marker->length_limit_COM;
+        limit = marker->length_limit_COM;
       else
-	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+        limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
       if ((unsigned int) length < limit)
-	limit = (unsigned int) length;
+        limit = (unsigned int) length;
       /* allocate and initialize the marker item */
       cur_marker = (jpeg_saved_marker_ptr)
-	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				    SIZEOF(struct jpeg_marker_struct) + limit);
+        (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                    sizeof(struct jpeg_marker_struct) + limit);
       cur_marker->next = NULL;
       cur_marker->marker = (UINT8) cinfo->unread_marker;
       cur_marker->original_length = (unsigned int) length;
       cur_marker->data_length = limit;
       /* data area is just beyond the jpeg_marker_struct */
-      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      data = cur_marker->data = (JOCTET *) (cur_marker + 1);
       marker->cur_marker = cur_marker;
       marker->bytes_read = 0;
       bytes_read = 0;
@@ -786,7 +802,7 @@
   }
 
   while (bytes_read < data_length) {
-    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    INPUT_SYNC(cinfo);          /* move the restart point to here */
     marker->bytes_read = bytes_read;
     /* If there's not at least one byte in buffer, suspend */
     MAKE_BYTE_AVAIL(cinfo, return FALSE);
@@ -799,14 +815,14 @@
   }
 
   /* Done reading what we want to read */
-  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+  if (cur_marker != NULL) {     /* will be NULL if bogus length word */
     /* Add new marker to end of list */
     if (cinfo->marker_list == NULL) {
       cinfo->marker_list = cur_marker;
     } else {
       jpeg_saved_marker_ptr prev = cinfo->marker_list;
       while (prev->next != NULL)
-	prev = prev->next;
+        prev = prev->next;
       prev->next = cur_marker;
     }
     /* Reset pointer & calc remaining data length */
@@ -826,12 +842,12 @@
     break;
   default:
     TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
-	     (int) (data_length + length));
+             (int) (data_length + length));
     break;
   }
 
   /* skip any remaining data -- could be lots */
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
   if (length > 0)
     (*cinfo->src->skip_input_data) (cinfo, (long) length);
 
@@ -850,10 +866,10 @@
 
   INPUT_2BYTES(cinfo, length, return FALSE);
   length -= 2;
-  
+
   TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) length);
 
-  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
   if (length > 0)
     (*cinfo->src->skip_input_data) (cinfo, (long) length);
 
@@ -897,7 +913,7 @@
       INPUT_BYTE(cinfo, c, return FALSE);
     } while (c == 0xFF);
     if (c != 0)
-      break;			/* found a valid marker, exit loop */
+      break;                    /* found a valid marker, exit loop */
     /* Reach here if we found a stuffed-zero data sequence (FF/00).
      * Discard it and loop back to try again.
      */
@@ -957,11 +973,11 @@
     /* NB: first_marker() enforces the requirement that SOI appear first. */
     if (cinfo->unread_marker == 0) {
       if (! cinfo->marker->saw_SOI) {
-	if (! first_marker(cinfo))
-	  return JPEG_SUSPENDED;
+        if (! first_marker(cinfo))
+          return JPEG_SUSPENDED;
       } else {
-	if (! next_marker(cinfo))
-	  return JPEG_SUSPENDED;
+        if (! next_marker(cinfo))
+          return JPEG_SUSPENDED;
       }
     }
     /* At this point cinfo->unread_marker contains the marker code and the
@@ -971,74 +987,74 @@
     switch (cinfo->unread_marker) {
     case M_SOI:
       if (! get_soi(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
       if (! get_sof(cinfo, FALSE, FALSE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF2:		/* Progressive, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
       if (! get_sof(cinfo, TRUE, FALSE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF9:		/* Extended sequential, arithmetic */
+    case M_SOF9:                /* Extended sequential, arithmetic */
       if (! get_sof(cinfo, FALSE, TRUE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    case M_SOF10:		/* Progressive, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
       if (! get_sof(cinfo, TRUE, TRUE))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
     /* Currently unsupported SOFn types */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_JPG:			/* Reserved for JPEG extensions */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_JPG:                 /* Reserved for JPEG extensions */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
       break;
 
     case M_SOS:
       if (! get_sos(cinfo))
-	return JPEG_SUSPENDED;
-      cinfo->unread_marker = 0;	/* processed the marker */
+        return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0; /* processed the marker */
       return JPEG_REACHED_SOS;
-    
+
     case M_EOI:
       TRACEMS(cinfo, 1, JTRC_EOI);
-      cinfo->unread_marker = 0;	/* processed the marker */
+      cinfo->unread_marker = 0; /* processed the marker */
       return JPEG_REACHED_EOI;
-      
+
     case M_DAC:
       if (! get_dac(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DHT:
       if (! get_dht(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DQT:
       if (! get_dqt(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_DRI:
       if (! get_dri(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
-      
+
     case M_APP0:
     case M_APP1:
     case M_APP2:
@@ -1056,16 +1072,16 @@
     case M_APP14:
     case M_APP15:
       if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
-		cinfo->unread_marker - (int) M_APP0]) (cinfo))
-	return JPEG_SUSPENDED;
-      break;
-      
-    case M_COM:
-      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
-	return JPEG_SUSPENDED;
+                cinfo->unread_marker - (int) M_APP0]) (cinfo))
+        return JPEG_SUSPENDED;
       break;
 
-    case M_RST0:		/* these are all parameterless */
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:                /* these are all parameterless */
     case M_RST1:
     case M_RST2:
     case M_RST3:
@@ -1077,12 +1093,12 @@
       TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
       break;
 
-    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+    case M_DNL:                 /* Ignore DNL ... perhaps the wrong thing */
       if (! skip_variable(cinfo))
-	return JPEG_SUSPENDED;
+        return JPEG_SUSPENDED;
       break;
 
-    default:			/* must be DHP, EXP, JPGn, or RESn */
+    default:                    /* must be DHP, EXP, JPGn, or RESn */
       /* For now, we treat the reserved markers as fatal errors since they are
        * likely to be used to signal incompatible JPEG Part 3 extensions.
        * Once the JPEG 3 version-number marker is well defined, this code
@@ -1128,7 +1144,7 @@
     /* Uh-oh, the restart markers have been messed up. */
     /* Let the data source manager determine how to resync. */
     if (! (*cinfo->src->resync_to_restart) (cinfo,
-					    cinfo->marker->next_restart_num))
+                                            cinfo->marker->next_restart_num))
       return FALSE;
   }
 
@@ -1193,25 +1209,25 @@
 {
   int marker = cinfo->unread_marker;
   int action = 1;
-  
+
   /* Always put up a warning. */
   WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
-  
+
   /* Outer loop handles repeated decision after scanning forward. */
   for (;;) {
     if (marker < (int) M_SOF0)
-      action = 2;		/* invalid marker */
+      action = 2;               /* invalid marker */
     else if (marker < (int) M_RST0 || marker > (int) M_RST7)
-      action = 3;		/* valid non-restart marker */
+      action = 3;               /* valid non-restart marker */
     else {
       if (marker == ((int) M_RST0 + ((desired+1) & 7)) ||
-	  marker == ((int) M_RST0 + ((desired+2) & 7)))
-	action = 3;		/* one of the next two expected restarts */
+          marker == ((int) M_RST0 + ((desired+2) & 7)))
+        action = 3;             /* one of the next two expected restarts */
       else if (marker == ((int) M_RST0 + ((desired-1) & 7)) ||
-	       marker == ((int) M_RST0 + ((desired-2) & 7)))
-	action = 2;		/* a prior restart, so advance */
+               marker == ((int) M_RST0 + ((desired-2) & 7)))
+        action = 2;             /* a prior restart, so advance */
       else
-	action = 1;		/* desired restart or too far away */
+        action = 1;             /* desired restart or too far away */
     }
     TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
     switch (action) {
@@ -1222,7 +1238,7 @@
     case 2:
       /* Scan to the next marker, and repeat the decision loop. */
       if (! next_marker(cinfo))
-	return FALSE;
+        return FALSE;
       marker = cinfo->unread_marker;
       break;
     case 3:
@@ -1243,10 +1259,10 @@
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
-  cinfo->comp_info = NULL;		/* until allocated by get_sof */
-  cinfo->input_scan_number = 0;		/* no SOS seen yet */
-  cinfo->unread_marker = 0;		/* no pending marker */
-  marker->pub.saw_SOI = FALSE;		/* set internal state too */
+  cinfo->comp_info = NULL;              /* until allocated by get_sof */
+  cinfo->input_scan_number = 0;         /* no SOS seen yet */
+  cinfo->unread_marker = 0;             /* no pending marker */
+  marker->pub.saw_SOI = FALSE;          /* set internal state too */
   marker->pub.saw_SOF = FALSE;
   marker->pub.discarded_bytes = 0;
   marker->cur_marker = NULL;
@@ -1267,7 +1283,7 @@
   /* Create subobject in permanent pool */
   marker = (my_marker_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
-				SIZEOF(my_marker_reader));
+                                sizeof(my_marker_reader));
   cinfo->marker = (struct jpeg_marker_reader *) marker;
   /* Initialize public method pointers */
   marker->pub.reset_marker_reader = reset_marker_reader;
@@ -1298,7 +1314,7 @@
 
 GLOBAL(void)
 jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
-		   unsigned int length_limit)
+                   unsigned int length_limit)
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
   long maxlength;
@@ -1307,7 +1323,7 @@
   /* Length limit mustn't be larger than what we can allocate
    * (should only be a concern in a 16-bit environment).
    */
-  maxlength = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  maxlength = cinfo->mem->max_alloc_chunk - sizeof(struct jpeg_marker_struct);
   if (((long) length_limit) > maxlength)
     length_limit = (unsigned int) maxlength;
 
@@ -1347,7 +1363,7 @@
 
 GLOBAL(void)
 jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
-			   jpeg_marker_parser_method routine)
+                           jpeg_marker_parser_method routine)
 {
   my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
 
diff --git a/jdmaster.c b/jdmaster.c
index 2802c5b..604e291 100644
--- a/jdmaster.c
+++ b/jdmaster.c
@@ -1,8 +1,12 @@
 /*
  * jdmaster.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2002-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains master control logic for the JPEG decompressor.
@@ -14,6 +18,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jpegcomp.h"
 
 
 /* Private state */
@@ -21,7 +26,7 @@
 typedef struct {
   struct jpeg_decomp_master pub; /* public fields */
 
-  int pass_number;		/* # of passes completed */
+  int pass_number;              /* # of passes completed */
 
   boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
 
@@ -47,10 +52,25 @@
   /* Merging is the equivalent of plain box-filter upsampling */
   if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
     return FALSE;
-  /* jdmerge.c only supports YCC=>RGB color conversion */
+  /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */
   if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
-      cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE)
+      (cinfo->out_color_space != JCS_RGB &&
+      cinfo->out_color_space != JCS_RGB565 &&
+      cinfo->out_color_space != JCS_EXT_RGB &&
+      cinfo->out_color_space != JCS_EXT_RGBX &&
+      cinfo->out_color_space != JCS_EXT_BGR &&
+      cinfo->out_color_space != JCS_EXT_BGRX &&
+      cinfo->out_color_space != JCS_EXT_XBGR &&
+      cinfo->out_color_space != JCS_EXT_XRGB &&
+      cinfo->out_color_space != JCS_EXT_RGBA &&
+      cinfo->out_color_space != JCS_EXT_BGRA &&
+      cinfo->out_color_space != JCS_EXT_ABGR &&
+      cinfo->out_color_space != JCS_EXT_ARGB))
+    return FALSE;
+  if ((cinfo->out_color_space == JCS_RGB565 &&
+      cinfo->out_color_components != 3) ||
+      (cinfo->out_color_space != JCS_RGB565 &&
+      cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space]))
     return FALSE;
   /* and it only handles 2h1v or 2h2v sampling ratios */
   if (cinfo->comp_info[0].h_samp_factor != 2 ||
@@ -61,12 +81,12 @@
       cinfo->comp_info[2].v_samp_factor != 1)
     return FALSE;
   /* furthermore, it doesn't work if we've scaled the IDCTs differently */
-  if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
-      cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size ||
-      cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size)
+  if (cinfo->comp_info[0]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
+      cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
+      cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
     return FALSE;
   /* ??? also need to test for upsample-time rescaling, when & if supported */
-  return TRUE;			/* by golly, it'll work... */
+  return TRUE;                  /* by golly, it'll work... */
 #else
   return FALSE;
 #endif
@@ -77,6 +97,177 @@
  * Compute output image dimensions and related values.
  * NOTE: this is exported for possible use by application.
  * Hence it mustn't do anything that can't be done twice.
+ */
+
+#if JPEG_LIB_VERSION >= 80
+GLOBAL(void)
+#else
+LOCAL(void)
+#endif
+jpeg_core_output_dimensions (j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for transcoding and full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Compute actual output image dimensions and DCT scaling choices. */
+  if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
+    /* Provide 1/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 1;
+    cinfo->_min_DCT_v_scaled_size = 1;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
+    /* Provide 2/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 2L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 2L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 2;
+    cinfo->_min_DCT_v_scaled_size = 2;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
+    /* Provide 3/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 3L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 3L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 3;
+    cinfo->_min_DCT_v_scaled_size = 3;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
+    /* Provide 4/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 4L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 4L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 4;
+    cinfo->_min_DCT_v_scaled_size = 4;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
+    /* Provide 5/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 5L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 5L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 5;
+    cinfo->_min_DCT_v_scaled_size = 5;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
+    /* Provide 6/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 6L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 6L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 6;
+    cinfo->_min_DCT_v_scaled_size = 6;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
+    /* Provide 7/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 7L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 7L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 7;
+    cinfo->_min_DCT_v_scaled_size = 7;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
+    /* Provide 8/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 8L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 8L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 8;
+    cinfo->_min_DCT_v_scaled_size = 8;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
+    /* Provide 9/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 9L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 9L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 9;
+    cinfo->_min_DCT_v_scaled_size = 9;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
+    /* Provide 10/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 10L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 10L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 10;
+    cinfo->_min_DCT_v_scaled_size = 10;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
+    /* Provide 11/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 11L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 11L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 11;
+    cinfo->_min_DCT_v_scaled_size = 11;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
+    /* Provide 12/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 12L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 12L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 12;
+    cinfo->_min_DCT_v_scaled_size = 12;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
+    /* Provide 13/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 13L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 13L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 13;
+    cinfo->_min_DCT_v_scaled_size = 13;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
+    /* Provide 14/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 14L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 14L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 14;
+    cinfo->_min_DCT_v_scaled_size = 14;
+  } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
+    /* Provide 15/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 15L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 15L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 15;
+    cinfo->_min_DCT_v_scaled_size = 15;
+  } else {
+    /* Provide 16/block_size scaling */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * 16L, (long) DCTSIZE);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * 16L, (long) DCTSIZE);
+    cinfo->_min_DCT_h_scaled_size = 16;
+    cinfo->_min_DCT_v_scaled_size = 16;
+  }
+
+  /* Recompute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
+    compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
  * Also note that it may be called before the master module is initialized!
  */
 
@@ -93,52 +284,31 @@
   if (cinfo->global_state != DSTATE_READY)
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
 #ifdef IDCT_SCALING_SUPPORTED
 
-  /* Compute actual output image dimensions and DCT scaling choices. */
-  if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
-    /* Provide 1/8 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 8L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 8L);
-    cinfo->min_DCT_scaled_size = 1;
-  } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
-    /* Provide 1/4 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 4L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 4L);
-    cinfo->min_DCT_scaled_size = 2;
-  } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
-    /* Provide 1/2 scaling */
-    cinfo->output_width = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_width, 2L);
-    cinfo->output_height = (JDIMENSION)
-      jdiv_round_up((long) cinfo->image_height, 2L);
-    cinfo->min_DCT_scaled_size = 4;
-  } else {
-    /* Provide 1/1 scaling */
-    cinfo->output_width = cinfo->image_width;
-    cinfo->output_height = cinfo->image_height;
-    cinfo->min_DCT_scaled_size = DCTSIZE;
-  }
   /* In selecting the actual DCT scaling for each component, we try to
    * scale up the chroma components via IDCT scaling rather than upsampling.
    * This saves time if the upsampler gets to use 1:1 scaling.
-   * Note this code assumes that the supported DCT scalings are powers of 2.
+   * Note this code adapts subsampling ratios which are powers of 2.
    */
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
        ci++, compptr++) {
-    int ssize = cinfo->min_DCT_scaled_size;
+    int ssize = cinfo->_min_DCT_scaled_size;
     while (ssize < DCTSIZE &&
-	   (compptr->h_samp_factor * ssize * 2 <=
-	    cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
-	   (compptr->v_samp_factor * ssize * 2 <=
-	    cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
+           ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
+            (compptr->h_samp_factor * ssize * 2) == 0) &&
+           ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
+            (compptr->v_samp_factor * ssize * 2) == 0)) {
       ssize = ssize * 2;
     }
+#if JPEG_LIB_VERSION >= 70
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
+#else
     compptr->DCT_scaled_size = ssize;
+#endif
   }
 
   /* Recompute downsampled dimensions of components;
@@ -149,12 +319,12 @@
     /* Size in samples, after IDCT scaling */
     compptr->downsampled_width = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_width *
-		    (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
-		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+                    (long) (compptr->h_samp_factor * compptr->_DCT_scaled_size),
+                    (long) (cinfo->max_h_samp_factor * DCTSIZE));
     compptr->downsampled_height = (JDIMENSION)
       jdiv_round_up((long) cinfo->image_height *
-		    (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
-		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+                    (long) (compptr->v_samp_factor * compptr->_DCT_scaled_size),
+                    (long) (cinfo->max_v_samp_factor * DCTSIZE));
   }
 
 #else /* !IDCT_SCALING_SUPPORTED */
@@ -175,23 +345,32 @@
     cinfo->out_color_components = 1;
     break;
   case JCS_RGB:
-#if RGB_PIXELSIZE != 3
-    cinfo->out_color_components = RGB_PIXELSIZE;
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
     break;
-#endif /* else share code with YCbCr */
   case JCS_YCbCr:
+  case JCS_RGB565:
     cinfo->out_color_components = 3;
     break;
   case JCS_CMYK:
   case JCS_YCCK:
     cinfo->out_color_components = 4;
     break;
-  default:			/* else must be same colorspace as in file */
+  default:                      /* else must be same colorspace as in file */
     cinfo->out_color_components = cinfo->num_components;
     break;
   }
   cinfo->output_components = (cinfo->quantize_colors ? 1 :
-			      cinfo->out_color_components);
+                              cinfo->out_color_components);
 
   /* See if upsampler will want to emit more than one row at a time */
   if (use_merged_upsample(cinfo))
@@ -208,20 +387,20 @@
  * processes are inner loops and need to be as fast as possible.  On most
  * machines, particularly CPUs with pipelines or instruction prefetch,
  * a (subscript-check-less) C table lookup
- *		x = sample_range_limit[x];
+ *              x = sample_range_limit[x];
  * is faster than explicit tests
- *		if (x < 0)  x = 0;
- *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ *              if (x < 0)  x = 0;
+ *              else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
  * These processes all use a common table prepared by the routine below.
  *
  * For most steps we can mathematically guarantee that the initial value
  * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
  * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
- * limiting step (just after the IDCT), a wildly out-of-range value is 
+ * limiting step (just after the IDCT), a wildly out-of-range value is
  * possible if the input data is corrupt.  To avoid any chance of indexing
  * off the end of memory and getting a bad-pointer trap, we perform the
  * post-IDCT limiting thus:
- *		x = range_limit[x & MASK];
+ *              x = range_limit[x & MASK];
  * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
  * samples.  Under normal circumstances this is more than enough range and
  * a correct output will be generated; with bogus input data the mask will
@@ -239,9 +418,6 @@
  * We can save some space by overlapping the start of the post-IDCT table
  * with the simpler range limiting table.  The post-IDCT table begins at
  * sample_range_limit + CENTERJSAMPLE.
- *
- * Note that the table is allocated in near data space on PCs; it's small
- * enough and used often enough to justify this.
  */
 
 LOCAL(void)
@@ -253,23 +429,23 @@
 
   table = (JSAMPLE *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
-  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+                (5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
+  table += (MAXJSAMPLE+1);      /* allow negative subscripts of simple table */
   cinfo->sample_range_limit = table;
   /* First segment of "simple" table: limit[x] = 0 for x < 0 */
-  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * sizeof(JSAMPLE));
   /* Main part of "simple" table: limit[x] = x */
   for (i = 0; i <= MAXJSAMPLE; i++)
     table[i] = (JSAMPLE) i;
-  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  table += CENTERJSAMPLE;       /* Point to where post-IDCT table starts */
   /* End of simple table, rest of first half of post-IDCT table */
   for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
     table[i] = MAXJSAMPLE;
   /* Second half of post-IDCT table */
   MEMZERO(table + (2 * (MAXJSAMPLE+1)),
-	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+          (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
   MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
-	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
+          cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
 }
 
 
@@ -373,7 +549,11 @@
   jinit_inverse_dct(cinfo);
   /* Entropy decoding: either Huffman or arithmetic coding. */
   if (cinfo->arith_code) {
+#ifdef D_ARITH_CODING_SUPPORTED
+    jinit_arith_decoder(cinfo);
+#else
     ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
   } else {
     if (cinfo->progressive_mode) {
 #ifdef D_PROGRESSIVE_SUPPORTED
@@ -453,24 +633,24 @@
     if (cinfo->quantize_colors && cinfo->colormap == NULL) {
       /* Select new quantization method */
       if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
-	cinfo->cquantize = master->quantizer_2pass;
-	master->pub.is_dummy_pass = TRUE;
+        cinfo->cquantize = master->quantizer_2pass;
+        master->pub.is_dummy_pass = TRUE;
       } else if (cinfo->enable_1pass_quant) {
-	cinfo->cquantize = master->quantizer_1pass;
+        cinfo->cquantize = master->quantizer_1pass;
       } else {
-	ERREXIT(cinfo, JERR_MODE_CHANGE);
+        ERREXIT(cinfo, JERR_MODE_CHANGE);
       }
     }
     (*cinfo->idct->start_pass) (cinfo);
     (*cinfo->coef->start_output_pass) (cinfo);
     if (! cinfo->raw_data_out) {
       if (! master->using_merged_upsample)
-	(*cinfo->cconvert->start_pass) (cinfo);
+        (*cinfo->cconvert->start_pass) (cinfo);
       (*cinfo->upsample->start_pass) (cinfo);
       if (cinfo->quantize_colors)
-	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+        (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
       (*cinfo->post->start_pass) (cinfo,
-	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+            (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
       (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
     }
   }
@@ -479,7 +659,7 @@
   if (cinfo->progress != NULL) {
     cinfo->progress->completed_passes = master->pass_number;
     cinfo->progress->total_passes = master->pass_number +
-				    (master->pub.is_dummy_pass ? 2 : 1);
+                                    (master->pub.is_dummy_pass ? 2 : 1);
     /* In buffered-image mode, we assume one more output pass if EOI not
      * yet reached, but no more passes if EOI has been reached.
      */
@@ -546,7 +726,7 @@
 
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(my_decomp_master));
+                                  sizeof(my_decomp_master));
   cinfo->master = (struct jpeg_decomp_master *) master;
   master->pub.prepare_for_output_pass = prepare_for_output_pass;
   master->pub.finish_output_pass = finish_output_pass;
diff --git a/jdmerge.c b/jdmerge.c
index 3744446..f89d69f 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -1,8 +1,12 @@
 /*
  * jdmerge.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2011, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains code for merged upsampling/color conversion.
@@ -14,19 +18,19 @@
  * (ie, box filtering), we can save some work in color conversion by
  * calculating all the output pixels corresponding to a pair of chroma
  * samples at one time.  In the conversion equations
- *	R = Y           + K1 * Cr
- *	G = Y + K2 * Cb + K3 * Cr
- *	B = Y + K4 * Cb
+ *      R = Y           + K1 * Cr
+ *      G = Y + K2 * Cb + K3 * Cr
+ *      B = Y + K4 * Cb
  * only the Y term varies among the group of pixels corresponding to a pair
  * of chroma samples, so the rest of the terms can be calculated just once.
  * At typical sampling ratios, this eliminates half or three-quarters of the
  * multiplications needed for color conversion.
  *
  * This file currently provides implementations for the following cases:
- *	YCbCr => RGB color conversion only.
- *	Sampling ratios of 2h1v or 2h2v.
- *	No scaling needed at upsample time.
- *	Corner-aligned (non-CCIR601) sampling alignment.
+ *      YCbCr => RGB, extended RGB, and RGB565 color conversion only.
+ *      Sampling ratios of 2h1v or 2h2v.
+ *      No scaling needed at upsample time.
+ *      Corner-aligned (non-CCIR601) sampling alignment.
  * Other special cases could be added, but in most applications these are
  * the only common cases.  (For uncommon cases we fall back on the more
  * general code in jdsample.c and jdcolor.c.)
@@ -35,25 +39,58 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsimd.h"
+#include "jconfigint.h"
 
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
 
+#define PACK_SHORT_565(r, g, b)   ((((r) << 8) & 0xf800) |  \
+                                   (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_TWO_PIXELS(l, r)     ((r << 16) | l)
+#define PACK_NEED_ALIGNMENT(ptr)  (((size_t)(ptr)) & 3)
+
+#define WRITE_TWO_PIXELS(addr, pixels) {  \
+  ((INT16*)(addr))[0] = (pixels);  \
+  ((INT16*)(addr))[1] = (pixels) >> 16;  \
+}
+#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(INT32 *)(addr)) = pixels)
+
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3
+#define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const INT32 dither_matrix[4] = {
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
+  struct jpeg_upsampler pub;    /* public fields */
 
   /* Pointer to routine to do actual upsampling/conversion of one row group */
-  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-			   JSAMPARRAY output_buf));
+  void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
 
   /* Private state for YCC->RGB conversion */
-  int * Cr_r_tab;		/* => table for Cr to R conversion */
-  int * Cb_b_tab;		/* => table for Cb to B conversion */
-  INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
-  INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
+  int * Cr_r_tab;               /* => table for Cr to R conversion */
+  int * Cb_b_tab;               /* => table for Cb to B conversion */
+  INT32 * Cr_g_tab;             /* => table for Cr to G conversion */
+  INT32 * Cb_g_tab;             /* => table for Cb to G conversion */
 
   /* For 2:1 vertical sampling, we produce two output rows at a time.
    * We need a "spare" row buffer to hold the second output row if the
@@ -61,17 +98,118 @@
    * to discard the dummy last row if the image height is odd.
    */
   JSAMPROW spare_row;
-  boolean spare_full;		/* T if spare buffer is occupied */
+  boolean spare_full;           /* T if spare buffer is occupied */
 
-  JDIMENSION out_row_width;	/* samples per output row */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+  JDIMENSION out_row_width;     /* samples per output row */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
 } my_upsampler;
 
 typedef my_upsampler * my_upsample_ptr;
 
-#define SCALEBITS	16	/* speediest right-shift on some machines */
-#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
-#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)          ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/* Include inline routines for colorspace extensions */
+
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define h2v1_merged_upsample_internal extrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define h2v1_merged_upsample_internal extrgbx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extrgbx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define h2v1_merged_upsample_internal extbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define h2v1_merged_upsample_internal extbgrx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extbgrx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define h2v1_merged_upsample_internal extxbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extxbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define h2v1_merged_upsample_internal extxrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extxrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
 
 
 /*
@@ -89,26 +227,26 @@
 
   upsample->Cr_r_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * sizeof(int));
   upsample->Cb_b_tab = (int *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(int));
+                                (MAXJSAMPLE+1) * sizeof(int));
   upsample->Cr_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * sizeof(INT32));
   upsample->Cb_g_tab = (INT32 *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(MAXJSAMPLE+1) * SIZEOF(INT32));
+                                (MAXJSAMPLE+1) * sizeof(INT32));
 
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
     /* Cr=>R value is nearest int to 1.40200 * x */
     upsample->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
     /* Cb=>B value is nearest int to 1.77200 * x */
     upsample->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
     /* Cr=>G value is scaled-up -0.71414 * x */
     upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
     /* Cb=>G value is scaled-up -0.34414 * x */
@@ -142,20 +280,23 @@
 
 METHODDEF(void)
 merged_2v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 /* 2:1 vertical sampling case: may need a spare row. */
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   JSAMPROW work_ptrs[2];
-  JDIMENSION num_rows;		/* number of rows returned to caller */
+  JDIMENSION num_rows;          /* number of rows returned to caller */
 
   if (upsample->spare_full) {
     /* If we have a spare row saved from a previous cycle, just return it. */
+    JDIMENSION size = upsample->out_row_width;
+    if (cinfo->out_color_space == JCS_RGB565)
+      size = cinfo->output_width * 2;
     jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
-		      1, upsample->out_row_width);
+                      1, size);
     num_rows = 1;
     upsample->spare_full = FALSE;
   } else {
@@ -191,17 +332,17 @@
 
 METHODDEF(void)
 merged_1v_upsample (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 /* 1:1 vertical sampling case: much easier, never need a spare row. */
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
 
   /* Just do the upsampling. */
   (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
-			 output_buf + *out_row_ctr);
+                         output_buf + *out_row_ctr);
   /* Adjust counts */
   (*out_row_ctr)++;
   (*in_row_group_ctr)++;
@@ -224,8 +365,96 @@
 
 METHODDEF(void)
 h2v1_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
+                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                      JSAMPARRAY output_buf)
+{ /* Forward all four arguments to the h2v1 worker picked by out_color_space. */
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:  /* RGBX and RGBA share the extrgbx_ worker */
+      extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:  /* BGRX and BGRA share the extbgrx_ worker */
+      extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:  /* XBGR and ABGR share the extxbgr_ worker */
+      extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:  /* XRGB and ARGB share the extxrgb_ worker */
+      extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    default:            /* any other color space: the un-prefixed worker */
+      h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                    output_buf);
+      break;
+  }
+}
+
+
+/* Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ * Forwards all four arguments to the h2v2 worker picked by out_color_space.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+                      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                      JSAMPARRAY output_buf)
+{
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:  /* RGBX and RGBA share the extrgbx_ worker */
+      extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:  /* BGRX and BGRA share the extbgrx_ worker */
+      extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:  /* XBGR and ABGR share the extxbgr_ worker */
+      extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:  /* XRGB and ARGB share the extxrgb_ worker */
+      extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    default:            /* any other color space: the un-prefixed worker */
+      h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                    output_buf);
+      break;
+  }
+}
+
+
+METHODDEF(void)
+h2v1_merged_upsample_565 (j_decompress_ptr cinfo,
+                          JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                          JSAMPARRAY output_buf)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   register int y, cred, cgreen, cblue;
@@ -239,12 +468,15 @@
   int * Cbbtab = upsample->Cb_b_tab;
   INT32 * Crgtab = upsample->Cr_g_tab;
   INT32 * Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  INT32 rgb;
   SHIFT_TEMPS
 
   inptr0 = input_buf[0][in_row_group_ctr];
   inptr1 = input_buf[1][in_row_group_ctr];
   inptr2 = input_buf[2][in_row_group_ctr];
   outptr = output_buf[0];
+
   /* Loop for each pair of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
@@ -253,18 +485,24 @@
     cred = Crrtab[cr];
     cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
+
     /* Fetch 2 Y values and emit 2 pixels */
     y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
     y  = GETJSAMPLE(*inptr0++);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
-    outptr += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr, rgb);
+    outptr += 4;
   }
+
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
     cb = GETJSAMPLE(*inptr1);
@@ -273,21 +511,91 @@
     cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
     y  = GETJSAMPLE(*inptr0);
-    outptr[RGB_RED] =   range_limit[y + cred];
-    outptr[RGB_GREEN] = range_limit[y + cgreen];
-    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr = rgb;
+   }
+ }
+
+
+METHODDEF(void)
+h2v1_merged_upsample_565D (j_decompress_ptr cinfo,
+                           JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                           JSAMPARRAY output_buf)
+{ /* Merged h2v1 upsample + color convert, emitting dithered packed 565. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; /* by scanline */
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];  /* component 0: read as Y below */
+  inptr1 = input_buf[1][in_row_group_ctr];  /* component 1: read as Cb below */
+  inptr2 = input_buf[2][in_row_group_ctr];  /* component 2: read as Cr below */
+  outptr = output_buf[0];
+
+  /* Loop for each pair of output pixels; one Cb/Cr sample serves both */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);  /* dither value is updated after every pixel */
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = GETJSAMPLE(*inptr0++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr, rgb);  /* both packed pixels stored in one go */
+    outptr += 4;  /* assumes 1-byte JSAMPLE, i.e. 2 x 16-bit pixels - TODO confirm */
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr = rgb;  /* lone pixel: store a single INT16 only */
   }
 }
 
 
-/*
- * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
- */
-
 METHODDEF(void)
-h2v2_merged_upsample (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
-		      JSAMPARRAY output_buf)
+h2v2_merged_upsample_565 (j_decompress_ptr cinfo,
+                          JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                          JSAMPARRAY output_buf)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   register int y, cred, cgreen, cblue;
@@ -301,14 +609,17 @@
   int * Cbbtab = upsample->Cb_b_tab;
   INT32 * Crgtab = upsample->Cr_g_tab;
   INT32 * Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  INT32 rgb;
   SHIFT_TEMPS
 
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr00 = input_buf[0][in_row_group_ctr * 2];
+  inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
   inptr1 = input_buf[1][in_row_group_ctr];
   inptr2 = input_buf[2][in_row_group_ctr];
   outptr0 = output_buf[0];
   outptr1 = output_buf[1];
+
   /* Loop for each group of output pixels */
   for (col = cinfo->output_width >> 1; col > 0; col--) {
     /* Do the chroma part of the calculation */
@@ -317,28 +628,39 @@
     cred = Crrtab[cr];
     cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
+
     /* Fetch 4 Y values and emit 4 pixels */
     y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
     y  = GETJSAMPLE(*inptr00++);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr0, rgb);
+    outptr0 += 4;
+
     y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
     y  = GETJSAMPLE(*inptr01++);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr1, rgb);
+    outptr1 += 4;
   }
+
   /* If image width is odd, do the last output column separately */
   if (cinfo->output_width & 1) {
     cb = GETJSAMPLE(*inptr1);
@@ -346,14 +668,120 @@
     cred = Crrtab[cr];
     cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
     cblue = Cbbtab[cb];
+
     y  = GETJSAMPLE(*inptr00);
-    outptr0[RGB_RED] =   range_limit[y + cred];
-    outptr0[RGB_GREEN] = range_limit[y + cgreen];
-    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr0 = rgb;
+
     y  = GETJSAMPLE(*inptr01);
-    outptr1[RGB_RED] =   range_limit[y + cred];
-    outptr1[RGB_GREEN] = range_limit[y + cgreen];
-    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr1 = rgb;
+  }
+}
+
+
+METHODDEF(void)
+h2v2_merged_upsample_565D (j_decompress_ptr cinfo,
+                           JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+                           JSAMPARRAY output_buf)
+{ /* Merged h2v2 upsample + color convert: two rows of dithered packed 565. */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK]; /* this line */
+  INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK]; /* next */
+  unsigned int r, g, b;
+  INT32 rgb;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];      /* Y for output row 0 */
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];  /* Y for output row 1 */
+  inptr1 = input_buf[1][in_row_group_ctr];         /* read as Cb below */
+  inptr2 = input_buf[2][in_row_group_ctr];         /* read as Cr below */
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+
+  /* Loop for each group of output pixels; one Cb/Cr sample serves all four */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels.  NOTE(review): d0/d1 alternate per
+     * column here, but per row in the odd-width tail below -- confirm intent. */
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = GETJSAMPLE(*inptr00++);
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr0, rgb);  /* row 0: both packed pixels at once */
+    outptr0 += 4;  /* assumes 1-byte JSAMPLE, i.e. 2 x 16-bit pixels - TODO confirm */
+
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = GETJSAMPLE(*inptr01++);
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr1, rgb);  /* row 1: both packed pixels at once */
+    outptr1 += 4;
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    y  = GETJSAMPLE(*inptr00);
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr0 = rgb;  /* lone pixel, row 0: store a single INT16 */
+
+    y  = GETJSAMPLE(*inptr01);
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16*)outptr1 = rgb;  /* lone pixel, row 1: store a single INT16 */
   }
 }
 
@@ -373,7 +801,7 @@
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
+                                sizeof(my_upsampler));
   cinfo->upsample = (struct jpeg_upsampler *) upsample;
   upsample->pub.start_pass = start_pass_merged_upsample;
   upsample->pub.need_context_rows = FALSE;
@@ -382,14 +810,34 @@
 
   if (cinfo->max_v_samp_factor == 2) {
     upsample->pub.upsample = merged_2v_upsample;
-    upsample->upmethod = h2v2_merged_upsample;
+    if (jsimd_can_h2v2_merged_upsample())
+      upsample->upmethod = jsimd_h2v2_merged_upsample;
+    else
+      upsample->upmethod = h2v2_merged_upsample;
+    if (cinfo->out_color_space == JCS_RGB565) {
+      if (cinfo->dither_mode != JDITHER_NONE) {
+        upsample->upmethod = h2v2_merged_upsample_565D;
+      } else {
+        upsample->upmethod = h2v2_merged_upsample_565;
+      }
+    }
     /* Allocate a spare row buffer */
     upsample->spare_row = (JSAMPROW)
       (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-		(size_t) (upsample->out_row_width * SIZEOF(JSAMPLE)));
+                (size_t) (upsample->out_row_width * sizeof(JSAMPLE)));
   } else {
     upsample->pub.upsample = merged_1v_upsample;
-    upsample->upmethod = h2v1_merged_upsample;
+    if (jsimd_can_h2v1_merged_upsample())
+      upsample->upmethod = jsimd_h2v1_merged_upsample;
+    else
+      upsample->upmethod = h2v1_merged_upsample;
+    if (cinfo->out_color_space == JCS_RGB565) {
+      if (cinfo->dither_mode != JDITHER_NONE) {
+        upsample->upmethod = h2v1_merged_upsample_565D;
+      } else {
+        upsample->upmethod = h2v1_merged_upsample_565;
+      }
+    }
     /* No spare row needed */
     upsample->spare_row = NULL;
   }
diff --git a/jdmrgext.c b/jdmrgext.c
new file mode 100644
index 0000000..1f0a550
--- /dev/null
+++ b/jdmrgext.c
@@ -0,0 +1,185 @@
+/*
+ * jdmrgext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ */
+
+
+/* This file is included by jdmerge.c */
+
+
+/* Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ * Each Cb/Cr sample is shared by two horizontally adjacent output pixels.
+ */
+
+INLINE
+LOCAL(void)
+h2v1_merged_upsample_internal (j_decompress_ptr cinfo,
+                               JSAMPIMAGE input_buf,
+                               JDIMENSION in_row_group_ctr,
+                               JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];  /* component 0: read as Y below */
+  inptr1 = input_buf[1][in_row_group_ctr];  /* component 1: read as Cb below */
+  inptr2 = input_buf[2][in_row_group_ctr];  /* component 2: read as Cr below */
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = 0xFF;  /* written only when RGB_ALPHA is defined */
+#endif
+    outptr += RGB_PIXELSIZE;   /* stride comes from the macro in effect here */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = 0xFF;
+#endif
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = 0xFF;
+#endif
+  }
+}
+
+
+/* Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ * Each Cb/Cr sample is shared by a 2x2 group: two pixels in each of two rows.
+ */
+
+INLINE
+LOCAL(void)
+h2v2_merged_upsample_internal (j_decompress_ptr cinfo,
+                               JSAMPIMAGE input_buf,
+                               JDIMENSION in_row_group_ctr,
+                               JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];      /* Y for output row 0 */
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];  /* Y for output row 1 */
+  inptr1 = input_buf[1][in_row_group_ctr];         /* read as Cb below */
+  inptr2 = input_buf[2][in_row_group_ctr];         /* read as Cr below */
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = 0xFF;  /* written only when RGB_ALPHA is defined */
+#endif
+    outptr0 += RGB_PIXELSIZE;   /* stride comes from the macro in effect here */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = 0xFF;
+#endif
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);  /* second output row starts here */
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = 0xFF;
+#endif
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = 0xFF;
+#endif
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = 0xFF;
+#endif
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = 0xFF;
+#endif
+  }
+}
diff --git a/jdphuff.c b/jdphuff.c
index 2267809..eae1538 100644
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -1,8 +1,10 @@
 /*
  * jdphuff.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains Huffman entropy decoding routines for progressive JPEG.
@@ -17,7 +19,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdhuff.h"		/* Declarations shared with jdhuff.c */
+#include "jdhuff.h"             /* Declarations shared with jdhuff.c */
 
 
 #ifdef D_PROGRESSIVE_SUPPORTED
@@ -30,8 +32,8 @@
  */
 
 typedef struct {
-  unsigned int EOBRUN;			/* remaining EOBs in EOBRUN */
-  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef for each component */
+  unsigned int EOBRUN;                  /* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
 } savable_state;
 
 /* This macro is to work around compilers with missing or broken
@@ -44,11 +46,11 @@
 #else
 #if MAX_COMPS_IN_SCAN == 4
 #define ASSIGN_STATE(dest,src)  \
-	((dest).EOBRUN = (src).EOBRUN, \
-	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
-	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
-	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
-	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+        ((dest).EOBRUN = (src).EOBRUN, \
+         (dest).last_dc_val[0] = (src).last_dc_val[0], \
+         (dest).last_dc_val[1] = (src).last_dc_val[1], \
+         (dest).last_dc_val[2] = (src).last_dc_val[2], \
+         (dest).last_dc_val[3] = (src).last_dc_val[3])
 #endif
 #endif
 
@@ -59,11 +61,11 @@
   /* These fields are loaded into local variables at start of each MCU.
    * In case of suspension, we exit WITHOUT updating them.
    */
-  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
-  savable_state saved;		/* Other state at start of MCU */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
 
   /* These fields are NOT loaded into local working state. */
-  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
@@ -74,14 +76,14 @@
 typedef phuff_entropy_decoder * phuff_entropy_ptr;
 
 /* Forward declarations */
-METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
-METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo,
-					    JBLOCKROW *MCU_data));
-METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
-METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
-					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_DC_first (j_decompress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_AC_first (j_decompress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_DC_refine (j_decompress_ptr cinfo,
+                                         JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_AC_refine (j_decompress_ptr cinfo,
+                                         JBLOCKROW *MCU_data);
 
 
 /*
@@ -117,7 +119,7 @@
     if (cinfo->Al != cinfo->Ah-1)
       bad = TRUE;
   }
-  if (cinfo->Al > 13)		/* need not check for < 0 */
+  if (cinfo->Al > 13)           /* need not check for < 0 */
     bad = TRUE;
   /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
    * but the spec doesn't say so, and we try to be liberal about what we
@@ -127,7 +129,7 @@
    */
   if (bad)
     ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
-	     cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
   /* Update progression status, and verify that scan order is legal.
    * Note that inter-scan inconsistencies are treated as warnings
    * not fatal errors ... not clear if this is right way to behave.
@@ -140,7 +142,7 @@
     for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
       int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
       if (cinfo->Ah != expected)
-	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
       coef_bit_ptr[coefi] = cinfo->Al;
     }
   }
@@ -164,15 +166,15 @@
      * We may build same derived table more than once, but it's not expensive.
      */
     if (is_DC_band) {
-      if (cinfo->Ah == 0) {	/* DC refinement needs no table */
-	tbl = compptr->dc_tbl_no;
-	jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
-				& entropy->derived_tbls[tbl]);
+      if (cinfo->Ah == 0) {     /* DC refinement needs no table */
+        tbl = compptr->dc_tbl_no;
+        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+                                & entropy->derived_tbls[tbl]);
       }
     } else {
       tbl = compptr->ac_tbl_no;
       jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
-			      & entropy->derived_tbls[tbl]);
+                              & entropy->derived_tbls[tbl]);
       /* remember the single active table */
       entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
     }
@@ -198,6 +200,7 @@
  * On some machines, a shift and add will be faster than a table lookup.
  */
 
+#define AVOID_TABLES
 #ifdef AVOID_TABLES
 
 #define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
@@ -263,7 +266,7 @@
 /*
  * Huffman MCU decoding.
  * Each of these routines decodes and returns one MCU's worth of
- * Huffman-compressed coefficients. 
+ * Huffman-compressed coefficients.
  * The coefficients are reordered from zigzag order into natural array order,
  * but are not dequantized.
  *
@@ -284,7 +287,7 @@
 
 METHODDEF(boolean)
 decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Al = cinfo->Al;
   register int s, r;
@@ -299,7 +302,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, just leave the MCU set to zeroes.
@@ -324,9 +327,9 @@
       /* Section F.2.2.1: decode the DC coefficient difference */
       HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
       if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
+        CHECK_BIT_BUFFER(br_state, s, return FALSE);
+        r = GET_BITS(s);
+        s = HUFF_EXTEND(r, s);
       }
 
       /* Convert DC difference to actual value, update last_dc_val */
@@ -355,7 +358,7 @@
 
 METHODDEF(boolean)
 decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
   int Al = cinfo->Al;
@@ -369,7 +372,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, just leave the MCU set to zeroes.
@@ -380,49 +383,49 @@
     /* Load up working state.
      * We can avoid loading/saving bitread state if in an EOB run.
      */
-    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+    EOBRUN = entropy->saved.EOBRUN;     /* only part of saved state we need */
 
     /* There is always only one block per MCU */
 
-    if (EOBRUN > 0)		/* if it's a band of zeroes... */
-      EOBRUN--;			/* ...process it now (we do nothing) */
+    if (EOBRUN > 0)             /* if it's a band of zeroes... */
+      EOBRUN--;                 /* ...process it now (we do nothing) */
     else {
       BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
       block = MCU_data[0];
       tbl = entropy->ac_derived_tbl;
 
       for (k = cinfo->Ss; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  k += r;
-	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	  r = GET_BITS(s);
-	  s = HUFF_EXTEND(r, s);
-	  /* Scale and output coefficient in natural (dezigzagged) order */
-	  (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
-	} else {
-	  if (r == 15) {	/* ZRL */
-	    k += 15;		/* skip 15 zeroes in band */
-	  } else {		/* EOBr, run length is 2^r + appended bits */
-	    EOBRUN = 1 << r;
-	    if (r) {		/* EOBr, r > 0 */
-	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    EOBRUN--;		/* this band is processed at this moment */
-	    break;		/* force end-of-band */
-	  }
-	}
+        HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Scale and output coefficient in natural (dezigzagged) order */
+          (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al);
+        } else {
+          if (r == 15) {        /* ZRL */
+            k += 15;            /* skip 15 zeroes in band */
+          } else {              /* EOBr, run length is 2^r + appended bits */
+            EOBRUN = 1 << r;
+            if (r) {            /* EOBr, r > 0 */
+              CHECK_BIT_BUFFER(br_state, r, return FALSE);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            EOBRUN--;           /* this band is processed at this moment */
+            break;              /* force end-of-band */
+          }
+        }
       }
 
       BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
     }
 
     /* Completed MCU, so update state */
-    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+    entropy->saved.EOBRUN = EOBRUN;     /* only part of saved state we need */
   }
 
   /* Account for restart interval (no-op if not using restarts) */
@@ -440,9 +443,9 @@
 
 METHODDEF(boolean)
 decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int p1 = 1 << cinfo->Al;      /* 1 in the bit position being coded */
   int blkn;
   JBLOCKROW block;
   BITREAD_STATE_VARS;
@@ -451,7 +454,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* Not worth the cycles to check insufficient_data here,
@@ -489,11 +492,11 @@
 
 METHODDEF(boolean)
 decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
-{   
+{
   phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
   int Se = cinfo->Se;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-  int m1 = (-1) << cinfo->Al;	/* -1 in the bit position being coded */
+  int p1 = 1 << cinfo->Al;      /* 1 in the bit position being coded */
+  int m1 = (-1) << cinfo->Al;   /* -1 in the bit position being coded */
   register int s, k, r;
   unsigned int EOBRUN;
   JBLOCKROW block;
@@ -507,7 +510,7 @@
   if (cinfo->restart_interval) {
     if (entropy->restarts_to_go == 0)
       if (! process_restart(cinfo))
-	return FALSE;
+        return FALSE;
   }
 
   /* If we've run out of data, don't modify the MCU.
@@ -535,58 +538,58 @@
 
     if (EOBRUN == 0) {
       for (; k <= Se; k++) {
-	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
-	r = s >> 4;
-	s &= 15;
-	if (s) {
-	  if (s != 1)		/* size of new coef should always be 1 */
-	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1))
-	    s = p1;		/* newly nonzero coef is positive */
-	  else
-	    s = m1;		/* newly nonzero coef is negative */
-	} else {
-	  if (r != 15) {
-	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
-	    if (r) {
-	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
-	      r = GET_BITS(r);
-	      EOBRUN += r;
-	    }
-	    break;		/* rest of block is handled by EOB logic */
-	  }
-	  /* note s = 0 for processing ZRL */
-	}
-	/* Advance over already-nonzero coefs and r still-zero coefs,
-	 * appending correction bits to the nonzeroes.  A correction bit is 1
-	 * if the absolute value of the coefficient must be increased.
-	 */
-	do {
-	  thiscoef = *block + jpeg_natural_order[k];
-	  if (*thiscoef != 0) {
-	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	    if (GET_BITS(1)) {
-	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
-		if (*thiscoef >= 0)
-		  *thiscoef += p1;
-		else
-		  *thiscoef += m1;
-	      }
-	    }
-	  } else {
-	    if (--r < 0)
-	      break;		/* reached target zero coefficient */
-	  }
-	  k++;
-	} while (k <= Se);
-	if (s) {
-	  int pos = jpeg_natural_order[k];
-	  /* Output newly nonzero coefficient */
-	  (*block)[pos] = (JCOEF) s;
-	  /* Remember its position in case we have to suspend */
-	  newnz_pos[num_newnz++] = pos;
-	}
+        HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          if (s != 1)           /* size of new coef should always be 1 */
+            WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1))
+            s = p1;             /* newly nonzero coef is positive */
+          else
+            s = m1;             /* newly nonzero coef is negative */
+        } else {
+          if (r != 15) {
+            EOBRUN = 1 << r;    /* EOBr, run length is 2^r + appended bits */
+            if (r) {
+              CHECK_BIT_BUFFER(br_state, r, goto undoit);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            break;              /* rest of block is handled by EOB logic */
+          }
+          /* note s = 0 for processing ZRL */
+        }
+        /* Advance over already-nonzero coefs and r still-zero coefs,
+         * appending correction bits to the nonzeroes.  A correction bit is 1
+         * if the absolute value of the coefficient must be increased.
+         */
+        do {
+          thiscoef = *block + jpeg_natural_order[k];
+          if (*thiscoef != 0) {
+            CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+            if (GET_BITS(1)) {
+              if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+                if (*thiscoef >= 0)
+                  *thiscoef += p1;
+                else
+                  *thiscoef += m1;
+              }
+            }
+          } else {
+            if (--r < 0)
+              break;            /* reached target zero coefficient */
+          }
+          k++;
+        } while (k <= Se);
+        if (s) {
+          int pos = jpeg_natural_order[k];
+          /* Output newly nonzero coefficient */
+          (*block)[pos] = (JCOEF) s;
+          /* Remember its position in case we have to suspend */
+          newnz_pos[num_newnz++] = pos;
+        }
       }
     }
 
@@ -597,18 +600,18 @@
        * if the absolute value of the coefficient must be increased.
        */
       for (; k <= Se; k++) {
-	thiscoef = *block + jpeg_natural_order[k];
-	if (*thiscoef != 0) {
-	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
-	  if (GET_BITS(1)) {
-	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
-	      if (*thiscoef >= 0)
-		*thiscoef += p1;
-	      else
-		*thiscoef += m1;
-	    }
-	  }
-	}
+        thiscoef = *block + jpeg_natural_order[k];
+        if (*thiscoef != 0) {
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1)) {
+            if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+              if (*thiscoef >= 0)
+                *thiscoef += p1;
+              else
+                *thiscoef += m1;
+            }
+          }
+        }
       }
       /* Count one block completed in EOB run */
       EOBRUN--;
@@ -646,7 +649,7 @@
 
   entropy = (phuff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(phuff_entropy_decoder));
+                                sizeof(phuff_entropy_decoder));
   cinfo->entropy = (struct jpeg_entropy_decoder *) entropy;
   entropy->pub.start_pass = start_pass_phuff_decoder;
 
@@ -658,9 +661,9 @@
   /* Create progression status table */
   cinfo->coef_bits = (int (*)[DCTSIZE2])
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				cinfo->num_components*DCTSIZE2*SIZEOF(int));
+                                cinfo->num_components*DCTSIZE2*sizeof(int));
   coef_bit_ptr = & cinfo->coef_bits[0][0];
-  for (ci = 0; ci < cinfo->num_components; ci++) 
+  for (ci = 0; ci < cinfo->num_components; ci++)
     for (i = 0; i < DCTSIZE2; i++)
       *coef_bit_ptr++ = -1;
 }
diff --git a/jdpostct.c b/jdpostct.c
index 571563d..9eef9ee 100644
--- a/jdpostct.c
+++ b/jdpostct.c
@@ -1,8 +1,10 @@
 /*
  * jdpostct.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the decompression postprocessing controller.
@@ -31,12 +33,12 @@
    * For two-pass color quantization, we need a full-image buffer;
    * for one-pass operation, a strip buffer is sufficient.
    */
-  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if one-pass */
-  JSAMPARRAY buffer;		/* strip buffer, or current strip of virtual */
-  JDIMENSION strip_height;	/* buffer size in rows */
+  jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */
+  JSAMPARRAY buffer;            /* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;      /* buffer size in rows */
   /* for two-pass mode only: */
-  JDIMENSION starting_row;	/* row # of first row in current strip */
-  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+  JDIMENSION starting_row;      /* row # of first row in current strip */
+  JDIMENSION next_row;          /* index of next row to fill/empty in strip */
 } my_post_controller;
 
 typedef my_post_controller * my_post_ptr;
@@ -44,24 +46,21 @@
 
 /* Forward declarations */
 METHODDEF(void) post_process_1pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+         JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
+         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+         JDIMENSION out_rows_avail);
 #ifdef QUANT_2PASS_SUPPORTED
 METHODDEF(void) post_process_prepass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+         JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
+         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+         JDIMENSION out_rows_avail);
 METHODDEF(void) post_process_2pass
-	JPP((j_decompress_ptr cinfo,
-	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	     JDIMENSION in_row_groups_avail,
-	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	     JDIMENSION out_rows_avail));
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+         JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
+         JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+         JDIMENSION out_rows_avail);
 #endif
 
 
@@ -84,9 +83,9 @@
        * allocate a strip buffer.  Use the virtual-array buffer as workspace.
        */
       if (post->buffer == NULL) {
-	post->buffer = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, post->whole_image,
-	   (JDIMENSION) 0, post->strip_height, TRUE);
+        post->buffer = (*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr) cinfo, post->whole_image,
+           (JDIMENSION) 0, post->strip_height, TRUE);
       }
     } else {
       /* For single-pass processing without color quantization,
@@ -124,10 +123,10 @@
 
 METHODDEF(void)
 post_process_1pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION num_rows, max_rows;
@@ -139,11 +138,11 @@
     max_rows = post->strip_height;
   num_rows = 0;
   (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &num_rows, max_rows);
+                input_buf, in_row_group_ctr, in_row_groups_avail,
+                post->buffer, &num_rows, max_rows);
   /* Quantize and emit data. */
   (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer, output_buf + *out_row_ctr, (int) num_rows);
+                post->buffer, output_buf + *out_row_ctr, (int) num_rows);
   *out_row_ctr += num_rows;
 }
 
@@ -156,10 +155,10 @@
 
 METHODDEF(void)
 post_process_prepass (j_decompress_ptr cinfo,
-		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		      JDIMENSION in_row_groups_avail,
-		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		      JDIMENSION out_rows_avail)
+                      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                      JDIMENSION in_row_groups_avail,
+                      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                      JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION old_next_row, num_rows;
@@ -167,22 +166,22 @@
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
     post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, TRUE);
+        ((j_common_ptr) cinfo, post->whole_image,
+         post->starting_row, post->strip_height, TRUE);
   }
 
   /* Upsample some data (up to a strip height's worth). */
   old_next_row = post->next_row;
   (*cinfo->upsample->upsample) (cinfo,
-		input_buf, in_row_group_ctr, in_row_groups_avail,
-		post->buffer, &post->next_row, post->strip_height);
+                input_buf, in_row_group_ctr, in_row_groups_avail,
+                post->buffer, &post->next_row, post->strip_height);
 
   /* Allow quantizer to scan new data.  No data is emitted, */
   /* but we advance out_row_ctr so outer loop can tell when we're done. */
   if (post->next_row > old_next_row) {
     num_rows = post->next_row - old_next_row;
     (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
-					 (JSAMPARRAY) NULL, (int) num_rows);
+                                         (JSAMPARRAY) NULL, (int) num_rows);
     *out_row_ctr += num_rows;
   }
 
@@ -200,10 +199,10 @@
 
 METHODDEF(void)
 post_process_2pass (j_decompress_ptr cinfo,
-		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-		    JDIMENSION in_row_groups_avail,
-		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-		    JDIMENSION out_rows_avail)
+                    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail,
+                    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                    JDIMENSION out_rows_avail)
 {
   my_post_ptr post = (my_post_ptr) cinfo->post;
   JDIMENSION num_rows, max_rows;
@@ -211,8 +210,8 @@
   /* Reposition virtual buffer if at start of strip. */
   if (post->next_row == 0) {
     post->buffer = (*cinfo->mem->access_virt_sarray)
-	((j_common_ptr) cinfo, post->whole_image,
-	 post->starting_row, post->strip_height, FALSE);
+        ((j_common_ptr) cinfo, post->whole_image,
+         post->starting_row, post->strip_height, FALSE);
   }
 
   /* Determine number of rows to emit. */
@@ -227,8 +226,8 @@
 
   /* Quantize and emit data. */
   (*cinfo->cquantize->color_quantize) (cinfo,
-		post->buffer + post->next_row, output_buf + *out_row_ctr,
-		(int) num_rows);
+                post->buffer + post->next_row, output_buf + *out_row_ctr,
+                (int) num_rows);
   *out_row_ctr += num_rows;
 
   /* Advance if we filled the strip. */
@@ -253,11 +252,11 @@
 
   post = (my_post_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_post_controller));
+                                sizeof(my_post_controller));
   cinfo->post = (struct jpeg_d_post_controller *) post;
   post->pub.start_pass = start_pass_dpost;
-  post->whole_image = NULL;	/* flag for no virtual arrays */
-  post->buffer = NULL;		/* flag for no strip buffer */
+  post->whole_image = NULL;     /* flag for no virtual arrays */
+  post->buffer = NULL;          /* flag for no strip buffer */
 
   /* Create the quantization buffer, if needed */
   if (cinfo->quantize_colors) {
@@ -271,20 +270,20 @@
       /* We round up the number of rows to a multiple of the strip height. */
 #ifdef QUANT_2PASS_SUPPORTED
       post->whole_image = (*cinfo->mem->request_virt_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 (JDIMENSION) jround_up((long) cinfo->output_height,
-				(long) post->strip_height),
-	 post->strip_height);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+         cinfo->output_width * cinfo->out_color_components,
+         (JDIMENSION) jround_up((long) cinfo->output_height,
+                                (long) post->strip_height),
+         post->strip_height);
 #else
       ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 #endif /* QUANT_2PASS_SUPPORTED */
     } else {
       /* One-pass color quantization: just make a strip buffer. */
       post->buffer = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 cinfo->output_width * cinfo->out_color_components,
-	 post->strip_height);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         cinfo->output_width * cinfo->out_color_components,
+         post->strip_height);
     }
   }
 }
diff --git a/jdsample.c b/jdsample.c
index 80ffefb..2752966 100644
--- a/jdsample.c
+++ b/jdsample.c
@@ -1,8 +1,12 @@
 /*
  * jdsample.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains upsampling routines.
@@ -21,17 +25,20 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
+#include "jsimd.h"
+#include "jpegcomp.h"
 
 
 /* Pointer to routine to upsample a single component */
-typedef JMETHOD(void, upsample1_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JSAMPARRAY input_data,
+                               JSAMPARRAY * output_data_ptr);
 
 /* Private subobject */
 
 typedef struct {
-  struct jpeg_upsampler pub;	/* public fields */
+  struct jpeg_upsampler pub;    /* public fields */
 
   /* Color conversion buffer.  When using separate upsampling and color
    * conversion steps, this buffer holds one upsampled row group until it
@@ -45,8 +52,8 @@
   /* Per-component upsampling method pointers */
   upsample1_ptr methods[MAX_COMPONENTS];
 
-  int next_row_out;		/* counts rows emitted from color_buf */
-  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+  int next_row_out;             /* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
 
   /* Height of an input row group for each component. */
   int rowgroup_height[MAX_COMPONENTS];
@@ -87,10 +94,10 @@
 
 METHODDEF(void)
 sep_upsample (j_decompress_ptr cinfo,
-	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
-	      JDIMENSION in_row_groups_avail,
-	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-	      JDIMENSION out_rows_avail)
+              JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+              JDIMENSION in_row_groups_avail,
+              JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+              JDIMENSION out_rows_avail)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   int ci;
@@ -100,13 +107,13 @@
   /* Fill the conversion buffer, if it's empty */
   if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
     for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
-	 ci++, compptr++) {
+         ci++, compptr++) {
       /* Invoke per-component upsample method.  Notice we pass a POINTER
        * to color_buf[ci], so that fullsize_upsample can change it.
        */
       (*upsample->methods[ci]) (cinfo, compptr,
-	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
-	upsample->color_buf + ci);
+        input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+        upsample->color_buf + ci);
     }
     upsample->next_row_out = 0;
   }
@@ -118,7 +125,7 @@
   /* Not more than the distance to the end of the image.  Need this test
    * in case the image height is not a multiple of max_v_samp_factor:
    */
-  if (num_rows > upsample->rows_to_go) 
+  if (num_rows > upsample->rows_to_go)
     num_rows = upsample->rows_to_go;
   /* And not more than what the client can accept: */
   out_rows_avail -= *out_row_ctr;
@@ -126,9 +133,9 @@
     num_rows = out_rows_avail;
 
   (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
-				     (JDIMENSION) upsample->next_row_out,
-				     output_buf + *out_row_ctr,
-				     (int) num_rows);
+                                     (JDIMENSION) upsample->next_row_out,
+                                     output_buf + *out_row_ctr,
+                                     (int) num_rows);
 
   /* Adjust counts */
   *out_row_ctr += num_rows;
@@ -155,7 +162,7 @@
 
 METHODDEF(void)
 fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   *output_data_ptr = input_data;
 }
@@ -168,9 +175,9 @@
 
 METHODDEF(void)
 noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
-  *output_data_ptr = NULL;	/* safety check */
+  *output_data_ptr = NULL;      /* safety check */
 }
 
 
@@ -187,7 +194,7 @@
 
 METHODDEF(void)
 int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+              JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
   JSAMPARRAY output_data = *output_data_ptr;
@@ -208,15 +215,15 @@
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       for (h = h_expand; h > 0; h--) {
-	*outptr++ = invalue;
+        *outptr++ = invalue;
       }
     }
     /* Generate any additional output rows by duplicating the first one */
     if (v_expand > 1) {
       jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-			v_expand-1, cinfo->output_width);
+                        v_expand-1, cinfo->output_width);
     }
     inrow++;
     outrow += v_expand;
@@ -231,7 +238,7 @@
 
 METHODDEF(void)
 h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -244,7 +251,7 @@
     outptr = output_data[inrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
@@ -259,7 +266,7 @@
 
 METHODDEF(void)
 h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+               JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -273,12 +280,12 @@
     outptr = output_data[outrow];
     outend = outptr + cinfo->output_width;
     while (outptr < outend) {
-      invalue = *inptr++;	/* don't need GETJSAMPLE() here */
+      invalue = *inptr++;       /* don't need GETJSAMPLE() here */
       *outptr++ = invalue;
       *outptr++ = invalue;
     }
     jcopy_sample_rows(output_data, outrow, output_data, outrow+1,
-		      1, cinfo->output_width);
+                      1, cinfo->output_width);
     inrow++;
     outrow += 2;
   }
@@ -302,7 +309,7 @@
 
 METHODDEF(void)
 h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr, outptr;
@@ -343,7 +350,7 @@
 
 METHODDEF(void)
 h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
 {
   JSAMPARRAY output_data = *output_data_ptr;
   register JSAMPROW inptr0, inptr1, outptr;
@@ -360,10 +367,10 @@
     for (v = 0; v < 2; v++) {
       /* inptr0 points to nearest input row, inptr1 points to next nearest */
       inptr0 = input_data[inrow];
-      if (v == 0)		/* next nearest is row above */
-	inptr1 = input_data[inrow-1];
-      else			/* next nearest is row below */
-	inptr1 = input_data[inrow+1];
+      if (v == 0)               /* next nearest is row above */
+        inptr1 = input_data[inrow-1];
+      else                      /* next nearest is row below */
+        inptr1 = input_data[inrow+1];
       outptr = output_data[outrow++];
 
       /* Special case for first column */
@@ -374,12 +381,12 @@
       lastcolsum = thiscolsum; thiscolsum = nextcolsum;
 
       for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
-	/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
-	/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
-	nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
-	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
-	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
-	lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+        /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+        /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+        nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+        *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+        *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+        lastcolsum = thiscolsum; thiscolsum = nextcolsum;
       }
 
       /* Special case for last column */
@@ -406,19 +413,19 @@
 
   upsample = (my_upsample_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_upsampler));
+                                sizeof(my_upsampler));
   cinfo->upsample = (struct jpeg_upsampler *) upsample;
   upsample->pub.start_pass = start_pass_upsample;
   upsample->pub.upsample = sep_upsample;
   upsample->pub.need_context_rows = FALSE; /* until we find out differently */
 
-  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+  if (cinfo->CCIR601_sampling)  /* this isn't supported */
     ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
 
   /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
    * so don't ask for it.
    */
-  do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
+  do_fancy = cinfo->do_fancy_upsampling && cinfo->_min_DCT_scaled_size > 1;
 
   /* Verify we can handle the sampling factors, select per-component methods,
    * and create storage as needed.
@@ -428,10 +435,10 @@
     /* Compute size of an "input group" after IDCT scaling.  This many samples
      * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
      */
-    h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
-		 cinfo->min_DCT_scaled_size;
-    v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
-		 cinfo->min_DCT_scaled_size;
+    h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) /
+                 cinfo->_min_DCT_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+                 cinfo->_min_DCT_scaled_size;
     h_out_group = cinfo->max_h_samp_factor;
     v_out_group = cinfo->max_v_samp_factor;
     upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
@@ -445,34 +452,53 @@
       upsample->methods[ci] = fullsize_upsample;
       need_buffer = FALSE;
     } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group == v_out_group) {
+               v_in_group == v_out_group) {
       /* Special cases for 2h1v upsampling */
-      if (do_fancy && compptr->downsampled_width > 2)
-	upsample->methods[ci] = h2v1_fancy_upsample;
-      else
-	upsample->methods[ci] = h2v1_upsample;
+      if (do_fancy && compptr->downsampled_width > 2) {
+        if (jsimd_can_h2v1_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
+        else
+          upsample->methods[ci] = h2v1_fancy_upsample;
+      } else {
+        if (jsimd_can_h2v1_upsample())
+          upsample->methods[ci] = jsimd_h2v1_upsample;
+        else
+          upsample->methods[ci] = h2v1_upsample;
+      }
     } else if (h_in_group * 2 == h_out_group &&
-	       v_in_group * 2 == v_out_group) {
+               v_in_group * 2 == v_out_group) {
       /* Special cases for 2h2v upsampling */
       if (do_fancy && compptr->downsampled_width > 2) {
-	upsample->methods[ci] = h2v2_fancy_upsample;
-	upsample->pub.need_context_rows = TRUE;
-      } else
-	upsample->methods[ci] = h2v2_upsample;
+        if (jsimd_can_h2v2_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
+        else
+          upsample->methods[ci] = h2v2_fancy_upsample;
+        upsample->pub.need_context_rows = TRUE;
+      } else {
+        if (jsimd_can_h2v2_upsample())
+          upsample->methods[ci] = jsimd_h2v2_upsample;
+        else
+          upsample->methods[ci] = h2v2_upsample;
+      }
     } else if ((h_out_group % h_in_group) == 0 &&
-	       (v_out_group % v_in_group) == 0) {
+               (v_out_group % v_in_group) == 0) {
       /* Generic integral-factors upsampling method */
-      upsample->methods[ci] = int_upsample;
+#if defined(__mips__)
+      if (jsimd_can_int_upsample())
+        upsample->methods[ci] = jsimd_int_upsample;
+      else
+#endif
+        upsample->methods[ci] = int_upsample;
       upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
       upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
     } else
       ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
     if (need_buffer) {
       upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
-	((j_common_ptr) cinfo, JPOOL_IMAGE,
-	 (JDIMENSION) jround_up((long) cinfo->output_width,
-				(long) cinfo->max_h_samp_factor),
-	 (JDIMENSION) cinfo->max_v_samp_factor);
+        ((j_common_ptr) cinfo, JPOOL_IMAGE,
+         (JDIMENSION) jround_up((long) cinfo->output_width,
+                                (long) cinfo->max_h_samp_factor),
+         (JDIMENSION) cinfo->max_v_samp_factor);
     }
   }
 }
diff --git a/jdtrans.c b/jdtrans.c
index 6c0ab71..86fda3b 100644
--- a/jdtrans.c
+++ b/jdtrans.c
@@ -1,8 +1,10 @@
 /*
  * jdtrans.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1995-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains library routines for transcoding decompression,
@@ -16,7 +18,7 @@
 
 
 /* Forward declarations */
-LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+LOCAL(void) transdecode_master_selection (j_decompress_ptr cinfo);
 
 
 /*
@@ -55,20 +57,20 @@
       int retcode;
       /* Call progress monitor hook if present */
       if (cinfo->progress != NULL)
-	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+        (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
       /* Absorb some more input */
       retcode = (*cinfo->inputctl->consume_input) (cinfo);
       if (retcode == JPEG_SUSPENDED)
-	return NULL;
+        return NULL;
       if (retcode == JPEG_REACHED_EOI)
-	break;
+        break;
       /* Advance progress counter if appropriate */
       if (cinfo->progress != NULL &&
-	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
-	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
-	  /* startup underestimated number of scans; ratchet up one scan */
-	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
-	}
+          (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+          /* startup underestimated number of scans; ratchet up one scan */
+          cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+        }
       }
     }
     /* Set state so that jpeg_finish_decompress does the right thing */
@@ -84,7 +86,7 @@
   }
   /* Oops, improper usage */
   ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
-  return NULL;			/* keep compiler happy */
+  return NULL;                  /* keep compiler happy */
 }
 
 
@@ -99,9 +101,18 @@
   /* This is effectively a buffered-image operation. */
   cinfo->buffered_image = TRUE;
 
+#if JPEG_LIB_VERSION >= 80
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(cinfo);
+#endif
+
   /* Entropy decoding: either Huffman or arithmetic coding. */
   if (cinfo->arith_code) {
+#ifdef D_ARITH_CODING_SUPPORTED
+    jinit_arith_decoder(cinfo);
+#else
     ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
   } else {
     if (cinfo->progressive_mode) {
 #ifdef D_PROGRESSIVE_SUPPORTED
diff --git a/jerror.c b/jerror.c
index 3da7be8..cd3098d 100644
--- a/jerror.c
+++ b/jerror.c
@@ -1,8 +1,10 @@
 /*
  * jerror.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains simple error-reporting and trace-message routines.
@@ -28,7 +30,7 @@
 #include <windows.h>
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 
@@ -41,11 +43,7 @@
  * want to refer to it directly.
  */
 
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_message_table	jMsgTable
-#endif
-
-#define JMESSAGE(code,string)	string ,
+#define JMESSAGE(code,string)   string ,
 
 const char * const jpeg_std_message_table[] = {
 #include "jerror.h"
@@ -105,7 +103,7 @@
 #ifdef USE_WINDOWS_MESSAGEBOX
   /* Display it in a message dialog box */
   MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
-	     MB_OK | MB_ICONERROR);
+             MB_OK | MB_ICONERROR);
 #else
   /* Send it to stderr, adding a newline */
   fprintf(stderr, "%s\n", buffer);
@@ -167,8 +165,8 @@
   if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
     msgtext = err->jpeg_message_table[msg_code];
   } else if (err->addon_message_table != NULL &&
-	     msg_code >= err->first_addon_message &&
-	     msg_code <= err->last_addon_message) {
+             msg_code >= err->first_addon_message &&
+             msg_code <= err->last_addon_message) {
     msgtext = err->addon_message_table[msg_code - err->first_addon_message];
   }
 
@@ -193,10 +191,10 @@
     sprintf(buffer, msgtext, err->msg_parm.s);
   else
     sprintf(buffer, msgtext,
-	    err->msg_parm.i[0], err->msg_parm.i[1],
-	    err->msg_parm.i[2], err->msg_parm.i[3],
-	    err->msg_parm.i[4], err->msg_parm.i[5],
-	    err->msg_parm.i[6], err->msg_parm.i[7]);
+            err->msg_parm.i[0], err->msg_parm.i[1],
+            err->msg_parm.i[2], err->msg_parm.i[3],
+            err->msg_parm.i[4], err->msg_parm.i[5],
+            err->msg_parm.i[6], err->msg_parm.i[7]);
 }
 
 
@@ -213,17 +211,17 @@
 {
   cinfo->err->num_warnings = 0;
   /* trace_level is not reset since it is an application-supplied parameter */
-  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+  cinfo->err->msg_code = 0;     /* may be useful as a flag for "no error" */
 }
 
 
 /*
  * Fill in the standard error-handling methods in a jpeg_error_mgr object.
  * Typical call is:
- *	struct jpeg_compress_struct cinfo;
- *	struct jpeg_error_mgr err;
+ *      struct jpeg_compress_struct cinfo;
+ *      struct jpeg_error_mgr err;
  *
- *	cinfo.err = jpeg_std_error(&err);
+ *      cinfo.err = jpeg_std_error(&err);
  * after which the application may override some of the methods.
  */
 
@@ -236,16 +234,16 @@
   err->format_message = format_message;
   err->reset_error_mgr = reset_error_mgr;
 
-  err->trace_level = 0;		/* default = no tracing */
-  err->num_warnings = 0;	/* no warnings emitted yet */
-  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+  err->trace_level = 0;         /* default = no tracing */
+  err->num_warnings = 0;        /* no warnings emitted yet */
+  err->msg_code = 0;            /* may be useful as a flag for "no error" */
 
   /* Initialize message table pointers */
   err->jpeg_message_table = jpeg_std_message_table;
   err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
 
   err->addon_message_table = NULL;
-  err->first_addon_message = 0;	/* for safety */
+  err->first_addon_message = 0; /* for safety */
   err->last_addon_message = 0;
 
   return err;
diff --git a/jerror.h b/jerror.h
index fc2fffe..402613e 100644
--- a/jerror.h
+++ b/jerror.h
@@ -1,8 +1,11 @@
 /*
  * jerror.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 1997-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file defines the error and message codes for the JPEG library.
@@ -32,39 +35,48 @@
 
 typedef enum {
 
-#define JMESSAGE(code,string)	code ,
+#define JMESSAGE(code,string)   code ,
 
 #endif /* JMAKE_ENUM_LIST */
 
 JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
 
 /* For maintenance convenience, list is alphabetical by message code name */
+#if JPEG_LIB_VERSION < 70
 JMESSAGE(JERR_ARITH_NOTIMPL,
-	 "Sorry, there are legal restrictions on arithmetic coding")
+         "Sorry, arithmetic coding is not implemented")
+#endif
 JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
 JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
 JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
 JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#endif
 JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
 JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
 JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
 JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
 JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
 JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
 JMESSAGE(JERR_BAD_LIB_VERSION,
-	 "Wrong JPEG library version: library is %d, caller expects %d")
+         "Wrong JPEG library version: library is %d, caller expects %d")
 JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
 JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
 JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
 JMESSAGE(JERR_BAD_PROGRESSION,
-	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+         "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
 JMESSAGE(JERR_BAD_PROG_SCRIPT,
-	 "Invalid progressive parameters at scan script entry %d")
+         "Invalid progressive parameters at scan script entry %d")
 JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
 JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
 JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
 JMESSAGE(JERR_BAD_STRUCT_SIZE,
-	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+         "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
 JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
 JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
 JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
@@ -88,11 +100,14 @@
 JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
 JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
 JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
-	 "Cannot transcode due to multiple use of quantization table %d")
+         "Cannot transcode due to multiple use of quantization table %d")
 JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
 JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
 JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
 JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+#endif
 JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
 JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
 JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
@@ -100,7 +115,7 @@
 JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
 JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
 JMESSAGE(JERR_QUANT_COMPONENTS,
-	 "Cannot quantize more than %d color components")
+         "Cannot quantize more than %d color components")
 JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
 JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
 JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
@@ -112,19 +127,19 @@
 JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
 JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
 JMESSAGE(JERR_TFILE_WRITE,
-	 "Write failed on temporary file --- out of disk space?")
+         "Write failed on temporary file --- out of disk space?")
 JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
 JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
 JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
 JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
 JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
 JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
-JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT_SHORT)
 JMESSAGE(JMSG_VERSION, JVERSION)
 JMESSAGE(JTRC_16BIT_TABLES,
-	 "Caution: quantization tables are too coarse for baseline JPEG")
+         "Caution: quantization tables are too coarse for baseline JPEG")
 JMESSAGE(JTRC_ADOBE,
-	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+         "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
 JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
 JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
 JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
@@ -137,9 +152,9 @@
 JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
 JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
 JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
-	 "Warning: thumbnail image size does not match data length %u")
+         "Warning: thumbnail image size does not match data length %u")
 JMESSAGE(JTRC_JFIF_EXTENSION,
-	 "JFIF extension marker: type 0x%02x, length %u")
+         "JFIF extension marker: type 0x%02x, length %u")
 JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
 JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
 JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
@@ -150,7 +165,7 @@
 JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
 JMESSAGE(JTRC_RST, "RST%d")
 JMESSAGE(JTRC_SMOOTH_NOTIMPL,
-	 "Smoothing not supported with nonstandard sampling ratios")
+         "Smoothing not supported with nonstandard sampling ratios")
 JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
 JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
 JMESSAGE(JTRC_SOI, "Start of Image")
@@ -160,28 +175,38 @@
 JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
 JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
 JMESSAGE(JTRC_THUMB_JPEG,
-	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+         "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
 JMESSAGE(JTRC_THUMB_PALETTE,
-	 "JFIF extension marker: palette thumbnail image, length %u")
+         "JFIF extension marker: palette thumbnail image, length %u")
 JMESSAGE(JTRC_THUMB_RGB,
-	 "JFIF extension marker: RGB thumbnail image, length %u")
+         "JFIF extension marker: RGB thumbnail image, length %u")
 JMESSAGE(JTRC_UNKNOWN_IDS,
-	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+         "Unrecognized component IDs %d %d %d, assuming YCbCr")
 JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
 JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
 JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
 JMESSAGE(JWRN_BOGUS_PROGRESSION,
-	 "Inconsistent progression sequence for component %d coefficient %d")
+         "Inconsistent progression sequence for component %d coefficient %d")
 JMESSAGE(JWRN_EXTRANEOUS_DATA,
-	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+         "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
 JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
 JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
 JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
 JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
 JMESSAGE(JWRN_MUST_RESYNC,
-	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+         "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
 JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
 JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#if defined(C_ARITH_CODING_SUPPORTED) || defined(D_ARITH_CODING_SUPPORTED)
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
+#endif
 
 #ifdef JMAKE_ENUM_LIST
 
@@ -232,7 +257,7 @@
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
    (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
 
-#define MAKESTMT(stuff)		do { stuff } while (0)
+#define MAKESTMT(stuff)         do { stuff } while (0)
 
 /* Nonfatal errors (we can keep going, but the data is probably corrupt) */
 #define WARNMS(cinfo,code)  \
@@ -263,26 +288,26 @@
    (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
 #define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           _mp[4] = (p5); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
   MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
-	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
-	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
-	   (cinfo)->err->msg_code = (code); \
-	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+           _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+           _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
 #define TRACEMSS(cinfo,lvl,code,str)  \
   ((cinfo)->err->msg_code = (code), \
    strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
diff --git a/jfdctflt.c b/jfdctflt.c
index 79d7a00..a8367c6 100644
--- a/jfdctflt.c
+++ b/jfdctflt.c
@@ -37,7 +37,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_FLOAT_SUPPORTED
 
@@ -76,24 +76,24 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     dataptr[4] = tmp10 - tmp11;
-    
+
     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
     dataptr[6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -103,15 +103,15 @@
     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
-    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[5] = z13 + z2;      /* phase 6 */
     dataptr[3] = z13 - z2;
     dataptr[1] = z11 + z4;
     dataptr[7] = z11 - z4;
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -126,24 +126,24 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     dataptr[DCTSIZE*6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -153,7 +153,7 @@
     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@@ -161,7 +161,7 @@
     dataptr[DCTSIZE*1] = z11 + z4;
     dataptr[DCTSIZE*7] = z11 - z4;
 
-    dataptr++;			/* advance pointer to next column */
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jfdctfst.c b/jfdctfst.c
index ccb378a..4936d47 100644
--- a/jfdctfst.c
+++ b/jfdctfst.c
@@ -33,7 +33,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_IFAST_SUPPORTED
 
@@ -76,10 +76,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#define FIX_0_382683433  ((INT32)   98)         /* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)         /* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)         /* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)         /* FIX(1.306562965) */
 #else
 #define FIX_0_382683433  FIX(0.382683433)
 #define FIX_0_541196100  FIX(0.541196100)
@@ -132,24 +132,24 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     dataptr[4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
     dataptr[6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -159,15 +159,15 @@
     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
-    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[5] = z13 + z2;      /* phase 6 */
     dataptr[3] = z13 - z2;
     dataptr[1] = z11 + z4;
     dataptr[7] = z11 - z4;
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -182,24 +182,24 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     dataptr[DCTSIZE*6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -209,7 +209,7 @@
     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@@ -217,7 +217,7 @@
     dataptr[DCTSIZE*1] = z11 + z4;
     dataptr[DCTSIZE*7] = z11 - z4;
 
-    dataptr++;			/* advance pointer to next column */
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jfdctint.c b/jfdctint.c
index 0a78b64..14f486c 100644
--- a/jfdctint.c
+++ b/jfdctint.c
@@ -26,7 +26,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_ISLOW_SUPPORTED
 
@@ -79,7 +79,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -90,18 +90,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -160,36 +160,36 @@
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
+                                   CONST_BITS-PASS1_BITS);
     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
-    
+                                   CONST_BITS-PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -198,16 +198,16 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-    
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns.
@@ -225,36 +225,36 @@
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS);
-    
+                                           CONST_BITS+PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -263,20 +263,20 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
-					   CONST_BITS+PASS1_BITS);
-    
-    dataptr++;			/* advance pointer to next column */
+                                           CONST_BITS+PASS1_BITS);
+
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
diff --git a/jidctflt.c b/jidctflt.c
index 0188ce3..2b2e228 100644
--- a/jidctflt.c
+++ b/jidctflt.c
@@ -1,9 +1,12 @@
 /*
  * jidctflt.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
+ * Modified 2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a floating-point implementation of the
  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
@@ -39,7 +42,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_FLOAT_SUPPORTED
 
@@ -66,8 +69,8 @@
 
 GLOBAL(void)
 jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
@@ -76,10 +79,10 @@
   FLOAT_MULT_TYPE * quantptr;
   FAST_FLOAT * wsptr;
   JSAMPROW outptr;
-  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
   int ctr;
   FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
-  SHIFT_TEMPS
+  #define _0_125 ((FLOAT_MULT_TYPE)0.125)
 
   /* Pass 1: process columns from input, store into work array. */
 
@@ -95,14 +98,15 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
-      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-      
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0],
+                                    quantptr[DCTSIZE*0] * _0_125);
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -111,53 +115,53 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part */
 
-    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
-    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
-    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
-    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0] * _0_125);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2] * _0_125);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4] * _0_125);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6] * _0_125);
 
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
     tmp11 = tmp0 - tmp2;
 
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
 
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
     tmp3 = tmp10 - tmp13;
     tmp1 = tmp11 + tmp12;
     tmp2 = tmp11 - tmp12;
-    
+
     /* Odd part */
 
-    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
-    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
-    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1] * _0_125);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3] * _0_125);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5] * _0_125);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7] * _0_125);
 
-    z13 = tmp6 + tmp5;		/* phase 6 */
+    z13 = tmp6 + tmp5;          /* phase 6 */
     z10 = tmp6 - tmp5;
     z11 = tmp4 + tmp7;
     z12 = tmp4 - tmp7;
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
 
     z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
-    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
+    tmp4 = tmp10 - tmp5;
 
     wsptr[DCTSIZE*0] = tmp0 + tmp7;
     wsptr[DCTSIZE*7] = tmp0 - tmp7;
@@ -165,16 +169,15 @@
     wsptr[DCTSIZE*6] = tmp1 - tmp6;
     wsptr[DCTSIZE*2] = tmp2 + tmp5;
     wsptr[DCTSIZE*5] = tmp2 - tmp5;
-    wsptr[DCTSIZE*4] = tmp3 + tmp4;
-    wsptr[DCTSIZE*3] = tmp3 - tmp4;
+    wsptr[DCTSIZE*3] = tmp3 + tmp4;
+    wsptr[DCTSIZE*4] = tmp3 - tmp4;
 
-    inptr++;			/* advance pointers to next column */
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3. */
 
   wsptr = workspace;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -184,11 +187,13 @@
      * the simplification applies less often (typically 5% to 10% of the time).
      * And testing floats for zero is relatively expensive, so we don't bother.
      */
-    
+
     /* Even part */
 
-    tmp10 = wsptr[0] + wsptr[4];
-    tmp11 = wsptr[0] - wsptr[4];
+    /* Apply signed->unsigned and prepare float->int conversion */
+    z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5);
+    tmp10 = z5 + wsptr[4];
+    tmp11 = z5 - wsptr[4];
 
     tmp13 = wsptr[2] + wsptr[6];
     tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
@@ -209,33 +214,25 @@
     tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
 
     z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
-    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
-    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+    tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
 
     tmp6 = tmp12 - tmp7;
     tmp5 = tmp11 - tmp6;
-    tmp4 = tmp10 + tmp5;
+    tmp4 = tmp10 - tmp5;
 
-    /* Final output stage: scale down by a factor of 8 and range-limit */
+    /* Final output stage: float->int conversion and range-limit */
 
-    outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
-			    & RANGE_MASK];
-    outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
-			    & RANGE_MASK];
-    outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
-			    & RANGE_MASK];
-    outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
-			    & RANGE_MASK];
-    outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
-			    & RANGE_MASK];
-    outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
-			    & RANGE_MASK];
-    outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
-			    & RANGE_MASK];
-    outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+    outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK];
+    outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK];
+    outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK];
+    outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK];
+    outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK];
+    outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK];
+    outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK];
+    outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
diff --git a/jidctfst.c b/jidctfst.c
index dba4216..cae22b9 100644
--- a/jidctfst.c
+++ b/jidctfst.c
@@ -35,7 +35,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_IFAST_SUPPORTED
 
@@ -78,7 +78,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  8
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -89,10 +89,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
-#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
-#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
-#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#define FIX_1_082392200  ((INT32)  277)         /* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)         /* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)         /* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)         /* FIX(2.613125930) */
 #else
 #define FIX_1_082392200  FIX(1.082392200)
 #define FIX_1_414213562  FIX(1.414213562)
@@ -129,7 +129,7 @@
 #define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
 #else
 #define DEQUANTIZE(coef,quantval)  \
-	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+        DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
 #endif
 
 
@@ -138,11 +138,11 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#define ISHIFT_TEMPS    DCTELEM ishift_temp;
 #if BITS_IN_JSAMPLE == 8
-#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#define DCTELEMBITS  16         /* DCTELEM may be 16 or 32 bits */
 #else
-#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#define DCTELEMBITS  32         /* DCTELEM must be 32 bits */
 #endif
 #define IRIGHT_SHIFT(x,shft)  \
     ((ishift_temp = (x)) < 0 ? \
@@ -150,7 +150,7 @@
      (ishift_temp >> (shft)))
 #else
 #define ISHIFT_TEMPS
-#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define IRIGHT_SHIFT(x,shft)    ((x) >> (shft))
 #endif
 
 #ifdef USE_ACCURATE_ROUNDING
@@ -166,8 +166,8 @@
 
 GLOBAL(void)
 jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
   DCTELEM tmp10, tmp11, tmp12, tmp13;
@@ -178,9 +178,9 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
-  SHIFT_TEMPS			/* for DESCALE */
-  ISHIFT_TEMPS			/* for IDESCALE */
+  int workspace[DCTSIZE2];      /* buffers data between passes */
+  SHIFT_TEMPS                   /* for DESCALE */
+  ISHIFT_TEMPS                  /* for IDESCALE */
 
   /* Pass 1: process columns from input, store into work array. */
 
@@ -196,11 +196,11 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
 
@@ -212,13 +212,13 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part */
 
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
@@ -226,17 +226,17 @@
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
-    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
     tmp11 = tmp0 - tmp2;
 
-    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
     tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
 
-    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
     tmp3 = tmp10 - tmp13;
     tmp1 = tmp11 + tmp12;
     tmp2 = tmp11 - tmp12;
-    
+
     /* Odd part */
 
     tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
@@ -244,19 +244,19 @@
     tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
 
-    z13 = tmp6 + tmp5;		/* phase 6 */
+    z13 = tmp6 + tmp5;          /* phase 6 */
     z10 = tmp6 - tmp5;
     z11 = tmp4 + tmp7;
     z12 = tmp4 - tmp7;
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
 
     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
     tmp4 = tmp10 + tmp5;
 
@@ -269,11 +269,11 @@
     wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
     wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
 
-    inptr++;			/* advance pointers to next column */
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
   /* Note that we must descale the results by a factor of 8 == 2**3, */
   /* and also undo the PASS1_BITS scaling. */
@@ -288,14 +288,14 @@
      * test takes more time than it's worth.  In that case this section
      * may be commented out.
      */
-    
+
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
@@ -305,11 +305,11 @@
       outptr[6] = dcval;
       outptr[7] = dcval;
 
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
 
     tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
@@ -317,7 +317,7 @@
 
     tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
     tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
-	    - tmp13;
+            - tmp13;
 
     tmp0 = tmp10 + tmp13;
     tmp3 = tmp10 - tmp13;
@@ -331,37 +331,37 @@
     z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
     z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
 
-    tmp7 = z11 + z13;		/* phase 5 */
+    tmp7 = z11 + z13;           /* phase 5 */
     tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
 
     z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
     tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
     tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
 
-    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
     tmp5 = tmp11 - tmp6;
     tmp4 = tmp10 + tmp5;
 
     /* Final output stage: scale down by a factor of 8 and range-limit */
 
     outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
     outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-			    & RANGE_MASK];
+                            & RANGE_MASK];
 
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
diff --git a/jidctint.c b/jidctint.c
index a72b320..688fd22 100644
--- a/jidctint.c
+++ b/jidctint.c
@@ -2,6 +2,7 @@
  * jidctint.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2009 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -23,12 +24,33 @@
  * The advantage of this method is that no data path contains more than one
  * multiplication; this allows a very simple and accurate implementation in
  * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement without additional resampling:
+ * NxN (N=1...16) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients as
+ * the lower frequencies and assuming the higher frequencies to be zero.
+ * It turns out that the computational effort is similar to that of the
+ * 8x8 IDCT relative to the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
  */
 
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef DCT_ISLOW_SUPPORTED
 
@@ -38,7 +60,7 @@
  */
 
 #if DCTSIZE != 8
-  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
 #endif
 
 
@@ -79,7 +101,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -90,18 +112,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -146,8 +168,8 @@
 
 GLOBAL(void)
 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col)
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp1, tmp2, tmp3;
   INT32 tmp10, tmp11, tmp12, tmp13;
@@ -158,7 +180,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE2];	/* buffers data between passes */
+  int workspace[DCTSIZE2];      /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -177,14 +199,14 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
-	inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+        inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -193,49 +215,49 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
-      inptr++;			/* advance pointers to next column */
+
+      inptr++;                  /* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
+
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
+
     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
 
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z1 = tmp0 + tmp3;
     z2 = tmp1 + tmp2;
     z3 = tmp0 + tmp2;
     z4 = tmp1 + tmp3;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -244,17 +266,17 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     tmp0 += z1 + z3;
     tmp1 += z2 + z4;
     tmp2 += z2 + z3;
     tmp3 += z1 + z4;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
@@ -263,12 +285,12 @@
     wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
-    inptr++;			/* advance pointers to next column */
+
+    inptr++;                    /* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
-  
+
   /* Pass 2: process rows from work array, store into output array. */
   /* Note that we must descale the results by a factor of 8 == 2**3, */
   /* and also undo the PASS1_BITS scaling. */
@@ -283,14 +305,14 @@
      * test takes more time than it's worth.  In that case this section
      * may be commented out.
      */
-    
+
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
@@ -300,44 +322,44 @@
       outptr[6] = dcval;
       outptr[7] = dcval;
 
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
-    
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
+
     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
     tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
     tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    
+
     tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS;
     tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = (INT32) wsptr[7];
     tmp1 = (INT32) wsptr[5];
     tmp2 = (INT32) wsptr[3];
     tmp3 = (INT32) wsptr[1];
-    
+
     z1 = tmp0 + tmp3;
     z2 = tmp1 + tmp2;
     z3 = tmp0 + tmp2;
     z4 = tmp1 + tmp3;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -346,44 +368,2256 @@
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     tmp0 += z1 + z3;
     tmp1 += z2 + z4;
     tmp2 += z2 + z3;
     tmp3 += z1 + z4;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
     outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
-					  CONST_BITS+PASS1_BITS+3)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7*7];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp13 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp13 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 7;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6*6];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[6*1] = (int) (tmp11 + tmp1);
+    wsptr[6*4] = (int) (tmp11 - tmp1);
+    wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (INT32) wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
+    tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
+    tmp1 = (z1 - z2 - z3) << CONST_BITS;
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 6;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.
+ *
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
+  INT32 z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5*5];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp12 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp12 <<= CONST_BITS;
+    tmp0 = (INT32) wsptr[2];
+    tmp1 = (INT32) wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= z2 << 2;
+
+    /* Odd part */
+
+    z2 = (INT32) wsptr[1];
+    z3 = (INT32) wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 5;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.
+ *
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3*3];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+    tmp2 = (INT32) wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (INT32) wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 3;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*9];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*10];  /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
+                        CONST_BITS-PASS1_BITS);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = z3 << CONST_BITS;
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2] = (int) (tmp22 + tmp12);
+    wsptr[8*7] = (int) (tmp22 - tmp12);
+    wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z3 <<= CONST_BITS;
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
+
+    tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+    outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS+PASS1_BITS+3)
+                            & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Dequantize coef_block with compptr->dct_table and inverse-DCT it into an
+ * 11x11 block written to rows output_buf[0..10], starting at output_col.
+ * cinfo supplies the range_limit table that maps final values to samples.
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14;          /* odd-part results */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;   /* even-part results */
+  INT32 z1, z2, z3, z4;                             /* inputs / scratch */
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*11];  /* pass-1 output: 11 rows x 8 columns */
+  SHIFT_TEMPS
+
+  /* Pass 1: each of the 8 input columns yields 11 values in the work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: tmp10 = scaled DC term; rows 2,4,6 -> z1,z2,z3. */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if tmp10 < 0 -- confirm */
+    /* Add fudge factor (half of the pass-1 descale divisor) for rounding. */
+    tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;                                        /* z4 = z1 + z3 - z2 */
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part: rows 1,3,5,7 -> z1..z4; results in tmp10..tmp14. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage: even+odd gives row k, even-odd gives row 10-k. */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); /* middle row */
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: tmp10 = scaled wsptr[0]; entries 2,4,6 -> z1,z2,z3. */
+
+    /* Add fudge factor (half of the final descale divisor once shifted). */
+    tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp10 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[4];
+    z3 = (INT32) wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
+    z4 -= z2;                                        /* z4 = z1 + z3 - z2 */
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part: entries 1,3,5,7 -> z1..z4; results in tmp10..tmp14. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, index range_limit. */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Dequantize coef_block with compptr->dct_table and inverse-DCT it into a
+ * 12x12 block written to rows output_buf[0..11], starting at output_col.
+ * cinfo supplies the range_limit table that maps final values to samples.
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;   /* odd-part results */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;   /* even-part results */
+  INT32 z1, z2, z3, z4;                             /* inputs / scratch */
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*12];  /* pass-1 output: 12 rows x 8 columns */
+  SHIFT_TEMPS
+
+  /* Pass 1: each of the 8 input columns yields 12 values in the work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: rows 0,2,4,6 -> tmp20..tmp25 (z3 = scaled DC term). */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if z3 < 0 -- confirm */
+    /* Add fudge factor (half of the pass-1 descale divisor) for rounding. */
+    z3 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;    /* tmp10..tmp12 are scratch within the even part */
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: rows 1,3,5,7 -> z1..z4; results in tmp10..tmp15. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;           /* tmp11 and tmp14 are recomputed from z1-z4, z2-z3 */
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: even+odd gives row k, even-odd gives row 11-k. */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: entries 0,2,4,6 -> tmp20..tmp25 (z3 = scaled wsptr[0]). */
+
+    /* Add fudge factor (half of the final descale divisor once shifted). */
+    z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z3 <<= CONST_BITS;
+
+    z4 = (INT32) wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;    /* tmp10..tmp12 are scratch within the even part */
+    tmp11 = z3 - z4;
+
+    z1 = (INT32) wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 <<= CONST_BITS;
+    z2 = (INT32) wsptr[6];
+    z2 <<= CONST_BITS;
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part: entries 1,3,5,7 -> z1..z4; results in tmp10..tmp15. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;           /* tmp11 and tmp14 are recomputed from z1-z4, z2-z3 */
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, index range_limit. */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Dequantize coef_block with compptr->dct_table and inverse-DCT it into a
+ * 13x13 block written to rows output_buf[0..12], starting at output_col.
+ * cinfo supplies the range_limit table that maps final values to samples.
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;          /* odd-part results */
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;   /* even-part results */
+  INT32 z1, z2, z3, z4;                                    /* inputs / scratch */
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*13];  /* pass-1 output: 13 rows x 8 columns */
+  SHIFT_TEMPS
+
+  /* Pass 1: each of the 8 input columns yields 13 values in the work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: z1 = scaled DC term; rows 2,4,6 -> z2,z3,z4. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if z1 < 0 -- confirm */
+    /* Add fudge factor (half of the pass-1 descale divisor) for rounding. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = z3 + z4;    /* tmp10..tmp13 are scratch within the even part */
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part: rows 1,3,5,7 -> z1..z4; results in tmp10..tmp15. */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage: even+odd gives row k, even-odd gives row 12-k. */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); /* middle row */
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: z1 = scaled wsptr[0]; entries 2,4,6 -> z2,z3,z4. */
+
+    /* Add fudge factor (half of the final descale divisor once shifted). */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = z3 + z4;    /* tmp10..tmp13 are scratch within the even part */
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
+    tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part: entries 1,3,5,7 -> z1..z4; results in tmp10..tmp15. */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage: descale, mask with RANGE_MASK, index range_limit. */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.
+ *
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*14];  /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
+                        CONST_BITS-PASS1_BITS);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp13 = z4 << CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = (z1 - z3) << PASS1_BITS;
+
+    /* Final output stage */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) (tmp23 + tmp13);
+    wsptr[8*10] = (int) (tmp23 - tmp13);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+    z4 = (INT32) wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+    z4 <<= CONST_BITS;
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = ((z1 - z3) << CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block: pass 1 turns the 8 input columns into an
+ * 8x15 workspace, pass 2 turns each workspace row into 15 output samples.
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*15];  /* 15 rows of 8 ints; carries pass 1 output to pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: dequantize each of the 8 input columns, store 15 rows of output. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4, 6 of this column */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z1 <<= CONST_BITS;
+    /* Add rounding bias: half of the 2^(CONST_BITS-PASS1_BITS) descale below. */
+    z1 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: coefficient rows 1, 3, 5, 7; tmp10..tmp16 are reused here */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: even +/- odd pairs, descaled, into rows k and 14-k */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add rounding bias: half of the 2^(CONST_BITS+PASS1_BITS+3) descale. */
+    z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    z1 <<= CONST_BITS;
+
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[4];
+    z4 = (INT32) wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part: workspace entries 1, 3, 5, 7; tmp10..tmp16 are reused here */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[5];
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (INT32) wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
+    tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage: descale, mask, and look up through range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block: pass 1 turns the 8 input columns into an
+ * 8x16 workspace, pass 2 turns each workspace row into 16 output samples.
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                 JCOEFPTR coef_block,
+                 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8*16];  /* 16 rows of 8 ints; carries pass 1 output to pass 2 */
+  SHIFT_TEMPS
+
+  /* Pass 1: dequantize each of the 8 input columns, store 16 rows of output. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part: coefficient rows 0, 2, 4, 6 of this column */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= CONST_BITS;
+    /* Add rounding bias (half the descale); ONE as in pass 2 and siblings. */
+    tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: coefficient rows 1, 3, 5, 7; tmp0..tmp13 are reused here */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: even +/- odd pairs, descaled, into rows k and 15-k */
+
+    wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
+    wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
+    wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
+    wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
+    wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
+    wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
+    wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
+    wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
+    wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part: workspace entries 0, 2, 4, 6 of this row */
+
+    /* Add rounding bias: half of the 2^(CONST_BITS+PASS1_BITS+3) descale. */
+    tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
+    tmp0 <<= CONST_BITS;
+
+    z1 = (INT32) wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (INT32) wsptr[2];
+    z2 = (INT32) wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part: workspace entries 1, 3, 5, 7; tmp0..tmp13 are reused here */
+
+    z1 = (INT32) wsptr[1];
+    z2 = (INT32) wsptr[3];
+    z3 = (INT32) wsptr[5];
+    z4 = (INT32) wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage: descale, mask, and look up through range_limit[] */
+
+    outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+    outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
+                                               CONST_BITS+PASS1_BITS+3)
+                             & RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
 #endif /* DCT_ISLOW_SUPPORTED */
diff --git a/jidctred.c b/jidctred.c
index 421f3c7..2b385f8 100644
--- a/jidctred.c
+++ b/jidctred.c
@@ -23,7 +23,7 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jdct.h"		/* Private declarations for DCT subsystem */
+#include "jdct.h"               /* Private declarations for DCT subsystem */
 
 #ifdef IDCT_SCALING_SUPPORTED
 
@@ -44,7 +44,7 @@
 #define PASS1_BITS  2
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -55,20 +55,20 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_211164243  ((INT32)  1730)	/* FIX(0.211164243) */
-#define FIX_0_509795579  ((INT32)  4176)	/* FIX(0.509795579) */
-#define FIX_0_601344887  ((INT32)  4926)	/* FIX(0.601344887) */
-#define FIX_0_720959822  ((INT32)  5906)	/* FIX(0.720959822) */
-#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
-#define FIX_0_850430095  ((INT32)  6967)	/* FIX(0.850430095) */
-#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
-#define FIX_1_061594337  ((INT32)  8697)	/* FIX(1.061594337) */
-#define FIX_1_272758580  ((INT32)  10426)	/* FIX(1.272758580) */
-#define FIX_1_451774981  ((INT32)  11893)	/* FIX(1.451774981) */
-#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
-#define FIX_2_172734803  ((INT32)  17799)	/* FIX(2.172734803) */
-#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
-#define FIX_3_624509785  ((INT32)  29692)	/* FIX(3.624509785) */
+#define FIX_0_211164243  ((INT32)  1730)        /* FIX(0.211164243) */
+#define FIX_0_509795579  ((INT32)  4176)        /* FIX(0.509795579) */
+#define FIX_0_601344887  ((INT32)  4926)        /* FIX(0.601344887) */
+#define FIX_0_720959822  ((INT32)  5906)        /* FIX(0.720959822) */
+#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
+#define FIX_0_850430095  ((INT32)  6967)        /* FIX(0.850430095) */
+#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
+#define FIX_1_061594337  ((INT32)  8697)        /* FIX(1.061594337) */
+#define FIX_1_272758580  ((INT32)  10426)       /* FIX(1.272758580) */
+#define FIX_1_451774981  ((INT32)  11893)       /* FIX(1.451774981) */
+#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
+#define FIX_2_172734803  ((INT32)  17799)       /* FIX(2.172734803) */
+#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
+#define FIX_3_624509785  ((INT32)  29692)       /* FIX(3.624509785) */
 #else
 #define FIX_0_211164243  FIX(0.211164243)
 #define FIX_0_509795579  FIX(0.509795579)
@@ -116,8 +116,8 @@
 
 GLOBAL(void)
 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp2, tmp10, tmp12;
   INT32 z1, z2, z3, z4;
@@ -127,7 +127,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE*4];	/* buffers data between passes */
+  int workspace[DCTSIZE*4];     /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -140,57 +140,57 @@
     if (ctr == DCTSIZE-4)
       continue;
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
-	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
-	inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
+        inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero; we need not examine term 4 for 4x4 output */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
       wsptr[DCTSIZE*3] = dcval;
-      
+
       continue;
     }
-    
+
     /* Even part */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     tmp0 <<= (CONST_BITS+1);
-    
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
     tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
-    
+
     tmp10 = tmp0 + tmp2;
     tmp12 = tmp0 - tmp2;
-    
+
     /* Odd part */
-    
+
     z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
-	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
-	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
-	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
-    
+         + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+         + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+         + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+
     tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
-	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
-	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
-	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+         + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+         + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+         + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
 
     /* Final output stage */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
     wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
   }
-  
+
   /* Pass 2: process 4 rows from work array, store into output array. */
 
   wsptr = workspace;
@@ -200,64 +200,64 @@
 
 #ifndef NO_ZERO_ROW_TEST
     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
-	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
       outptr[2] = dcval;
       outptr[3] = dcval;
-      
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
-    
+
     tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
-    
+
     tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
-	 + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
-    
+         + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+
     tmp10 = tmp0 + tmp2;
     tmp12 = tmp0 - tmp2;
-    
+
     /* Odd part */
-    
+
     z1 = (INT32) wsptr[7];
     z2 = (INT32) wsptr[5];
     z3 = (INT32) wsptr[3];
     z4 = (INT32) wsptr[1];
-    
+
     tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
-	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
-	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
-	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
-    
+         + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+         + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+         + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+
     tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
-	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
-	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
-	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+         + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+         + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+         + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
 
     /* Final output stage */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
     outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
-					  CONST_BITS+PASS1_BITS+3+1)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3+1)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
@@ -269,8 +269,8 @@
 
 GLOBAL(void)
 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   INT32 tmp0, tmp10, z1;
   JCOEFPTR inptr;
@@ -279,7 +279,7 @@
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   int ctr;
-  int workspace[DCTSIZE*2];	/* buffers data between passes */
+  int workspace[DCTSIZE*2];     /* buffers data between passes */
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array. */
@@ -292,21 +292,21 @@
     if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6)
       continue;
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 &&
-	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
+        inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
-      
+
       continue;
     }
-    
+
     /* Even part */
-    
+
     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     tmp10 = z1 << (CONST_BITS+2);
-    
+
     /* Odd part */
 
     z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
@@ -319,11 +319,11 @@
     tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
 
     /* Final output stage */
-    
+
     wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
     wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
   }
-  
+
   /* Pass 2: process 2 rows from work array, store into output array. */
 
   wsptr = workspace;
@@ -335,37 +335,37 @@
     if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
       /* AC terms all zero */
       JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
-				  & RANGE_MASK];
-      
+                                  & RANGE_MASK];
+
       outptr[0] = dcval;
       outptr[1] = dcval;
-      
-      wsptr += DCTSIZE;		/* advance pointer to next row */
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
       continue;
     }
 #endif
-    
+
     /* Even part */
-    
+
     tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
-    
+
     /* Odd part */
 
     tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
-	 + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
-	 + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
-	 + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+         + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+         + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+         + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
 
     /* Final output stage */
-    
+
     outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
-					  CONST_BITS+PASS1_BITS+3+2)
-			    & RANGE_MASK];
+                                          CONST_BITS+PASS1_BITS+3+2)
+                            & RANGE_MASK];
     outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
-					  CONST_BITS+PASS1_BITS+3+2)
-			    & RANGE_MASK];
-    
-    wsptr += DCTSIZE;		/* advance pointer to next row */
+                                          CONST_BITS+PASS1_BITS+3+2)
+                            & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
   }
 }
 
@@ -377,8 +377,8 @@
 
 GLOBAL(void)
 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
-	       JCOEFPTR coef_block,
-	       JSAMPARRAY output_buf, JDIMENSION output_col)
+               JCOEFPTR coef_block,
+               JSAMPARRAY output_buf, JDIMENSION output_col)
 {
   int dcval;
   ISLOW_MULT_TYPE * quantptr;
diff --git a/jinclude.h b/jinclude.h
index 0a4f151..4dced6e 100644
--- a/jinclude.h
+++ b/jinclude.h
@@ -1,8 +1,10 @@
 /*
  * jinclude.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1994, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file exists to provide a single place to fix any problems with
@@ -17,8 +19,8 @@
 
 /* Include auto-config file to find out which system include files we need. */
 
-#include "jconfig.h"		/* auto configuration options */
-#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+#include "jconfig.h"            /* auto configuration options */
+#define JCONFIG_INCLUDED        /* so that jpeglib.h doesn't do it again */
 
 /*
  * We need the NULL macro and size_t typedef.
@@ -58,28 +60,18 @@
 #ifdef NEED_BSD_STRINGS
 
 #include <strings.h>
-#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+#define MEMZERO(target,size)    bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)  bcopy((const void *)(src), (void *)(dest), (size_t)(size))
 
 #else /* not BSD, assume ANSI/SysV string lib */
 
 #include <string.h>
-#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+#define MEMZERO(target,size)    memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)  memcpy((void *)(dest), (const void *)(src), (size_t)(size))
 
 #endif
 
 /*
- * In ANSI C, and indeed any rational implementation, size_t is also the
- * type returned by sizeof().  However, it seems there are some irrational
- * implementations out there, in which sizeof() returns an int even though
- * size_t is defined as long or unsigned long.  To ensure consistent results
- * we always use this SIZEOF() macro in place of using sizeof() directly.
- */
-
-#define SIZEOF(object)	((size_t) sizeof(object))
-
-/*
  * The modules that use fread() and fwrite() always invoke them through
  * these macros.  On some systems you may need to twiddle the argument casts.
  * CAUTION: argument order is different from underlying functions!
diff --git a/jmemansi.c b/jmemansi.c
deleted file mode 100644
index 2d93e49..0000000
--- a/jmemansi.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * jmemansi.c
- *
- * Copyright (C) 1992-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a simple generic implementation of the system-
- * dependent portion of the JPEG memory manager.  This implementation
- * assumes that you have the ANSI-standard library routine tmpfile().
- * Also, the problem of determining the amount of memory available
- * is shoved onto the user.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-#ifndef SEEK_SET		/* pre-ANSI systems may not define this; */
-#define SEEK_SET  0		/* if not, assume 0 is correct */
-#endif
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- * It's impossible to do this in a portable way; our current solution is
- * to make the user tell us (with a default value set at compile time).
- * If you can actually get the available space, it's a good idea to subtract
- * a slop factor of 5% or so.
- */
-
-#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
-#define DEFAULT_MAX_MEM		1000000L /* default: one megabyte */
-#endif
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  return cinfo->mem->max_memory_to_use - already_allocated;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Backing store objects are only used when the value returned by
- * jpeg_mem_available is less than the total space needed.  You can dispense
- * with these routines if you have plenty of virtual memory; see jmemnobs.c.
- */
-
-
-METHODDEF(void)
-read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		    void FAR * buffer_address,
-		    long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFREAD(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_READ);
-}
-
-
-METHODDEF(void)
-write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		     void FAR * buffer_address,
-		     long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFWRITE(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_WRITE);
-}
-
-
-METHODDEF(void)
-close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  fclose(info->temp_file);
-  /* Since this implementation uses tmpfile() to create the file,
-   * no explicit file deletion is needed.
-   */
-}
-
-
-/*
- * Initial opening of a backing-store object.
- *
- * This version uses tmpfile(), which constructs a suitable file name
- * behind the scenes.  We don't have to use info->temp_name[] at all;
- * indeed, we can't even find out the actual name of the temp file.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  if ((info->temp_file = tmpfile()) == NULL)
-    ERREXITS(cinfo, JERR_TFILE_CREATE, "");
-  info->read_backing_store = read_backing_store;
-  info->write_backing_store = write_backing_store;
-  info->close_backing_store = close_backing_store;
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/jmemdos.c b/jmemdos.c
deleted file mode 100644
index 60b45c6..0000000
--- a/jmemdos.c
+++ /dev/null
@@ -1,638 +0,0 @@
-/*
- * jmemdos.c
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides an MS-DOS-compatible implementation of the system-
- * dependent portion of the JPEG memory manager.  Temporary data can be
- * stored in extended or expanded memory as well as in regular DOS files.
- *
- * If you use this file, you must be sure that NEED_FAR_POINTERS is defined
- * if you compile in a small-data memory model; it should NOT be defined if
- * you use a large-data memory model.  This file is not recommended if you
- * are using a flat-memory-space 386 environment such as DJGCC or Watcom C.
- * Also, this code will NOT work if struct fields are aligned on greater than
- * 2-byte boundaries.
- *
- * Based on code contributed by Ge' Weijers.
- */
-
-/*
- * If you have both extended and expanded memory, you may want to change the
- * order in which they are tried in jopen_backing_store.  On a 286 machine
- * expanded memory is usually faster, since extended memory access involves
- * an expensive protected-mode-and-back switch.  On 386 and better, extended
- * memory is usually faster.  As distributed, the code tries extended memory
- * first (what? not everyone has a 386? :-).
- *
- * You can disable use of extended/expanded memory entirely by altering these
- * definitions or overriding them from the Makefile (eg, -DEMS_SUPPORTED=0).
- */
-
-#ifndef XMS_SUPPORTED
-#define XMS_SUPPORTED  1
-#endif
-#ifndef EMS_SUPPORTED
-#define EMS_SUPPORTED  1
-#endif
-
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare these */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-extern char * getenv JPP((const char * name));
-#endif
-
-#ifdef NEED_FAR_POINTERS
-
-#ifdef __TURBOC__
-/* These definitions work for Borland C (Turbo C) */
-#include <alloc.h>		/* need farmalloc(), farfree() */
-#define far_malloc(x)	farmalloc(x)
-#define far_free(x)	farfree(x)
-#else
-/* These definitions work for Microsoft C and compatible compilers */
-#include <malloc.h>		/* need _fmalloc(), _ffree() */
-#define far_malloc(x)	_fmalloc(x)
-#define far_free(x)	_ffree(x)
-#endif
-
-#else /* not NEED_FAR_POINTERS */
-
-#define far_malloc(x)	malloc(x)
-#define far_free(x)	free(x)
-
-#endif /* NEED_FAR_POINTERS */
-
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#else
-#define READ_BINARY	"rb"
-#endif
-
-#ifndef USE_MSDOS_MEMMGR	/* make sure user got configuration right */
-  You forgot to define USE_MSDOS_MEMMGR in jconfig.h. /* deliberate syntax error */
-#endif
-
-#if MAX_ALLOC_CHUNK >= 65535L	/* make sure jconfig.h got this right */
-  MAX_ALLOC_CHUNK should be less than 64K. /* deliberate syntax error */
-#endif
-
-
-/*
- * Declarations for assembly-language support routines (see jmemdosa.asm).
- *
- * The functions are declared "far" as are all their pointer arguments;
- * this ensures the assembly source code will work regardless of the
- * compiler memory model.  We assume "short" is 16 bits, "long" is 32.
- */
-
-typedef void far * XMSDRIVER;	/* actually a pointer to code */
-typedef struct {		/* registers for calling XMS driver */
-	unsigned short ax, dx, bx;
-	void far * ds_si;
-      } XMScontext;
-typedef struct {		/* registers for calling EMS driver */
-	unsigned short ax, dx, bx;
-	void far * ds_si;
-      } EMScontext;
-
-extern short far jdos_open JPP((short far * handle, char far * filename));
-extern short far jdos_close JPP((short handle));
-extern short far jdos_seek JPP((short handle, long offset));
-extern short far jdos_read JPP((short handle, void far * buffer,
-				unsigned short count));
-extern short far jdos_write JPP((short handle, void far * buffer,
-				 unsigned short count));
-extern void far jxms_getdriver JPP((XMSDRIVER far *));
-extern void far jxms_calldriver JPP((XMSDRIVER, XMScontext far *));
-extern short far jems_available JPP((void));
-extern void far jems_calldriver JPP((EMScontext far *));
-
-
-/*
- * Selection of a file name for a temporary file.
- * This is highly system-dependent, and you may want to customize it.
- */
-
-static int next_file_num;	/* to distinguish among several temp files */
-
-LOCAL(void)
-select_file_name (char * fname)
-{
-  const char * env;
-  char * ptr;
-  FILE * tfile;
-
-  /* Keep generating file names till we find one that's not in use */
-  for (;;) {
-    /* Get temp directory name from environment TMP or TEMP variable;
-     * if none, use "."
-     */
-    if ((env = (const char *) getenv("TMP")) == NULL)
-      if ((env = (const char *) getenv("TEMP")) == NULL)
-	env = ".";
-    if (*env == '\0')		/* null string means "." */
-      env = ".";
-    ptr = fname;		/* copy name to fname */
-    while (*env != '\0')
-      *ptr++ = *env++;
-    if (ptr[-1] != '\\' && ptr[-1] != '/')
-      *ptr++ = '\\';		/* append backslash if not in env variable */
-    /* Append a suitable file name */
-    next_file_num++;		/* advance counter */
-    sprintf(ptr, "JPG%03d.TMP", next_file_num);
-    /* Probe to see if file name is already in use */
-    if ((tfile = fopen(fname, READ_BINARY)) == NULL)
-      break;
-    fclose(tfile);		/* oops, it's there; close tfile & try again */
-  }
-}
-
-
-/*
- * Near-memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are allocated in far memory, if possible
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) far_malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  far_free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- * It's impossible to do this in a portable way; our current solution is
- * to make the user tell us (with a default value set at compile time).
- * If you can actually get the available space, it's a good idea to subtract
- * a slop factor of 5% or so.
- */
-
-#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
-#define DEFAULT_MAX_MEM		300000L /* for total usage about 450K */
-#endif
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  return cinfo->mem->max_memory_to_use - already_allocated;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Backing store objects are only used when the value returned by
- * jpeg_mem_available is less than the total space needed.  You can dispense
- * with these routines if you have plenty of virtual memory; see jmemnobs.c.
- */
-
-/*
- * For MS-DOS we support three types of backing storage:
- *   1. Conventional DOS files.  We access these by direct DOS calls rather
- *      than via the stdio package.  This provides a bit better performance,
- *      but the real reason is that the buffers to be read or written are FAR.
- *      The stdio library for small-data memory models can't cope with that.
- *   2. Extended memory, accessed per the XMS V2.0 specification.
- *   3. Expanded memory, accessed per the LIM/EMS 4.0 specification.
- * You'll need copies of those specs to make sense of the related code.
- * The specs are available by Internet FTP from the SIMTEL archives 
- * (oak.oakland.edu and its various mirror sites).  See files
- * pub/msdos/microsoft/xms20.arc and pub/msdos/info/limems41.zip.
- */
-
-
-/*
- * Access methods for a DOS file.
- */
-
-
-METHODDEF(void)
-read_file_store (j_common_ptr cinfo, backing_store_ptr info,
-		 void FAR * buffer_address,
-		 long file_offset, long byte_count)
-{
-  if (jdos_seek(info->handle.file_handle, file_offset))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  /* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */
-  if (byte_count > 65535L)	/* safety check */
-    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
-  if (jdos_read(info->handle.file_handle, buffer_address,
-		(unsigned short) byte_count))
-    ERREXIT(cinfo, JERR_TFILE_READ);
-}
-
-
-METHODDEF(void)
-write_file_store (j_common_ptr cinfo, backing_store_ptr info,
-		  void FAR * buffer_address,
-		  long file_offset, long byte_count)
-{
-  if (jdos_seek(info->handle.file_handle, file_offset))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  /* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */
-  if (byte_count > 65535L)	/* safety check */
-    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
-  if (jdos_write(info->handle.file_handle, buffer_address,
-		 (unsigned short) byte_count))
-    ERREXIT(cinfo, JERR_TFILE_WRITE);
-}
-
-
-METHODDEF(void)
-close_file_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  jdos_close(info->handle.file_handle);	/* close the file */
-  remove(info->temp_name);	/* delete the file */
-/* If your system doesn't have remove(), try unlink() instead.
- * remove() is the ANSI-standard name for this function, but
- * unlink() was more common in pre-ANSI systems.
- */
-  TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name);
-}
-
-
-LOCAL(boolean)
-open_file_store (j_common_ptr cinfo, backing_store_ptr info,
-		 long total_bytes_needed)
-{
-  short handle;
-
-  select_file_name(info->temp_name);
-  if (jdos_open((short far *) & handle, (char far *) info->temp_name)) {
-    /* might as well exit since jpeg_open_backing_store will fail anyway */
-    ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
-    return FALSE;
-  }
-  info->handle.file_handle = handle;
-  info->read_backing_store = read_file_store;
-  info->write_backing_store = write_file_store;
-  info->close_backing_store = close_file_store;
-  TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name);
-  return TRUE;			/* succeeded */
-}
-
-
-/*
- * Access methods for extended memory.
- */
-
-#if XMS_SUPPORTED
-
-static XMSDRIVER xms_driver;	/* saved address of XMS driver */
-
-typedef union {			/* either long offset or real-mode pointer */
-	long offset;
-	void far * ptr;
-      } XMSPTR;
-
-typedef struct {		/* XMS move specification structure */
-	long length;
-	XMSH src_handle;
-	XMSPTR src;
-	XMSH dst_handle;
-	XMSPTR dst;
-      } XMSspec;
-
-#define ODD(X)	(((X) & 1L) != 0)
-
-
-METHODDEF(void)
-read_xms_store (j_common_ptr cinfo, backing_store_ptr info,
-		void FAR * buffer_address,
-		long file_offset, long byte_count)
-{
-  XMScontext ctx;
-  XMSspec spec;
-  char endbuffer[2];
-
-  /* The XMS driver can't cope with an odd length, so handle the last byte
-   * specially if byte_count is odd.  We don't expect this to be common.
-   */
-
-  spec.length = byte_count & (~ 1L);
-  spec.src_handle = info->handle.xms_handle;
-  spec.src.offset = file_offset;
-  spec.dst_handle = 0;
-  spec.dst.ptr = buffer_address;
-  
-  ctx.ds_si = (void far *) & spec;
-  ctx.ax = 0x0b00;		/* EMB move */
-  jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
-  if (ctx.ax != 1)
-    ERREXIT(cinfo, JERR_XMS_READ);
-
-  if (ODD(byte_count)) {
-    read_xms_store(cinfo, info, (void FAR *) endbuffer,
-		   file_offset + byte_count - 1L, 2L);
-    ((char FAR *) buffer_address)[byte_count - 1L] = endbuffer[0];
-  }
-}
-
-
-METHODDEF(void)
-write_xms_store (j_common_ptr cinfo, backing_store_ptr info,
-		 void FAR * buffer_address,
-		 long file_offset, long byte_count)
-{
-  XMScontext ctx;
-  XMSspec spec;
-  char endbuffer[2];
-
-  /* The XMS driver can't cope with an odd length, so handle the last byte
-   * specially if byte_count is odd.  We don't expect this to be common.
-   */
-
-  spec.length = byte_count & (~ 1L);
-  spec.src_handle = 0;
-  spec.src.ptr = buffer_address;
-  spec.dst_handle = info->handle.xms_handle;
-  spec.dst.offset = file_offset;
-
-  ctx.ds_si = (void far *) & spec;
-  ctx.ax = 0x0b00;		/* EMB move */
-  jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
-  if (ctx.ax != 1)
-    ERREXIT(cinfo, JERR_XMS_WRITE);
-
-  if (ODD(byte_count)) {
-    read_xms_store(cinfo, info, (void FAR *) endbuffer,
-		   file_offset + byte_count - 1L, 2L);
-    endbuffer[0] = ((char FAR *) buffer_address)[byte_count - 1L];
-    write_xms_store(cinfo, info, (void FAR *) endbuffer,
-		    file_offset + byte_count - 1L, 2L);
-  }
-}
-
-
-METHODDEF(void)
-close_xms_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  XMScontext ctx;
-
-  ctx.dx = info->handle.xms_handle;
-  ctx.ax = 0x0a00;
-  jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
-  TRACEMS1(cinfo, 1, JTRC_XMS_CLOSE, info->handle.xms_handle);
-  /* we ignore any error return from the driver */
-}
-
-
-LOCAL(boolean)
-open_xms_store (j_common_ptr cinfo, backing_store_ptr info,
-		long total_bytes_needed)
-{
-  XMScontext ctx;
-
-  /* Get address of XMS driver */
-  jxms_getdriver((XMSDRIVER far *) & xms_driver);
-  if (xms_driver == NULL)
-    return FALSE;		/* no driver to be had */
-
-  /* Get version number, must be >= 2.00 */
-  ctx.ax = 0x0000;
-  jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
-  if (ctx.ax < (unsigned short) 0x0200)
-    return FALSE;
-
-  /* Try to get space (expressed in kilobytes) */
-  ctx.dx = (unsigned short) ((total_bytes_needed + 1023L) >> 10);
-  ctx.ax = 0x0900;
-  jxms_calldriver(xms_driver, (XMScontext far *) & ctx);
-  if (ctx.ax != 1)
-    return FALSE;
-
-  /* Succeeded, save the handle and away we go */
-  info->handle.xms_handle = ctx.dx;
-  info->read_backing_store = read_xms_store;
-  info->write_backing_store = write_xms_store;
-  info->close_backing_store = close_xms_store;
-  TRACEMS1(cinfo, 1, JTRC_XMS_OPEN, ctx.dx);
-  return TRUE;			/* succeeded */
-}
-
-#endif /* XMS_SUPPORTED */
-
-
-/*
- * Access methods for expanded memory.
- */
-
-#if EMS_SUPPORTED
-
-/* The EMS move specification structure requires word and long fields aligned
- * at odd byte boundaries.  Some compilers will align struct fields at even
- * byte boundaries.  While it's usually possible to force byte alignment,
- * that causes an overall performance penalty and may pose problems in merging
- * JPEG into a larger application.  Instead we accept some rather dirty code
- * here.  Note this code would fail if the hardware did not allow odd-byte
- * word & long accesses, but all 80x86 CPUs do.
- */
-
-typedef void far * EMSPTR;
-
-typedef union {			/* EMS move specification structure */
-	long length;		/* It's easy to access first 4 bytes */
-	char bytes[18];		/* Misaligned fields in here! */
-      } EMSspec;
-
-/* Macros for accessing misaligned fields */
-#define FIELD_AT(spec,offset,type)  (*((type *) &(spec.bytes[offset])))
-#define SRC_TYPE(spec)		FIELD_AT(spec,4,char)
-#define SRC_HANDLE(spec)	FIELD_AT(spec,5,EMSH)
-#define SRC_OFFSET(spec)	FIELD_AT(spec,7,unsigned short)
-#define SRC_PAGE(spec)		FIELD_AT(spec,9,unsigned short)
-#define SRC_PTR(spec)		FIELD_AT(spec,7,EMSPTR)
-#define DST_TYPE(spec)		FIELD_AT(spec,11,char)
-#define DST_HANDLE(spec)	FIELD_AT(spec,12,EMSH)
-#define DST_OFFSET(spec)	FIELD_AT(spec,14,unsigned short)
-#define DST_PAGE(spec)		FIELD_AT(spec,16,unsigned short)
-#define DST_PTR(spec)		FIELD_AT(spec,14,EMSPTR)
-
-#define EMSPAGESIZE	16384L	/* gospel, see the EMS specs */
-
-#define HIBYTE(W)  (((W) >> 8) & 0xFF)
-#define LOBYTE(W)  ((W) & 0xFF)
-
-
-METHODDEF(void)
-read_ems_store (j_common_ptr cinfo, backing_store_ptr info,
-		void FAR * buffer_address,
-		long file_offset, long byte_count)
-{
-  EMScontext ctx;
-  EMSspec spec;
-
-  spec.length = byte_count;
-  SRC_TYPE(spec) = 1;
-  SRC_HANDLE(spec) = info->handle.ems_handle;
-  SRC_PAGE(spec)   = (unsigned short) (file_offset / EMSPAGESIZE);
-  SRC_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE);
-  DST_TYPE(spec) = 0;
-  DST_HANDLE(spec) = 0;
-  DST_PTR(spec)    = buffer_address;
-  
-  ctx.ds_si = (void far *) & spec;
-  ctx.ax = 0x5700;		/* move memory region */
-  jems_calldriver((EMScontext far *) & ctx);
-  if (HIBYTE(ctx.ax) != 0)
-    ERREXIT(cinfo, JERR_EMS_READ);
-}
-
-
-METHODDEF(void)
-write_ems_store (j_common_ptr cinfo, backing_store_ptr info,
-		 void FAR * buffer_address,
-		 long file_offset, long byte_count)
-{
-  EMScontext ctx;
-  EMSspec spec;
-
-  spec.length = byte_count;
-  SRC_TYPE(spec) = 0;
-  SRC_HANDLE(spec) = 0;
-  SRC_PTR(spec)    = buffer_address;
-  DST_TYPE(spec) = 1;
-  DST_HANDLE(spec) = info->handle.ems_handle;
-  DST_PAGE(spec)   = (unsigned short) (file_offset / EMSPAGESIZE);
-  DST_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE);
-  
-  ctx.ds_si = (void far *) & spec;
-  ctx.ax = 0x5700;		/* move memory region */
-  jems_calldriver((EMScontext far *) & ctx);
-  if (HIBYTE(ctx.ax) != 0)
-    ERREXIT(cinfo, JERR_EMS_WRITE);
-}
-
-
-METHODDEF(void)
-close_ems_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  EMScontext ctx;
-
-  ctx.ax = 0x4500;
-  ctx.dx = info->handle.ems_handle;
-  jems_calldriver((EMScontext far *) & ctx);
-  TRACEMS1(cinfo, 1, JTRC_EMS_CLOSE, info->handle.ems_handle);
-  /* we ignore any error return from the driver */
-}
-
-
-LOCAL(boolean)
-open_ems_store (j_common_ptr cinfo, backing_store_ptr info,
-		long total_bytes_needed)
-{
-  EMScontext ctx;
-
-  /* Is EMS driver there? */
-  if (! jems_available())
-    return FALSE;
-
-  /* Get status, make sure EMS is OK */
-  ctx.ax = 0x4000;
-  jems_calldriver((EMScontext far *) & ctx);
-  if (HIBYTE(ctx.ax) != 0)
-    return FALSE;
-
-  /* Get version, must be >= 4.0 */
-  ctx.ax = 0x4600;
-  jems_calldriver((EMScontext far *) & ctx);
-  if (HIBYTE(ctx.ax) != 0 || LOBYTE(ctx.ax) < 0x40)
-    return FALSE;
-
-  /* Try to allocate requested space */
-  ctx.ax = 0x4300;
-  ctx.bx = (unsigned short) ((total_bytes_needed + EMSPAGESIZE-1L) / EMSPAGESIZE);
-  jems_calldriver((EMScontext far *) & ctx);
-  if (HIBYTE(ctx.ax) != 0)
-    return FALSE;
-
-  /* Succeeded, save the handle and away we go */
-  info->handle.ems_handle = ctx.dx;
-  info->read_backing_store = read_ems_store;
-  info->write_backing_store = write_ems_store;
-  info->close_backing_store = close_ems_store;
-  TRACEMS1(cinfo, 1, JTRC_EMS_OPEN, ctx.dx);
-  return TRUE;			/* succeeded */
-}
-
-#endif /* EMS_SUPPORTED */
-
-
-/*
- * Initial opening of a backing-store object.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  /* Try extended memory, then expanded memory, then regular file. */
-#if XMS_SUPPORTED
-  if (open_xms_store(cinfo, info, total_bytes_needed))
-    return;
-#endif
-#if EMS_SUPPORTED
-  if (open_ems_store(cinfo, info, total_bytes_needed))
-    return;
-#endif
-  if (open_file_store(cinfo, info, total_bytes_needed))
-    return;
-  ERREXITS(cinfo, JERR_TFILE_CREATE, "");
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  next_file_num = 0;		/* initialize temp file name generator */
-  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* Microsoft C, at least in v6.00A, will not successfully reclaim freed
-   * blocks of size > 32Kbytes unless we give it a kick in the rear, like so:
-   */
-#ifdef NEED_FHEAPMIN
-  _fheapmin();
-#endif
-}
diff --git a/jmemdosa.asm b/jmemdosa.asm
deleted file mode 100644
index ecd4372..0000000
--- a/jmemdosa.asm
+++ /dev/null
@@ -1,379 +0,0 @@
-;
-; jmemdosa.asm
-;
-; Copyright (C) 1992, Thomas G. Lane.
-; This file is part of the Independent JPEG Group's software.
-; For conditions of distribution and use, see the accompanying README file.
-;
-; This file contains low-level interface routines to support the MS-DOS
-; backing store manager (jmemdos.c).  Routines are provided to access disk
-; files through direct DOS calls, and to access XMS and EMS drivers.
-;
-; This file should assemble with Microsoft's MASM or any compatible
-; assembler (including Borland's Turbo Assembler).  If you haven't got
-; a compatible assembler, better fall back to jmemansi.c or jmemname.c.
-;
-; To minimize dependence on the C compiler's register usage conventions,
-; we save and restore all 8086 registers, even though most compilers only
-; require SI,DI,DS to be preserved.  Also, we use only 16-bit-wide return
-; values, which everybody returns in AX.
-;
-; Based on code contributed by Ge' Weijers.
-;
-
-JMEMDOSA_TXT	segment byte public 'CODE'
-
-		assume	cs:JMEMDOSA_TXT
-
-		public	_jdos_open
-		public	_jdos_close
-		public	_jdos_seek
-		public	_jdos_read
-		public	_jdos_write
-		public	_jxms_getdriver
-		public	_jxms_calldriver
-		public	_jems_available
-		public	_jems_calldriver
-
-;
-; short far jdos_open (short far * handle, char far * filename)
-;
-; Create and open a temporary file
-;
-_jdos_open	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	cx,0			; normal file attributes
-		lds	dx,dword ptr [bp+10]	; get filename pointer
-		mov	ah,3ch			; create file
-		int	21h
-		jc	open_err		; if failed, return error code
-		lds	bx,dword ptr [bp+6]	; get handle pointer
-		mov	word ptr [bx],ax	; save the handle
-		xor	ax,ax			; return zero for OK
-open_err:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jdos_open	endp
-
-
-;
-; short far jdos_close (short handle)
-;
-; Close the file handle
-;
-_jdos_close	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	bx,word ptr [bp+6]	; file handle
-		mov	ah,3eh			; close file
-		int	21h
-		jc	close_err		; if failed, return error code
-		xor	ax,ax			; return zero for OK
-close_err:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jdos_close	endp
-
-
-;
-; short far jdos_seek (short handle, long offset)
-;
-; Set file position
-;
-_jdos_seek	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	bx,word ptr [bp+6]	; file handle
-		mov	dx,word ptr [bp+8]	; LS offset
-		mov	cx,word ptr [bp+10]	; MS offset
-		mov	ax,4200h		; absolute seek
-		int	21h
-		jc	seek_err		; if failed, return error code
-		xor	ax,ax			; return zero for OK
-seek_err:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jdos_seek	endp
-
-
-;
-; short far jdos_read (short handle, void far * buffer, unsigned short count)
-;
-; Read from file
-;
-_jdos_read	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	bx,word ptr [bp+6]	; file handle
-		lds	dx,dword ptr [bp+8]	; buffer address
-		mov	cx,word ptr [bp+12]	; number of bytes
-		mov	ah,3fh			; read file
-		int	21h
-		jc	read_err		; if failed, return error code
-		cmp	ax,word ptr [bp+12]	; make sure all bytes were read
-		je	read_ok
-		mov	ax,1			; else return 1 for not OK
-		jmp	short read_err
-read_ok:	xor	ax,ax			; return zero for OK
-read_err:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jdos_read	endp
-
-
-;
-; short far jdos_write (short handle, void far * buffer, unsigned short count)
-;
-; Write to file
-;
-_jdos_write	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	bx,word ptr [bp+6]	; file handle
-		lds	dx,dword ptr [bp+8]	; buffer address
-		mov	cx,word ptr [bp+12]	; number of bytes
-		mov	ah,40h			; write file
-		int	21h
-		jc	write_err		; if failed, return error code
-		cmp	ax,word ptr [bp+12]	; make sure all bytes written
-		je	write_ok
-		mov	ax,1			; else return 1 for not OK
-		jmp	short write_err
-write_ok:	xor	ax,ax			; return zero for OK
-write_err:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jdos_write	endp
-
-
-;
-; void far jxms_getdriver (XMSDRIVER far *)
-;
-; Get the address of the XMS driver, or NULL if not available
-;
-_jxms_getdriver	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov 	ax,4300h		; call multiplex interrupt with
-		int	2fh			; a magic cookie, hex 4300
-		cmp 	al,80h			; AL should contain hex 80
-		je	xmsavail
-		xor 	dx,dx			; no XMS driver available
-		xor 	ax,ax			; return a nil pointer
-		jmp	short xmsavail_done
-xmsavail:	mov 	ax,4310h		; fetch driver address with
-		int	2fh			; another magic cookie
-		mov 	dx,es			; copy address to dx:ax
-		mov 	ax,bx
-xmsavail_done:	les 	bx,dword ptr [bp+6]	; get pointer to return value
-		mov	word ptr es:[bx],ax
-		mov	word ptr es:[bx+2],dx
-		pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop	bp
-		ret
-_jxms_getdriver	endp
-
-
-;
-; void far jxms_calldriver (XMSDRIVER, XMScontext far *)
-;
-; The XMScontext structure contains values for the AX,DX,BX,SI,DS registers.
-; These are loaded, the XMS call is performed, and the new values of the
-; AX,DX,BX registers are written back to the context structure.
-;
-_jxms_calldriver 	proc	far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		les 	bx,dword ptr [bp+10]	; get XMScontext pointer
-		mov 	ax,word ptr es:[bx]	; load registers
-		mov 	dx,word ptr es:[bx+2]
-		mov 	si,word ptr es:[bx+6]
-		mov 	ds,word ptr es:[bx+8]
-		mov 	bx,word ptr es:[bx+4]
-		call	dword ptr [bp+6]	; call the driver
-		mov	cx,bx			; save returned BX for a sec
-		les 	bx,dword ptr [bp+10]	; get XMScontext pointer
-		mov 	word ptr es:[bx],ax	; put back ax,dx,bx
-		mov 	word ptr es:[bx+2],dx
-		mov 	word ptr es:[bx+4],cx
-		pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jxms_calldriver 	endp
-
-
-;
-; short far jems_available (void)
-;
-; Have we got an EMS driver? (this comes straight from the EMS 4.0 specs)
-;
-_jems_available	proc	far
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		mov	ax,3567h		; get interrupt vector 67h
-		int	21h
-		push	cs
-		pop	ds
-		mov	di,000ah		; check offs 10 in returned seg
-		lea	si,ASCII_device_name	; against literal string
-		mov	cx,8
-		cld
-		repe cmpsb
-		jne	no_ems
-		mov	ax,1			; match, it's there
-		jmp	short avail_done
-no_ems:		xor	ax,ax			; it's not there
-avail_done:	pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		ret
-
-ASCII_device_name	db	"EMMXXXX0"
-
-_jems_available	endp
-
-
-;
-; void far jems_calldriver (EMScontext far *)
-;
-; The EMScontext structure contains values for the AX,DX,BX,SI,DS registers.
-; These are loaded, the EMS trap is performed, and the new values of the
-; AX,DX,BX registers are written back to the context structure.
-;
-_jems_calldriver	proc far
-		push	bp			; linkage
-		mov 	bp,sp
-		push	si			; save all registers for safety
-		push	di
-		push	bx
-		push	cx
-		push	dx
-		push	es
-		push	ds
-		les 	bx,dword ptr [bp+6]	; get EMScontext pointer
-		mov 	ax,word ptr es:[bx]	; load registers
-		mov 	dx,word ptr es:[bx+2]
-		mov 	si,word ptr es:[bx+6]
-		mov 	ds,word ptr es:[bx+8]
-		mov 	bx,word ptr es:[bx+4]
-		int	67h			; call the EMS driver
-		mov	cx,bx			; save returned BX for a sec
-		les 	bx,dword ptr [bp+6]	; get EMScontext pointer
-		mov 	word ptr es:[bx],ax	; put back ax,dx,bx
-		mov 	word ptr es:[bx+2],dx
-		mov 	word ptr es:[bx+4],cx
-		pop	ds			; restore registers and exit
-		pop	es
-		pop	dx
-		pop	cx
-		pop	bx
-		pop	di
-		pop	si
-		pop 	bp
-		ret
-_jems_calldriver	endp
-
-JMEMDOSA_TXT	ends
-
-		end
diff --git a/jmemmac.c b/jmemmac.c
deleted file mode 100644
index 106f9be..0000000
--- a/jmemmac.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * jmemmac.c
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * jmemmac.c provides an Apple Macintosh implementation of the system-
- * dependent portion of the JPEG memory manager.
- *
- * If you use jmemmac.c, then you must define USE_MAC_MEMMGR in the
- * JPEG_INTERNALS part of jconfig.h.
- *
- * jmemmac.c uses the Macintosh toolbox routines NewPtr and DisposePtr
- * instead of malloc and free.  It accurately determines the amount of
- * memory available by using CompactMem.  Notice that if left to its
- * own devices, this code can chew up all available space in the
- * application's zone, with the exception of the rather small "slop"
- * factor computed in jpeg_mem_available().  The application can ensure
- * that more space is left over by reducing max_memory_to_use.
- *
- * Large images are swapped to disk using temporary files and System 7.0+'s
- * temporary folder functionality.
- *
- * Note that jmemmac.c depends on two features of MacOS that were first
- * introduced in System 7: FindFolder and the FSSpec-based calls.
- * If your application uses jmemmac.c and is run under System 6 or earlier,
- * and the jpeg library decides it needs a temporary file, it will abort,
- * printing error messages about requiring System 7.  (If no temporary files
- * are created, it will run fine.)
- *
- * If you want to use jmemmac.c in an application that might be used with
- * System 6 or earlier, then you should remove dependencies on FindFolder
- * and the FSSpec calls.  You will need to replace FindFolder with some
- * other mechanism for finding a place to put temporary files, and you
- * should replace the FSSpec calls with their HFS equivalents:
- *
- *     FSpDelete     ->  HDelete
- *     FSpGetFInfo   ->  HGetFInfo
- *     FSpCreate     ->  HCreate
- *     FSpOpenDF     ->  HOpen      *** Note: not HOpenDF ***
- *     FSMakeFSSpec  ->  (fill in spec by hand.)
- *
- * (Use HOpen instead of HOpenDF.  HOpen is just a glue-interface to PBHOpen,
- * which is on all HFS macs.  HOpenDF is a System 7 addition which avoids the
- * ages-old problem of names starting with a period.)
- *
- * Contributed by Sam Bushell (jsam@iagu.on.net) and
- * Dan Gildor (gyld@in-touch.com).
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"    /* import the system-dependent declarations */
-
-#ifndef USE_MAC_MEMMGR	/* make sure user got configuration right */
-  You forgot to define USE_MAC_MEMMGR in jconfig.h. /* deliberate syntax error */
-#endif
-
-#include <Memory.h>     /* we use the MacOS memory manager */
-#include <Files.h>      /* we use the MacOS File stuff */
-#include <Folders.h>    /* we use the MacOS HFS stuff */
-#include <Script.h>     /* for smSystemScript */
-#include <Gestalt.h>    /* we use Gestalt to test for specific functionality */
-
-#ifndef TEMP_FILE_NAME		/* can override from jconfig.h or Makefile */
-#define TEMP_FILE_NAME  "JPG%03d.TMP"
-#endif
-
-static int next_file_num;	/* to distinguish among several temp files */
-
-
-/*
- * Memory allocation and freeing are controlled by the MacOS library
- * routines NewPtr() and DisposePtr(), which allocate fixed-address
- * storage.  Unfortunately, the IJG library isn't smart enough to cope
- * with relocatable storage.
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) NewPtr(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  DisposePtr((Ptr) object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: we include FAR keywords in the routine declarations simply for
- * consistency with the rest of the IJG code; FAR should expand to empty
- * on rational architectures like the Mac.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) NewPtr(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  DisposePtr((Ptr) object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- */
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  long limit = cinfo->mem->max_memory_to_use - already_allocated;
-  long slop, mem;
-
-  /* Don't ask for more than what application has told us we may use */
-  if (max_bytes_needed > limit && limit > 0)
-    max_bytes_needed = limit;
-  /* Find whether there's a big enough free block in the heap.
-   * CompactMem tries to create a contiguous block of the requested size,
-   * and then returns the size of the largest free block (which could be
-   * much more or much less than we asked for).
-   * We add some slop to ensure we don't use up all available memory.
-   */
-  slop = max_bytes_needed / 16 + 32768L;
-  mem = CompactMem(max_bytes_needed + slop) - slop;
-  if (mem < 0)
-    mem = 0;			/* sigh, couldn't even get the slop */
-  /* Don't take more than the application says we can have */
-  if (mem > limit && limit > 0)
-    mem = limit;
-  return mem;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Backing store objects are only used when the value returned by
- * jpeg_mem_available is less than the total space needed.  You can dispense
- * with these routines if you have plenty of virtual memory; see jmemnobs.c.
- */
-
-
-METHODDEF(void)
-read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		    void FAR * buffer_address,
-		    long file_offset, long byte_count)
-{
-  long bytes = byte_count;
-  long retVal;
-
-  if ( SetFPos ( info->temp_file, fsFromStart, file_offset ) != noErr )
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-
-  retVal = FSRead ( info->temp_file, &bytes,
-		    (unsigned char *) buffer_address );
-  if ( retVal != noErr || bytes != byte_count )
-    ERREXIT(cinfo, JERR_TFILE_READ);
-}
-
-
-METHODDEF(void)
-write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		     void FAR * buffer_address,
-		     long file_offset, long byte_count)
-{
-  long bytes = byte_count;
-  long retVal;
-
-  if ( SetFPos ( info->temp_file, fsFromStart, file_offset ) != noErr )
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-
-  retVal = FSWrite ( info->temp_file, &bytes,
-		     (unsigned char *) buffer_address );
-  if ( retVal != noErr || bytes != byte_count )
-    ERREXIT(cinfo, JERR_TFILE_WRITE);
-}
-
-
-METHODDEF(void)
-close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  FSClose ( info->temp_file );
-  FSpDelete ( &(info->tempSpec) );
-}
-
-
-/*
- * Initial opening of a backing-store object.
- *
- * This version uses FindFolder to find the Temporary Items folder,
- * and puts the temporary file in there.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  short         tmpRef, vRefNum;
-  long          dirID;
-  FInfo         finderInfo;
-  FSSpec        theSpec;
-  Str255        fName;
-  OSErr         osErr;
-  long          gestaltResponse = 0;
-
-  /* Check that FSSpec calls are available. */
-  osErr = Gestalt( gestaltFSAttr, &gestaltResponse );
-  if ( ( osErr != noErr )
-       || !( gestaltResponse & (1<<gestaltHasFSSpecCalls) ) )
-    ERREXITS(cinfo, JERR_TFILE_CREATE, "- System 7.0 or later required");
-  /* TO DO: add a proper error message to jerror.h. */
-
-  /* Check that FindFolder is available. */
-  osErr = Gestalt( gestaltFindFolderAttr, &gestaltResponse );
-  if ( ( osErr != noErr )
-       || !( gestaltResponse & (1<<gestaltFindFolderPresent) ) )
-    ERREXITS(cinfo, JERR_TFILE_CREATE, "- System 7.0 or later required.");
-  /* TO DO: add a proper error message to jerror.h. */
-
-  osErr = FindFolder ( kOnSystemDisk, kTemporaryFolderType, kCreateFolder,
-                       &vRefNum, &dirID );
-  if ( osErr != noErr )
-    ERREXITS(cinfo, JERR_TFILE_CREATE, "- temporary items folder unavailable");
-  /* TO DO: Try putting the temp files somewhere else. */
-
-  /* Keep generating file names till we find one that's not in use */
-  for (;;) {
-    next_file_num++;		/* advance counter */
-
-    sprintf(info->temp_name, TEMP_FILE_NAME, next_file_num);
-    strcpy ( (Ptr)fName+1, info->temp_name );
-    *fName = strlen (info->temp_name);
-    osErr = FSMakeFSSpec ( vRefNum, dirID, fName, &theSpec );
-
-    if ( (osErr = FSpGetFInfo ( &theSpec, &finderInfo ) ) != noErr )
-      break;
-  }
-
-  osErr = FSpCreate ( &theSpec, '????', '????', smSystemScript );
-  if ( osErr != noErr )
-    ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
-
-  osErr = FSpOpenDF ( &theSpec, fsRdWrPerm, &(info->temp_file) );
-  if ( osErr != noErr )
-    ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
-
-  info->tempSpec = theSpec;
-
-  info->read_backing_store = read_backing_store;
-  info->write_backing_store = write_backing_store;
-  info->close_backing_store = close_backing_store;
-  TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name);
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  next_file_num = 0;
-
-  /* max_memory_to_use will be initialized to FreeMem()'s result;
-   * the calling application might later reduce it, for example
-   * to leave room to invoke multiple JPEG objects.
-   * Note that FreeMem returns the total number of free bytes;
-   * it may not be possible to allocate a single block of this size.
-   */
-  return FreeMem();
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/jmemmgr.c b/jmemmgr.c
index d801b32..f7219d2 100644
--- a/jmemmgr.c
+++ b/jmemmgr.c
@@ -1,8 +1,10 @@
 /*
  * jmemmgr.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains the JPEG system-independent memory management
@@ -25,18 +27,27 @@
  */
 
 #define JPEG_INTERNALS
-#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#define AM_MEMORY_MANAGER       /* we define jvirt_Xarray_control structs */
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
+#include "jmemsys.h"            /* import the system-dependent declarations */
 
 #ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
-extern char * getenv JPP((const char * name));
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare getenv() */
+extern char * getenv (const char * name);
 #endif
 #endif
 
 
+LOCAL(size_t)
+round_up_pow2 (size_t a, size_t b)
+/* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */
+/* Assumes b is a power of 2 (hence b > 0) and a + b - 1 fits in size_t */
+{
+  return ((a + b - 1) & (~(b - 1)));
+}
+
+
 /*
  * Some important notes:
  *   The allocation routines provided here must never return NULL.
@@ -57,64 +68,56 @@
  * requirement, and we had better do so too.
  * There isn't any really portable way to determine the worst-case alignment
  * requirement.  This module assumes that the alignment requirement is
- * multiples of sizeof(ALIGN_TYPE).
- * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * multiples of ALIGN_SIZE.
+ * By default, ALIGN_SIZE is 16 if WITH_SIMD, else sizeof(double).  This is necessary on some
  * workstations (where doubles really do need 8-byte alignment) and will work
  * fine on nearly everything.  If your machine has lesser alignment needs,
- * you can save a few bytes by making ALIGN_TYPE smaller.
+ * you can save a few bytes by making ALIGN_SIZE smaller.
  * The only place I know of where this will NOT work is certain Macintosh
  * 680x0 compilers that define double as a 10-byte IEEE extended float.
  * Doing 10-byte alignment is counterproductive because longwords won't be
- * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * aligned well.  Put "#define ALIGN_SIZE 4" in jconfig.h if you have
  * such a compiler.
  */
 
-#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
-#define ALIGN_TYPE  double
+#ifndef ALIGN_SIZE              /* so can override from jconfig.h */
+#ifndef WITH_SIMD
+#define ALIGN_SIZE  sizeof(double)
+#else
+#define ALIGN_SIZE  16 /* Most SIMD implementations require this */
 #endif
-
+#endif
 
 /*
  * We allocate objects from "pools", where each pool is gotten with a single
  * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
  * overhead within a pool, except for alignment padding.  Each pool has a
  * header with a link to the next pool of the same class.
- * Small and large pool headers are identical except that the latter's
- * link pointer must be FAR on 80x86 machines.
- * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
- * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
- * of the alignment requirement of ALIGN_TYPE.
+ * Small and large pool headers are identical.
  */
 
-typedef union small_pool_struct * small_pool_ptr;
+typedef struct small_pool_struct * small_pool_ptr;
 
-typedef union small_pool_struct {
-  struct {
-    small_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+typedef struct small_pool_struct {
+  small_pool_ptr next;  /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
 } small_pool_hdr;
 
-typedef union large_pool_struct FAR * large_pool_ptr;
+typedef struct large_pool_struct * large_pool_ptr;
 
-typedef union large_pool_struct {
-  struct {
-    large_pool_ptr next;	/* next in list of pools */
-    size_t bytes_used;		/* how many bytes already used within pool */
-    size_t bytes_left;		/* bytes still available in this pool */
-  } hdr;
-  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+typedef struct large_pool_struct {
+  large_pool_ptr next;  /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
 } large_pool_hdr;
 
-
 /*
  * Here is the full definition of a memory manager object.
  */
 
 typedef struct {
-  struct jpeg_memory_mgr pub;	/* public fields */
+  struct jpeg_memory_mgr pub;   /* public fields */
 
   /* Each pool identifier (lifetime class) names a linked list of pools. */
   small_pool_ptr small_list[JPOOL_NUMPOOLS];
@@ -129,12 +132,12 @@
   jvirt_barray_ptr virt_barray_list;
 
   /* This counts total space obtained from jpeg_get_small/large */
-  long total_space_allocated;
+  size_t total_space_allocated;
 
   /* alloc_sarray and alloc_barray set this value for use by virtual
    * array routines.
    */
-  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+  JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */
 } my_memory_mgr;
 
 typedef my_memory_mgr * my_mem_ptr;
@@ -148,39 +151,39 @@
  */
 
 struct jvirt_sarray_control {
-  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
+  JSAMPARRAY mem_buffer;        /* => the in-memory buffer */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION samplesperrow;     /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_sarray_ptr next;        /* link to next virtual sarray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
 };
 
 struct jvirt_barray_control {
-  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
-  JDIMENSION rows_in_array;	/* total virtual array height */
-  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
-  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
-  JDIMENSION rows_in_mem;	/* height of memory buffer */
-  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
-  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
-  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
-  boolean pre_zero;		/* pre-zero mode requested? */
-  boolean dirty;		/* do current buffer contents need written? */
-  boolean b_s_open;		/* is backing-store data valid? */
-  jvirt_barray_ptr next;	/* link to next virtual barray control block */
-  backing_store_info b_s_info;	/* System-dependent control info */
+  JBLOCKARRAY mem_buffer;       /* => the in-memory buffer */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION blocksperrow;      /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_barray_ptr next;        /* link to next virtual barray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
 };
 
 
-#ifdef MEM_STATS		/* optional extra stuff for statistics */
+#ifdef MEM_STATS                /* optional extra stuff for statistics */
 
 LOCAL(void)
 print_mem_stats (j_common_ptr cinfo, int pool_id)
@@ -194,19 +197,19 @@
    * This is helpful because message parm array can't handle longs.
    */
   fprintf(stderr, "Freeing pool %d, total space = %ld\n",
-	  pool_id, mem->total_space_allocated);
+          pool_id, mem->total_space_allocated);
 
   for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
-       lhdr_ptr = lhdr_ptr->hdr.next) {
+       lhdr_ptr = lhdr_ptr->next) {
     fprintf(stderr, "  Large chunk used %ld\n",
-	    (long) lhdr_ptr->hdr.bytes_used);
+            (long) lhdr_ptr->bytes_used);
   }
 
   for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
-       shdr_ptr = shdr_ptr->hdr.next) {
+       shdr_ptr = shdr_ptr->next) {
     fprintf(stderr, "  Small chunk used %ld free %ld\n",
-	    (long) shdr_ptr->hdr.bytes_used,
-	    (long) shdr_ptr->hdr.bytes_left);
+            (long) shdr_ptr->bytes_used,
+            (long) shdr_ptr->bytes_left);
   }
 }
 
@@ -219,7 +222,7 @@
 /* If we compiled MEM_STATS support, report alloc requests before dying */
 {
 #ifdef MEM_STATS
-  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+  cinfo->err->trace_level = 2;  /* force self_destruct to report stats */
 #endif
   ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
 }
@@ -236,21 +239,25 @@
  * and we also distinguish the first pool of a class from later ones.
  * NOTE: the values given work fairly well on both 16- and 32-bit-int
  * machines, but may be too small if longs are 64 bits or more.
+ *
+ * Since we do not know what alignment malloc() gives us, we have to
+ * allocate ALIGN_SIZE-1 extra space per pool to have room for alignment
+ * adjustment.
  */
 
-static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] =
 {
-	1600,			/* first PERMANENT pool */
-	16000			/* first IMAGE pool */
+        1600,                   /* extra bytes requested with first PERMANENT pool */
+        16000                   /* extra bytes requested with first IMAGE pool */
 };
 
-static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] =
 {
-	0,			/* additional PERMANENT pools */
-	5000			/* additional IMAGE pools */
+        0,                      /* extra bytes for each additional PERMANENT pool */
+        5000                    /* extra bytes for each additional IMAGE pool */
 };
 
-#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+#define MIN_SLOP  50            /* stop halving slop below this; > 0 to avoid futile looping */
 
 
 METHODDEF(void *)
@@ -260,34 +267,37 @@
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
   small_pool_ptr hdr_ptr, prev_hdr_ptr;
   char * data_ptr;
-  size_t odd_bytes, min_request, slop;
+  size_t min_request, slop;
+
+  /*
+   * Round up the requested size to a multiple of ALIGN_SIZE in order
+   * to assure alignment for the next object allocated in the same pool
+   * and so that algorithms can straddle outside the proper area up
+   * to the next alignment.
+   */
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
 
   /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
-    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+  if ((sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
+    out_of_memory(cinfo, 1);    /* request exceeds malloc's ability */
 
   /* See if space is available in any existing pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
   prev_hdr_ptr = NULL;
   hdr_ptr = mem->small_list[pool_id];
   while (hdr_ptr != NULL) {
-    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
-      break;			/* found pool with enough space */
+    if (hdr_ptr->bytes_left >= sizeofobject)
+      break;                    /* found pool with enough space */
     prev_hdr_ptr = hdr_ptr;
-    hdr_ptr = hdr_ptr->hdr.next;
+    hdr_ptr = hdr_ptr->next;
   }
 
   /* Time to make a new pool? */
   if (hdr_ptr == NULL) {
     /* min_request is what we need now, slop is what will be leftover */
-    min_request = sizeofobject + SIZEOF(small_pool_hdr);
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+    min_request = sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1;
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
       slop = first_pool_slop[pool_id];
     else
       slop = extra_pool_slop[pool_id];
@@ -298,27 +308,30 @@
     for (;;) {
       hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
       if (hdr_ptr != NULL)
-	break;
+        break;
       slop /= 2;
-      if (slop < MIN_SLOP)	/* give up when it gets real small */
-	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+      if (slop < MIN_SLOP)      /* give up when it gets real small */
+        out_of_memory(cinfo, 2); /* jpeg_get_small failed */
     }
     mem->total_space_allocated += min_request + slop;
     /* Success, initialize the new pool header and add to end of list */
-    hdr_ptr->hdr.next = NULL;
-    hdr_ptr->hdr.bytes_used = 0;
-    hdr_ptr->hdr.bytes_left = sizeofobject + slop;
-    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+    hdr_ptr->next = NULL;
+    hdr_ptr->bytes_used = 0;
+    hdr_ptr->bytes_left = sizeofobject + slop;
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
       mem->small_list[pool_id] = hdr_ptr;
     else
-      prev_hdr_ptr->hdr.next = hdr_ptr;
+      prev_hdr_ptr->next = hdr_ptr;
   }
 
   /* OK, allocate the object from the current pool */
-  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
-  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
-  hdr_ptr->hdr.bytes_used += sizeofobject;
-  hdr_ptr->hdr.bytes_left -= sizeofobject;
+  data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
+  data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */
+  if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
+    data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
+  data_ptr += hdr_ptr->bytes_used; /* point to place for object */
+  hdr_ptr->bytes_used += sizeofobject;
+  hdr_ptr->bytes_left -= sizeofobject;
 
   return (void *) data_ptr;
 }
@@ -327,9 +340,8 @@
 /*
  * Allocation of "large" objects.
  *
- * The external semantics of these are the same as "small" objects,
- * except that FAR pointers are used on 80x86.  However the pool
- * management heuristics are quite different.  We assume that each
+ * The external semantics of these are the same as "small" objects.  However,
+ * the pool management heuristics are quite different.  We assume that each
  * request is large enough that it may as well be passed directly to
  * jpeg_get_large; the pool management just links everything together
  * so that we can free it all on demand.
@@ -338,49 +350,56 @@
  * deliberately bunch rows together to ensure a large request size.
  */
 
-METHODDEF(void FAR *)
+METHODDEF(void *)
 alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
 /* Allocate a "large" object */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
   large_pool_ptr hdr_ptr;
-  size_t odd_bytes;
+  char * data_ptr;
+
+  /* Reject oversized requests first so the rounding below cannot wrap around */
+  if (sizeofobject > MAX_ALLOC_CHUNK)
+    out_of_memory(cinfo, 3);    /* request exceeds malloc's ability */
+
+  /* Round up to a multiple of ALIGN_SIZE so algorithms can straddle the end */
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
 
   /* Check for unsatisfiable request (do now to ensure no overflow below) */
-  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
-    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
-
-  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
-  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
-  if (odd_bytes > 0)
-    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+  if ((sizeof(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
+    out_of_memory(cinfo, 3);    /* header + object + alignment pad is too big */
 
   /* Always make a new pool */
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
-					    SIZEOF(large_pool_hdr));
+                                            sizeof(large_pool_hdr) +
+                                            ALIGN_SIZE - 1);
   if (hdr_ptr == NULL)
-    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
-  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
+    out_of_memory(cinfo, 4);    /* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + sizeof(large_pool_hdr) + ALIGN_SIZE - 1;
 
   /* Success, initialize the new pool header and add to list */
-  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  hdr_ptr->next = mem->large_list[pool_id];
   /* We maintain space counts in each pool header for statistical purposes,
    * even though they are not needed for allocation.
    */
-  hdr_ptr->hdr.bytes_used = sizeofobject;
-  hdr_ptr->hdr.bytes_left = 0;
+  hdr_ptr->bytes_used = sizeofobject;
+  hdr_ptr->bytes_left = 0;
   mem->large_list[pool_id] = hdr_ptr;
 
-  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+  data_ptr = (char *) hdr_ptr; /* point to first data byte in pool... */
+  data_ptr += sizeof(large_pool_hdr); /* ...by skipping the large-pool header... */
+  if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
+    data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
+
+  return (void *) data_ptr;     /* ALIGN_SIZE-aligned start of the object */
 }
 
 
 /*
  * Creation of 2-D sample arrays.
- * The pointers are in near heap, the samples themselves in FAR heap.
  *
  * To minimize allocation overhead and to allow I/O of large contiguous
  * blocks, we allocate the sample rows in groups of as many rows as possible
@@ -389,11 +408,15 @@
  * this chunking of rows.  The rowsperchunk value is left in the mem manager
  * object so that it can be saved away if this sarray is the workspace for
  * a virtual array.
+ *
+ * Since we are often upsampling with a factor 2, we align the size (not
+ * the start) to 2 * ALIGN_SIZE so that the upsampling routines don't have
+ * to be as careful about size.
  */
 
 METHODDEF(JSAMPARRAY)
 alloc_sarray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION samplesperrow, JDIMENSION numrows)
+              JDIMENSION samplesperrow, JDIMENSION numrows)
 /* Allocate a 2-D sample array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -402,9 +425,14 @@
   JDIMENSION rowsperchunk, currow, i;
   long ltemp;
 
+  /* Make sure each row is properly aligned */
+  if ((ALIGN_SIZE % sizeof(JSAMPLE)) != 0)
+    out_of_memory(cinfo, 5);    /* safety check */
+  samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / sizeof(JSAMPLE));
+
   /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+  ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) /
+          ((long) samplesperrow * sizeof(JSAMPLE));
   if (ltemp <= 0)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
   if (ltemp < (long) numrows)
@@ -415,15 +443,15 @@
 
   /* Get space for row pointers (small object) */
   result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
-				    (size_t) (numrows * SIZEOF(JSAMPROW)));
+                                    (size_t) (numrows * sizeof(JSAMPROW)));
 
   /* Get the rows themselves (large objects) */
   currow = 0;
   while (currow < numrows) {
     rowsperchunk = MIN(rowsperchunk, numrows - currow);
     workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
-		  * SIZEOF(JSAMPLE)));
+        (size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+                  * sizeof(JSAMPLE)));
     for (i = rowsperchunk; i > 0; i--) {
       result[currow++] = workspace;
       workspace += samplesperrow;
@@ -441,7 +469,7 @@
 
 METHODDEF(JBLOCKARRAY)
 alloc_barray (j_common_ptr cinfo, int pool_id,
-	      JDIMENSION blocksperrow, JDIMENSION numrows)
+              JDIMENSION blocksperrow, JDIMENSION numrows)
 /* Allocate a 2-D coefficient-block array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -450,9 +478,13 @@
   JDIMENSION rowsperchunk, currow, i;
   long ltemp;
 
+  /* Make sure each row is properly aligned */
+  if ((sizeof(JBLOCK) % ALIGN_SIZE) != 0)
+    out_of_memory(cinfo, 6);    /* safety check */
+
   /* Calculate max # of rows allowed in one allocation chunk */
-  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
-	  ((long) blocksperrow * SIZEOF(JBLOCK));
+  ltemp = (MAX_ALLOC_CHUNK-sizeof(large_pool_hdr)) /
+          ((long) blocksperrow * sizeof(JBLOCK));
   if (ltemp <= 0)
     ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
   if (ltemp < (long) numrows)
@@ -463,15 +495,15 @@
 
   /* Get space for row pointers (small object) */
   result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
-				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
+                                     (size_t) (numrows * sizeof(JBLOCKROW)));
 
   /* Get the rows themselves (large objects) */
   currow = 0;
   while (currow < numrows) {
     rowsperchunk = MIN(rowsperchunk, numrows - currow);
     workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
-	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
-		  * SIZEOF(JBLOCK)));
+        (size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+                  * sizeof(JBLOCK)));
     for (i = rowsperchunk; i > 0; i--) {
       result[currow++] = workspace;
       workspace += blocksperrow;
@@ -521,8 +553,8 @@
 
 METHODDEF(jvirt_sarray_ptr)
 request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION samplesperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
+                     JDIMENSION samplesperrow, JDIMENSION numrows,
+                     JDIMENSION maxaccess)
 /* Request a virtual 2-D sample array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -530,18 +562,18 @@
 
   /* Only IMAGE-lifetime virtual arrays are currently supported */
   if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   /* get control block */
   result = (jvirt_sarray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_sarray_control));
+                                          sizeof(struct jvirt_sarray_control));
 
-  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->mem_buffer = NULL;    /* marks array not yet realized */
   result->rows_in_array = numrows;
   result->samplesperrow = samplesperrow;
   result->maxaccess = maxaccess;
   result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->b_s_open = FALSE;     /* no associated backing-store object */
   result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
   mem->virt_sarray_list = result;
 
@@ -551,8 +583,8 @@
 
 METHODDEF(jvirt_barray_ptr)
 request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
-		     JDIMENSION blocksperrow, JDIMENSION numrows,
-		     JDIMENSION maxaccess)
+                     JDIMENSION blocksperrow, JDIMENSION numrows,
+                     JDIMENSION maxaccess)
 /* Request a virtual 2-D coefficient-block array */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
@@ -560,18 +592,18 @@
 
   /* Only IMAGE-lifetime virtual arrays are currently supported */
   if (pool_id != JPOOL_IMAGE)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
   /* get control block */
   result = (jvirt_barray_ptr) alloc_small(cinfo, pool_id,
-					  SIZEOF(struct jvirt_barray_control));
+                                          sizeof(struct jvirt_barray_control));
 
-  result->mem_buffer = NULL;	/* marks array not yet realized */
+  result->mem_buffer = NULL;    /* marks array not yet realized */
   result->rows_in_array = numrows;
   result->blocksperrow = blocksperrow;
   result->maxaccess = maxaccess;
   result->pre_zero = pre_zero;
-  result->b_s_open = FALSE;	/* no associated backing-store object */
+  result->b_s_open = FALSE;     /* no associated backing-store object */
   result->next = mem->virt_barray_list; /* add to list of virtual arrays */
   mem->virt_barray_list = result;
 
@@ -584,8 +616,8 @@
 /* Allocate the in-memory buffers for any unrealized virtual arrays */
 {
   my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
-  long space_per_minheight, maximum_space, avail_mem;
-  long minheights, max_minheights;
+  size_t space_per_minheight, maximum_space, avail_mem;
+  size_t minheights, max_minheights;
   jvirt_sarray_ptr sptr;
   jvirt_barray_ptr bptr;
 
@@ -598,26 +630,26 @@
   for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
     if (sptr->mem_buffer == NULL) { /* if not realized yet */
       space_per_minheight += (long) sptr->maxaccess *
-			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+                             (long) sptr->samplesperrow * sizeof(JSAMPLE);
       maximum_space += (long) sptr->rows_in_array *
-		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+                       (long) sptr->samplesperrow * sizeof(JSAMPLE);
     }
   }
   for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
     if (bptr->mem_buffer == NULL) { /* if not realized yet */
       space_per_minheight += (long) bptr->maxaccess *
-			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+                             (long) bptr->blocksperrow * sizeof(JBLOCK);
       maximum_space += (long) bptr->rows_in_array *
-		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+                       (long) bptr->blocksperrow * sizeof(JBLOCK);
     }
   }
 
   if (space_per_minheight <= 0)
-    return;			/* no unrealized arrays, no work */
+    return;                     /* no unrealized arrays, no work */
 
   /* Determine amount of memory to actually use; this is system-dependent. */
   avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
-				 mem->total_space_allocated);
+                                 mem->total_space_allocated);
 
   /* If the maximum space needed is available, make all the buffers full
    * height; otherwise parcel it out with the same number of minheights
@@ -640,19 +672,19 @@
     if (sptr->mem_buffer == NULL) { /* if not realized yet */
       minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
       if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	sptr->rows_in_mem = sptr->rows_in_array;
+        /* This buffer fits in memory */
+        sptr->rows_in_mem = sptr->rows_in_array;
       } else {
-	/* It doesn't fit in memory, create backing store. */
-	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
-				(long) sptr->rows_in_array *
-				(long) sptr->samplesperrow *
-				(long) SIZEOF(JSAMPLE));
-	sptr->b_s_open = TRUE;
+        /* It doesn't fit in memory, create backing store. */
+        sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+        jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+                                (long) sptr->rows_in_array *
+                                (long) sptr->samplesperrow *
+                                (long) sizeof(JSAMPLE));
+        sptr->b_s_open = TRUE;
       }
       sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
-				      sptr->samplesperrow, sptr->rows_in_mem);
+                                      sptr->samplesperrow, sptr->rows_in_mem);
       sptr->rowsperchunk = mem->last_rowsperchunk;
       sptr->cur_start_row = 0;
       sptr->first_undef_row = 0;
@@ -664,19 +696,19 @@
     if (bptr->mem_buffer == NULL) { /* if not realized yet */
       minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
       if (minheights <= max_minheights) {
-	/* This buffer fits in memory */
-	bptr->rows_in_mem = bptr->rows_in_array;
+        /* This buffer fits in memory */
+        bptr->rows_in_mem = bptr->rows_in_array;
       } else {
-	/* It doesn't fit in memory, create backing store. */
-	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
-	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
-				(long) bptr->rows_in_array *
-				(long) bptr->blocksperrow *
-				(long) SIZEOF(JBLOCK));
-	bptr->b_s_open = TRUE;
+        /* It doesn't fit in memory, create backing store. */
+        bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+        jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+                                (long) bptr->rows_in_array *
+                                (long) bptr->blocksperrow *
+                                (long) sizeof(JBLOCK));
+        bptr->b_s_open = TRUE;
       }
       bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
-				      bptr->blocksperrow, bptr->rows_in_mem);
+                                      bptr->blocksperrow, bptr->rows_in_mem);
       bptr->rowsperchunk = mem->last_rowsperchunk;
       bptr->cur_start_row = 0;
       bptr->first_undef_row = 0;
@@ -692,7 +724,7 @@
 {
   long bytesperrow, file_offset, byte_count, rows, thisrow, i;
 
-  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  bytesperrow = (long) ptr->samplesperrow * sizeof(JSAMPLE);
   file_offset = ptr->cur_start_row * bytesperrow;
   /* Loop to read or write each allocation chunk in mem_buffer */
   for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
@@ -703,17 +735,17 @@
     rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
     /* Transfer no more than fits in file */
     rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
+    if (rows <= 0)              /* this chunk might be past end of file! */
       break;
     byte_count = rows * bytesperrow;
     if (writing)
       (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
+                                            (void *) ptr->mem_buffer[i],
+                                            file_offset, byte_count);
     else
       (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
+                                           (void *) ptr->mem_buffer[i],
+                                           file_offset, byte_count);
     file_offset += byte_count;
   }
 }
@@ -725,7 +757,7 @@
 {
   long bytesperrow, file_offset, byte_count, rows, thisrow, i;
 
-  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  bytesperrow = (long) ptr->blocksperrow * sizeof(JBLOCK);
   file_offset = ptr->cur_start_row * bytesperrow;
   /* Loop to read or write each allocation chunk in mem_buffer */
   for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
@@ -736,17 +768,17 @@
     rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
     /* Transfer no more than fits in file */
     rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
-    if (rows <= 0)		/* this chunk might be past end of file! */
+    if (rows <= 0)              /* this chunk might be past end of file! */
       break;
     byte_count = rows * bytesperrow;
     if (writing)
       (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
-					    (void FAR *) ptr->mem_buffer[i],
-					    file_offset, byte_count);
+                                            (void *) ptr->mem_buffer[i],
+                                            file_offset, byte_count);
     else
       (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
-					   (void FAR *) ptr->mem_buffer[i],
-					   file_offset, byte_count);
+                                           (void *) ptr->mem_buffer[i],
+                                           file_offset, byte_count);
     file_offset += byte_count;
   }
 }
@@ -754,8 +786,8 @@
 
 METHODDEF(JSAMPARRAY)
 access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
+                    JDIMENSION start_row, JDIMENSION num_rows,
+                    boolean writable)
 /* Access the part of a virtual sample array starting at start_row */
 /* and extending for num_rows rows.  writable is true if  */
 /* caller intends to modify the accessed area. */
@@ -793,7 +825,7 @@
 
       ltemp = (long) end_row - (long) ptr->rows_in_mem;
       if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
+        ltemp = 0;              /* don't fall off front end of file */
       ptr->cur_start_row = (JDIMENSION) ltemp;
     }
     /* Read in the selected part of the array.
@@ -808,25 +840,25 @@
    */
   if (ptr->first_undef_row < end_row) {
     if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
     } else {
       undef_row = ptr->first_undef_row;
     }
     if (writable)
       ptr->first_undef_row = end_row;
     if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      size_t bytesperrow = (size_t) ptr->samplesperrow * sizeof(JSAMPLE);
       undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
       end_row -= ptr->cur_start_row;
       while (undef_row < end_row) {
-	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
+        jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
       }
     } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      if (! writable)           /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
     }
   }
   /* Flag the buffer dirty if caller will write in it */
@@ -839,8 +871,8 @@
 
 METHODDEF(JBLOCKARRAY)
 access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
-		    JDIMENSION start_row, JDIMENSION num_rows,
-		    boolean writable)
+                    JDIMENSION start_row, JDIMENSION num_rows,
+                    boolean writable)
 /* Access the part of a virtual block array starting at start_row */
 /* and extending for num_rows rows.  writable is true if  */
 /* caller intends to modify the accessed area. */
@@ -878,7 +910,7 @@
 
       ltemp = (long) end_row - (long) ptr->rows_in_mem;
       if (ltemp < 0)
-	ltemp = 0;		/* don't fall off front end of file */
+        ltemp = 0;              /* don't fall off front end of file */
       ptr->cur_start_row = (JDIMENSION) ltemp;
     }
     /* Read in the selected part of the array.
@@ -893,25 +925,25 @@
    */
   if (ptr->first_undef_row < end_row) {
     if (ptr->first_undef_row < start_row) {
-      if (writable)		/* writer skipped over a section of array */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
-      undef_row = start_row;	/* but reader is allowed to read ahead */
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
     } else {
       undef_row = ptr->first_undef_row;
     }
     if (writable)
       ptr->first_undef_row = end_row;
     if (ptr->pre_zero) {
-      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      size_t bytesperrow = (size_t) ptr->blocksperrow * sizeof(JBLOCK);
       undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
       end_row -= ptr->cur_start_row;
       while (undef_row < end_row) {
-	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
-	undef_row++;
+        jzero_far((void *) ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
       }
     } else {
-      if (! writable)		/* reader looking at undefined data */
-	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      if (! writable)           /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
     }
   }
   /* Flag the buffer dirty if caller will write in it */
@@ -935,7 +967,7 @@
   size_t space_freed;
 
   if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
-    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
 
 #ifdef MEM_STATS
   if (cinfo->err->trace_level > 1)
@@ -948,16 +980,16 @@
     jvirt_barray_ptr bptr;
 
     for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
-      if (sptr->b_s_open) {	/* there may be no backing store */
-	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      if (sptr->b_s_open) {     /* there may be no backing store */
+        sptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
       }
     }
     mem->virt_sarray_list = NULL;
     for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
-      if (bptr->b_s_open) {	/* there may be no backing store */
-	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
-	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      if (bptr->b_s_open) {     /* there may be no backing store */
+        bptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
       }
     }
     mem->virt_barray_list = NULL;
@@ -968,11 +1000,11 @@
   mem->large_list[pool_id] = NULL;
 
   while (lhdr_ptr != NULL) {
-    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next;
-    space_freed = lhdr_ptr->hdr.bytes_used +
-		  lhdr_ptr->hdr.bytes_left +
-		  SIZEOF(large_pool_hdr);
-    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
+    space_freed = lhdr_ptr->bytes_used +
+                  lhdr_ptr->bytes_left +
+                  sizeof(large_pool_hdr);
+    jpeg_free_large(cinfo, (void *) lhdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     lhdr_ptr = next_lhdr_ptr;
   }
@@ -982,10 +1014,10 @@
   mem->small_list[pool_id] = NULL;
 
   while (shdr_ptr != NULL) {
-    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next;
-    space_freed = shdr_ptr->hdr.bytes_used +
-		  shdr_ptr->hdr.bytes_left +
-		  SIZEOF(small_pool_hdr);
+    small_pool_ptr next_shdr_ptr = shdr_ptr->next;
+    space_freed = shdr_ptr->bytes_used +
+                  shdr_ptr->bytes_left +
+                  sizeof(small_pool_hdr);
     jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
     mem->total_space_allocated -= space_freed;
     shdr_ptr = next_shdr_ptr;
@@ -1012,10 +1044,10 @@
   }
 
   /* Release the memory manager control block too. */
-  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
-  cinfo->mem = NULL;		/* ensures I will be called only once */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, sizeof(my_memory_mgr));
+  cinfo->mem = NULL;            /* ensures I will be called only once */
 
-  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+  jpeg_mem_term(cinfo);         /* system-dependent cleanup */
 }
 
 
@@ -1032,34 +1064,34 @@
   int pool;
   size_t test_mac;
 
-  cinfo->mem = NULL;		/* for safety if init fails */
+  cinfo->mem = NULL;            /* for safety if init fails */
 
   /* Check for configuration errors.
-   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * ALIGN_SIZE should be a power of 2; otherwise, it probably
    * doesn't reflect any real hardware alignment requirement.
    * The test is a little tricky: for X>0, X and X-1 have no one-bits
    * in common if and only if X is a power of 2, ie has only one one-bit.
    * Some compilers may give an "unreachable code" warning here; ignore it.
    */
-  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+  if ((ALIGN_SIZE & (ALIGN_SIZE-1)) != 0)
     ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
   /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
-   * a multiple of SIZEOF(ALIGN_TYPE).
+   * a multiple of ALIGN_SIZE.
    * Again, an "unreachable code" warning may be ignored here.
    * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
    */
   test_mac = (size_t) MAX_ALLOC_CHUNK;
   if ((long) test_mac != MAX_ALLOC_CHUNK ||
-      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+      (MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0)
     ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
 
   max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
 
   /* Attempt to allocate memory manager's control block */
-  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, sizeof(my_memory_mgr));
 
   if (mem == NULL) {
-    jpeg_mem_term(cinfo);	/* system-dependent cleanup */
+    jpeg_mem_term(cinfo);       /* system-dependent cleanup */
     ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
   }
 
@@ -1089,7 +1121,7 @@
   mem->virt_sarray_list = NULL;
   mem->virt_barray_list = NULL;
 
-  mem->total_space_allocated = SIZEOF(my_memory_mgr);
+  mem->total_space_allocated = sizeof(my_memory_mgr);
 
   /* Declare ourselves open for business */
   cinfo->mem = & mem->pub;
@@ -1107,9 +1139,9 @@
       char ch = 'x';
 
       if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
-	if (ch == 'm' || ch == 'M')
-	  max_to_use *= 1000L;
-	mem->pub.max_memory_to_use = max_to_use * 1000L;
+        if (ch == 'm' || ch == 'M')
+          max_to_use *= 1000L;
+        mem->pub.max_memory_to_use = max_to_use * 1000L;
       }
     }
   }
diff --git a/jmemname.c b/jmemname.c
deleted file mode 100644
index ed96dee..0000000
--- a/jmemname.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * jmemname.c
- *
- * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file provides a generic implementation of the system-dependent
- * portion of the JPEG memory manager.  This implementation assumes that
- * you must explicitly construct a name for each temp file.
- * Also, the problem of determining the amount of memory available
- * is shoved onto the user.
- */
-
-#define JPEG_INTERNALS
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
-
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
-#endif
-
-#ifndef SEEK_SET		/* pre-ANSI systems may not define this; */
-#define SEEK_SET  0		/* if not, assume 0 is correct */
-#endif
-
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define RW_BINARY	"w+"
-#else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define RW_BINARY	"w+b", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define RW_BINARY	"w+b"
-#endif
-#endif
-
-
-/*
- * Selection of a file name for a temporary file.
- * This is system-dependent!
- *
- * The code as given is suitable for most Unix systems, and it is easily
- * modified for most non-Unix systems.  Some notes:
- *  1.  The temp file is created in the directory named by TEMP_DIRECTORY.
- *      The default value is /usr/tmp, which is the conventional place for
- *      creating large temp files on Unix.  On other systems you'll probably
- *      want to change the file location.  You can do this by editing the
- *      #define, or (preferred) by defining TEMP_DIRECTORY in jconfig.h.
- *
- *  2.  If you need to change the file name as well as its location,
- *      you can override the TEMP_FILE_NAME macro.  (Note that this is
- *      actually a printf format string; it must contain %s and %d.)
- *      Few people should need to do this.
- *
- *  3.  mktemp() is used to ensure that multiple processes running
- *      simultaneously won't select the same file names.  If your system
- *      doesn't have mktemp(), define NO_MKTEMP to do it the hard way.
- *      (If you don't have <errno.h>, also define NO_ERRNO_H.)
- *
- *  4.  You probably want to define NEED_SIGNAL_CATCHER so that cjpeg.c/djpeg.c
- *      will cause the temp files to be removed if you stop the program early.
- */
-
-#ifndef TEMP_DIRECTORY		/* can override from jconfig.h or Makefile */
-#define TEMP_DIRECTORY  "/usr/tmp/" /* recommended setting for Unix */
-#endif
-
-static int next_file_num;	/* to distinguish among several temp files */
-
-#ifdef NO_MKTEMP
-
-#ifndef TEMP_FILE_NAME		/* can override from jconfig.h or Makefile */
-#define TEMP_FILE_NAME  "%sJPG%03d.TMP"
-#endif
-
-#ifndef NO_ERRNO_H
-#include <errno.h>		/* to define ENOENT */
-#endif
-
-/* ANSI C specifies that errno is a macro, but on older systems it's more
- * likely to be a plain int variable.  And not all versions of errno.h
- * bother to declare it, so we have to in order to be most portable.  Thus:
- */
-#ifndef errno
-extern int errno;
-#endif
-
-
-LOCAL(void)
-select_file_name (char * fname)
-{
-  FILE * tfile;
-
-  /* Keep generating file names till we find one that's not in use */
-  for (;;) {
-    next_file_num++;		/* advance counter */
-    sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num);
-    if ((tfile = fopen(fname, READ_BINARY)) == NULL) {
-      /* fopen could have failed for a reason other than the file not
-       * being there; for example, file there but unreadable.
-       * If <errno.h> isn't available, then we cannot test the cause.
-       */
-#ifdef ENOENT
-      if (errno != ENOENT)
-	continue;
-#endif
-      break;
-    }
-    fclose(tfile);		/* oops, it's there; close tfile & try again */
-  }
-}
-
-#else /* ! NO_MKTEMP */
-
-/* Note that mktemp() requires the initial filename to end in six X's */
-#ifndef TEMP_FILE_NAME		/* can override from jconfig.h or Makefile */
-#define TEMP_FILE_NAME  "%sJPG%dXXXXXX"
-#endif
-
-LOCAL(void)
-select_file_name (char * fname)
-{
-  next_file_num++;		/* advance counter */
-  sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num);
-  mktemp(fname);		/* make sure file name is unique */
-  /* mktemp replaces the trailing XXXXXX with a unique string of characters */
-}
-
-#endif /* NO_MKTEMP */
-
-
-/*
- * Memory allocation and freeing are controlled by the regular library
- * routines malloc() and free().
- */
-
-GLOBAL(void *)
-jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
- */
-
-GLOBAL(void FAR *)
-jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
-{
-  return (void FAR *) malloc(sizeofobject);
-}
-
-GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
-{
-  free(object);
-}
-
-
-/*
- * This routine computes the total memory space available for allocation.
- * It's impossible to do this in a portable way; our current solution is
- * to make the user tell us (with a default value set at compile time).
- * If you can actually get the available space, it's a good idea to subtract
- * a slop factor of 5% or so.
- */
-
-#ifndef DEFAULT_MAX_MEM		/* so can override from makefile */
-#define DEFAULT_MAX_MEM		1000000L /* default: one megabyte */
-#endif
-
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
-{
-  return cinfo->mem->max_memory_to_use - already_allocated;
-}
-
-
-/*
- * Backing store (temporary file) management.
- * Backing store objects are only used when the value returned by
- * jpeg_mem_available is less than the total space needed.  You can dispense
- * with these routines if you have plenty of virtual memory; see jmemnobs.c.
- */
-
-
-METHODDEF(void)
-read_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		    void FAR * buffer_address,
-		    long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFREAD(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_READ);
-}
-
-
-METHODDEF(void)
-write_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-		     void FAR * buffer_address,
-		     long file_offset, long byte_count)
-{
-  if (fseek(info->temp_file, file_offset, SEEK_SET))
-    ERREXIT(cinfo, JERR_TFILE_SEEK);
-  if (JFWRITE(info->temp_file, buffer_address, byte_count)
-      != (size_t) byte_count)
-    ERREXIT(cinfo, JERR_TFILE_WRITE);
-}
-
-
-METHODDEF(void)
-close_backing_store (j_common_ptr cinfo, backing_store_ptr info)
-{
-  fclose(info->temp_file);	/* close the file */
-  unlink(info->temp_name);	/* delete the file */
-/* If your system doesn't have unlink(), use remove() instead.
- * remove() is the ANSI-standard name for this function, but if
- * your system was ANSI you'd be using jmemansi.c, right?
- */
-  TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name);
-}
-
-
-/*
- * Initial opening of a backing-store object.
- */
-
-GLOBAL(void)
-jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
-{
-  select_file_name(info->temp_name);
-  if ((info->temp_file = fopen(info->temp_name, RW_BINARY)) == NULL)
-    ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name);
-  info->read_backing_store = read_backing_store;
-  info->write_backing_store = write_backing_store;
-  info->close_backing_store = close_backing_store;
-  TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name);
-}
-
-
-/*
- * These routines take care of any system-dependent initialization and
- * cleanup required.
- */
-
-GLOBAL(long)
-jpeg_mem_init (j_common_ptr cinfo)
-{
-  next_file_num = 0;		/* initialize temp file name generator */
-  return DEFAULT_MAX_MEM;	/* default for max_memory_to_use */
-}
-
-GLOBAL(void)
-jpeg_mem_term (j_common_ptr cinfo)
-{
-  /* no work */
-}
diff --git a/jmemnobs.c b/jmemnobs.c
index eb8c337..6282832 100644
--- a/jmemnobs.c
+++ b/jmemnobs.c
@@ -1,8 +1,10 @@
 /*
  * jmemnobs.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1992-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file provides a really simple implementation of the system-
@@ -18,11 +20,11 @@
 #define JPEG_INTERNALS
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "jmemsys.h"		/* import the system-dependent declarations */
+#include "jmemsys.h"            /* import the system-dependent declarations */
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
-extern void * malloc JPP((size_t size));
-extern void free JPP((void *ptr));
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
+extern void * malloc (size_t size);
+extern void free (void *ptr);
 #endif
 
 
@@ -46,19 +48,16 @@
 
 /*
  * "Large" objects are treated the same as "small" ones.
- * NB: although we include FAR keywords in the routine declarations,
- * this file won't actually work in 80x86 small/medium model; at least,
- * you probably won't be able to process useful-size images in only 64KB.
  */
 
-GLOBAL(void FAR *)
+GLOBAL(void *)
 jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
 {
-  return (void FAR *) malloc(sizeofobject);
+  return (void *) malloc(sizeofobject);
 }
 
 GLOBAL(void)
-jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+jpeg_free_large (j_common_ptr cinfo, void * object, size_t sizeofobject)
 {
   free(object);
 }
@@ -69,9 +68,9 @@
  * Here we always say, "we got all you want bud!"
  */
 
-GLOBAL(long)
-jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
-		    long max_bytes_needed, long already_allocated)
+GLOBAL(size_t)
+jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed,
+                    size_t max_bytes_needed, size_t already_allocated)
 {
   return max_bytes_needed;
 }
@@ -85,7 +84,7 @@
 
 GLOBAL(void)
 jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
-			 long total_bytes_needed)
+                         long total_bytes_needed)
 {
   ERREXIT(cinfo, JERR_NO_BACKING_STORE);
 }
@@ -99,7 +98,7 @@
 GLOBAL(long)
 jpeg_mem_init (j_common_ptr cinfo)
 {
-  return 0;			/* just set max_memory_to_use to 0 */
+  return 0;                     /* just set max_memory_to_use to 0 */
 }
 
 GLOBAL(void)
diff --git a/jmemsys.h b/jmemsys.h
index 6c3c6d3..5026c7c 100644
--- a/jmemsys.h
+++ b/jmemsys.h
@@ -1,8 +1,10 @@
 /*
  * jmemsys.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1992-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This include file defines the interface between the system-independent
@@ -14,25 +16,10 @@
  * in the IJG distribution.  You may need to modify it if you write a
  * custom memory manager.  If system-dependent changes are needed in
  * this file, the best method is to #ifdef them based on a configuration
- * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
- * and USE_MAC_MEMMGR.
+ * symbol supplied in jconfig.h.
  */
 
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_get_small		jGetSmall
-#define jpeg_free_small		jFreeSmall
-#define jpeg_get_large		jGetLarge
-#define jpeg_free_large		jFreeLarge
-#define jpeg_mem_available	jMemAvail
-#define jpeg_open_backing_store	jOpenBackStore
-#define jpeg_mem_init		jMemInit
-#define jpeg_mem_term		jMemTerm
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
 /*
  * These two functions are used to allocate and release small chunks of
  * memory.  (Typically the total amount requested through jpeg_get_small is
@@ -41,40 +28,36 @@
  * and free; in particular, jpeg_get_small must return NULL on failure.
  * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
  * size of the object being freed, just in case it's needed.
- * On an 80x86 machine using small-data memory model, these manage near heap.
  */
 
-EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
-EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
-				  size_t sizeofobject));
+EXTERN(void *) jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject);
+EXTERN(void) jpeg_free_small (j_common_ptr cinfo, void * object,
+                              size_t sizeofobject);
 
 /*
  * These two functions are used to allocate and release large chunks of
  * memory (up to the total free space designated by jpeg_mem_available).
- * The interface is the same as above, except that on an 80x86 machine,
- * far pointers are used.  On most other machines these are identical to
- * the jpeg_get/free_small routines; but we keep them separate anyway,
- * in case a different allocation strategy is desirable for large chunks.
+ * These are identical to the jpeg_get/free_small routines; but we keep them
+ * separate anyway, in case a different allocation strategy is desirable for
+ * large chunks.
  */
 
-EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
-				       size_t sizeofobject));
-EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
-				  size_t sizeofobject));
+EXTERN(void *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject);
+EXTERN(void) jpeg_free_large (j_common_ptr cinfo, void * object,
+                              size_t sizeofobject);
 
 /*
  * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
  * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
- * matter, but that case should never come into play).  This macro is needed
+ * matter, but that case should never come into play).  This macro was needed
  * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
- * On those machines, we expect that jconfig.h will provide a proper value.
- * On machines with 32-bit flat address spaces, any large constant may be used.
+ * On machines with flat address spaces, any large constant may be used.
  *
  * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
  * size_t and will be a multiple of sizeof(align_type).
  */
 
-#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#ifndef MAX_ALLOC_CHUNK         /* may be overridden in jconfig.h */
 #define MAX_ALLOC_CHUNK  1000000000L
 #endif
 
@@ -100,10 +83,9 @@
  * Conversely, zero may be returned to always use the minimum amount of memory.
  */
 
-EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
-				     long min_bytes_needed,
-				     long max_bytes_needed,
-				     long already_allocated));
+EXTERN(size_t) jpeg_mem_available (j_common_ptr cinfo, size_t min_bytes_needed,
+                                   size_t max_bytes_needed,
+                                   size_t already_allocated);
 
 
 /*
@@ -113,23 +95,23 @@
  * are private to the system-dependent backing store routines.
  */
 
-#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+#define TEMP_NAME_LENGTH   64   /* max length of a temporary file's name */
 
 
-#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+#ifdef USE_MSDOS_MEMMGR         /* DOS-specific junk */
 
-typedef unsigned short XMSH;	/* type of extended-memory handles */
-typedef unsigned short EMSH;	/* type of expanded-memory handles */
+typedef unsigned short XMSH;    /* type of extended-memory handles */
+typedef unsigned short EMSH;    /* type of expanded-memory handles */
 
 typedef union {
-  short file_handle;		/* DOS file handle if it's a temp file */
-  XMSH xms_handle;		/* handle if it's a chunk of XMS */
-  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+  short file_handle;            /* DOS file handle if it's a temp file */
+  XMSH xms_handle;              /* handle if it's a chunk of XMS */
+  EMSH ems_handle;              /* handle if it's a chunk of EMS */
 } handle_union;
 
 #endif /* USE_MSDOS_MEMMGR */
 
-#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#ifdef USE_MAC_MEMMGR           /* Mac-specific junk */
 #include <Files.h>
 #endif /* USE_MAC_MEMMGR */
 
@@ -138,31 +120,28 @@
 
 typedef struct backing_store_struct {
   /* Methods for reading/writing/closing this backing-store object */
-  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
-				     backing_store_ptr info,
-				     void FAR * buffer_address,
-				     long file_offset, long byte_count));
-  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info,
-				      void FAR * buffer_address,
-				      long file_offset, long byte_count));
-  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
-				      backing_store_ptr info));
+  void (*read_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
+                              void * buffer_address, long file_offset,
+                              long byte_count);
+  void (*write_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
+                               void * buffer_address, long file_offset,
+                               long byte_count);
+  void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info);
 
   /* Private fields for system-dependent backing-store management */
 #ifdef USE_MSDOS_MEMMGR
   /* For the MS-DOS manager (jmemdos.c), we need: */
-  handle_union handle;		/* reference to backing-store storage object */
+  handle_union handle;          /* reference to backing-store storage object */
   char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
 #else
 #ifdef USE_MAC_MEMMGR
   /* For the Mac manager (jmemmac.c), we need: */
-  short temp_file;		/* file reference number to temp file */
-  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  short temp_file;              /* file reference number to temp file */
+  FSSpec tempSpec;              /* the FSSpec for the temp file */
   char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
 #else
   /* For a typical implementation with temp files, we need: */
-  FILE * temp_file;		/* stdio reference to temp file */
+  FILE * temp_file;             /* stdio reference to temp file */
   char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
 #endif
 #endif
@@ -177,9 +156,9 @@
  * just take an error exit.)
  */
 
-EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
-					  backing_store_ptr info,
-					  long total_bytes_needed));
+EXTERN(void) jpeg_open_backing_store (j_common_ptr cinfo,
+                                      backing_store_ptr info,
+                                      long total_bytes_needed);
 
 
 /*
@@ -194,5 +173,5 @@
  * all opened backing-store objects have been closed.
  */
 
-EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
+EXTERN(long) jpeg_mem_init (j_common_ptr cinfo);
+EXTERN(void) jpeg_mem_term (j_common_ptr cinfo);
diff --git a/jmorecfg.h b/jmorecfg.h
index 54a7d1c..36fe971 100644
--- a/jmorecfg.h
+++ b/jmorecfg.h
@@ -1,8 +1,10 @@
 /*
  * jmorecfg.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2011, 2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains additional configuration options that customize the
@@ -12,18 +14,6 @@
 
 
 /*
- * Define BITS_IN_JSAMPLE as either
- *   8   for 8-bit sample values (the usual setting)
- *   12  for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
- */
-
-#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
-
-
-/*
  * Maximum number of components (color channels) allowed in JPEG image.
  * To meet the letter of the JPEG spec, set this to 255.  However, darn
  * few applications need more than 4 channels (maybe 5 for CMYK + alpha
@@ -32,7 +22,7 @@
  * bytes of storage, whether actually used in an image or not.)
  */
 
-#define MAX_COMPONENTS  10	/* maximum number of image components */
+#define MAX_COMPONENTS  10      /* maximum number of image components */
 
 
 /*
@@ -62,16 +52,16 @@
 #else /* not HAVE_UNSIGNED_CHAR */
 
 typedef char JSAMPLE;
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 #define GETJSAMPLE(value)  ((int) (value))
 #else
 #define GETJSAMPLE(value)  ((int) (value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
+#endif /* __CHAR_UNSIGNED__ */
 
 #endif /* HAVE_UNSIGNED_CHAR */
 
-#define MAXJSAMPLE	255
-#define CENTERJSAMPLE	128
+#define MAXJSAMPLE      255
+#define CENTERJSAMPLE   128
 
 #endif /* BITS_IN_JSAMPLE == 8 */
 
@@ -84,8 +74,8 @@
 typedef short JSAMPLE;
 #define GETJSAMPLE(value)  ((int) (value))
 
-#define MAXJSAMPLE	4095
-#define CENTERJSAMPLE	2048
+#define MAXJSAMPLE      4095
+#define CENTERJSAMPLE   2048
 
 #endif /* BITS_IN_JSAMPLE == 12 */
 
@@ -113,11 +103,11 @@
 #else /* not HAVE_UNSIGNED_CHAR */
 
 typedef char JOCTET;
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 #define GETJOCTET(value)  (value)
 #else
 #define GETJOCTET(value)  ((value) & 0xFF)
-#endif /* CHAR_IS_UNSIGNED */
+#endif /* __CHAR_UNSIGNED__ */
 
 #endif /* HAVE_UNSIGNED_CHAR */
 
@@ -134,11 +124,11 @@
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char UINT8;
 #else /* not HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char UINT8;
-#else /* not CHAR_IS_UNSIGNED */
+#else /* not __CHAR_UNSIGNED__ */
 typedef short UINT8;
-#endif /* CHAR_IS_UNSIGNED */
+#endif /* __CHAR_UNSIGNED__ */
 #endif /* HAVE_UNSIGNED_CHAR */
 
 /* UINT16 must hold at least the values 0..65535. */
@@ -151,13 +141,13 @@
 
 /* INT16 must hold at least the values -32768..32767. */
 
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT16 */
 typedef short INT16;
 #endif
 
 /* INT32 must hold at least signed 32-bit values. */
 
-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT32 */
 typedef long INT32;
 #endif
 
@@ -181,39 +171,13 @@
  */
 
 /* a function called through method pointers: */
-#define METHODDEF(type)		static type
+#define METHODDEF(type)         static type
 /* a function used only in its module: */
-#define LOCAL(type)		static type
+#define LOCAL(type)             static type
 /* a function referenced thru EXTERNs: */
-#define GLOBAL(type)		type
+#define GLOBAL(type)            type
 /* a reference to a GLOBAL function: */
-#define EXTERN(type)		extern type
-
-
-/* This macro is used to declare a "method", that is, a function pointer.
- * We want to supply prototype parameters if the compiler can cope.
- * Note that the arglist parameter must be parenthesized!
- * Again, you can customize this if you need special linkage keywords.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
-#else
-#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
-#endif
-
-
-/* Here is the pseudo-keyword for declaring pointers that must be "far"
- * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
- * by just saying "FAR *" where such a pointer is needed.  In a few places
- * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
- */
-
-#ifdef NEED_FAR_POINTERS
-#define FAR  far
-#else
-#define FAR
-#endif
+#define EXTERN(type)            extern type
 
 
 /*
@@ -226,11 +190,11 @@
 #ifndef HAVE_BOOLEAN
 typedef int boolean;
 #endif
-#ifndef FALSE			/* in case these macros already exist */
-#define FALSE	0		/* values of boolean */
+#ifndef FALSE                   /* in case these macros already exist */
+#define FALSE   0               /* values of boolean */
 #endif
 #ifndef TRUE
-#define TRUE	1
+#define TRUE    1
 #endif
 
 
@@ -256,20 +220,17 @@
  * (You may HAVE to do that if your compiler doesn't like null source files.)
  */
 
-/* Arithmetic coding is unsupported for legal reasons.  Complaints to IBM. */
-
 /* Capability options common to encoder and decoder: */
 
-#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
-#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
-#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+#define DCT_ISLOW_SUPPORTED     /* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED     /* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED     /* floating-point: accurate, fast on fast HW */
 
 /* Encoder capability options: */
 
-#undef  C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+#define C_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define ENTROPY_OPT_SUPPORTED       /* Optimization of entropy coding parms? */
 /* Note: if you selected 12-bit data precision, it is dangerous to turn off
  * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
  * precision, so jchuff.c normally uses entropy optimization to compute
@@ -282,82 +243,128 @@
 
 /* Decoder capability options: */
 
-#undef  D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
-#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define D_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+#define SAVE_MARKERS_SUPPORTED      /* jpeg_save_markers() needed? */
 #define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
-#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#define IDCT_SCALING_SUPPORTED      /* Output rescaling via IDCT? */
 #undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
 #define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
-#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
-#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+#define QUANT_1PASS_SUPPORTED       /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED       /* 2-pass color quantization? */
 
 /* more capability options later, no doubt */
 
 
 /*
- * Ordering of RGB data in scanlines passed to or from the application.
- * If your application wants to deal with data in the order B,G,R, just
- * change these macros.  You can also deal with formats such as R,G,B,X
- * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
- * the offsets will also change the order in which colormap data is organized.
- * RESTRICTIONS:
- * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
- * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not
- *    useful if you are using JPEG color spaces other than YCbCr or grayscale.
- * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
- *    is not 3 (they don't understand about dummy color components!).  So you
- *    can't use color quantization if you change that value.
+ * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial
+ * feature of libjpeg.  The idea was that, if an application developer needed
+ * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could
+ * change these macros, rebuild libjpeg, and link their application statically
+ * with it.  In reality, few people ever did this, because there were some
+ * severe restrictions involved (cjpeg and djpeg no longer worked properly,
+ * compressing/decompressing RGB JPEGs no longer worked properly, and the color
+ * quantizer wouldn't work with pixel sizes other than 3.)  Further, since all
+ * of the O/S-supplied versions of libjpeg were built with the default values
+ * of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications have
+ * come to regard these values as immutable.
+ *
+ * The libjpeg-turbo colorspace extensions provide a much cleaner way of
+ * compressing from/decompressing to buffers with arbitrary component orders
+ * and pixel sizes.  Thus, we do not support changing the values of RGB_RED,
+ * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE.  In addition to the restrictions
+ * listed above, changing these values will also break the SIMD extensions and
+ * the regression tests.
  */
 
-#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
-#define RGB_GREEN	1	/* Offset of Green */
-#define RGB_BLUE	2	/* Offset of Blue */
-#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+#define RGB_RED         0       /* Offset of Red in an RGB scanline element */
+#define RGB_GREEN       1       /* Offset of Green */
+#define RGB_BLUE        2       /* Offset of Blue */
+#define RGB_PIXELSIZE   3       /* JSAMPLEs per RGB scanline element */
 
+#define JPEG_NUMCS 17           /* number of entries in each rgb_* table below */
+
+#define EXT_RGB_RED        0    /* sample order R,G,B */
+#define EXT_RGB_GREEN      1
+#define EXT_RGB_BLUE       2
+#define EXT_RGB_PIXELSIZE  3    /* three samples per pixel */
+
+#define EXT_RGBX_RED       0    /* sample order R,G,B at offsets 0-2 */
+#define EXT_RGBX_GREEN     1
+#define EXT_RGBX_BLUE      2
+#define EXT_RGBX_PIXELSIZE 4    /* four samples; offset 3 is not R, G, or B */
+
+#define EXT_BGR_RED        2    /* sample order B,G,R */
+#define EXT_BGR_GREEN      1
+#define EXT_BGR_BLUE       0
+#define EXT_BGR_PIXELSIZE  3    /* three samples per pixel */
+
+#define EXT_BGRX_RED       2    /* sample order B,G,R at offsets 0-2 */
+#define EXT_BGRX_GREEN     1
+#define EXT_BGRX_BLUE      0
+#define EXT_BGRX_PIXELSIZE 4    /* four samples; offset 3 is not R, G, or B */
+
+#define EXT_XBGR_RED       3    /* sample order B,G,R at offsets 1-3 */
+#define EXT_XBGR_GREEN     2
+#define EXT_XBGR_BLUE      1
+#define EXT_XBGR_PIXELSIZE 4    /* four samples; offset 0 is not R, G, or B */
+
+#define EXT_XRGB_RED       1    /* sample order R,G,B at offsets 1-3 */
+#define EXT_XRGB_GREEN     2
+#define EXT_XRGB_BLUE      3
+#define EXT_XRGB_PIXELSIZE 4    /* four samples; offset 0 is not R, G, or B */
+
+static const int rgb_red[JPEG_NUMCS] = {  /* red-sample offset for each slot */
+  -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED,
+  EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
+  EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,  /* slots 12-15 repeat slots 7, 9, 10, 11 */
+  -1  /* NOTE(review): slot index is presumably a colorspace id, -1 presumably "no RGB layout" -- confirm */
+};
+
+static const int rgb_green[JPEG_NUMCS] = {  /* green-sample offset for each slot */
+  -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN,
+  EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
+  EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,  /* slots 12-15 repeat slots 7, 9, 10, 11 */
+  -1  /* NOTE(review): slot index is presumably a colorspace id, -1 presumably "no RGB layout" -- confirm */
+};
+
+static const int rgb_blue[JPEG_NUMCS] = {  /* blue-sample offset for each slot */
+  -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE,
+  EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
+  EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,  /* slots 12-15 repeat slots 7, 9, 10, 11 */
+  -1  /* NOTE(review): slot index is presumably a colorspace id, -1 presumably "no RGB layout" -- confirm */
+};
+
+static const int rgb_pixelsize[JPEG_NUMCS] = {  /* samples per pixel for each slot */
+  -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE,
+  EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
+  EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,  /* slots 12-15 repeat slots 7, 9, 10, 11 */
+  -1  /* NOTE(review): slot index is presumably a colorspace id, -1 presumably "no RGB layout" -- confirm */
+};
 
 /* Definitions for speed-related optimizations. */
 
-
-/* If your compiler supports inline functions, define INLINE
- * as the inline keyword; otherwise define it as empty.
- */
-
-#ifndef INLINE
-#ifdef __GNUC__			/* for instance, GNU C knows about inline */
-#define INLINE __inline__
-#endif
-#ifndef INLINE
-#define INLINE			/* default is to define it as empty */
-#endif
-#endif
-
-
 /* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
  * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
  * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
  */
 
 #ifndef MULTIPLIER
-#define MULTIPLIER  int		/* type for fastest integer multiply */
+#ifndef WITH_SIMD
+#define MULTIPLIER  int         /* type for fastest integer multiply */
+#else  /* WITH_SIMD */
+#define MULTIPLIER short  /* prefer 16-bit with SIMD for parallelism */
+#endif /* WITH_SIMD */
 #endif
 
 
 /* FAST_FLOAT should be either float or double, whichever is done faster
  * by your compiler.  (Note that this type is only used in the floating point
  * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
- * Typically, float is faster in ANSI C compilers, while double is faster in
- * pre-ANSI compilers (because they insist on converting to double anyway).
- * The code below therefore chooses float if we have ANSI-style prototypes.
  */
 
 #ifndef FAST_FLOAT
-#ifdef HAVE_PROTOTYPES
 #define FAST_FLOAT  float
-#else
-#define FAST_FLOAT  double
-#endif
 #endif
 
 #endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/jpeg_nbits_table.h b/jpeg_nbits_table.h
new file mode 100644
index 0000000..fcf7387
--- /dev/null
+++ b/jpeg_nbits_table.h
@@ -0,0 +1,4098 @@
+static const unsigned char jpeg_nbits_table[65536] = {
+   0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
+   5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+};
diff --git a/jpegcomp.h b/jpegcomp.h
new file mode 100644
index 0000000..ed9eeab
--- /dev/null
+++ b/jpegcomp.h
@@ -0,0 +1,30 @@
+/*
+ * jpegcomp.h
+ *
+ * Copyright (C) 2010, D. R. Commander
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * JPEG compatibility macros: each _name below expands to the identifier that
+ * matches JPEG_LIB_VERSION.  These declarations are considered internal to the
+ * JPEG library; most applications shouldn't need to include this file.
+ */
+
+#if JPEG_LIB_VERSION >= 70  /* map to the split horizontal/vertical names */
+#define _DCT_scaled_size DCT_h_scaled_size  /* generic name -> horizontal */
+#define _DCT_h_scaled_size DCT_h_scaled_size
+#define _DCT_v_scaled_size DCT_v_scaled_size
+#define _min_DCT_scaled_size min_DCT_h_scaled_size  /* generic -> horizontal */
+#define _min_DCT_h_scaled_size min_DCT_h_scaled_size
+#define _min_DCT_v_scaled_size min_DCT_v_scaled_size
+#define _jpeg_width jpeg_width
+#define _jpeg_height jpeg_height
+#else  /* JPEG_LIB_VERSION < 70, or undefined (evaluates as 0 in #if) */
+#define _DCT_scaled_size DCT_scaled_size
+#define _DCT_h_scaled_size DCT_scaled_size  /* h and v share one name */
+#define _DCT_v_scaled_size DCT_scaled_size
+#define _min_DCT_scaled_size min_DCT_scaled_size
+#define _min_DCT_h_scaled_size min_DCT_scaled_size  /* h and v share one name */
+#define _min_DCT_v_scaled_size min_DCT_scaled_size
+#define _jpeg_width image_width  /* falls back to the image_* names */
+#define _jpeg_height image_height
+#endif  /* JPEG_LIB_VERSION >= 70 */
diff --git a/jpegint.h b/jpegint.h
index 95b00d4..025accd 100644
--- a/jpegint.h
+++ b/jpegint.h
@@ -1,8 +1,11 @@
 /*
  * jpegint.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 1997-2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file provides common declarations for the various JPEG modules.
@@ -13,121 +16,115 @@
 
 /* Declarations for both compression & decompression */
 
-typedef enum {			/* Operating modes for buffer controllers */
-	JBUF_PASS_THRU,		/* Plain stripwise operation */
-	/* Remaining modes require a full-image buffer to have been created */
-	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
-	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
-	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+typedef enum {            /* Operating modes for buffer controllers */
+  JBUF_PASS_THRU,         /* Plain stripwise operation */
+  /* Remaining modes require a full-image buffer to have been created */
+  JBUF_SAVE_SOURCE,       /* Run source subobject only, save output */
+  JBUF_CRANK_DEST,        /* Run dest subobject only, using saved data */
+  JBUF_SAVE_AND_PASS      /* Run both subobjects, save output */
 } J_BUF_MODE;
 
 /* Values of global_state field (jdapi.c has some dependencies on ordering!) */
-#define CSTATE_START	100	/* after create_compress */
-#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
-#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
-#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
-#define DSTATE_START	200	/* after create_decompress */
-#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
-#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
-#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
-#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
-#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
-#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
-#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
-#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
-#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
-#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+#define CSTATE_START    100     /* after create_compress */
+#define CSTATE_SCANNING 101     /* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK   102     /* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS  103     /* jpeg_write_coefficients done */
+#define DSTATE_START    200     /* after create_decompress */
+#define DSTATE_INHEADER 201     /* reading header markers, no SOS yet */
+#define DSTATE_READY    202     /* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD  203     /* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN  204     /* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING 205     /* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK   206     /* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE 207     /* expecting jpeg_start_output */
+#define DSTATE_BUFPOST  208     /* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS  209     /* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING 210     /* looking for EOI in jpeg_finish_decompress */
 
 
 /* Declarations for compression modules */
 
 /* Master control module */
 struct jpeg_comp_master {
-  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+  void (*prepare_for_pass) (j_compress_ptr cinfo);
+  void (*pass_startup) (j_compress_ptr cinfo);
+  void (*finish_pass) (j_compress_ptr cinfo);
 
   /* State variables made visible to other modules */
-  boolean call_pass_startup;	/* True if pass_startup must be called */
-  boolean is_last_pass;		/* True during last pass */
+  boolean call_pass_startup;    /* True if pass_startup must be called */
+  boolean is_last_pass;         /* True during last pass */
 };
 
 /* Main buffer control (downsampled-data buffer) */
 struct jpeg_c_main_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_compress_ptr cinfo,
-			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
-			       JDIMENSION in_rows_avail));
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                        JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
 };
 
 /* Compression preprocessing (downsampling input buffer control) */
 struct jpeg_c_prep_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
-				   JSAMPARRAY input_buf,
-				   JDIMENSION *in_row_ctr,
-				   JDIMENSION in_rows_avail,
-				   JSAMPIMAGE output_buf,
-				   JDIMENSION *out_row_group_ctr,
-				   JDIMENSION out_row_groups_avail));
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*pre_process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                            JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
+                            JSAMPIMAGE output_buf,
+                            JDIMENSION *out_row_group_ctr,
+                            JDIMENSION out_row_groups_avail);
 };
 
 /* Coefficient buffer control */
 struct jpeg_c_coef_controller {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
-				   JSAMPIMAGE input_buf));
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
 };
 
 /* Colorspace conversion */
 struct jpeg_color_converter {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
-				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
-				JDIMENSION output_row, int num_rows));
+  void (*start_pass) (j_compress_ptr cinfo);
+  void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                         JSAMPIMAGE output_buf, JDIMENSION output_row,
+                         int num_rows);
 };
 
 /* Downsampling */
 struct jpeg_downsampler {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
-  JMETHOD(void, downsample, (j_compress_ptr cinfo,
-			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
-			     JSAMPIMAGE output_buf,
-			     JDIMENSION out_row_group_index));
+  void (*start_pass) (j_compress_ptr cinfo);
+  void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf,
+                      JDIMENSION in_row_index, JSAMPIMAGE output_buf,
+                      JDIMENSION out_row_group_index);
 
-  boolean need_context_rows;	/* TRUE if need rows above & below */
+  boolean need_context_rows;    /* TRUE if need rows above & below */
 };
 
 /* Forward DCT (also controls coefficient quantization) */
 struct jpeg_forward_dct {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  void (*start_pass) (j_compress_ptr cinfo);
   /* perhaps this should be an array??? */
-  JMETHOD(void, forward_DCT, (j_compress_ptr cinfo,
-			      jpeg_component_info * compptr,
-			      JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
-			      JDIMENSION start_row, JDIMENSION start_col,
-			      JDIMENSION num_blocks));
+  void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                       JDIMENSION start_row, JDIMENSION start_col,
+                       JDIMENSION num_blocks);
 };
 
 /* Entropy encoding */
 struct jpeg_entropy_encoder {
-  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
-  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
-  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+  void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics);
+  boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data);
+  void (*finish_pass) (j_compress_ptr cinfo);
 };
 
 /* Marker writing */
 struct jpeg_marker_writer {
-  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
-  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
-  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  void (*write_file_header) (j_compress_ptr cinfo);
+  void (*write_frame_header) (j_compress_ptr cinfo);
+  void (*write_scan_header) (j_compress_ptr cinfo);
+  void (*write_file_trailer) (j_compress_ptr cinfo);
+  void (*write_tables_only) (j_compress_ptr cinfo);
   /* These routines are exported to allow insertion of extra markers */
   /* Probably only COM and APPn markers should be written this way */
-  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
-				      unsigned int datalen));
-  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+  void (*write_marker_header) (j_compress_ptr cinfo, int marker,
+                               unsigned int datalen);
+  void (*write_marker_byte) (j_compress_ptr cinfo, int val);
 };
 
 
@@ -135,138 +132,130 @@
 
 /* Master control module */
 struct jpeg_decomp_master {
-  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+  void (*prepare_for_output_pass) (j_decompress_ptr cinfo);
+  void (*finish_output_pass) (j_decompress_ptr cinfo);
 
   /* State variables made visible to other modules */
-  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+  boolean is_dummy_pass;        /* True during 1st pass for 2-pass quant */
 };
 
 /* Input control module */
 struct jpeg_input_controller {
-  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
-  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+  int (*consume_input) (j_decompress_ptr cinfo);
+  void (*reset_input_controller) (j_decompress_ptr cinfo);
+  void (*start_input_pass) (j_decompress_ptr cinfo);
+  void (*finish_input_pass) (j_decompress_ptr cinfo);
 
   /* State variables made visible to other modules */
-  boolean has_multiple_scans;	/* True if file has multiple scans */
-  boolean eoi_reached;		/* True when EOI has been consumed */
+  boolean has_multiple_scans;   /* True if file has multiple scans */
+  boolean eoi_reached;          /* True when EOI has been consumed */
 };
 
 /* Main buffer control (downsampled-data buffer) */
 struct jpeg_d_main_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
-			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
-			       JDIMENSION out_rows_avail));
+  void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+                        JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
 };
 
 /* Coefficient buffer control */
 struct jpeg_d_coef_controller {
-  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
-  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
-  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
-				 JSAMPIMAGE output_buf));
+  void (*start_input_pass) (j_decompress_ptr cinfo);
+  int (*consume_data) (j_decompress_ptr cinfo);
+  void (*start_output_pass) (j_decompress_ptr cinfo);
+  int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
   /* Pointer to array of coefficient virtual arrays, or NULL if none */
   jvirt_barray_ptr *coef_arrays;
 };
 
 /* Decompression postprocessing (color quantization buffer control) */
 struct jpeg_d_post_controller {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
-  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
-				    JSAMPIMAGE input_buf,
-				    JDIMENSION *in_row_group_ctr,
-				    JDIMENSION in_row_groups_avail,
-				    JSAMPARRAY output_buf,
-				    JDIMENSION *out_row_ctr,
-				    JDIMENSION out_rows_avail));
+  void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*post_process_data) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                             JDIMENSION *in_row_group_ctr,
+                             JDIMENSION in_row_groups_avail,
+                             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                             JDIMENSION out_rows_avail);
 };
 
 /* Marker reading & parsing */
 struct jpeg_marker_reader {
-  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  void (*reset_marker_reader) (j_decompress_ptr cinfo);
   /* Read markers until SOS or EOI.
    * Returns same codes as are defined for jpeg_consume_input:
    * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
    */
-  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  int (*read_markers) (j_decompress_ptr cinfo);
   /* Read a restart marker --- exported for use by entropy decoder only */
   jpeg_marker_parser_method read_restart_marker;
 
   /* State of marker reader --- nominally internal, but applications
    * supplying COM or APPn handlers might like to know the state.
    */
-  boolean saw_SOI;		/* found SOI? */
-  boolean saw_SOF;		/* found SOF? */
-  int next_restart_num;		/* next restart number expected (0-7) */
-  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+  boolean saw_SOI;              /* found SOI? */
+  boolean saw_SOF;              /* found SOF? */
+  int next_restart_num;         /* next restart number expected (0-7) */
+  unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */
 };
 
 /* Entropy decoding */
 struct jpeg_entropy_decoder {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
-				JBLOCKROW *MCU_data));
+  void (*start_pass) (j_decompress_ptr cinfo);
+  boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data);
 
   /* This is here to share code between baseline and progressive decoders; */
   /* other modules probably should not use it */
-  boolean insufficient_data;	/* set TRUE after emitting warning */
+  boolean insufficient_data;    /* set TRUE after emitting warning */
 };
 
 /* Inverse DCT (also performs dequantization) */
-typedef JMETHOD(void, inverse_DCT_method_ptr,
-		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
-		 JCOEFPTR coef_block,
-		 JSAMPARRAY output_buf, JDIMENSION output_col));
+typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo,
+                                        jpeg_component_info * compptr,
+                                        JCOEFPTR coef_block,
+                                        JSAMPARRAY output_buf,
+                                        JDIMENSION output_col);
 
 struct jpeg_inverse_dct {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  void (*start_pass) (j_decompress_ptr cinfo);
   /* It is useful to allow each component to have a separate IDCT method. */
   inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
 };
 
 /* Upsampling (note that upsampler must also call color converter) */
 struct jpeg_upsampler {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
-			   JSAMPIMAGE input_buf,
-			   JDIMENSION *in_row_group_ctr,
-			   JDIMENSION in_row_groups_avail,
-			   JSAMPARRAY output_buf,
-			   JDIMENSION *out_row_ctr,
-			   JDIMENSION out_rows_avail));
+  void (*start_pass) (j_decompress_ptr cinfo);
+  void (*upsample) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
 
-  boolean need_context_rows;	/* TRUE if need rows above & below */
+  boolean need_context_rows;    /* TRUE if need rows above & below */
 };
 
 /* Colorspace conversion */
 struct jpeg_color_deconverter {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
-				JSAMPIMAGE input_buf, JDIMENSION input_row,
-				JSAMPARRAY output_buf, int num_rows));
+  void (*start_pass) (j_decompress_ptr cinfo);
+  void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, JSAMPARRAY output_buf,
+                         int num_rows);
 };
 
 /* Color quantization or color precision reduction */
 struct jpeg_color_quantizer {
-  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
-  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
-				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
-				 int num_rows));
-  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
-  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+  void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan);
+  void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                          JSAMPARRAY output_buf, int num_rows);
+  void (*finish_pass) (j_decompress_ptr cinfo);
+  void (*new_color_map) (j_decompress_ptr cinfo);
 };
 
 
 /* Miscellaneous useful macros */
 
 #undef MAX
-#define MAX(a,b)	((a) > (b) ? (a) : (b))
+#define MAX(a,b)        ((a) > (b) ? (a) : (b))
 #undef MIN
-#define MIN(a,b)	((a) < (b) ? (a) : (b))
+#define MIN(a,b)        ((a) < (b) ? (a) : (b))
 
 
 /* We assume that right shift corresponds to signed division by 2 with
@@ -280,112 +269,78 @@
  */
 
 #ifdef RIGHT_SHIFT_IS_UNSIGNED
-#define SHIFT_TEMPS	INT32 shift_temp;
+#define SHIFT_TEMPS     INT32 shift_temp;
 #define RIGHT_SHIFT(x,shft)  \
-	((shift_temp = (x)) < 0 ? \
-	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
-	 (shift_temp >> (shft)))
+        ((shift_temp = (x)) < 0 ? \
+         (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+         (shift_temp >> (shft)))
 #else
 #define SHIFT_TEMPS
-#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#define RIGHT_SHIFT(x,shft)     ((x) >> (shft))
 #endif
 
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_compress_master	jICompress
-#define jinit_c_master_control	jICMaster
-#define jinit_c_main_controller	jICMainC
-#define jinit_c_prep_controller	jICPrepC
-#define jinit_c_coef_controller	jICCoefC
-#define jinit_color_converter	jICColor
-#define jinit_downsampler	jIDownsampler
-#define jinit_forward_dct	jIFDCT
-#define jinit_huff_encoder	jIHEncoder
-#define jinit_phuff_encoder	jIPHEncoder
-#define jinit_marker_writer	jIMWriter
-#define jinit_master_decompress	jIDMaster
-#define jinit_d_main_controller	jIDMainC
-#define jinit_d_coef_controller	jIDCoefC
-#define jinit_d_post_controller	jIDPostC
-#define jinit_input_controller	jIInCtlr
-#define jinit_marker_reader	jIMReader
-#define jinit_huff_decoder	jIHDecoder
-#define jinit_phuff_decoder	jIPHDecoder
-#define jinit_inverse_dct	jIIDCT
-#define jinit_upsampler		jIUpsampler
-#define jinit_color_deconverter	jIDColor
-#define jinit_1pass_quantizer	jI1Quant
-#define jinit_2pass_quantizer	jI2Quant
-#define jinit_merged_upsampler	jIMUpsampler
-#define jinit_memory_mgr	jIMemMgr
-#define jdiv_round_up		jDivRound
-#define jround_up		jRound
-#define jcopy_sample_rows	jCopySamples
-#define jcopy_block_row		jCopyBlocks
-#define jzero_far		jZeroFar
-#define jpeg_zigzag_order	jZIGTable
-#define jpeg_natural_order	jZAGTable
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
 /* Compression module initialization routines */
-EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
-					 boolean transcode_only));
-EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
-EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_compress_master (j_compress_ptr cinfo);
+EXTERN(void) jinit_c_master_control (j_compress_ptr cinfo,
+                                     boolean transcode_only);
+EXTERN(void) jinit_c_main_controller (j_compress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_c_prep_controller (j_compress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_c_coef_controller (j_compress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_color_converter (j_compress_ptr cinfo);
+EXTERN(void) jinit_downsampler (j_compress_ptr cinfo);
+EXTERN(void) jinit_forward_dct (j_compress_ptr cinfo);
+EXTERN(void) jinit_huff_encoder (j_compress_ptr cinfo);
+EXTERN(void) jinit_phuff_encoder (j_compress_ptr cinfo);
+EXTERN(void) jinit_arith_encoder (j_compress_ptr cinfo);
+EXTERN(void) jinit_marker_writer (j_compress_ptr cinfo);
 /* Decompression module initialization routines */
-EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
-					  boolean need_full_buffer));
-EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
-EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_master_decompress (j_decompress_ptr cinfo);
+EXTERN(void) jinit_d_main_controller (j_decompress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_d_coef_controller (j_decompress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_d_post_controller (j_decompress_ptr cinfo,
+                                      boolean need_full_buffer);
+EXTERN(void) jinit_input_controller (j_decompress_ptr cinfo);
+EXTERN(void) jinit_marker_reader (j_decompress_ptr cinfo);
+EXTERN(void) jinit_huff_decoder (j_decompress_ptr cinfo);
+EXTERN(void) jinit_phuff_decoder (j_decompress_ptr cinfo);
+EXTERN(void) jinit_arith_decoder (j_decompress_ptr cinfo);
+EXTERN(void) jinit_inverse_dct (j_decompress_ptr cinfo);
+EXTERN(void) jinit_upsampler (j_decompress_ptr cinfo);
+EXTERN(void) jinit_color_deconverter (j_decompress_ptr cinfo);
+EXTERN(void) jinit_1pass_quantizer (j_decompress_ptr cinfo);
+EXTERN(void) jinit_2pass_quantizer (j_decompress_ptr cinfo);
+EXTERN(void) jinit_merged_upsampler (j_decompress_ptr cinfo);
 /* Memory manager initialization */
-EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+EXTERN(void) jinit_memory_mgr (j_common_ptr cinfo);
 
 /* Utility routines in jutils.c */
-EXTERN(long) jdiv_round_up JPP((long a, long b));
-EXTERN(long) jround_up JPP((long a, long b));
-EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
-				    JSAMPARRAY output_array, int dest_row,
-				    int num_rows, JDIMENSION num_cols));
-EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
-				  JDIMENSION num_blocks));
-EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+EXTERN(long) jdiv_round_up (long a, long b);
+EXTERN(long) jround_up (long a, long b);
+EXTERN(void) jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+                                JSAMPARRAY output_array, int dest_row,
+                                int num_rows, JDIMENSION num_cols);
+EXTERN(void) jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+                              JDIMENSION num_blocks);
+EXTERN(void) jzero_far (void * target, size_t bytestozero);
 /* Constant tables in jutils.c */
-#if 0				/* This table is not actually needed in v6a */
+#if 0                           /* This table is not actually needed in v6a */
 extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
 #endif
 extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
 
+/* Arithmetic coding probability estimation tables in jaricom.c */
+extern const INT32 jpeg_aritab[];
+
 /* Suppress undefined-structure complaints if necessary. */
 
 #ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+#ifndef AM_MEMORY_MANAGER       /* only jmemmgr.c defines these */
 struct jvirt_sarray_control { long dummy; };
 struct jvirt_barray_control { long dummy; };
 #endif
diff --git a/jpeglib.h b/jpeglib.h
index d1be8dd..27045a4 100644
--- a/jpeglib.h
+++ b/jpeglib.h
@@ -1,8 +1,11 @@
 /*
  * jpeglib.h
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2002-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2013-2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file defines the application interface for the JPEG library.
@@ -20,17 +23,17 @@
  * manual configuration options that most people need not worry about.
  */
 
-#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
-#include "jconfig.h"		/* widely used configuration options */
+#ifndef JCONFIG_INCLUDED        /* in case jinclude.h already did */
+#include "jconfig.h"            /* widely used configuration options */
 #endif
-#include "jmorecfg.h"		/* seldom changed options */
+#include "jmorecfg.h"           /* seldom changed options */
 
 
-/* Version ID for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
- */
-
-#define JPEG_LIB_VERSION  62	/* Version 6b */
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+extern "C" {
+#endif
+#endif
 
 
 /* Various constants determining the sizes of things.
@@ -38,13 +41,13 @@
  * if you want to be compatible.
  */
 
-#define DCTSIZE		    8	/* The basic DCT block is 8x8 samples */
-#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
-#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
-#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
-#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
-#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
-#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+#define DCTSIZE             8   /* The basic DCT block is 8x8 samples */
+#define DCTSIZE2            64  /* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4   /* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4   /* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16  /* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4   /* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4   /* JPEG limit on sampling factors */
 /* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
  * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
  * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
@@ -59,20 +62,18 @@
 
 
 /* Data structures for images (arrays of samples and of DCT coefficients).
- * On 80x86 machines, the image arrays are too big for near pointers,
- * but the pointer arrays can fit in near memory.
  */
 
-typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
-typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
-typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+typedef JSAMPLE *JSAMPROW;      /* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;   /* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */
 
-typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
-typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
-typedef JBLOCKROW *JBLOCKARRAY;		/* a 2-D array of coefficient blocks */
-typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */
+typedef JBLOCK *JBLOCKROW;      /* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;         /* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;       /* a 3-D array of coefficient blocks */
 
-typedef JCOEF FAR *JCOEFPTR;	/* useful in a couple of places */
+typedef JCOEF *JCOEFPTR;        /* useful in a couple of places */
 
 
 /* Types for JPEG compression parameters and working tables. */
@@ -85,13 +86,13 @@
    * (not the zigzag order in which they are stored in a JPEG DQT marker).
    * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
    */
-  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  UINT16 quantval[DCTSIZE2];    /* quantization step for each coefficient */
   /* This field is used only during compression.  It's initialized FALSE when
    * the table is created, and set TRUE when it's been output to the file.
    * You could suppress output of a table by setting this to TRUE.
    * (See jpeg_suppress_tables for an example.)
    */
-  boolean sent_table;		/* TRUE when table has been output */
+  boolean sent_table;           /* TRUE when table has been output */
 } JQUANT_TBL;
 
 
@@ -99,15 +100,15 @@
 
 typedef struct {
   /* These two fields directly represent the contents of a JPEG DHT marker */
-  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
-				/* length k bits; bits[0] is unused */
-  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  UINT8 bits[17];               /* bits[k] = # of symbols with codes of */
+                                /* length k bits; bits[0] is unused */
+  UINT8 huffval[256];           /* The symbols, in order of incr code length */
   /* This field is used only during compression.  It's initialized FALSE when
    * the table is created, and set TRUE when it's been output to the file.
    * You could suppress output of a table by setting this to TRUE.
    * (See jpeg_suppress_tables for an example.)
    */
-  boolean sent_table;		/* TRUE when table has been output */
+  boolean sent_table;           /* TRUE when table has been output */
 } JHUFF_TBL;
 
 
@@ -117,20 +118,20 @@
   /* These values are fixed over the whole image. */
   /* For compression, they must be supplied by parameter setup; */
   /* for decompression, they are read from the SOF marker. */
-  int component_id;		/* identifier for this component (0..255) */
-  int component_index;		/* its index in SOF or cinfo->comp_info[] */
-  int h_samp_factor;		/* horizontal sampling factor (1..4) */
-  int v_samp_factor;		/* vertical sampling factor (1..4) */
-  int quant_tbl_no;		/* quantization table selector (0..3) */
+  int component_id;             /* identifier for this component (0..255) */
+  int component_index;          /* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;            /* horizontal sampling factor (1..4) */
+  int v_samp_factor;            /* vertical sampling factor (1..4) */
+  int quant_tbl_no;             /* quantization table selector (0..3) */
   /* These values may vary between scans. */
   /* For compression, they must be supplied by parameter setup; */
   /* for decompression, they are read from the SOS marker. */
   /* The decompressor output side may not use these variables. */
-  int dc_tbl_no;		/* DC entropy table selector (0..3) */
-  int ac_tbl_no;		/* AC entropy table selector (0..3) */
-  
+  int dc_tbl_no;                /* DC entropy table selector (0..3) */
+  int ac_tbl_no;                /* AC entropy table selector (0..3) */
+
   /* Remaining fields should be treated as private by applications. */
-  
+
   /* These values are computed during compression or decompression startup: */
   /* Component's size in DCT blocks.
    * Any dummy blocks added to complete an MCU are not counted; therefore
@@ -141,32 +142,37 @@
   /* Size of a DCT block in samples.  Always DCTSIZE for compression.
    * For decompression this is the size of the output from one DCT block,
    * reflecting any scaling we choose to apply during the IDCT step.
-   * Values of 1,2,4,8 are likely to be supported.  Note that different
-   * components may receive different IDCT scalings.
+   * Values from 1 to 16 are supported.
+   * Note that different components may receive different IDCT scalings.
    */
+#if JPEG_LIB_VERSION >= 70
+  int DCT_h_scaled_size;
+  int DCT_v_scaled_size;
+#else
   int DCT_scaled_size;
+#endif
   /* The downsampled dimensions are the component's actual, unpadded number
    * of samples at the main buffer (preprocessing/compression interface), thus
    * downsampled_width = ceil(image_width * Hi/Hmax)
    * and similarly for height.  For decompression, IDCT scaling is included, so
-   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
    */
-  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_width;  /* actual width in samples */
   JDIMENSION downsampled_height; /* actual height in samples */
   /* This flag is used only for decompression.  In cases where some of the
    * components will be ignored (eg grayscale output from YCbCr image),
    * we can skip most computations for the unused components.
    */
-  boolean component_needed;	/* do we need the value of this component? */
+  boolean component_needed;     /* do we need the value of this component? */
 
   /* These values are computed before starting a scan of the component. */
   /* The decompressor output side may not use these variables. */
-  int MCU_width;		/* number of blocks per MCU, horizontally */
-  int MCU_height;		/* number of blocks per MCU, vertically */
-  int MCU_blocks;		/* MCU_width * MCU_height */
-  int MCU_sample_width;		/* MCU width in samples, MCU_width*DCT_scaled_size */
-  int last_col_width;		/* # of non-dummy blocks across in last MCU */
-  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+  int MCU_width;                /* number of blocks per MCU, horizontally */
+  int MCU_height;               /* number of blocks per MCU, vertically */
+  int MCU_blocks;               /* MCU_width * MCU_height */
+  int MCU_sample_width;         /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
+  int last_col_width;           /* # of non-dummy blocks across in last MCU */
+  int last_row_height;          /* # of non-dummy blocks down in last MCU */
 
   /* Saved quantization table for component; NULL if none yet saved.
    * See jdinput.c comments about the need for this information.
@@ -182,76 +188,97 @@
 /* The script for encoding a multiple-scan file is an array of these: */
 
 typedef struct {
-  int comps_in_scan;		/* number of components encoded in this scan */
+  int comps_in_scan;            /* number of components encoded in this scan */
   int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
-  int Ss, Se;			/* progressive JPEG spectral selection parms */
-  int Ah, Al;			/* progressive JPEG successive approx. parms */
+  int Ss, Se;                   /* progressive JPEG spectral selection parms */
+  int Ah, Al;                   /* progressive JPEG successive approx. parms */
 } jpeg_scan_info;
 
 /* The decompressor can save APPn and COM markers in a list of these: */
 
-typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+typedef struct jpeg_marker_struct * jpeg_saved_marker_ptr;
 
 struct jpeg_marker_struct {
-  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
-  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
-  unsigned int original_length;	/* # bytes of data in the file */
-  unsigned int data_length;	/* # bytes of data saved at data[] */
-  JOCTET FAR * data;		/* the data contained in the marker */
+  jpeg_saved_marker_ptr next;   /* next in list, or NULL */
+  UINT8 marker;                 /* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length; /* # bytes of data in the file */
+  unsigned int data_length;     /* # bytes of data saved at data[] */
+  JOCTET * data;                /* the data contained in the marker */
   /* the marker length word is not counted in data_length or original_length */
 };
 
 /* Known color spaces. */
 
+#define JCS_EXTENSIONS 1
+#define JCS_ALPHA_EXTENSIONS 1
+
 typedef enum {
-	JCS_UNKNOWN,		/* error/unspecified */
-	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
-	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK		/* Y/Cb/Cr/K */
+  JCS_UNKNOWN,            /* error/unspecified */
+  JCS_GRAYSCALE,          /* monochrome */
+  JCS_RGB,                /* red/green/blue as specified by the RGB_RED,
+                             RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros */
+  JCS_YCbCr,              /* Y/Cb/Cr (also known as YUV) */
+  JCS_CMYK,               /* C/M/Y/K */
+  JCS_YCCK,               /* Y/Cb/Cr/K */
+  JCS_EXT_RGB,            /* red/green/blue */
+  JCS_EXT_RGBX,           /* red/green/blue/x */
+  JCS_EXT_BGR,            /* blue/green/red */
+  JCS_EXT_BGRX,           /* blue/green/red/x */
+  JCS_EXT_XBGR,           /* x/blue/green/red */
+  JCS_EXT_XRGB,           /* x/red/green/blue */
+  /* When out_color_space is set to JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR,
+     or JCS_EXT_XRGB during decompression, the X byte is undefined, and in
+     order to ensure the best performance, libjpeg-turbo can set that byte to
+     whatever value it wishes.  Use the following colorspace constants to
+     ensure that the X byte is set to 0xFF, so that it can be interpreted as an
+     opaque alpha channel. */
+  JCS_EXT_RGBA,           /* red/green/blue/alpha */
+  JCS_EXT_BGRA,           /* blue/green/red/alpha */
+  JCS_EXT_ABGR,           /* alpha/blue/green/red */
+  JCS_EXT_ARGB,           /* alpha/red/green/blue */
+  JCS_RGB565              /* 5-bit red/6-bit green/5-bit blue */
 } J_COLOR_SPACE;
 
 /* DCT/IDCT algorithm options. */
 
 typedef enum {
-	JDCT_ISLOW,		/* slow but accurate integer algorithm */
-	JDCT_IFAST,		/* faster, less accurate integer method */
-	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+  JDCT_ISLOW,             /* slow but accurate integer algorithm */
+  JDCT_IFAST,             /* faster, less accurate integer method */
+  JDCT_FLOAT              /* floating-point: accurate, fast on fast HW */
 } J_DCT_METHOD;
 
-#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#ifndef JDCT_DEFAULT            /* may be overridden in jconfig.h */
 #define JDCT_DEFAULT  JDCT_ISLOW
 #endif
-#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#ifndef JDCT_FASTEST            /* may be overridden in jconfig.h */
 #define JDCT_FASTEST  JDCT_IFAST
 #endif
 
 /* Dithering options for decompression. */
 
 typedef enum {
-	JDITHER_NONE,		/* no dithering */
-	JDITHER_ORDERED,	/* simple ordered dither */
-	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+  JDITHER_NONE,           /* no dithering */
+  JDITHER_ORDERED,        /* simple ordered dither */
+  JDITHER_FS              /* Floyd-Steinberg error diffusion dither */
 } J_DITHER_MODE;
 
 
 /* Common fields between JPEG compression and decompression master structs. */
 
 #define jpeg_common_fields \
-  struct jpeg_error_mgr * err;	/* Error handler module */\
-  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_error_mgr * err;  /* Error handler module */\
+  struct jpeg_memory_mgr * mem; /* Memory manager module */\
   struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
-  void * client_data;		/* Available for use by application */\
-  boolean is_decompressor;	/* So common code can tell which is which */\
-  int global_state		/* For checking call sequence validity */
+  void * client_data;           /* Available for use by application */\
+  boolean is_decompressor;      /* So common code can tell which is which */\
+  int global_state              /* For checking call sequence validity */
 
 /* Routines that are to be used by both halves of the library are declared
  * to receive a pointer to this structure.  There are no actual instances of
  * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
  */
 struct jpeg_common_struct {
-  jpeg_common_fields;		/* Fields common to both master struct types */
+  jpeg_common_fields;           /* Fields common to both master struct types */
   /* Additional fields follow in an actual jpeg_compress_struct or
    * jpeg_decompress_struct.  All three structs must agree on these
    * initial fields!  (This would be a lot cleaner in C++.)
@@ -266,7 +293,7 @@
 /* Master record for a compression instance */
 
 struct jpeg_compress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+  jpeg_common_fields;           /* Fields shared with jpeg_decompress_struct */
 
   /* Destination for compressed data */
   struct jpeg_destination_mgr * dest;
@@ -276,12 +303,12 @@
    * be correct before you can even call jpeg_set_defaults().
    */
 
-  JDIMENSION image_width;	/* input image width */
-  JDIMENSION image_height;	/* input image height */
-  int input_components;		/* # of color components in input image */
-  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+  JDIMENSION image_width;       /* input image width */
+  JDIMENSION image_height;      /* input image height */
+  int input_components;         /* # of color components in input image */
+  J_COLOR_SPACE in_color_space; /* colorspace of input image */
 
-  double input_gamma;		/* image gamma of input image */
+  double input_gamma;           /* image gamma of input image */
 
   /* Compression parameters --- these fields must be set before calling
    * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
@@ -291,38 +318,59 @@
    * helper routines to simplify changing parameters.
    */
 
-  int data_precision;		/* bits of precision in image data */
+#if JPEG_LIB_VERSION >= 70
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
 
-  int num_components;		/* # of color components in JPEG image */
+  JDIMENSION jpeg_width;        /* scaled JPEG image width */
+  JDIMENSION jpeg_height;       /* scaled JPEG image height */
+  /* Dimensions of actual JPEG image that will be written to file,
+   * derived from input dimensions by scaling factors above.
+   * These fields are computed by jpeg_start_compress().
+   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+   * in advance of calling jpeg_start_compress().
+   */
+#endif
+
+  int data_precision;           /* bits of precision in image data */
+
+  int num_components;           /* # of color components in JPEG image */
   J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
 
   jpeg_component_info * comp_info;
   /* comp_info[i] describes component that appears i'th in SOF */
-  
+
   JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
-  /* ptrs to coefficient quantization tables, or NULL if not defined */
-  
+#if JPEG_LIB_VERSION >= 70
+  int q_scale_factor[NUM_QUANT_TBLS];
+#endif
+  /* ptrs to coefficient quantization tables, or NULL if not defined,
+   * and corresponding scale factors (percentage, initialized 100).
+   */
+
   JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
   JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
   /* ptrs to Huffman coding tables, or NULL if not defined */
-  
+
   UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
   UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
   UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
 
-  int num_scans;		/* # of entries in scan_info array */
+  int num_scans;                /* # of entries in scan_info array */
   const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
   /* The default value of scan_info is NULL, which causes a single-scan
    * sequential JPEG file to be emitted.  To create a multi-scan file,
    * set num_scans and scan_info to point to an array of scan definitions.
    */
 
-  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
-  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
-  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
-  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+  boolean raw_data_in;          /* TRUE=caller supplies downsampled data */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;      /* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
+#if JPEG_LIB_VERSION >= 70
+  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
+#endif
+  int smoothing_factor;         /* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;      /* DCT algorithm selector */
 
   /* The restart interval can be specified in absolute MCUs by setting
    * restart_interval, or in MCU rows by setting restart_in_rows
@@ -330,28 +378,28 @@
    * for each scan).
    */
   unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
-  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+  int restart_in_rows;          /* if > 0, MCU rows per restart interval */
 
   /* Parameters controlling emission of special markers. */
 
-  boolean write_JFIF_header;	/* should a JFIF marker be written? */
-  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  boolean write_JFIF_header;    /* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;     /* What to write for the JFIF version number */
   UINT8 JFIF_minor_version;
   /* These three values are not used by the JPEG code, merely copied */
   /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
   /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
   /* ratio is defined by X_density/Y_density even when density_unit=0. */
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
-  
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean write_Adobe_marker;   /* should an Adobe marker be written? */
+
   /* State variable: index of next scanline to be written to
    * jpeg_write_scanlines().  Application may use this to control its
    * processing loop, e.g., "while (next_scanline < image_height)".
    */
 
-  JDIMENSION next_scanline;	/* 0 .. image_height-1  */
+  JDIMENSION next_scanline;     /* 0 .. image_height-1  */
 
   /* Remaining fields are known throughout compressor, but generally
    * should not be touched by a surrounding application.
@@ -360,34 +408,45 @@
   /*
    * These fields are computed during compression startup
    */
-  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
+  boolean progressive_mode;     /* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
 
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+#if JPEG_LIB_VERSION >= 70
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
+#endif
+
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows to be input to coef ctlr */
   /* The coefficient controller receives data in units of MCU rows as defined
    * for fully interleaved scans (whether the JPEG file is interleaved or not).
    * There are v_samp_factor * DCTSIZE sample rows of each component in an
    * "iMCU" (interleaved MCU) row.
    */
-  
+
   /*
    * These fields are valid during any one scan.
    * They describe the components and MCUs actually appearing in the scan.
    */
-  int comps_in_scan;		/* # of JPEG components in this scan */
+  int comps_in_scan;            /* # of JPEG components in this scan */
   jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
   /* *cur_comp_info[i] describes component that appears i'th in SOS */
-  
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
-  
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
+
+  int blocks_in_MCU;            /* # of DCT blocks per MCU */
   int MCU_membership[C_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
   /* i'th block in an MCU */
 
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
+
+#if JPEG_LIB_VERSION >= 80
+  int block_size;               /* the basic DCT block size: 1..16 */
+  const int * natural_order;    /* natural-order position array */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) */
+#endif
 
   /*
    * Links to compression subobjects (methods and private variables of modules)
@@ -409,7 +468,7 @@
 /* Master record for a decompression instance */
 
 struct jpeg_decompress_struct {
-  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+  jpeg_common_fields;           /* Fields shared with jpeg_compress_struct */
 
   /* Source of compressed data */
   struct jpeg_source_mgr * src;
@@ -417,9 +476,9 @@
   /* Basic description of image --- filled in by jpeg_read_header(). */
   /* Application may inspect these values to decide how to process image. */
 
-  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
-  JDIMENSION image_height;	/* nominal image height */
-  int num_components;		/* # of color components in JPEG image */
+  JDIMENSION image_width;       /* nominal image width (from SOF marker) */
+  JDIMENSION image_height;      /* nominal image height */
+  int num_components;           /* # of color components in JPEG image */
   J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
 
   /* Decompression processing parameters --- these fields must be set before
@@ -431,24 +490,24 @@
 
   unsigned int scale_num, scale_denom; /* fraction by which to scale image */
 
-  double output_gamma;		/* image gamma wanted in output */
+  double output_gamma;          /* image gamma wanted in output */
 
-  boolean buffered_image;	/* TRUE=multiple output passes */
-  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+  boolean buffered_image;       /* TRUE=multiple output passes */
+  boolean raw_data_out;         /* TRUE=downsampled data wanted */
 
-  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
-  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
-  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+  J_DCT_METHOD dct_method;      /* IDCT algorithm selector */
+  boolean do_fancy_upsampling;  /* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;   /* TRUE=apply interblock smoothing */
 
-  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  boolean quantize_colors;      /* TRUE=colormapped output wanted */
   /* the following are ignored if not quantize_colors: */
-  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
-  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
-  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  J_DITHER_MODE dither_mode;    /* type of color dithering to use */
+  boolean two_pass_quantize;    /* TRUE=use two-pass color quantization */
+  int desired_number_of_colors; /* max # colors to use in created colormap */
   /* these are significant only in buffered-image mode: */
-  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_1pass_quant;   /* enable future use of 1-pass quantizer */
   boolean enable_external_quant;/* enable future use of external colormap */
-  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+  boolean enable_2pass_quant;   /* enable future use of 2-pass quantizer */
 
   /* Description of actual output image that will be returned to application.
    * These fields are computed by jpeg_start_decompress().
@@ -456,14 +515,14 @@
    * in advance of calling jpeg_start_decompress().
    */
 
-  JDIMENSION output_width;	/* scaled image width */
-  JDIMENSION output_height;	/* scaled image height */
-  int out_color_components;	/* # of color components in out_color_space */
-  int output_components;	/* # of color components returned */
+  JDIMENSION output_width;      /* scaled image width */
+  JDIMENSION output_height;     /* scaled image height */
+  int out_color_components;     /* # of color components in out_color_space */
+  int output_components;        /* # of color components returned */
   /* output_components is 1 (a colormap index) when quantizing colors;
    * otherwise it equals out_color_components.
    */
-  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  int rec_outbuf_height;        /* min recommended height of scanline buffer */
   /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
    * high, space and time will be wasted due to unnecessary data copying.
    * Usually rec_outbuf_height will be 1 or 2, at most 4.
@@ -475,8 +534,8 @@
    * jpeg_start_decompress or jpeg_start_output.
    * The map has out_color_components rows and actual_number_of_colors columns.
    */
-  int actual_number_of_colors;	/* number of entries in use */
-  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+  int actual_number_of_colors;  /* number of entries in use */
+  JSAMPARRAY colormap;          /* The color map as a 2-D pixel array */
 
   /* State variables: these variables indicate the progress of decompression.
    * The application may examine these but must not modify them.
@@ -486,20 +545,20 @@
    * Application may use this to control its processing loop, e.g.,
    * "while (output_scanline < output_height)".
    */
-  JDIMENSION output_scanline;	/* 0 .. output_height-1  */
+  JDIMENSION output_scanline;   /* 0 .. output_height-1  */
 
   /* Current input scan number and number of iMCU rows completed in scan.
    * These indicate the progress of the decompressor input side.
    */
-  int input_scan_number;	/* Number of SOS markers seen so far */
-  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+  int input_scan_number;        /* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;    /* Number of iMCU rows completed */
 
   /* The "output scan number" is the notional scan being displayed by the
    * output side.  The decompressor will not allow output scan/row number
    * to get ahead of input scan/row, but it can fall arbitrarily far behind.
    */
-  int output_scan_number;	/* Nominal scan number being displayed */
-  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+  int output_scan_number;       /* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;   /* Number of iMCU rows read */
 
   /* Current progression status.  coef_bits[c][i] indicates the precision
    * with which component c's DCT coefficient i (in zigzag order) is known.
@@ -508,7 +567,7 @@
    * (thus, 0 at completion of the progression).
    * This pointer is NULL when reading a non-progressive file.
    */
-  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+  int (*coef_bits)[DCTSIZE2];   /* -1 or current Al value for each coef */
 
   /* Internal JPEG parameters --- the application usually need not look at
    * these fields.  Note that the decompressor output side may not use
@@ -530,13 +589,16 @@
    * are given in SOF/SOS markers or defined to be reset by SOI.
    */
 
-  int data_precision;		/* bits of precision in image data */
+  int data_precision;           /* bits of precision in image data */
 
   jpeg_component_info * comp_info;
   /* comp_info[i] describes component that appears i'th in SOF */
 
-  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
-  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+#if JPEG_LIB_VERSION >= 80
+  boolean is_baseline;          /* TRUE if Baseline SOF0 encountered */
+#endif
+  boolean progressive_mode;     /* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
 
   UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
   UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
@@ -547,17 +609,17 @@
   /* These fields record data obtained from optional markers recognized by
    * the JPEG library.
    */
-  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  boolean saw_JFIF_marker;      /* TRUE iff a JFIF APP0 marker was found */
   /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
-  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_major_version;     /* JFIF version number */
   UINT8 JFIF_minor_version;
-  UINT8 density_unit;		/* JFIF code for pixel size units */
-  UINT16 X_density;		/* Horizontal pixel density */
-  UINT16 Y_density;		/* Vertical pixel density */
-  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
-  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean saw_Adobe_marker;     /* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;        /* Color transform code from Adobe marker */
 
-  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
 
   /* Aside from the specific data retained from APPn markers known to the
    * library, the uninterpreted contents of any or all APPn and COM markers
@@ -572,18 +634,23 @@
   /*
    * These fields are computed during decompression startup
    */
-  int max_h_samp_factor;	/* largest h_samp_factor */
-  int max_v_samp_factor;	/* largest v_samp_factor */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
 
-  int min_DCT_scaled_size;	/* smallest DCT_scaled_size of any component */
+#if JPEG_LIB_VERSION >= 70
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
+#else
+  int min_DCT_scaled_size;      /* smallest DCT_scaled_size of any component */
+#endif
 
-  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows in image */
   /* The coefficient controller's input and output progress is measured in
    * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
    * in fully interleaved JPEG scans, but are used whether the scan is
    * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
    * rows of each component.  Therefore, the IDCT output contains
-   * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
+   * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
    */
 
   JSAMPLE * sample_range_limit; /* table for fast range-limiting */
@@ -593,19 +660,27 @@
    * They describe the components and MCUs actually appearing in the scan.
    * Note that the decompressor output side must not use these fields.
    */
-  int comps_in_scan;		/* # of JPEG components in this scan */
+  int comps_in_scan;            /* # of JPEG components in this scan */
   jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
   /* *cur_comp_info[i] describes component that appears i'th in SOS */
 
-  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
-  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
 
-  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int blocks_in_MCU;            /* # of DCT blocks per MCU */
   int MCU_membership[D_MAX_BLOCKS_IN_MCU];
   /* MCU_membership[i] is index in cur_comp_info of component owning */
   /* i'th block in an MCU */
 
-  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+  int Ss, Se, Ah, Al;           /* progressive JPEG parameters for scan */
+
+#if JPEG_LIB_VERSION >= 80
+  /* These fields are derived from Se of first SOS marker.
+   */
+  int block_size;               /* the basic DCT block size: 1..16 */
+  const int * natural_order; /* natural-order position array for entropy decode */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) for entropy decode */
+#endif
 
   /* This field is shared between entropy decoder and marker parser.
    * It is either zero or the code of a JPEG marker that has been
@@ -642,17 +717,17 @@
 
 struct jpeg_error_mgr {
   /* Error exit handler: does not return to caller */
-  JMETHOD(void, error_exit, (j_common_ptr cinfo));
+  void (*error_exit) (j_common_ptr cinfo);
   /* Conditionally emit a trace or warning message */
-  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  void (*emit_message) (j_common_ptr cinfo, int msg_level);
   /* Routine that actually outputs a trace or error message */
-  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  void (*output_message) (j_common_ptr cinfo);
   /* Format a message string for the most recent JPEG error or message */
-  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
-#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  void (*format_message) (j_common_ptr cinfo, char * buffer);
+#define JMSG_LENGTH_MAX  200    /* recommended size of format_message buffer */
   /* Reset error state variables at start of a new image */
-  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
-  
+  void (*reset_error_mgr) (j_common_ptr cinfo);
+
   /* The message ID code and any parameters are saved here.
    * A message can have one string parameter or up to 8 int parameters.
    */
@@ -662,18 +737,18 @@
     int i[8];
     char s[JMSG_STR_PARM_MAX];
   } msg_parm;
-  
+
   /* Standard state variables for error facility */
-  
-  int trace_level;		/* max msg_level that will be displayed */
-  
+
+  int trace_level;              /* max msg_level that will be displayed */
+
   /* For recoverable corrupt-data errors, we emit a warning message,
    * but keep going unless emit_message chooses to abort.  emit_message
    * should count warnings in num_warnings.  The surrounding application
    * can check for bad data by seeing if num_warnings is nonzero at the
    * end of processing.
    */
-  long num_warnings;		/* number of corrupt-data warnings */
+  long num_warnings;            /* number of corrupt-data warnings */
 
   /* These fields point to the table(s) of error message strings.
    * An application can change the table pointer to switch to a different
@@ -691,32 +766,32 @@
    * It contains strings numbered first_addon_message..last_addon_message.
    */
   const char * const * addon_message_table; /* Non-library errors */
-  int first_addon_message;	/* code for first string in addon table */
-  int last_addon_message;	/* code for last string in addon table */
+  int first_addon_message;      /* code for first string in addon table */
+  int last_addon_message;       /* code for last string in addon table */
 };
 
 
 /* Progress monitor object */
 
 struct jpeg_progress_mgr {
-  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+  void (*progress_monitor) (j_common_ptr cinfo);
 
-  long pass_counter;		/* work units completed in this pass */
-  long pass_limit;		/* total number of work units in this pass */
-  int completed_passes;		/* passes completed so far */
-  int total_passes;		/* total number of passes expected */
+  long pass_counter;            /* work units completed in this pass */
+  long pass_limit;              /* total number of work units in this pass */
+  int completed_passes;         /* passes completed so far */
+  int total_passes;             /* total number of passes expected */
 };
 
 
 /* Data destination object for compression */
 
 struct jpeg_destination_mgr {
-  JOCTET * next_output_byte;	/* => next byte to write in buffer */
-  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  JOCTET * next_output_byte;    /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
 
-  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
-  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
-  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+  void (*init_destination) (j_compress_ptr cinfo);
+  boolean (*empty_output_buffer) (j_compress_ptr cinfo);
+  void (*term_destination) (j_compress_ptr cinfo);
 };
 
 
@@ -724,13 +799,13 @@
 
 struct jpeg_source_mgr {
   const JOCTET * next_input_byte; /* => next byte to read from buffer */
-  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+  size_t bytes_in_buffer;       /* # of bytes remaining in buffer */
 
-  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
-  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
-  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
-  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+  void (*init_source) (j_decompress_ptr cinfo);
+  boolean (*fill_input_buffer) (j_decompress_ptr cinfo);
+  void (*skip_input_data) (j_decompress_ptr cinfo, long num_bytes);
+  boolean (*resync_to_restart) (j_decompress_ptr cinfo, int desired);
+  void (*term_source) (j_decompress_ptr cinfo);
 };
 
 
@@ -745,9 +820,9 @@
  * successful.
  */
 
-#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
-#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
-#define JPOOL_NUMPOOLS	2
+#define JPOOL_PERMANENT 0       /* lasts until master record is destroyed */
+#define JPOOL_IMAGE     1       /* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS  2
 
 typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
 typedef struct jvirt_barray_control * jvirt_barray_ptr;
@@ -755,41 +830,32 @@
 
 struct jpeg_memory_mgr {
   /* Method pointers */
-  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
-				size_t sizeofobject));
-  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
-				     size_t sizeofobject));
-  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
-				     JDIMENSION samplesperrow,
-				     JDIMENSION numrows));
-  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
-				      JDIMENSION blocksperrow,
-				      JDIMENSION numrows));
-  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION samplesperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
-						  int pool_id,
-						  boolean pre_zero,
-						  JDIMENSION blocksperrow,
-						  JDIMENSION numrows,
-						  JDIMENSION maxaccess));
-  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
-  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
-					   jvirt_sarray_ptr ptr,
-					   JDIMENSION start_row,
-					   JDIMENSION num_rows,
-					   boolean writable));
-  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
-					    jvirt_barray_ptr ptr,
-					    JDIMENSION start_row,
-					    JDIMENSION num_rows,
-					    boolean writable));
-  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
-  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+  void * (*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject);
+  void * (*alloc_large) (j_common_ptr cinfo, int pool_id,
+                         size_t sizeofobject);
+  JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id,
+                              JDIMENSION samplesperrow, JDIMENSION numrows);
+  JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id,
+                               JDIMENSION blocksperrow, JDIMENSION numrows);
+  jvirt_sarray_ptr (*request_virt_sarray) (j_common_ptr cinfo, int pool_id,
+                                           boolean pre_zero,
+                                           JDIMENSION samplesperrow,
+                                           JDIMENSION numrows,
+                                           JDIMENSION maxaccess);
+  jvirt_barray_ptr (*request_virt_barray) (j_common_ptr cinfo, int pool_id,
+                                           boolean pre_zero,
+                                           JDIMENSION blocksperrow,
+                                           JDIMENSION numrows,
+                                           JDIMENSION maxaccess);
+  void (*realize_virt_arrays) (j_common_ptr cinfo);
+  JSAMPARRAY (*access_virt_sarray) (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+                                    JDIMENSION start_row, JDIMENSION num_rows,
+                                    boolean writable);
+  JBLOCKARRAY (*access_virt_barray) (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+                                     JDIMENSION start_row, JDIMENSION num_rows,
+                                     boolean writable);
+  void (*free_pool) (j_common_ptr cinfo, int pool_id);
+  void (*self_destruct) (j_common_ptr cinfo);
 
   /* Limit on memory allocation for this JPEG object.  (Note that this is
    * merely advisory, not a guaranteed maximum; it only affects the space
@@ -806,83 +872,11 @@
 /* Routine signature for application-supplied marker processing methods.
  * Need not pass marker code since it is stored in cinfo->unread_marker.
  */
-typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
-
-
-/* Declarations for routines called by application.
- * The JPP macro hides prototype parameters from compilers that can't cope.
- * Note JPP requires double parentheses.
- */
-
-#ifdef HAVE_PROTOTYPES
-#define JPP(arglist)	arglist
-#else
-#define JPP(arglist)	()
-#endif
-
-
-/* Short forms of external names for systems with brain-damaged linkers.
- * We shorten external names to be unique in the first six letters, which
- * is good enough for all known systems.
- * (If your compiler itself needs names to be unique in less than 15 
- * characters, you are out of luck.  Get a better compiler.)
- */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jpeg_std_error		jStdError
-#define jpeg_CreateCompress	jCreaCompress
-#define jpeg_CreateDecompress	jCreaDecompress
-#define jpeg_destroy_compress	jDestCompress
-#define jpeg_destroy_decompress	jDestDecompress
-#define jpeg_stdio_dest		jStdDest
-#define jpeg_stdio_src		jStdSrc
-#define jpeg_set_defaults	jSetDefaults
-#define jpeg_set_colorspace	jSetColorspace
-#define jpeg_default_colorspace	jDefColorspace
-#define jpeg_set_quality	jSetQuality
-#define jpeg_set_linear_quality	jSetLQuality
-#define jpeg_add_quant_table	jAddQuantTable
-#define jpeg_quality_scaling	jQualityScaling
-#define jpeg_simple_progression	jSimProgress
-#define jpeg_suppress_tables	jSuppressTables
-#define jpeg_alloc_quant_table	jAlcQTable
-#define jpeg_alloc_huff_table	jAlcHTable
-#define jpeg_start_compress	jStrtCompress
-#define jpeg_write_scanlines	jWrtScanlines
-#define jpeg_finish_compress	jFinCompress
-#define jpeg_write_raw_data	jWrtRawData
-#define jpeg_write_marker	jWrtMarker
-#define jpeg_write_m_header	jWrtMHeader
-#define jpeg_write_m_byte	jWrtMByte
-#define jpeg_write_tables	jWrtTables
-#define jpeg_read_header	jReadHeader
-#define jpeg_start_decompress	jStrtDecompress
-#define jpeg_read_scanlines	jReadScanlines
-#define jpeg_finish_decompress	jFinDecompress
-#define jpeg_read_raw_data	jReadRawData
-#define jpeg_has_multiple_scans	jHasMultScn
-#define jpeg_start_output	jStrtOutput
-#define jpeg_finish_output	jFinOutput
-#define jpeg_input_complete	jInComplete
-#define jpeg_new_colormap	jNewCMap
-#define jpeg_consume_input	jConsumeInput
-#define jpeg_calc_output_dimensions	jCalcDimensions
-#define jpeg_save_markers	jSaveMarkers
-#define jpeg_set_marker_processor	jSetMarker
-#define jpeg_read_coefficients	jReadCoefs
-#define jpeg_write_coefficients	jWrtCoefs
-#define jpeg_copy_critical_parameters	jCopyCrit
-#define jpeg_abort_compress	jAbrtCompress
-#define jpeg_abort_decompress	jAbrtDecompress
-#define jpeg_abort		jAbort
-#define jpeg_destroy		jDestroy
-#define jpeg_resync_to_restart	jResyncRestart
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
+typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo);
 
 
 /* Default error-management setup */
-EXTERN(struct jpeg_error_mgr *) jpeg_std_error
-	JPP((struct jpeg_error_mgr * err));
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error (struct jpeg_error_mgr * err);
 
 /* Initialization of JPEG compression objects.
  * jpeg_create_compress() and jpeg_create_decompress() are the exported
@@ -893,78 +887,88 @@
  */
 #define jpeg_create_compress(cinfo) \
     jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
-			(size_t) sizeof(struct jpeg_compress_struct))
+                        (size_t) sizeof(struct jpeg_compress_struct))
 #define jpeg_create_decompress(cinfo) \
     jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
-			  (size_t) sizeof(struct jpeg_decompress_struct))
-EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
-				      int version, size_t structsize));
-EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
-					int version, size_t structsize));
+                          (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress (j_compress_ptr cinfo, int version,
+                                  size_t structsize);
+EXTERN(void) jpeg_CreateDecompress (j_decompress_ptr cinfo, int version,
+                                    size_t structsize);
 /* Destruction of JPEG compression objects */
-EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_destroy_compress (j_compress_ptr cinfo);
+EXTERN(void) jpeg_destroy_decompress (j_decompress_ptr cinfo);
 
 /* Standard data source and destination managers: stdio streams. */
 /* Caller is responsible for opening the file before and closing after. */
-EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
-EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+EXTERN(void) jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile);
+EXTERN(void) jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile);
+
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
+/* Data source and destination managers: memory buffers. */
+EXTERN(void) jpeg_mem_dest (j_compress_ptr cinfo, unsigned char ** outbuffer,
+                            unsigned long * outsize);
+EXTERN(void) jpeg_mem_src (j_decompress_ptr cinfo, unsigned char * inbuffer,
+                           unsigned long insize);
+#endif
 
 /* Default parameter setup for compression */
-EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_defaults (j_compress_ptr cinfo);
 /* Compression parameter setup aids */
-EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
-				      J_COLOR_SPACE colorspace));
-EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
-				   boolean force_baseline));
-EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
-					  int scale_factor,
-					  boolean force_baseline));
-EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
-				       const unsigned int *basic_table,
-				       int scale_factor,
-				       boolean force_baseline));
-EXTERN(int) jpeg_quality_scaling JPP((int quality));
-EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
-				       boolean suppress));
-EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
-EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_set_colorspace (j_compress_ptr cinfo,
+                                  J_COLOR_SPACE colorspace);
+EXTERN(void) jpeg_default_colorspace (j_compress_ptr cinfo);
+EXTERN(void) jpeg_set_quality (j_compress_ptr cinfo, int quality,
+                               boolean force_baseline);
+EXTERN(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+                                      boolean force_baseline);
+#if JPEG_LIB_VERSION >= 70
+EXTERN(void) jpeg_default_qtables (j_compress_ptr cinfo,
+                                   boolean force_baseline);
+#endif
+EXTERN(void) jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+                                   const unsigned int *basic_table,
+                                   int scale_factor, boolean force_baseline);
+EXTERN(int) jpeg_quality_scaling (int quality);
+EXTERN(void) jpeg_simple_progression (j_compress_ptr cinfo);
+EXTERN(void) jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress);
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table (j_common_ptr cinfo);
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table (j_common_ptr cinfo);
 
 /* Main entry points for compression */
-EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
-				      boolean write_all_tables));
-EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
-					     JSAMPARRAY scanlines,
-					     JDIMENSION num_lines));
-EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_start_compress (j_compress_ptr cinfo,
+                                  boolean write_all_tables);
+EXTERN(JDIMENSION) jpeg_write_scanlines (j_compress_ptr cinfo,
+                                         JSAMPARRAY scanlines,
+                                         JDIMENSION num_lines);
+EXTERN(void) jpeg_finish_compress (j_compress_ptr cinfo);
+
+#if JPEG_LIB_VERSION >= 70
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo);
+#endif
 
 /* Replaces jpeg_write_scanlines when writing raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
-					    JSAMPIMAGE data,
-					    JDIMENSION num_lines));
+EXTERN(JDIMENSION) jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+                                        JDIMENSION num_lines);
 
-/* Write a special marker.  See libjpeg.doc concerning safe usage. */
-EXTERN(void) jpeg_write_marker
-	JPP((j_compress_ptr cinfo, int marker,
-	     const JOCTET * dataptr, unsigned int datalen));
+/* Write a special marker.  See libjpeg.txt concerning safe usage. */
+EXTERN(void) jpeg_write_marker (j_compress_ptr cinfo, int marker,
+                                const JOCTET * dataptr, unsigned int datalen);
 /* Same, but piecemeal. */
-EXTERN(void) jpeg_write_m_header
-	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
-EXTERN(void) jpeg_write_m_byte
-	JPP((j_compress_ptr cinfo, int val));
+EXTERN(void) jpeg_write_m_header (j_compress_ptr cinfo, int marker,
+                                  unsigned int datalen);
+EXTERN(void) jpeg_write_m_byte (j_compress_ptr cinfo, int val);
 
 /* Alternate compression function: just write an abbreviated table file */
-EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_write_tables (j_compress_ptr cinfo);
 
 /* Decompression startup: read start of JPEG datastream to see what's there */
-EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
-				  boolean require_image));
+EXTERN(int) jpeg_read_header (j_decompress_ptr cinfo, boolean require_image);
 /* Return value is one of: */
-#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
-#define JPEG_HEADER_OK		1 /* Found valid image datastream */
-#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+#define JPEG_SUSPENDED          0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK          1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY 2 /* Found valid table-specs-only datastream */
 /* If you pass require_image = TRUE (normal case), you need not check for
  * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
  * JPEG_SUSPENDED is only possible if you use a data source module that can
@@ -972,51 +976,51 @@
  */
 
 /* Main entry points for decompression */
-EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
-EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
-					    JSAMPARRAY scanlines,
-					    JDIMENSION max_lines));
-EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_decompress (j_decompress_ptr cinfo);
+EXTERN(JDIMENSION) jpeg_read_scanlines (j_decompress_ptr cinfo,
+                                        JSAMPARRAY scanlines,
+                                        JDIMENSION max_lines);
+EXTERN(boolean) jpeg_finish_decompress (j_decompress_ptr cinfo);
 
 /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
-EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
-					   JSAMPIMAGE data,
-					   JDIMENSION max_lines));
+EXTERN(JDIMENSION) jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+                                       JDIMENSION max_lines);
 
 /* Additional entry points for buffered-image mode. */
-EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
-				       int scan_number));
-EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
-EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
-EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_has_multiple_scans (j_decompress_ptr cinfo);
+EXTERN(boolean) jpeg_start_output (j_decompress_ptr cinfo, int scan_number);
+EXTERN(boolean) jpeg_finish_output (j_decompress_ptr cinfo);
+EXTERN(boolean) jpeg_input_complete (j_decompress_ptr cinfo);
+EXTERN(void) jpeg_new_colormap (j_decompress_ptr cinfo);
+EXTERN(int) jpeg_consume_input (j_decompress_ptr cinfo);
 /* Return value is one of: */
-/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
-#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
-#define JPEG_REACHED_EOI	2 /* Reached end of image */
-#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
-#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+/* #define JPEG_SUSPENDED       0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS        1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI        2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED      3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED     4 /* Completed last iMCU row of a scan */
 
 /* Precalculate output dimensions for current decompression parameters. */
-EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+#if JPEG_LIB_VERSION >= 80
+EXTERN(void) jpeg_core_output_dimensions (j_decompress_ptr cinfo);
+#endif
+EXTERN(void) jpeg_calc_output_dimensions (j_decompress_ptr cinfo);
 
 /* Control saving of COM and APPn markers into marker_list. */
-EXTERN(void) jpeg_save_markers
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     unsigned int length_limit));
+EXTERN(void) jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+                                unsigned int length_limit);
 
 /* Install a special processing method for COM or APPn markers. */
-EXTERN(void) jpeg_set_marker_processor
-	JPP((j_decompress_ptr cinfo, int marker_code,
-	     jpeg_marker_parser_method routine));
+EXTERN(void) jpeg_set_marker_processor (j_decompress_ptr cinfo,
+                                        int marker_code,
+                                        jpeg_marker_parser_method routine);
 
 /* Read or write raw DCT coefficients --- useful for lossless transcoding. */
-EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
-EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
-					  jvirt_barray_ptr * coef_arrays));
-EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
-						j_compress_ptr dstinfo));
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients (j_decompress_ptr cinfo);
+EXTERN(void) jpeg_write_coefficients (j_compress_ptr cinfo,
+                                      jvirt_barray_ptr * coef_arrays);
+EXTERN(void) jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+                                            j_compress_ptr dstinfo);
 
 /* If you choose to abort compression or decompression before completing
  * jpeg_finish_(de)compress, then you need to clean up to release memory,
@@ -1024,28 +1028,27 @@
  * if you're done with the JPEG object, but if you want to clean it up and
  * reuse it, call this:
  */
-EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
-EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_abort_compress (j_compress_ptr cinfo);
+EXTERN(void) jpeg_abort_decompress (j_decompress_ptr cinfo);
 
 /* Generic versions of jpeg_abort and jpeg_destroy that work on either
  * flavor of JPEG object.  These may be more convenient in some places.
  */
-EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
-EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_abort (j_common_ptr cinfo);
+EXTERN(void) jpeg_destroy (j_common_ptr cinfo);
 
 /* Default restart-marker-resync procedure for use by data source modules */
-EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
-					    int desired));
+EXTERN(boolean) jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired);
 
 
 /* These marker codes are exported since applications and data source modules
  * are likely to want to use them.
  */
 
-#define JPEG_RST0	0xD0	/* RST0 marker code */
-#define JPEG_EOI	0xD9	/* EOI marker code */
-#define JPEG_APP0	0xE0	/* APP0 marker code */
-#define JPEG_COM	0xFE	/* COM marker code */
+#define JPEG_RST0       0xD0    /* RST0 marker code */
+#define JPEG_EOI        0xD9    /* EOI marker code */
+#define JPEG_APP0       0xE0    /* APP0 marker code */
+#define JPEG_COM        0xFE    /* COM marker code */
 
 
 /* If we have a brain-damaged compiler that emits warnings (or worse, errors)
@@ -1054,7 +1057,7 @@
  */
 
 #ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+#ifndef JPEG_INTERNALS          /* will be defined in jpegint.h */
 struct jvirt_sarray_control { long dummy; };
 struct jvirt_barray_control { long dummy; };
 struct jpeg_comp_master { long dummy; };
@@ -1089,8 +1092,14 @@
  */
 
 #ifdef JPEG_INTERNALS
-#include "jpegint.h"		/* fetch private declarations */
-#include "jerror.h"		/* fetch error codes too */
+#include "jpegint.h"            /* fetch private declarations */
+#include "jerror.h"             /* fetch error codes too */
+#endif
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+}
+#endif
 #endif
 
 #endif /* JPEGLIB_H */
diff --git a/jpegtran.1 b/jpegtran.1
index 6de18e2..b6a3e56 100644
--- a/jpegtran.1
+++ b/jpegtran.1
@@ -1,4 +1,4 @@
-.TH JPEGTRAN 1 "3 August 1997"
+.TH JPEGTRAN 1 "1 January 2013"
 .SH NAME
 jpegtran \- lossless transformation of JPEG files
 .SH SYNOPSIS
@@ -60,6 +60,9 @@
 Emit a JPEG restart marker every N MCU rows, or every N MCU blocks if "B" is
 attached to the number.
 .TP
+.B \-arithmetic
+Use arithmetic coding.
+.TP
 .BI \-scans " file"
 Use the scan script given in the specified text file.
 .PP
@@ -117,7 +120,7 @@
 .TP
 .B \-trim
 Drop non-transformable edge blocks.
-.PP
+.IP
 Obviously, a transformation with
 .B \-trim
 is not reversible, so strictly speaking
@@ -130,12 +133,38 @@
 followed by
 .B \-rot 180 -trim
 trims both edges.
+.TP
+.B \-perfect
+If you are only interested in perfect transformations, add the
+.B \-perfect
+switch.  This causes
+.B jpegtran
+to fail with an error if the transformation is not perfect.
+.IP
+For example, you may want to do
+.IP
+.B (jpegtran \-rot 90 \-perfect
+.I foo.jpg
+.B || djpeg
+.I foo.jpg
+.B | pnmflip \-r90 | cjpeg)
+.IP
+to do a perfect rotation, if available, or an approximated one if not.
+.TP
+.B \-crop WxH+X+Y
+Crop the image to a rectangular region of width W and height H, starting at
+point X,Y.  The lossless crop feature discards data outside of a given image
+region but losslessly preserves what is inside.  Like the rotate and flip
+transforms, lossless crop is restricted by the current JPEG format; the upper
+left corner of the selected region must fall on an iMCU boundary.  If it
+doesn't, then it is silently moved up and/or left to the nearest iMCU boundary
+(the lower right corner is unchanged).
 .PP
-Another not-strictly-lossless transformation switch is:
+Other not-strictly-lossless transformation switches are:
 .TP
 .B \-grayscale
 Force grayscale output.
-.PP
+.IP
 This option discards the chrominance channels if the input image is YCbCr
 (ie, a standard color JPEG), resulting in a grayscale JPEG file.  The
 luminance channel is preserved exactly, so this is a better method of reducing
@@ -154,20 +183,16 @@
 comments and other excess baggage present in the source file.
 .TP
 .B \-copy comments
-Copy only comment markers.  This setting copies comments from the source file,
-but discards any other inessential data.
+Copy only comment markers.  This setting copies comments from the source file
+but discards any other data that is inessential for image display.
 .TP
 .B \-copy all
 Copy all extra markers.  This setting preserves miscellaneous markers
-found in the source file, such as JFIF thumbnails and Photoshop settings.
-In some files these extra markers can be sizable.
+found in the source file, such as JFIF thumbnails, Exif data, and Photoshop
+settings.  In some files, these extra markers can be sizable.
 .PP
-The default behavior is
-.BR "\-copy comments" .
-(Note: in IJG releases v6 and v6a,
-.B jpegtran
-always did the equivalent of
-.BR "\-copy none" .)
+The default behavior is \fB\-copy comments\fR.  (Note: in IJG releases v6 and
+v6a, \fBjpegtran\fR always did the equivalent of \fB\-copy none\fR.)
 .PP
 Additional switches recognized by jpegtran are:
 .TP
@@ -226,12 +251,15 @@
 Communications of the ACM, April 1991 (vol. 34, no. 4), pp. 30-44.
 .SH AUTHOR
 Independent JPEG Group
-.SH BUGS
-Arithmetic coding is not supported for legal reasons.
 .PP
+This file was modified by The libjpeg-turbo Project to include only information
+relevant to libjpeg-turbo and to wordsmith certain sections.
+.SH BUGS
 The transform options can't transform odd-size images perfectly.  Use
 .B \-trim
-if you don't like the results without it.
+or
+.B \-perfect
+if you don't like the results.
 .PP
 The entire image is read into memory and then written out again, even in
 cases where this isn't really necessary.  Expect swapping on large images,
diff --git a/jpegtran.c b/jpegtran.c
index 20ef111..6b40de3 100644
--- a/jpegtran.c
+++ b/jpegtran.c
@@ -1,27 +1,30 @@
 /*
  * jpegtran.c
  *
- * Copyright (C) 1995-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1995-2010, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a command-line user interface for JPEG transcoding.
- * It is very similar to cjpeg.c, but provides lossless transcoding between
- * different JPEG file formats.  It also provides some lossless and sort-of-
- * lossless transformations of JPEG data.
+ * It is very similar to cjpeg.c, and partly to djpeg.c, but provides
+ * lossless transcoding between different JPEG file formats.  It also
+ * provides some lossless and sort-of-lossless transformations of JPEG data.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include "transupp.h"		/* Support routines for jpegtran */
-#include "jversion.h"		/* for version message */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include "transupp.h"           /* Support routines for jpegtran */
+#include "jversion.h"           /* for version message */
+#include "jconfigint.h"
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
@@ -35,9 +38,9 @@
  */
 
 
-static const char * progname;	/* program name for error messages */
-static char * outfilename;	/* for -outfile switch */
-static JCOPY_OPTION copyoption;	/* -copy switch */
+static const char * progname;   /* program name for error messages */
+static char * outfilename;      /* for -outfile switch */
+static JCOPY_OPTION copyoption; /* -copy switch */
 static jpeg_transform_info transformoption; /* image transformation options */
 
 
@@ -62,24 +65,28 @@
 #ifdef C_PROGRESSIVE_SUPPORTED
   fprintf(stderr, "  -progressive   Create progressive JPEG file\n");
 #endif
-#if TRANSFORMS_SUPPORTED
   fprintf(stderr, "Switches for modifying the image:\n");
+#if TRANSFORMS_SUPPORTED
+  fprintf(stderr, "  -crop WxH+X+Y  Crop to a rectangular subarea\n");
   fprintf(stderr, "  -grayscale     Reduce to grayscale (omit color data)\n");
   fprintf(stderr, "  -flip [horizontal|vertical]  Mirror image (left-right or top-bottom)\n");
+  fprintf(stderr, "  -perfect       Fail if there is non-transformable edge blocks\n");
   fprintf(stderr, "  -rotate [90|180|270]         Rotate image (degrees clockwise)\n");
+#endif
+#if TRANSFORMS_SUPPORTED
   fprintf(stderr, "  -transpose     Transpose image\n");
   fprintf(stderr, "  -transverse    Transverse transpose image\n");
   fprintf(stderr, "  -trim          Drop non-transformable edge blocks\n");
-#endif /* TRANSFORMS_SUPPORTED */
+#endif
   fprintf(stderr, "Switches for advanced users:\n");
+#ifdef C_ARITH_CODING_SUPPORTED
+  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
+#endif
   fprintf(stderr, "  -restart N     Set restart interval in rows, or in blocks with B\n");
   fprintf(stderr, "  -maxmemory N   Maximum memory to use (in kbytes)\n");
   fprintf(stderr, "  -outfile name  Specify name for output file\n");
   fprintf(stderr, "  -verbose  or  -debug   Emit debug output\n");
   fprintf(stderr, "Switches for wizards:\n");
-#ifdef C_ARITH_CODING_SUPPORTED
-  fprintf(stderr, "  -arithmetic    Use arithmetic coding\n");
-#endif
 #ifdef C_MULTISCAN_FILES_SUPPORTED
   fprintf(stderr, "  -scans file    Create multi-scan JPEG per script file\n");
 #endif
@@ -99,12 +106,12 @@
     transformoption.transform = transform;
   } else {
     fprintf(stderr, "%s: can only do one image transformation at a time\n",
-	    progname);
+            progname);
     usage();
   }
 #else
   fprintf(stderr, "%s: sorry, image transformation was not compiled\n",
-	  progname);
+          progname);
   exit(EXIT_FAILURE);
 #endif
 }
@@ -112,7 +119,7 @@
 
 LOCAL(int)
 parse_switches (j_compress_ptr cinfo, int argc, char **argv,
-		int last_file_arg_seen, boolean for_real)
+                int last_file_arg_seen, boolean for_real)
 /* Parse optional switches.
  * Returns argv[] index of first file-name argument (== argc if none).
  * Any file names with indexes <= last_file_arg_seen are ignored;
@@ -125,15 +132,18 @@
   int argn;
   char * arg;
   boolean simple_progressive;
-  char * scansarg = NULL;	/* saves -scans parm if any */
+  char * scansarg = NULL;       /* saves -scans parm if any */
 
   /* Set up default JPEG parameters. */
   simple_progressive = FALSE;
   outfilename = NULL;
   copyoption = JCOPYOPT_DEFAULT;
   transformoption.transform = JXFORM_NONE;
+  transformoption.perfect = FALSE;
   transformoption.trim = FALSE;
   transformoption.force_grayscale = FALSE;
+  transformoption.crop = FALSE;
+  transformoption.slow_hflip = FALSE;
   cinfo->err->trace_level = 0;
 
   /* Scan command line options, adjust parameters */
@@ -143,12 +153,12 @@
     if (*arg != '-') {
       /* Not a switch, must be a file name argument */
       if (argn <= last_file_arg_seen) {
-	outfilename = NULL;	/* -outfile applies to just one input file */
-	continue;		/* ignore this name if previously processed */
+        outfilename = NULL;     /* -outfile applies to just one input file */
+        continue;               /* ignore this name if previously processed */
       }
-      break;			/* else done parsing switches */
+      break;                    /* else done parsing switches */
     }
-    arg++;			/* advance past switch marker character */
+    arg++;                      /* advance past switch marker character */
 
     if (keymatch(arg, "arithmetic", 1)) {
       /* Use arithmetic coding. */
@@ -156,22 +166,36 @@
       cinfo->arith_code = TRUE;
 #else
       fprintf(stderr, "%s: sorry, arithmetic coding not supported\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
-    } else if (keymatch(arg, "copy", 1)) {
+    } else if (keymatch(arg, "copy", 2)) {
       /* Select which extra markers to copy. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "none", 1)) {
-	copyoption = JCOPYOPT_NONE;
+        copyoption = JCOPYOPT_NONE;
       } else if (keymatch(argv[argn], "comments", 1)) {
-	copyoption = JCOPYOPT_COMMENTS;
+        copyoption = JCOPYOPT_COMMENTS;
       } else if (keymatch(argv[argn], "all", 1)) {
-	copyoption = JCOPYOPT_ALL;
+        copyoption = JCOPYOPT_ALL;
       } else
-	usage();
+        usage();
+
+    } else if (keymatch(arg, "crop", 2)) {
+      /* Perform lossless cropping. */
+#if TRANSFORMS_SUPPORTED
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+        fprintf(stderr, "%s: bogus -crop argument '%s'\n",
+                progname, argv[argn]);
+        exit(EXIT_FAILURE);
+      }
+#else
+      select_transform(JXFORM_NONE);    /* force an error */
+#endif
 
     } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) {
       /* Enable debug printouts. */
@@ -179,29 +203,32 @@
       static boolean printed_version = FALSE;
 
       if (! printed_version) {
-	fprintf(stderr, "Independent JPEG Group's JPEGTRAN, version %s\n%s\n",
-		JVERSION, JCOPYRIGHT);
-	printed_version = TRUE;
+        fprintf(stderr, "%s version %s (build %s)\n",
+                PACKAGE_NAME, VERSION, BUILD);
+        fprintf(stderr, "%s\n\n", JCOPYRIGHT);
+        fprintf(stderr, "Emulating The Independent JPEG Group's software, version %s\n\n",
+                JVERSION);
+        printed_version = TRUE;
       }
       cinfo->err->trace_level++;
 
     } else if (keymatch(arg, "flip", 1)) {
       /* Mirror left-right or top-bottom. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "horizontal", 1))
-	select_transform(JXFORM_FLIP_H);
+        select_transform(JXFORM_FLIP_H);
       else if (keymatch(argv[argn], "vertical", 1))
-	select_transform(JXFORM_FLIP_V);
+        select_transform(JXFORM_FLIP_V);
       else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) {
       /* Force to grayscale. */
 #if TRANSFORMS_SUPPORTED
       transformoption.force_grayscale = TRUE;
 #else
-      select_transform(JXFORM_NONE);	/* force an error */
+      select_transform(JXFORM_NONE);    /* force an error */
 #endif
 
     } else if (keymatch(arg, "maxmemory", 3)) {
@@ -209,12 +236,12 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (ch == 'm' || ch == 'M')
-	lval *= 1000L;
+        lval *= 1000L;
       cinfo->mem->max_memory_to_use = lval * 1000L;
 
     } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) {
@@ -223,24 +250,29 @@
       cinfo->optimize_coding = TRUE;
 #else
       fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
     } else if (keymatch(arg, "outfile", 4)) {
       /* Set output file name. */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
-      outfilename = argv[argn];	/* save it away for later use */
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
+      outfilename = argv[argn]; /* save it away for later use */
 
-    } else if (keymatch(arg, "progressive", 1)) {
+    } else if (keymatch(arg, "perfect", 2)) {
+      /* Fail if there are any partial edge MCUs that the transform can't
+       * handle. */
+      transformoption.perfect = TRUE;
+
+    } else if (keymatch(arg, "progressive", 2)) {
       /* Select simple progressive mode. */
 #ifdef C_PROGRESSIVE_SUPPORTED
       simple_progressive = TRUE;
       /* We must postpone execution until num_components is known. */
 #else
       fprintf(stderr, "%s: sorry, progressive output was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -249,43 +281,43 @@
       long lval;
       char ch = 'x';
 
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1)
-	usage();
+        usage();
       if (lval < 0 || lval > 65535L)
-	usage();
+        usage();
       if (ch == 'b' || ch == 'B') {
-	cinfo->restart_interval = (unsigned int) lval;
-	cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
+        cinfo->restart_interval = (unsigned int) lval;
+        cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */
       } else {
-	cinfo->restart_in_rows = (int) lval;
-	/* restart_interval will be computed during startup */
+        cinfo->restart_in_rows = (int) lval;
+        /* restart_interval will be computed during startup */
       }
 
     } else if (keymatch(arg, "rotate", 2)) {
       /* Rotate 90, 180, or 270 degrees (measured clockwise). */
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       if (keymatch(argv[argn], "90", 2))
-	select_transform(JXFORM_ROT_90);
+        select_transform(JXFORM_ROT_90);
       else if (keymatch(argv[argn], "180", 3))
-	select_transform(JXFORM_ROT_180);
+        select_transform(JXFORM_ROT_180);
       else if (keymatch(argv[argn], "270", 3))
-	select_transform(JXFORM_ROT_270);
+        select_transform(JXFORM_ROT_270);
       else
-	usage();
+        usage();
 
     } else if (keymatch(arg, "scans", 1)) {
       /* Set scan script. */
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-      if (++argn >= argc)	/* advance to next argument */
-	usage();
+      if (++argn >= argc)       /* advance to next argument */
+        usage();
       scansarg = argv[argn];
       /* We must postpone reading the file in case -progressive appears. */
 #else
       fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n",
-	      progname);
+              progname);
       exit(EXIT_FAILURE);
 #endif
 
@@ -302,7 +334,7 @@
       transformoption.trim = TRUE;
 
     } else {
-      usage();			/* bogus switch */
+      usage();                  /* bogus switch */
     }
   }
 
@@ -311,18 +343,18 @@
   if (for_real) {
 
 #ifdef C_PROGRESSIVE_SUPPORTED
-    if (simple_progressive)	/* process -progressive; -scans can override */
+    if (simple_progressive)     /* process -progressive; -scans can override */
       jpeg_simple_progression(cinfo);
 #endif
 
 #ifdef C_MULTISCAN_FILES_SUPPORTED
-    if (scansarg != NULL)	/* process -scans if it was present */
+    if (scansarg != NULL)       /* process -scans if it was present */
       if (! read_scan_script(cinfo, scansarg))
-	usage();
+        usage();
 #endif
   }
 
-  return argn;			/* return index of next arg (file name) */
+  return argn;                  /* return index of next arg (file name) */
 }
 
 
@@ -342,8 +374,10 @@
   jvirt_barray_ptr * src_coef_arrays;
   jvirt_barray_ptr * dst_coef_arrays;
   int file_index;
-  FILE * input_file;
-  FILE * output_file;
+  /* We assume all-in-memory processing and can therefore use only a
+   * single file pointer for sequential input and output operation.
+   */
+  FILE * fp;
 
   /* On Mac, fetch a command line. */
 #ifdef USE_CCOMMAND
@@ -352,7 +386,7 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "jpegtran";	/* in case C library doesn't provide it */
+    progname = "jpegtran";      /* in case C library doesn't provide it */
 
   /* Initialize the JPEG decompression object with default error handling. */
   srcinfo.err = jpeg_std_error(&jsrcerr);
@@ -361,13 +395,6 @@
   dstinfo.err = jpeg_std_error(&jdsterr);
   jpeg_create_compress(&dstinfo);
 
-  /* Now safe to enable signal catcher.
-   * Note: we assume only the decompression object will have virtual arrays.
-   */
-#ifdef NEED_SIGNAL_CATCHER
-  enable_signal_catcher((j_common_ptr) &srcinfo);
-#endif
-
   /* Scan command line to find file names.
    * It is convenient to use just one switch-parsing routine, but the switch
    * values read here are mostly ignored; we will rescan the switches after
@@ -385,14 +412,14 @@
   if (outfilename == NULL) {
     if (file_index != argc-2) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
     outfilename = argv[file_index+1];
   } else {
     if (file_index != argc-1) {
       fprintf(stderr, "%s: must name one input and one output file\n",
-	      progname);
+              progname);
       usage();
     }
   }
@@ -406,24 +433,13 @@
 
   /* Open the input file. */
   if (file_index < argc) {
-    if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) {
-      fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]);
+    if ((fp = fopen(argv[file_index], READ_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s for reading\n", progname, argv[file_index]);
       exit(EXIT_FAILURE);
     }
   } else {
     /* default input file is stdin */
-    input_file = read_stdin();
-  }
-
-  /* Open the output file. */
-  if (outfilename != NULL) {
-    if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) {
-      fprintf(stderr, "%s: can't open %s\n", progname, outfilename);
-      exit(EXIT_FAILURE);
-    }
-  } else {
-    /* default output file is stdout */
-    output_file = write_stdout();
+    fp = read_stdin();
   }
 
 #ifdef PROGRESS_REPORT
@@ -431,7 +447,7 @@
 #endif
 
   /* Specify data source for decompression */
-  jpeg_stdio_src(&srcinfo, input_file);
+  jpeg_stdio_src(&srcinfo, fp);
 
   /* Enable saving of extra markers that we want to copy */
   jcopy_markers_setup(&srcinfo, copyoption);
@@ -443,7 +459,12 @@
    * jpeg_read_coefficients so that memory allocation will be done right.
    */
 #if TRANSFORMS_SUPPORTED
-  jtransform_request_workspace(&srcinfo, &transformoption);
+  /* Fail right away if -perfect is given and transformation is not perfect.
+   */
+  if (!jtransform_request_workspace(&srcinfo, &transformoption)) {
+    fprintf(stderr, "%s: transformation is not perfect\n", progname);
+    exit(EXIT_FAILURE);
+  }
 #endif
 
   /* Read source file as DCT coefficients */
@@ -457,17 +478,38 @@
    */
 #if TRANSFORMS_SUPPORTED
   dst_coef_arrays = jtransform_adjust_parameters(&srcinfo, &dstinfo,
-						 src_coef_arrays,
-						 &transformoption);
+                                                 src_coef_arrays,
+                                                 &transformoption);
 #else
   dst_coef_arrays = src_coef_arrays;
 #endif
 
+  /* Close input file, if we opened it.
+   * Note: we assume that jpeg_read_coefficients consumed all input
+   * until JPEG_REACHED_EOI, and that jpeg_finish_decompress will
+   * only consume more while (! cinfo->inputctl->eoi_reached).
+   * We cannot call jpeg_finish_decompress here since we still need the
+   * virtual arrays allocated from the source object for processing.
+   */
+  if (fp != stdin)
+    fclose(fp);
+
+  /* Open the output file. */
+  if (outfilename != NULL) {
+    if ((fp = fopen(outfilename, WRITE_BINARY)) == NULL) {
+      fprintf(stderr, "%s: can't open %s for writing\n", progname, outfilename);
+      exit(EXIT_FAILURE);
+    }
+  } else {
+    /* default output file is stdout */
+    fp = write_stdout();
+  }
+
   /* Adjust default compression parameters by re-parsing the options */
   file_index = parse_switches(&dstinfo, argc, argv, 0, TRUE);
 
   /* Specify data destination for compression */
-  jpeg_stdio_dest(&dstinfo, output_file);
+  jpeg_stdio_dest(&dstinfo, fp);
 
   /* Start compressor (note no image data is actually written here) */
   jpeg_write_coefficients(&dstinfo, dst_coef_arrays);
@@ -478,8 +520,8 @@
   /* Execute image transformation, if any */
 #if TRANSFORMS_SUPPORTED
   jtransform_execute_transformation(&srcinfo, &dstinfo,
-				    src_coef_arrays,
-				    &transformoption);
+                                    src_coef_arrays,
+                                    &transformoption);
 #endif
 
   /* Finish compression and release memory */
@@ -488,11 +530,9 @@
   (void) jpeg_finish_decompress(&srcinfo);
   jpeg_destroy_decompress(&srcinfo);
 
-  /* Close files, if we opened them */
-  if (input_file != stdin)
-    fclose(input_file);
-  if (output_file != stdout)
-    fclose(output_file);
+  /* Close output file, if we opened it */
+  if (fp != stdout)
+    fclose(fp);
 
 #ifdef PROGRESS_REPORT
   end_progress_monitor((j_common_ptr) &dstinfo);
@@ -500,5 +540,5 @@
 
   /* All done. */
   exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/jquant1.c b/jquant1.c
index b2f96aa..0e25354 100644
--- a/jquant1.c
+++ b/jquant1.c
@@ -1,8 +1,10 @@
 /*
  * jquant1.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, D. R. Commander
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains 1-pass color quantization (color mapping) routines.
@@ -68,9 +70,9 @@
  * table in both directions.
  */
 
-#define ODITHER_SIZE  16	/* dimension of dither matrix */
+#define ODITHER_SIZE  16        /* dimension of dither matrix */
 /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
-#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)       /* # cells in matrix */
 #define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
 
 typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
@@ -105,8 +107,8 @@
  * Errors are accumulated into the array fserrors[], at a resolution of
  * 1/16th of a pixel count.  The error at a given pixel is propagated
  * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
  * We work left-to-right on even rows, right-to-left on odd rows.
  *
  * We can get away with a single array (holding one row's worth of errors)
@@ -119,49 +121,46 @@
  * The fserrors[] array is indexed [component#][position].
  * We provide (#columns + 2) entries per component; the extra entry at each
  * end saves us from special-casing the first and last pixels.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
  */
 
 #if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+typedef INT32 FSERROR;          /* may need more than 16 bits */
+typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+typedef FSERROR *FSERRPTR;  /* pointer to error array */
 
 
 /* Private subobject */
 
-#define MAX_Q_COMPS 4		/* max components I can handle */
+#define MAX_Q_COMPS 4           /* max components I can handle */
 
 typedef struct {
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Initially allocated colormap is saved here */
-  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
-  int sv_actual;		/* number of entries in use */
+  JSAMPARRAY sv_colormap;       /* The color map as a 2-D pixel array */
+  int sv_actual;                /* number of entries in use */
 
-  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  JSAMPARRAY colorindex;        /* Precomputed mapping for speed */
   /* colorindex[i][j] = index of color closest to pixel value j in component i,
    * premultiplied as described above.  Since colormap indexes must fit into
    * JSAMPLEs, the entries of this array will too.
    */
-  boolean is_padded;		/* is the colorindex padded for odither? */
+  boolean is_padded;            /* is the colorindex padded for odither? */
 
-  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+  int Ncolors[MAX_Q_COMPS];     /* # of values alloced to each component */
 
   /* Variables for ordered dithering */
-  int row_index;		/* cur row's vertical index in dither matrix */
+  int row_index;                /* cur row's vertical index in dither matrix */
   ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
 
   /* Variables for Floyd-Steinberg dithering */
   FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
+  boolean on_odd_row;           /* flag to remember which row we are on */
 } my_cquantizer;
 
 typedef my_cquantizer * my_cquantize_ptr;
@@ -193,18 +192,21 @@
   int total_colors, iroot, i, j;
   boolean changed;
   long temp;
-  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+  int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+  RGB_order[0] = rgb_green[cinfo->out_color_space];
+  RGB_order[1] = rgb_red[cinfo->out_color_space];
+  RGB_order[2] = rgb_blue[cinfo->out_color_space];
 
   /* We can allocate at least the nc'th root of max_colors per component. */
   /* Compute floor(nc'th root of max_colors). */
   iroot = 1;
   do {
     iroot++;
-    temp = iroot;		/* set temp = iroot ** nc */
+    temp = iroot;               /* set temp = iroot ** nc */
     for (i = 1; i < nc; i++)
       temp *= iroot;
   } while (temp <= (long) max_colors); /* repeat till iroot exceeds root */
-  iroot--;			/* now iroot = floor(root) */
+  iroot--;                      /* now iroot = floor(root) */
 
   /* Must have at least 2 color values per component */
   if (iroot < 2)
@@ -228,10 +230,10 @@
       j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
       /* calculate new total_colors if Ncolors[j] is incremented */
       temp = total_colors / Ncolors[j];
-      temp *= Ncolors[j]+1;	/* done in long arith to avoid oflo */
+      temp *= Ncolors[j]+1;     /* done in long arith to avoid oflo */
       if (temp > (long) max_colors)
-	break;			/* won't fit, done with this pass */
-      Ncolors[j]++;		/* OK, apply the increment */
+        break;                  /* won't fit, done with this pass */
+      Ncolors[j]++;             /* OK, apply the increment */
       total_colors = (int) temp;
       changed = TRUE;
     }
@@ -273,8 +275,8 @@
 create_colormap (j_decompress_ptr cinfo)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  JSAMPARRAY colormap;		/* Created colormap */
-  int total_colors;		/* Number of distinct output colors */
+  JSAMPARRAY colormap;          /* Created colormap */
+  int total_colors;             /* Number of distinct output colors */
   int i,j,k, nci, blksize, blkdist, ptr, val;
 
   /* Select number of colors for each component */
@@ -283,8 +285,8 @@
   /* Report selected color counts */
   if (cinfo->out_color_components == 3)
     TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
-	     total_colors, cquantize->Ncolors[0],
-	     cquantize->Ncolors[1], cquantize->Ncolors[2]);
+             total_colors, cquantize->Ncolors[0],
+             cquantize->Ncolors[1], cquantize->Ncolors[2]);
   else
     TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
 
@@ -309,12 +311,12 @@
       val = output_value(cinfo, i, j, nci-1);
       /* Fill in all colormap entries that have this value of this component */
       for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
-	/* fill in blksize entries beginning at ptr */
-	for (k = 0; k < blksize; k++)
-	  colormap[i][ptr+k] = (JSAMPLE) val;
+        /* fill in blksize entries beginning at ptr */
+        for (k = 0; k < blksize; k++)
+          colormap[i][ptr+k] = (JSAMPLE) val;
       }
     }
-    blkdist = blksize;		/* blksize of this color is blkdist of next */
+    blkdist = blksize;          /* blksize of this color is blkdist of next */
   }
 
   /* Save the colormap in private storage,
@@ -372,16 +374,16 @@
     val = 0;
     k = largest_input_value(cinfo, i, 0, nci-1);
     for (j = 0; j <= MAXJSAMPLE; j++) {
-      while (j > k)		/* advance val if past boundary */
-	k = largest_input_value(cinfo, i, ++val, nci-1);
+      while (j > k)             /* advance val if past boundary */
+        k = largest_input_value(cinfo, i, ++val, nci-1);
       /* premultiply so that no multiplication needed in main processing */
       indexptr[j] = (JSAMPLE) (val * blksize);
     }
     /* Pad at both ends if necessary */
     if (pad)
       for (j = 1; j <= MAXJSAMPLE; j++) {
-	indexptr[-j] = indexptr[0];
-	indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+        indexptr[-j] = indexptr[0];
+        indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
       }
   }
 }
@@ -401,7 +403,7 @@
 
   odither = (ODITHER_MATRIX_PTR)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(ODITHER_MATRIX));
+                                sizeof(ODITHER_MATRIX));
   /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
    * Hence the dither value for the matrix cell with fill order f
    * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
@@ -411,7 +413,7 @@
   for (j = 0; j < ODITHER_SIZE; j++) {
     for (k = 0; k < ODITHER_SIZE; k++) {
       num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[j][k])))
-	    * MAXJSAMPLE;
+            * MAXJSAMPLE;
       /* Ensure round towards zero despite C's lack of consistency
        * about rounding negative values in integer division...
        */
@@ -424,7 +426,7 @@
 
 /*
  * Create the ordered-dither tables.
- * Components having the same number of representative colors may 
+ * Components having the same number of representative colors may
  * share a dither table.
  */
 
@@ -437,14 +439,14 @@
 
   for (i = 0; i < cinfo->out_color_components; i++) {
     nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
-    odither = NULL;		/* search for matching prior component */
+    odither = NULL;             /* search for matching prior component */
     for (j = 0; j < i; j++) {
       if (nci == cquantize->Ncolors[j]) {
-	odither = cquantize->odither[j];
-	break;
+        odither = cquantize->odither[j];
+        break;
       }
     }
-    if (odither == NULL)	/* need a new table? */
+    if (odither == NULL)        /* need a new table? */
       odither = make_odither_array(cinfo, nci);
     cquantize->odither[i] = odither;
   }
@@ -457,7 +459,7 @@
 
 METHODDEF(void)
 color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		JSAMPARRAY output_buf, int num_rows)
+                JSAMPARRAY output_buf, int num_rows)
 /* General case, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -475,7 +477,7 @@
     for (col = width; col > 0; col--) {
       pixcode = 0;
       for (ci = 0; ci < nc; ci++) {
-	pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
+        pixcode += GETJSAMPLE(colorindex[ci][GETJSAMPLE(*ptrin++)]);
       }
       *ptrout++ = (JSAMPLE) pixcode;
     }
@@ -485,7 +487,7 @@
 
 METHODDEF(void)
 color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		 JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -513,15 +515,15 @@
 
 METHODDEF(void)
 quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		     JSAMPARRAY output_buf, int num_rows)
+                     JSAMPARRAY output_buf, int num_rows)
 /* General case, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   register JSAMPROW input_ptr;
   register JSAMPROW output_ptr;
   JSAMPROW colorindex_ci;
-  int * dither;			/* points to active row of dither matrix */
-  int row_index, col_index;	/* current indexes into dither matrix */
+  int * dither;                 /* points to active row of dither matrix */
+  int row_index, col_index;     /* current indexes into dither matrix */
   int nc = cinfo->out_color_components;
   int ci;
   int row;
@@ -530,8 +532,7 @@
 
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
-    jzero_far((void FAR *) output_buf[row],
-	      (size_t) (width * SIZEOF(JSAMPLE)));
+    jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE)));
     row_index = cquantize->row_index;
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
@@ -541,17 +542,17 @@
       col_index = 0;
 
       for (col = width; col > 0; col--) {
-	/* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
-	 * select output value, accumulate into output code for this pixel.
-	 * Range-limiting need not be done explicitly, as we have extended
-	 * the colorindex table to produce the right answers for out-of-range
-	 * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
-	 * required amount of padding.
-	 */
-	*output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
-	input_ptr += nc;
-	output_ptr++;
-	col_index = (col_index + 1) & ODITHER_MASK;
+        /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
+         * select output value, accumulate into output code for this pixel.
+         * Range-limiting need not be done explicitly, as we have extended
+         * the colorindex table to produce the right answers for out-of-range
+         * inputs.  The maximum dither is +- MAXJSAMPLE; this sets the
+         * required amount of padding.
+         */
+        *output_ptr += colorindex_ci[GETJSAMPLE(*input_ptr)+dither[col_index]];
+        input_ptr += nc;
+        output_ptr++;
+        col_index = (col_index + 1) & ODITHER_MASK;
       }
     }
     /* Advance row index for next row */
@@ -563,7 +564,7 @@
 
 METHODDEF(void)
 quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		      JSAMPARRAY output_buf, int num_rows)
+                      JSAMPARRAY output_buf, int num_rows)
 /* Fast path for out_color_components==3, with ordered dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -573,10 +574,10 @@
   JSAMPROW colorindex0 = cquantize->colorindex[0];
   JSAMPROW colorindex1 = cquantize->colorindex[1];
   JSAMPROW colorindex2 = cquantize->colorindex[2];
-  int * dither0;		/* points to active row of dither matrix */
+  int * dither0;                /* points to active row of dither matrix */
   int * dither1;
   int * dither2;
-  int row_index, col_index;	/* current indexes into dither matrix */
+  int row_index, col_index;     /* current indexes into dither matrix */
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -592,11 +593,11 @@
 
     for (col = width; col > 0; col--) {
       pixcode  = GETJSAMPLE(colorindex0[GETJSAMPLE(*input_ptr++) +
-					dither0[col_index]]);
+                                        dither0[col_index]]);
       pixcode += GETJSAMPLE(colorindex1[GETJSAMPLE(*input_ptr++) +
-					dither1[col_index]]);
+                                        dither1[col_index]]);
       pixcode += GETJSAMPLE(colorindex2[GETJSAMPLE(*input_ptr++) +
-					dither2[col_index]]);
+                                        dither2[col_index]]);
       *output_ptr++ = (JSAMPLE) pixcode;
       col_index = (col_index + 1) & ODITHER_MASK;
     }
@@ -608,24 +609,24 @@
 
 METHODDEF(void)
 quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		    JSAMPARRAY output_buf, int num_rows)
+                    JSAMPARRAY output_buf, int num_rows)
 /* General case, with Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
-  register LOCFSERROR cur;	/* current error or pixel value */
-  LOCFSERROR belowerr;		/* error for pixel below cur */
-  LOCFSERROR bpreverr;		/* error for below/prev col */
-  LOCFSERROR bnexterr;		/* error for below/next col */
+  register LOCFSERROR cur;      /* current error or pixel value */
+  LOCFSERROR belowerr;          /* error for pixel below cur */
+  LOCFSERROR bpreverr;          /* error for below/prev col */
+  LOCFSERROR bnexterr;          /* error for below/next col */
   LOCFSERROR delta;
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
   register JSAMPROW input_ptr;
   register JSAMPROW output_ptr;
   JSAMPROW colorindex_ci;
   JSAMPROW colormap_ci;
   int pixcode;
   int nc = cinfo->out_color_components;
-  int dir;			/* 1 for left-to-right, -1 for right-to-left */
-  int dirnc;			/* dir * nc */
+  int dir;                      /* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;                    /* dir * nc */
   int ci;
   int row;
   JDIMENSION col;
@@ -635,23 +636,22 @@
 
   for (row = 0; row < num_rows; row++) {
     /* Initialize output values to 0 so can process components separately */
-    jzero_far((void FAR *) output_buf[row],
-	      (size_t) (width * SIZEOF(JSAMPLE)));
+    jzero_far((void *) output_buf[row], (size_t) (width * sizeof(JSAMPLE)));
     for (ci = 0; ci < nc; ci++) {
       input_ptr = input_buf[row] + ci;
       output_ptr = output_buf[row];
       if (cquantize->on_odd_row) {
-	/* work right to left in this row */
-	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
-	output_ptr += width-1;
-	dir = -1;
-	dirnc = -nc;
-	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+        /* work right to left in this row */
+        input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+        output_ptr += width-1;
+        dir = -1;
+        dirnc = -nc;
+        errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
       } else {
-	/* work left to right in this row */
-	dir = 1;
-	dirnc = nc;
-	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+        /* work left to right in this row */
+        dir = 1;
+        dirnc = nc;
+        errorptr = cquantize->fserrors[ci]; /* => entry before first column */
       }
       colorindex_ci = cquantize->colorindex[ci];
       colormap_ci = cquantize->sv_colormap[ci];
@@ -661,47 +661,47 @@
       belowerr = bpreverr = 0;
 
       for (col = width; col > 0; col--) {
-	/* cur holds the error propagated from the previous pixel on the
-	 * current line.  Add the error propagated from the previous line
-	 * to form the complete error correction term for this pixel, and
-	 * round the error term (which is expressed * 16) to an integer.
-	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
-	 * for either sign of the error value.
-	 * Note: errorptr points to *previous* column's array entry.
-	 */
-	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
-	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
-	 * The maximum error is +- MAXJSAMPLE; this sets the required size
-	 * of the range_limit array.
-	 */
-	cur += GETJSAMPLE(*input_ptr);
-	cur = GETJSAMPLE(range_limit[cur]);
-	/* Select output value, accumulate into output code for this pixel */
-	pixcode = GETJSAMPLE(colorindex_ci[cur]);
-	*output_ptr += (JSAMPLE) pixcode;
-	/* Compute actual representation error at this pixel */
-	/* Note: we can do this even though we don't have the final */
-	/* pixel code, because the colormap is orthogonal. */
-	cur -= GETJSAMPLE(colormap_ci[pixcode]);
-	/* Compute error fractions to be propagated to adjacent pixels.
-	 * Add these into the running sums, and simultaneously shift the
-	 * next-line error sums left by 1 column.
-	 */
-	bnexterr = cur;
-	delta = cur * 2;
-	cur += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr + cur);
-	cur += delta;		/* form error * 5 */
-	bpreverr = belowerr + cur;
-	belowerr = bnexterr;
-	cur += delta;		/* form error * 7 */
-	/* At this point cur contains the 7/16 error value to be propagated
-	 * to the next pixel on the current line, and all the errors for the
-	 * next line have been shifted over. We are therefore ready to move on.
-	 */
-	input_ptr += dirnc;	/* advance input ptr to next column */
-	output_ptr += dir;	/* advance output ptr to next column */
-	errorptr += dir;	/* advance errorptr to current column */
+        /* cur holds the error propagated from the previous pixel on the
+         * current line.  Add the error propagated from the previous line
+         * to form the complete error correction term for this pixel, and
+         * round the error term (which is expressed * 16) to an integer.
+         * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+         * for either sign of the error value.
+         * Note: errorptr points to *previous* column's array entry.
+         */
+        cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+        /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+         * The maximum error is +- MAXJSAMPLE; this sets the required size
+         * of the range_limit array.
+         */
+        cur += GETJSAMPLE(*input_ptr);
+        cur = GETJSAMPLE(range_limit[cur]);
+        /* Select output value, accumulate into output code for this pixel */
+        pixcode = GETJSAMPLE(colorindex_ci[cur]);
+        *output_ptr += (JSAMPLE) pixcode;
+        /* Compute actual representation error at this pixel */
+        /* Note: we can do this even though we don't have the final */
+        /* pixel code, because the colormap is orthogonal. */
+        cur -= GETJSAMPLE(colormap_ci[pixcode]);
+        /* Compute error fractions to be propagated to adjacent pixels.
+         * Add these into the running sums, and simultaneously shift the
+         * next-line error sums left by 1 column.
+         */
+        bnexterr = cur;
+        delta = cur * 2;
+        cur += delta;           /* form error * 3 */
+        errorptr[0] = (FSERROR) (bpreverr + cur);
+        cur += delta;           /* form error * 5 */
+        bpreverr = belowerr + cur;
+        belowerr = bnexterr;
+        cur += delta;           /* form error * 7 */
+        /* At this point cur contains the 7/16 error value to be propagated
+         * to the next pixel on the current line, and all the errors for the
+         * next line have been shifted over. We are therefore ready to move on.
+         */
+        input_ptr += dirnc;     /* advance input ptr to next column */
+        output_ptr += dir;      /* advance output ptr to next column */
+        errorptr += dir;        /* advance errorptr to current column */
       }
       /* Post-loop cleanup: we must unload the final error value into the
        * final fserrors[] entry.  Note we need not unload belowerr because
@@ -725,7 +725,7 @@
   size_t arraysize;
   int i;
 
-  arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR));
   for (i = 0; i < cinfo->out_color_components; i++) {
     cquantize->fserrors[i] = (FSERRPTR)
       (*cinfo->mem->alloc_large)((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
@@ -761,7 +761,7 @@
       cquantize->pub.color_quantize = quantize3_ord_dither;
     else
       cquantize->pub.color_quantize = quantize_ord_dither;
-    cquantize->row_index = 0;	/* initialize state for ordered dither */
+    cquantize->row_index = 0;   /* initialize state for ordered dither */
     /* If user changed to ordered dither from another mode,
      * we must recreate the color index table with padding.
      * This will cost extra space, but probably isn't very likely.
@@ -779,9 +779,9 @@
     if (cquantize->fserrors[0] == NULL)
       alloc_fs_workspace(cinfo);
     /* Initialize the propagated errors to zero. */
-    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    arraysize = (size_t) ((cinfo->output_width + 2) * sizeof(FSERROR));
     for (i = 0; i < cinfo->out_color_components; i++)
-      jzero_far((void FAR *) cquantize->fserrors[i], arraysize);
+      jzero_far((void *) cquantize->fserrors[i], arraysize);
     break;
   default:
     ERREXIT(cinfo, JERR_NOT_COMPILED);
@@ -824,13 +824,13 @@
 
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
+                                sizeof(my_cquantizer));
   cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
   cquantize->pub.start_pass = start_pass_1_quant;
   cquantize->pub.finish_pass = finish_pass_1_quant;
   cquantize->pub.new_color_map = new_color_map_1_quant;
   cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
-  cquantize->odither[0] = NULL;	/* Also flag odither arrays not allocated */
+  cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */
 
   /* Make sure my internal arrays won't overflow */
   if (cinfo->out_color_components > MAX_Q_COMPS)
@@ -844,10 +844,10 @@
   create_colorindex(cinfo);
 
   /* Allocate Floyd-Steinberg workspace now if requested.
-   * We do this now since it is FAR storage and may affect the memory
-   * manager's space calculations.  If the user changes to FS dither
-   * mode in a later pass, we will allocate the space then, and will
-   * possibly overrun the max_memory_to_use setting.
+   * We do this now since it may affect the memory manager's space
+   * calculations.  If the user changes to FS dither mode in a later pass, we
+   * will allocate the space then, and will possibly overrun the
+   * max_memory_to_use setting.
    */
   if (cinfo->dither_mode == JDITHER_FS)
     alloc_fs_workspace(cinfo);
diff --git a/jquant2.c b/jquant2.c
index af601e3..291b4f1 100644
--- a/jquant2.c
+++ b/jquant2.c
@@ -1,8 +1,10 @@
 /*
  * jquant2.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains 2-pass color quantization (color mapping) routines.
@@ -41,7 +43,7 @@
  * color space, and repeatedly splits the "largest" remaining box until we
  * have as many boxes as desired colors.  Then the mean color in each
  * remaining box becomes one of the possible output colors.
- * 
+ *
  * The second pass over the image maps each input pixel to the closest output
  * color (optionally after applying a Floyd-Steinberg dithering correction).
  * This mapping is logically trivial, but making it go fast enough requires
@@ -70,33 +72,14 @@
  * probably need to change these scale factors.
  */
 
-#define R_SCALE 2		/* scale R distances by this much */
-#define G_SCALE 3		/* scale G distances by this much */
-#define B_SCALE 1		/* and B by this much */
+#define R_SCALE 2               /* scale R distances by this much */
+#define G_SCALE 3               /* scale G distances by this much */
+#define B_SCALE 1               /* and B by this much */
 
-/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
- * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
- * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
- * you'll get compile errors until you extend this logic.  In that case
- * you'll probably want to tweak the histogram sizes too.
- */
-
-#if RGB_RED == 0
-#define C0_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 0
-#define C0_SCALE B_SCALE
-#endif
-#if RGB_GREEN == 1
-#define C1_SCALE G_SCALE
-#endif
-#if RGB_RED == 2
-#define C2_SCALE R_SCALE
-#endif
-#if RGB_BLUE == 2
-#define C2_SCALE B_SCALE
-#endif
-
+static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE}; /* R, G, B scales */
+#define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]] /* needs cinfo */
+#define C1_SCALE c_scales[rgb_green[cinfo->out_color_space]] /* needs cinfo */
+#define C2_SCALE c_scales[rgb_blue[cinfo->out_color_space]] /* needs cinfo */
 
 /*
  * First we have the histogram data structure and routines for creating it.
@@ -119,9 +102,7 @@
  * machines, we can't just allocate the histogram in one chunk.  Instead
  * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
  * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
- * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
- * on 80x86 machines, the pointer row is in near memory but the actual
- * arrays are in far memory (same arrangement as we use for image arrays).
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.
  */
 
 #define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
@@ -129,9 +110,9 @@
 /* These will do the right thing for either R,G,B or B,G,R color order,
  * but you may not like the results for other color orders.
  */
-#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
-#define HIST_C1_BITS  6		/* bits of precision in G histogram */
-#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+#define HIST_C0_BITS  5         /* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6         /* bits of precision in G histogram */
+#define HIST_C2_BITS  5         /* bits of precision in B/R histogram */
 
 /* Number of elements along histogram axes. */
 #define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
@@ -144,13 +125,13 @@
 #define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
 
 
-typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+typedef UINT16 histcell;        /* histogram cell; prefer an unsigned type */
 
-typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+typedef histcell * histptr; /* for pointers to histogram cells */
 
 typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
-typedef hist1d FAR * hist2d;	/* type for the 2nd-level pointers */
-typedef hist2d * hist3d;	/* type for top-level pointer */
+typedef hist1d * hist2d;    /* type for the 2nd-level pointers */
+typedef hist2d * hist3d;        /* type for top-level pointer */
 
 
 /* Declarations for Floyd-Steinberg dithering.
@@ -158,8 +139,8 @@
  * Errors are accumulated into the array fserrors[], at a resolution of
  * 1/16th of a pixel count.  The error at a given pixel is propagated
  * to its not-yet-processed neighbors using the standard F-S fractions,
- *		...	(here)	7/16
- *		3/16	5/16	1/16
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
  * We work left-to-right on even rows, right-to-left on odd rows.
  *
  * We can get away with a single array (holding one row's worth of errors)
@@ -172,20 +153,17 @@
  * The fserrors[] array has (#columns + 2) entries; the extra entry at
  * each end saves us from special-casing the first and last pixels.
  * Each entry is three values long, one value for each color component.
- *
- * Note: on a wide image, we might not have enough room in a PC's near data
- * segment to hold the error array; so it is allocated with alloc_large.
  */
 
 #if BITS_IN_JSAMPLE == 8
-typedef INT16 FSERROR;		/* 16 bits should be enough */
-typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
 #else
-typedef INT32 FSERROR;		/* may need more than 16 bits */
-typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+typedef INT32 FSERROR;          /* may need more than 16 bits */
+typedef INT32 LOCFSERROR;       /* be sure calculation temps are big enough */
 #endif
 
-typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+typedef FSERROR *FSERRPTR;      /* pointer to error array */
 
 
 /* Private subobject */
@@ -194,18 +172,18 @@
   struct jpeg_color_quantizer pub; /* public fields */
 
   /* Space for the eventually created colormap is stashed here */
-  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
-  int desired;			/* desired # of colors = size of colormap */
+  JSAMPARRAY sv_colormap;       /* colormap allocated at init time */
+  int desired;                  /* desired # of colors = size of colormap */
 
   /* Variables for accumulating image statistics */
-  hist3d histogram;		/* pointer to the histogram */
+  hist3d histogram;             /* pointer to the histogram */
 
-  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+  boolean needs_zeroed;         /* TRUE if next pass must zero histogram */
 
   /* Variables for Floyd-Steinberg dithering */
-  FSERRPTR fserrors;		/* accumulated errors */
-  boolean on_odd_row;		/* flag to remember which row we are on */
-  int * error_limiter;		/* table for clamping the applied error */
+  FSERRPTR fserrors;            /* accumulated errors */
+  boolean on_odd_row;           /* flag to remember which row we are on */
+  int * error_limiter;          /* table for clamping the applied error */
 } my_cquantizer;
 
 typedef my_cquantizer * my_cquantize_ptr;
@@ -222,7 +200,7 @@
 
 METHODDEF(void)
 prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
-		  JSAMPARRAY output_buf, int num_rows)
+                  JSAMPARRAY output_buf, int num_rows)
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   register JSAMPROW ptr;
@@ -237,11 +215,11 @@
     for (col = width; col > 0; col--) {
       /* get pixel value and index into the histogram */
       histp = & histogram[GETJSAMPLE(ptr[0]) >> C0_SHIFT]
-			 [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
-			 [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
+                         [GETJSAMPLE(ptr[1]) >> C1_SHIFT]
+                         [GETJSAMPLE(ptr[2]) >> C2_SHIFT];
       /* increment, check for overflow and undo increment if so. */
       if (++(*histp) <= 0)
-	(*histp)--;
+        (*histp)--;
       ptr += 3;
     }
   }
@@ -278,7 +256,7 @@
   register int i;
   register long maxc = 0;
   boxptr which = NULL;
-  
+
   for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
     if (boxp->colorcount > maxc && boxp->volume > 0) {
       which = boxp;
@@ -298,7 +276,7 @@
   register int i;
   register INT32 maxv = 0;
   boxptr which = NULL;
-  
+
   for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
     if (boxp->volume > maxv) {
       which = boxp;
@@ -321,75 +299,75 @@
   int c0min,c0max,c1min,c1max,c2min,c2max;
   INT32 dist0,dist1,dist2;
   long ccount;
-  
+
   c0min = boxp->c0min;  c0max = boxp->c0max;
   c1min = boxp->c1min;  c1max = boxp->c1max;
   c2min = boxp->c2min;  c2max = boxp->c2max;
-  
+
   if (c0max > c0min)
     for (c0 = c0min; c0 <= c0max; c0++)
       for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0min = c0min = c0;
-	    goto have_c0min;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0min = c0min = c0;
+            goto have_c0min;
+          }
       }
  have_c0min:
   if (c0max > c0min)
     for (c0 = c0max; c0 >= c0min; c0--)
       for (c1 = c1min; c1 <= c1max; c1++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c0max = c0max = c0;
-	    goto have_c0max;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0max = c0max = c0;
+            goto have_c0max;
+          }
       }
  have_c0max:
   if (c1max > c1min)
     for (c1 = c1min; c1 <= c1max; c1++)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1min = c1min = c1;
-	    goto have_c1min;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1min = c1min = c1;
+            goto have_c1min;
+          }
       }
  have_c1min:
   if (c1max > c1min)
     for (c1 = c1max; c1 >= c1min; c1--)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1][c2min];
-	for (c2 = c2min; c2 <= c2max; c2++)
-	  if (*histp++ != 0) {
-	    boxp->c1max = c1max = c1;
-	    goto have_c1max;
-	  }
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1max = c1max = c1;
+            goto have_c1max;
+          }
       }
  have_c1max:
   if (c2max > c2min)
     for (c2 = c2min; c2 <= c2max; c2++)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2min = c2min = c2;
-	    goto have_c2min;
-	  }
+        histp = & histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2min = c2min = c2;
+            goto have_c2min;
+          }
       }
  have_c2min:
   if (c2max > c2min)
     for (c2 = c2max; c2 >= c2min; c2--)
       for (c0 = c0min; c0 <= c0max; c0++) {
-	histp = & histogram[c0][c1min][c2];
-	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
-	  if (*histp != 0) {
-	    boxp->c2max = c2max = c2;
-	    goto have_c2max;
-	  }
+        histp = & histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2max = c2max = c2;
+            goto have_c2max;
+          }
       }
  have_c2max:
 
@@ -405,16 +383,16 @@
   dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
   dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
   boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
-  
+
   /* Now scan remaining volume of box and compute population */
   ccount = 0;
   for (c0 = c0min; c0 <= c0max; c0++)
     for (c1 = c1min; c1 <= c1max; c1++) {
       histp = & histogram[c0][c1][c2min];
       for (c2 = c2min; c2 <= c2max; c2++, histp++)
-	if (*histp != 0) {
-	  ccount++;
-	}
+        if (*histp != 0) {
+          ccount++;
+        }
     }
   boxp->colorcount = ccount;
 }
@@ -422,7 +400,7 @@
 
 LOCAL(int)
 median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
-	    int desired_colors)
+            int desired_colors)
 /* Repeatedly select and split the largest box until we have enough boxes */
 {
   int n,lb;
@@ -438,9 +416,9 @@
     } else {
       b1 = find_biggest_volume(boxlist, numboxes);
     }
-    if (b1 == NULL)		/* no splittable boxes left! */
+    if (b1 == NULL)             /* no splittable boxes left! */
       break;
-    b2 = &boxlist[numboxes];	/* where new box will go */
+    b2 = &boxlist[numboxes];    /* where new box will go */
     /* Copy the color bounds to the new box. */
     b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
     b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
@@ -454,15 +432,16 @@
     /* We want to break any ties in favor of green, then red, blue last.
      * This code does the right thing for R,G,B or B,G,R color orders only.
      */
-#if RGB_RED == 0
-    cmax = c1; n = 1;
-    if (c0 > cmax) { cmax = c0; n = 0; }
-    if (c2 > cmax) { n = 2; }
-#else
-    cmax = c1; n = 1;
-    if (c2 > cmax) { cmax = c2; n = 2; }
-    if (c0 > cmax) { n = 0; }
-#endif
+    if (rgb_red[cinfo->out_color_space] == 0) {
+      cmax = c1; n = 1;
+      if (c0 > cmax) { cmax = c0; n = 0; }
+      if (c2 > cmax) { n = 2; }
+    }
+    else {
+      cmax = c1; n = 1;
+      if (c2 > cmax) { cmax = c2; n = 2; }
+      if (c0 > cmax) { n = 0; }
+    }
     /* Choose split point along selected axis, and update box bounds.
      * Current algorithm: split at halfway point.
      * (Since the box has been shrunk to minimum volume,
@@ -511,24 +490,24 @@
   long c0total = 0;
   long c1total = 0;
   long c2total = 0;
-  
+
   c0min = boxp->c0min;  c0max = boxp->c0max;
   c1min = boxp->c1min;  c1max = boxp->c1max;
   c2min = boxp->c2min;  c2max = boxp->c2max;
-  
+
   for (c0 = c0min; c0 <= c0max; c0++)
     for (c1 = c1min; c1 <= c1max; c1++) {
       histp = & histogram[c0][c1][c2min];
       for (c2 = c2min; c2 <= c2max; c2++) {
-	if ((count = *histp++) != 0) {
-	  total += count;
-	  c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
-	  c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
-	  c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
-	}
+        if ((count = *histp++) != 0) {
+          total += count;
+          c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+          c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+          c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+        }
       }
     }
-  
+
   cinfo->colormap[0][icolor] = (JSAMPLE) ((c0total + (total>>1)) / total);
   cinfo->colormap[1][icolor] = (JSAMPLE) ((c1total + (total>>1)) / total);
   cinfo->colormap[2][icolor] = (JSAMPLE) ((c2total + (total>>1)) / total);
@@ -545,7 +524,7 @@
 
   /* Allocate workspace for box list */
   boxlist = (boxptr) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * sizeof(box));
   /* Initialize one box containing whole space */
   numboxes = 1;
   boxlist[0].c0min = 0;
@@ -644,7 +623,7 @@
 
 LOCAL(int)
 find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		    JSAMPLE colorlist[])
+                    JSAMPLE colorlist[])
 /* Locate the colormap entries close enough to an update box to be candidates
  * for the nearest entry to some cell(s) in the update box.  The update box
  * is specified by the center coordinates of its first cell.  The number of
@@ -659,7 +638,7 @@
   int centerc0, centerc1, centerc2;
   int i, x, ncolors;
   INT32 minmaxdist, min_dist, max_dist, tdist;
-  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+  INT32 mindist[MAXNUMCOLORS];  /* min distance to colormap entry i */
 
   /* Compute true coordinates of update box's upper corner and center.
    * Actually we compute the coordinates of the center of the upper-corner
@@ -701,11 +680,11 @@
       /* within cell range so no contribution to min_dist */
       min_dist = 0;
       if (x <= centerc0) {
-	tdist = (x - maxc0) * C0_SCALE;
-	max_dist = tdist*tdist;
+        tdist = (x - maxc0) * C0_SCALE;
+        max_dist = tdist*tdist;
       } else {
-	tdist = (x - minc0) * C0_SCALE;
-	max_dist = tdist*tdist;
+        tdist = (x - minc0) * C0_SCALE;
+        max_dist = tdist*tdist;
       }
     }
 
@@ -723,11 +702,11 @@
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc1) {
-	tdist = (x - maxc1) * C1_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - maxc1) * C1_SCALE;
+        max_dist += tdist*tdist;
       } else {
-	tdist = (x - minc1) * C1_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - minc1) * C1_SCALE;
+        max_dist += tdist*tdist;
       }
     }
 
@@ -745,15 +724,15 @@
     } else {
       /* within cell range so no contribution to min_dist */
       if (x <= centerc2) {
-	tdist = (x - maxc2) * C2_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - maxc2) * C2_SCALE;
+        max_dist += tdist*tdist;
       } else {
-	tdist = (x - minc2) * C2_SCALE;
-	max_dist += tdist*tdist;
+        tdist = (x - minc2) * C2_SCALE;
+        max_dist += tdist*tdist;
       }
     }
 
-    mindist[i] = min_dist;	/* save away the results */
+    mindist[i] = min_dist;      /* save away the results */
     if (max_dist < minmaxdist)
       minmaxdist = max_dist;
   }
@@ -773,7 +752,7 @@
 
 LOCAL(void)
 find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
-		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+                  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
 /* Find the closest colormap entry for each cell in the update box,
  * given the list of candidate colors prepared by find_nearby_colors.
  * Return the indexes of the closest entries in the bestcolor[] array.
@@ -783,13 +762,13 @@
 {
   int ic0, ic1, ic2;
   int i, icolor;
-  register INT32 * bptr;	/* pointer into bestdist[] array */
-  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
-  INT32 dist0, dist1;		/* initial distance values */
-  register INT32 dist2;		/* current distance in inner loop */
-  INT32 xx0, xx1;		/* distance increments */
+  register INT32 * bptr;        /* pointer into bestdist[] array */
+  JSAMPLE * cptr;               /* pointer into bestcolor[] array */
+  INT32 dist0, dist1;           /* initial distance values */
+  register INT32 dist2;         /* current distance in inner loop */
+  INT32 xx0, xx1;               /* distance increments */
   register INT32 xx2;
-  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  INT32 inc0, inc1, inc2;       /* initial values for increments */
   /* This array holds the distance to the nearest-so-far color for each cell */
   INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
@@ -797,17 +776,17 @@
   bptr = bestdist;
   for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
     *bptr++ = 0x7FFFFFFFL;
-  
+
   /* For each color selected by find_nearby_colors,
    * compute its distance to the center of each cell in the box.
    * If that's less than best-so-far, update best distance and color number.
    */
-  
+
   /* Nominal steps between cell centers ("x" in Thomas article) */
 #define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
 #define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
 #define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
-  
+
   for (i = 0; i < numcolors; i++) {
     icolor = GETJSAMPLE(colorlist[i]);
     /* Compute (square of) distance from minc0/c1/c2 to this color */
@@ -829,20 +808,20 @@
       dist1 = dist0;
       xx1 = inc1;
       for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
-	dist2 = dist1;
-	xx2 = inc2;
-	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
-	  if (dist2 < *bptr) {
-	    *bptr = dist2;
-	    *cptr = (JSAMPLE) icolor;
-	  }
-	  dist2 += xx2;
-	  xx2 += 2 * STEP_C2 * STEP_C2;
-	  bptr++;
-	  cptr++;
-	}
-	dist1 += xx1;
-	xx1 += 2 * STEP_C1 * STEP_C1;
+        dist2 = dist1;
+        xx2 = inc2;
+        for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+          if (dist2 < *bptr) {
+            *bptr = dist2;
+            *cptr = (JSAMPLE) icolor;
+          }
+          dist2 += xx2;
+          xx2 += 2 * STEP_C2 * STEP_C2;
+          bptr++;
+          cptr++;
+        }
+        dist1 += xx1;
+        xx1 += 2 * STEP_C1 * STEP_C1;
       }
       dist0 += xx0;
       xx0 += 2 * STEP_C0 * STEP_C0;
@@ -859,13 +838,13 @@
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   hist3d histogram = cquantize->histogram;
-  int minc0, minc1, minc2;	/* lower left corner of update box */
+  int minc0, minc1, minc2;      /* lower left corner of update box */
   int ic0, ic1, ic2;
-  register JSAMPLE * cptr;	/* pointer into bestcolor[] array */
-  register histptr cachep;	/* pointer into main cache array */
+  register JSAMPLE * cptr;      /* pointer into bestcolor[] array */
+  register histptr cachep;      /* pointer into main cache array */
   /* This array lists the candidate colormap indexes. */
   JSAMPLE colorlist[MAXNUMCOLORS];
-  int numcolors;		/* number of candidate colors */
+  int numcolors;                /* number of candidate colors */
   /* This array holds the actually closest colormap index for each cell. */
   JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
 
@@ -881,7 +860,7 @@
   minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
   minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
   minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
-  
+
   /* Determine which colormap entries are close enough to be candidates
    * for the nearest entry to some cell in the update box.
    */
@@ -889,10 +868,10 @@
 
   /* Determine the actually nearest colors. */
   find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
-		   bestcolor);
+                   bestcolor);
 
   /* Save the best color numbers (plus 1) in the main cache array */
-  c0 <<= BOX_C0_LOG;		/* convert ID back to base cell indexes */
+  c0 <<= BOX_C0_LOG;            /* convert ID back to base cell indexes */
   c1 <<= BOX_C1_LOG;
   c2 <<= BOX_C2_LOG;
   cptr = bestcolor;
@@ -900,7 +879,7 @@
     for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
       cachep = & histogram[c0+ic0][c1+ic1][c2];
       for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
-	*cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
+        *cachep++ = (histcell) (GETJSAMPLE(*cptr++) + 1);
       }
     }
   }
@@ -913,7 +892,7 @@
 
 METHODDEF(void)
 pass2_no_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs no dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
@@ -937,7 +916,7 @@
       /* If we have not seen this color before, find nearest colormap entry */
       /* and update the cache */
       if (*cachep == 0)
-	fill_inverse_cmap(cinfo, c0,c1,c2);
+        fill_inverse_cmap(cinfo, c0,c1,c2);
       /* Now emit the colormap index for this cell */
       *outptr++ = (JSAMPLE) (*cachep - 1);
     }
@@ -947,20 +926,20 @@
 
 METHODDEF(void)
 pass2_fs_dither (j_decompress_ptr cinfo,
-		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+                 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
 /* This version performs Floyd-Steinberg dithering */
 {
   my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
   hist3d histogram = cquantize->histogram;
-  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */
   LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
   LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
-  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
-  JSAMPROW inptr;		/* => current input pixel */
-  JSAMPROW outptr;		/* => current output pixel */
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
+  JSAMPROW inptr;               /* => current input pixel */
+  JSAMPROW outptr;              /* => current output pixel */
   histptr cachep;
-  int dir;			/* +1 or -1 depending on direction */
-  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int dir;                      /* +1 or -1 depending on direction */
+  int dir3;                     /* 3*dir, for advancing inptr & errorptr */
   int row;
   JDIMENSION col;
   JDIMENSION width = cinfo->output_width;
@@ -976,7 +955,7 @@
     outptr = output_buf[row];
     if (cquantize->on_odd_row) {
       /* work right to left in this row */
-      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      inptr += (width-1) * 3;   /* so point to rightmost pixel */
       outptr += width-1;
       dir = -1;
       dir3 = -3;
@@ -1028,53 +1007,44 @@
       /* If we have not seen this color before, find nearest colormap */
       /* entry and update the cache */
       if (*cachep == 0)
-	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+        fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
       /* Now emit the colormap index for this cell */
       { register int pixcode = *cachep - 1;
-	*outptr = (JSAMPLE) pixcode;
-	/* Compute representation error for this pixel */
-	cur0 -= GETJSAMPLE(colormap0[pixcode]);
-	cur1 -= GETJSAMPLE(colormap1[pixcode]);
-	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+        *outptr = (JSAMPLE) pixcode;
+        /* Compute representation error for this pixel */
+        cur0 -= GETJSAMPLE(colormap0[pixcode]);
+        cur1 -= GETJSAMPLE(colormap1[pixcode]);
+        cur2 -= GETJSAMPLE(colormap2[pixcode]);
       }
       /* Compute error fractions to be propagated to adjacent pixels.
        * Add these into the running sums, and simultaneously shift the
        * next-line error sums left by 1 column.
        */
-      { register LOCFSERROR bnexterr, delta;
+      { register LOCFSERROR bnexterr;
 
-	bnexterr = cur0;	/* Process component 0 */
-	delta = cur0 * 2;
-	cur0 += delta;		/* form error * 3 */
-	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
-	cur0 += delta;		/* form error * 5 */
-	bpreverr0 = belowerr0 + cur0;
-	belowerr0 = bnexterr;
-	cur0 += delta;		/* form error * 7 */
-	bnexterr = cur1;	/* Process component 1 */
-	delta = cur1 * 2;
-	cur1 += delta;		/* form error * 3 */
-	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
-	cur1 += delta;		/* form error * 5 */
-	bpreverr1 = belowerr1 + cur1;
-	belowerr1 = bnexterr;
-	cur1 += delta;		/* form error * 7 */
-	bnexterr = cur2;	/* Process component 2 */
-	delta = cur2 * 2;
-	cur2 += delta;		/* form error * 3 */
-	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
-	cur2 += delta;		/* form error * 5 */
-	bpreverr2 = belowerr2 + cur2;
-	belowerr2 = bnexterr;
-	cur2 += delta;		/* form error * 7 */
+        bnexterr = cur0;        /* Process component 0 */
+        errorptr[0] = (FSERROR) (bpreverr0 + cur0 * 3);
+        bpreverr0 = belowerr0 + cur0 * 5;
+        belowerr0 = bnexterr;
+        cur0 *= 7;
+        bnexterr = cur1;        /* Process component 1 */
+        errorptr[1] = (FSERROR) (bpreverr1 + cur1 * 3);
+        bpreverr1 = belowerr1 + cur1 * 5;
+        belowerr1 = bnexterr;
+        cur1 *= 7;
+        bnexterr = cur2;        /* Process component 2 */
+        errorptr[2] = (FSERROR) (bpreverr2 + cur2 * 3);
+        bpreverr2 = belowerr2 + cur2 * 5;
+        belowerr2 = bnexterr;
+        cur2 *= 7;
       }
       /* At this point curN contains the 7/16 error value to be propagated
        * to the next pixel on the current line, and all the errors for the
        * next line have been shifted over.  We are therefore ready to move on.
        */
-      inptr += dir3;		/* Advance pixel pointers to next column */
+      inptr += dir3;            /* Advance pixel pointers to next column */
       outptr += dir;
-      errorptr += dir3;		/* advance errorptr to current column */
+      errorptr += dir3;         /* advance errorptr to current column */
     }
     /* Post-loop cleanup: we must unload the final error values into the
      * final fserrors[] entry.  Note we need not unload belowerrN because
@@ -1113,8 +1083,8 @@
   int in, out;
 
   table = (int *) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
-  table += MAXJSAMPLE;		/* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * sizeof(int));
+  table += MAXJSAMPLE;          /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
   cquantize->error_limiter = table;
 
 #define STEPSIZE ((MAXJSAMPLE+1)/16)
@@ -1197,16 +1167,16 @@
 
     if (cinfo->dither_mode == JDITHER_FS) {
       size_t arraysize = (size_t) ((cinfo->output_width + 2) *
-				   (3 * SIZEOF(FSERROR)));
+                                   (3 * sizeof(FSERROR)));
       /* Allocate Floyd-Steinberg workspace if we didn't already. */
       if (cquantize->fserrors == NULL)
-	cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
-	  ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
+        cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+          ((j_common_ptr) cinfo, JPOOL_IMAGE, arraysize);
       /* Initialize the propagated errors to zero. */
-      jzero_far((void FAR *) cquantize->fserrors, arraysize);
+      jzero_far((void *) cquantize->fserrors, arraysize);
       /* Make the error-limit table if we didn't already. */
       if (cquantize->error_limiter == NULL)
-	init_error_limit(cinfo);
+        init_error_limit(cinfo);
       cquantize->on_odd_row = FALSE;
     }
 
@@ -1214,8 +1184,8 @@
   /* Zero the histogram or inverse color map, if necessary */
   if (cquantize->needs_zeroed) {
     for (i = 0; i < HIST_C0_ELEMS; i++) {
-      jzero_far((void FAR *) histogram[i],
-		HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+      jzero_far((void *) histogram[i],
+                HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell));
     }
     cquantize->needs_zeroed = FALSE;
   }
@@ -1248,11 +1218,11 @@
 
   cquantize = (my_cquantize_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				SIZEOF(my_cquantizer));
+                                sizeof(my_cquantizer));
   cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
   cquantize->pub.start_pass = start_pass_2_quant;
   cquantize->pub.new_color_map = new_color_map_2_quant;
-  cquantize->fserrors = NULL;	/* flag optional arrays not allocated */
+  cquantize->fserrors = NULL;   /* flag optional arrays not allocated */
   cquantize->error_limiter = NULL;
 
   /* Make sure jdmaster didn't give me a case I can't handle */
@@ -1261,17 +1231,17 @@
 
   /* Allocate the histogram/inverse colormap storage */
   cquantize->histogram = (hist3d) (*cinfo->mem->alloc_small)
-    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d));
   for (i = 0; i < HIST_C0_ELEMS; i++) {
     cquantize->histogram[i] = (hist2d) (*cinfo->mem->alloc_large)
       ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+       HIST_C1_ELEMS*HIST_C2_ELEMS * sizeof(histcell));
   }
   cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
 
   /* Allocate storage for the completed colormap, if required.
-   * We do this now since it is FAR storage and may affect
-   * the memory manager's space calculations.
+   * We do this now since it may affect the memory manager's space
+   * calculations.
    */
   if (cinfo->enable_2pass_quant) {
     /* Make sure color count is acceptable */
@@ -1294,14 +1264,15 @@
     cinfo->dither_mode = JDITHER_FS;
 
   /* Allocate Floyd-Steinberg workspace if necessary.
-   * This isn't really needed until pass 2, but again it is FAR storage.
-   * Although we will cope with a later change in dither_mode,
-   * we do not promise to honor max_memory_to_use if dither_mode changes.
+   * This isn't really needed until pass 2, but again it may affect the memory
+   * manager's space calculations.  Although we will cope with a later change
+   * in dither_mode, we do not promise to honor max_memory_to_use if
+   * dither_mode changes.
    */
   if (cinfo->dither_mode == JDITHER_FS) {
     cquantize->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
       ((j_common_ptr) cinfo, JPOOL_IMAGE,
-       (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR))));
+       (size_t) ((cinfo->output_width + 2) * (3 * sizeof(FSERROR))));
     /* Might as well create the error-limiting table too. */
     init_error_limit(cinfo);
   }
diff --git a/jsimd.h b/jsimd.h
new file mode 100644
index 0000000..d45fd70
--- /dev/null
+++ b/jsimd.h
@@ -0,0 +1,80 @@
+/*
+ * jsimd.h
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ */
+
+EXTERN(int) jsimd_can_rgb_ycc (void);
+EXTERN(int) jsimd_can_rgb_gray (void);
+EXTERN(int) jsimd_can_ycc_rgb (void);
+EXTERN(int) jsimd_c_can_null_convert (void);
+
+EXTERN(void) jsimd_rgb_ycc_convert
+        (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_rgb_gray_convert
+        (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_ycc_rgb_convert
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_c_null_convert
+        (j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+EXTERN(int) jsimd_can_h2v2_downsample (void);
+EXTERN(int) jsimd_can_h2v1_downsample (void);
+
+EXTERN(void) jsimd_h2v2_downsample
+        (j_compress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(int) jsimd_can_h2v2_smooth_downsample (void);
+
+EXTERN(void) jsimd_h2v2_smooth_downsample
+        (j_compress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v1_downsample
+        (j_compress_ptr cinfo, jpeg_component_info * compptr,
+        JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(int) jsimd_can_h2v2_upsample (void);
+EXTERN(int) jsimd_can_h2v1_upsample (void);
+EXTERN(int) jsimd_can_int_upsample (void);
+
+EXTERN(void) jsimd_h2v2_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v1_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_int_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
+EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
+
+EXTERN(void) jsimd_h2v2_fancy_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v1_fancy_upsample
+        (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+EXTERN(int) jsimd_can_h2v2_merged_upsample (void);
+EXTERN(int) jsimd_can_h2v1_merged_upsample (void);
+
+EXTERN(void) jsimd_h2v2_merged_upsample
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_merged_upsample
+        (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
diff --git a/jsimd_none.c b/jsimd_none.c
new file mode 100644
index 0000000..96a9842
--- /dev/null
+++ b/jsimd_none.c
@@ -0,0 +1,376 @@
+/*
+ * jsimd_none.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains stubs for when there is no SIMD support available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsimd.h"
+#include "jdct.h"
+#include "jsimddct.h"
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_c_null_convert (j_compress_ptr cinfo,
+                      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                      JDIMENSION output_row, int num_rows)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_smooth_downsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_int_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_idct_6x6 (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_idct_12x12 (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  return 0;                     /* always 0: stub for builds without SIMD */
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{                               /* stub: no SIMD support, so a no-op */
+}
+
diff --git a/jsimddct.h b/jsimddct.h
new file mode 100644
index 0000000..aa421fb
--- /dev/null
+++ b/jsimddct.h
@@ -0,0 +1,74 @@
+/*
+ * jsimddct.h
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ */
+
+EXTERN(int) jsimd_can_convsamp (void);
+EXTERN(int) jsimd_can_convsamp_float (void);
+
+EXTERN(void) jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                             DCTELEM * workspace);
+EXTERN(void) jsimd_convsamp_float (JSAMPARRAY sample_data,
+                                   JDIMENSION start_col,
+                                   FAST_FLOAT * workspace);
+
+EXTERN(int) jsimd_can_fdct_islow (void);
+EXTERN(int) jsimd_can_fdct_ifast (void);
+EXTERN(int) jsimd_can_fdct_float (void);
+
+EXTERN(void) jsimd_fdct_islow (DCTELEM * data);
+EXTERN(void) jsimd_fdct_ifast (DCTELEM * data);
+EXTERN(void) jsimd_fdct_float (FAST_FLOAT * data);
+
+EXTERN(int) jsimd_can_quantize (void);
+EXTERN(int) jsimd_can_quantize_float (void);
+
+EXTERN(void) jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                             DCTELEM * workspace);
+EXTERN(void) jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                                   FAST_FLOAT * workspace);
+
+EXTERN(int) jsimd_can_idct_2x2 (void);
+EXTERN(int) jsimd_can_idct_4x4 (void);
+EXTERN(int) jsimd_can_idct_6x6 (void);
+EXTERN(int) jsimd_can_idct_12x12 (void);
+
+EXTERN(void) jsimd_idct_2x2 (j_decompress_ptr cinfo,
+                             jpeg_component_info * compptr,
+                             JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                             JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4 (j_decompress_ptr cinfo,
+                             jpeg_component_info * compptr,
+                             JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                             JDIMENSION output_col);
+EXTERN(void) jsimd_idct_6x6 (j_decompress_ptr cinfo,
+                             jpeg_component_info * compptr,
+                             JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                             JDIMENSION output_col);
+EXTERN(void) jsimd_idct_12x12 (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                               JDIMENSION output_col);
+
+EXTERN(int) jsimd_can_idct_islow (void);
+EXTERN(int) jsimd_can_idct_ifast (void);
+EXTERN(int) jsimd_can_idct_float (void);
+
+EXTERN(void) jsimd_idct_islow (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                               JDIMENSION output_col);
+EXTERN(void) jsimd_idct_ifast (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                               JDIMENSION output_col);
+EXTERN(void) jsimd_idct_float (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                               JDIMENSION output_col);
diff --git a/jstdhuff.c b/jstdhuff.c
new file mode 100644
index 0000000..b29e5ea
--- /dev/null
+++ b/jstdhuff.c
@@ -0,0 +1,133 @@
+/*
+ * jstdhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to set the default Huffman tables, if they are
+ * not already set.
+ */
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_common_ptr cinfo,
+                JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Define a Huffman table in *htblptr; a slot that is already set is kept */
+{
+  int nsymbols, len;
+
+  if (*htblptr == NULL)
+    *htblptr = jpeg_alloc_huff_table(cinfo);
+  else
+    return;                     /* never overwrite an existing table */
+
+  /* Copy the number-of-symbols-of-each-code-length counts */
+  MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+
+  /* Validate the counts.  We do this here mainly so we can copy the right
+   * number of symbols from the val[] array, without risking marching off
+   * the end of memory.  jchuff.c will do a more thorough test later.
+   */
+  nsymbols = 0;
+  for (len = 1; len <= 16; len++)
+    nsymbols += bits[len];      /* bits[0] is not a code length; skip it */
+  if (nsymbols < 1 || nsymbols > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+  /* Copy the symbols, then clear the unused tail so no byte is left unset */
+  MEMCOPY((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
+  MEMZERO(&((*htblptr)->huffval[nsymbols]), (256 - nsymbols) * sizeof(UINT8));
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*htblptr)->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables (j_common_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+{
+  JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs;
+  /* bits_*[k] = number of codes of length k (k = 1..16); val_* = the symbols */
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+
+  if (cinfo->is_decompressor) { /* use the table slots of the matching struct */
+    dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs;
+    ac_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->ac_huff_tbl_ptrs;
+  } else {
+    dc_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->dc_huff_tbl_ptrs;
+    ac_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->ac_huff_tbl_ptrs;
+  }
+  /* Slot 0 <- luminance, slot 1 <- chrominance; slots already set are kept */
+  add_huff_table(cinfo, &dc_huff_tbl_ptrs[0], bits_dc_luminance,
+                 val_dc_luminance);
+  add_huff_table(cinfo, &ac_huff_tbl_ptrs[0], bits_ac_luminance,
+                 val_ac_luminance);
+  add_huff_table(cinfo, &dc_huff_tbl_ptrs[1], bits_dc_chrominance,
+                 val_dc_chrominance);
+  add_huff_table(cinfo, &ac_huff_tbl_ptrs[1], bits_ac_chrominance,
+                 val_ac_chrominance);
+}
diff --git a/jutils.c b/jutils.c
index d18a955..0e2611c 100644
--- a/jutils.c
+++ b/jutils.c
@@ -1,8 +1,10 @@
 /*
  * jutils.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code
+ * relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains tables and miscellaneous utility routines needed
@@ -21,7 +23,7 @@
  * of a DCT block read in natural order (left to right, top to bottom).
  */
 
-#if 0				/* This table is not actually needed in v6a */
+#if 0                           /* This table is not actually needed in v6a */
 
 const int jpeg_zigzag_order[DCTSIZE2] = {
    0,  1,  5,  6, 14, 15, 27, 28,
@@ -87,30 +89,10 @@
 }
 
 
-/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
- * and coefficient-block arrays.  This won't work on 80x86 because the arrays
- * are FAR and we're assuming a small-pointer memory model.  However, some
- * DOS compilers provide far-pointer versions of memcpy() and memset() even
- * in the small-model libraries.  These will be used if USE_FMEM is defined.
- * Otherwise, the routines below do it the hard way.  (The performance cost
- * is not all that great, because these routines aren't very heavily used.)
- */
-
-#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macros */
-#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
-#define FMEMZERO(target,size)	MEMZERO(target,size)
-#else				/* 80x86 case, define if we can */
-#ifdef USE_FMEM
-#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
-#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
-#endif
-#endif
-
-
 GLOBAL(void)
 jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
-		   JSAMPARRAY output_array, int dest_row,
-		   int num_rows, JDIMENSION num_cols)
+                   JSAMPARRAY output_array, int dest_row,
+                   int num_rows, JDIMENSION num_cols)
 /* Copy some rows of samples from one place to another.
  * num_rows rows are copied from input_array[source_row++]
  * to output_array[dest_row++]; these areas may overlap for duplication.
@@ -118,11 +100,7 @@
  */
 {
   register JSAMPROW inptr, outptr;
-#ifdef FMEMCOPY
-  register size_t count = (size_t) (num_cols * SIZEOF(JSAMPLE));
-#else
-  register JDIMENSION count;
-#endif
+  register size_t count = (size_t) (num_cols * sizeof(JSAMPLE));
   register int row;
 
   input_array += source_row;
@@ -131,49 +109,24 @@
   for (row = num_rows; row > 0; row--) {
     inptr = *input_array++;
     outptr = *output_array++;
-#ifdef FMEMCOPY
-    FMEMCOPY(outptr, inptr, count);
-#else
-    for (count = num_cols; count > 0; count--)
-      *outptr++ = *inptr++;	/* needn't bother with GETJSAMPLE() here */
-#endif
+    MEMCOPY(outptr, inptr, count);
   }
 }
 
 
 GLOBAL(void)
 jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
-		 JDIMENSION num_blocks)
+                 JDIMENSION num_blocks)
 /* Copy a row of coefficient blocks from one place to another. */
 {
-#ifdef FMEMCOPY
-  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
-#else
-  register JCOEFPTR inptr, outptr;
-  register long count;
-
-  inptr = (JCOEFPTR) input_row;
-  outptr = (JCOEFPTR) output_row;
-  for (count = (long) num_blocks * DCTSIZE2; count > 0; count--) {
-    *outptr++ = *inptr++;
-  }
-#endif
+  MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
 }
 
 
 GLOBAL(void)
-jzero_far (void FAR * target, size_t bytestozero)
-/* Zero out a chunk of FAR memory. */
+jzero_far (void * target, size_t bytestozero)
+/* Zero out a chunk of memory. */
 /* This might be sample-array data, block-array data, or alloc_large data. */
 {
-#ifdef FMEMZERO
-  FMEMZERO(target, bytestozero);
-#else
-  register char FAR * ptr = (char FAR *) target;
-  register size_t count;
-
-  for (count = bytestozero; count > 0; count--) {
-    *ptr++ = 0;
-  }
-#endif
+  MEMZERO(target, bytestozero);
 }
diff --git a/jversion.h b/jversion.h
index 6472c58..25c3cf0 100644
--- a/jversion.h
+++ b/jversion.h
@@ -1,14 +1,36 @@
 /*
  * jversion.h
  *
- * Copyright (C) 1991-1998, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2012-2014, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains software version identification.
  */
 
 
-#define JVERSION	"6b  27-Mar-1998"
+#if JPEG_LIB_VERSION >= 80
 
-#define JCOPYRIGHT	"Copyright (C) 1998, Thomas G. Lane"
+#define JVERSION        "8d  15-Jan-2012"
+
+#elif JPEG_LIB_VERSION >= 70
+
+#define JVERSION        "7  27-Jun-2009"
+
+#else
+
+#define JVERSION        "6b  27-Mar-1998"
+
+#endif
+
+#define JCOPYRIGHT      "Copyright (C) 1991-2012 Thomas G. Lane, Guido Vollbeding\n" \
+                        "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
+                        "Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
+                        "Copyright (C) 2009-2014 D. R. Commander\n" \
+                        "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
+                        "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
+                        "Copyright (C) 2013 Linaro Limited"
+
+#define JCOPYRIGHT_SHORT "Copyright (C) 1991-2014 The libjpeg-turbo Project and many others"
diff --git a/libjpeg.map.in b/libjpeg.map.in
new file mode 100644
index 0000000..b4480d8
--- /dev/null
+++ b/libjpeg.map.in
@@ -0,0 +1,11 @@
+LIBJPEGTURBO_@JPEG_LIB_VERSION_DECIMAL@ {
+  @MEM_SRCDST_FUNCTIONS@
+  local:
+    jsimd_*;
+    jconst_*;
+};
+
+LIBJPEG_@JPEG_LIB_VERSION_DECIMAL@ {
+  global:
+    *;
+};
diff --git a/libjpeg.doc b/libjpeg.txt
similarity index 78%
rename from libjpeg.doc
rename to libjpeg.txt
index 689b206..7dbb354 100644
--- a/libjpeg.doc
+++ b/libjpeg.txt
@@ -1,7 +1,9 @@
 USING THE IJG JPEG LIBRARY
 
-Copyright (C) 1994-1998, Thomas G. Lane.
-This file is part of the Independent JPEG Group's software.
+This file was part of the Independent JPEG Group's software:
+Copyright (C) 1994-2011, Thomas G. Lane, Guido Vollbeding.
+libjpeg-turbo Modifications:
+Copyright (C) 2010, 2014, D. R. Commander.
 For conditions of distribution and use, see the accompanying README file.
 
 
@@ -25,32 +27,31 @@
 -----------------
 
 Overview:
-	Functions provided by the library
-	Outline of typical usage
+        Functions provided by the library
+        Outline of typical usage
 Basic library usage:
-	Data formats
-	Compression details
-	Decompression details
-	Mechanics of usage: include files, linking, etc
+        Data formats
+        Compression details
+        Decompression details
+        Mechanics of usage: include files, linking, etc
 Advanced features:
-	Compression parameter selection
-	Decompression parameter selection
-	Special color spaces
-	Error handling
-	Compressed data handling (source and destination managers)
-	I/O suspension
-	Progressive JPEG support
-	Buffered-image mode
-	Abbreviated datastreams and multiple images
-	Special markers
-	Raw (downsampled) image data
-	Really raw data: DCT coefficients
-	Progress monitoring
-	Memory management
-	Memory usage
-	Library compile-time options
-	Portability considerations
-	Notes for MS-DOS implementors
+        Compression parameter selection
+        Decompression parameter selection
+        Special color spaces
+        Error handling
+        Compressed data handling (source and destination managers)
+        I/O suspension
+        Progressive JPEG support
+        Buffered-image mode
+        Abbreviated datastreams and multiple images
+        Special markers
+        Raw (downsampled) image data
+        Really raw data: DCT coefficients
+        Progress monitoring
+        Memory management
+        Memory usage
+        Library compile-time options
+        Portability considerations
 
 You should read at least the overview and basic usage sections before trying
 to program with the library.  The sections on advanced features can be read
@@ -91,11 +92,10 @@
 the ISO JPEG standard; most baseline, extended-sequential, and progressive
 JPEG processes are supported.  (Our subset includes all features now in common
 use.)  Unsupported ISO options include:
-	* Hierarchical storage
-	* Lossless JPEG
-	* Arithmetic entropy coding (unsupported for legal reasons)
-	* DNL marker
-	* Nonintegral subsampling ratios
+        * Hierarchical storage
+        * Lossless JPEG
+        * DNL marker
+        * Nonintegral subsampling ratios
 We support both 8- and 12-bit data precision, but this is a compile-time
 choice rather than a run-time choice; hence it is difficult to use both
 precisions in a single application.
@@ -112,14 +112,14 @@
 
 The rough outline of a JPEG compression operation is:
 
-	Allocate and initialize a JPEG compression object
-	Specify the destination for the compressed data (eg, a file)
-	Set parameters for compression, including image size & colorspace
-	jpeg_start_compress(...);
-	while (scan lines remain to be written)
-		jpeg_write_scanlines(...);
-	jpeg_finish_compress(...);
-	Release the JPEG compression object
+        Allocate and initialize a JPEG compression object
+        Specify the destination for the compressed data (eg, a file)
+        Set parameters for compression, including image size & colorspace
+        jpeg_start_compress(...);
+        while (scan lines remain to be written)
+                jpeg_write_scanlines(...);
+        jpeg_finish_compress(...);
+        Release the JPEG compression object
 
 A JPEG compression object holds parameters and working state for the JPEG
 library.  We make creation/destruction of the object separate from starting
@@ -138,15 +138,15 @@
 
 Similarly, the rough outline of a JPEG decompression operation is:
 
-	Allocate and initialize a JPEG decompression object
-	Specify the source of the compressed data (eg, a file)
-	Call jpeg_read_header() to obtain image info
-	Set parameters for decompression
-	jpeg_start_decompress(...);
-	while (scan lines remain to be read)
-		jpeg_read_scanlines(...);
-	jpeg_finish_decompress(...);
-	Release the JPEG decompression object
+        Allocate and initialize a JPEG decompression object
+        Specify the source of the compressed data (eg, a file)
+        Call jpeg_read_header() to obtain image info
+        Set parameters for decompression
+        jpeg_start_decompress(...);
+        while (scan lines remain to be read)
+                jpeg_read_scanlines(...);
+        jpeg_finish_decompress(...);
+        Release the JPEG decompression object
 
 This is comparable to the compression outline except that reading the
 datastream header is a separate step.  This is helpful because information
@@ -271,11 +271,11 @@
 
 Typical code for this step, if you are using the default error handler, is
 
-	struct jpeg_compress_struct cinfo;
-	struct jpeg_error_mgr jerr;
-	...
-	cinfo.err = jpeg_std_error(&jerr);
-	jpeg_create_compress(&cinfo);
+        struct jpeg_compress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_compress(&cinfo);
 
 jpeg_create_compress allocates a small amount of memory, so it could fail
 if you are out of memory.  In that case it will exit via the error handler;
@@ -292,13 +292,13 @@
 If you use the standard destination module, you must open the target stdio
 stream beforehand.  Typical code for this step looks like:
 
-	FILE * outfile;
-	...
-	if ((outfile = fopen(filename, "wb")) == NULL) {
-	    fprintf(stderr, "can't open %s\n", filename);
-	    exit(1);
-	}
-	jpeg_stdio_dest(&cinfo, outfile);
+        FILE * outfile;
+        ...
+        if ((outfile = fopen(filename, "wb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_dest(&cinfo, outfile);
 
 where the last line invokes the standard destination module.
 
@@ -319,10 +319,10 @@
 You must supply information about the source image by setting the following
 fields in the JPEG object (cinfo structure):
 
-	image_width		Width of image, in pixels
-	image_height		Height of image, in pixels
-	input_components	Number of color channels (samples per pixel)
-	in_color_space		Color space of source image
+        image_width             Width of image, in pixels
+        image_height            Height of image, in pixels
+        input_components        Number of color channels (samples per pixel)
+        in_color_space          Color space of source image
 
 The image dimensions are, hopefully, obvious.  JPEG supports image dimensions
 of 1 to 64K pixels in either direction.  The input color space is typically
@@ -346,13 +346,13 @@
 
 Typical code for a 24-bit RGB source image is
 
-	cinfo.image_width = Width; 	/* image width and height, in pixels */
-	cinfo.image_height = Height;
-	cinfo.input_components = 3;	/* # of color components per pixel */
-	cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
+        cinfo.image_width = Width;      /* image width and height, in pixels */
+        cinfo.image_height = Height;
+        cinfo.input_components = 3;     /* # of color components per pixel */
+        cinfo.in_color_space = JCS_RGB; /* colorspace of input image */
 
-	jpeg_set_defaults(&cinfo);
-	/* Make optional parameter settings here */
+        jpeg_set_defaults(&cinfo);
+        /* Make optional parameter settings here */
 
 
 4. jpeg_start_compress(...);
@@ -364,7 +364,7 @@
 
 Typical code:
 
-	jpeg_start_compress(&cinfo, TRUE);
+        jpeg_start_compress(&cinfo, TRUE);
 
 The "TRUE" parameter ensures that a complete JPEG interchange datastream
 will be written.  This is appropriate in most cases.  If you think you might
@@ -377,7 +377,7 @@
 
 
 5. while (scan lines remain to be written)
-	jpeg_write_scanlines(...);
+        jpeg_write_scanlines(...);
 
 Now write all the required image data by calling jpeg_write_scanlines()
 one or more times.  You can pass one or more scanlines in each call, up
@@ -402,15 +402,15 @@
 example.c shows the following code for the case of a full-size 2-D source
 array containing 3-byte RGB pixels:
 
-	JSAMPROW row_pointer[1];	/* pointer to a single row */
-	int row_stride;			/* physical row width in buffer */
+        JSAMPROW row_pointer[1];        /* pointer to a single row */
+        int row_stride;                 /* physical row width in buffer */
 
-	row_stride = image_width * 3;	/* JSAMPLEs per row in image_buffer */
+        row_stride = image_width * 3;   /* JSAMPLEs per row in image_buffer */
 
-	while (cinfo.next_scanline < cinfo.image_height) {
-	    row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride];
-	    jpeg_write_scanlines(&cinfo, row_pointer, 1);
-	}
+        while (cinfo.next_scanline < cinfo.image_height) {
+            row_pointer[0] = & image_buffer[cinfo.next_scanline * row_stride];
+            jpeg_write_scanlines(&cinfo, row_pointer, 1);
+        }
 
 jpeg_write_scanlines() returns the number of scanlines actually written.
 This will normally be equal to the number passed in, so you can usually
@@ -435,7 +435,7 @@
 
 Typical code:
 
-	jpeg_finish_compress(&cinfo);
+        jpeg_finish_compress(&cinfo);
 
 If using the stdio destination manager, don't forget to close the output
 stdio stream (if necessary) afterwards.
@@ -478,7 +478,7 @@
 
 Typical code:
 
-	jpeg_destroy_compress(&cinfo);
+        jpeg_destroy_compress(&cinfo);
 
 
 8. Aborting.
@@ -519,11 +519,11 @@
 
 Typical code:
 
-	struct jpeg_decompress_struct cinfo;
-	struct jpeg_error_mgr jerr;
-	...
-	cinfo.err = jpeg_std_error(&jerr);
-	jpeg_create_decompress(&cinfo);
+        struct jpeg_decompress_struct cinfo;
+        struct jpeg_error_mgr jerr;
+        ...
+        cinfo.err = jpeg_std_error(&jerr);
+        jpeg_create_decompress(&cinfo);
 
 (Both here and in the IJG code, we usually use variable name "cinfo" for
 both compression and decompression objects.)
@@ -539,13 +539,13 @@
 If you use the standard source module, you must open the source stdio stream
 beforehand.  Typical code for this step looks like:
 
-	FILE * infile;
-	...
-	if ((infile = fopen(filename, "rb")) == NULL) {
-	    fprintf(stderr, "can't open %s\n", filename);
-	    exit(1);
-	}
-	jpeg_stdio_src(&cinfo, infile);
+        FILE * infile;
+        ...
+        if ((infile = fopen(filename, "rb")) == NULL) {
+            fprintf(stderr, "can't open %s\n", filename);
+            exit(1);
+        }
+        jpeg_stdio_src(&cinfo, infile);
 
 where the last line invokes the standard source module.
 
@@ -568,7 +568,7 @@
 
 Typical code for this step is just
 
-	jpeg_read_header(&cinfo, TRUE);
+        jpeg_read_header(&cinfo, TRUE);
 
 This will read the source datastream header markers, up to the beginning
 of the compressed data proper.  On return, the image dimensions and other
@@ -616,7 +616,7 @@
 
 Typical code is just
 
-	jpeg_start_decompress(&cinfo);
+        jpeg_start_decompress(&cinfo);
 
 If you have requested a multi-pass operating mode, such as 2-pass color
 quantization, jpeg_start_decompress() will do everything needed before data
@@ -629,12 +629,12 @@
 scaling, are available in the JPEG object; so is the selected colormap, if
 colormapped output has been requested.  Useful fields include
 
-	output_width		image width and height, as scaled
-	output_height
-	out_color_components	# of color components in out_color_space
-	output_components	# of color components returned per pixel
-	colormap		the selected colormap, if any
-	actual_number_of_colors		number of entries in colormap
+        output_width            image width and height, as scaled
+        output_height
+        out_color_components    # of color components in out_color_space
+        output_components       # of color components returned per pixel
+        colormap                the selected colormap, if any
+        actual_number_of_colors         number of entries in colormap
 
 output_components is 1 (a colormap index) when quantizing colors; otherwise it
 equals out_color_components.  It is the number of JSAMPLE values that will be
@@ -653,7 +653,7 @@
 
 
 6. while (scan lines remain to be read)
-	jpeg_read_scanlines(...);
+        jpeg_read_scanlines(...);
 
 Now you can read the decompressed image data by calling jpeg_read_scanlines()
 one or more times.  At each call, you pass in the maximum number of scanlines
@@ -695,7 +695,7 @@
 
 Typical code:
 
-	jpeg_finish_decompress(&cinfo);
+        jpeg_finish_decompress(&cinfo);
 
 If using the stdio source manager, don't forget to close the source stdio
 stream if necessary.
@@ -718,7 +718,7 @@
 
 Typical code:
 
-	jpeg_destroy_decompress(&cinfo);
+        jpeg_destroy_decompress(&cinfo);
 
 
 9. Aborting.
@@ -750,7 +750,7 @@
 machines) and reference it at your link step.  If you use only half of the
 library (only compression or only decompression), only that much code will be
 included from the library, unless your linker is hopelessly brain-damaged.
-The supplied makefiles build libjpeg.a automatically (see install.doc).
+The supplied makefiles build libjpeg.a automatically (see install.txt).
 
 While you can build the JPEG library as a shared library if the whim strikes
 you, we don't really recommend it.  The trouble with shared libraries is that
@@ -763,12 +763,6 @@
 applications and introduce a ton of access functions instead.  Too late now,
 however.)
 
-On some systems your application may need to set up a signal handler to ensure
-that temporary files are deleted if the program is interrupted.  This is most
-critical if you are on MS-DOS and use the jmemdos.c memory manager back end;
-it will try to grab extended memory for temp files, and that space will NOT be
-freed automatically.  See cjpeg.c or djpeg.c for an example signal handler.
-
 It may be worth pointing out that the core JPEG library does not actually
 require the stdio library: only the default source/destination managers and
 error handler need it.  You can use the library in a stdio-less environment
@@ -799,192 +793,243 @@
 The helper routines are:
 
 jpeg_set_defaults (j_compress_ptr cinfo)
-	This routine sets all JPEG parameters to reasonable defaults, using
-	only the input image's color space (field in_color_space, which must
-	already be set in cinfo).  Many applications will only need to use
-	this routine and perhaps jpeg_set_quality().
+        This routine sets all JPEG parameters to reasonable defaults, using
+        only the input image's color space (field in_color_space, which must
+        already be set in cinfo).  Many applications will only need to use
+        this routine and perhaps jpeg_set_quality().
 
 jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
-	Sets the JPEG file's colorspace (field jpeg_color_space) as specified,
-	and sets other color-space-dependent parameters appropriately.  See
-	"Special color spaces", below, before using this.  A large number of
-	parameters, including all per-component parameters, are set by this
-	routine; if you want to twiddle individual parameters you should call
-	jpeg_set_colorspace() before rather than after.
+        Sets the JPEG file's colorspace (field jpeg_color_space) as specified,
+        and sets other color-space-dependent parameters appropriately.  See
+        "Special color spaces", below, before using this.  A large number of
+        parameters, including all per-component parameters, are set by this
+        routine; if you want to twiddle individual parameters you should call
+        jpeg_set_colorspace() before rather than after.
 
 jpeg_default_colorspace (j_compress_ptr cinfo)
-	Selects an appropriate JPEG colorspace based on cinfo->in_color_space,
-	and calls jpeg_set_colorspace().  This is actually a subroutine of
-	jpeg_set_defaults().  It's broken out in case you want to change
-	just the colorspace-dependent JPEG parameters.
+        Selects an appropriate JPEG colorspace based on cinfo->in_color_space,
+        and calls jpeg_set_colorspace().  This is actually a subroutine of
+        jpeg_set_defaults().  It's broken out in case you want to change
+        just the colorspace-dependent JPEG parameters.
 
 jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
-	Constructs JPEG quantization tables appropriate for the indicated
-	quality setting.  The quality value is expressed on the 0..100 scale
-	recommended by IJG (cjpeg's "-quality" switch uses this routine).
-	Note that the exact mapping from quality values to tables may change
-	in future IJG releases as more is learned about DCT quantization.
-	If the force_baseline parameter is TRUE, then the quantization table
-	entries are constrained to the range 1..255 for full JPEG baseline
-	compatibility.  In the current implementation, this only makes a
-	difference for quality settings below 25, and it effectively prevents
-	very small/low quality files from being generated.  The IJG decoder
-	is capable of reading the non-baseline files generated at low quality
-	settings when force_baseline is FALSE, but other decoders may not be.
+        Constructs JPEG quantization tables appropriate for the indicated
+        quality setting.  The quality value is expressed on the 0..100 scale
+        recommended by IJG (cjpeg's "-quality" switch uses this routine).
+        Note that the exact mapping from quality values to tables may change
+        in future IJG releases as more is learned about DCT quantization.
+        If the force_baseline parameter is TRUE, then the quantization table
+        entries are constrained to the range 1..255 for full JPEG baseline
+        compatibility.  In the current implementation, this only makes a
+        difference for quality settings below 25, and it effectively prevents
+        very small/low quality files from being generated.  The IJG decoder
+        is capable of reading the non-baseline files generated at low quality
+        settings when force_baseline is FALSE, but other decoders may not be.
 
 jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
-			 boolean force_baseline)
-	Same as jpeg_set_quality() except that the generated tables are the
-	sample tables given in the JPEC spec section K.1, multiplied by the
-	specified scale factor (which is expressed as a percentage; thus
-	scale_factor = 100 reproduces the spec's tables).  Note that larger
-	scale factors give lower quality.  This entry point is useful for
-	conforming to the Adobe PostScript DCT conventions, but we do not
-	recommend linear scaling as a user-visible quality scale otherwise.
-	force_baseline again constrains the computed table entries to 1..255.
+                         boolean force_baseline)
+        Same as jpeg_set_quality() except that the generated tables are the
+        sample tables given in the JPEG spec section K.1, multiplied by the
+        specified scale factor (which is expressed as a percentage; thus
+        scale_factor = 100 reproduces the spec's tables).  Note that larger
+        scale factors give lower quality.  This entry point is useful for
+        conforming to the Adobe PostScript DCT conventions, but we do not
+        recommend linear scaling as a user-visible quality scale otherwise.
+        force_baseline again constrains the computed table entries to 1..255.
 
 int jpeg_quality_scaling (int quality)
-	Converts a value on the IJG-recommended quality scale to a linear
-	scaling percentage.  Note that this routine may change or go away
-	in future releases --- IJG may choose to adopt a scaling method that
-	can't be expressed as a simple scalar multiplier, in which case the
-	premise of this routine collapses.  Caveat user.
+        Converts a value on the IJG-recommended quality scale to a linear
+        scaling percentage.  Note that this routine may change or go away
+        in future releases --- IJG may choose to adopt a scaling method that
+        can't be expressed as a simple scalar multiplier, in which case the
+        premise of this routine collapses.  Caveat user.
+
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+        [libjpeg v7+ API/ABI emulation only]
+        Set default quantization tables with linear q_scale_factor[] values
+        (see below).
 
 jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
-		      const unsigned int *basic_table,
-		      int scale_factor, boolean force_baseline)
-	Allows an arbitrary quantization table to be created.  which_tbl
-	indicates which table slot to fill.  basic_table points to an array
-	of 64 unsigned ints given in normal array order.  These values are
-	multiplied by scale_factor/100 and then clamped to the range 1..65535
-	(or to 1..255 if force_baseline is TRUE).
-	CAUTION: prior to library version 6a, jpeg_add_quant_table expected
-	the basic table to be given in JPEG zigzag order.  If you need to
-	write code that works with either older or newer versions of this
-	routine, you must check the library version number.  Something like
-	"#if JPEG_LIB_VERSION >= 61" is the right test.
+                      const unsigned int *basic_table,
+                      int scale_factor, boolean force_baseline)
+        Allows an arbitrary quantization table to be created.  which_tbl
+        indicates which table slot to fill.  basic_table points to an array
+        of 64 unsigned ints given in normal array order.  These values are
+        multiplied by scale_factor/100 and then clamped to the range 1..65535
+        (or to 1..255 if force_baseline is TRUE).
+        CAUTION: prior to library version 6a, jpeg_add_quant_table expected
+        the basic table to be given in JPEG zigzag order.  If you need to
+        write code that works with either older or newer versions of this
+        routine, you must check the library version number.  Something like
+        "#if JPEG_LIB_VERSION >= 61" is the right test.
 
 jpeg_simple_progression (j_compress_ptr cinfo)
-	Generates a default scan script for writing a progressive-JPEG file.
-	This is the recommended method of creating a progressive file,
-	unless you want to make a custom scan sequence.  You must ensure that
-	the JPEG color space is set correctly before calling this routine.
+        Generates a default scan script for writing a progressive-JPEG file.
+        This is the recommended method of creating a progressive file,
+        unless you want to make a custom scan sequence.  You must ensure that
+        the JPEG color space is set correctly before calling this routine.
 
 
 Compression parameters (cinfo fields) include:
 
 J_DCT_METHOD dct_method
-	Selects the algorithm used for the DCT step.  Choices are:
-		JDCT_ISLOW: slow but accurate integer algorithm
-		JDCT_IFAST: faster, less accurate integer method
-		JDCT_FLOAT: floating-point method
-		JDCT_DEFAULT: default method (normally JDCT_ISLOW)
-		JDCT_FASTEST: fastest method (normally JDCT_IFAST)
-	The FLOAT method is very slightly more accurate than the ISLOW method,
-	but may give different results on different machines due to varying
-	roundoff behavior.  The integer methods should give the same results
-	on all machines.  On machines with sufficiently fast FP hardware, the
-	floating-point method may also be the fastest.  The IFAST method is
-	considerably less accurate than the other two; its use is not
-	recommended if high quality is a concern.  JDCT_DEFAULT and
-	JDCT_FASTEST are macros configurable by each installation.
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: slow but accurate integer algorithm
+                JDCT_IFAST: faster, less accurate integer method
+                JDCT_FLOAT: floating-point method
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary
+        with other SIMD implementations, or when using libjpeg-turbo without
+        SIMD extensions).  For quality levels of 90 and below, there should be
+        little or no perceptible difference between the two algorithms.  For
+        quality levels above 90, however, the difference between JDCT_IFAST and
+        JDCT_ISLOW becomes more pronounced.  With quality=97, for instance,
+        JDCT_IFAST incurs generally about a 1-3 dB loss (in PSNR) relative to
+        JDCT_ISLOW, but this can be larger for some images.  Do not use
+        JDCT_IFAST with quality levels above 97.  The algorithm often
+        degenerates at quality=98 and above and can actually produce a more
+        lossy image than if lower quality levels had been used.  Also, in
+        libjpeg-turbo, JDCT_IFAST is not fully accelerated for quality levels
+        above 97, so it will be slower than JDCT_ISLOW.  JDCT_FLOAT is mainly a
+        legacy feature.  It does not produce significantly more accurate
+        results than the ISLOW method, and it is much slower.  The FLOAT method
+        may also give different results on different machines due to varying
+        roundoff behavior, whereas the integer methods should give the same
+        results on all machines.
 
 J_COLOR_SPACE jpeg_color_space
 int num_components
-	The JPEG color space and corresponding number of components; see
-	"Special color spaces", below, for more info.  We recommend using
-	jpeg_set_color_space() if you want to change these.
+        The JPEG color space and corresponding number of components; see
+        "Special color spaces", below, for more info.  We recommend using
+        jpeg_set_colorspace() if you want to change these.
 
 boolean optimize_coding
-	TRUE causes the compressor to compute optimal Huffman coding tables
-	for the image.  This requires an extra pass over the data and
-	therefore costs a good deal of space and time.  The default is
-	FALSE, which tells the compressor to use the supplied or default
-	Huffman tables.  In most cases optimal tables save only a few percent
-	of file size compared to the default tables.  Note that when this is
-	TRUE, you need not supply Huffman tables at all, and any you do
-	supply will be overwritten.
+        TRUE causes the compressor to compute optimal Huffman coding tables
+        for the image.  This requires an extra pass over the data and
+        therefore costs a good deal of space and time.  The default is
+        FALSE, which tells the compressor to use the supplied or default
+        Huffman tables.  In most cases optimal tables save only a few percent
+        of file size compared to the default tables.  Note that when this is
+        TRUE, you need not supply Huffman tables at all, and any you do
+        supply will be overwritten.
 
 unsigned int restart_interval
 int restart_in_rows
-	To emit restart markers in the JPEG file, set one of these nonzero.
-	Set restart_interval to specify the exact interval in MCU blocks.
-	Set restart_in_rows to specify the interval in MCU rows.  (If
-	restart_in_rows is not 0, then restart_interval is set after the
-	image width in MCUs is computed.)  Defaults are zero (no restarts).
-	One restart marker per MCU row is often a good choice.
-	NOTE: the overhead of restart markers is higher in grayscale JPEG
-	files than in color files, and MUCH higher in progressive JPEGs.
-	If you use restarts, you may want to use larger intervals in those
-	cases.
+        To emit restart markers in the JPEG file, set one of these nonzero.
+        Set restart_interval to specify the exact interval in MCU blocks.
+        Set restart_in_rows to specify the interval in MCU rows.  (If
+        restart_in_rows is not 0, then restart_interval is set after the
+        image width in MCUs is computed.)  Defaults are zero (no restarts).
+        One restart marker per MCU row is often a good choice.
+        NOTE: the overhead of restart markers is higher in grayscale JPEG
+        files than in color files, and MUCH higher in progressive JPEGs.
+        If you use restarts, you may want to use larger intervals in those
+        cases.
 
 const jpeg_scan_info * scan_info
 int num_scans
-	By default, scan_info is NULL; this causes the compressor to write a
-	single-scan sequential JPEG file.  If not NULL, scan_info points to
-	an array of scan definition records of length num_scans.  The
-	compressor will then write a JPEG file having one scan for each scan
-	definition record.  This is used to generate noninterleaved or
-	progressive JPEG files.  The library checks that the scan array
-	defines a valid JPEG scan sequence.  (jpeg_simple_progression creates
-	a suitable scan definition array for progressive JPEG.)  This is
-	discussed further under "Progressive JPEG support".
+        By default, scan_info is NULL; this causes the compressor to write a
+        single-scan sequential JPEG file.  If not NULL, scan_info points to
+        an array of scan definition records of length num_scans.  The
+        compressor will then write a JPEG file having one scan for each scan
+        definition record.  This is used to generate noninterleaved or
+        progressive JPEG files.  The library checks that the scan array
+        defines a valid JPEG scan sequence.  (jpeg_simple_progression creates
+        a suitable scan definition array for progressive JPEG.)  This is
+        discussed further under "Progressive JPEG support".
 
 int smoothing_factor
-	If non-zero, the input image is smoothed; the value should be 1 for
-	minimal smoothing to 100 for maximum smoothing.  Consult jcsample.c
-	for details of the smoothing algorithm.  The default is zero.
+        If non-zero, the input image is smoothed; the value should range from
+        1 (minimal smoothing) to 100 (maximum smoothing).  Consult jcsample.c
+        for details of the smoothing algorithm.  The default is zero.
 
 boolean write_JFIF_header
-	If TRUE, a JFIF APP0 marker is emitted.  jpeg_set_defaults() and
-	jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space
-	(ie, YCbCr or grayscale) is selected, otherwise FALSE.
+        If TRUE, a JFIF APP0 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if a JFIF-legal JPEG color space
+        (i.e., YCbCr or grayscale) is selected, otherwise FALSE.
 
 UINT8 JFIF_major_version
 UINT8 JFIF_minor_version
-	The version number to be written into the JFIF marker.
-	jpeg_set_defaults() initializes the version to 1.01 (major=minor=1).
-	You should set it to 1.02 (major=1, minor=2) if you plan to write
-	any JFIF 1.02 extension markers.
+        The version number to be written into the JFIF marker.
+        jpeg_set_defaults() initializes the version to 1.01 (major=minor=1).
+        You should set it to 1.02 (major=1, minor=2) if you plan to write
+        any JFIF 1.02 extension markers.
 
 UINT8 density_unit
 UINT16 X_density
 UINT16 Y_density
-	The resolution information to be written into the JFIF marker;
-	not used otherwise.  density_unit may be 0 for unknown,
-	1 for dots/inch, or 2 for dots/cm.  The default values are 0,1,1
-	indicating square pixels of unknown size.
+        The resolution information to be written into the JFIF marker;
+        not used otherwise.  density_unit may be 0 for unknown,
+        1 for dots/inch, or 2 for dots/cm.  The default values are 0,1,1
+        indicating square pixels of unknown size.
 
 boolean write_Adobe_marker
-	If TRUE, an Adobe APP14 marker is emitted.  jpeg_set_defaults() and
-	jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK,
-	or YCCK is selected, otherwise FALSE.  It is generally a bad idea
-	to set both write_JFIF_header and write_Adobe_marker.  In fact,
-	you probably shouldn't change the default settings at all --- the
-	default behavior ensures that the JPEG file's color space can be
-	recognized by the decoder.
+        If TRUE, an Adobe APP14 marker is emitted.  jpeg_set_defaults() and
+        jpeg_set_colorspace() set this TRUE if JPEG color space RGB, CMYK,
+        or YCCK is selected, otherwise FALSE.  It is generally a bad idea
+        to set both write_JFIF_header and write_Adobe_marker.  In fact,
+        you probably shouldn't change the default settings at all --- the
+        default behavior ensures that the JPEG file's color space can be
+        recognized by the decoder.
 
 JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]
-	Pointers to coefficient quantization tables, one per table slot,
-	or NULL if no table is defined for a slot.  Usually these should
-	be set via one of the above helper routines; jpeg_add_quant_table()
-	is general enough to define any quantization table.  The other
-	routines will set up table slot 0 for luminance quality and table
-	slot 1 for chrominance.
+        Pointers to coefficient quantization tables, one per table slot,
+        or NULL if no table is defined for a slot.  Usually these should
+        be set via one of the above helper routines; jpeg_add_quant_table()
+        is general enough to define any quantization table.  The other
+        routines will set up table slot 0 for luminance quality and table
+        slot 1 for chrominance.
+
+int q_scale_factor[NUM_QUANT_TBLS]
+        [libjpeg v7+ API/ABI emulation only]
+        Linear quantization scaling factors (0-100, default 100)
+        for use with jpeg_default_qtables().
+        See rdswitch.c and cjpeg.c for an example of usage.
+        Note that the q_scale_factor[] values use "linear" scales, so JPEG
+        quality levels chosen by the user must be converted to these scales
+        using jpeg_quality_scaling().  Here is an example that corresponds to
+        cjpeg -quality 90,70:
+
+                jpeg_set_defaults(cinfo);
+
+                /* Set luminance quality 90. */
+                cinfo->q_scale_factor[0] = jpeg_quality_scaling(90);
+                /* Set chrominance quality 70. */
+                cinfo->q_scale_factor[1] = jpeg_quality_scaling(70);
+
+                jpeg_default_qtables(cinfo, force_baseline);
+
+        CAUTION: Setting separate quality levels for chrominance and luminance
+        is mainly useful only if chrominance subsampling is disabled.  2x2
+        chrominance subsampling (AKA "4:2:0") is the default, but you can
+        explicitly disable subsampling as follows:
+
+                cinfo->comp_info[0].v_samp_factor = 1;
+                cinfo->comp_info[0].h_samp_factor = 1;
 
 JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]
 JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]
-	Pointers to Huffman coding tables, one per table slot, or NULL if
-	no table is defined for a slot.  Slots 0 and 1 are filled with the
-	JPEG sample tables by jpeg_set_defaults().  If you need to allocate
-	more table structures, jpeg_alloc_huff_table() may be used.
-	Note that optimal Huffman tables can be computed for an image
-	by setting optimize_coding, as discussed above; there's seldom
-	any need to mess with providing your own Huffman tables.
+        Pointers to Huffman coding tables, one per table slot, or NULL if
+        no table is defined for a slot.  Slots 0 and 1 are filled with the
+        JPEG sample tables by jpeg_set_defaults().  If you need to allocate
+        more table structures, jpeg_alloc_huff_table() may be used.
+        Note that optimal Huffman tables can be computed for an image
+        by setting optimize_coding, as discussed above; there's seldom
+        any need to mess with providing your own Huffman tables.
 
-There are some additional cinfo fields which are not documented here
-because you currently can't change them; for example, you can't set
-arith_code TRUE because arithmetic coding is unsupported.
+
+[libjpeg v7+ API/ABI emulation only]
+The actual dimensions of the JPEG image that will be written to the file are
+given by the following fields.  These are computed from the input image
+dimensions and the compression parameters by jpeg_start_compress().  You can
+also call jpeg_calc_jpeg_dimensions() to obtain the values that will result
+from the current parameter settings.  This can be useful if you are trying
+to pick a scaling ratio that will get close to a desired target size.
+
+JDIMENSION jpeg_width           Actual dimensions of output image.
+JDIMENSION jpeg_height
 
 
 Per-component parameters are stored in the struct cinfo.comp_info[i] for
@@ -994,32 +1039,32 @@
 to use that routine, it's up to you to allocate the array.
 
 int component_id
-	The one-byte identifier code to be recorded in the JPEG file for
-	this component.  For the standard color spaces, we recommend you
-	leave the default values alone.
+        The one-byte identifier code to be recorded in the JPEG file for
+        this component.  For the standard color spaces, we recommend you
+        leave the default values alone.
 
 int h_samp_factor
 int v_samp_factor
-	Horizontal and vertical sampling factors for the component; must
-	be 1..4 according to the JPEG standard.  Note that larger sampling
-	factors indicate a higher-resolution component; many people find
-	this behavior quite unintuitive.  The default values are 2,2 for
-	luminance components and 1,1 for chrominance components, except
-	for grayscale where 1,1 is used.
+        Horizontal and vertical sampling factors for the component; must
+        be 1..4 according to the JPEG standard.  Note that larger sampling
+        factors indicate a higher-resolution component; many people find
+        this behavior quite unintuitive.  The default values are 2,2 for
+        luminance components and 1,1 for chrominance components, except
+        for grayscale where 1,1 is used.
 
 int quant_tbl_no
-	Quantization table number for component.  The default value is
-	0 for luminance components and 1 for chrominance components.
+        Quantization table number for component.  The default value is
+        0 for luminance components and 1 for chrominance components.
 
 int dc_tbl_no
 int ac_tbl_no
-	DC and AC entropy coding table numbers.  The default values are
-	0 for luminance components and 1 for chrominance components.
+        DC and AC entropy coding table numbers.  The default values are
+        0 for luminance components and 1 for chrominance components.
 
 int component_index
-	Must equal the component's index in comp_info[].  (Beginning in
-	release v6, the compressor library will fill this in automatically;
-	you don't have to.)
+        Must equal the component's index in comp_info[].  (Beginning in
+        release v6, the compressor library will fill this in automatically;
+        you don't have to.)
 
 
 Decompression parameter selection
@@ -1039,18 +1084,18 @@
 The following fields in the JPEG object are set by jpeg_read_header() and
 may be useful to the application in choosing decompression parameters:
 
-JDIMENSION image_width			Width and height of image
+JDIMENSION image_width                  Width and height of image
 JDIMENSION image_height
-int num_components			Number of color components
-J_COLOR_SPACE jpeg_color_space		Colorspace of image
-boolean saw_JFIF_marker			TRUE if a JFIF APP0 marker was seen
-  UINT8 JFIF_major_version		Version information from JFIF marker
+int num_components                      Number of color components
+J_COLOR_SPACE jpeg_color_space          Colorspace of image
+boolean saw_JFIF_marker                 TRUE if a JFIF APP0 marker was seen
+  UINT8 JFIF_major_version              Version information from JFIF marker
   UINT8 JFIF_minor_version
-  UINT8 density_unit			Resolution data from JFIF marker
+  UINT8 density_unit                    Resolution data from JFIF marker
   UINT16 X_density
   UINT16 Y_density
-boolean saw_Adobe_marker		TRUE if an Adobe APP14 marker was seen
-  UINT8 Adobe_transform			Color transform code from Adobe marker
+boolean saw_Adobe_marker                TRUE if an Adobe APP14 marker was seen
+  UINT8 Adobe_transform                 Color transform code from Adobe marker
 
 The JPEG color space, unfortunately, is something of a guess since the JPEG
 standard proper does not provide a way to record it.  In practice most files
@@ -1062,50 +1107,51 @@
 returned image are:
 
 J_COLOR_SPACE out_color_space
-	Output color space.  jpeg_read_header() sets an appropriate default
-	based on jpeg_color_space; typically it will be RGB or grayscale.
-	The application can change this field to request output in a different
-	colorspace.  For example, set it to JCS_GRAYSCALE to get grayscale
-	output from a color file.  (This is useful for previewing: grayscale
-	output is faster than full color since the color components need not
-	be processed.)  Note that not all possible color space transforms are
-	currently implemented; you may need to extend jdcolor.c if you want an
-	unusual conversion.
+        Output color space.  jpeg_read_header() sets an appropriate default
+        based on jpeg_color_space; typically it will be RGB or grayscale.
+        The application can change this field to request output in a different
+        colorspace.  For example, set it to JCS_GRAYSCALE to get grayscale
+        output from a color file.  (This is useful for previewing: grayscale
+        output is faster than full color since the color components need not
+        be processed.)  Note that not all possible color space transforms are
+        currently implemented; you may need to extend jdcolor.c if you want an
+        unusual conversion.
 
 unsigned int scale_num, scale_denom
-	Scale the image by the fraction scale_num/scale_denom.  Default is
-	1/1, or no scaling.  Currently, the only supported scaling ratios
-	are 1/1, 1/2, 1/4, and 1/8.  (The library design allows for arbitrary
-	scaling ratios but this is not likely to be implemented any time soon.)
-	Smaller scaling ratios permit significantly faster decoding since
-	fewer pixels need be processed and a simpler IDCT method can be used.
+        Scale the image by the fraction scale_num/scale_denom.  Default is
+        1/1, or no scaling.  Currently, the only supported scaling ratios
+        are M/8 with all M from 1 to 16, or any reduced fraction thereof (such
+        as 1/2, 3/4, etc.).  (The library design allows for arbitrary
+        scaling ratios but this is not likely to be implemented any time soon.)
+        Smaller scaling ratios permit significantly faster decoding since
+        fewer pixels need be processed and a simpler IDCT method can be used.
 
 boolean quantize_colors
-	If set TRUE, colormapped output will be delivered.  Default is FALSE,
-	meaning that full-color output will be delivered.
+        If set TRUE, colormapped output will be delivered.  Default is FALSE,
+        meaning that full-color output will be delivered.
 
 The next three parameters are relevant only if quantize_colors is TRUE.
 
 int desired_number_of_colors
-	Maximum number of colors to use in generating a library-supplied color
-	map (the actual number of colors is returned in a different field).
-	Default 256.  Ignored when the application supplies its own color map.
+        Maximum number of colors to use in generating a library-supplied color
+        map (the actual number of colors is returned in a different field).
+        Default 256.  Ignored when the application supplies its own color map.
 
 boolean two_pass_quantize
-	If TRUE, an extra pass over the image is made to select a custom color
-	map for the image.  This usually looks a lot better than the one-size-
-	fits-all colormap that is used otherwise.  Default is TRUE.  Ignored
-	when the application supplies its own color map.
+        If TRUE, an extra pass over the image is made to select a custom color
+        map for the image.  This usually looks a lot better than the one-size-
+        fits-all colormap that is used otherwise.  Default is TRUE.  Ignored
+        when the application supplies its own color map.
 
 J_DITHER_MODE dither_mode
-	Selects color dithering method.  Supported values are:
-		JDITHER_NONE	no dithering: fast, very low quality
-		JDITHER_ORDERED	ordered dither: moderate speed and quality
-		JDITHER_FS	Floyd-Steinberg dither: slow, high quality
-	Default is JDITHER_FS.  (At present, ordered dither is implemented
-	only in the single-pass, standard-colormap case.  If you ask for
-	ordered dither when two_pass_quantize is TRUE or when you supply
-	an external color map, you'll get F-S dithering.)
+        Selects color dithering method.  Supported values are:
+                JDITHER_NONE    no dithering: fast, very low quality
+                JDITHER_ORDERED ordered dither: moderate speed and quality
+                JDITHER_FS      Floyd-Steinberg dither: slow, high quality
+        Default is JDITHER_FS.  (At present, ordered dither is implemented
+        only in the single-pass, standard-colormap case.  If you ask for
+        ordered dither when two_pass_quantize is TRUE or when you supply
+        an external color map, you'll get F-S dithering.)
 
 When quantize_colors is TRUE, the target color map is described by the next
 two fields.  colormap is set to NULL by jpeg_read_header().  The application
@@ -1116,39 +1162,63 @@
 only accepted for 3-component output color spaces.]
 
 JSAMPARRAY colormap
-	The color map, represented as a 2-D pixel array of out_color_components
-	rows and actual_number_of_colors columns.  Ignored if not quantizing.
-	CAUTION: if the JPEG library creates its own colormap, the storage
-	pointed to by this field is released by jpeg_finish_decompress().
-	Copy the colormap somewhere else first, if you want to save it.
+        The color map, represented as a 2-D pixel array of out_color_components
+        rows and actual_number_of_colors columns.  Ignored if not quantizing.
+        CAUTION: if the JPEG library creates its own colormap, the storage
+        pointed to by this field is released by jpeg_finish_decompress().
+        Copy the colormap somewhere else first, if you want to save it.
 
 int actual_number_of_colors
-	The number of colors in the color map.
+        The number of colors in the color map.
 
 Additional decompression parameters that the application may set include:
 
 J_DCT_METHOD dct_method
-	Selects the algorithm used for the DCT step.  Choices are the same
-	as described above for compression.
+        Selects the algorithm used for the DCT step.  Choices are:
+                JDCT_ISLOW: slow but accurate integer algorithm
+                JDCT_IFAST: faster, less accurate integer method
+                JDCT_FLOAT: floating-point method
+                JDCT_DEFAULT: default method (normally JDCT_ISLOW)
+                JDCT_FASTEST: fastest method (normally JDCT_IFAST)
+        In libjpeg-turbo, JDCT_IFAST is generally about 5-15% faster than
+        JDCT_ISLOW when using the x86/x86-64 SIMD extensions (results may vary
+        with other SIMD implementations, or when using libjpeg-turbo without
+        SIMD extensions).  If the JPEG image was compressed using a quality
+        level of 85 or below, then there should be little or no perceptible
+        difference between the two algorithms.  When decompressing images that
+        were compressed using quality levels above 85, however, the difference
+        between JDCT_IFAST and JDCT_ISLOW becomes more pronounced.  With images
+        compressed using quality=97, for instance, JDCT_IFAST generally incurs
+        about a 4-6 dB loss (in PSNR) relative to JDCT_ISLOW, but this can be
+        larger for some images.  If you can avoid it, do not use JDCT_IFAST
+        when decompressing images that were compressed using quality levels
+        above 97.  The algorithm often degenerates for such images and can
+        actually produce a more lossy output image than if the JPEG image had
+        been compressed using lower quality levels.  JDCT_FLOAT is mainly a
+        legacy feature.  It does not produce significantly more accurate
+        results than the ISLOW method, and it is much slower.  The FLOAT method
+        may also give different results on different machines due to varying
+        roundoff behavior, whereas the integer methods should give the same
+        results on all machines.
 
 boolean do_fancy_upsampling
-	If TRUE, do careful upsampling of chroma components.  If FALSE,
-	a faster but sloppier method is used.  Default is TRUE.  The visual
-	impact of the sloppier method is often very small.
+        If TRUE, do careful upsampling of chroma components.  If FALSE,
+        a faster but sloppier method is used.  Default is TRUE.  The visual
+        impact of the sloppier method is often very small.
 
 boolean do_block_smoothing
-	If TRUE, interblock smoothing is applied in early stages of decoding
-	progressive JPEG files; if FALSE, not.  Default is TRUE.  Early
-	progression stages look "fuzzy" with smoothing, "blocky" without.
-	In any case, block smoothing ceases to be applied after the first few
-	AC coefficients are known to full accuracy, so it is relevant only
-	when using buffered-image mode for progressive images.
+        If TRUE, interblock smoothing is applied in early stages of decoding
+        progressive JPEG files; if FALSE, not.  Default is TRUE.  Early
+        progression stages look "fuzzy" with smoothing, "blocky" without.
+        In any case, block smoothing ceases to be applied after the first few
+        AC coefficients are known to full accuracy, so it is relevant only
+        when using buffered-image mode for progressive images.
 
 boolean enable_1pass_quant
 boolean enable_external_quant
 boolean enable_2pass_quant
-	These are significant only in buffered-image mode, which is
-	described in its own section below.
+        These are significant only in buffered-image mode, which is
+        described in its own section below.
 
 
 The output image dimensions are given by the following fields.  These are
@@ -1160,11 +1230,11 @@
 JPEG library's memory manager to allocate output buffer space, because you
 are supposed to request such buffers *before* jpeg_start_decompress().
 
-JDIMENSION output_width		Actual dimensions of output image.
+JDIMENSION output_width         Actual dimensions of output image.
 JDIMENSION output_height
-int out_color_components	Number of color components in out_color_space.
-int output_components		Number of color components returned.
-int rec_outbuf_height		Recommended height of scanline buffer.
+int out_color_components        Number of color components in out_color_space.
+int output_components           Number of color components returned.
+int rec_outbuf_height           Recommended height of scanline buffer.
 
 When quantizing colors, output_components is 1, indicating a single color map
 index per pixel.  Otherwise it equals out_color_components.  The output arrays
@@ -1204,10 +1274,10 @@
 space depending on in_color_space, but you can override this by calling
 jpeg_set_colorspace().  Of course you must select a supported transformation.
 jccolor.c currently supports the following transformations:
-	RGB => YCbCr
-	RGB => GRAYSCALE
-	YCbCr => GRAYSCALE
-	CMYK => YCCK
+        RGB => YCbCr
+        RGB => GRAYSCALE
+        YCbCr => GRAYSCALE
+        CMYK => YCCK
 plus the null transforms: GRAYSCALE => GRAYSCALE, RGB => RGB,
 YCbCr => YCbCr, CMYK => CMYK, YCCK => YCCK, and UNKNOWN => UNKNOWN.
 
@@ -1237,10 +1307,11 @@
 selects a default output color space based on (its guess of) jpeg_color_space;
 set out_color_space to override this.  Again, you must select a supported
 transformation.  jdcolor.c currently supports
-	YCbCr => GRAYSCALE
-	YCbCr => RGB
-	GRAYSCALE => RGB
-	YCCK => CMYK
+        YCbCr => RGB
+        YCbCr => GRAYSCALE
+        RGB => GRAYSCALE
+        GRAYSCALE => RGB
+        YCCK => CMYK
 as well as the null transforms.  (Since GRAYSCALE=>RGB is provided, an
 application can force grayscale JPEGs to look like color JPEGs if it only
 wants to handle one case.)
@@ -1310,31 +1381,31 @@
 The individual methods that you might wish to override are:
 
 error_exit (j_common_ptr cinfo)
-	Receives control for a fatal error.  Information sufficient to
-	generate the error message has been stored in cinfo->err; call
-	output_message to display it.  Control must NOT return to the caller;
-	generally this routine will exit() or longjmp() somewhere.
-	Typically you would override this routine to get rid of the exit()
-	default behavior.  Note that if you continue processing, you should
-	clean up the JPEG object with jpeg_abort() or jpeg_destroy().
+        Receives control for a fatal error.  Information sufficient to
+        generate the error message has been stored in cinfo->err; call
+        output_message to display it.  Control must NOT return to the caller;
+        generally this routine will exit() or longjmp() somewhere.
+        Typically you would override this routine to get rid of the exit()
+        default behavior.  Note that if you continue processing, you should
+        clean up the JPEG object with jpeg_abort() or jpeg_destroy().
 
 output_message (j_common_ptr cinfo)
-	Actual output of any JPEG message.  Override this to send messages
-	somewhere other than stderr.  Note that this method does not know
-	how to generate a message, only where to send it.
+        Actual output of any JPEG message.  Override this to send messages
+        somewhere other than stderr.  Note that this method does not know
+        how to generate a message, only where to send it.
 
 format_message (j_common_ptr cinfo, char * buffer)
-	Constructs a readable error message string based on the error info
-	stored in cinfo->err.  This method is called by output_message.  Few
-	applications should need to override this method.  One possible
-	reason for doing so is to implement dynamic switching of error message
-	language.
+        Constructs a readable error message string based on the error info
+        stored in cinfo->err.  This method is called by output_message.  Few
+        applications should need to override this method.  One possible
+        reason for doing so is to implement dynamic switching of error message
+        language.
 
 emit_message (j_common_ptr cinfo, int msg_level)
-	Decide whether or not to emit a warning or trace message; if so,
-	calls output_message.  The main reason for overriding this method
-	would be to abort on warnings.  msg_level is -1 for warnings,
-	0 and up for trace messages.
+        Decide whether or not to emit a warning or trace message; if so,
+        calls output_message.  The main reason for overriding this method
+        would be to abort on warnings.  msg_level is -1 for warnings,
+        0 and up for trace messages.
 
 Only error_exit() and emit_message() are called from the rest of the JPEG
 library; the other two are internal to the error handler.
@@ -1357,9 +1428,9 @@
 addon messages (the addon messages are defined in cderror.h).
 
 Actual invocation of the error handler is done via macros defined in jerror.h:
-	ERREXITn(...)	for fatal errors
-	WARNMSn(...)	for corrupt-data warnings
-	TRACEMSn(...)	for trace and informational messages.
+        ERREXITn(...)   for fatal errors
+        WARNMSn(...)    for corrupt-data warnings
+        TRACEMSn(...)   for trace and informational messages.
 These macros store the message code and any additional parameters into the
 error handler struct, then invoke the error_exit() or emit_message() method.
 The variants of each macro are for varying numbers of additional parameters.
@@ -1374,21 +1445,22 @@
 
 The JPEG compression library sends its compressed data to a "destination
 manager" module.  The default destination manager just writes the data to a
-stdio stream, but you can provide your own manager to do something else.
-Similarly, the decompression library calls a "source manager" to obtain the
-compressed data; you can provide your own source manager if you want the data
-to come from somewhere other than a stdio stream.
+memory buffer or to a stdio stream, but you can provide your own manager to
+do something else.  Similarly, the decompression library calls a "source
+manager" to obtain the compressed data; you can provide your own source
+manager if you want the data to come from somewhere other than a memory
+buffer or a stdio stream.
 
 In both cases, compressed data is processed a bufferload at a time: the
 destination or source manager provides a work buffer, and the library invokes
 the manager only when the buffer is filled or emptied.  (You could define a
 one-character buffer to force the manager to be invoked for each byte, but
 that would be rather inefficient.)  The buffer's size and location are
-controlled by the manager, not by the library.  For example, if you desired to
-decompress a JPEG datastream that was all in memory, you could just make the
-buffer pointer and length point to the original data in memory.  Then the
-buffer-reload procedure would be invoked only if the decompressor ran off the
-end of the datastream, which would indicate an erroneous datastream.
+controlled by the manager, not by the library.  For example, the memory
+source manager just makes the buffer pointer and length point to the original
+data in memory.  In this case the buffer-reload procedure will be invoked
+only if the decompressor runs off the end of the datastream, which would
+indicate an erroneous datastream.
 
 The work buffer is defined as an array of datatype JOCTET, which is generally
 "char" or "unsigned char".  On a machine where char is not exactly 8 bits
@@ -1399,8 +1471,8 @@
 A data destination manager struct contains a pointer and count defining the
 next byte to write in the work buffer and the remaining free space:
 
-	JOCTET * next_output_byte;  /* => next byte to write in buffer */
-	size_t free_in_buffer;      /* # of byte spaces remaining in buffer */
+        JOCTET * next_output_byte;  /* => next byte to write in buffer */
+        size_t free_in_buffer;      /* # of byte spaces remaining in buffer */
 
 The library increments the pointer and decrements the count until the buffer
 is filled.  The manager's empty_output_buffer method must reset the pointer
@@ -1410,27 +1482,27 @@
 A data destination manager provides three methods:
 
 init_destination (j_compress_ptr cinfo)
-	Initialize destination.  This is called by jpeg_start_compress()
-	before any data is actually written.  It must initialize
-	next_output_byte and free_in_buffer.  free_in_buffer must be
-	initialized to a positive value.
+        Initialize destination.  This is called by jpeg_start_compress()
+        before any data is actually written.  It must initialize
+        next_output_byte and free_in_buffer.  free_in_buffer must be
+        initialized to a positive value.
 
 empty_output_buffer (j_compress_ptr cinfo)
-	This is called whenever the buffer has filled (free_in_buffer
-	reaches zero).  In typical applications, it should write out the
-	*entire* buffer (use the saved start address and buffer length;
-	ignore the current state of next_output_byte and free_in_buffer).
-	Then reset the pointer & count to the start of the buffer, and
-	return TRUE indicating that the buffer has been dumped.
-	free_in_buffer must be set to a positive value when TRUE is
-	returned.  A FALSE return should only be used when I/O suspension is
-	desired (this operating mode is discussed in the next section).
+        This is called whenever the buffer has filled (free_in_buffer
+        reaches zero).  In typical applications, it should write out the
+        *entire* buffer (use the saved start address and buffer length;
+        ignore the current state of next_output_byte and free_in_buffer).
+        Then reset the pointer & count to the start of the buffer, and
+        return TRUE indicating that the buffer has been dumped.
+        free_in_buffer must be set to a positive value when TRUE is
+        returned.  A FALSE return should only be used when I/O suspension is
+        desired (this operating mode is discussed in the next section).
 
 term_destination (j_compress_ptr cinfo)
-	Terminate destination --- called by jpeg_finish_compress() after all
-	data has been written.  In most applications, this must flush any
-	data remaining in the buffer.  Use either next_output_byte or
-	free_in_buffer to determine how much data is in the buffer.
+        Terminate destination --- called by jpeg_finish_compress() after all
+        data has been written.  In most applications, this must flush any
+        data remaining in the buffer.  Use either next_output_byte or
+        free_in_buffer to determine how much data is in the buffer.
 
 term_destination() is NOT called by jpeg_abort() or jpeg_destroy().  If you
 want the destination manager to be cleaned up during an abort, you must do it
@@ -1440,15 +1512,16 @@
 method pointers, and insert a pointer to the struct into the "dest" field of
 the JPEG compression object.  This can be done in-line in your setup code if
 you like, but it's probably cleaner to provide a separate routine similar to
-the jpeg_stdio_dest() routine of the supplied destination manager.
+the jpeg_stdio_dest() or jpeg_mem_dest() routines of the supplied destination
+managers.
 
 Decompression source managers follow a parallel design, but with some
 additional frammishes.  The source manager struct contains a pointer and count
 defining the next byte to read from the work buffer and the number of bytes
 remaining:
 
-	const JOCTET * next_input_byte; /* => next byte to read from buffer */
-	size_t bytes_in_buffer;         /* # of bytes remaining in buffer */
+        const JOCTET * next_input_byte; /* => next byte to read from buffer */
+        size_t bytes_in_buffer;         /* # of bytes remaining in buffer */
 
 The library increments the pointer and decrements the count until the buffer
 is emptied.  The manager's fill_input_buffer method must reset the pointer and
@@ -1458,47 +1531,47 @@
 A data source manager provides five methods:
 
 init_source (j_decompress_ptr cinfo)
-	Initialize source.  This is called by jpeg_read_header() before any
-	data is actually read.  Unlike init_destination(), it may leave
-	bytes_in_buffer set to 0 (in which case a fill_input_buffer() call
-	will occur immediately).
+        Initialize source.  This is called by jpeg_read_header() before any
+        data is actually read.  Unlike init_destination(), it may leave
+        bytes_in_buffer set to 0 (in which case a fill_input_buffer() call
+        will occur immediately).
 
 fill_input_buffer (j_decompress_ptr cinfo)
-	This is called whenever bytes_in_buffer has reached zero and more
-	data is wanted.  In typical applications, it should read fresh data
-	into the buffer (ignoring the current state of next_input_byte and
-	bytes_in_buffer), reset the pointer & count to the start of the
-	buffer, and return TRUE indicating that the buffer has been reloaded.
-	It is not necessary to fill the buffer entirely, only to obtain at
-	least one more byte.  bytes_in_buffer MUST be set to a positive value
-	if TRUE is returned.  A FALSE return should only be used when I/O
-	suspension is desired (this mode is discussed in the next section).
+        This is called whenever bytes_in_buffer has reached zero and more
+        data is wanted.  In typical applications, it should read fresh data
+        into the buffer (ignoring the current state of next_input_byte and
+        bytes_in_buffer), reset the pointer & count to the start of the
+        buffer, and return TRUE indicating that the buffer has been reloaded.
+        It is not necessary to fill the buffer entirely, only to obtain at
+        least one more byte.  bytes_in_buffer MUST be set to a positive value
+        if TRUE is returned.  A FALSE return should only be used when I/O
+        suspension is desired (this mode is discussed in the next section).
 
 skip_input_data (j_decompress_ptr cinfo, long num_bytes)
-	Skip num_bytes worth of data.  The buffer pointer and count should
-	be advanced over num_bytes input bytes, refilling the buffer as
-	needed.  This is used to skip over a potentially large amount of
-	uninteresting data (such as an APPn marker).  In some applications
-	it may be possible to optimize away the reading of the skipped data,
-	but it's not clear that being smart is worth much trouble; large
-	skips are uncommon.  bytes_in_buffer may be zero on return.
-	A zero or negative skip count should be treated as a no-op.
+        Skip num_bytes worth of data.  The buffer pointer and count should
+        be advanced over num_bytes input bytes, refilling the buffer as
+        needed.  This is used to skip over a potentially large amount of
+        uninteresting data (such as an APPn marker).  In some applications
+        it may be possible to optimize away the reading of the skipped data,
+        but it's not clear that being smart is worth much trouble; large
+        skips are uncommon.  bytes_in_buffer may be zero on return.
+        A zero or negative skip count should be treated as a no-op.
 
 resync_to_restart (j_decompress_ptr cinfo, int desired)
-	This routine is called only when the decompressor has failed to find
-	a restart (RSTn) marker where one is expected.  Its mission is to
-	find a suitable point for resuming decompression.  For most
-	applications, we recommend that you just use the default resync
-	procedure, jpeg_resync_to_restart().  However, if you are able to back
-	up in the input data stream, or if you have a-priori knowledge about
-	the likely location of restart markers, you may be able to do better.
-	Read the read_restart_marker() and jpeg_resync_to_restart() routines
-	in jdmarker.c if you think you'd like to implement your own resync
-	procedure.
+        This routine is called only when the decompressor has failed to find
+        a restart (RSTn) marker where one is expected.  Its mission is to
+        find a suitable point for resuming decompression.  For most
+        applications, we recommend that you just use the default resync
+        procedure, jpeg_resync_to_restart().  However, if you are able to back
+        up in the input data stream, or if you have a-priori knowledge about
+        the likely location of restart markers, you may be able to do better.
+        Read the read_restart_marker() and jpeg_resync_to_restart() routines
+        in jdmarker.c if you think you'd like to implement your own resync
+        procedure.
 
 term_source (j_decompress_ptr cinfo)
-	Terminate source --- called by jpeg_finish_decompress() after all
-	data has been read.  Often a no-op.
+        Terminate source --- called by jpeg_finish_decompress() after all
+        data has been read.  Often a no-op.
 
 For both fill_input_buffer() and skip_input_data(), there is no such thing
 as an EOF return.  If the end of the file has been reached, the routine has
@@ -1516,10 +1589,10 @@
 pointers, and insert a pointer to the struct into the "src" field of the JPEG
 decompression object.  This can be done in-line in your setup code if you
 like, but it's probably cleaner to provide a separate routine similar to the
-jpeg_stdio_src() routine of the supplied source manager.
+jpeg_stdio_src() or jpeg_mem_src() routines of the supplied source managers.
 
-For more information, consult the stdio source and destination managers
-in jdatasrc.c and jdatadst.c.
+For more information, consult the memory and stdio source and destination
+managers in jdatasrc.c and jdatadst.c.
 
 
 I/O suspension
@@ -1606,7 +1679,7 @@
   * jpeg_read_header(): will return JPEG_SUSPENDED.
   * jpeg_start_decompress(): will return FALSE, rather than its usual TRUE.
   * jpeg_read_scanlines(): will return the number of scanlines already
-	completed (possibly 0).
+        completed (possibly 0).
   * jpeg_finish_decompress(): will return FALSE, rather than its usual TRUE.
 The surrounding application must recognize these cases, load more data into
 the input buffer, and repeat the call.  In the case of jpeg_read_scanlines(),
@@ -1784,23 +1857,23 @@
 
 The basic control flow for buffered-image decoding is
 
-	jpeg_create_decompress()
-	set data source
-	jpeg_read_header()
-	set overall decompression parameters
-	cinfo.buffered_image = TRUE;	/* select buffered-image mode */
-	jpeg_start_decompress()
-	for (each output pass) {
-	    adjust output decompression parameters if required
-	    jpeg_start_output()		/* start a new output pass */
-	    for (all scanlines in image) {
-	        jpeg_read_scanlines()
-	        display scanlines
-	    }
-	    jpeg_finish_output()	/* terminate output pass */
-	}
-	jpeg_finish_decompress()
-	jpeg_destroy_decompress()
+        jpeg_create_decompress()
+        set data source
+        jpeg_read_header()
+        set overall decompression parameters
+        cinfo.buffered_image = TRUE;    /* select buffered-image mode */
+        jpeg_start_decompress()
+        for (each output pass) {
+            adjust output decompression parameters if required
+            jpeg_start_output()         /* start a new output pass */
+            for (all scanlines in image) {
+                jpeg_read_scanlines()
+                display scanlines
+            }
+            jpeg_finish_output()        /* terminate output pass */
+        }
+        jpeg_finish_decompress()
+        jpeg_destroy_decompress()
 
 This differs from ordinary unbuffered decoding in that there is an additional
 level of looping.  The application can choose how many output passes to make
@@ -1809,9 +1882,9 @@
 The simplest approach to displaying progressive images is to do one display
 pass for each scan appearing in the input file.  In this case the outer loop
 condition is typically
-	while (! jpeg_input_complete(&cinfo))
+        while (! jpeg_input_complete(&cinfo))
 and the start-output call should read
-	jpeg_start_output(&cinfo, cinfo.input_scan_number);
+        jpeg_start_output(&cinfo, cinfo.input_scan_number);
 The second parameter to jpeg_start_output() indicates which scan of the input
 file is to be displayed; the scans are numbered starting at 1 for this
 purpose.  (You can use a loop counter starting at 1 if you like, but using
@@ -1842,11 +1915,11 @@
 cause the library to decode input data in advance of what's needed to produce
 output.  This is done by calling the routine jpeg_consume_input().
 The return value is one of the following:
-	JPEG_REACHED_SOS:    reached an SOS marker (the start of a new scan)
-	JPEG_REACHED_EOI:    reached the EOI marker (end of image)
-	JPEG_ROW_COMPLETED:  completed reading one MCU row of compressed data
-	JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan
-	JPEG_SUSPENDED:      suspended before completing any of the above
+        JPEG_REACHED_SOS:    reached an SOS marker (the start of a new scan)
+        JPEG_REACHED_EOI:    reached the EOI marker (end of image)
+        JPEG_ROW_COMPLETED:  completed reading one MCU row of compressed data
+        JPEG_SCAN_COMPLETED: completed reading last MCU row of current scan
+        JPEG_SUSPENDED:      suspended before completing any of the above
 (JPEG_SUSPENDED can occur only if a suspending data source is used.)  This
 routine can be called at any time after initializing the JPEG object.  It
 reads some additional data and returns when one of the indicated significant
@@ -1923,27 +1996,27 @@
 output pass after receiving all the data; otherwise your last display may not
 be full quality across the whole screen.  So the right outer loop logic is
 something like this:
-	do {
-	    absorb any waiting input by calling jpeg_consume_input()
-	    final_pass = jpeg_input_complete(&cinfo);
-	    adjust output decompression parameters if required
-	    jpeg_start_output(&cinfo, cinfo.input_scan_number);
-	    ...
-	    jpeg_finish_output()
-	} while (! final_pass);
+        do {
+            absorb any waiting input by calling jpeg_consume_input()
+            final_pass = jpeg_input_complete(&cinfo);
+            adjust output decompression parameters if required
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+        } while (! final_pass);
 rather than quitting as soon as jpeg_input_complete() returns TRUE.  This
 arrangement makes it simple to use higher-quality decoding parameters
 for the final pass.  But if you don't want to use special parameters for
 the final pass, the right loop logic is like this:
-	for (;;) {
-	    absorb any waiting input by calling jpeg_consume_input()
-	    jpeg_start_output(&cinfo, cinfo.input_scan_number);
-	    ...
-	    jpeg_finish_output()
-	    if (jpeg_input_complete(&cinfo) &&
-	        cinfo.input_scan_number == cinfo.output_scan_number)
-	      break;
-	}
+        for (;;) {
+            absorb any waiting input by calling jpeg_consume_input()
+            jpeg_start_output(&cinfo, cinfo.input_scan_number);
+            ...
+            jpeg_finish_output()
+            if (jpeg_input_complete(&cinfo) &&
+                cinfo.input_scan_number == cinfo.output_scan_number)
+              break;
+        }
 In this case you don't need to know in advance whether an output pass is to
 be the last one, so it's not necessary to have reached EOF before starting
 the final output pass; rather, what you want to test is whether the output
@@ -2052,9 +2125,9 @@
 one(s) you intend to use before you call jpeg_start_decompress().  (If we did
 not require this, the max_memory_to_use setting would be a complete fiction.)
 You do this by setting one or more of these three cinfo fields to TRUE:
-	enable_1pass_quant		Fixed color cube colormap
-	enable_external_quant		Externally-supplied colormap
-	enable_2pass_quant		Two-pass custom colormap
+        enable_1pass_quant              Fixed color cube colormap
+        enable_external_quant           Externally-supplied colormap
+        enable_2pass_quant              Two-pass custom colormap
 All three are initialized FALSE by jpeg_read_header().  But
 jpeg_start_decompress() automatically sets TRUE the one selected by the
 current two_pass_quantize and colormap settings, so you only need to set the
@@ -2205,14 +2278,14 @@
 A sure-fire way to create matching tables-only and abbreviated image files
 is to proceed as follows:
 
-	create JPEG compression object
-	set JPEG parameters
-	set destination to tables-only file
-	jpeg_write_tables(&cinfo);
-	set destination to image file
-	jpeg_start_compress(&cinfo, FALSE);
-	write data...
-	jpeg_finish_compress(&cinfo);
+        create JPEG compression object
+        set JPEG parameters
+        set destination to tables-only file
+        jpeg_write_tables(&cinfo);
+        set destination to image file
+        jpeg_start_compress(&cinfo, FALSE);
+        write data...
+        jpeg_finish_compress(&cinfo);
 
 Since the JPEG parameters are not altered between writing the table file and
 the abbreviated image file, the same tables are sure to be used.  Of course,
@@ -2240,7 +2313,7 @@
 
     if (cinfo.quant_tbl_ptrs[n] == NULL)
       cinfo.quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) &cinfo);
-    quant_ptr = cinfo.quant_tbl_ptrs[n];	/* quant_ptr is JQUANT_TBL* */
+    quant_ptr = cinfo.quant_tbl_ptrs[n];        /* quant_ptr is JQUANT_TBL* */
     for (i = 0; i < 64; i++) {
       /* Qtable[] is desired quantization table, in natural array order */
       quant_ptr->quantval[i] = Qtable[i];
@@ -2250,7 +2323,7 @@
 
     if (cinfo.ac_huff_tbl_ptrs[n] == NULL)
       cinfo.ac_huff_tbl_ptrs[n] = jpeg_alloc_huff_table((j_common_ptr) &cinfo);
-    huff_ptr = cinfo.ac_huff_tbl_ptrs[n];	/* huff_ptr is JHUFF_TBL* */
+    huff_ptr = cinfo.ac_huff_tbl_ptrs[n];       /* huff_ptr is JHUFF_TBL* */
     for (i = 1; i <= 16; i++) {
       /* counts[i] is number of Huffman codes of length i bits, i=1..16 */
       huff_ptr->bits[i] = counts[i];
@@ -2272,15 +2345,15 @@
 FALSE to indicate that you do not require an image to be present.  Thus, the
 typical scenario is
 
-	create JPEG decompression object
-	set source to tables-only file
-	jpeg_read_header(&cinfo, FALSE);
-	set source to abbreviated image file
-	jpeg_read_header(&cinfo, TRUE);
-	set decompression parameters
-	jpeg_start_decompress(&cinfo);
-	read data...
-	jpeg_finish_decompress(&cinfo);
+        create JPEG decompression object
+        set source to tables-only file
+        jpeg_read_header(&cinfo, FALSE);
+        set source to abbreviated image file
+        jpeg_read_header(&cinfo, TRUE);
+        set decompression parameters
+        jpeg_start_decompress(&cinfo);
+        read data...
+        jpeg_finish_decompress(&cinfo);
 
 In some cases, you may want to read a file without knowing whether it contains
 an image or just tables.  In that case, pass FALSE and check the return value
@@ -2353,7 +2426,7 @@
 "JPEG_APP0 + n" for APPn.  (Actually, jpeg_write_marker will let you write
 any marker type, but we don't recommend writing any other kinds of marker.)
 For example, to write a user comment string pointed to by comment_text:
-	jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text));
+        jpeg_write_marker(cinfo, JPEG_COM, comment_text, strlen(comment_text));
 
 If it's not convenient to store all the marker data in memory at once,
 you can instead call jpeg_write_m_header() followed by multiple calls to
@@ -2399,7 +2472,7 @@
 
 
 To save the contents of special markers in memory, call
-	jpeg_save_markers(cinfo, marker_code, length_limit)
+        jpeg_save_markers(cinfo, marker_code, length_limit)
 where marker_code is the marker type to save, JPEG_COM or JPEG_APP0+n.
 (To arrange to save all the special marker types, you need to call this
 routine 17 times, for COM and APP0-APP15.)  If the incoming marker is longer
@@ -2444,7 +2517,7 @@
 If you want to supply your own marker-reading routine, you do it by calling
 jpeg_set_marker_processor().  A marker processor routine must have the
 signature
-	boolean jpeg_marker_parser_method (j_decompress_ptr cinfo)
+        boolean jpeg_marker_parser_method (j_decompress_ptr cinfo)
 Although the marker code is not explicitly passed, the routine can find it
 in cinfo->unread_marker.  At the time of call, the marker proper has been
 read from the data source module.  The processor routine is responsible for
@@ -2531,8 +2604,8 @@
 
 The required dimensions of the supplied data can be computed for each
 component as
-	cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
-	cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
+        cinfo->comp_info[i].width_in_blocks*DCTSIZE  samples per row
+        cinfo->comp_info[i].height_in_blocks*DCTSIZE rows in image
 after jpeg_start_compress() has initialized those fields.  If the valid data
 is smaller than this, it must be padded appropriately.  For some sampling
 factors and image sizes, additional dummy DCT blocks are inserted to make
@@ -2540,12 +2613,12 @@
 blocks itself; it does not read them from your supplied data.  Therefore you
 need never pad by more than DCTSIZE samples.  An example may help here.
 Assume 2h2v downsampling of YCbCr data, that is
-	cinfo->comp_info[0].h_samp_factor = 2		for Y
-	cinfo->comp_info[0].v_samp_factor = 2
-	cinfo->comp_info[1].h_samp_factor = 1		for Cb
-	cinfo->comp_info[1].v_samp_factor = 1
-	cinfo->comp_info[2].h_samp_factor = 1		for Cr
-	cinfo->comp_info[2].v_samp_factor = 1
+        cinfo->comp_info[0].h_samp_factor = 2           for Y
+        cinfo->comp_info[0].v_samp_factor = 2
+        cinfo->comp_info[1].h_samp_factor = 1           for Cb
+        cinfo->comp_info[1].v_samp_factor = 1
+        cinfo->comp_info[2].h_samp_factor = 1           for Cr
+        cinfo->comp_info[2].v_samp_factor = 1
 and suppose that the nominal image dimensions (cinfo->image_width and
 cinfo->image_height) are 101x101 pixels.  Then jpeg_start_compress() will
 compute downsampled_width = 101 and width_in_blocks = 13 for Y,
@@ -2608,7 +2681,7 @@
 component.  The return value is a pointer to an array of virtual-array
 descriptors.  Each virtual array can be accessed directly using the JPEG
 memory manager's access_virt_barray method (see Memory management, below,
-and also read structure.doc's discussion of virtual array handling).  Or,
+and also read structure.txt's discussion of virtual array handling).  Or,
 for simple transcoding to a different JPEG file format, the array list can
 just be handed directly to jpeg_write_coefficients().
 
@@ -2716,18 +2789,18 @@
 can use the same callback routine for both compression and decompression.
 
 The jpeg_progress_mgr struct contains four fields which are set by the library:
-	long pass_counter;	/* work units completed in this pass */
-	long pass_limit;	/* total number of work units in this pass */
-	int completed_passes;	/* passes completed so far */
-	int total_passes;	/* total number of passes expected */
+        long pass_counter;      /* work units completed in this pass */
+        long pass_limit;        /* total number of work units in this pass */
+        int completed_passes;   /* passes completed so far */
+        int total_passes;       /* total number of passes expected */
 During any one pass, pass_counter increases from 0 up to (not including)
 pass_limit; the step size is usually but not necessarily 1.  The pass_limit
 value may change from one pass to another.  The expected total number of
 passes is in total_passes, and the number of passes already completed is in
 completed_passes.  Thus the fraction of work completed may be estimated as
-		completed_passes + (pass_counter/pass_limit)
-		--------------------------------------------
-				total_passes
+                completed_passes + (pass_counter/pass_limit)
+                --------------------------------------------
+                                total_passes
 ignoring the fact that the passes may not be equal amounts of work.
 
 When decompressing, pass_limit can even change within a pass, because it
@@ -2752,7 +2825,7 @@
 -----------------
 
 This section covers some key facts about the JPEG library's built-in memory
-manager.  For more info, please read structure.doc's section about the memory
+manager.  For more info, please read structure.txt's section about the memory
 manager, and consult the source code if necessary.
 
 All memory and temporary file allocation within the library is done via the
@@ -2797,18 +2870,6 @@
 it's too small to be worth worrying about; so a reasonable safety margin
 should be left when setting max_memory_to_use.
 
-If you use the jmemname.c or jmemdos.c memory manager back end, it is
-important to clean up the JPEG object properly to ensure that the temporary
-files get deleted.  (This is especially crucial with jmemdos.c, where the
-"temporary files" may be extended-memory segments; if they are not freed,
-DOS will require a reboot to recover the memory.)  Thus, with these memory
-managers, it's a good idea to provide a signal handler that will trap any
-early exit from your program.  The handler should call either jpeg_abort()
-or jpeg_destroy() for any active JPEG objects.  A handler is not needed with
-jmemnobs.c, and shouldn't be necessary with jmemansi.c or jmemmac.c either,
-since the C library is supposed to take care of deleting files made with
-tmpfile().
-
 
 Memory usage
 ------------
@@ -2867,13 +2928,9 @@
 larger than a char, so it affects the surrounding application's image data.
 The sample applications cjpeg and djpeg can support 12-bit mode only for PPM
 and GIF file formats; you must disable the other file formats to compile a
-12-bit cjpeg or djpeg.  (install.doc has more information about that.)
+12-bit cjpeg or djpeg.  (install.txt has more information about that.)
 At present, a 12-bit library can handle *only* 12-bit images, not both
-precisions.  (If you need to include both 8- and 12-bit libraries in a single
-application, you could probably do it by defining NEED_SHORT_EXTERNAL_NAMES
-for just one of the copies.  You'd have to access the 8-bit and 12-bit copies
-from separate application source files.  This is untested ... if you try it,
-we'd like to hear whether it works!)
+precisions.
 
 Note that a 12-bit library always compresses in Huffman optimization mode,
 in order to generate valid Huffman tables.  This is necessary because our
@@ -2900,7 +2957,7 @@
 
 You can also save a few K by not having text error messages in the library;
 the standard error message table occupies about 5Kb.  This is particularly
-reasonable for embedded applications where there's no good way to display 
+reasonable for embedded applications where there's no good way to display
 a message anyway.  To do this, remove the creation of the message table
 (jpeg_std_message_table[]) from jerror.c, and alter format_message to do
 something reasonable without it.  You could output the numeric value of the
@@ -2918,16 +2975,15 @@
 library to be less portable than is claimed here, we'd appreciate hearing
 about them.)
 
-The code works fine on ANSI C, C++, and pre-ANSI C compilers, using any of
-the popular system include file setups, and some not-so-popular ones too.
-See install.doc for configuration procedures.
+The code works fine on ANSI C and C++ compilers, using any of the popular
+system include file setups, and some not-so-popular ones too.
 
 The code is not dependent on the exact sizes of the C data types.  As
 distributed, we make the assumptions that
-	char	is at least 8 bits wide
-	short	is at least 16 bits wide
-	int	is at least 16 bits wide
-	long	is at least 32 bits wide
+        char    is at least 8 bits wide
+        short   is at least 16 bits wide
+        int     is at least 16 bits wide
+        long    is at least 32 bits wide
 (These are the minimum requirements of the ANSI C standard.)  Wider types will
 work fine, although memory may be used inefficiently if char is much larger
 than 8 bits or short is much bigger than 16 bits.  The code should work
@@ -2955,52 +3011,5 @@
 manager "back end" module, so you can use a different memory allocator by
 replacing that one file.
 
-The code generally assumes that C names must be unique in the first 15
-characters.  However, global function names can be made unique in the
-first 6 characters by defining NEED_SHORT_EXTERNAL_NAMES.
-
-More info about porting the code may be gleaned by reading jconfig.doc,
+More info about porting the code may be gleaned by reading jconfig.txt,
 jmorecfg.h, and jinclude.h.
-
-
-Notes for MS-DOS implementors
------------------------------
-
-The IJG code is designed to work efficiently in 80x86 "small" or "medium"
-memory models (i.e., data pointers are 16 bits unless explicitly declared
-"far"; code pointers can be either size).  You may be able to use small
-model to compile cjpeg or djpeg by itself, but you will probably have to use
-medium model for any larger application.  This won't make much difference in
-performance.  You *will* take a noticeable performance hit if you use a
-large-data memory model (perhaps 10%-25%), and you should avoid "huge" model
-if at all possible.
-
-The JPEG library typically needs 2Kb-3Kb of stack space.  It will also
-malloc about 20K-30K of near heap space while executing (and lots of far
-heap, but that doesn't count in this calculation).  This figure will vary
-depending on selected operating mode, and to a lesser extent on image size.
-There is also about 5Kb-6Kb of constant data which will be allocated in the
-near data segment (about 4Kb of this is the error message table).
-Thus you have perhaps 20K available for other modules' static data and near
-heap space before you need to go to a larger memory model.  The C library's
-static data will account for several K of this, but that still leaves a good
-deal for your needs.  (If you are tight on space, you could reduce the sizes
-of the I/O buffers allocated by jdatasrc.c and jdatadst.c, say from 4K to
-1K.  Another possibility is to move the error message table to far memory;
-this should be doable with only localized hacking on jerror.c.)
-
-About 2K of the near heap space is "permanent" memory that will not be
-released until you destroy the JPEG object.  This is only an issue if you
-save a JPEG object between compression or decompression operations.
-
-Far data space may also be a tight resource when you are dealing with large
-images.  The most memory-intensive case is decompression with two-pass color
-quantization, or single-pass quantization to an externally supplied color
-map.  This requires a 128Kb color lookup table plus strip buffers amounting
-to about 40 bytes per column for typical sampling ratios (eg, about 25600
-bytes for a 640-pixel-wide image).  You may not be able to process wide
-images if you have large data structures of your own.
-
-Of course, all of these concerns vanish if you use a 32-bit flat-memory-model
-compiler, such as DJGPP or Watcom C.  We highly recommend flat model if you
-can use it; the JPEG library is significantly faster in flat model.
diff --git a/ltconfig b/ltconfig
deleted file mode 100755
index 2347e69..0000000
--- a/ltconfig
+++ /dev/null
@@ -1,1512 +0,0 @@
-#! /bin/sh
-
-# ltconfig - Create a system-specific libtool.
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-#
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# A lot of this script is taken from autoconf-2.10.
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-if test "${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi
-
-echo=echo
-if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then :
-else
-  # The Solaris and AIX default echo program unquotes backslashes.
-  # This makes it impossible to quote backslashes using
-  #   echo "$something" | sed 's/\\/\\\\/g'
-  # So, we emulate echo with printf '%s\n'
-  echo="printf %s\\n"
-  if test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then :
-  else
-    # Oops.  We have no working printf.  Try to find a not-so-buggy echo.
-    echo=echo
-    IFS="${IFS= 	}"; save_ifs="$IFS"; IFS="${IFS}:"
-    for dir in $PATH /usr/ucb; do
-      if test -f $dir/echo && test "X`$dir/echo '\t'`" = 'X\t'; then
-        echo="$dir/echo"
-        break
-      fi
-    done
-    IFS="$save_ifs"
-  fi
-fi
-
-# Sed substitution that helps us do robust quoting.  It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
-sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'
-
-# Same as above, but do not quote variable references.
-double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'
-
-# The name of this program.
-progname=`$echo "X$0" | $Xsed -e 's%^.*/%%'`
-
-# Constants:
-PROGRAM=ltconfig
-PACKAGE=libtool
-VERSION=1.2
-ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.c 1>&5'
-ac_link='${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.c $LIBS 1>&5'
-rm="rm -f"
-
-help="Try \`$progname --help' for more information."
-
-# Global variables:
-can_build_shared=yes
-enable_shared=yes
-# All known linkers require a `.a' archive for static linking.
-enable_static=yes
-ltmain=
-silent=
-srcdir=
-ac_config_guess=
-ac_config_sub=
-host=
-nonopt=
-verify_host=yes
-with_gcc=no
-with_gnu_ld=no
-
-old_AR="$AR"
-old_CC="$CC"
-old_CFLAGS="$CFLAGS"
-old_CPPFLAGS="$CPPFLAGS"
-old_LD="$LD"
-old_LN_S="$LN_S"
-old_NM="$NM"
-old_RANLIB="$RANLIB"
-
-# Parse the command line options.
-args=
-prev=
-for option
-do
-  case "$option" in
-  -*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
-  *) optarg= ;;
-  esac
-
-  # If the previous option needs an argument, assign it.
-  if test -n "$prev"; then
-    eval "$prev=\$option"
-    prev=
-    continue
-  fi
-
-  case "$option" in
-  --help) cat <<EOM
-Usage: $progname [OPTION]... LTMAIN [HOST]
-
-Generate a system-specific libtool script.
-
-    --disable-shared       do not build shared libraries
-    --disable-static       do not build static libraries
-    --help                 display this help and exit
-    --no-verify            do not verify that HOST is a valid host type
-    --quiet                same as \`--silent'
-    --silent               do not print informational messages
-    --srcdir=DIR           find \`config.guess' in DIR
-    --version              output version information and exit
-    --with-gcc             assume that the GNU C compiler will be used
-    --with-gnu-ld          assume that the C compiler uses the GNU linker
-
-LTMAIN is the \`ltmain.sh' shell script fragment that provides basic libtool
-functionality.
-
-HOST is the canonical host system name [default=guessed].
-EOM
-  exit 0
-  ;;
-
-  --disable-shared) enable_shared=no ;;
-
-  --disable-static) enable_static=no ;;
-
-  --quiet | --silent) silent=yes ;;
-
-  --srcdir) prev=srcdir ;;
-  --srcdir=*) srcdir="$optarg" ;;
-
-  --no-verify) verify_host=no ;;
-
-  --version) echo "$PROGRAM (GNU $PACKAGE) $VERSION"; exit 0 ;;
-
-  --with-gcc) with_gcc=yes ;;
-  --with-gnu-ld) with_gnu_ld=yes ;;
-
-  -*)
-    echo "$progname: unrecognized option \`$option'" 1>&2
-    echo "$help" 1>&2
-    exit 1
-    ;;
-
-  *)
-    if test -z "$ltmain"; then
-      ltmain="$option"
-    elif test -z "$host"; then
-# This generates an unnecessary warning for sparc-sun-solaris4.1.3_U1
-#      if test -n "`echo $option| sed 's/[-a-z0-9.]//g'`"; then
-#        echo "$progname: warning \`$option' is not a valid host type" 1>&2
-#      fi
-      host="$option"
-    else
-      echo "$progname: too many arguments" 1>&2
-      echo "$help" 1>&2
-      exit 1
-    fi ;;
-  esac
-done
-
-if test -z "$ltmain"; then
-  echo "$progname: you must specify a LTMAIN file" 1>&2
-  echo "$help" 1>&2
-  exit 1
-fi
-
-if test -f "$ltmain"; then :
-else
-  echo "$progname: \`$ltmain' does not exist" 1>&2
-  echo "$help" 1>&2
-  exit 1
-fi
-
-# Quote any args containing shell metacharacters.
-ltconfig_args=
-for arg
-do
-  case "$arg" in
-  *" "*|*"	"*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
-  ltconfig_args="$ltconfig_args '$arg'" ;;
-  *) ltconfig_args="$ltconfig_args $arg" ;;
-  esac
-done
-
-# A relevant subset of AC_INIT.
-
-# File descriptor usage:
-# 0 standard input
-# 1 file creation
-# 2 errors and warnings
-# 3 some systems may open it to /dev/tty
-# 4 used on the Kubota Titan
-# 5 compiler messages saved in config.log
-# 6 checking for... messages and results
-if test "$silent" = yes; then
-  exec 6>/dev/null
-else
-  exec 6>&1
-fi
-exec 5>>./config.log
-
-# NLS nuisances.
-# Only set LANG and LC_ALL to C if already set.
-# These must not be set unconditionally because not all systems understand
-# e.g. LANG=C (notably SCO).
-if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
-if test "${LANG+set}"   = set; then LANG=C;   export LANG;   fi
-
-if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
-  # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
-  if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
-    ac_n= ac_c='
-' ac_t='	'
-  else
-    ac_n=-n ac_c= ac_t=
-  fi
-else
-  ac_n= ac_c='\c' ac_t=
-fi
-
-if test -z "$srcdir"; then
-  # Assume the source directory is the same one as the path to ltmain.sh.
-  srcdir=`$echo "$ltmain" | $Xsed -e 's%/[^/]*$%%'`
-  test "$srcdir" = "$ltmain" && srcdir=.
-fi
-
-trap "$rm conftest*; exit 1" 1 2 15
-if test "$verify_host" = yes; then
-  # Check for config.guess and config.sub.
-  ac_aux_dir=
-  for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
-    if test -f $ac_dir/config.guess; then
-      ac_aux_dir=$ac_dir
-      break
-    fi
-  done
-  if test -z "$ac_aux_dir"; then
-    echo "$progname: cannot find config.guess in $srcdir $srcdir/.. $srcdir/../.." 1>&2
-    echo "$help" 1>&2
-    exit 1
-  fi
-  ac_config_guess=$ac_aux_dir/config.guess
-  ac_config_sub=$ac_aux_dir/config.sub
-
-  # Make sure we can run config.sub.
-  if $ac_config_sub sun4 >/dev/null 2>&1; then :
-  else
-    echo "$progname: cannot run $ac_config_sub" 1>&2
-    echo "$help" 1>&2
-    exit 1
-  fi
-
-  echo $ac_n "checking host system type""... $ac_c" 1>&6
-
-  host_alias=$host
-  case "$host_alias" in
-  "")
-    if host_alias=`$ac_config_guess`; then :
-    else
-      echo "$progname: cannot guess host type; you must specify one" 1>&2
-      echo "$help" 1>&2
-      exit 1
-    fi ;;
-  esac
-  host=`$ac_config_sub $host_alias`
-  echo "$ac_t$host" 1>&6
-
-  # Make sure the host verified.
-  test -z "$host" && exit 1
-
-elif test -z "$host"; then
-  echo "$progname: you must specify a host type if you use \`--no-verify'" 1>&2
-  echo "$help" 1>&2
-  exit 1
-else
-  host_alias=$host
-fi
-
-# Transform linux* to *-*-linux-gnu*, to support old configure scripts.
-case "$host_os" in
-linux-gnu*) ;;
-linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'`
-esac
-
-host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'`
-host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'`
-host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'`
-
-case "$host_os" in
-aix3*)
-  # AIX sometimes has problems with the GCC collect2 program.  For some
-  # reason, if we set the COLLECT_NAMES environment variable, the problems
-  # vanish in a puff of smoke.
-  if test "${COLLECT_NAMES+set}" != set; then
-    COLLECT_NAMES=
-    export COLLECT_NAMES
-  fi
-  ;;
-esac
-
-# Determine commands to create old-style static archives.
-old_archive_cmds='$AR cru $oldlib$oldobjs'
-old_postinstall_cmds='chmod 644 $oldlib'
-old_postuninstall_cmds=
-
-# Set a sane default for `AR'.
-test -z "$AR" && AR=ar
-
-# If RANLIB is not set, then run the test.
-if test "${RANLIB+set}" != "set"; then
-  result=no
-
-  echo $ac_n "checking for ranlib... $ac_c" 1>&6
-  IFS="${IFS= 	}"; save_ifs="$IFS"; IFS="${IFS}:"
-  for dir in $PATH; do
-    test -z "$dir" && dir=.
-    if test -f $dir/ranlib; then
-      RANLIB="ranlib"
-      result="ranlib"
-      break
-    fi
-  done
-  IFS="$save_ifs"
-
-  echo "$ac_t$result" 1>&6
-fi
-
-if test -n "$RANLIB"; then
-  old_archive_cmds="$old_archive_cmds;\$RANLIB \$oldlib"
-  old_postinstall_cmds="\$RANLIB \$oldlib;$old_postinstall_cmds"
-fi
-
-# Check to see if we are using GCC.
-if test "$with_gcc" != yes || test -z "$CC"; then
-  # If CC is not set, then try to find GCC or a usable CC.
-  if test -z "$CC"; then
-    echo $ac_n "checking for gcc... $ac_c" 1>&6
-    IFS="${IFS= 	}"; save_ifs="$IFS"; IFS="${IFS}:"
-    for dir in $PATH; do
-      IFS="$save_ifs"
-      test -z "$dir" && dir=.
-      if test -f $dir/gcc; then
-	CC="gcc"
-	break
-      fi
-    done
-    IFS="$save_ifs"
-
-    if test -n "$CC"; then
-      echo "$ac_t$CC" 1>&6
-    else
-      echo "$ac_t"no 1>&6
-    fi
-  fi
-
-  # Not "gcc", so try "cc", rejecting "/usr/ucb/cc".
-  if test -z "$CC"; then
-    echo $ac_n "checking for cc... $ac_c" 1>&6
-    IFS="${IFS= 	}"; save_ifs="$IFS"; IFS="${IFS}:"
-    cc_rejected=no
-    for dir in $PATH; do
-      test -z "$dir" && dir=.
-      if test -f $dir/cc; then
-	if test "$dir/cc" = "/usr/ucb/cc"; then
-	  cc_rejected=yes
-	  continue
-	fi
-	CC="cc"
-	break
-      fi
-    done
-    IFS="$save_ifs"
-    if test $cc_rejected = yes; then
-      # We found a bogon in the path, so make sure we never use it.
-      set dummy $CC
-      shift
-      if test $# -gt 0; then
-	# We chose a different compiler from the bogus one.
-	# However, it has the same name, so the bogon will be chosen
-	# first if we set CC to just the name; use the full file name.
-	shift
-	set dummy "$dir/cc" "$@"
-	shift
-	CC="$@"
-      fi
-    fi
-
-    if test -n "$CC"; then
-      echo "$ac_t$CC" 1>&6
-    else
-      echo "$ac_t"no 1>&6
-    fi
-
-    if test -z "$CC"; then
-      echo "$progname: error: no acceptable cc found in \$PATH" 1>&2
-      exit 1
-    fi
-  fi
-
-  # Now see if the compiler is really GCC.
-  with_gcc=no
-  echo $ac_n "checking whether we are using GNU C... $ac_c" 1>&6
-  echo "$progname:424: checking whether we are using GNU C" >&5
-
-  $rm conftest.c
-  cat > conftest.c <<EOF
-#ifdef __GNUC__
-  yes;
-#endif
-EOF
-  if { ac_try='${CC-cc} -E conftest.c'; { (eval echo $progname:432: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then
-    with_gcc=yes
-  fi
-  $rm conftest.c
-  echo "$ac_t$with_gcc" 1>&6
-fi
-
-# Allow CC to be a program name with arguments.
-set dummy $CC
-compiler="$2"
-
-echo $ac_n "checking for $compiler option to produce PIC... $ac_c" 1>&6
-pic_flag=
-special_shlib_compile_flags=
-wl=
-link_static_flag=
-no_builtin_flag=
-
-if test "$with_gcc" = yes; then
-  wl='-Wl,'
-  link_static_flag='-static'
-  no_builtin_flag=' -fno-builtin'
-
-  case "$host_os" in
-  aix3* | aix4* | irix5* | irix6* | osf3* | osf4*)
-    # PIC is the default for these OSes.
-    ;;
-  os2*)
-    # We can build DLLs from non-PIC.
-    ;;
-  amigaos*)
-    # FIXME: we need at least 68020 code to build shared libraries, but
-    # adding the `-m68020' flag to GCC prevents building anything better,
-    # like `-m68040'.
-    pic_flag='-m68020 -resident32 -malways-restore-a4'
-    ;;
-  *)
-    pic_flag='-fPIC'
-    ;;
-  esac
-else
-  # PORTME Check for PIC flags for the system compiler.
-  case "$host_os" in
-  aix3* | aix4*)
-    # All AIX code is PIC.
-    link_static_flag='-bnso -bI:/lib/syscalls.exp'
-    ;;
-
-  hpux9* | hpux10*)
-    # Is there a better link_static_flag that works with the bundled CC?
-    wl='-Wl,'
-    link_static_flag="${wl}-a ${wl}archive"
-    pic_flag='+Z'
-    ;;
-
-  irix5* | irix6*)
-    wl='-Wl,'
-    link_static_flag='-non_shared'
-    # PIC (with -KPIC) is the default.
-    ;;
-
-  os2*)
-    # We can build DLLs from non-PIC.
-    ;;
-
-  osf3* | osf4*)
-    # All OSF/1 code is PIC.
-    wl='-Wl,'
-    link_static_flag='-non_shared'
-    ;;
-
-  sco3.2v5*)
-    pic_flag='-Kpic'
-    link_static_flag='-dn'
-    special_shlib_compile_flags='-belf'
-    ;;
-
-  solaris2*)
-    pic_flag='-KPIC'
-    link_static_flag='-Bstatic'
-    wl='-Wl,'
-    ;;
-
-  sunos4*)
-    pic_flag='-PIC'
-    link_static_flag='-Bstatic'
-    wl='-Qoption ld '
-    ;;
-
-  sysv4.2uw2*)
-    pic_flag='-KPIC'
-    link_static_flag='-Bstatic'
-    wl='-Wl,'
-    ;;
-
-  uts4*)
-    pic_flag='-pic'
-    link_static_flag='-Bstatic'
-    ;;
-
-  *)
-    can_build_shared=no
-    ;;
-  esac
-fi
-
-if test -n "$pic_flag"; then
-  echo "$ac_t$pic_flag" 1>&6
-
-  # Check to make sure the pic_flag actually works.
-  echo $ac_n "checking if $compiler PIC flag $pic_flag works... $ac_c" 1>&6
-  $rm conftest*
-  echo > conftest.c
-  save_CFLAGS="$CFLAGS"
-  CFLAGS="$CFLAGS $pic_flag -DPIC"
-  echo "$progname:547: checking if $compiler PIC flag $pic_flag works" >&5
-  if { (eval echo $progname:548: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>conftest.err; } && test -s conftest.o; then
-    # Append any warnings to the config.log.
-    cat conftest.err 1>&5
-
-    # On HP-UX, both CC and GCC only warn that PIC is supported... then they
-    # create non-PIC objects.  So, if there were any warnings, we assume that
-    # PIC is not supported.
-    if test -s conftest.err; then
-      echo "$ac_t"no 1>&6
-      can_build_shared=no
-      pic_flag=
-    else
-      echo "$ac_t"yes 1>&6
-      pic_flag=" $pic_flag"
-    fi
-  else
-    # Append any errors to the config.log.
-    cat conftest.err 1>&5
-    can_build_shared=no
-    pic_flag=
-    echo "$ac_t"no 1>&6
-  fi
-  CFLAGS="$save_CFLAGS"
-  $rm conftest*
-else
-  echo "$ac_t"none 1>&6
-fi
-
-# Check for any special shared library compilation flags.
-if test -n "$special_shlib_compile_flags"; then
-  echo "$progname: warning: \`$CC' requires \`$special_shlib_compile_flags' to build shared libraries" 1>&2
-  if echo "$old_CC $old_CFLAGS " | egrep -e "[ 	]$special_shlib_compile_flags[ 	]" >/dev/null; then :
-  else
-    echo "$progname: add \`$special_shlib_compile_flags' to the CC or CFLAGS env variable and reconfigure" 1>&2
-    can_build_shared=no
-  fi
-fi
-
-echo $ac_n "checking if $compiler static flag $link_static_flag works... $ac_c" 1>&6
-$rm conftest*
-echo 'main(){return(0);}' > conftest.c
-save_LDFLAGS="$LDFLAGS"
-LDFLAGS="$LDFLAGS $link_static_flag"
-echo "$progname:591: checking if $compiler static flag $link_static_flag works" >&5
-if { (eval echo $progname:592: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-  echo "$ac_t$link_static_flag" 1>&6
-else
-  echo "$ac_t"none 1>&6
-  link_static_flag=
-fi
-LDFLAGS="$save_LDFLAGS"
-$rm conftest*
-
-if test -z "$LN_S"; then
-  # Check to see if we can use ln -s, or we need hard links.
-  echo $ac_n "checking whether ln -s works... $ac_c" 1>&6
-  $rm conftestdata
-  if ln -s X conftestdata 2>/dev/null; then
-    $rm conftestdata
-    LN_S="ln -s"
-  else
-    LN_S=ln
-  fi
-  if test "$LN_S" = "ln -s"; then
-    echo "$ac_t"yes 1>&6
-  else
-    echo "$ac_t"no 1>&6
-  fi
-fi
-
-# Make sure LD is an absolute path.
-if test -z "$LD"; then
-  ac_prog=ld
-  if test "$with_gcc" = yes; then
-    # Check if gcc -print-prog-name=ld gives a path.
-    echo $ac_n "checking for ld used by GCC... $ac_c" 1>&6
-    echo "$progname:624: checking for ld used by GCC" >&5
-    ac_prog=`($CC -print-prog-name=ld) 2>&5`
-    case "$ac_prog" in
-    # Accept absolute paths.
-    /* | [A-Za-z]:\\*)
-      test -z "$LD" && LD="$ac_prog"
-      ;;
-    "")
-      # If it fails, then pretend we are not using GCC.
-      ac_prog=ld
-      ;;
-    *)
-      # If it is relative, then search for the first ld in PATH.
-      with_gnu_ld=unknown
-      ;;
-    esac
-  elif test "$with_gnu_ld" = yes; then
-    echo $ac_n "checking for GNU ld... $ac_c" 1>&6
-    echo "$progname:642: checking for GNU ld" >&5
-  else
-    echo $ac_n "checking for non-GNU ld""... $ac_c" 1>&6
-    echo "$progname:645: checking for non-GNU ld" >&5
-  fi
-
-  if test -z "$LD"; then
-    IFS="${IFS= 	}"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-    for ac_dir in $PATH; do
-      test -z "$ac_dir" && ac_dir=.
-      if test -f "$ac_dir/$ac_prog"; then
-	LD="$ac_dir/$ac_prog"
-	# Check to see if the program is GNU ld.  I'd rather use --version,
-	# but apparently some GNU ld's only accept -v.
-	# Break only if it was the GNU/non-GNU ld that we prefer.
-	if "$LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then
-	  test "$with_gnu_ld" != no && break
-	else
-	  test "$with_gnu_ld" != yes && break
-	fi
-      fi
-    done
-    IFS="$ac_save_ifs"
-  fi
-
-  if test -n "$LD"; then
-    echo "$ac_t$LD" 1>&6
-  else
-    echo "$ac_t"no 1>&6
-  fi
-
-  if test -z "$LD"; then
-    echo "$progname: error: no acceptable ld found in \$PATH" 1>&2
-    exit 1
-  fi
-fi
-
-# Check to see if it really is or is not GNU ld.
-echo $ac_n "checking if the linker ($LD) is GNU ld... $ac_c" 1>&6
-# I'd rather use --version here, but apparently some GNU ld's only accept -v.
-if $LD -v 2>&1 </dev/null | egrep '(GNU|with BFD)' 1>&5; then
-  with_gnu_ld=yes
-else
-  with_gnu_ld=no
-fi
-echo "$ac_t$with_gnu_ld" 1>&6
-
-# See if the linker supports building shared libraries.
-echo $ac_n "checking whether the linker ($LD) supports shared libraries... $ac_c" 1>&6
-
-allow_undefined_flag=
-no_undefined_flag=
-archive_cmds=
-old_archive_from_new_cmds=
-export_dynamic_flag_spec=
-hardcode_libdir_flag_spec=
-hardcode_libdir_separator=
-hardcode_direct=no
-hardcode_minus_L=no
-hardcode_shlibpath_var=unsupported
-runpath_var=
-
-case "$host_os" in
-amigaos* | sunos4*)
-  # On these operating systems, we should treat GNU ld like the system ld.
-  gnu_ld_acts_native=yes
-  ;;
-*)
-  gnu_ld_acts_native=no
-  ;;
-esac
-
-ld_shlibs=yes
-if test "$with_gnu_ld" = yes && test "$gnu_ld_acts_native" != yes; then
-
-  # See if GNU ld supports shared libraries.
-  if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then
-    archive_cmds='$CC -shared ${wl}-soname $wl$soname -o $lib$libobjs'
-    runpath_var=LD_RUN_PATH
-    ld_shlibs=yes
-  else
-    ld_shlibs=no
-  fi
-
-  if test "$ld_shlibs" = yes; then
-    hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir'
-    export_dynamic_flag_spec='${wl}--export-dynamic'
-  fi
-else
-  # PORTME fill in a description of your system's linker (not GNU ld)
-  case "$host_os" in
-  aix3*)
-    allow_undefined_flag=unsupported
-    archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$LD -o $objdir/$soname$libobjs -bE:$lib.exp -T512 -H512 -bM:SRE;$AR cru $lib $objdir/$soname'
-    # Note: this linker hardcodes the directories in LIBPATH if there
-    # are no directories specified by -L.
-    hardcode_minus_L=yes
-    if test "$with_gcc" = yes && test -z "$link_static_flag"; then
-      # Neither direct hardcoding nor static linking is supported with a
-      # broken collect2.
-      hardcode_direct=unsupported
-    fi
-    ;;
-
-  aix4*)
-    allow_undefined_flag=unsupported
-    archive_cmds='$NM$libobjs | $global_symbol_pipe | sed '\''s/.* //'\'' > $lib.exp;$CC -o $objdir/$soname$libobjs ${wl}-bE:$lib.exp ${wl}-bM:SRE ${wl}-bnoentry;$AR cru $lib $objdir/$soname'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    ;;
-
-  amigaos*)
-    archive_cmds='$rm $objdir/a2ixlibrary.data;$echo "#define NAME $libname" > $objdir/a2ixlibrary.data;$echo "#define LIBRARY_ID 1" >> $objdir/a2ixlibrary.data;$echo "#define VERSION $major" >> $objdir/a2ixlibrary.data;$echo "#define REVISION $revision" >> $objdir/a2ixlibrary.data;$AR cru $lib$libobjs;$RANLIB $lib;(cd $objdir && a2ixlibrary -32)'
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_minus_L=yes
-    ;;
-
-  # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
-  # support.  Future versions do this automatically, but an explicit c++rt0.o
-  # does not break anything, and helps significantly (at the cost of a little
-  # extra space).
-  freebsd2.2*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs /usr/lib/c++rt0.o'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  # Unfortunately, older versions of FreeBSD 2 do not have this feature.
-  freebsd2*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  # FreeBSD 3, at last, uses gcc -shared to do shared libraries.
-  freebsd3*)
-    archive_cmds='$CC -shared -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  hpux9*)
-    archive_cmds='$rm $objdir/$soname;$LD -b +s +b $install_libdir -o $objdir/$soname$libobjs;mv $objdir/$soname $lib'
-    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    export_dynamic_flag_spec='${wl}-E'
-    ;;
-
-  hpux10*)
-    archive_cmds='$LD -b +h $soname +s +b $install_libdir -o $lib$libobjs'
-    hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    export_dynamic_flag_spec='${wl}-E'
-    ;;
-
-  irix5* | irix6*)
-    archive_cmds='$LD -shared -o $lib -soname $soname -set_version $verstring$libobjs'
-    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
-    ;;
-
-  netbsd*)
-    # Tested with NetBSD 1.2 ld
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  openbsd*)
-    archive_cmds='$LD -Bshareable -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_direct=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  os2*)
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_minus_L=yes
-    allow_undefined_flag=unsupported
-    archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $objdir/$libname.def;$echo "DESCRIPTION \"$libname\"" >> $objdir/$libname.def;$echo DATA >> $objdir/$libname.def;$echo " SINGLE NONSHARED" >> $objdir/$libname.def;$echo EXPORTS >> $objdir/$libname.def;emxexp$libobjs >> $objdir/$libname.def;$CC -Zdll -Zcrtdll -o $lib$libobjs $objdir/$libname.def'
-    old_archive_from_new_cmds='emximp -o $objdir/$libname.a $objdir/$libname.def'
-    ;;
-
-  osf3* | osf4*)
-    allow_undefined_flag=' -expect_unresolved \*'
-    archive_cmds='$LD -shared${allow_undefined_flag} -o $lib -soname $soname -set_version $verstring$libobjs$deplibs'
-    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
-    hardcode_libdir_separator=:
-    ;;
-
-  sco3.2v5*)
-    archive_cmds='$LD -G -o $lib$libobjs'
-    hardcode_direct=yes
-    ;;
-
-  solaris2*)
-    no_undefined_flag=' -z text'
-    archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-R$libdir'
-    hardcode_shlibpath_var=no
-
-    # Solaris 2 before 2.5 hardcodes -L paths.
-    case "$host_os" in
-    solaris2.[0-4]*)
-      hardcode_minus_L=yes
-      ;;
-    esac
-    ;;
-
-  sunos4*)
-    if test "$with_gcc" = yes; then
-      archive_cmds='$CC -shared -o $lib$libobjs'
-    else
-      archive_cmds='$LD -assert pure-text -Bstatic -o $lib$libobjs'
-    fi
-
-    if test "$with_gnu_ld" = yes; then
-      export_dynamic_flag_spec='${wl}-export-dynamic'
-    fi
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_direct=yes
-    hardcode_minus_L=yes
-    hardcode_shlibpath_var=no
-    ;;
-
-  uts4*)
-    archive_cmds='$LD -G -h $soname -o $lib$libobjs'
-    hardcode_libdir_flag_spec='-L$libdir'
-    hardcode_direct=no
-    hardcode_minus_L=no
-    hardcode_shlibpath_var=no
-    ;;
-
-  *)
-    ld_shlibs=no
-    can_build_shared=no
-    ;;
-  esac
-fi
-echo "$ac_t$ld_shlibs" 1>&6
-
-if test -z "$NM"; then
-  echo $ac_n "checking for BSD-compatible nm... $ac_c" 1>&6
-  case "$NM" in
-  /* | [A-Za-z]:\\*) ;; # Let the user override the test with a path.
-  *)
-    IFS="${IFS= 	}"; ac_save_ifs="$IFS"; IFS="${IFS}:"
-    for ac_dir in /usr/ucb /usr/ccs/bin $PATH /bin; do
-      test -z "$ac_dir" && ac_dir=.
-      if test -f $ac_dir/nm; then
-        # Check to see if the nm accepts a BSD-compat flag.
-        # Adding the `sed 1q' prevents false positives on HP-UX, which says:
-        #   nm: unknown option "B" ignored
-        if ($ac_dir/nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
-          NM="$ac_dir/nm -B"
-        elif ($ac_dir/nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then
-          NM="$ac_dir/nm -p"
-	else
-          NM="$ac_dir/nm"
-	fi
-        break
-      fi
-    done
-    IFS="$ac_save_ifs"
-    test -z "$NM" && NM=nm
-    ;;
-  esac
-  echo "$ac_t$NM" 1>&6
-fi
-
-# Check for command to grab the raw symbol name followed by C symbol from nm.
-echo $ac_n "checking command to parse $NM output... $ac_c" 1>&6
-
-# These are sane defaults that work on at least a few old systems.
-# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
-
-# Character class describing NM global symbol codes.
-symcode='[BCDEGRSTU]'
-
-# Regexp to match symbols that can be accessed directly from C.
-sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
-
-# Transform the above into a raw symbol and a C symbol.
-symxfrm='\1 \1'
-
-# Define system-specific variables.
-case "$host_os" in
-aix*)
-  symcode='[BCDTU]'
-  ;;
-irix*)
-  # Cannot use undefined symbols on IRIX because inlined functions mess us up.
-  symcode='[BCDEGRST]'
-  ;;
-solaris2*)
-  symcode='[BDTU]'
-  ;;
-esac
-
-# If we're using GNU nm, then use its standard symbol codes.
-if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then
-  symcode='[ABCDGISTUW]'
-fi
-
-# Write the raw and C identifiers.
-global_symbol_pipe="sed -n -e 's/^.* $symcode $sympat$/$symxfrm/p'"
-
-# Check to see that the pipe works correctly.
-pipe_works=no
-$rm conftest*
-cat > conftest.c <<EOF
-#ifdef __cplusplus
-extern "C" {
-#endif
-char nm_test_var;
-void nm_test_func(){}
-#ifdef __cplusplus
-}
-#endif
-main(){nm_test_var='a';nm_test_func();return(0);}
-EOF
-
-echo "$progname:971: checking if global_symbol_pipe works" >&5
-if { (eval echo $progname:972: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; } && test -s conftest.o; then
-  # Now try to grab the symbols.
-  nlist=conftest.nm
-  if { echo "$progname:975: eval \"$NM conftest.o | $global_symbol_pipe > $nlist\"" >&5; eval "$NM conftest.o | $global_symbol_pipe > $nlist 2>&5"; } && test -s "$nlist"; then
-
-    # Try sorting and uniquifying the output.
-    if sort "$nlist" | uniq > "$nlist"T; then
-      mv -f "$nlist"T "$nlist"
-      wcout=`wc "$nlist" 2>/dev/null`
-      count=`$echo "X$wcout" | $Xsed -e 's/^[ 	]*\([0-9][0-9]*\).*$/\1/'`
-      (test "$count" -ge 0) 2>/dev/null || count=-1
-    else
-      rm -f "$nlist"T
-      count=-1
-    fi
-
-    # Make sure that we snagged all the symbols we need.
-    if egrep ' nm_test_var$' "$nlist" >/dev/null; then
-      if egrep ' nm_test_func$' "$nlist" >/dev/null; then
-	cat <<EOF > conftest.c
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-EOF
-        # Now generate the symbol file.
-        sed 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> conftest.c
-
-	cat <<EOF >> conftest.c
-#if defined (__STDC__) && __STDC__
-# define __ptr_t void *
-#else
-# define __ptr_t char *
-#endif
-
-/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */
-int dld_preloaded_symbol_count = $count;
-
-/* The mapping between symbol names and symbols. */
-struct {
-  char *name;
-  __ptr_t address;
-}
-dld_preloaded_symbols[] =
-{
-EOF
-        sed 's/^\(.*\) \(.*\)$/  {"\1", (__ptr_t) \&\2},/' < "$nlist" >> conftest.c
-        cat <<\EOF >> conftest.c
-  {0, (__ptr_t) 0}
-};
-
-#ifdef __cplusplus
-}
-#endif
-EOF
-        # Now try linking the two files.
-        mv conftest.o conftestm.o
-	save_LIBS="$LIBS"
-	save_CFLAGS="$CFLAGS"
-        LIBS='conftestm.o'
-	CFLAGS="$CFLAGS$no_builtin_flag"
-        if { (eval echo $progname:1033: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest; then
-          pipe_works=yes
-        else
-          echo "$progname: failed program was:" >&5
-          cat conftest.c >&5
-        fi
-        LIBS="$save_LIBS"
-      else
-        echo "cannot find nm_test_func in $nlist" >&5
-      fi
-    else
-      echo "cannot find nm_test_var in $nlist" >&5
-    fi
-  else
-    echo "cannot run $global_symbol_pipe" >&5
-  fi
-else
-  echo "$progname: failed program was:" >&5
-  cat conftest.c >&5
-fi
-$rm conftest*
-
-# Do not use the global_symbol_pipe unless it works.
-echo "$ac_t$pipe_works" 1>&6
-test "$pipe_works" = yes || global_symbol_pipe=
-
-# Check hardcoding attributes.
-echo $ac_n "checking how to hardcode library paths into programs... $ac_c" 1>&6
-hardcode_action=
-if test -n "$hardcode_libdir_flag_spec" || \
-   test -n "$runpath_var"; then
-
-  # We can hardcode non-existant directories.
-  if test "$hardcode_direct" != no && \
-     test "$hardcode_minus_L" != no && \
-     test "$hardcode_shlibpath_var" != no; then
-
-    # Linking always hardcodes the temporary library directory.
-    hardcode_action=relink
-  else
-    # We can link without hardcoding, and we can hardcode nonexisting dirs.
-    hardcode_action=immediate
-  fi
-elif test "$hardcode_direct" != yes && \
-     test "$hardcode_minus_L" != yes && \
-     test "$hardcode_shlibpath_var" != yes; then
-  # We cannot hardcode anything.
-  hardcode_action=unsupported
-else
-  # We can only hardcode existing directories.
-  hardcode_action=relink
-fi
-echo "$ac_t$hardcode_action" 1>&6
-test "$hardcode_action" = unsupported && can_build_shared=no
-
-
-reload_flag=
-reload_cmds='$LD$reload_flag -o $output$reload_objs'
-echo $ac_n "checking for $LD option to reload object files... $ac_c" 1>&6
-# PORTME Some linker may need a different reload flag.
-reload_flag='-r'
-echo "$ac_t$reload_flag"
-test -n "$reload_flag" && reload_flag=" $reload_flag"
-
-# PORTME Fill in your ld.so characteristics
-library_names_spec=
-libname_spec='lib$name'
-soname_spec=
-postinstall_cmds=
-postuninstall_cmds=
-finish_cmds=
-finish_eval=
-shlibpath_var=
-version_type=none
-dynamic_linker="$host_os ld.so"
-
-echo $ac_n "checking dynamic linker characteristics... $ac_c" 1>&6
-case "$host_os" in
-aix3* | aix4*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix $libname.a'
-  shlibpath_var=LIBPATH
-
-  # AIX has no versioning support, so we append a major version to the name.
-  soname_spec='${libname}${release}.so.$major'
-  ;;
-
-amigaos*)
-  library_names_spec='$libname.ixlibrary $libname.a'
-  # Create ${libname}_ixlibrary.a entries in /sys/libs.
-  finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done'
-  ;;
-
-freebsd2* | freebsd3*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-gnu*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-hpux9* | hpux10*)
-  # Give a soname corresponding to the major version so that dld.sl refuses to
-  # link against other versions.
-  dynamic_linker="$host_os dld.sl"
-  version_type=sunos
-  shlibpath_var=SHLIB_PATH
-  library_names_spec='${libname}${release}.sl.$versuffix ${libname}${release}.sl.$major $libname.sl'
-  soname_spec='${libname}${release}.sl.$major'
-  # HP-UX runs *really* slowly unless shared libraries are mode 555.
-  postinstall_cmds='chmod 555 $lib'
-  ;;
-
-irix5* | irix6*)
-  version_type=osf
-  soname_spec='${libname}${release}.so'
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-# No shared lib support for Linux oldld, aout, or coff.
-linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*)
-  dynamic_linker=no
-  ;;
-
-# This must be Linux ELF.
-linux-gnu*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -n $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-
-  if test -f /lib/ld.so.1; then
-    dynamic_linker='GNU ld.so'
-  else
-    # Only the GNU ld.so supports shared libraries on MkLinux.
-    case "$host_cpu" in
-    powerpc*) dynamic_linker=no ;;
-    *) dynamic_linker='Linux ld.so' ;;
-    esac
-  fi
-  ;;
-
-netbsd* | openbsd*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  finish_cmds='PATH="$PATH:/sbin" ldconfig -m $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-os2*)
-  libname_spec='$name'
-  library_names_spec='$libname.dll $libname.a'
-  dynamic_linker='OS/2 ld.exe'
-  shlibpath_var=LIBPATH
-  ;;
-
-osf3* | osf4*)
-  version_type=osf
-  soname_spec='${libname}${release}.so'
-  library_names_spec='${libname}${release}.so.$versuffix $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sco3.2v5*)
-  version_type=osf
-  soname_spec='${libname}${release}.so.$major'
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-solaris2*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sunos4*)
-  version_type=sunos
-  library_names_spec='${libname}${release}.so.$versuffix'
-  finish_cmds='PATH="$PATH:/usr/etc" ldconfig $libdir'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-sysv4.2uw2*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-uts4*)
-  version_type=linux
-  library_names_spec='${libname}${release}.so.$versuffix ${libname}${release}.so.$major $libname.so'
-  soname_spec='${libname}${release}.so.$major'
-  shlibpath_var=LD_LIBRARY_PATH
-  ;;
-
-*)
-  dynamic_linker=no
-  ;;
-esac
-echo "$ac_t$dynamic_linker"
-test "$dynamic_linker" = no && can_build_shared=no
-
-# Report the final consequences.
-echo "checking if libtool supports shared libraries... $can_build_shared" 1>&6
-
-echo $ac_n "checking whether to build shared libraries... $ac_c" 1>&6
-test "$can_build_shared" = "no" && enable_shared=no
-
-# On AIX, shared libraries and static libraries use the same namespace, and
-# are all built from PIC.
-case "$host_os" in
-aix*)
-  test "$enable_shared" = yes && enable_static=no
-  if test -n "$RANLIB"; then
-    archive_cmds="$archive_cmds;\$RANLIB \$lib"
-    postinstall_cmds='$RANLIB $lib'
-  fi
-  ;;
-esac
-
-echo "$ac_t$enable_shared" 1>&6
-
-# Make sure either enable_shared or enable_static is yes.
-test "$enable_shared" = yes || enable_static=yes
-
-echo "checking whether to build static libraries... $enable_static" 1>&6
-
-echo $ac_n "checking for objdir... $ac_c" 1>&6
-rm -f .libs 2>/dev/null
-mkdir .libs 2>/dev/null
-if test -d .libs; then
-  objdir=.libs
-else
-  # MS-DOS does not allow filenames that begin with a dot.
-  objdir=_libs
-fi
-rmdir .libs 2>/dev/null
-echo "$ac_t$objdir" 1>&6
-
-# Copy echo and quote the copy, instead of the original, because it is
-# used later.
-ltecho="$echo"
-
-# Now quote all the things that may contain metacharacters.
-for var in ltecho old_CC old_CFLAGS old_CPPFLAGS old_LD old_NM old_RANLIB \
-  old_LN_S AR CC LD LN_S NM reload_flag reload_cmds wl pic_flag \
-  link_static_flag no_builtin_flag export_dynamic_flag_spec \
-  libname_spec library_names_spec soname_spec RANLIB \
-  old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \
-  old_postuninstall_cmds archive_cmds postinstall_cmds postuninstall_cmds \
-  allow_undefined_flag no_undefined_flag \
-  finish_cmds finish_eval global_symbol_pipe \
-  hardcode_libdir_flag_spec hardcode_libdir_separator; do
-
-  case "$var" in
-  reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \
-  old_postinstall_cmds | old_postuninstall_cmds | archive_cmds | \
-  postinstall_cmds | postuninstall_cmds | finish_cmds)
-    # Double-quote double-evaled strings.
-    eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\"\`"
-    ;;
-  *)
-    eval "$var=\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`"
-    ;;
-  esac
-done
-
-ofile=libtool
-trap "$rm $ofile; exit 1" 1 2 15
-echo creating $ofile
-$rm $ofile
-cat <<EOF > $ofile
-#! /bin/sh
-
-# libtool - Provide generalized library-building support services.
-# Generated automatically by $PROGRAM - GNU $PACKAGE $VERSION
-# NOTE: Changes made to this file will be lost: look at ltconfig or ltmain.sh.
-#
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# This program was configured as follows,
-# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
-#
-# CC="$old_CC" CFLAGS="$old_CFLAGS" CPPFLAGS="$old_CPPFLAGS" \\
-# LD="$old_LD" NM="$old_NM" RANLIB="$old_RANLIB" LN_S="$old_LN_S" \\
-#   $0$ltconfig_args
-#
-# Compiler and other test output produced by $progname, useful for
-# debugging $progname, is in ./config.log if it exists.
-
-# Sed that helps us avoid accidentally triggering echo(1) options like -n.
-Xsed="sed -e s/^X//"
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-if test "\${CDPATH+set}" = set; then CDPATH=; export CDPATH; fi
-
-# An echo program that does not interpret backslashes.
-echo="$ltecho"
-
-# The version of $progname that generated this script.
-LTCONFIG_VERSION="$VERSION"
-
-# Shell to use when invoking shell scripts.
-SHELL=${CONFIG_SHELL-/bin/sh}
-
-# Whether or not to build libtool libraries.
-build_libtool_libs=$enable_shared
-
-# Whether or not to build old-style libraries.
-build_old_libs=$enable_static
-
-# The host system.
-host_alias="$host_alias"
-host="$host"
-
-# The archiver.
-AR="$AR"
-
-# The default C compiler.
-CC="$CC"
-
-# The linker used to build libraries.
-LD="$LD"
-
-# Whether we need hard or soft links.
-LN_S="$LN_S"
-
-# A BSD-compatible nm program.
-NM="$NM"
-
-# The name of the directory that contains temporary libtool files.
-objdir="$objdir"
-
-# How to create reloadable object files.
-reload_flag="$reload_flag"
-reload_cmds="$reload_cmds"
-
-# How to pass a linker flag through the compiler.
-wl="$wl"
-
-# Additional compiler flags for building library objects.
-pic_flag="$pic_flag"
-
-# Compiler flag to prevent dynamic linking.
-link_static_flag="$link_static_flag"
-
-# Compiler flag to turn off builtin functions.
-no_builtin_flag="$no_builtin_flag"
-
-# Compiler flag to allow reflexive dlopens.
-export_dynamic_flag_spec="$export_dynamic_flag_spec"
-
-# Library versioning type.
-version_type=$version_type
-
-# Format of library name prefix.
-libname_spec="$libname_spec"
-
-# List of archive names.  First name is the real one, the rest are links.
-# The last name is the one that the linker finds with -lNAME.
-library_names_spec="$library_names_spec"
-
-# The coded name of the library, if different from the real name.
-soname_spec="$soname_spec"
-
-# Commands used to build and install an old-style archive.
-RANLIB="$RANLIB"
-old_archive_cmds="$old_archive_cmds"
-old_postinstall_cmds="$old_postinstall_cmds"
-old_postuninstall_cmds="$old_postuninstall_cmds"
-
-# Create an old-style archive from a shared archive.
-old_archive_from_new_cmds="$old_archive_from_new_cmds"
-
-# Commands used to build and install a shared archive.
-archive_cmds="$archive_cmds"
-postinstall_cmds="$postinstall_cmds"
-postuninstall_cmds="$postuninstall_cmds"
-
-# Flag that allows shared libraries with undefined symbols to be built.
-allow_undefined_flag="$allow_undefined_flag"
-
-# Flag that forces no undefined symbols.
-no_undefined_flag="$no_undefined_flag"
-
-# Commands used to finish a libtool library installation in a directory.
-finish_cmds="$finish_cmds"
-
-# Same as above, but a single script fragment to be evaled but not shown.
-finish_eval="$finish_eval"
-
-# Take the output of nm and produce a listing of raw symbols and C names.
-global_symbol_pipe="$global_symbol_pipe"
-
-# This is the shared library runtime path variable.
-runpath_var=$runpath_var
-
-# This is the shared library path variable.
-shlibpath_var=$shlibpath_var
-
-# How to hardcode a shared library path into an executable.
-hardcode_action=$hardcode_action
-
-# Flag to hardcode \$libdir into a binary during linking.
-# This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec="$hardcode_libdir_flag_spec"
-
-# Whether we need a single -rpath flag with a separated argument.
-hardcode_libdir_separator="$hardcode_libdir_separator"
-
-# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the
-# resulting binary.
-hardcode_direct=$hardcode_direct
-
-# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
-# resulting binary.
-hardcode_minus_L=$hardcode_minus_L
-
-# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
-# the resulting binary.
-hardcode_shlibpath_var=$hardcode_shlibpath_var
-
-EOF
-
-case "$host_os" in
-aix3*)
-  cat <<\EOF >> $ofile
-# AIX sometimes has problems with the GCC collect2 program.  For some
-# reason, if we set the COLLECT_NAMES environment variable, the problems
-# vanish in a puff of smoke.
-if test "${COLLECT_NAMES+set}" != set; then
-  COLLECT_NAMES=
-  export COLLECT_NAMES
-fi
-
-EOF
-  ;;
-esac
-
-# Append the ltmain.sh script.
-cat "$ltmain" >> $ofile || (rm -f $ofile; exit 1)
-
-chmod +x $ofile
-exit 0
-
-# Local Variables:
-# mode:shell-script
-# sh-indentation:2
-# End:
diff --git a/ltmain.sh b/ltmain.sh
deleted file mode 100644
index e9350b3..0000000
--- a/ltmain.sh
+++ /dev/null
@@ -1,2453 +0,0 @@
-# ltmain.sh - Provide generalized library-building support services.
-# NOTE: Changing this file will not affect anything until you rerun ltconfig.
-#
-# Copyright (C) 1996-1998 Free Software Foundation, Inc.
-# Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# The name of this program.
-progname=`$echo "$0" | sed 's%^.*/%%'`
-modename="$progname"
-
-# Constants.
-PROGRAM=ltmain.sh
-PACKAGE=libtool
-VERSION=1.2
-
-default_mode=
-help="Try \`$progname --help' for more information."
-magic="%%%MAGIC variable%%%"
-mkdir="mkdir"
-mv="mv -f"
-rm="rm -f"
-
-# Sed substitution that helps us do robust quoting.  It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
-sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g'
-
-# NLS nuisances.
-# Only set LANG and LC_ALL to C if already set.
-# These must not be set unconditionally because not all systems understand
-# e.g. LANG=C (notably SCO).
-if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
-if test "${LANG+set}"   = set; then LANG=C;   export LANG;   fi
-
-if test "$LTCONFIG_VERSION" != "$VERSION"; then
-  echo "$modename: ltconfig version \`$LTCONFIG_VERSION' does not match $PROGRAM version \`$VERSION'" 1>&2
-  echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
-  exit 1
-fi
-
-if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
-  echo "$modename: not configured to build any kind of library" 1>&2
-  echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
-  exit 1
-fi
-
-# Global variables.
-mode=$default_mode
-nonopt=
-prev=
-prevopt=
-run=
-show="$echo"
-show_help=
-execute_dlfiles=
-
-# Parse our command line options once, thoroughly.
-while test $# -gt 0
-do
-  arg="$1"
-  shift
-
-  case "$arg" in
-  -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;;
-  *) optarg= ;;
-  esac
-
-  # If the previous option needs an argument, assign it.
-  if test -n "$prev"; then
-    case "$prev" in
-    execute_dlfiles)
-      eval "$prev=\"\$$prev \$arg\""
-      ;;
-    *)
-      eval "$prev=\$arg"
-      ;;
-    esac
-
-    prev=
-    prevopt=
-    continue
-  fi
-
-  # Have we seen a non-optional argument yet?
-  case "$arg" in
-  --help)
-    show_help=yes
-    ;;
-
-  --version)
-    echo "$PROGRAM (GNU $PACKAGE) $VERSION"
-    exit 0
-    ;;
-
-  --dry-run | -n)
-    run=:
-    ;;
-
-  --features)
-    echo "host: $host"
-    if test "$build_libtool_libs" = yes; then
-      echo "enable shared libraries"
-    else
-      echo "disable shared libraries"
-    fi
-    if test "$build_old_libs" = yes; then
-      echo "enable static libraries"
-    else
-      echo "disable static libraries"
-    fi
-    exit 0
-    ;;
-
-  --finish) mode="finish" ;;
-
-  --mode) prevopt="--mode" prev=mode ;;
-  --mode=*) mode="$optarg" ;;
-
-  --quiet | --silent)
-    show=:
-    ;;
-
-  -dlopen)
-    prevopt="-dlopen"
-    prev=execute_dlfiles
-    ;;
-
-  -*)
-    $echo "$modename: unrecognized option \`$arg'" 1>&2
-    $echo "$help" 1>&2
-    exit 1
-    ;;
-
-  *)
-    nonopt="$arg"
-    break
-    ;;
-  esac
-done
-
-if test -n "$prevopt"; then
-  $echo "$modename: option \`$prevopt' requires an argument" 1>&2
-  $echo "$help" 1>&2
-  exit 1
-fi
-
-if test -z "$show_help"; then
-
-  # Infer the operation mode.
-  if test -z "$mode"; then
-    case "$nonopt" in
-    *cc | *++ | gcc* | *-gcc*)
-      mode=link
-      for arg
-      do
-        case "$arg" in
-        -c)
-           mode=compile
-           break
-           ;;
-        esac
-      done
-      ;;
-    *db | *dbx)
-      mode=execute
-      ;;
-    *install*|cp|mv)
-      mode=install
-      ;;
-    *rm)
-      mode=uninstall
-      ;;
-    *)
-      # If we have no mode, but dlfiles were specified, then do execute mode.
-      test -n "$execute_dlfiles" && mode=execute
-
-      # Just use the default operation mode.
-      if test -z "$mode"; then
-        if test -n "$nonopt"; then
-          $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
-        else
-          $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
-        fi
-      fi
-      ;;
-    esac
-  fi
-
-  # Only execute mode is allowed to have -dlopen flags.
-  if test -n "$execute_dlfiles" && test "$mode" != execute; then
-    $echo "$modename: unrecognized option \`-dlopen'" 1>&2
-    $echo "$help" 1>&2
-    exit 1
-  fi
-
-  # Change the help message to a mode-specific one.
-  generic_help="$help"
-  help="Try \`$modename --help --mode=$mode' for more information."
-
-  # These modes are in order of execution frequency so that they run quickly.
-  case "$mode" in
-  # libtool compile mode
-  compile)
-    modename="$modename: compile"
-    # Get the compilation command and the source file.
-    base_compile=
-    lastarg=
-    srcfile="$nonopt"
-    suppress_output=
-
-    for arg
-    do
-      # Accept any command-line options.
-      case "$arg" in
-      -o)
-	$echo "$modename: you cannot specify the output filename with \`-o'" 1>&2
-	$echo "$help" 1>&2
-	exit 1
-	;;
-
-      -static)
-	build_libtool_libs=no
-	build_old_libs=yes
-	continue
-	;;
-      esac
-
-      # Accept the current argument as the source file.
-      lastarg="$srcfile"
-      srcfile="$arg"
-
-      # Aesthetically quote the previous argument.
-
-      # Backslashify any backslashes, double quotes, and dollar signs.
-      # These are the only characters that are still specially
-      # interpreted inside of double-quoted scrings.
-      lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"`
-
-      # Double-quote args containing other shell metacharacters.
-      # Many Bourne shells cannot handle close brackets correctly in scan
-      # sets, so we specify it separately.
-      case "$lastarg" in
-      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-	lastarg="\"$lastarg\""
-	;;
-      esac
-
-      # Add the previous argument to base_compile.
-      if test -z "$base_compile"; then
-	base_compile="$lastarg"
-      else
-	base_compile="$base_compile $lastarg"
-      fi
-    done
-
-    # Get the name of the library object.
-    libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
-
-    # Recognize several different file suffixes.
-    xform='[cCFSfms]'
-    case "$libobj" in
-    *.ada) xform=ada ;;
-    *.adb) xform=adb ;;
-    *.ads) xform=ads ;;
-    *.asm) xform=asm ;;
-    *.c++) xform=c++ ;;
-    *.cc) xform=cc ;;
-    *.cpp) xform=cpp ;;
-    *.cxx) xform=cxx ;;
-    *.f90) xform=f90 ;;
-    *.for) xform=for ;;
-    esac
-
-    libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"`
-
-    case "$libobj" in
-    *.lo) obj=`$echo "X$libobj" | $Xsed -e 's/\.lo$/.o/'` ;;
-    *)
-      $echo "$modename: cannot determine name of library object from \`$srcfile'" 1>&2
-      exit 1
-      ;;
-    esac
-
-    if test -z "$base_compile"; then
-      $echo "$modename: you must specify a compilation command" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    # Delete any leftover library objects.
-    if test "$build_old_libs" = yes; then
-      $run $rm $obj $libobj
-      trap "$run $rm $obj $libobj; exit 1" 1 2 15
-    else
-      $run $rm $libobj
-      trap "$run $rm $libobj; exit 1" 1 2 15
-    fi
-
-    # Only build a PIC object if we are building libtool libraries.
-    if test "$build_libtool_libs" = yes; then
-      # Without this assignment, base_compile gets emptied.
-      fbsd_hideous_sh_bug=$base_compile
-
-      # All platforms use -DPIC, to notify preprocessed assembler code.
-      $show "$base_compile$pic_flag -DPIC $srcfile"
-      if $run eval "$base_compile\$pic_flag -DPIC \$srcfile"; then :
-      else
-        test -n "$obj" && $run $rm $obj
-        exit 1
-      fi
-
-      # If we have no pic_flag, then copy the object into place and finish.
-      if test -z "$pic_flag"; then
-        $show "$LN_S $obj $libobj"
-        $run $LN_S $obj $libobj
-        exit $?
-      fi
-
-      # Just move the object, then go on to compile the next one
-      $show "$mv $obj $libobj"
-      $run $mv $obj $libobj || exit 1
-
-      # Allow error messages only from the first compilation.
-      suppress_output=' >/dev/null 2>&1'
-    fi
-
-    # Only build a position-dependent object if we build old libraries.
-    if test "$build_old_libs" = yes; then
-      # Suppress compiler output if we already did a PIC compilation.
-      $show "$base_compile $srcfile$suppress_output"
-      if $run eval "$base_compile \$srcfile$suppress_output"; then :
-      else
-        $run $rm $obj $libobj
-        exit 1
-      fi
-    fi
-
-    # Create an invalid libtool object if no PIC, so that we do not
-    # accidentally link it into a program.
-    if test "$build_libtool_libs" != yes; then
-      $show "echo timestamp > $libobj"
-      $run eval "echo timestamp > \$libobj" || exit $?
-    fi
-
-    exit 0
-    ;;
-
-  # libtool link mode
-  link)
-    modename="$modename: link"
-    CC="$nonopt"
-    allow_undefined=yes
-    compile_command="$CC"
-    finalize_command="$CC"
-
-    compile_shlibpath=
-    finalize_shlibpath=
-    deplibs=
-    dlfiles=
-    dlprefiles=
-    export_dynamic=no
-    hardcode_libdirs=
-    libobjs=
-    link_against_libtool_libs=
-    ltlibs=
-    objs=
-    prev=
-    prevarg=
-    release=
-    rpath=
-    perm_rpath=
-    temp_rpath=
-    vinfo=
-
-    # We need to know -static, to get the right output filenames.
-    for arg
-    do
-      case "$arg" in
-      -all-static | -static)
-        if test "X$arg" = "X-all-static" && test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
-	    $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2
-        fi
-        build_libtool_libs=no
-	build_old_libs=yes
-        break
-        ;;
-      esac
-    done
-
-    # See if our shared archives depend on static archives.
-    test -n "$old_archive_from_new_cmds" && build_old_libs=yes
-
-    # Go through the arguments, transforming them on the way.
-    for arg
-    do
-      # If the previous option needs an argument, assign it.
-      if test -n "$prev"; then
-        case "$prev" in
-        output)
-          compile_command="$compile_command @OUTPUT@"
-          finalize_command="$finalize_command @OUTPUT@"
-          ;;
-        esac
-
-        case "$prev" in
-        dlfiles|dlprefiles)
-          case "$arg" in
-          *.la | *.lo) ;;  # We handle these cases below.
-          *)
-            dlprefiles="$dlprefiles $arg"
-            test "$prev" = dlfiles && dlfiles="$dlfiles $arg"
-            prev=
-            ;;
-          esac
-          ;;
-	release)
-	  release="-$arg"
-	  prev=
-	  continue
-	  ;;
-        rpath)
-          rpath="$rpath $arg"
-	  prev=
-	  continue
-	  ;;
-        *)
-          eval "$prev=\"\$arg\""
-          prev=
-          continue
-          ;;
-        esac
-      fi
-
-      prevarg="$arg"
-
-      case "$arg" in
-      -all-static)
-	if test -n "$link_static_flag"; then
-          compile_command="$compile_command $link_static_flag"
-	  finalize_command="$finalize_command $link_static_flag"
-        fi
-        continue
-	;;
-
-      -allow-undefined)
-	# FIXME: remove this flag sometime in the future.
-	$echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2
-	continue
-	;;
-
-      -dlopen)
-        prev=dlfiles
-        continue
-        ;;
-
-      -dlpreopen)
-        prev=dlprefiles
-        continue
-        ;;
-
-      -export-dynamic)
-        if test "$export_dynamic" != yes; then
-          export_dynamic=yes
-	  if test -n "$export_dynamic_flag_spec"; then
-	    eval arg=\"$export_dynamic_flag_spec\"
-	  else
-	    arg=
-	  fi
-
-          # Add the symbol object into the linking commands.
-	  compile_command="$compile_command @SYMFILE@"
-	  finalize_command="$finalize_command @SYMFILE@"
-        fi
-        ;;
-
-      -L*)
-        dir=`$echo "X$arg" | $Xsed -e 's%^-L\(.*\)$%\1%'`
-        case "$dir" in
-        /* | [A-Za-z]:\\*)
-	  # Add the corresponding hardcode_libdir_flag, if it is not identical.
-          ;;
-        *)
-          $echo "$modename: \`-L$dir' cannot specify a relative directory" 1>&2
-          exit 1
-          ;;
-        esac
-        deplibs="$deplibs $arg"
-        ;;
-
-      -l*) deplibs="$deplibs $arg" ;;
-
-      -no-undefined)
-	allow_undefined=no
-	continue
-	;;
-
-      -o) prev=output ;;
-
-      -release)
-	prev=release
-	continue
-	;;
-
-      -rpath)
-        prev=rpath
-        continue
-        ;;
-
-      -static)
-	# If we have no pic_flag, then this is the same as -all-static.
-	if test -z "$pic_flag" && test -n "$link_static_flag"; then
-          compile_command="$compile_command $link_static_flag"
-	  finalize_command="$finalize_command $link_static_flag"
-        fi
-	continue
-	;;
-
-      -version-info)
-        prev=vinfo
-        continue
-        ;;
-
-      # Some other compiler flag.
-      -* | +*)
-	# Unknown arguments in both finalize_command and compile_command need
-	# to be aesthetically quoted because they are evaled later.
-	arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-	case "$arg" in
-	*[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-	  arg="\"$arg\""
-	  ;;
-	esac
-        ;;
-
-      *.o | *.a)
-        # A standard object.
-        objs="$objs $arg"
-        ;;
-
-      *.lo)
-        # A library object.
-	if test "$prev" = dlfiles; then
-	  dlfiles="$dlfiles $arg"
-	  if test "$build_libtool_libs" = yes; then
-	    prev=
-	    continue
-	  else
-	    # If libtool objects are unsupported, then we need to preload.
-	    prev=dlprefiles
-	  fi
-	fi
-
-	if test "$prev" = dlprefiles; then
-	  # Preload the old-style object.
-	  dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e 's/\.lo$/\.o/'`
-	  prev=
-	fi
-	libobjs="$libobjs $arg"
-        ;;
-
-      *.la)
-        # A libtool-controlled library.
-
-        dlname=
-        libdir=
-        library_names=
-        old_library=
-
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $arg | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$arg' is not a valid libtool archive" 1>&2
-          exit 1
-        fi
-
-        # If there is no directory component, then add one.
-        case "$arg" in
-        */* | *\\*) . $arg ;;
-        *) . ./$arg ;;
-        esac
-
-        if test -z "$libdir"; then
-          $echo "$modename: \`$arg' contains no -rpath information" 1>&2
-          exit 1
-        fi
-
-        # Get the name of the library we link against.
-        linklib=
-        for l in $old_library $library_names; do
-          linklib="$l"
-        done
-
-        if test -z "$linklib"; then
-          $echo "$modename: cannot find name of link library for \`$arg'" 1>&2
-          exit 1
-        fi
-
-        # Find the relevant object directory and library name.
-        name=`$echo "X$arg" | $Xsed -e 's%^.*/%%' -e 's/\.la$//' -e 's/^lib//'`
-        dir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
-        if test "X$dir" = "X$arg"; then
-          dir="$objdir"
-        else
-          dir="$dir/$objdir"
-        fi
-
-        # This library was specified with -dlopen.
-        if test "$prev" = dlfiles; then
-          dlfiles="$dlfiles $arg"
-          if test -z "$dlname"; then
-            # If there is no dlname, we need to preload.
-            prev=dlprefiles
-          else
-            # We should not create a dependency on this library, but we
-	    # may need any libraries it requires.
-	    compile_command="$compile_command$dependency_libs"
-	    finalize_command="$finalize_command$dependency_libs"
-            prev=
-            continue
-          fi
-        fi
-
-        # The library was specified with -dlpreopen.
-        if test "$prev" = dlprefiles; then
-          # Prefer using a static library (so that no silly _DYNAMIC symbols
-          # are required to link).
-          if test -n "$old_library"; then
-            dlprefiles="$dlprefiles $dir/$old_library"
-          else
-            dlprefiles="$dlprefiles $dir/$linklib"
-          fi
-          prev=
-        fi
-
-        if test "$build_libtool_libs" = yes && test -n "$library_names"; then
-          link_against_libtool_libs="$link_against_libtool_libs $arg"
-          if test -n "$shlibpath_var"; then
-            # Make sure the rpath contains only unique directories.
-            case "$temp_rpath " in
-            *" $dir "*) ;;
-            *) temp_rpath="$temp_rpath $dir" ;;
-            esac
-          fi
-
-	  # This is the magic to use -rpath.
-          if test -n "$hardcode_libdir_flag_spec"; then
-            if test -n "$hardcode_libdir_separator"; then
-              if test -z "$hardcode_libdirs"; then
-                # Put the magic libdir with the hardcode flag.
-                hardcode_libdirs="$libdir"
-                libdir="@HARDCODE_LIBDIRS@"
-              else
-                # Just accumulate the unique libdirs.
-		case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
-		*"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
-		  ;;
-		*)
-		  hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
-		  ;;
-		esac
-                libdir=
-              fi
-            fi
-
-            if test -n "$libdir"; then
-              eval flag=\"$hardcode_libdir_flag_spec\"
-
-              compile_command="$compile_command $flag"
-              finalize_command="$finalize_command $flag"
-            fi
-          elif test -n "$runpath_var"; then
-            # Do the same for the permanent run path.
-            case "$perm_rpath " in
-            *" $libdir "*) ;;
-            *) perm_rpath="$perm_rpath $libdir" ;;
-            esac
-          fi
-
-
-          case "$hardcode_action" in
-          immediate)
-            if test "$hardcode_direct" = no; then
-              compile_command="$compile_command $dir/$linklib"
-            elif test "$hardcode_minus_L" = no; then
-              compile_command="$compile_command -L$dir -l$name"
-            elif test "$hardcode_shlibpath_var" = no; then
-              compile_shlibpath="$compile_shlibpath$dir:"
-              compile_command="$compile_command -l$name"
-            fi
-            ;;
-
-          relink)
-            # We need an absolute path.
-            case "$dir" in
-            /* | [A-Za-z]:\\*) ;;
-            *)
-              absdir=`cd "$dir" && pwd`
-              if test -z "$absdir"; then
-                $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
-                exit 1
-              fi
-              dir="$absdir"
-              ;;
-            esac
-
-            if test "$hardcode_direct" = yes; then
-              compile_command="$compile_command $dir/$linklib"
-            elif test "$hardcode_minus_L" = yes; then
-              compile_command="$compile_command -L$dir -l$name"
-            elif test "$hardcode_shlibpath_var" = yes; then
-              compile_shlibpath="$compile_shlibpath$dir:"
-              compile_command="$compile_command -l$name"
-            fi
-            ;;
-
-          *)
-            $echo "$modename: \`$hardcode_action' is an unknown hardcode action" 1>&2
-            exit 1
-            ;;
-          esac
-
-          # Finalize command for both is simple: just hardcode it.
-          if test "$hardcode_direct" = yes; then
-            finalize_command="$finalize_command $libdir/$linklib"
-          elif test "$hardcode_minus_L" = yes; then
-            finalize_command="$finalize_command -L$libdir -l$name"
-          elif test "$hardcode_shlibpath_var" = yes; then
-            finalize_shlibpath="$finalize_shlibpath$libdir:"
-            finalize_command="$finalize_command -l$name"
-          else
-            # We cannot seem to hardcode it, guess we'll fake it.
-            finalize_command="$finalize_command -L$libdir -l$name"
-          fi
-        else
-          # Transform directly to old archives if we don't build new libraries.
-          if test -n "$pic_flag" && test -z "$old_library"; then
-            $echo "$modename: cannot find static library for \`$arg'" 1>&2
-            exit 1
-          fi
-
-	  # Here we assume that one of hardcode_direct or hardcode_minus_L
-	  # is not unsupported.  This is valid on all known static and
-	  # shared platforms.
-	  if test "$hardcode_direct" != unsupported; then
-	    test -n "$old_library" && linklib="$old_library"
-	    compile_command="$compile_command $dir/$linklib"
-	    finalize_command="$finalize_command $dir/$linklib"
-	  else
-	    compile_command="$compile_command -L$dir -l$name"
-	    finalize_command="$finalize_command -L$dir -l$name"
-	  fi
-        fi
-
-	# Add in any libraries that this one depends upon.
-	compile_command="$compile_command$dependency_libs"
-	finalize_command="$finalize_command$dependency_libs"
-	continue
-        ;;
-
-      # Some other compiler argument.
-      *)
-	# Unknown arguments in both finalize_command and compile_command need
-	# to be aesthetically quoted because they are evaled later.
-	arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-	case "$arg" in
-	*[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-	  arg="\"$arg\""
-	  ;;
-	esac
-        ;;
-      esac
-
-      # Now actually substitute the argument into the commands.
-      if test -n "$arg"; then
-	compile_command="$compile_command $arg"
-	finalize_command="$finalize_command $arg"
-      fi
-    done
-
-    if test -n "$prev"; then
-      $echo "$modename: the \`$prevarg' option requires an argument" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    if test -n "$vinfo" && test -n "$release"; then
-      $echo "$modename: you cannot specify both \`-version-info' and \`-release'" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    oldlib=
-    oldobjs=
-    case "$output" in
-    "")
-      $echo "$modename: you must specify an output file" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-      ;;
-
-    */* | *\\*)
-      $echo "$modename: output file \`$output' must have no directory components" 1>&2
-      exit 1
-      ;;
-
-    *.a)
-      # Now set the variables for building old libraries.
-      build_libtool_libs=no
-      build_old_libs=yes
-      oldlib="$output"
-      $show "$rm $oldlib"
-      $run $rm $oldlib
-      ;;
-
-    *.la)
-      # Make sure we only generate libraries of the form `libNAME.la'.
-      case "$output" in
-      lib*) ;;
-      *)
-	$echo "$modename: libtool library \`$arg' must begin with \`lib'" 1>&2
-	$echo "$help" 1>&2
-	exit 1
-	;;
-      esac
-
-      name=`$echo "X$output" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
-      eval libname=\"$libname_spec\"
-
-      # All the library-specific variables (install_libdir is set above).
-      library_names=
-      old_library=
-      dlname=
-      current=0
-      revision=0
-      age=0
-
-      if test -n "$objs"; then
-        $echo "$modename: cannot build libtool library \`$output' from non-libtool objects:$objs" 2>&1
-        exit 1
-      fi
-
-      # How the heck are we supposed to write a wrapper for a shared library?
-      if test -n "$link_against_libtool_libs"; then
-        $echo "$modename: libtool library \`$output' may not depend on uninstalled libraries:$link_against_libtool_libs" 1>&2
-        exit 1
-      fi
-
-      if test -n "$dlfiles$dlprefiles"; then
-        $echo "$modename: warning: \`-dlopen' is ignored while creating libtool libraries" 1>&2
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
-      fi
-
-      if test -z "$rpath"; then
-        $echo "$modename: you must specify an installation directory with \`-rpath'" 1>&2
-	$echo "$help" 1>&2
-        exit 1
-      fi
-
-      set dummy $rpath
-      if test $# -gt 2; then
-	$echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
-      fi
-      install_libdir="$2"
-
-      # Parse the version information argument.
-      IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=':'
-      set dummy $vinfo
-      IFS="$save_ifs"
-
-      if test -n "$5"; then
-        $echo "$modename: too many parameters to \`-version-info'" 1>&2
-        $echo "$help" 1>&2
-        exit 1
-      fi
-
-      test -n "$2" && current="$2"
-      test -n "$3" && revision="$3"
-      test -n "$4" && age="$4"
-
-      # Check that each of the things are valid numbers.
-      case "$current" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
-
-      case "$revision" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
-
-      case "$age" in
-      0 | [1-9] | [1-9][0-9]*) ;;
-      *)
-        $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-        ;;
-      esac
-
-      if test $age -gt $current; then
-        $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
-        $echo "$modename: \`$vinfo' is not valid version information" 1>&2
-        exit 1
-      fi
-
-      # Calculate the version variables.
-      version_vars="version_type current age revision"
-      case "$version_type" in
-      none) ;;
-
-      linux)
-        version_vars="$version_vars major versuffix"
-        major=`expr $current - $age`
-        versuffix="$major.$age.$revision"
-        ;;
-
-      osf)
-        version_vars="$version_vars versuffix verstring"
-        major=`expr $current - $age`
-        versuffix="$current.$age.$revision"
-        verstring="$versuffix"
-
-        # Add in all the interfaces that we are compatible with.
-        loop=$age
-        while test $loop != 0; do
-          iface=`expr $current - $loop`
-          loop=`expr $loop - 1`
-          verstring="$verstring:${iface}.0"
-        done
-
-        # Make executables depend on our current version.
-        verstring="$verstring:${current}.0"
-        ;;
-
-      sunos)
-        version_vars="$version_vars major versuffix"
-        major="$current"
-        versuffix="$current.$revision"
-        ;;
-
-      *)
-        $echo "$modename: unknown library version type \`$version_type'" 1>&2
-        echo "Fatal configuration error.  See the $PACKAGE docs for more information." 1>&2
-        exit 1
-        ;;
-      esac
-
-      # Create the output directory, or remove our outputs if we need to.
-      if test -d $objdir; then
-        $show "$rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.*"
-        $run $rm $objdir/$output $objdir/$libname.* $objdir/${libname}${release}.*
-      else
-        $show "$mkdir $objdir"
-        $run $mkdir $objdir
-	status=$?
-	if test $status -eq 0 || test -d $objdir; then :
-	else
-	  exit $status
-	fi
-      fi
-
-      # Check to see if the archive will have undefined symbols.
-      if test "$allow_undefined" = yes; then
-        if test "$allow_undefined_flag" = unsupported; then
-          $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
-          build_libtool_libs=no
-	  build_old_libs=yes
-        fi
-      else
-        # Don't allow undefined symbols.
-        allow_undefined_flag="$no_undefined_flag"
-      fi
-
-      # Add libc to deplibs on all systems.
-      dependency_libs="$deplibs"
-      deplibs="$deplibs -lc"
-
-      if test "$build_libtool_libs" = yes; then
-        # Get the real and link names of the library.
-        eval library_names=\"$library_names_spec\"
-        set dummy $library_names
-        realname="$2"
-        shift; shift
-
-        if test -n "$soname_spec"; then
-          eval soname=\"$soname_spec\"
-        else
-          soname="$realname"
-        fi
-
-        lib="$objdir/$realname"
-	for link
-	do
-	  linknames="$linknames $link"
-	done
-
-        # Use standard objects if they are PIC.
-        test -z "$pic_flag" && libobjs=`$echo "X$libobjs " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//g'`
-
-        # Do each of the archive commands.
-        eval cmds=\"$archive_cmds\"
-        IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-        for cmd in $cmds; do
-          IFS="$save_ifs"
-          $show "$cmd"
-          $run eval "$cmd" || exit $?
-        done
-        IFS="$save_ifs"
-
-        # Create links to the real library.
-        for linkname in $linknames; do
-          $show "(cd $objdir && $LN_S $realname $linkname)"
-          $run eval '(cd $objdir && $LN_S $realname $linkname)' || exit $?
-        done
-
-        # If -export-dynamic was specified, set the dlname.
-        if test "$export_dynamic" = yes; then
-          # On all known operating systems, these are identical.
-          dlname="$soname"
-        fi
-      fi
-
-      # Now set the variables for building old libraries.
-      oldlib="$objdir/$libname.a"
-      ;;
-
-    *.lo | *.o)
-      if test -n "$link_against_libtool_libs"; then
-        $echo "$modename: error: cannot link libtool libraries into reloadable objects" 1>&2
-        exit 1
-      fi
-
-      if test -n "$deplibs"; then
-        $echo "$modename: warning: \`-l' and \`-L' are ignored while creating objects" 1>&2
-      fi
-
-      if test -n "$dlfiles$dlprefiles"; then
-        $echo "$modename: warning: \`-dlopen' is ignored while creating objects" 1>&2
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
-      fi
-
-      if test -n "$rpath"; then
-        $echo "$modename: warning: \`-rpath' is ignored while creating objects" 1>&2
-      fi
-
-      if test -n "$vinfo"; then
-        $echo "$modename: warning: \`-version-info' is ignored while creating objects" 1>&2
-      fi
-
-      if test -n "$release"; then
-        $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2
-      fi
-
-      case "$output" in
-      *.lo)
-        if test -n "$objs"; then
-          $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
-          exit 1
-        fi
-        libobj="$output"
-        obj=`$echo "X$output" | $Xsed -e 's/\.lo$/.o/'`
-        ;;
-      *)
-        libobj=
-        obj="$output"
-        ;;
-      esac
-
-      # Delete the old objects.
-      $run $rm $obj $libobj
-
-      # Create the old-style object.
-      reload_objs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^       ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'`
-
-      output="$obj"
-      eval cmds=\"$reload_cmds\"
-      IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-      for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
-      done
-      IFS="$save_ifs"
-
-      # Exit if we aren't doing a library object file.
-      test -z "$libobj" && exit 0
-
-      if test "$build_libtool_libs" != yes; then
-        # Create an invalid libtool object if no PIC, so that we don't
-        # accidentally link it into a program.
-        $show "echo timestamp > $libobj"
-        $run eval "echo timestamp > $libobj" || exit $?
-        exit 0
-      fi
-
-      if test -n "$pic_flag"; then
-        # Only do commands if we really have different PIC objects.
-        reload_objs="$libobjs"
-        output="$libobj"
-        eval cmds=\"$reload_cmds\"
-        IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-        for cmd in $cmds; do
-          IFS="$save_ifs"
-          $show "$cmd"
-          $run eval "$cmd" || exit $?
-        done
-        IFS="$save_ifs"
-      else
-        # Just create a symlink.
-        $show "$LN_S $obj $libobj"
-        $run $LN_S $obj $libobj || exit 1
-      fi
-
-      exit 0
-      ;;
-
-    *)
-      if test -n "$vinfo"; then
-        $echo "$modename: warning: \`-version-info' is ignored while linking programs" 1>&2
-      fi
-
-      if test -n "$release"; then
-        $echo "$modename: warning: \`-release' is ignored while creating objects" 1>&2
-      fi
-
-      if test -n "$rpath"; then
-	# If the user specified any rpath flags, then add them.
-	for libdir in $rpath; do
-          if test -n "$hardcode_libdir_flag_spec"; then
-            if test -n "$hardcode_libdir_separator"; then
-              if test -z "$hardcode_libdirs"; then
-                # Put the magic libdir with the hardcode flag.
-                hardcode_libdirs="$libdir"
-                libdir="@HARDCODE_LIBDIRS@"
-              else
-                # Just accumulate the unique libdirs.
-		case "$hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator" in
-		*"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
-		  ;;
-		*)
-		  hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
-		  ;;
-		esac
-                libdir=
-              fi
-            fi
-
-            if test -n "$libdir"; then
-              eval flag=\"$hardcode_libdir_flag_spec\"
-
-              compile_command="$compile_command $flag"
-              finalize_command="$finalize_command $flag"
-            fi
-          elif test -n "$runpath_var"; then
-            case "$perm_rpath " in
-            *" $libdir "*) ;;
-            *) perm_rpath="$perm_rpath $libdir" ;;
-            esac
-          fi
-	done
-      fi
-
-      # Substitute the hardcoded libdirs into the compile commands.
-      if test -n "$hardcode_libdir_separator"; then
-	compile_command=`$echo "X$compile_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"`
-	finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@HARDCODE_LIBDIRS@%$hardcode_libdirs%g"`
-      fi
-
-      if test -n "$libobjs" && test "$build_old_libs" = yes; then
-        # Transform all the library objects into standard objects.
-        compile_command=`$echo "X$compile_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
-        finalize_command=`$echo "X$finalize_command " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
-      fi
-
-      if test "$export_dynamic" = yes && test -n "$NM" && test -n "$global_symbol_pipe"; then
-        dlsyms="${output}S.c"
-      else
-        dlsyms=
-      fi
-
-      if test -n "$dlsyms"; then
-        # Add our own program objects to the preloaded list.
-        dlprefiles=`$echo "X$objs$dlprefiles " | $Xsed -e 's/\.lo /.o /g' -e 's/ $//'`
-
-	# Discover the nlist of each of the dlfiles.
-        nlist="$objdir/${output}.nm"
-
-	if test -d $objdir; then
-	  $show "$rm $nlist ${nlist}T"
-	  $run $rm "$nlist" "${nlist}T"
-	else
-	  $show "$mkdir $objdir"
-	  $run $mkdir $objdir
-	  status=$?
-	  if test $status -eq 0 || test -d $objdir; then :
-	  else
-	    exit $status
-	  fi
-	fi
-
-        for arg in $dlprefiles; do
-	  $show "extracting global C symbols from \`$arg'"
-	  $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
-        done
-
-        # Parse the name list into a source file.
-        $show "creating $objdir/$dlsyms"
-        if test -z "$run"; then
-	  # Make sure we at least have an empty file.
-	  test -f "$nlist" || : > "$nlist"
-
-	  # Try sorting and uniquifying the output.
-	  if sort "$nlist" | uniq > "$nlist"T; then
-	    mv -f "$nlist"T "$nlist"
-	    wcout=`wc "$nlist" 2>/dev/null`
-	    count=`echo "X$wcout" | $Xsed -e 's/^[ 	]*\([0-9][0-9]*\).*$/\1/'`
-	    (test "$count" -ge 0) 2>/dev/null || count=-1
-	  else
-	    $rm "$nlist"T
-	    count=-1
-	  fi
-
-	  case "$dlsyms" in
-	  "") ;;
-	  *.c)
-	    $echo > "$objdir/$dlsyms" "\
-/* $dlsyms - symbol resolution table for \`$output' dlsym emulation. */
-/* Generated by $PROGRAM - GNU $PACKAGE $VERSION */
-
-#ifdef __cplusplus
-extern \"C\" {
-#endif
-
-/* Prevent the only kind of declaration conflicts we can make. */
-#define dld_preloaded_symbol_count some_other_symbol
-#define dld_preloaded_symbols some_other_symbol
-
-/* External symbol declarations for the compiler. */\
-"
-
-	    if test -f "$nlist"; then
-	      sed -e 's/^.* \(.*\)$/extern char \1;/' < "$nlist" >> "$objdir/$dlsyms"
-	    else
-	      echo '/* NONE */' >> "$objdir/$dlsyms"
-	    fi
-
-	    $echo >> "$objdir/$dlsyms" "\
-
-#undef dld_preloaded_symbol_count
-#undef dld_preloaded_symbols
-
-#if defined (__STDC__) && __STDC__
-# define __ptr_t void *
-#else
-# define __ptr_t char *
-#endif
-
-/* The number of symbols in dld_preloaded_symbols, -1 if unsorted. */
-int dld_preloaded_symbol_count = $count;
-
-/* The mapping between symbol names and symbols. */
-struct {
-  char *name;
-  __ptr_t address;
-}
-dld_preloaded_symbols[] =
-{\
-"
-
-	    if test -f "$nlist"; then
-	      sed 's/^\(.*\) \(.*\)$/  {"\1", (__ptr_t) \&\2},/' < "$nlist" >> "$objdir/$dlsyms"
-	    fi
-
-	    $echo >> "$objdir/$dlsyms" "\
-  {0, (__ptr_t) 0}
-};
-
-#ifdef __cplusplus
-}
-#endif\
-"
-	    ;;
-
-	  *)
-	    $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
-	    exit 1
-	    ;;
-	  esac
-        fi
-
-        # Now compile the dynamic symbol file.
-        $show "(cd $objdir && $CC -c$no_builtin_flag \"$dlsyms\")"
-        $run eval '(cd $objdir && $CC -c$no_builtin_flag "$dlsyms")' || exit $?
-
-        # Transform the symbol file into the correct name.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$objdir/${output}S.o%"`
-      elif test "$export_dynamic" != yes; then
-        test -n "$dlfiles$dlprefiles" && $echo "$modename: warning: \`-dlopen' and \`-dlpreopen' are ignored without \`-export-dynamic'" 1>&2
-      else
-        # We keep going just in case the user didn't refer to
-        # dld_preloaded_symbols.  The linker will fail if global_symbol_pipe
-        # really was required.
-        $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
-
-        # Nullify the symbol file.
-        compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
-      fi
-
-      if test -z "$link_against_libtool_libs" || test "$build_libtool_libs" != yes; then
-        # Replace the output file specification.
-        compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
-        finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
-
-        # We have no uninstalled library dependencies, so finalize right now.
-        $show "$compile_command"
-        $run eval "$compile_command"
-        exit $?
-      fi
-
-      # Replace the output file specification.
-      compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'%g'`
-      finalize_command=`$echo "X$finalize_command" | $Xsed -e 's%@OUTPUT@%'"$objdir/$output"'T%g'`
-
-      # Create the binary in the object directory, then wrap it.
-      if test -d $objdir; then :
-      else
-        $show "$mkdir $objdir"
-	$run $mkdir $objdir
-	status=$?
-	if test $status -eq 0 || test -d $objdir; then :
-	else
-	  exit $status
-	fi
-      fi
-
-      if test -n "$shlibpath_var"; then
-        # We should set the shlibpath_var
-        rpath=
-        for dir in $temp_rpath; do
-          case "$dir" in
-          /* | [A-Za-z]:\\*)
-            # Absolute path.
-            rpath="$rpath$dir:"
-            ;;
-          *)
-            # Relative path: add a thisdir entry.
-            rpath="$rpath\$thisdir/$dir:"
-            ;;
-          esac
-        done
-        temp_rpath="$rpath"
-      fi
-
-      # Delete the old output file.
-      $run $rm $output
-
-      if test -n "$compile_shlibpath"; then
-        compile_command="$shlibpath_var=\"$compile_shlibpath\$$shlibpath_var\" $compile_command"
-      fi
-      if test -n "$finalize_shlibpath"; then
-        finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
-      fi
-
-      if test -n "$runpath_var" && test -n "$perm_rpath"; then
-        # We should set the runpath_var.
-        rpath=
-        for dir in $perm_rpath; do
-          rpath="$rpath$dir:"
-        done
-        compile_command="$runpath_var=\"$rpath\$$runpath_var\" $compile_command"
-        finalize_command="$runpath_var=\"$rpath\$$runpath_var\" $finalize_command"
-      fi
-
-      case "$hardcode_action" in
-      relink)
-        # AGH! Flame the AIX and HP-UX people for me, will ya?
-        $echo "$modename: warning: using a buggy system linker" 1>&2
-        $echo "$modename: relinking will be required before \`$output' can be installed" 1>&2
-        ;;
-      esac
-
-      $show "$compile_command"
-      $run eval "$compile_command" || exit $?
-
-      # Now create the wrapper script.
-      $show "creating $output"
-
-      # Quote the finalize command for shipping.
-      finalize_command=`$echo "X$finalize_command" | $Xsed -e "$sed_quote_subst"`
-
-      # Quote $echo for shipping.
-      qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
-
-      # Only actually do things if our run command is non-null.
-      if test -z "$run"; then
-        $rm $output
-        trap "$rm $output; exit 1" 1 2 15
-
-        $echo > $output "\
-#! /bin/sh
-
-# $output - temporary wrapper script for $objdir/$output
-# Generated by ltmain.sh - GNU $PACKAGE $VERSION
-#
-# The $output program cannot be directly executed until all the libtool
-# libraries that it depends on are installed.
-#
-# This wrapper script should never be moved out of \``pwd`'.
-# If it is, it will not operate correctly.
-
-# Sed substitution that helps us do robust quoting.  It backslashifies
-# metacharacters that are still active within double-quoted strings.
-Xsed='sed -e s/^X//'
-sed_quote_subst='$sed_quote_subst'
-
-# The HP-UX ksh and POSIX shell print the target directory to stdout
-# if CDPATH is set.
-if test \"\${CDPATH+set}\" = set; then CDPATH=; export CDPATH; fi
-
-# This environment variable determines our operation mode.
-if test \"\$libtool_install_magic\" = \"$magic\"; then
-  # install mode needs the following variables:
-  link_against_libtool_libs='$link_against_libtool_libs'
-  finalize_command=\"$finalize_command\"
-else
-  # When we are sourced in execute mode, \$file and \$echo are already set.
-  if test \"\$libtool_execute_magic\" = \"$magic\"; then :
-  else
-    echo=\"$qecho\"
-    file=\"\$0\"
-  fi\
-"
-        $echo >> $output "\
-
-  # Find the directory that this script lives in.
-  thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
-  test \"x\$thisdir\" = \"x\$file\" && thisdir=.
-
-  # Follow symbolic links until we get to the real thisdir.
-  file=\`ls -ld \"\$file\" | sed -n 's/.*-> //p'\`
-  while test -n \"\$file\"; do
-    destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
-
-    # If there was a directory component, then change thisdir.
-    if test \"x\$destdir\" != \"x\$file\"; then
-      case \"\$destdir\" in
-      /* | [A-Za-z]:\\*) thisdir=\"\$destdir\" ;;
-      *) thisdir=\"\$thisdir/\$destdir\" ;;
-      esac
-    fi
-
-    file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
-    file=\`ls -ld \"\$thisdir/\$file\" | sed -n 's/.*-> //p'\`
-  done
-
-  # Try to get the absolute directory name.
-  absdir=\`cd \"\$thisdir\" && pwd\`
-  test -n \"\$absdir\" && thisdir=\"\$absdir\"
-
-  progdir=\"\$thisdir/$objdir\"
-  program='$output'
-
-  if test -f \"\$progdir/\$program\"; then"
-
-        # Export our shlibpath_var if we have one.
-        if test -n "$shlibpath_var" && test -n "$temp_rpath"; then
-          $echo >> $output "\
-    # Add our own library path to $shlibpath_var
-    $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
-
-    # Some systems cannot cope with colon-terminated $shlibpath_var
-    $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/:*\$//'\`
-
-    export $shlibpath_var
-"
-        fi
-
-        $echo >> $output "\
-    if test \"\$libtool_execute_magic\" != \"$magic\"; then
-      # Run the actual program with our arguments.
-
-      # Export the path to the program.
-      PATH=\"\$progdir:\$PATH\"
-      export PATH
-
-      exec \$program \${1+\"\$@\"}
-
-      \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\"
-      exit 1
-    fi
-  else
-    # The program doesn't exist.
-    \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2
-    \$echo \"This script is just a wrapper for \$program.\" 1>&2
-    echo \"See the $PACKAGE documentation for more information.\" 1>&2
-    exit 1
-  fi
-fi\
-"
-        chmod +x $output
-      fi
-      exit 0
-      ;;
-    esac
-
-    # See if we need to build an old-fashioned archive.
-    if test "$build_old_libs" = "yes"; then
-      # Transform .lo files to .o files.
-      oldobjs="$objs"`$echo "X$libobjs " | $Xsed -e 's/[^   ]*\.a //g' -e 's/\.lo /.o /g' -e 's/ $//g'`
-
-      # Do each command in the archive commands.
-      if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
-	eval cmds=\"$old_archive_from_new_cmds\"
-      else
-	eval cmds=\"$old_archive_cmds\"
-      fi
-      IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-      for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
-      done
-      IFS="$save_ifs"
-    fi
-
-    # Now create the libtool archive.
-    case "$output" in
-    *.la)
-      old_library=
-      test "$build_old_libs" = yes && old_library="$libname.a"
-
-      $show "creating $output"
-
-      # Only create the output if not a dry run.
-      if test -z "$run"; then
-        $echo > $output "\
-# $output - a libtool library file
-# Generated by ltmain.sh - GNU $PACKAGE $VERSION
-
-# The name that we can dlopen(3).
-dlname='$dlname'
-
-# Names of this library.
-library_names='$library_names'
-
-# The name of the static archive.
-old_library='$old_library'
-
-# Libraries that this one depends upon.
-dependency_libs='$dependency_libs'
-
-# Version information for $libname.
-current=$current
-age=$age
-revision=$revision
-
-# Directory that this library needs to be installed in:
-libdir='$install_libdir'\
-"
-      fi
-
-      # Do a symbolic link so that the libtool archive can be found in
-      # LD_LIBRARY_PATH before the program is installed.
-      $show "(cd $objdir && $LN_S ../$output $output)"
-      $run eval "(cd $objdir && $LN_S ../$output $output)" || exit 1
-      ;;
-    esac
-    exit 0
-    ;;
-
-  # libtool install mode
-  install)
-    modename="$modename: install"
-
-    # There may be an optional /bin/sh argument at the beginning of
-    # install_prog (especially on Windows NT).
-    if test "$nonopt" = "$SHELL"; then
-      # Aesthetically quote it.
-      arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"`
-      case "$arg" in
-      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-	arg="\"$arg\""
-	;;
-      esac
-      install_prog="$arg "
-      arg="$1"
-      shift
-    else
-      install_prog=
-      arg="$nonopt"
-    fi
-
-    # The real first argument should be the name of the installation program.
-    # Aesthetically quote it.
-    arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-    case "$arg" in
-    *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-      arg="\"$arg\""
-      ;;
-    esac
-    install_prog="$install_prog$arg"
-
-    # We need to accept at least all the BSD install flags.
-    dest=
-    files=
-    opts=
-    prev=
-    install_type=
-    isdir=
-    stripme=
-    for arg
-    do
-      if test -n "$dest"; then
-        files="$files $dest"
-        dest="$arg"
-        continue
-      fi
-
-      case "$arg" in
-      -d) isdir=yes ;;
-      -f) prev="-f" ;;
-      -g) prev="-g" ;;
-      -m) prev="-m" ;;
-      -o) prev="-o" ;;
-      -s)
-        stripme=" -s"
-        continue
-        ;;
-      -*) ;;
-
-      *)
-        # If the previous option needed an argument, then skip it.
-        if test -n "$prev"; then
-          prev=
-        else
-          dest="$arg"
-          continue
-        fi
-        ;;
-      esac
-
-      # Aesthetically quote the argument.
-      arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
-      case "$arg" in
-      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*)
-	arg="\"$arg\""
-	;;
-      esac
-      install_prog="$install_prog $arg"
-    done
-
-    if test -z "$install_prog"; then
-      $echo "$modename: you must specify an install program" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    if test -n "$prev"; then
-      $echo "$modename: the \`$prev' option requires an argument" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    if test -z "$files"; then
-      if test -z "$dest"; then
-        $echo "$modename: no file or destination specified" 1>&2
-      else
-        $echo "$modename: you must specify a destination" 1>&2
-      fi
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    # Strip any trailing slash from the destination.
-    dest=`$echo "X$dest" | $Xsed -e 's%/$%%'`
-
-    # Check to see that the destination is a directory.
-    test -d "$dest" && isdir=yes
-    if test -n "$isdir"; then
-      destdir="$dest"
-      destname=
-    else
-      destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'`
-      test "X$destdir" = "X$dest" && destdir=.
-      destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'`
-
-      # Not a directory, so check to see that there is only one file specified.
-      set dummy $files
-      if test $# -gt 2; then
-        $echo "$modename: \`$dest' is not a directory" 1>&2
-        $echo "$help" 1>&2
-        exit 1
-      fi
-    fi
-    case "$destdir" in
-    /* | [A-Za-z]:\\*) ;;
-    *)
-      for file in $files; do
-        case "$file" in
-        *.lo) ;;
-        *)
-          $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-          ;;
-        esac
-      done
-      ;;
-    esac
-
-    # This variable tells wrapper scripts just to set variables rather
-    # than running their programs.
-    libtool_install_magic="$magic"
-
-    staticlibs=
-    future_libdirs=
-    current_libdirs=
-    for file in $files; do
-
-      # Do each installation.
-      case "$file" in
-      *.a)
-        # Do the static libraries later.
-        staticlibs="$staticlibs $file"
-        ;;
-
-      *.la)
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-        fi
-
-        library_names=
-        old_library=
-        # If there is no directory component, then add one.
-        case "$file" in
-        */* | *\\*) . $file ;;
-        *) . ./$file ;;
-        esac
-
-        # Add the libdir to current_libdirs if it is the destination.
-        if test "X$destdir" = "X$libdir"; then
-          case "$current_libdirs " in
-          *" $libdir "*) ;;
-          *) current_libdirs="$current_libdirs $libdir" ;;
-          esac
-        else
-          # Note the libdir as a future libdir.
-          case "$future_libdirs " in
-          *" $libdir "*) ;;
-          *) future_libdirs="$future_libdirs $libdir" ;;
-          esac
-        fi
-
-        dir="`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/"
-        test "X$dir" = "X$file/" && dir=
-        dir="$dir$objdir"
-
-        # See the names of the shared library.
-        set dummy $library_names
-        if test -n "$2"; then
-          realname="$2"
-          shift
-          shift
-
-          # Install the shared library and build the symlinks.
-          $show "$install_prog $dir/$realname $destdir/$realname"
-          $run eval "$install_prog $dir/$realname $destdir/$realname" || exit $?
-          test "X$dlname" = "X$realname" && dlname=
-
-          if test $# -gt 0; then
-            # Delete the old symlinks.
-            rmcmd="$rm"
-            for linkname
-            do
-              rmcmd="$rmcmd $destdir/$linkname"
-            done
-            $show "$rmcmd"
-            $run $rmcmd
-
-            # ... and create new ones.
-            for linkname
-            do
-              test "X$dlname" = "X$linkname" && dlname=
-              $show "(cd $destdir && $LN_S $realname $linkname)"
-              $run eval "(cd $destdir && $LN_S $realname $linkname)"
-            done
-          fi
-
-          if test -n "$dlname"; then
-            # Install the dynamically-loadable library.
-            $show "$install_prog $dir/$dlname $destdir/$dlname"
-            $run eval "$install_prog $dir/$dlname $destdir/$dlname" || exit $?
-          fi
-
-          # Do each command in the postinstall commands.
-          lib="$destdir/$realname"
-          eval cmds=\"$postinstall_cmds\"
-          IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-          for cmd in $cmds; do
-            IFS="$save_ifs"
-            $show "$cmd"
-            $run eval "$cmd" || exit $?
-          done
-          IFS="$save_ifs"
-        fi
-
-        # Install the pseudo-library for information purposes.
-        name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-        $show "$install_prog $file $destdir/$name"
-        $run eval "$install_prog $file $destdir/$name" || exit $?
-
-        # Maybe install the static library, too.
-        test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
-        ;;
-
-      *.lo)
-        # Install (i.e. copy) a libtool object.
-
-        # Figure out destination file name, if it wasn't already specified.
-        if test -n "$destname"; then
-          destfile="$destdir/$destname"
-        else
-          destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-          destfile="$destdir/$destfile"
-        fi
-
-        # Deduce the name of the destination old-style object file.
-        case "$destfile" in
-        *.lo)
-          staticdest=`$echo "X$destfile" | $Xsed -e 's/\.lo$/\.o/'`
-          ;;
-        *.o)
-          staticdest="$destfile"
-          destfile=
-          ;;
-        *)
-          $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-          ;;
-        esac
-
-        # Install the libtool object if requested.
-        if test -n "$destfile"; then
-          $show "$install_prog $file $destfile"
-          $run eval "$install_prog $file $destfile" || exit $?
-        fi
-
-        # Install the old object if enabled.
-        if test "$build_old_libs" = yes; then
-          # Deduce the name of the old-style object file.
-          staticobj=`$echo "X$file" | $Xsed -e 's/\.lo$/\.o/'`
-
-          $show "$install_prog $staticobj $staticdest"
-          $run eval "$install_prog \$staticobj \$staticdest" || exit $?
-        fi
-        exit 0
-        ;;
-
-      *)
-        # Do a test to see if this is really a libtool program.
-        if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
-          link_against_libtool_libs=
-          finalize_command=
-
-          # If there is no directory component, then add one.
-          case "$file" in
-          */* | *\\*) . $file ;;
-          *) . ./$file ;;
-          esac
-
-          # Check the variables that should have been set.
-          if test -z "$link_against_libtool_libs" || test -z "$finalize_command"; then
-            $echo "$modename: invalid libtool wrapper script \`$file'" 1>&2
-            exit 1
-          fi
-
-          finalize=yes
-          for lib in $link_against_libtool_libs; do
-            # Check to see that each library is installed.
-            libdir=
-            if test -f "$lib"; then
-              # If there is no directory component, then add one.
-              case "$lib" in
-              */* | *\\*) . $lib ;;
-              *) . ./$lib ;;
-              esac
-            fi
-            libfile="$libdir/`$echo "X$lib" | $Xsed -e 's%^.*/%%g'`"
-            if test -z "$libdir"; then
-              $echo "$modename: warning: \`$lib' contains no -rpath information" 1>&2
-            elif test -f "$libfile"; then :
-            else
-              $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
-              finalize=no
-            fi
-          done
-
-          if test "$hardcode_action" = relink; then
-            if test "$finalize" = yes; then
-              $echo "$modename: warning: relinking \`$file' on behalf of your buggy system linker" 1>&2
-              $show "$finalize_command"
-              if $run eval "$finalize_command"; then :
-              else
-                $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
-                continue
-              fi
-              file="$objdir/$file"T
-            else
-              $echo "$modename: warning: cannot relink \`$file' on behalf of your buggy system linker" 1>&2
-            fi
-          else
-            # Install the binary that we compiled earlier.
-	    file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
-          fi
-        fi
-
-        $show "$install_prog$stripme $file $dest"
-        $run eval "$install_prog\$stripme \$file \$dest" || exit $?
-        ;;
-      esac
-    done
-
-    for file in $staticlibs; do
-      name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-
-      # Set up the ranlib parameters.
-      oldlib="$destdir/$name"
-
-      $show "$install_prog $file $oldlib"
-      $run eval "$install_prog \$file \$oldlib" || exit $?
-
-      # Do each command in the postinstall commands.
-      eval cmds=\"$old_postinstall_cmds\"
-      IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-      for cmd in $cmds; do
-        IFS="$save_ifs"
-        $show "$cmd"
-        $run eval "$cmd" || exit $?
-      done
-      IFS="$save_ifs"
-    done
-
-    if test -n "$future_libdirs"; then
-      $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2
-    fi
-
-    if test -n "$current_libdirs"; then
-      # Maybe just do a dry run.
-      test -n "$run" && current_libdirs=" -n$current_libdirs"
-      exec $SHELL $0 --finish$current_libdirs
-      exit 1
-    fi
-
-    exit 0
-    ;;
-
-  # libtool finish mode
-  finish)
-    modename="$modename: finish"
-    libdirs="$nonopt"
-
-    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
-      for dir
-      do
-        libdirs="$libdirs $dir"
-      done
-
-      for libdir in $libdirs; do
-	if test -n "$finish_cmds"; then
-	  # Do each command in the finish commands.
-	  eval cmds=\"$finish_cmds\"
-          IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-          for cmd in $cmds; do
-            IFS="$save_ifs"
-            $show "$cmd"
-            $run eval "$cmd"
-          done
-          IFS="$save_ifs"
-	fi
-	if test -n "$finish_eval"; then
-	  # Do the single finish_eval.
-	  eval cmds=\"$finish_eval\"
-	  $run eval "$cmds"
-	fi
-      done
-    fi
-
-    echo "------------------------------------------------------------------------------"
-    echo "Libraries have been installed in:"
-    for libdir in $libdirs; do
-      echo "   $libdir"
-    done
-    echo
-    echo "To link against installed libraries in a given directory, LIBDIR,"
-    echo "you must use the \`-LLIBDIR' flag during linking."
-    echo
-    echo " You will also need to do one of the following:"
-    if test -n "$shlibpath_var"; then
-      echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
-      echo "     during execution"
-    fi
-    if test -n "$runpath_var"; then
-      echo "   - add LIBDIR to the \`$runpath_var' environment variable"
-      echo "     during linking"
-    fi
-    if test -n "$hardcode_libdir_flag_spec"; then
-      libdir=LIBDIR
-      eval flag=\"$hardcode_libdir_flag_spec\"
-
-      echo "   - use the \`$flag' linker flag"
-    fi
-    if test -f /etc/ld.so.conf; then
-      echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
-    fi
-    echo
-    echo "See any operating system documentation about shared libraries for"
-    echo "more information, such as the ld(1) and ld.so(8) manual pages."
-    echo "------------------------------------------------------------------------------"
-    exit 0
-    ;;
-
-  # libtool execute mode
-  execute)
-    modename="$modename: execute"
-
-    # The first argument is the command name.
-    cmd="$nonopt"
-    if test -z "$cmd"; then
-      $echo "$modename: you must specify a COMMAND" 1>&2
-      $echo "$help"
-      exit 1
-    fi
-
-    # Handle -dlopen flags immediately.
-    for file in $execute_dlfiles; do
-      if test -f "$file"; then :
-      else
-	$echo "$modename: \`$file' is not a file" 1>&2
-	$echo "$help" 1>&2
-	exit 1
-      fi
-
-      dir=
-      case "$file" in
-      *.la)
-        # Check to see that this really is a libtool archive.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then :
-        else
-          $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
-          $echo "$help" 1>&2
-          exit 1
-        fi
-
-	# Read the libtool library.
-	dlname=
-	library_names=
-
-        # If there is no directory component, then add one.
-	case "$file" in
-	*/* | *\\*) . $file ;;
-        *) . ./$file ;;
-	esac
-
-	# Skip this library if it cannot be dlopened.
-	if test -z "$dlname"; then
-	  # Warn if it was a shared library.
-	  test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'"
-	  continue
-	fi
-
-	dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
-	test "X$dir" = "X$file" && dir=.
-
-	if test -f "$dir/$objdir/$dlname"; then
-	  dir="$dir/$objdir"
-	else
-	  $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2
-	  exit 1
-	fi
-	;;
-
-      *.lo)
-	# Just add the directory containing the .lo file.
-	dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
-	test "X$dir" = "X$file" && dir=.
-	;;
-
-      *)
-	$echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
-        continue
-	;;
-      esac
-
-      # Get the absolute pathname.
-      absdir=`cd "$dir" && pwd`
-      test -n "$absdir" && dir="$absdir"
-
-      # Now add the directory to shlibpath_var.
-      if eval "test -z \"\$$shlibpath_var\""; then
-	eval "$shlibpath_var=\"\$dir\""
-      else
-	eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
-      fi
-    done
-
-    # This variable tells wrapper scripts just to set shlibpath_var
-    # rather than running their programs.
-    libtool_execute_magic="$magic"
-
-    # Check if any of the arguments is a wrapper script.
-    args=
-    for file
-    do
-      case "$file" in
-      -*) ;;
-      *)
-        # Do a test to see if this is really a libtool program.
-        if (sed -e '4q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
-	  # If there is no directory component, then add one.
-	  case "$file" in
-	  */* | *\\*) . $file ;;
-	  *) . ./$file ;;
-	  esac
-
-	  # Transform arg to wrapped name.
-	  file="$progdir/$program"
-	fi
-        ;;
-      esac
-      # Quote arguments (to preserve shell metacharacters).
-      file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"`
-      args="$args \"$file\""
-    done
-
-    if test -z "$run"; then
-      # Export the shlibpath_var.
-      eval "export $shlibpath_var"
-
-      # Now actually exec the command.
-      eval "exec \$cmd$args"
-
-      $echo "$modename: cannot exec \$cmd$args"
-      exit 1
-    else
-      # Display what would be done.
-      eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
-      $echo "export $shlibpath_var"
-      $echo "$cmd$args"
-      exit 0
-    fi
-    ;;
-
-  # libtool uninstall mode
-  uninstall)
-    modename="$modename: uninstall"
-    rm="$nonopt"
-    files=
-
-    for arg
-    do
-      case "$arg" in
-      -*) rm="$rm $arg" ;;
-      *) files="$files $arg" ;;
-      esac
-    done
-
-    if test -z "$rm"; then
-      $echo "$modename: you must specify an RM program" 1>&2
-      $echo "$help" 1>&2
-      exit 1
-    fi
-
-    for file in $files; do
-      dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
-      test "X$dir" = "X$file" && dir=.
-      name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
-
-      rmfiles="$file"
-
-      case "$name" in
-      *.la)
-        # Possibly a libtool archive, so verify it.
-        if (sed -e '2q' $file | egrep '^# Generated by ltmain\.sh') >/dev/null 2>&1; then
-          . $dir/$name
-
-          # Delete the libtool libraries and symlinks.
-          for n in $library_names; do
-            rmfiles="$rmfiles $dir/$n"
-            test "X$n" = "X$dlname" && dlname=
-          done
-          test -n "$dlname" && rmfiles="$rmfiles $dir/$dlname"
-          test -n "$old_library" && rmfiles="$rmfiles $dir/$old_library"
-
-	  $show "$rm $rmfiles"
-	  $run $rm $rmfiles
-
-	  if test -n "$library_names"; then
-	    # Do each command in the postuninstall commands.
-	    eval cmds=\"$postuninstall_cmds\"
-	    IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-	    for cmd in $cmds; do
-	      IFS="$save_ifs"
-	      $show "$cmd"
-	      $run eval "$cmd"
-	    done
-	    IFS="$save_ifs"
-	  fi
-
-          if test -n "$old_library"; then
-	    # Do each command in the old_postuninstall commands.
-	    eval cmds=\"$old_postuninstall_cmds\"
-	    IFS="${IFS= 	}"; save_ifs="$IFS"; IFS=';'
-	    for cmd in $cmds; do
-	      IFS="$save_ifs"
-	      $show "$cmd"
-	      $run eval "$cmd"
-	    done
-	    IFS="$save_ifs"
-	  fi
-
-          # FIXME: should reinstall the best remaining shared library.
-        fi
-        ;;
-
-      *.lo)
-        if test "$build_old_libs" = yes; then
-          oldobj=`$echo "X$name" | $Xsed -e 's/\.lo$/\.o/'`
-          rmfiles="$rmfiles $dir/$oldobj"
-        fi
-	$show "$rm $rmfiles"
-	$run $rm $rmfiles
-        ;;
-
-      *)
-      	$show "$rm $rmfiles"
-	$run $rm $rmfiles
-	;;
-      esac
-    done
-    exit 0
-    ;;
-
-  "")
-    $echo "$modename: you must specify a MODE" 1>&2
-    $echo "$generic_help" 1>&2
-    exit 1
-    ;;
-  esac
-
-  $echo "$modename: invalid operation mode \`$mode'" 1>&2
-  $echo "$generic_help" 1>&2
-  exit 1
-fi # test -z "$show_help"
-
-# We need to display help for each of the modes.
-case "$mode" in
-"") $echo \
-"Usage: $modename [OPTION]... [MODE-ARG]...
-
-Provide generalized library-building support services.
-
--n, --dry-run         display commands without modifying any files
-    --features        display configuration information and exit
-    --finish          same as \`--mode=finish'
-    --help            display this help message and exit
-    --mode=MODE       use operation mode MODE [default=inferred from MODE-ARGS]
-    --quiet           same as \`--silent'
-    --silent          don't print informational messages
-    --version         print version information
-
-MODE must be one of the following:
-
-      compile         compile a source file into a libtool object
-      execute         automatically set library path, then run a program
-      finish          complete the installation of libtool libraries
-      install         install libraries or executables
-      link            create a library or an executable
-      uninstall       remove libraries from an installed directory
-
-MODE-ARGS vary depending on the MODE.  Try \`$modename --help --mode=MODE' for
-a more detailed description of MODE."
-  exit 0
-  ;;
-
-compile)
-  $echo \
-"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
-
-Compile a source file into a libtool library object.
-
-COMPILE-COMMAND is a command to be used in creating a \`standard' object file
-from the given SOURCEFILE.
-
-The output file name is determined by removing the directory component from
-SOURCEFILE, then substituting the C source code suffix \`.c' with the
-library object suffix, \`.lo'."
-  ;;
-
-execute)
-  $echo \
-"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]...
-
-Automatically set library path, then run a program.
-
-This mode accepts the following additional options:
-
-  -dlopen FILE      add the directory containing FILE to the library path
-
-This mode sets the library path environment variable according to \`-dlopen'
-flags.
-
-If any of the ARGS are libtool executable wrappers, then they are translated
-into their corresponding uninstalled binary, and any of their required library
-directories are added to the library path.
-
-Then, COMMAND is executed, with ARGS as arguments."
-  ;;
-
-finish)
-  $echo \
-"Usage: $modename [OPTION]... --mode=finish [LIBDIR]...
-
-Complete the installation of libtool libraries.
-
-Each LIBDIR is a directory that contains libtool libraries.
-
-The commands that this mode executes may require superuser privileges.  Use
-the \`--dry-run' option if you just want to see what would be executed."
-  ;;
-
-install)
-  $echo \
-"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND...
-
-Install executables or libraries.
-
-INSTALL-COMMAND is the installation command.  The first component should be
-either the \`install' or \`cp' program.
-
-The rest of the components are interpreted as arguments to that command (only
-BSD-compatible install options are recognized)."
-  ;;
-
-link)
-  $echo \
-"Usage: $modename [OPTION]... --mode=link LINK-COMMAND...
-
-Link object files or libraries together to form another library, or to
-create an executable program.
-
-LINK-COMMAND is a command using the C compiler that you would use to create
-a program from several object files.
-
-The following components of LINK-COMMAND are treated specially:
-
-  -all-static       do not do any dynamic linking at all
-  -dlopen FILE      \`-dlpreopen' FILE if it cannot be dlopened at runtime
-  -dlpreopen FILE   link in FILE and add its symbols to dld_preloaded_symbols
-  -export-dynamic   allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
-  -LLIBDIR          search LIBDIR for required installed libraries
-  -lNAME            OUTPUT-FILE requires the installed library libNAME
-  -no-undefined     declare that a library does not refer to external symbols
-  -o OUTPUT-FILE    create OUTPUT-FILE from the specified objects
-  -release RELEASE  specify package release information
-  -rpath LIBDIR     the created library will eventually be installed in LIBDIR
-  -static           do not do any dynamic linking of libtool libraries
-  -version-info CURRENT[:REVISION[:AGE]]
-                    specify library version info [each variable defaults to 0]
-
-All other options (arguments beginning with \`-') are ignored.
-
-Every other argument is treated as a filename.  Files ending in \`.la' are
-treated as uninstalled libtool libraries, other files are standard or library
-object files.
-
-If the OUTPUT-FILE ends in \`.la', then a libtool library is created, only
-library objects (\`.lo' files) may be specified, and \`-rpath' is required.
-
-If OUTPUT-FILE ends in \`.a', then a standard library is created using \`ar'
-and \`ranlib'.
-
-If OUTPUT-FILE ends in \`.lo' or \`.o', then a reloadable object file is
-created, otherwise an executable program is created."
-  ;;
-
-uninstall)
-  $echo
-"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
-
-Remove libraries from an installation directory.
-
-RM is the name of the program to use to delete files associated with each FILE
-(typically \`/bin/rm').  RM-OPTIONS are options (such as \`-f') to be passed
-to RM.
-
-If FILE is a libtool library, all the files associated with it are deleted.
-Otherwise, only FILE itself is deleted using RM."
-  ;;
-
-*)
-  $echo "$modename: invalid operation mode \`$mode'" 1>&2
-  $echo "$help" 1>&2
-  exit 1
-  ;;
-esac
-
-echo
-$echo "Try \`$modename --help' for more information about other modes."
-
-exit 0
-
-# Local Variables:
-# mode:shell-script
-# sh-indentation:2
-# End:
diff --git a/makcjpeg.st b/makcjpeg.st
deleted file mode 100644
index fc72c89..0000000
--- a/makcjpeg.st
+++ /dev/null
@@ -1,38 +0,0 @@
-; Project file for Independent JPEG Group's software
-;
-; This project file is for Atari ST/STE/TT systems using Pure C or Turbo C.
-; Thanks to Frank Moehle (Frank.Moehle@arbi.informatik.uni-oldenburg.de),
-; Dr. B. Setzepfandt (bernd@gina.uni-muenster.de),
-; and Guido Vollbeding (guivol@esc.de).
-;
-; To use this file, rename it to cjpeg.prj.
-; If you are using Turbo C, change filenames beginning with "pc..." to "tc..."
-; Read installation instructions before trying to make the program!
-;
-;
-;      * * * Output file * * *
-cjpeg.ttp
-;
-; * * * COMPILER OPTIONS * * *  
-.C[-P]        ; absolute calls
-.C[-M]        ; and no string merging, folks
-.C[-w-cln]    ; no "constant is long" warnings
-.C[-w-par]    ; no "parameter xxxx unused"
-.C[-w-rch]    ; no "unreachable code"
-.C[-wsig]     ; warn if significant digits may be lost
-=
-; * * * * List of modules * * * * 
-pcstart.o
-cjpeg.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h,jversion.h)
-cdjpeg.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdswitch.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdppm.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdgif.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdtarga.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdbmp.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdrle.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-libjpeg.lib        ; built by libjpeg.prj
-pcfltlib.lib       ; floating point library
-; the float library can be omitted if you've turned off DCT_FLOAT_SUPPORTED
-pcstdlib.lib       ; standard library
-pcextlib.lib       ; extended library
diff --git a/makdjpeg.st b/makdjpeg.st
deleted file mode 100644
index 3226726..0000000
--- a/makdjpeg.st
+++ /dev/null
@@ -1,38 +0,0 @@
-; Project file for Independent JPEG Group's software
-;
-; This project file is for Atari ST/STE/TT systems using Pure C or Turbo C.
-; Thanks to Frank Moehle (Frank.Moehle@arbi.informatik.uni-oldenburg.de),
-; Dr. B. Setzepfandt (bernd@gina.uni-muenster.de),
-; and Guido Vollbeding (guivol@esc.de).
-;
-; To use this file, rename it to djpeg.prj.
-; If you are using Turbo C, change filenames beginning with "pc..." to "tc..."
-; Read installation instructions before trying to make the program!
-;
-;
-;      * * * Output file * * *
-djpeg.ttp
-;
-; * * * COMPILER OPTIONS * * *  
-.C[-P]        ; absolute calls
-.C[-M]        ; and no string merging, folks
-.C[-w-cln]    ; no "constant is long" warnings
-.C[-w-par]    ; no "parameter xxxx unused"
-.C[-w-rch]    ; no "unreachable code"
-.C[-wsig]     ; warn if significant digits may be lost
-=
-; * * * * List of modules * * * * 
-pcstart.o
-djpeg.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h,jversion.h)
-cdjpeg.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdcolmap.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-wrppm.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-wrgif.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-wrtarga.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-wrbmp.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-wrrle.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-libjpeg.lib        ; built by libjpeg.prj
-pcfltlib.lib       ; floating point library
-; the float library can be omitted if you've turned off DCT_FLOAT_SUPPORTED
-pcstdlib.lib       ; standard library
-pcextlib.lib       ; extended library
diff --git a/makeapps.ds b/makeapps.ds
deleted file mode 100644
index bedd038..0000000
--- a/makeapps.ds
+++ /dev/null
@@ -1,828 +0,0 @@
-# Microsoft Developer Studio Generated NMAKE File, Format Version 4.20
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Console Application" 0x0103
-
-!IF "$(CFG)" == ""
-CFG=cjpeg - Win32
-!MESSAGE No configuration specified.  Defaulting to cjpeg - Win32.
-!ENDIF 
-
-!IF "$(CFG)" != "cjpeg - Win32" && "$(CFG)" != "djpeg - Win32" &&\
- "$(CFG)" != "jpegtran - Win32" && "$(CFG)" != "rdjpgcom - Win32" &&\
- "$(CFG)" != "wrjpgcom - Win32"
-!MESSAGE Invalid configuration "$(CFG)" specified.
-!MESSAGE You can specify a configuration when running NMAKE on this makefile
-!MESSAGE by defining the macro CFG on the command line.  For example:
-!MESSAGE 
-!MESSAGE NMAKE /f "apps.mak" CFG="cjpeg - Win32"
-!MESSAGE 
-!MESSAGE Possible choices for configuration are:
-!MESSAGE 
-!MESSAGE "cjpeg - Win32" (based on "Win32 (x86) Console Application")
-!MESSAGE "djpeg - Win32" (based on "Win32 (x86) Console Application")
-!MESSAGE "jpegtran - Win32" (based on "Win32 (x86) Console Application")
-!MESSAGE "rdjpgcom - Win32" (based on "Win32 (x86) Console Application")
-!MESSAGE "wrjpgcom - Win32" (based on "Win32 (x86) Console Application")
-!MESSAGE 
-!ERROR An invalid configuration is specified.
-!ENDIF 
-
-!IF "$(OS)" == "Windows_NT"
-NULL=
-!ELSE 
-NULL=nul
-!ENDIF 
-################################################################################
-# Begin Project
-# PROP Target_Last_Scanned "cjpeg - Win32"
-CPP=cl.exe
-RSC=rc.exe
-
-!IF  "$(CFG)" == "cjpeg - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "cjpeg\Release"
-# PROP BASE Intermediate_Dir "cjpeg\Release"
-# PROP BASE Target_Dir "cjpeg"
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "cjpeg\Release"
-# PROP Intermediate_Dir "cjpeg\Release"
-# PROP Target_Dir "cjpeg"
-OUTDIR=.\cjpeg\Release
-INTDIR=.\cjpeg\Release
-
-ALL : "$(OUTDIR)\cjpeg.exe"
-
-CLEAN : 
-	-@erase "$(INTDIR)\cjpeg.obj"
-	-@erase "$(INTDIR)\rdppm.obj"
-	-@erase "$(INTDIR)\rdgif.obj"
-	-@erase "$(INTDIR)\rdtarga.obj"
-	-@erase "$(INTDIR)\rdrle.obj"
-	-@erase "$(INTDIR)\rdbmp.obj"
-	-@erase "$(INTDIR)\rdswitch.obj"
-	-@erase "$(INTDIR)\cdjpeg.obj"
-	-@erase "$(OUTDIR)\cjpeg.exe"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/cjpeg.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\cjpeg\Release/
-CPP_SBRS=.\.
-# ADD BASE RSC /l 0x409 /d "NDEBUG"
-# ADD RSC /l 0x409 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/cjpeg.bsc" 
-BSC32_SBRS= \
-	
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-LINK32_FLAGS=Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib\
- comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib\
- odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
- /pdb:"$(OUTDIR)/cjpeg.pdb" /machine:I386 /out:"$(OUTDIR)/cjpeg.exe" 
-LINK32_OBJS= \
-	"$(INTDIR)\cjpeg.obj" \
-	"$(INTDIR)\rdppm.obj" \
-	"$(INTDIR)\rdgif.obj" \
-	"$(INTDIR)\rdtarga.obj" \
-	"$(INTDIR)\rdrle.obj" \
-	"$(INTDIR)\rdbmp.obj" \
-	"$(INTDIR)\rdswitch.obj" \
-	"$(INTDIR)\cdjpeg.obj" \
-
-
-"$(OUTDIR)\cjpeg.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
-    $(LINK32) @<<
-  $(LINK32_FLAGS) $(LINK32_OBJS)
-<<
-
-!ELSEIF  "$(CFG)" == "djpeg - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "djpeg\Release"
-# PROP BASE Intermediate_Dir "djpeg\Release"
-# PROP BASE Target_Dir "djpeg"
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "djpeg\Release"
-# PROP Intermediate_Dir "djpeg\Release"
-# PROP Target_Dir "djpeg"
-OUTDIR=.\djpeg\Release
-INTDIR=.\djpeg\Release
-
-ALL : "$(OUTDIR)\djpeg.exe"
-
-CLEAN : 
-	-@erase "$(INTDIR)\djpeg.obj"
-	-@erase "$(INTDIR)\wrppm.obj"
-	-@erase "$(INTDIR)\wrgif.obj"
-	-@erase "$(INTDIR)\wrtarga.obj"
-	-@erase "$(INTDIR)\wrrle.obj"
-	-@erase "$(INTDIR)\wrbmp.obj"
-	-@erase "$(INTDIR)\rdcolmap.obj"
-	-@erase "$(INTDIR)\cdjpeg.obj"
-	-@erase "$(OUTDIR)\djpeg.exe"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/djpeg.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\djpeg\Release/
-CPP_SBRS=.\.
-# ADD BASE RSC /l 0x409 /d "NDEBUG"
-# ADD RSC /l 0x409 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/djpeg.bsc" 
-BSC32_SBRS= \
-	
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-LINK32_FLAGS=Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib\
- comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib\
- odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
- /pdb:"$(OUTDIR)/djpeg.pdb" /machine:I386 /out:"$(OUTDIR)/djpeg.exe" 
-LINK32_OBJS= \
-	"$(INTDIR)\djpeg.obj" \
-	"$(INTDIR)\wrppm.obj" \
-	"$(INTDIR)\wrgif.obj" \
-	"$(INTDIR)\wrtarga.obj" \
-	"$(INTDIR)\wrrle.obj" \
-	"$(INTDIR)\wrbmp.obj" \
-	"$(INTDIR)\rdcolmap.obj" \
-	"$(INTDIR)\cdjpeg.obj" \
-
-
-"$(OUTDIR)\djpeg.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
-    $(LINK32) @<<
-  $(LINK32_FLAGS) $(LINK32_OBJS)
-<<
-
-!ELSEIF  "$(CFG)" == "jpegtran - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "jpegtran\Release"
-# PROP BASE Intermediate_Dir "jpegtran\Release"
-# PROP BASE Target_Dir "jpegtran"
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "jpegtran\Release"
-# PROP Intermediate_Dir "jpegtran\Release"
-# PROP Target_Dir "jpegtran"
-OUTDIR=.\jpegtran\Release
-INTDIR=.\jpegtran\Release
-
-ALL : "$(OUTDIR)\jpegtran.exe"
-
-CLEAN : 
-	-@erase "$(INTDIR)\jpegtran.obj"
-	-@erase "$(INTDIR)\rdswitch.obj"
-	-@erase "$(INTDIR)\cdjpeg.obj"
-	-@erase "$(INTDIR)\transupp.obj"
-	-@erase "$(OUTDIR)\jpegtran.exe"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/jpegtran.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\jpegtran\Release/
-CPP_SBRS=.\.
-# ADD BASE RSC /l 0x409 /d "NDEBUG"
-# ADD RSC /l 0x409 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/jpegtran.bsc" 
-BSC32_SBRS= \
-	
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-LINK32_FLAGS=Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib\
- comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib\
- odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
- /pdb:"$(OUTDIR)/jpegtran.pdb" /machine:I386 /out:"$(OUTDIR)/jpegtran.exe" 
-LINK32_OBJS= \
-	"$(INTDIR)\jpegtran.obj" \
-	"$(INTDIR)\rdswitch.obj" \
-	"$(INTDIR)\cdjpeg.obj" \
-	"$(INTDIR)\transupp.obj" \
-
-
-"$(OUTDIR)\jpegtran.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
-    $(LINK32) @<<
-  $(LINK32_FLAGS) $(LINK32_OBJS)
-<<
-
-!ELSEIF  "$(CFG)" == "rdjpgcom - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "rdjpgcom\Release"
-# PROP BASE Intermediate_Dir "rdjpgcom\Release"
-# PROP BASE Target_Dir "rdjpgcom"
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "rdjpgcom\Release"
-# PROP Intermediate_Dir "rdjpgcom\Release"
-# PROP Target_Dir "rdjpgcom"
-OUTDIR=.\rdjpgcom\Release
-INTDIR=.\rdjpgcom\Release
-
-ALL : "$(OUTDIR)\rdjpgcom.exe"
-
-CLEAN : 
-	-@erase "$(INTDIR)\rdjpgcom.obj"
-	-@erase "$(OUTDIR)\rdjpgcom.exe"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/rdjpgcom.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\rdjpgcom\Release/
-CPP_SBRS=.\.
-# ADD BASE RSC /l 0x409 /d "NDEBUG"
-# ADD RSC /l 0x409 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/rdjpgcom.bsc" 
-BSC32_SBRS= \
-	
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-LINK32_FLAGS=Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib\
- comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib\
- odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
- /pdb:"$(OUTDIR)/rdjpgcom.pdb" /machine:I386 /out:"$(OUTDIR)/rdjpgcom.exe" 
-LINK32_OBJS= \
-	"$(INTDIR)\rdjpgcom.obj"
-
-"$(OUTDIR)\rdjpgcom.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
-    $(LINK32) @<<
-  $(LINK32_FLAGS) $(LINK32_OBJS)
-<<
-
-!ELSEIF  "$(CFG)" == "wrjpgcom - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "wrjpgcom\Release"
-# PROP BASE Intermediate_Dir "wrjpgcom\Release"
-# PROP BASE Target_Dir "wrjpgcom"
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "wrjpgcom\Release"
-# PROP Intermediate_Dir "wrjpgcom\Release"
-# PROP Target_Dir "wrjpgcom"
-OUTDIR=.\wrjpgcom\Release
-INTDIR=.\wrjpgcom\Release
-
-ALL : "$(OUTDIR)\wrjpgcom.exe"
-
-CLEAN : 
-	-@erase "$(INTDIR)\wrjpgcom.obj"
-	-@erase "$(OUTDIR)\wrjpgcom.exe"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/wrjpgcom.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\wrjpgcom\Release/
-CPP_SBRS=.\.
-# ADD BASE RSC /l 0x409 /d "NDEBUG"
-# ADD RSC /l 0x409 /d "NDEBUG"
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/wrjpgcom.bsc" 
-BSC32_SBRS= \
-	
-LINK32=link.exe
-# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-# ADD LINK32 Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
-LINK32_FLAGS=Release\jpeg.lib kernel32.lib user32.lib gdi32.lib winspool.lib\
- comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib\
- odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
- /pdb:"$(OUTDIR)/wrjpgcom.pdb" /machine:I386 /out:"$(OUTDIR)/wrjpgcom.exe" 
-LINK32_OBJS= \
-	"$(INTDIR)\wrjpgcom.obj"
-
-"$(OUTDIR)\wrjpgcom.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
-    $(LINK32) @<<
-  $(LINK32_FLAGS) $(LINK32_OBJS)
-<<
-
-!ENDIF 
-
-.c{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cpp{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cxx{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.c{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cpp{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cxx{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-################################################################################
-# Begin Target
-
-# Name "cjpeg - Win32"
-
-!IF  "$(CFG)" == "cjpeg - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="cjpeg.c"
-DEP_CPP_CJPEG=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	"jversion.h"\
-	
-
-"$(INTDIR)\cjpeg.obj" : $(SOURCE) $(DEP_CPP_CJPEG) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="cdjpeg.c"
-DEP_CPP_CDJPE=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\cdjpeg.obj" : $(SOURCE) $(DEP_CPP_CDJPE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdswitch.c"
-DEP_CPP_RDSWI=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdswitch.obj" : $(SOURCE) $(DEP_CPP_RDSWI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdppm.c"
-DEP_CPP_RDPPM=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdppm.obj" : $(SOURCE) $(DEP_CPP_RDPPM) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdgif.c"
-DEP_CPP_RDGIF=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdgif.obj" : $(SOURCE) $(DEP_CPP_RDGIF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdtarga.c"
-DEP_CPP_RDTAR=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdtarga.obj" : $(SOURCE) $(DEP_CPP_RDTAR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdbmp.c"
-DEP_CPP_RDBMP=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdbmp.obj" : $(SOURCE) $(DEP_CPP_RDBMP) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdrle.c"
-DEP_CPP_RDRLE=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdrle.obj" : $(SOURCE) $(DEP_CPP_RDRLE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-################################################################################
-# Begin Target
-
-# Name "djpeg - Win32"
-
-!IF  "$(CFG)" == "djpeg - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="djpeg.c"
-DEP_CPP_DJPEG=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	"jversion.h"\
-	
-
-"$(INTDIR)\djpeg.obj" : $(SOURCE) $(DEP_CPP_DJPEG) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="cdjpeg.c"
-DEP_CPP_CDJPE=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\cdjpeg.obj" : $(SOURCE) $(DEP_CPP_CDJPE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdcolmap.c"
-DEP_CPP_RDCOL=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdcolmap.obj" : $(SOURCE) $(DEP_CPP_RDCOL) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="wrppm.c"
-DEP_CPP_WRPPM=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\wrppm.obj" : $(SOURCE) $(DEP_CPP_WRPPM) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="wrgif.c"
-DEP_CPP_WRGIF=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\wrgif.obj" : $(SOURCE) $(DEP_CPP_WRGIF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="wrtarga.c"
-DEP_CPP_WRTAR=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\wrtarga.obj" : $(SOURCE) $(DEP_CPP_WRTAR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="wrbmp.c"
-DEP_CPP_WRBMP=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\wrbmp.obj" : $(SOURCE) $(DEP_CPP_WRBMP) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="wrrle.c"
-DEP_CPP_WRRLE=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\wrrle.obj" : $(SOURCE) $(DEP_CPP_WRRLE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-################################################################################
-# Begin Target
-
-# Name "jpegtran - Win32"
-
-!IF  "$(CFG)" == "jpegtran - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="jpegtran.c"
-DEP_CPP_JPEGT=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	"transupp.h"\
-	"jversion.h"\
-	
-
-"$(INTDIR)\jpegtran.obj" : $(SOURCE) $(DEP_CPP_JPEGT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="cdjpeg.c"
-DEP_CPP_CDJPE=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\cdjpeg.obj" : $(SOURCE) $(DEP_CPP_CDJPE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="rdswitch.c"
-DEP_CPP_RDSWI=\
-	"cdjpeg.h"\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	"cderror.h"\
-	
-
-"$(INTDIR)\rdswitch.obj" : $(SOURCE) $(DEP_CPP_RDSWI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="transupp.c"
-DEP_CPP_TRANS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"transupp.h"\
-	
-
-"$(INTDIR)\transupp.obj" : $(SOURCE) $(DEP_CPP_TRANS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-################################################################################
-# Begin Target
-
-# Name "rdjpgcom - Win32"
-
-!IF  "$(CFG)" == "rdjpgcom - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="rdjpgcom.c"
-DEP_CPP_RDJPG=\
-	"jinclude.h"\
-	"jconfig.h"\
-	
-
-"$(INTDIR)\rdjpgcom.obj" : $(SOURCE) $(DEP_CPP_RDJPG) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-################################################################################
-# Begin Target
-
-# Name "wrjpgcom - Win32"
-
-!IF  "$(CFG)" == "wrjpgcom - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="wrjpgcom.c"
-DEP_CPP_WRJPG=\
-	"jinclude.h"\
-	"jconfig.h"\
-	
-
-"$(INTDIR)\wrjpgcom.obj" : $(SOURCE) $(DEP_CPP_WRJPG) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-# End Project
-################################################################################
-
diff --git a/makefile.ansi b/makefile.ansi
deleted file mode 100644
index 8291913..0000000
--- a/makefile.ansi
+++ /dev/null
@@ -1,214 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is suitable for Unix-like systems with ANSI-capable compilers.
-# If you have a non-ANSI compiler, makefile.unix is a better starting point.
-
-# Read installation instructions before saying "make" !!
-
-# The name of your C compiler:
-CC= cc
-
-# You may need to adjust these cc options:
-CFLAGS= -O
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Link-time cc options:
-LDFLAGS= 
-
-# To link any special libraries, add the necessary -l commands here.
-LDLIBS= 
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Unix this is usually jmemnobs.o, but you may want
-# to use jmemansi.o or jmemname.o if you have limited swap space.
-SYSDEPMEM= jmemnobs.o
-
-# miscellaneous OS-dependent stuff
-# linker
-LN= $(CC)
-# file deletion command
-RM= rm -f
-# library (.a) file creation command
-AR= ar rc
-# second step in .a creation (use "touch" if not needed)
-AR2= ranlib
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
-        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
-# decompression library object files
-DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
-        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
-# These objectfiles are included in libjpeg.a
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
-        cdjpeg.o
-DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
-        cdjpeg.o
-TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
-
-
-all: libjpeg.a cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-
-libjpeg.a: $(LIBOBJECTS)
-	$(RM) libjpeg.a
-	$(AR) libjpeg.a  $(LIBOBJECTS)
-	$(AR2) libjpeg.a
-
-cjpeg: $(COBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o cjpeg $(COBJECTS) libjpeg.a $(LDLIBS)
-
-djpeg: $(DOBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o djpeg $(DOBJECTS) libjpeg.a $(LDLIBS)
-
-jpegtran: $(TROBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o jpegtran $(TROBJECTS) libjpeg.a $(LDLIBS)
-
-rdjpgcom: rdjpgcom.o
-	$(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS)
-
-wrjpgcom: wrjpgcom.o
-	$(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS)
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	$(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom
-	$(RM) core testout*
-
-test: cjpeg djpeg jpegtran
-	$(RM) testout*
-	./djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	./cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	./djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	./cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	./jpegtran -outfile testoutt.jpg testprog.jpg
-	cmp testimg.ppm testout.ppm
-	cmp testimg.bmp testout.bmp
-	cmp testimg.jpg testout.jpg
-	cmp testimg.ppm testoutp.ppm
-	cmp testimgp.jpg testoutp.jpg
-	cmp testorig.jpg testoutt.jpg
-
-
-jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.bcc b/makefile.bcc
deleted file mode 100644
index a1cfcde..0000000
--- a/makefile.bcc
+++ /dev/null
@@ -1,285 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is suitable for Borland C on MS-DOS or OS/2.
-# It works with Borland C++ for DOS, revision 3.0 or later,
-# and has been tested with Borland C++ for OS/2.
-# Watch out for optimization bugs in the OS/2 compilers --- see notes below!
-# Thanks to Tom Wright and Ge' Weijers (original DOS) and
-# Ken Porter (OS/2) for this file.
-
-# Read installation instructions before saying "make" !!
-
-# Are we under DOS or OS/2?
-!if !$d(DOS) && !$d(OS2)
-!if $d(__OS2__)
-OS2=1
-!else
-DOS=1
-!endif
-!endif
-
-# The name of your C compiler:
-CC= bcc
-
-# You may need to adjust these cc options:
-!if $d(DOS)
-CFLAGS= -O2 -mm -w-par -w-stu -w-ccc -w-rch
-!else
-CFLAGS= -O1 -w-par -w-stu -w-ccc -w-rch
-!endif
-# -O2 enables full code optimization (for pre-3.0 Borland C++, use -O -G -Z).
-# -O2 is buggy in Borland OS/2 C++ revision 2.0, so use -O1 there for now.
-# If you have Borland OS/2 C++ revision 1.0, use -O or no optimization at all.
-# -mm selects medium memory model (near data, far code pointers; DOS only!)
-# -w-par suppresses warnings about unused function parameters
-# -w-stu suppresses warnings about incomplete structures
-# -w-ccc suppresses warnings about compile-time-constant conditions
-# -w-rch suppresses warnings about unreachable code
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Link-time cc options:
-!if $d(DOS)
-LDFLAGS= -mm
-# memory model option here must match CFLAGS!
-!else
-LDFLAGS=
-# -lai full-screen app
-# -lc case-significant link
-!endif
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.
-# For DOS, we recommend jmemdos.c and jmemdosa.asm.
-# For OS/2, we recommend jmemnobs.c (flat memory!)
-# SYSDEPMEMLIB must list the same files with "+" signs for the librarian.
-!if $d(DOS)
-SYSDEPMEM= jmemdos.obj jmemdosa.obj
-SYSDEPMEMLIB= +jmemdos.obj +jmemdosa.obj
-!else
-SYSDEPMEM= jmemnobs.obj
-SYSDEPMEMLIB= +jmemnobs.obj
-!endif
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
-        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
-        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
-# decompression library object files
-DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
-        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj \
-        jquant1.obj jquant2.obj jdmerge.obj
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
-        rdswitch.obj cdjpeg.obj
-DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
-        rdcolmap.obj cdjpeg.obj
-TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
-
-
-all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-
-libjpeg.lib: $(LIBOBJECTS)
-	- del libjpeg.lib
-	tlib libjpeg.lib /E /C @&&|
-+jcapimin.obj +jcapistd.obj +jctrans.obj +jcparam.obj +jdatadst.obj &
-+jcinit.obj +jcmaster.obj +jcmarker.obj +jcmainct.obj +jcprepct.obj &
-+jccoefct.obj +jccolor.obj +jcsample.obj +jchuff.obj +jcphuff.obj &
-+jcdctmgr.obj +jfdctfst.obj +jfdctflt.obj +jfdctint.obj +jdapimin.obj &
-+jdapistd.obj +jdtrans.obj +jdatasrc.obj +jdmaster.obj +jdinput.obj &
-+jdmarker.obj +jdhuff.obj +jdphuff.obj +jdmainct.obj +jdcoefct.obj &
-+jdpostct.obj +jddctmgr.obj +jidctfst.obj +jidctflt.obj +jidctint.obj &
-+jidctred.obj +jdsample.obj +jdcolor.obj +jquant1.obj +jquant2.obj &
-+jdmerge.obj +jcomapi.obj +jutils.obj +jerror.obj +jmemmgr.obj &
-$(SYSDEPMEMLIB)
-|
-
-cjpeg.exe: $(COBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) -ecjpeg.exe $(COBJECTS) libjpeg.lib
-
-djpeg.exe: $(DOBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) -edjpeg.exe $(DOBJECTS) libjpeg.lib
-
-jpegtran.exe: $(TROBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) -ejpegtran.exe $(TROBJECTS) libjpeg.lib
-
-rdjpgcom.exe: rdjpgcom.c
-!if $d(DOS)
-	$(CC) -ms -O rdjpgcom.c
-!else
-	$(CC) $(CFLAGS) rdjpgcom.c
-!endif
-
-# On DOS, wrjpgcom needs large model so it can malloc a 64K chunk
-wrjpgcom.exe: wrjpgcom.c
-!if $d(DOS)
-	$(CC) -ml -O wrjpgcom.c
-!else
-	$(CC) $(CFLAGS) wrjpgcom.c
-!endif
-
-# This "{}" syntax allows Borland Make to "batch" source files.
-# In this way, each run of the compiler can build many modules.
-.c.obj:
-	$(CC) $(CFLAGS) -c{ $<}
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	- del *.obj
-	- del libjpeg.lib
-	- del cjpeg.exe
-	- del djpeg.exe
-	- del jpegtran.exe
-	- del rdjpgcom.exe
-	- del wrjpgcom.exe
-	- del testout*.*
-
-test: cjpeg.exe djpeg.exe jpegtran.exe
-	- del testout*.*
-	djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	jpegtran -outfile testoutt.jpg testprog.jpg
-!if $d(DOS)
-	fc /b testimg.ppm testout.ppm
-	fc /b testimg.bmp testout.bmp
-	fc /b testimg.jpg testout.jpg
-	fc /b testimg.ppm testoutp.ppm
-	fc /b testimgp.jpg testoutp.jpg
-	fc /b testorig.jpg testoutt.jpg
-!else
-	echo n > n.tmp
-	comp testimg.ppm testout.ppm < n.tmp
-	comp testimg.bmp testout.bmp < n.tmp
-	comp testimg.jpg testout.jpg < n.tmp
-	comp testimg.ppm testoutp.ppm < n.tmp
-	comp testimgp.jpg testoutp.jpg < n.tmp
-	comp testorig.jpg testoutt.jpg < n.tmp
-	del n.tmp
-!endif
-
-
-jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-jmemdosa.obj: jmemdosa.asm
-	tasm /mx jmemdosa.asm
diff --git a/makefile.cfg b/makefile.cfg
deleted file mode 100644
index f25e42e..0000000
--- a/makefile.cfg
+++ /dev/null
@@ -1,319 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# makefile.cfg is edited by configure to produce a custom Makefile.
-
-# Read installation instructions before saying "make" !!
-
-# For compiling with source and object files in different directories.
-srcdir = @srcdir@
-VPATH = @srcdir@
-
-# Where to install the programs and man pages.
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-bindir = $(exec_prefix)/bin
-libdir = $(exec_prefix)/lib
-includedir = $(prefix)/include
-binprefix =
-manprefix =
-manext = 1
-mandir = $(prefix)/man/man$(manext)
-
-# The name of your C compiler:
-CC= @CC@
-
-# You may need to adjust these cc options:
-CFLAGS= @CFLAGS@ @CPPFLAGS@ @INCLUDEFLAGS@
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-# However, any special defines for ansi2knr.c may be included here:
-ANSI2KNRFLAGS= @ANSI2KNRFLAGS@
-
-# Link-time cc options:
-LDFLAGS= @LDFLAGS@
-
-# To link any special libraries, add the necessary -l commands here.
-LDLIBS= @LIBS@
-
-# If using GNU libtool, LIBTOOL references it; if not, LIBTOOL is empty.
-LIBTOOL = @LIBTOOL@
-# $(O) expands to "lo" if using libtool, plain "o" if not.
-# Similarly, $(A) expands to "la" or "a".
-O = @O@
-A = @A@
-
-# Library version ID; libtool uses this for the shared library version number.
-# Note: we suggest this match the macro of the same name in jpeglib.h.
-JPEG_LIB_VERSION = @JPEG_LIB_VERSION@
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Unix this is usually jmemnobs.o, but you may want
-# to use jmemansi.o or jmemname.o if you have limited swap space.
-SYSDEPMEM= @MEMORYMGR@
-
-# miscellaneous OS-dependent stuff
-SHELL= /bin/sh
-# linker
-LN= @LN@
-# file deletion command
-RM= rm -f
-# directory creation command
-MKDIR= mkdir
-# library (.a) file creation command
-AR= ar rc
-# second step in .a creation (use "touch" if not needed)
-AR2= @RANLIB@
-# installation program
-INSTALL= @INSTALL@
-INSTALL_PROGRAM= @INSTALL_PROGRAM@
-INSTALL_LIB= @INSTALL_LIB@
-INSTALL_DATA= @INSTALL_DATA@
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.$(O) jutils.$(O) jerror.$(O) jmemmgr.$(O) $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.$(O) jcapistd.$(O) jctrans.$(O) jcparam.$(O) \
-        jdatadst.$(O) jcinit.$(O) jcmaster.$(O) jcmarker.$(O) jcmainct.$(O) \
-        jcprepct.$(O) jccoefct.$(O) jccolor.$(O) jcsample.$(O) jchuff.$(O) \
-        jcphuff.$(O) jcdctmgr.$(O) jfdctfst.$(O) jfdctflt.$(O) \
-        jfdctint.$(O)
-# decompression library object files
-DLIBOBJECTS= jdapimin.$(O) jdapistd.$(O) jdtrans.$(O) jdatasrc.$(O) \
-        jdmaster.$(O) jdinput.$(O) jdmarker.$(O) jdhuff.$(O) jdphuff.$(O) \
-        jdmainct.$(O) jdcoefct.$(O) jdpostct.$(O) jddctmgr.$(O) \
-        jidctfst.$(O) jidctflt.$(O) jidctint.$(O) jidctred.$(O) \
-        jdsample.$(O) jdcolor.$(O) jquant1.$(O) jquant2.$(O) jdmerge.$(O)
-# These objectfiles are included in libjpeg.a
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.$(O) rdppm.$(O) rdgif.$(O) rdtarga.$(O) rdrle.$(O) \
-        rdbmp.$(O) rdswitch.$(O) cdjpeg.$(O)
-DOBJECTS= djpeg.$(O) wrppm.$(O) wrgif.$(O) wrtarga.$(O) wrrle.$(O) \
-        wrbmp.$(O) rdcolmap.$(O) cdjpeg.$(O)
-TROBJECTS= jpegtran.$(O) rdswitch.$(O) cdjpeg.$(O) transupp.$(O)
-
-
-all: @A2K_DEPS@ libjpeg.$(A) cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-
-# Special compilation rules to support ansi2knr and libtool.
-.SUFFIXES: .lo .la
-
-# How to compile with libtool.
-@COM_LT@.c.lo:
-@COM_LT@	$(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c $(srcdir)/$*.c
-
-# How to use ansi2knr, when not using libtool.
-@COM_A2K@.c.o:
-@COM_A2K@	./ansi2knr $(srcdir)/$*.c knr/$*.c
-@COM_A2K@	$(CC) $(CFLAGS) -c knr/$*.c
-@COM_A2K@	$(RM) knr/$*.c
-
-# How to use ansi2knr AND libtool.
-@COM_A2K@.c.lo:
-@COM_A2K@	./ansi2knr $(srcdir)/$*.c knr/$*.c
-@COM_A2K@	$(LIBTOOL) --mode=compile $(CC) $(CFLAGS) -c knr/$*.c
-@COM_A2K@	$(RM) knr/$*.c
-
-ansi2knr: ansi2knr.c
-	$(CC) $(CFLAGS) $(ANSI2KNRFLAGS) -o ansi2knr $(srcdir)/ansi2knr.c
-	$(MKDIR) knr
-
-# the library:
-
-# without libtool:
-libjpeg.a: @A2K_DEPS@ $(LIBOBJECTS)
-	$(RM) libjpeg.a
-	$(AR) libjpeg.a  $(LIBOBJECTS)
-	$(AR2) libjpeg.a
-
-# with libtool:
-libjpeg.la: @A2K_DEPS@ $(LIBOBJECTS)
-	$(LIBTOOL) --mode=link $(CC) -o libjpeg.la $(LIBOBJECTS) \
-		-rpath $(libdir) -version-info $(JPEG_LIB_VERSION)
-
-# sample programs:
-
-cjpeg: $(COBJECTS) libjpeg.$(A)
-	$(LN) $(LDFLAGS) -o cjpeg $(COBJECTS) libjpeg.$(A) $(LDLIBS)
-
-djpeg: $(DOBJECTS) libjpeg.$(A)
-	$(LN) $(LDFLAGS) -o djpeg $(DOBJECTS) libjpeg.$(A) $(LDLIBS)
-
-jpegtran: $(TROBJECTS) libjpeg.$(A)
-	$(LN) $(LDFLAGS) -o jpegtran $(TROBJECTS) libjpeg.$(A) $(LDLIBS)
-
-rdjpgcom: rdjpgcom.$(O)
-	$(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.$(O) $(LDLIBS)
-
-wrjpgcom: wrjpgcom.$(O)
-	$(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.$(O) $(LDLIBS)
-
-# Installation rules:
-
-install: cjpeg djpeg jpegtran rdjpgcom wrjpgcom @FORCE_INSTALL_LIB@
-	$(INSTALL_PROGRAM) cjpeg $(bindir)/$(binprefix)cjpeg
-	$(INSTALL_PROGRAM) djpeg $(bindir)/$(binprefix)djpeg
-	$(INSTALL_PROGRAM) jpegtran $(bindir)/$(binprefix)jpegtran
-	$(INSTALL_PROGRAM) rdjpgcom $(bindir)/$(binprefix)rdjpgcom
-	$(INSTALL_PROGRAM) wrjpgcom $(bindir)/$(binprefix)wrjpgcom
-	$(INSTALL_DATA) $(srcdir)/cjpeg.1 $(mandir)/$(manprefix)cjpeg.$(manext)
-	$(INSTALL_DATA) $(srcdir)/djpeg.1 $(mandir)/$(manprefix)djpeg.$(manext)
-	$(INSTALL_DATA) $(srcdir)/jpegtran.1 $(mandir)/$(manprefix)jpegtran.$(manext)
-	$(INSTALL_DATA) $(srcdir)/rdjpgcom.1 $(mandir)/$(manprefix)rdjpgcom.$(manext)
-	$(INSTALL_DATA) $(srcdir)/wrjpgcom.1 $(mandir)/$(manprefix)wrjpgcom.$(manext)
-
-install-lib: libjpeg.$(A) install-headers
-	$(INSTALL_LIB) libjpeg.$(A) $(libdir)/$(binprefix)libjpeg.$(A)
-
-install-headers: jconfig.h
-	$(INSTALL_DATA) jconfig.h $(includedir)/jconfig.h
-	$(INSTALL_DATA) $(srcdir)/jpeglib.h $(includedir)/jpeglib.h
-	$(INSTALL_DATA) $(srcdir)/jmorecfg.h $(includedir)/jmorecfg.h
-	$(INSTALL_DATA) $(srcdir)/jerror.h $(includedir)/jerror.h
-
-clean:
-	$(RM) *.o *.lo libjpeg.a libjpeg.la
-	$(RM) cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-	$(RM) ansi2knr core testout* config.log config.status
-	$(RM) -r knr .libs _libs
-
-distclean: clean
-	$(RM) Makefile jconfig.h libtool config.cache
-
-test: cjpeg djpeg jpegtran
-	$(RM) testout*
-	./djpeg -dct int -ppm -outfile testout.ppm  $(srcdir)/testorig.jpg
-	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  $(srcdir)/testorig.jpg
-	./cjpeg -dct int -outfile testout.jpg  $(srcdir)/testimg.ppm
-	./djpeg -dct int -ppm -outfile testoutp.ppm $(srcdir)/testprog.jpg
-	./cjpeg -dct int -progressive -opt -outfile testoutp.jpg $(srcdir)/testimg.ppm
-	./jpegtran -outfile testoutt.jpg $(srcdir)/testprog.jpg
-	cmp $(srcdir)/testimg.ppm testout.ppm
-	cmp $(srcdir)/testimg.bmp testout.bmp
-	cmp $(srcdir)/testimg.jpg testout.jpg
-	cmp $(srcdir)/testimg.ppm testoutp.ppm
-	cmp $(srcdir)/testimgp.jpg testoutp.jpg
-	cmp $(srcdir)/testorig.jpg testoutt.jpg
-
-check: test
-
-# Mistake catcher:
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-# GNU Make likes to know which target names are not really files to be made:
-.PHONY: all install install-lib install-headers clean distclean test check
-
-
-jcapimin.$(O): jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.$(O): jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.$(O): jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.$(O): jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.$(O): jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.$(O): jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.$(O): jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.$(O): jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.$(O): jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.$(O): jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.$(O): jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.$(O): jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.$(O): jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.$(O): jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.$(O): jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.$(O): jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.$(O): jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.$(O): jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.$(O): jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.$(O): jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.$(O): jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.$(O): jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.$(O): jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.$(O): jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.$(O): jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.$(O): jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.$(O): jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.$(O): jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.$(O): jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.$(O): jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.$(O): jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.$(O): jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.$(O): jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.$(O): jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.$(O): jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.$(O): jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.$(O): jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.$(O): jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.$(O): jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.$(O): jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.$(O): jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.$(O): jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.$(O): jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.$(O): jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.$(O): jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.$(O): jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.$(O): jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.$(O): jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.$(O): jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.$(O): jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.$(O): cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.$(O): djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.$(O): jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.$(O): rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.$(O): wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.$(O): cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.$(O): rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.$(O): rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.$(O): transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.$(O): rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.$(O): wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.$(O): rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.$(O): wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.$(O): rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.$(O): wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.$(O): rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.$(O): wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.$(O): rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.$(O): wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.dj b/makefile.dj
deleted file mode 100644
index f766d25..0000000
--- a/makefile.dj
+++ /dev/null
@@ -1,220 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for DJGPP (Delorie's GNU C port on MS-DOS), v2.0 or later.
-# Thanks to Frank J. Donahoe for this version.
-
-# Read installation instructions before saying "make" !!
-
-# The name of your C compiler:
-CC= gcc
-
-# You may need to adjust these cc options:
-CFLAGS= -O2 -Wall -I.
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Link-time cc options:
-LDFLAGS= -s
-
-# To link any special libraries, add the necessary -l commands here.
-LDLIBS= 
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For DJGPP this is usually jmemnobs.o, but you could
-# use jmemname.o if you want to use named temp files instead of swap space.
-SYSDEPMEM= jmemnobs.o
-
-# miscellaneous OS-dependent stuff
-# linker
-LN= $(CC)
-# file deletion command
-RM= del
-# library (.a) file creation command
-AR= ar rc
-# second step in .a creation (use "touch" if not needed)
-AR2= ranlib
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
-        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
-# decompression library object files
-DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
-        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
-# These objectfiles are included in libjpeg.a
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
-        cdjpeg.o
-DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
-        cdjpeg.o
-TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
-
-
-all: libjpeg.a cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-
-libjpeg.a: $(LIBOBJECTS)
-	$(RM) libjpeg.a
-	$(AR) libjpeg.a  $(LIBOBJECTS)
-	$(AR2) libjpeg.a
-
-cjpeg.exe: $(COBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o cjpeg.exe $(COBJECTS) libjpeg.a $(LDLIBS)
-
-djpeg.exe: $(DOBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o djpeg.exe $(DOBJECTS) libjpeg.a $(LDLIBS)
-
-jpegtran.exe: $(TROBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o jpegtran.exe $(TROBJECTS) libjpeg.a $(LDLIBS)
-
-rdjpgcom.exe: rdjpgcom.o
-	$(LN) $(LDFLAGS) -o rdjpgcom.exe rdjpgcom.o $(LDLIBS)
-
-wrjpgcom.exe: wrjpgcom.o
-	$(LN) $(LDFLAGS) -o wrjpgcom.exe wrjpgcom.o $(LDLIBS)
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	$(RM) *.o
-	$(RM) cjpeg.exe
-	$(RM) djpeg.exe
-	$(RM) jpegtran.exe
-	$(RM) rdjpgcom.exe
-	$(RM) wrjpgcom.exe
-	$(RM) libjpeg.a
-	$(RM) testout*.*
-
-test: cjpeg.exe djpeg.exe jpegtran.exe
-	$(RM) testout*.*
-	./djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	./cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	./djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	./cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	./jpegtran -outfile testoutt.jpg testprog.jpg
-	fc /b testimg.ppm testout.ppm
-	fc /b testimg.bmp testout.bmp
-	fc /b testimg.jpg testout.jpg
-	fc /b testimg.ppm testoutp.ppm
-	fc /b testimgp.jpg testoutp.jpg
-	fc /b testorig.jpg testoutt.jpg
-
-
-jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.manx b/makefile.manx
deleted file mode 100644
index 4cb42d1..0000000
--- a/makefile.manx
+++ /dev/null
@@ -1,214 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for Amiga systems using Manx Aztec C ver 5.x.
-# Thanks to D.J. James (djjames@cup.portal.com) for this version.
-
-# Read installation instructions before saying "make" !!
-
-# The name of your C compiler:
-CC= cc
-
-# You may need to adjust these cc options:
-# Uncomment for generic 68000 code (will work on any Amiga)
-ARCHFLAGS= -sn
-
-# Uncomment for 68020/68030 code (faster, but won't run on 68000 CPU)
-#ARCHFLAGS= -c2
-
-CFLAGS= -MC -MD $(ARCHFLAGS) -spfam -r4
-
-# Link-time cc options:
-LDFLAGS= -g
-
-# To link any special libraries, add the necessary -l commands here.
-LDLIBS= -lml -lcl
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Amiga we recommend jmemname.o.
-SYSDEPMEM= jmemname.o
-
-# miscellaneous OS-dependent stuff
-# linker
-LN= ln
-# file deletion command
-RM= delete quiet
-# library (.lib) file creation command
-AR= lb
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
-        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
-# decompression library object files
-DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
-        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
-        cdjpeg.o
-DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
-        cdjpeg.o
-TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
-
-
-all: libjpeg.lib cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-
-libjpeg.lib: $(LIBOBJECTS)
-	-$(RM) libjpeg.lib
-	$(AR) libjpeg.lib  $(LIBOBJECTS)
-
-cjpeg: $(COBJECTS) libjpeg.lib
-	$(LN) $(LDFLAGS) -o cjpeg $(COBJECTS) libjpeg.lib $(LDLIBS)
-
-djpeg: $(DOBJECTS) libjpeg.lib
-	$(LN) $(LDFLAGS) -o djpeg $(DOBJECTS) libjpeg.lib $(LDLIBS)
-
-jpegtran: $(TROBJECTS) libjpeg.lib
-	$(LN) $(LDFLAGS) -o jpegtran $(TROBJECTS) libjpeg.lib $(LDLIBS)
-
-rdjpgcom: rdjpgcom.o
-	$(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS)
-
-wrjpgcom: wrjpgcom.o
-	$(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS)
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	-$(RM) *.o cjpeg djpeg jpegtran libjpeg.lib rdjpgcom wrjpgcom
-	-$(RM) core testout*.*
-
-test: cjpeg djpeg jpegtran
-	-$(RM) testout*.*
-	djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	jpegtran -outfile testoutt.jpg testprog.jpg
-	cmp testimg.ppm testout.ppm
-	cmp testimg.bmp testout.bmp
-	cmp testimg.jpg testout.jpg
-	cmp testimg.ppm testoutp.ppm
-	cmp testimgp.jpg testoutp.jpg
-	cmp testorig.jpg testoutt.jpg
-
-
-jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.mc6 b/makefile.mc6
deleted file mode 100644
index 6aff054..0000000
--- a/makefile.mc6
+++ /dev/null
@@ -1,249 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for Microsoft C for MS-DOS, version 6.00A and up.
-# Use NMAKE, not Microsoft's brain-damaged MAKE.
-# Thanks to Alan Wright and Chris Turner of Olivetti Research Ltd.
-
-# Read installation instructions before saying "nmake" !!
-
-# You may need to adjust these compiler options:
-CFLAGS = -AM -Oecigt -Gs -W3
-# -AM medium memory model (or use -AS for small model, if you remove features)
-# -Oecigt -Gs  maximum safe optimisation (-Ol has bugs in MSC 6.00A)
-# -W3 warning level 3
-# You might also want to add -G2 if you have an 80286, etc.
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Jan-Herman Buining suggests the following switches for MS C 8.0 and a 486:
-# CFLAGS = /AM /f- /FPi87 /G3 /Gs /Gy /Ob1 /Oc /Oe /Og /Oi /Ol /On /Oo /Ot \
-#          /OV4 /W3
-# except for jquant1.c, which must be compiled with /Oo- to avoid a compiler
-# crash.
-
-# Ingar Steinsland suggests the following switches when building
-# a 16-bit Windows DLL:
-# CFLAGS = -ALw -Gsw -Zpe -W3 -O2 -Zi -Zd
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For DOS, we recommend jmemdos.c and jmemdosa.asm.
-# (But not for Windows; see install.doc if you use this makefile for Windows.)
-SYSDEPMEM= jmemdos.obj jmemdosa.obj
-# SYSDEPMEMLIB must list the same files with "+" signs for the librarian.
-SYSDEPMEMLIB= +jmemdos.obj +jmemdosa.obj
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
-        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
-        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
-# decompression library object files
-DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
-        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj \
-        jquant1.obj jquant2.obj jdmerge.obj
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
-        rdswitch.obj cdjpeg.obj
-DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
-        rdcolmap.obj cdjpeg.obj
-TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
-
-# need linker response file because file list > 128 chars
-RFILE = libjpeg.ans
-
-
-all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-
-libjpeg.lib: $(LIBOBJECTS) $(RFILE)
-	del libjpeg.lib
-	lib @$(RFILE)
-
-# linker response file for building libjpeg.lib
-$(RFILE) : makefile
-	del $(RFILE)
-	echo libjpeg.lib >$(RFILE)
-# silly want-to-create-it prompt:
-	echo y >>$(RFILE)
-	echo +jcapimin.obj +jcapistd.obj +jctrans.obj +jcparam.obj & >>$(RFILE)
-	echo +jdatadst.obj +jcinit.obj +jcmaster.obj +jcmarker.obj & >>$(RFILE)
-	echo +jcmainct.obj +jcprepct.obj +jccoefct.obj & >>$(RFILE)
-	echo +jccolor.obj +jcsample.obj +jchuff.obj +jcphuff.obj & >>$(RFILE)
-	echo +jcdctmgr.obj +jfdctfst.obj +jfdctflt.obj & >>$(RFILE)
-	echo +jfdctint.obj +jdapimin.obj +jdapistd.obj & >>$(RFILE)
-	echo +jdtrans.obj +jdatasrc.obj +jdmaster.obj +jdinput.obj & >>$(RFILE)
-	echo +jdmarker.obj +jdhuff.obj +jdphuff.obj +jdmainct.obj & >>$(RFILE)
-	echo +jdcoefct.obj +jdpostct.obj +jddctmgr.obj & >>$(RFILE)
-	echo +jidctfst.obj +jidctflt.obj +jidctint.obj & >>$(RFILE)
-	echo +jidctred.obj +jdsample.obj +jdcolor.obj +jquant1.obj & >>$(RFILE)
-	echo +jquant2.obj +jdmerge.obj +jcomapi.obj +jutils.obj & >>$(RFILE)
-	echo +jerror.obj +jmemmgr.obj & >>$(RFILE)
-	echo $(SYSDEPMEMLIB) ; >>$(RFILE)
-
-cjpeg.exe: $(COBJECTS) libjpeg.lib
-	echo $(COBJECTS) >cjpeg.lst
-	link /STACK:4096 /EXEPACK @cjpeg.lst, cjpeg.exe, , libjpeg.lib, ;
-	del cjpeg.lst
-
-djpeg.exe: $(DOBJECTS) libjpeg.lib
-	echo $(DOBJECTS) >djpeg.lst
-	link /STACK:4096 /EXEPACK @djpeg.lst, djpeg.exe, , libjpeg.lib, ;
-	del djpeg.lst
-
-jpegtran.exe: $(TROBJECTS) libjpeg.lib
-	link /STACK:4096 /EXEPACK $(TROBJECTS), jpegtran.exe, , libjpeg.lib, ;
-
-rdjpgcom.exe: rdjpgcom.c
-	$(CC) -AS -O -W3 rdjpgcom.c
-
-# wrjpgcom needs large model so it can malloc a 64K chunk
-wrjpgcom.exe: wrjpgcom.c
-	$(CC) -AL -O -W3 wrjpgcom.c
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	del *.obj
-	del libjpeg.lib
-	del cjpeg.exe
-	del djpeg.exe
-	del jpegtran.exe
-	del rdjpgcom.exe
-	del wrjpgcom.exe
-	del testout*.*
-
-test: cjpeg.exe djpeg.exe jpegtran.exe
-	del testout*.*
-	djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	jpegtran -outfile testoutt.jpg testprog.jpg
-	fc /b testimg.ppm testout.ppm
-	fc /b testimg.bmp testout.bmp
-	fc /b testimg.jpg testout.jpg
-	fc /b testimg.ppm testoutp.ppm
-	fc /b testimgp.jpg testoutp.jpg
-	fc /b testorig.jpg testoutt.jpg
-
-
-jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-jmemdosa.obj : jmemdosa.asm
-	masm /mx $*;
diff --git a/makefile.mms b/makefile.mms
deleted file mode 100644
index cf130e5..0000000
--- a/makefile.mms
+++ /dev/null
@@ -1,218 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for use with MMS on Digital VMS systems.
-# Thanks to Rick Dyson (dyson@iowasp.physics.uiowa.edu)
-# and Tim Bell (tbell@netcom.com) for their help.
-
-# Read installation instructions before saying "MMS" !!
-
-# You may need to adjust these cc options:
-CFLAGS= $(CFLAGS) /NoDebug /Optimize
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via /Define switches here.
-.ifdef ALPHA
-OPT=
-.else
-OPT= ,Sys$Disk:[]MAKVMS.OPT/Option
-.endif
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Unix this is usually jmemnobs.o, but you may want
-# to use jmemansi.o or jmemname.o if you have limited swap space.
-SYSDEPMEM= jmemnobs.obj
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
-        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
-        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
-# decompression library object files
-DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
-        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj \
-        jquant1.obj jquant2.obj jdmerge.obj
-# These objectfiles are included in libjpeg.olb
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
-        rdswitch.obj cdjpeg.obj
-DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
-        rdcolmap.obj cdjpeg.obj
-TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
-# objectfile lists with commas --- what a crock
-COBJLIST= cjpeg.obj,rdppm.obj,rdgif.obj,rdtarga.obj,rdrle.obj,rdbmp.obj,\
-          rdswitch.obj,cdjpeg.obj
-DOBJLIST= djpeg.obj,wrppm.obj,wrgif.obj,wrtarga.obj,wrrle.obj,wrbmp.obj,\
-          rdcolmap.obj,cdjpeg.obj
-TROBJLIST= jpegtran.obj,rdswitch.obj,cdjpeg.obj,transupp.obj
-LIBOBJLIST= jcapimin.obj,jcapistd.obj,jctrans.obj,jcparam.obj,jdatadst.obj,\
-          jcinit.obj,jcmaster.obj,jcmarker.obj,jcmainct.obj,jcprepct.obj,\
-          jccoefct.obj,jccolor.obj,jcsample.obj,jchuff.obj,jcphuff.obj,\
-          jcdctmgr.obj,jfdctfst.obj,jfdctflt.obj,jfdctint.obj,jdapimin.obj,\
-          jdapistd.obj,jdtrans.obj,jdatasrc.obj,jdmaster.obj,jdinput.obj,\
-          jdmarker.obj,jdhuff.obj,jdphuff.obj,jdmainct.obj,jdcoefct.obj,\
-          jdpostct.obj,jddctmgr.obj,jidctfst.obj,jidctflt.obj,jidctint.obj,\
-          jidctred.obj,jdsample.obj,jdcolor.obj,jquant1.obj,jquant2.obj,\
-          jdmerge.obj,jcomapi.obj,jutils.obj,jerror.obj,jmemmgr.obj,$(SYSDEPMEM)
-
-
-.first
-	@- Define /NoLog Sys Sys$Library
-
-ALL : libjpeg.olb cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-	@ Continue
-
-libjpeg.olb : $(LIBOBJECTS)
-	Library /Create libjpeg.olb $(LIBOBJLIST)
-
-cjpeg.exe : $(COBJECTS) libjpeg.olb
-	$(LINK) $(LFLAGS) /Executable = cjpeg.exe $(COBJLIST),libjpeg.olb/Library$(OPT)
-
-djpeg.exe : $(DOBJECTS) libjpeg.olb
-	$(LINK) $(LFLAGS) /Executable = djpeg.exe $(DOBJLIST),libjpeg.olb/Library$(OPT)
-
-jpegtran.exe : $(TROBJECTS) libjpeg.olb
-	$(LINK) $(LFLAGS) /Executable = jpegtran.exe $(TROBJLIST),libjpeg.olb/Library$(OPT)
-
-rdjpgcom.exe : rdjpgcom.obj
-	$(LINK) $(LFLAGS) /Executable = rdjpgcom.exe rdjpgcom.obj$(OPT)
-
-wrjpgcom.exe : wrjpgcom.obj
-	$(LINK) $(LFLAGS) /Executable = wrjpgcom.exe wrjpgcom.obj$(OPT)
-
-jconfig.h : jconfig.vms
-	@- Copy jconfig.vms jconfig.h
-
-clean :
-	@- Set Protection = Owner:RWED *.*;-1
-	@- Set Protection = Owner:RWED *.OBJ
-	- Purge /NoLog /NoConfirm *.*
-	- Delete /NoLog /NoConfirm *.OBJ;
-
-test : cjpeg.exe djpeg.exe jpegtran.exe
-	mcr sys$disk:[]djpeg -dct int -ppm -outfile testout.ppm testorig.jpg
-	mcr sys$disk:[]djpeg -dct int -bmp -colors 256 -outfile testout.bmp testorig.jpg
-	mcr sys$disk:[]cjpeg -dct int      -outfile testout.jpg testimg.ppm
-	mcr sys$disk:[]djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	mcr sys$disk:[]cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	mcr sys$disk:[]jpegtran -outfile testoutt.jpg testprog.jpg
-	- Backup /Compare/Log	  testimg.ppm testout.ppm
-	- Backup /Compare/Log	  testimg.bmp testout.bmp
-	- Backup /Compare/Log	  testimg.jpg testout.jpg
-	- Backup /Compare/Log	  testimg.ppm testoutp.ppm
-	- Backup /Compare/Log	  testimgp.jpg testoutp.jpg
-	- Backup /Compare/Log	  testorig.jpg testoutt.jpg
-
-
-jcapimin.obj : jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.obj : jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.obj : jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj : jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.obj : jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.obj : jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.obj : jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.obj : jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.obj : jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.obj : jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.obj : jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.obj : jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.obj : jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.obj : jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj : jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.obj : jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.obj : jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.obj : jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.obj : jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.obj : jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.obj : jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj : jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.obj : jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.obj : jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.obj : jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.obj : jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.obj : jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.obj : jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj : jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.obj : jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.obj : jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj : jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.obj : jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.obj : jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj : jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj : jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj : jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj : jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj : jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj : jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj : jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.obj : jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.obj : jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.obj : jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.obj : jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.obj : jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.obj : jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.obj : jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.obj : jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.obj : jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.obj : cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.obj : djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.obj : jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.obj : rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.obj : wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.obj : cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.obj : rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.obj : rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.obj : transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.obj : rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.obj : wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.obj : rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.obj : wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.obj : rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.obj : wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.obj : rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.obj : wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.obj : rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.obj : wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.sas b/makefile.sas
deleted file mode 100644
index f296faf..0000000
--- a/makefile.sas
+++ /dev/null
@@ -1,252 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for Amiga systems using SAS C 6.0 and up.
-# Thanks to Ed Hanway, Mark Rinfret, and Jim Zepeda.
-
-# Read installation instructions before saying "make" !!
-
-# The name of your C compiler:
-CC= sc
-
-# You may need to adjust these cc options:
-# Uncomment the following lines for generic 680x0 version
-ARCHFLAGS= cpu=any
-SUFFIX=
-
-# Uncomment the following lines for 68030-only version
-#ARCHFLAGS= cpu=68030
-#SUFFIX=.030
-
-CFLAGS= nostackcheck data=near parms=register optimize $(ARCHFLAGS) \
-	ignore=104 ignore=304 ignore=306
-# ignore=104 disables warnings for mismatched const qualifiers
-# ignore=304 disables warnings for variables being optimized out
-# ignore=306 disables warnings for the inlining of functions
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via define switches here.
-
-# Link-time cc options:
-LDFLAGS= SC SD ND BATCH
-
-# To link any special libraries, add the necessary commands here.
-LDLIBS= LIB:scm.lib LIB:sc.lib
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Amiga we recommend jmemname.o.
-SYSDEPMEM= jmemname.o
-
-# miscellaneous OS-dependent stuff
-# linker
-LN= slink
-# file deletion command
-RM= delete quiet
-# library (.lib) file creation command
-AR= oml
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
-        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
-# decompression library object files
-DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
-        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
-        cdjpeg.o
-DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
-        cdjpeg.o
-TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
-
-
-all: libjpeg.lib cjpeg$(SUFFIX) djpeg$(SUFFIX) jpegtran$(SUFFIX) rdjpgcom$(SUFFIX) wrjpgcom$(SUFFIX)
-
-# note: do several AR steps to avoid command line length limitations
-
-libjpeg.lib: $(LIBOBJECTS)
-	-$(RM) libjpeg.lib
-	$(AR) libjpeg.lib r $(CLIBOBJECTS)
-	$(AR) libjpeg.lib r $(DLIBOBJECTS)
-	$(AR) libjpeg.lib r $(COMOBJECTS)
-
-cjpeg$(SUFFIX): $(COBJECTS) libjpeg.lib
-	$(LN) <WITH <
-$(LDFLAGS)
-TO cjpeg$(SUFFIX)
-FROM LIB:c.o $(COBJECTS)
-LIB libjpeg.lib $(LDLIBS)
-<
-
-djpeg$(SUFFIX): $(DOBJECTS) libjpeg.lib
-	$(LN) <WITH <
-$(LDFLAGS)
-TO djpeg$(SUFFIX)
-FROM LIB:c.o $(DOBJECTS)
-LIB libjpeg.lib $(LDLIBS)
-<
-
-jpegtran$(SUFFIX): $(TROBJECTS) libjpeg.lib
-	$(LN) <WITH <
-$(LDFLAGS)
-TO jpegtran$(SUFFIX)
-FROM LIB:c.o $(TROBJECTS)
-LIB libjpeg.lib $(LDLIBS)
-<
-
-rdjpgcom$(SUFFIX): rdjpgcom.o
-	$(LN) <WITH <
-$(LDFLAGS)
-TO rdjpgcom$(SUFFIX)
-FROM LIB:c.o rdjpgcom.o
-LIB $(LDLIBS)
-<
-
-wrjpgcom$(SUFFIX): wrjpgcom.o
-	$(LN) <WITH <
-$(LDFLAGS)
-TO wrjpgcom$(SUFFIX)
-FROM LIB:c.o wrjpgcom.o
-LIB $(LDLIBS)
-<
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	-$(RM) *.o cjpeg djpeg jpegtran cjpeg.030 djpeg.030 jpegtran.030
-	-$(RM) rdjpgcom wrjpgcom rdjpgcom.030 wrjpgcom.030
-	-$(RM) libjpeg.lib core testout*.*
-
-test: cjpeg djpeg jpegtran
-	-$(RM) testout*.*
-	djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	jpegtran -outfile testoutt.jpg testprog.jpg
-	cmp testimg.ppm testout.ppm
-	cmp testimg.bmp testout.bmp
-	cmp testimg.jpg testout.jpg
-	cmp testimg.ppm testoutp.ppm
-	cmp testimgp.jpg testoutp.jpg
-	cmp testorig.jpg testoutt.jpg
-
-
-jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.unix b/makefile.unix
deleted file mode 100644
index 00455ab..0000000
--- a/makefile.unix
+++ /dev/null
@@ -1,228 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is suitable for Unix-like systems with non-ANSI compilers.
-# If you have an ANSI compiler, makefile.ansi is a better starting point.
-
-# Read installation instructions before saying "make" !!
-
-# The name of your C compiler:
-CC= cc
-
-# You may need to adjust these cc options:
-CFLAGS= -O
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-# However, any special defines for ansi2knr.c may be included here:
-ANSI2KNRFLAGS= 
-
-# Link-time cc options:
-LDFLAGS= 
-
-# To link any special libraries, add the necessary -l commands here.
-LDLIBS= 
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For Unix this is usually jmemnobs.o, but you may want
-# to use jmemansi.o or jmemname.o if you have limited swap space.
-SYSDEPMEM= jmemnobs.o
-
-# miscellaneous OS-dependent stuff
-# linker
-LN= $(CC)
-# file deletion command
-RM= rm -f
-# file rename command
-MV= mv
-# library (.a) file creation command
-AR= ar rc
-# second step in .a creation (use "touch" if not needed)
-AR2= ranlib
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.o jutils.o jerror.o jmemmgr.o $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.o jcapistd.o jctrans.o jcparam.o jdatadst.o jcinit.o \
-        jcmaster.o jcmarker.o jcmainct.o jcprepct.o jccoefct.o jccolor.o \
-        jcsample.o jchuff.o jcphuff.o jcdctmgr.o jfdctfst.o jfdctflt.o \
-        jfdctint.o
-# decompression library object files
-DLIBOBJECTS= jdapimin.o jdapistd.o jdtrans.o jdatasrc.o jdmaster.o \
-        jdinput.o jdmarker.o jdhuff.o jdphuff.o jdmainct.o jdcoefct.o \
-        jdpostct.o jddctmgr.o jidctfst.o jidctflt.o jidctint.o jidctred.o \
-        jdsample.o jdcolor.o jquant1.o jquant2.o jdmerge.o
-# These objectfiles are included in libjpeg.a
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.o rdppm.o rdgif.o rdtarga.o rdrle.o rdbmp.o rdswitch.o \
-        cdjpeg.o
-DOBJECTS= djpeg.o wrppm.o wrgif.o wrtarga.o wrrle.o wrbmp.o rdcolmap.o \
-        cdjpeg.o
-TROBJECTS= jpegtran.o rdswitch.o cdjpeg.o transupp.o
-
-
-all: ansi2knr libjpeg.a cjpeg djpeg jpegtran rdjpgcom wrjpgcom
-
-# This rule causes ansi2knr to be invoked.
-.c.o:
-	./ansi2knr $*.c T$*.c
-	$(CC) $(CFLAGS) -c T$*.c
-	$(RM) T$*.c $*.o
-	$(MV) T$*.o $*.o
-
-ansi2knr: ansi2knr.c
-	$(CC) $(CFLAGS) $(ANSI2KNRFLAGS) -o ansi2knr ansi2knr.c
-
-libjpeg.a: ansi2knr $(LIBOBJECTS)
-	$(RM) libjpeg.a
-	$(AR) libjpeg.a  $(LIBOBJECTS)
-	$(AR2) libjpeg.a
-
-cjpeg: ansi2knr $(COBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o cjpeg $(COBJECTS) libjpeg.a $(LDLIBS)
-
-djpeg: ansi2knr $(DOBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o djpeg $(DOBJECTS) libjpeg.a $(LDLIBS)
-
-jpegtran: ansi2knr $(TROBJECTS) libjpeg.a
-	$(LN) $(LDFLAGS) -o jpegtran $(TROBJECTS) libjpeg.a $(LDLIBS)
-
-rdjpgcom: rdjpgcom.o
-	$(LN) $(LDFLAGS) -o rdjpgcom rdjpgcom.o $(LDLIBS)
-
-wrjpgcom: wrjpgcom.o
-	$(LN) $(LDFLAGS) -o wrjpgcom wrjpgcom.o $(LDLIBS)
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean:
-	$(RM) *.o cjpeg djpeg jpegtran libjpeg.a rdjpgcom wrjpgcom
-	$(RM) ansi2knr core testout*
-
-test: cjpeg djpeg jpegtran
-	$(RM) testout*
-	./djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	./djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	./cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	./djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	./cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	./jpegtran -outfile testoutt.jpg testprog.jpg
-	cmp testimg.ppm testout.ppm
-	cmp testimg.bmp testout.bmp
-	cmp testimg.jpg testout.jpg
-	cmp testimg.ppm testoutp.ppm
-	cmp testimgp.jpg testoutp.jpg
-	cmp testorig.jpg testoutt.jpg
-
-
-jcapimin.o: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.o: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.o: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.o: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.o: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.o: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.o: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.o: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.o: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.o: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.o: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.o: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.o: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.o: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.o: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.o: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.o: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.o: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.o: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.o: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.o: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.o: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.o: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.o: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.o: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.o: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.o: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.o: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.o: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.o: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.o: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.o: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.o: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.o: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.o: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.o: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.o: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.o: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.o: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.o: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.o: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.o: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.o: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.o: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.o: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.o: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.o: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.o: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.o: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.o: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.o: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.o: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.o: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.o: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.o: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.o: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.o: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.o: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.o: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.o: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.o: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.o: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.o: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.o: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.o: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.o: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.o: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.o: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.o: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.vc b/makefile.vc
deleted file mode 100644
index 2acf069..0000000
--- a/makefile.vc
+++ /dev/null
@@ -1,211 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is for Microsoft Visual C++ on Windows NT (and 95?).
-# It builds the IJG library as a statically linkable library (.LIB),
-# and builds the sample applications as console-mode apps.
-# Thanks to Xingong Chang, Raymond Everly and others.
-
-# Read installation instructions before saying "nmake" !!
-# To build an optimized library without debug info, say "nmake nodebug=1".
-
-# Pull in standard variable definitions
-!include <win32.mak>
-
-# You may want to adjust these compiler options:
-CFLAGS= $(cflags) $(cdebug) $(cvars) -I.
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Link-time options:
-LDFLAGS= $(ldebug) $(conlflags)
-
-# To link any special libraries, add the necessary commands here.
-LDLIBS= $(conlibs)
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  For NT we suggest jmemnobs.obj, which expects the OS to
-# provide adequate virtual memory.
-SYSDEPMEM= jmemnobs.obj
-
-# miscellaneous OS-dependent stuff
-# file deletion command
-RM= del
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c \
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c \
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c \
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c \
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c \
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c \
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c \
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c \
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c \
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h \
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 \
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc \
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc \
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds \
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st \
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms \
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat \
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas \
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg \
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) \
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj \
-        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj \
-        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj \
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
-# decompression library object files
-DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj \
-        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj \
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj \
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj \
-        jquant1.obj jquant2.obj jdmerge.obj
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj \
-        rdswitch.obj cdjpeg.obj
-DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj \
-        rdcolmap.obj cdjpeg.obj
-TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
-
-# Template command for compiling .c to .obj
-.c.obj:
-	$(cc) $(CFLAGS) $*.c
-
-
-all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-
-libjpeg.lib: $(LIBOBJECTS)
-	$(RM) libjpeg.lib
-	lib -out:libjpeg.lib  $(LIBOBJECTS)
-
-cjpeg.exe: $(COBJECTS) libjpeg.lib
-	$(link) $(LDFLAGS) -out:cjpeg.exe $(COBJECTS) libjpeg.lib $(LDLIBS)
-
-djpeg.exe: $(DOBJECTS) libjpeg.lib
-	$(link) $(LDFLAGS) -out:djpeg.exe $(DOBJECTS) libjpeg.lib $(LDLIBS)
-
-jpegtran.exe: $(TROBJECTS) libjpeg.lib
-	$(link) $(LDFLAGS) -out:jpegtran.exe $(TROBJECTS) libjpeg.lib $(LDLIBS)
-
-rdjpgcom.exe: rdjpgcom.obj
-	$(link) $(LDFLAGS) -out:rdjpgcom.exe rdjpgcom.obj $(LDLIBS)
-
-wrjpgcom.exe: wrjpgcom.obj
-	$(link) $(LDFLAGS) -out:wrjpgcom.exe wrjpgcom.obj $(LDLIBS)
-
-
-clean:
-	$(RM) *.obj *.exe libjpeg.lib
-	$(RM) testout*
-
-test: cjpeg.exe djpeg.exe jpegtran.exe
-	$(RM) testout*
-	.\djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	.\djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	.\cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	.\djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	.\cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	.\jpegtran -outfile testoutt.jpg testprog.jpg
-	fc /b testimg.ppm testout.ppm
-	fc /b testimg.bmp testout.bmp
-	fc /b testimg.jpg testout.jpg
-	fc /b testimg.ppm testoutp.ppm
-	fc /b testimgp.jpg testoutp.jpg
-	fc /b testorig.jpg testoutt.jpg
-
-
-jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makefile.vms b/makefile.vms
deleted file mode 100644
index a42358d..0000000
--- a/makefile.vms
+++ /dev/null
@@ -1,142 +0,0 @@
-$! Makefile for Independent JPEG Group's software
-$!
-$! This is a command procedure for Digital VMS systems that do not have MMS.
-$! It builds the JPEG software by brute force, recompiling everything whether
-$! or not it is necessary.  It then runs the basic self-test.
-$! Thanks to Rick Dyson (dyson@iowasp.physics.uiowa.edu)
-$! and Tim Bell (tbell@netcom.com) for their help.
-$!
-$! Read installation instructions before running this!!
-$!
-$ If F$Mode () .eqs. "INTERACTIVE"
-$   Then
-$       VERIFY = F$Verify (0)
-$   Else
-$       VERIFY = F$Verify (1)
-$ EndIf
-$ On Control_Y Then GoTo End
-$ On Error     Then GoTo End
-$
-$ If F$GetSyi ("HW_MODEL") .gt. 1023 
-$   Then
-$       OPT = ""
-$   Else
-$       OPT = ",Sys$Disk:[]makvms.opt/Option"
-$ EndIf
-$ 
-$ DoCompile := CC /NoDebug /Optimize /NoList
-$!
-$ DoCompile jcapimin.c
-$ DoCompile jcapistd.c
-$ DoCompile jctrans.c
-$ DoCompile jcparam.c
-$ DoCompile jdatadst.c
-$ DoCompile jcinit.c
-$ DoCompile jcmaster.c
-$ DoCompile jcmarker.c
-$ DoCompile jcmainct.c
-$ DoCompile jcprepct.c
-$ DoCompile jccoefct.c
-$ DoCompile jccolor.c
-$ DoCompile jcsample.c
-$ DoCompile jchuff.c
-$ DoCompile jcphuff.c
-$ DoCompile jcdctmgr.c
-$ DoCompile jfdctfst.c
-$ DoCompile jfdctflt.c
-$ DoCompile jfdctint.c
-$ DoCompile jdapimin.c
-$ DoCompile jdapistd.c
-$ DoCompile jdtrans.c
-$ DoCompile jdatasrc.c
-$ DoCompile jdmaster.c
-$ DoCompile jdinput.c
-$ DoCompile jdmarker.c
-$ DoCompile jdhuff.c
-$ DoCompile jdphuff.c
-$ DoCompile jdmainct.c
-$ DoCompile jdcoefct.c
-$ DoCompile jdpostct.c
-$ DoCompile jddctmgr.c
-$ DoCompile jidctfst.c
-$ DoCompile jidctflt.c
-$ DoCompile jidctint.c
-$ DoCompile jidctred.c
-$ DoCompile jdsample.c
-$ DoCompile jdcolor.c
-$ DoCompile jquant1.c
-$ DoCompile jquant2.c
-$ DoCompile jdmerge.c
-$ DoCompile jcomapi.c
-$ DoCompile jutils.c
-$ DoCompile jerror.c
-$ DoCompile jmemmgr.c
-$ DoCompile jmemnobs.c
-$!
-$ Library /Create libjpeg.olb  jcapimin.obj,jcapistd.obj,jctrans.obj, -
-          jcparam.obj,jdatadst.obj,jcinit.obj,jcmaster.obj,jcmarker.obj, -
-          jcmainct.obj,jcprepct.obj,jccoefct.obj,jccolor.obj,jcsample.obj, -
-          jchuff.obj,jcphuff.obj,jcdctmgr.obj,jfdctfst.obj,jfdctflt.obj, -
-          jfdctint.obj,jdapimin.obj,jdapistd.obj,jdtrans.obj,jdatasrc.obj, -
-          jdmaster.obj,jdinput.obj,jdmarker.obj,jdhuff.obj,jdphuff.obj, -
-          jdmainct.obj,jdcoefct.obj,jdpostct.obj,jddctmgr.obj,jidctfst.obj, -
-          jidctflt.obj,jidctint.obj,jidctred.obj,jdsample.obj,jdcolor.obj, -
-          jquant1.obj,jquant2.obj,jdmerge.obj,jcomapi.obj,jutils.obj, -
-          jerror.obj,jmemmgr.obj,jmemnobs.obj
-$!
-$ DoCompile cjpeg.c
-$ DoCompile rdppm.c
-$ DoCompile rdgif.c
-$ DoCompile rdtarga.c
-$ DoCompile rdrle.c
-$ DoCompile rdbmp.c
-$ DoCompile rdswitch.c
-$ DoCompile cdjpeg.c
-$!
-$ Link /NoMap /Executable = cjpeg.exe  cjpeg.obj,rdppm.obj,rdgif.obj, -
-          rdtarga.obj,rdrle.obj,rdbmp.obj,rdswitch.obj,cdjpeg.obj,libjpeg.olb/Library'OPT'
-$!
-$ DoCompile djpeg.c
-$ DoCompile wrppm.c
-$ DoCompile wrgif.c
-$ DoCompile wrtarga.c
-$ DoCompile wrrle.c
-$ DoCompile wrbmp.c
-$ DoCompile rdcolmap.c
-$ DoCompile cdjpeg.c
-$!
-$ Link /NoMap /Executable = djpeg.exe  djpeg.obj,wrppm.obj,wrgif.obj, -
-          wrtarga.obj,wrrle.obj,wrbmp.obj,rdcolmap.obj,cdjpeg.obj,libjpeg.olb/Library'OPT'
-$!
-$ DoCompile jpegtran.c
-$ DoCompile rdswitch.c
-$ DoCompile cdjpeg.c
-$ DoCompile transupp.c
-$!
-$ Link /NoMap /Executable = jpegtran.exe  jpegtran.obj,rdswitch.obj, -
-          cdjpeg.obj,transupp.obj,libjpeg.olb/Library'OPT'
-$!
-$ DoCompile rdjpgcom.c
-$ Link /NoMap /Executable = rdjpgcom.exe  rdjpgcom.obj'OPT'
-$!
-$ DoCompile wrjpgcom.c
-$ Link /NoMap /Executable = wrjpgcom.exe  wrjpgcom.obj'OPT'
-$!
-$! Run the self-test
-$!
-$ mcr sys$disk:[]djpeg -dct int -ppm -outfile testout.ppm testorig.jpg
-$ mcr sys$disk:[]djpeg -dct int -bmp -colors 256 -outfile testout.bmp testorig.jpg
-$ mcr sys$disk:[]cjpeg -dct int      -outfile testout.jpg testimg.ppm
-$ mcr sys$disk:[]djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-$ mcr sys$disk:[]cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-$ mcr sys$disk:[]jpegtran -outfile testoutt.jpg testprog.jpg
-$ Backup /Compare/Log testimg.ppm testout.ppm
-$ Backup /Compare/Log testimg.bmp testout.bmp
-$ Backup /Compare/Log testimg.jpg testout.jpg
-$ Backup /Compare/Log testimg.ppm testoutp.ppm
-$ Backup /Compare/Log testimgp.jpg testoutp.jpg
-$ Backup /Compare/Log testorig.jpg testoutt.jpg
-$!
-$End:
-$   If Verify Then Set Verify
-$ Exit
diff --git a/makefile.wat b/makefile.wat
deleted file mode 100644
index d953e46..0000000
--- a/makefile.wat
+++ /dev/null
@@ -1,233 +0,0 @@
-# Makefile for Independent JPEG Group's software
-
-# This makefile is suitable for Watcom C/C++ 10.0 on MS-DOS (using
-# dos4g extender), OS/2, and Windows NT console mode.
-# Thanks to Janos Haide, jhaide@btrvtech.com.
-
-# Read installation instructions before saying "wmake" !!
-
-# Uncomment line for desired system
-SYSTEM=DOS
-#SYSTEM=OS2
-#SYSTEM=NT
-
-# The name of your C compiler:
-CC= wcl386
-
-# You may need to adjust these cc options:
-CFLAGS= -4r -ort -wx -zq -bt=$(SYSTEM)
-# Caution: avoid -ol or -ox; these generate bad code with 10.0 or 10.0a.
-# Generally, we recommend defining any configuration symbols in jconfig.h,
-# NOT via -D switches here.
-
-# Link-time cc options:
-!ifeq SYSTEM DOS
-LDFLAGS= -zq -l=dos4g
-!else ifeq SYSTEM OS2
-LDFLAGS= -zq -l=os2v2
-!else ifeq SYSTEM NT
-LDFLAGS= -zq -l=nt
-!endif
-
-# Put here the object file name for the correct system-dependent memory
-# manager file.  jmemnobs should work fine for dos4g or OS/2 environment.
-SYSDEPMEM= jmemnobs.obj
-
-# End of configurable options.
-
-
-# source files: JPEG library proper
-LIBSOURCES= jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c &
-        jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c &
-        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c &
-        jdatadst.c jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c &
-        jdinput.c jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c &
-        jdpostct.c jdsample.c jdtrans.c jerror.c jfdctflt.c jfdctfst.c &
-        jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c jquant1.c &
-        jquant2.c jutils.c jmemmgr.c
-# memmgr back ends: compile only one of these into a working library
-SYSDEPSOURCES= jmemansi.c jmemname.c jmemnobs.c jmemdos.c jmemmac.c
-# source files: cjpeg/djpeg/jpegtran applications, also rdjpgcom/wrjpgcom
-APPSOURCES= cjpeg.c djpeg.c jpegtran.c rdjpgcom.c wrjpgcom.c cdjpeg.c &
-        rdcolmap.c rdswitch.c transupp.c rdppm.c wrppm.c rdgif.c wrgif.c &
-        rdtarga.c wrtarga.c rdbmp.c wrbmp.c rdrle.c wrrle.c
-SOURCES= $(LIBSOURCES) $(SYSDEPSOURCES) $(APPSOURCES)
-# files included by source files
-INCLUDES= jchuff.h jdhuff.h jdct.h jerror.h jinclude.h jmemsys.h jmorecfg.h &
-        jpegint.h jpeglib.h jversion.h cdjpeg.h cderror.h transupp.h
-# documentation, test, and support files
-DOCS= README install.doc usage.doc cjpeg.1 djpeg.1 jpegtran.1 rdjpgcom.1 &
-        wrjpgcom.1 wizard.doc example.c libjpeg.doc structure.doc &
-        coderules.doc filelist.doc change.log
-MKFILES= configure makefile.cfg makefile.ansi makefile.unix makefile.bcc &
-        makefile.mc6 makefile.dj makefile.wat makefile.vc makelib.ds &
-        makeapps.ds makeproj.mac makcjpeg.st makdjpeg.st makljpeg.st &
-        maktjpeg.st makefile.manx makefile.sas makefile.mms makefile.vms &
-        makvms.opt
-CONFIGFILES= jconfig.cfg jconfig.bcc jconfig.mc6 jconfig.dj jconfig.wat &
-        jconfig.vc jconfig.mac jconfig.st jconfig.manx jconfig.sas &
-        jconfig.vms
-CONFIGUREFILES= config.guess config.sub install-sh ltconfig ltmain.sh
-OTHERFILES= jconfig.doc ckconfig.c ansi2knr.c ansi2knr.1 jmemdosa.asm
-TESTFILES= testorig.jpg testimg.ppm testimg.bmp testimg.jpg testprog.jpg &
-        testimgp.jpg
-DISTFILES= $(DOCS) $(MKFILES) $(CONFIGFILES) $(SOURCES) $(INCLUDES) &
-        $(CONFIGUREFILES) $(OTHERFILES) $(TESTFILES)
-# library object files common to compression and decompression
-COMOBJECTS= jcomapi.obj jutils.obj jerror.obj jmemmgr.obj $(SYSDEPMEM)
-# compression library object files
-CLIBOBJECTS= jcapimin.obj jcapistd.obj jctrans.obj jcparam.obj jdatadst.obj &
-        jcinit.obj jcmaster.obj jcmarker.obj jcmainct.obj jcprepct.obj &
-        jccoefct.obj jccolor.obj jcsample.obj jchuff.obj jcphuff.obj &
-        jcdctmgr.obj jfdctfst.obj jfdctflt.obj jfdctint.obj
-# decompression library object files
-DLIBOBJECTS= jdapimin.obj jdapistd.obj jdtrans.obj jdatasrc.obj &
-        jdmaster.obj jdinput.obj jdmarker.obj jdhuff.obj jdphuff.obj &
-        jdmainct.obj jdcoefct.obj jdpostct.obj jddctmgr.obj jidctfst.obj &
-        jidctflt.obj jidctint.obj jidctred.obj jdsample.obj jdcolor.obj &
-        jquant1.obj jquant2.obj jdmerge.obj
-# These objectfiles are included in libjpeg.lib
-LIBOBJECTS= $(CLIBOBJECTS) $(DLIBOBJECTS) $(COMOBJECTS)
-# object files for sample applications (excluding library files)
-COBJECTS= cjpeg.obj rdppm.obj rdgif.obj rdtarga.obj rdrle.obj rdbmp.obj &
-        rdswitch.obj cdjpeg.obj
-DOBJECTS= djpeg.obj wrppm.obj wrgif.obj wrtarga.obj wrrle.obj wrbmp.obj &
-        rdcolmap.obj cdjpeg.obj
-TROBJECTS= jpegtran.obj rdswitch.obj cdjpeg.obj transupp.obj
-
-
-all: libjpeg.lib cjpeg.exe djpeg.exe jpegtran.exe rdjpgcom.exe wrjpgcom.exe
-
-libjpeg.lib: $(LIBOBJECTS)
-	- del libjpeg.lib
-	* wlib -n libjpeg.lib $(LIBOBJECTS)
-
-cjpeg.exe: $(COBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) $(COBJECTS) libjpeg.lib
-
-djpeg.exe: $(DOBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) $(DOBJECTS) libjpeg.lib
-
-jpegtran.exe: $(TROBJECTS) libjpeg.lib
-	$(CC) $(LDFLAGS) $(TROBJECTS) libjpeg.lib
-
-rdjpgcom.exe: rdjpgcom.c
-	$(CC) $(CFLAGS) $(LDFLAGS) rdjpgcom.c
-
-wrjpgcom.exe: wrjpgcom.c
-	$(CC) $(CFLAGS) $(LDFLAGS) wrjpgcom.c
-
-.c.obj:
-	$(CC) $(CFLAGS) -c $<
-
-jconfig.h: jconfig.doc
-	echo You must prepare a system-dependent jconfig.h file.
-	echo Please read the installation directions in install.doc.
-	exit 1
-
-clean: .SYMBOLIC
-	- del *.obj
-	- del libjpeg.lib
-	- del cjpeg.exe
-	- del djpeg.exe
-	- del jpegtran.exe
-	- del rdjpgcom.exe
-	- del wrjpgcom.exe
-	- del testout*.*
-
-test: cjpeg.exe djpeg.exe jpegtran.exe  .SYMBOLIC
-	- del testout*.*
-	djpeg -dct int -ppm -outfile testout.ppm  testorig.jpg
-	djpeg -dct int -bmp -colors 256 -outfile testout.bmp  testorig.jpg
-	cjpeg -dct int -outfile testout.jpg  testimg.ppm
-	djpeg -dct int -ppm -outfile testoutp.ppm testprog.jpg
-	cjpeg -dct int -progressive -opt -outfile testoutp.jpg testimg.ppm
-	jpegtran -outfile testoutt.jpg testprog.jpg
-!ifeq SYSTEM DOS
-	fc /b testimg.ppm testout.ppm
-	fc /b testimg.bmp testout.bmp
-	fc /b testimg.jpg testout.jpg
-	fc /b testimg.ppm testoutp.ppm
-	fc /b testimgp.jpg testoutp.jpg
-	fc /b testorig.jpg testoutt.jpg
-!else
-	echo n > n.tmp
-	comp testimg.ppm testout.ppm < n.tmp
-	comp testimg.bmp testout.bmp < n.tmp
-	comp testimg.jpg testout.jpg < n.tmp
-	comp testimg.ppm testoutp.ppm < n.tmp
-	comp testimgp.jpg testoutp.jpg < n.tmp
-	comp testorig.jpg testoutt.jpg < n.tmp
-	del n.tmp
-!endif
-
-
-jcapimin.obj: jcapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcapistd.obj: jcapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccoefct.obj: jccoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jccolor.obj: jccolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcdctmgr.obj: jcdctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jchuff.obj: jchuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcinit.obj: jcinit.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmainct.obj: jcmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmarker.obj: jcmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcmaster.obj: jcmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcomapi.obj: jcomapi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcparam.obj: jcparam.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcphuff.obj: jcphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jchuff.h
-jcprepct.obj: jcprepct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jcsample.obj: jcsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jctrans.obj: jctrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapimin.obj: jdapimin.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdapistd.obj: jdapistd.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdatadst.obj: jdatadst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdatasrc.obj: jdatasrc.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h
-jdcoefct.obj: jdcoefct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdcolor.obj: jdcolor.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jddctmgr.obj: jddctmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jdhuff.obj: jdhuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdinput.obj: jdinput.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmainct.obj: jdmainct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmarker.obj: jdmarker.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmaster.obj: jdmaster.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdmerge.obj: jdmerge.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdphuff.obj: jdphuff.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdhuff.h
-jdpostct.obj: jdpostct.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdsample.obj: jdsample.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jdtrans.obj: jdtrans.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jerror.obj: jerror.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jversion.h jerror.h
-jfdctflt.obj: jfdctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctfst.obj: jfdctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jfdctint.obj: jfdctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctflt.obj: jidctflt.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctfst.obj: jidctfst.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctint.obj: jidctint.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jidctred.obj: jidctred.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jdct.h
-jquant1.obj: jquant1.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jquant2.obj: jquant2.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jutils.obj: jutils.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h
-jmemmgr.obj: jmemmgr.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemansi.obj: jmemansi.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemname.obj: jmemname.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemnobs.obj: jmemnobs.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemdos.obj: jmemdos.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-jmemmac.obj: jmemmac.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h jmemsys.h
-cjpeg.obj: cjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-djpeg.obj: djpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h jversion.h
-jpegtran.obj: jpegtran.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h transupp.h jversion.h
-rdjpgcom.obj: rdjpgcom.c jinclude.h jconfig.h
-wrjpgcom.obj: wrjpgcom.c jinclude.h jconfig.h
-cdjpeg.obj: cdjpeg.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdcolmap.obj: rdcolmap.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdswitch.obj: rdswitch.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-transupp.obj: transupp.c jinclude.h jconfig.h jpeglib.h jmorecfg.h jpegint.h jerror.h transupp.h
-rdppm.obj: rdppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrppm.obj: wrppm.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdgif.obj: rdgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrgif.obj: wrgif.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdtarga.obj: rdtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrtarga.obj: wrtarga.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdbmp.obj: rdbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrbmp.obj: wrbmp.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-rdrle.obj: rdrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
-wrrle.obj: wrrle.c cdjpeg.h jinclude.h jconfig.h jpeglib.h jmorecfg.h jerror.h cderror.h
diff --git a/makelib.ds b/makelib.ds
deleted file mode 100644
index c7ad36d..0000000
--- a/makelib.ds
+++ /dev/null
@@ -1,1046 +0,0 @@
-# Microsoft Developer Studio Generated NMAKE File, Format Version 4.20
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Static Library" 0x0104
-
-!IF "$(CFG)" == ""
-CFG=jpeg - Win32
-!MESSAGE No configuration specified.  Defaulting to jpeg - Win32.
-!ENDIF 
-
-!IF "$(CFG)" != "jpeg - Win32"
-!MESSAGE Invalid configuration "$(CFG)" specified.
-!MESSAGE You can specify a configuration when running NMAKE on this makefile
-!MESSAGE by defining the macro CFG on the command line.  For example:
-!MESSAGE 
-!MESSAGE NMAKE /f "jpeg.mak" CFG="jpeg - Win32"
-!MESSAGE 
-!MESSAGE Possible choices for configuration are:
-!MESSAGE 
-!MESSAGE "jpeg - Win32" (based on "Win32 (x86) Static Library")
-!MESSAGE 
-!ERROR An invalid configuration is specified.
-!ENDIF 
-
-!IF "$(OS)" == "Windows_NT"
-NULL=
-!ELSE 
-NULL=nul
-!ENDIF 
-################################################################################
-# Begin Project
-# PROP Target_Last_Scanned "jpeg - Win32"
-CPP=cl.exe
-
-!IF  "$(CFG)" == "jpeg - Win32"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "Release"
-# PROP BASE Intermediate_Dir "Release"
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "Release"
-# PROP Intermediate_Dir "Release"
-# PROP Target_Dir ""
-OUTDIR=.\Release
-INTDIR=.\Release
-
-ALL : "$(OUTDIR)\jpeg.lib"
-
-CLEAN : 
-	-@erase "$(INTDIR)\jcapimin.obj"
-	-@erase "$(INTDIR)\jcapistd.obj"
-	-@erase "$(INTDIR)\jctrans.obj"
-	-@erase "$(INTDIR)\jcparam.obj"
-	-@erase "$(INTDIR)\jdatadst.obj"
-	-@erase "$(INTDIR)\jcinit.obj"
-	-@erase "$(INTDIR)\jcmaster.obj"
-	-@erase "$(INTDIR)\jcmarker.obj"
-	-@erase "$(INTDIR)\jcmainct.obj"
-	-@erase "$(INTDIR)\jcprepct.obj"
-	-@erase "$(INTDIR)\jccoefct.obj"
-	-@erase "$(INTDIR)\jccolor.obj"
-	-@erase "$(INTDIR)\jcsample.obj"
-	-@erase "$(INTDIR)\jchuff.obj"
-	-@erase "$(INTDIR)\jcphuff.obj"
-	-@erase "$(INTDIR)\jcdctmgr.obj"
-	-@erase "$(INTDIR)\jfdctfst.obj"
-	-@erase "$(INTDIR)\jfdctflt.obj"
-	-@erase "$(INTDIR)\jfdctint.obj"
-	-@erase "$(INTDIR)\jdapimin.obj"
-	-@erase "$(INTDIR)\jdapistd.obj"
-	-@erase "$(INTDIR)\jdtrans.obj"
-	-@erase "$(INTDIR)\jdatasrc.obj"
-	-@erase "$(INTDIR)\jdmaster.obj"
-	-@erase "$(INTDIR)\jdinput.obj"
-	-@erase "$(INTDIR)\jdmarker.obj"
-	-@erase "$(INTDIR)\jdhuff.obj"
-	-@erase "$(INTDIR)\jdphuff.obj"
-	-@erase "$(INTDIR)\jdmainct.obj"
-	-@erase "$(INTDIR)\jdcoefct.obj"
-	-@erase "$(INTDIR)\jdpostct.obj"
-	-@erase "$(INTDIR)\jddctmgr.obj"
-	-@erase "$(INTDIR)\jidctfst.obj"
-	-@erase "$(INTDIR)\jidctflt.obj"
-	-@erase "$(INTDIR)\jidctint.obj"
-	-@erase "$(INTDIR)\jidctred.obj"
-	-@erase "$(INTDIR)\jdsample.obj"
-	-@erase "$(INTDIR)\jdcolor.obj"
-	-@erase "$(INTDIR)\jquant1.obj"
-	-@erase "$(INTDIR)\jquant2.obj"
-	-@erase "$(INTDIR)\jdmerge.obj"
-	-@erase "$(INTDIR)\jcomapi.obj"
-	-@erase "$(INTDIR)\jutils.obj"
-	-@erase "$(INTDIR)\jerror.obj"
-	-@erase "$(INTDIR)\jmemmgr.obj"
-	-@erase "$(INTDIR)\jmemnobs.obj"
-	-@erase "$(OUTDIR)\jpeg.lib"
-
-"$(OUTDIR)" :
-    if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
-
-# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\
- /Fp"$(INTDIR)/jpeg.pch" /YX /Fo"$(INTDIR)/" /c 
-CPP_OBJS=.\Release/
-CPP_SBRS=.\.
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo
-# ADD BSC32 /nologo
-BSC32_FLAGS=/nologo /o"$(OUTDIR)/jpeg.bsc" 
-BSC32_SBRS= \
-	
-LIB32=link.exe -lib
-# ADD BASE LIB32 /nologo
-# ADD LIB32 /nologo
-LIB32_FLAGS=/nologo /out:"$(OUTDIR)/jpeg.lib" 
-LIB32_OBJS= \
-	"$(INTDIR)\jcapimin.obj" \
-	"$(INTDIR)\jcapistd.obj" \
-	"$(INTDIR)\jctrans.obj" \
-	"$(INTDIR)\jcparam.obj" \
-	"$(INTDIR)\jdatadst.obj" \
-	"$(INTDIR)\jcinit.obj" \
-	"$(INTDIR)\jcmaster.obj" \
-	"$(INTDIR)\jcmarker.obj" \
-	"$(INTDIR)\jcmainct.obj" \
-	"$(INTDIR)\jcprepct.obj" \
-	"$(INTDIR)\jccoefct.obj" \
-	"$(INTDIR)\jccolor.obj" \
-	"$(INTDIR)\jcsample.obj" \
-	"$(INTDIR)\jchuff.obj" \
-	"$(INTDIR)\jcphuff.obj" \
-	"$(INTDIR)\jcdctmgr.obj" \
-	"$(INTDIR)\jfdctfst.obj" \
-	"$(INTDIR)\jfdctflt.obj" \
-	"$(INTDIR)\jfdctint.obj" \
-	"$(INTDIR)\jdapimin.obj" \
-	"$(INTDIR)\jdapistd.obj" \
-	"$(INTDIR)\jdtrans.obj" \
-	"$(INTDIR)\jdatasrc.obj" \
-	"$(INTDIR)\jdmaster.obj" \
-	"$(INTDIR)\jdinput.obj" \
-	"$(INTDIR)\jdmarker.obj" \
-	"$(INTDIR)\jdhuff.obj" \
-	"$(INTDIR)\jdphuff.obj" \
-	"$(INTDIR)\jdmainct.obj" \
-	"$(INTDIR)\jdcoefct.obj" \
-	"$(INTDIR)\jdpostct.obj" \
-	"$(INTDIR)\jddctmgr.obj" \
-	"$(INTDIR)\jidctfst.obj" \
-	"$(INTDIR)\jidctflt.obj" \
-	"$(INTDIR)\jidctint.obj" \
-	"$(INTDIR)\jidctred.obj" \
-	"$(INTDIR)\jdsample.obj" \
-	"$(INTDIR)\jdcolor.obj" \
-	"$(INTDIR)\jquant1.obj" \
-	"$(INTDIR)\jquant2.obj" \
-	"$(INTDIR)\jdmerge.obj" \
-	"$(INTDIR)\jcomapi.obj" \
-	"$(INTDIR)\jutils.obj" \
-	"$(INTDIR)\jerror.obj" \
-	"$(INTDIR)\jmemmgr.obj" \
-	"$(INTDIR)\jmemnobs.obj"
-
-"$(OUTDIR)\jpeg.lib" : "$(OUTDIR)" $(DEF_FILE) $(LIB32_OBJS)
-    $(LIB32) @<<
-  $(LIB32_FLAGS) $(DEF_FLAGS) $(LIB32_OBJS)
-<<
-
-!ENDIF 
-
-.c{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cpp{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cxx{$(CPP_OBJS)}.obj:
-   $(CPP) $(CPP_PROJ) $<  
-
-.c{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cpp{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-.cxx{$(CPP_SBRS)}.sbr:
-   $(CPP) $(CPP_PROJ) $<  
-
-################################################################################
-# Begin Target
-
-# Name "jpeg - Win32"
-
-!IF  "$(CFG)" == "jpeg - Win32"
-
-!ENDIF 
-
-################################################################################
-# Begin Source File
-
-SOURCE="jcapimin.c"
-DEP_CPP_JCAPI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcapimin.obj" : $(SOURCE) $(DEP_CPP_JCAPI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcapistd.c"
-DEP_CPP_JCAPIS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcapistd.obj" : $(SOURCE) $(DEP_CPP_JCAPIS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jccoefct.c"
-DEP_CPP_JCCOE=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jccoefct.obj" : $(SOURCE) $(DEP_CPP_JCCOE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jccolor.c"
-DEP_CPP_JCCOL=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jccolor.obj" : $(SOURCE) $(DEP_CPP_JCCOL) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcdctmgr.c"
-DEP_CPP_JCDCT=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jcdctmgr.obj" : $(SOURCE) $(DEP_CPP_JCDCT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jchuff.c"
-DEP_CPP_JCHUF=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jchuff.h"\
-	
-
-"$(INTDIR)\jchuff.obj" : $(SOURCE) $(DEP_CPP_JCHUF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcinit.c"
-DEP_CPP_JCINI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcinit.obj" : $(SOURCE) $(DEP_CPP_JCINI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcmainct.c"
-DEP_CPP_JCMAI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcmainct.obj" : $(SOURCE) $(DEP_CPP_JCMAI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcmarker.c"
-DEP_CPP_JCMAR=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcmarker.obj" : $(SOURCE) $(DEP_CPP_JCMAR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcmaster.c"
-DEP_CPP_JCMAS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcmaster.obj" : $(SOURCE) $(DEP_CPP_JCMAS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcomapi.c"
-DEP_CPP_JCOMA=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcomapi.obj" : $(SOURCE) $(DEP_CPP_JCOMA) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcparam.c"
-DEP_CPP_JCPAR=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcparam.obj" : $(SOURCE) $(DEP_CPP_JCPAR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcphuff.c"
-DEP_CPP_JCPHU=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jchuff.h"\
-	
-
-"$(INTDIR)\jcphuff.obj" : $(SOURCE) $(DEP_CPP_JCPHU) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcprepct.c"
-DEP_CPP_JCPRE=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcprepct.obj" : $(SOURCE) $(DEP_CPP_JCPRE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jcsample.c"
-DEP_CPP_JCSAM=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jcsample.obj" : $(SOURCE) $(DEP_CPP_JCSAM) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jctrans.c"
-DEP_CPP_JCTRA=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jctrans.obj" : $(SOURCE) $(DEP_CPP_JCTRA) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdapimin.c"
-DEP_CPP_JDAPI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdapimin.obj" : $(SOURCE) $(DEP_CPP_JDAPI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdapistd.c"
-DEP_CPP_JDAPIS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdapistd.obj" : $(SOURCE) $(DEP_CPP_JDAPIS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdatadst.c"
-DEP_CPP_JDATA=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdatadst.obj" : $(SOURCE) $(DEP_CPP_JDATA) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdatasrc.c"
-DEP_CPP_JDATAS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdatasrc.obj" : $(SOURCE) $(DEP_CPP_JDATAS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdcoefct.c"
-DEP_CPP_JDCOE=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdcoefct.obj" : $(SOURCE) $(DEP_CPP_JDCOE) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdcolor.c"
-DEP_CPP_JDCOL=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdcolor.obj" : $(SOURCE) $(DEP_CPP_JDCOL) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jddctmgr.c"
-DEP_CPP_JDDCT=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jddctmgr.obj" : $(SOURCE) $(DEP_CPP_JDDCT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdhuff.c"
-DEP_CPP_JDHUF=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdhuff.h"\
-	
-
-"$(INTDIR)\jdhuff.obj" : $(SOURCE) $(DEP_CPP_JDHUF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdinput.c"
-DEP_CPP_JDINP=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdinput.obj" : $(SOURCE) $(DEP_CPP_JDINP) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdmainct.c"
-DEP_CPP_JDMAI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdmainct.obj" : $(SOURCE) $(DEP_CPP_JDMAI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdmarker.c"
-DEP_CPP_JDMAR=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdmarker.obj" : $(SOURCE) $(DEP_CPP_JDMAR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdmaster.c"
-DEP_CPP_JDMAS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdmaster.obj" : $(SOURCE) $(DEP_CPP_JDMAS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdmerge.c"
-DEP_CPP_JDMER=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdmerge.obj" : $(SOURCE) $(DEP_CPP_JDMER) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdphuff.c"
-DEP_CPP_JDPHU=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdhuff.h"\
-	
-
-"$(INTDIR)\jdphuff.obj" : $(SOURCE) $(DEP_CPP_JDPHU) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdpostct.c"
-DEP_CPP_JDPOS=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdpostct.obj" : $(SOURCE) $(DEP_CPP_JDPOS) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdsample.c"
-DEP_CPP_JDSAM=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdsample.obj" : $(SOURCE) $(DEP_CPP_JDSAM) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jdtrans.c"
-DEP_CPP_JDTRA=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jdtrans.obj" : $(SOURCE) $(DEP_CPP_JDTRA) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jerror.c"
-DEP_CPP_JERRO=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jversion.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jerror.obj" : $(SOURCE) $(DEP_CPP_JERRO) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jfdctflt.c"
-DEP_CPP_JFDCT=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jfdctflt.obj" : $(SOURCE) $(DEP_CPP_JFDCT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jfdctfst.c"
-DEP_CPP_JFDCTF=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jfdctfst.obj" : $(SOURCE) $(DEP_CPP_JFDCTF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jfdctint.c"
-DEP_CPP_JFDCTI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jfdctint.obj" : $(SOURCE) $(DEP_CPP_JFDCTI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jidctflt.c"
-DEP_CPP_JIDCT=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jidctflt.obj" : $(SOURCE) $(DEP_CPP_JIDCT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jidctfst.c"
-DEP_CPP_JIDCTF=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jidctfst.obj" : $(SOURCE) $(DEP_CPP_JIDCTF) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jidctint.c"
-DEP_CPP_JIDCTI=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jidctint.obj" : $(SOURCE) $(DEP_CPP_JIDCTI) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jidctred.c"
-DEP_CPP_JIDCTR=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jdct.h"\
-	
-
-"$(INTDIR)\jidctred.obj" : $(SOURCE) $(DEP_CPP_JIDCTR) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jquant1.c"
-DEP_CPP_JQUAN=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jquant1.obj" : $(SOURCE) $(DEP_CPP_JQUAN) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jquant2.c"
-DEP_CPP_JQUANT=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jquant2.obj" : $(SOURCE) $(DEP_CPP_JQUANT) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jutils.c"
-DEP_CPP_JUTIL=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	
-
-"$(INTDIR)\jutils.obj" : $(SOURCE) $(DEP_CPP_JUTIL) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jmemmgr.c"
-DEP_CPP_JMEMM=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jmemsys.h"\
-	
-
-"$(INTDIR)\jmemmgr.obj" : $(SOURCE) $(DEP_CPP_JMEMM) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-################################################################################
-# Begin Source File
-
-SOURCE="jmemnobs.c"
-DEP_CPP_JMEMN=\
-	"jinclude.h"\
-	"jconfig.h"\
-	"jpeglib.h"\
-	"jmorecfg.h"\
-	"jpegint.h"\
-	"jerror.h"\
-	"jmemsys.h"\
-	
-
-"$(INTDIR)\jmemnobs.obj" : $(SOURCE) $(DEP_CPP_JMEMN) "$(INTDIR)"
-   $(CPP) $(CPP_PROJ) $(SOURCE)
-
-
-# End Source File
-# End Target
-# End Project
-################################################################################
-
diff --git a/makeproj.mac b/makeproj.mac
deleted file mode 100644
index ed277c8..0000000
--- a/makeproj.mac
+++ /dev/null
@@ -1,213 +0,0 @@
---
--- makeproj.mac
---
--- This AppleScript builds Code Warrior PRO Release 2 project files for the
--- libjpeg library as well as the test programs 'cjpeg', 'djpeg', 'jpegtran'.
--- (We'd distribute real project files, except they're not text
--- and would create maintenance headaches.)
---
--- The script then compiles and links the library and the test programs.
--- NOTE: if you haven't already created a 'jconfig.h' file, the script
--- automatically copies 'jconfig.mac' to 'jconfig.h'.
---
--- To use this script, you must have AppleScript 1.1 or later installed
--- and a suitable AppleScript editor like Script Editor or Script Debugger
--- (http://www.latenightsw.com). Open this file with your AppleScript
--- editor and execute the "run" command to build the projects.
---
--- Thanks to Dan Sears and Don Agro for this script.
--- Questions about this script can be addressed to dogpark@interlog.com
---
-
-on run
-
-	choose folder with prompt ">>> Select IJG source folder <<<"
-	set ijg_folder to result
-
-	choose folder with prompt ">>> Select MetroWerks folder <<<"
-	set cw_folder to result
-
-	-- if jconfig.h doesn't already exist, copy jconfig.mac
-
-	tell application "Finder"
-		if not (exists file "jconfig.h" of ijg_folder) then
-			duplicate {file "jconfig.mac" of folder ijg_folder}
-			select file "jconfig.mac copy" of folder ijg_folder
-			set name of selection to "jconfig.h"
-		end if
-	end tell
-
-	tell application "CodeWarrior IDE 2.1"
-	  with timeout of 10000 seconds
-
-		-- create libjpeg project
-
-		activate
-		Create Project (ijg_folder as string) & "libjpeg.proj"
-		Set Preferences of panel "Target Settings" to {Target Name:"libjpeg"}
-		Set Preferences of panel "PPC Project" to {File Name:"libjpeg"}
-		Set Preferences of panel "Target Settings" to {Linker:"MacOS PPC Linker"}
-		Set Preferences of panel "PPC Project" to {Project Type:library}
-		Set Preferences of panel "C/C++ Compiler" to {ANSI Strict:true}
-		Set Preferences of panel "C/C++ Compiler" to {Enums Always Ints:true}
-		Set Preferences of panel "PPC Codegen" to {Struct Alignment:PowerPC}
-		Set Preferences of panel "PPC Linker" to {Generate SYM File:false}
-
-		Add Files (ijg_folder as string) & "jcapimin.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcapistd.c" To Segment 1
-		Add Files (ijg_folder as string) & "jctrans.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcparam.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdatadst.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcinit.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcmaster.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcmarker.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcmainct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcprepct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jccoefct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jccolor.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcsample.c" To Segment 1
-		Add Files (ijg_folder as string) & "jchuff.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcphuff.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcdctmgr.c" To Segment 1
-		Add Files (ijg_folder as string) & "jfdctfst.c" To Segment 1
-		Add Files (ijg_folder as string) & "jfdctflt.c" To Segment 1
-		Add Files (ijg_folder as string) & "jfdctint.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdapimin.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdapistd.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdtrans.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdatasrc.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdmaster.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdinput.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdmarker.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdhuff.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdphuff.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdmainct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdcoefct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdpostct.c" To Segment 1
-		Add Files (ijg_folder as string) & "jddctmgr.c" To Segment 1
-		Add Files (ijg_folder as string) & "jidctfst.c" To Segment 1
-		Add Files (ijg_folder as string) & "jidctflt.c" To Segment 1
-		Add Files (ijg_folder as string) & "jidctint.c" To Segment 1
-		Add Files (ijg_folder as string) & "jidctred.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdsample.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdcolor.c" To Segment 1
-		Add Files (ijg_folder as string) & "jquant1.c" To Segment 1
-		Add Files (ijg_folder as string) & "jquant2.c" To Segment 1
-		Add Files (ijg_folder as string) & "jdmerge.c" To Segment 1
-		Add Files (ijg_folder as string) & "jcomapi.c" To Segment 1
-		Add Files (ijg_folder as string) & "jutils.c" To Segment 1
-		Add Files (ijg_folder as string) & "jerror.c" To Segment 1
-		Add Files (ijg_folder as string) & "jmemmgr.c" To Segment 1
-		Add Files (ijg_folder as string) & "jmemmac.c" To Segment 1
-
-		-- compile and link the library
-
-		Make Project
-		Close Project
-
-		-- create cjpeg project
-
-		activate
-		Create Project (ijg_folder as string) & "cjpeg.proj"
-		Set Preferences of panel "Target Settings" to {Target Name:"cjpeg"}
-		Set Preferences of panel "PPC Project" to {File Name:"cjpeg"}
-		Set Preferences of panel "Target Settings" to {Linker:"MacOS PPC Linker"}
-		Set Preferences of panel "C/C++ Compiler" to {ANSI Strict:true}
-		Set Preferences of panel "C/C++ Compiler" to {Enums Always Ints:true}
-		Set Preferences of panel "PPC Codegen" to {Struct Alignment:PowerPC}
-		Set Preferences of panel "PPC Linker" to {Generate SYM File:false}
-
-		Add Files (ijg_folder as string) & "cjpeg.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdppm.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdgif.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdtarga.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdrle.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdbmp.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdswitch.c" To Segment 1
-		Add Files (ijg_folder as string) & "cdjpeg.c" To Segment 1
-
-		Add Files (ijg_folder as string) & "libjpeg" To Segment 2
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL C.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL SIOUX.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:Runtime:Runtime PPC:MSL RuntimePPC.Lib" To Segment 3
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:InterfaceLib" To Segment 4
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:MathLib" To Segment 4
-
-		-- compile and link cjpeg
-
-		Make Project
-		Close Project
-
-		-- create djpeg project
-
-		activate
-		Create Project (ijg_folder as string) & "djpeg.proj"
-		Set Preferences of panel "Target Settings" to {Target Name:"djpeg"}
-		Set Preferences of panel "PPC Project" to {File Name:"djpeg"}
-		Set Preferences of panel "Target Settings" to {Linker:"MacOS PPC Linker"}
-		Set Preferences of panel "C/C++ Compiler" to {ANSI Strict:true}
-		Set Preferences of panel "C/C++ Compiler" to {Enums Always Ints:true}
-		Set Preferences of panel "PPC Codegen" to {Struct Alignment:PowerPC}
-		Set Preferences of panel "PPC Linker" to {Generate SYM File:false}
-
-		Add Files (ijg_folder as string) & "djpeg.c" To Segment 1
-		Add Files (ijg_folder as string) & "wrppm.c" To Segment 1
-		Add Files (ijg_folder as string) & "wrgif.c" To Segment 1
-		Add Files (ijg_folder as string) & "wrtarga.c" To Segment 1
-		Add Files (ijg_folder as string) & "wrrle.c" To Segment 1
-		Add Files (ijg_folder as string) & "wrbmp.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdcolmap.c" To Segment 1
-		Add Files (ijg_folder as string) & "cdjpeg.c" To Segment 1
-
-		Add Files (ijg_folder as string) & "libjpeg" To Segment 2
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL C.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL SIOUX.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:Runtime:Runtime PPC:MSL RuntimePPC.Lib" To Segment 3
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:InterfaceLib" To Segment 4
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:MathLib" To Segment 4
-
-		-- compile and link djpeg
-
-		Make Project
-		Close Project
-
-		-- create jpegtran project
-
-		activate
-		Create Project (ijg_folder as string) & "jpegtran.proj"
-		Set Preferences of panel "Target Settings" to {Target Name:"jpegtran"}
-		Set Preferences of panel "PPC Project" to {File Name:"jpegtran"}
-		Set Preferences of panel "Target Settings" to {Linker:"MacOS PPC Linker"}
-		Set Preferences of panel "C/C++ Compiler" to {ANSI Strict:true}
-		Set Preferences of panel "C/C++ Compiler" to {Enums Always Ints:true}
-		Set Preferences of panel "PPC Codegen" to {Struct Alignment:PowerPC}
-		Set Preferences of panel "PPC Linker" to {Generate SYM File:false}
-
-		Add Files (ijg_folder as string) & "jpegtran.c" To Segment 1
-		Add Files (ijg_folder as string) & "rdswitch.c" To Segment 1
-		Add Files (ijg_folder as string) & "cdjpeg.c" To Segment 1
-		Add Files (ijg_folder as string) & "transupp.c" To Segment 1
-
-		Add Files (ijg_folder as string) & "libjpeg" To Segment 2
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL C.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:Metrowerks Standard Library:MSL C:Bin:MSL SIOUX.PPC.Lib" To Segment 3
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:Runtime:Runtime PPC:MSL RuntimePPC.Lib" To Segment 3
-
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:InterfaceLib" To Segment 4
-		Add Files (cw_folder as string) & "Metrowerks CodeWarrior:MacOS Support:Libraries:MacOS Common:MathLib" To Segment 4
-
-		-- compile and link jpegtran
-
-		Make Project
-		Close Project
-
-		quit
-
-	  end timeout
-	end tell
-end run
diff --git a/makljpeg.st b/makljpeg.st
deleted file mode 100644
index 813493e..0000000
--- a/makljpeg.st
+++ /dev/null
@@ -1,70 +0,0 @@
-; Project file for Independent JPEG Group's software
-;
-; This project file is for Atari ST/STE/TT systems using Pure C or Turbo C.
-; Thanks to Frank Moehle (Frank.Moehle@arbi.informatik.uni-oldenburg.de),
-; Dr. B. Setzepfandt (bernd@gina.uni-muenster.de),
-; and Guido Vollbeding (guivol@esc.de).
-;
-; To use this file, rename it to libjpeg.prj.
-; Read installation instructions before trying to make the program!
-;
-;
-;      * * * Output file * * *
-libjpeg.lib
-;
-; * * * COMPILER OPTIONS * * *  
-.C[-P]        ; absolute calls
-.C[-M]        ; and no string merging, folks
-.C[-w-cln]    ; no "constant is long" warnings
-.C[-w-par]    ; no "parameter xxxx unused"
-.C[-w-rch]    ; no "unreachable code"
-.C[-wsig]     ; warn if significant digits may be lost
-.L[-J]        ; link new Obj-format (so we get a library)
-=
-; * * * * List of modules * * * * 
-jcapimin.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcapistd.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jccoefct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jccolor.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcdctmgr.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jchuff.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jchuff.h)
-jcinit.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcmainct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcmarker.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcmaster.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcomapi.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcparam.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcphuff.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jchuff.h)
-jcprepct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jcsample.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jctrans.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdapimin.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdapistd.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdatadst.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h)
-jdatasrc.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h)
-jdcoefct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdcolor.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jddctmgr.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jdhuff.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdhuff.h)
-jdinput.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdmainct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdmarker.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdmaster.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdmerge.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdphuff.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdhuff.h)
-jdpostct.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdsample.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jdtrans.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jerror.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jversion.h,jerror.h)
-jfdctflt.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jfdctfst.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jfdctint.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jidctflt.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jidctfst.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jidctint.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jidctred.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jdct.h)
-jquant1.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jquant2.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jutils.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h)
-jmemmgr.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jmemsys.h)
-jmemansi.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,jmemsys.h)
diff --git a/maktjpeg.st b/maktjpeg.st
deleted file mode 100644
index 31f4d16..0000000
--- a/maktjpeg.st
+++ /dev/null
@@ -1,32 +0,0 @@
-; Project file for Independent JPEG Group's software
-;
-; This project file is for Atari ST/STE/TT systems using Pure C or Turbo C.
-; Thanks to Frank Moehle (Frank.Moehle@arbi.informatik.uni-oldenburg.de),
-; Dr. B. Setzepfandt (bernd@gina.uni-muenster.de),
-; and Guido Vollbeding (guivol@esc.de).
-;
-; To use this file, rename it to jpegtran.prj.
-; If you are using Turbo C, change filenames beginning with "pc..." to "tc..."
-; Read installation instructions before trying to make the program!
-;
-;
-;      * * * Output file * * *
-jpegtran.ttp
-;
-; * * * COMPILER OPTIONS * * *  
-.C[-P]        ; absolute calls
-.C[-M]        ; and no string merging, folks
-.C[-w-cln]    ; no "constant is long" warnings
-.C[-w-par]    ; no "parameter xxxx unused"
-.C[-w-rch]    ; no "unreachable code"
-.C[-wsig]     ; warn if significant digits may be lost
-=
-; * * * * List of modules * * * * 
-pcstart.o
-jpegtran.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h,transupp.h,jversion.h)
-cdjpeg.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-rdswitch.c	(cdjpeg.h,jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jerror.h,cderror.h)
-transupp.c	(jinclude.h,jconfig.h,jpeglib.h,jmorecfg.h,jpegint.h,jerror.h,transupp.h)
-libjpeg.lib        ; built by libjpeg.prj
-pcstdlib.lib       ; standard library
-pcextlib.lib       ; extended library
diff --git a/makvms.opt b/makvms.opt
deleted file mode 100644
index 675e8fe..0000000
--- a/makvms.opt
+++ /dev/null
@@ -1,4 +0,0 @@
-! A pointer to the VAX/VMS C Run-Time Shareable Library.
-! This file is needed by makefile.mms and makefile.vms,
-! but only for the older VAX C compiler.  DEC C does not need it.
-Sys$Library:VAXCRTL.EXE /Share
diff --git a/md5/Makefile.am b/md5/Makefile.am
new file mode 100644
index 0000000..b36f019
--- /dev/null
+++ b/md5/Makefile.am
@@ -0,0 +1,4 @@
+noinst_PROGRAMS = md5cmp
+
+md5cmp_SOURCES = md5cmp.c md5.c md5hl.c md5.h
+md5cmp_CFLAGS = -I$(srcdir)
diff --git a/md5/md5.c b/md5/md5.c
new file mode 100644
index 0000000..7193e95
--- /dev/null
+++ b/md5/md5.c
@@ -0,0 +1,322 @@
+/*
+ * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
+ *
+ * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ * rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * This code is the same as the code published by RSA Inc.  It has been
+ * edited for clarity and style only.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "./md5.h"
+
+static void MD5Transform(unsigned int [4], const unsigned char [64]);
+
+/*
+ * Encode/Decode convert between 32-bit words and little-endian byte
+ * strings one byte at a time, so they do not depend on BYTE_ORDER being
+ * defined, on htole32()/le32toh() being available, or on the alignment of
+ * the byte buffer.  Both assume len is a multiple of 4.
+ */
+
+/* Encodes input (unsigned int) into output (unsigned char). */
+
+static void
+Encode (unsigned char *output, const unsigned int *input, unsigned int len)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; j < len; i++, j += 4) {
+		output[j] = (unsigned char)(input[i] & 0xff);
+		output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff);
+		output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff);
+		output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff);
+	}
+}
+
+/* Decodes input (unsigned char) into output (unsigned int). */
+
+static void
+Decode (unsigned int *output, const unsigned char *input, unsigned int len)
+{
+	unsigned int i, j;
+
+	for (i = 0, j = 0; j < len; i++, j += 4)
+		output[i] = (unsigned int)input[j] |
+		    ((unsigned int)input[j + 1] << 8) |
+		    ((unsigned int)input[j + 2] << 16) |
+		    ((unsigned int)input[j + 3] << 24);
+}
+
+/* Padding block: one 0x80 byte, then zeros (unlisted elements are zero- */
+/* initialized).  Read-only: MD5Pad() only passes it as MD5Update input. */
+static const unsigned char PADDING[64] = {
+  0x80
+};
+
+/* F, G, H and I are basic MD5 functions; every argument is parenthesized. */
+#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~(z))))
+
+/* ROTATE_LEFT rotates a 32-bit x left n bits; n must be in 1..31. */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+ * Rotation is separate from addition to prevent recomputation.
+ * Each expands to a single do/while(0) statement, so it needs a trailing
+ * semicolon and nests safely under an unbraced if/else. */
+#define FF(a, b, c, d, x, s, ac) do { \
+	(a) += F ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	} while (0)
+#define GG(a, b, c, d, x, s, ac) do { \
+	(a) += G ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	} while (0)
+#define HH(a, b, c, d, x, s, ac) do { \
+	(a) += H ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	} while (0)
+#define II(a, b, c, d, x, s, ac) do { \
+	(a) += I ((b), (c), (d)) + (x) + (unsigned int)(ac); \
+	(a) = ROTATE_LEFT ((a), (s)); \
+	(a) += (b); \
+	} while (0)
+
+/*
+ * MD5 initialization. Begins an MD5 operation, writing a new context.
+ */
+
+void
+MD5Init (MD5_CTX *context)
+{
+	context->count[0] = context->count[1] = 0;
+
+	/* Load magic initialization constants.  */
+	context->state[0] = 0x67452301;
+	context->state[1] = 0xefcdab89;
+	context->state[2] = 0x98badcfe;
+	context->state[3] = 0x10325476;
+}
+
+/*
+ * MD5 block update operation. Continues an MD5 message-digest
+ * operation, processing another message block, and updating the
+ * context.  Whole 64-byte blocks are transformed; the rest is buffered.
+ */
+
+void
+MD5Update (MD5_CTX *context, const void *in, unsigned int inputLen)
+{
+	unsigned int i, idx, partLen;
+	const unsigned char *input = in;
+
+	/* Nothing to absorb; also keeps a null "in" away from memcpy(). */
+	if (inputLen == 0)
+		return;
+
+	/* Compute number of bytes mod 64 */
+	idx = (unsigned int)((context->count[0] >> 3) & 0x3F);
+
+	/* Update number of bits, carrying into count[1] on wraparound */
+	if ((context->count[0] += ((unsigned int)inputLen << 3))
+	    < ((unsigned int)inputLen << 3))
+		context->count[1]++;
+	context->count[1] += ((unsigned int)inputLen >> 29);
+
+	partLen = 64 - idx;
+
+	/* Transform as many times as possible. */
+	if (inputLen >= partLen) {
+		memcpy((void *)&context->buffer[idx], (const void *)input,
+		    partLen);
+		MD5Transform (context->state, context->buffer);
+
+		/* i <= inputLen holds, so "inputLen - i" cannot wrap. */
+		for (i = partLen; inputLen - i >= 64; i += 64)
+			MD5Transform (context->state, &input[i]);
+		idx = 0;
+	} else
+		i = 0;
+
+	/* Buffer remaining input */
+	memcpy ((void *)&context->buffer[idx], (const void *)&input[i],
+	    inputLen-i);
+}
+
+/*
+ * MD5 padding. Pads the message out to 56 bytes mod 64, then appends the
+ * 8-byte bit count that was captured before the padding was added.
+ */
+
+void
+MD5Pad (MD5_CTX *context)
+{
+	unsigned char bits[8];
+	unsigned int idx, padLen;
+
+	/* Save number of bits */
+	Encode (bits, context->count, 8);
+
+	/* Pad out to 56 mod 64. */
+	idx = (unsigned int)((context->count[0] >> 3) & 0x3f);
+	padLen = (idx < 56) ? (56 - idx) : (120 - idx);
+	MD5Update (context, PADDING, padLen);
+
+	/* Append length (before padding) */
+	MD5Update (context, bits, 8);
+}
+
+/*
+ * MD5 finalization. Ends an MD5 message-digest operation, writing the
+ * message digest and zeroizing the context.  digest receives the four
+ * state words as 16 bytes; the context must be re-initialized with
+ * MD5Init() before it is used again.
+ */
+
+void
+MD5Final (unsigned char digest[16], MD5_CTX *context)
+{
+	/* Do padding. */
+	MD5Pad (context);
+
+	/* Store state in digest */
+	Encode (digest, context->state, 16);
+
+	/* Zeroize sensitive information. */
+	memset ((void *)context, 0, sizeof (*context));
+}
+
+/* MD5 basic transformation. Transforms state based on block: decodes the
+ * 64-byte block into 16 words, runs the 64 steps below on copies of the
+ * state words, then adds those copies back into state. */
+
+static void
+MD5Transform (unsigned int state[4], const unsigned char block[64])
+{
+	unsigned int a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+	Decode (x, block, 64);
+
+	/* Round 1 */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
+	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
+	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
+	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
+	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
+	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
+	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
+	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
+	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
+	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
+	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+	/* Round 2 */
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
+	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
+	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
+	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
+	GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
+	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
+	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
+	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
+	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
+	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
+	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
+	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+	/* Round 3 */
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
+	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
+	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
+	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
+	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
+	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
+	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
+	HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
+	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
+	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
+
+	/* Round 4 */
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
+	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
+	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
+	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
+	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
+	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
+	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
+	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
+	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
+	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
+
+	state[0] += a;
+	state[1] += b;
+	state[2] += c;
+	state[3] += d;
+
+	/* Zeroize sensitive information. */
+	memset ((void *)x, 0, sizeof (x));
+}
diff --git a/md5/md5.h b/md5/md5.h
new file mode 100644
index 0000000..551e252
--- /dev/null
+++ b/md5/md5.h
@@ -0,0 +1,49 @@
+/* MD5.H - header file for MD5C.C
+ * $FreeBSD$
+ */
+
+/*-
+ Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+rights reserved.
+
+License to copy and use this software is granted provided that it
+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+Algorithm" in all material mentioning or referencing this software
+or this function.
+
+License is also granted to make and use derivative works provided
+that such works are identified as "derived from the RSA Data
+Security, Inc. MD5 Message-Digest Algorithm" in all material
+mentioning or referencing the derived work.
+
+RSA Data Security, Inc. makes no representations concerning either
+the merchantability of this software or the suitability of this
+software for any particular purpose. It is provided "as is"
+without express or implied warranty of any kind.
+
+These notices must be retained in any copies of any part of this
+documentation and/or software.
+ */
+
+#ifndef _SYS_MD5_H_
+#define _SYS_MD5_H_
+#include <sys/types.h>	/* off_t, used by MD5FileChunk() below */
+
+#define MD5_BLOCK_LENGTH		64
+#define MD5_DIGEST_LENGTH		16
+#define MD5_DIGEST_STRING_LENGTH	(MD5_DIGEST_LENGTH * 2 + 1)
+
+typedef struct MD5Context {	/* MD5 context. */
+  unsigned int state[4];	/* state (ABCD) */
+  unsigned int count[2];	/* number of bits, modulo 2^64 (lsb first) */
+  unsigned char buffer[64];	/* input buffer */
+} MD5_CTX;
+
+void   MD5Init (MD5_CTX *);
+void   MD5Update (MD5_CTX *, const void *, unsigned int);
+void   MD5Final (unsigned char [16], MD5_CTX *);
+char * MD5End(MD5_CTX *, char *);
+char * MD5File(const char *, char *);
+char * MD5FileChunk(const char *, char *, off_t, off_t);
+char * MD5Data(const void *, unsigned int, char *);
+#endif /* _SYS_MD5_H_ */
diff --git a/md5/md5cmp.c b/md5/md5cmp.c
new file mode 100644
index 0000000..07acda4
--- /dev/null
+++ b/md5/md5cmp.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C)2013 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include "./md5.h"
+
+/* Compare the MD5 sum of file argv[2] with hex digest argv[1]; 0 on a match. */
+int main(int argc, char *argv[])
+{
+	char hexbuf[65];	/* receives the hex digest written by MD5File() */
+	const char *actual;
+
+	if (argc < 3) {
+		fprintf(stderr, "USAGE: %s <correct MD5 sum> <file>\n", argv[0]);
+		return -1;
+	}
+	/* A wrong-length reference sum only earns a warning; we still compare. */
+	if (strlen(argv[1]) != 32)
+		fprintf(stderr, "WARNING: MD5 hash size is wrong.\n");
+
+	actual = MD5File(argv[2], hexbuf);
+	if (actual == NULL) {
+		perror("Could not obtain MD5 sum");
+		return -1;
+	}
+	if (strcasecmp(actual, argv[1]) != 0) {
+		fprintf(stderr, "%s: FAILED.  Checksum is %s\n", argv[2], actual);
+		return -1;
+	}
+	fprintf(stderr, "%s: OK\n", argv[2]);
+	return 0;
+}
diff --git a/md5/md5hl.c b/md5/md5hl.c
new file mode 100644
index 0000000..eaa41e2
--- /dev/null
+++ b/md5/md5hl.c
@@ -0,0 +1,97 @@
+/* mdXhl.c * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define LENGTH 16
+
+#include "./md5.h"
+
+/* Run MD5Final() on ctx and store the digest in buf as a NUL-terminated
+ * lowercase hex string; a NULL buf is malloc'ed.  Returns buf, or NULL. */
+char *
+MD5End(MD5_CTX *ctx, char *buf)
+{
+	static const char hexdigits[] = "0123456789abcdef";
+	unsigned char md[LENGTH];
+	int k;
+
+	if (buf == NULL && (buf = malloc(2 * LENGTH + 1)) == NULL)
+		return NULL;
+	MD5Final(md, ctx);
+	for (k = 0; k < LENGTH; k++) {
+		buf[2 * k] = hexdigits[md[k] >> 4];
+		buf[2 * k + 1] = hexdigits[md[k] & 0x0f];
+	}
+	buf[2 * LENGTH] = '\0';
+	return buf;
+}
+
+char *
+MD5File(const char *filename, char *buf)
+{
+	return (MD5FileChunk(filename, buf, 0, 0));	/* ofs 0, len 0: hash the whole file */
+}
+
+/* Hex MD5 (via MD5End) of up to len bytes of filename from offset ofs; len 0
+ * means to EOF.  Returns NULL, errno set, on open/fstat/lseek/read failure. */
+char *
+MD5FileChunk(const char *filename, char *buf, off_t ofs, off_t len)
+{
+	unsigned char buffer[BUFSIZ];
+	MD5_CTX ctx;
+	struct stat stbuf;
+	size_t want;
+	int f, i = -1, e;	/* i < 0 marks failure on every exit path */
+	off_t n;
+
+	MD5Init(&ctx);
+	f = open(filename, O_RDONLY);
+	if (f < 0)
+		return NULL;
+	if (fstat(f, &stbuf) < 0)
+		goto done;	/* leave through the close() below */
+	if (ofs > stbuf.st_size)
+		ofs = stbuf.st_size;
+	if ((len == 0) || (len > stbuf.st_size - ofs))
+		len = stbuf.st_size - ofs;
+	if (lseek(f, ofs, SEEK_SET) < 0)
+		goto done;
+	i = 0;
+	for (n = len; n > 0; n -= i) {
+		want = n > (off_t)sizeof(buffer) ? sizeof(buffer) : (size_t)n;
+		i = read(f, buffer, want);
+		if (i <= 0)	/* error, or EOF before len bytes: stop, don't spin */
+			break;
+		MD5Update(&ctx, buffer, i);
+	}
+done:
+	e = errno;		/* keep the failing call's errno across close() */
+	close(f);
+	errno = e;
+	if (i < 0)
+		return NULL;
+	return (MD5End(&ctx, buf));
+}
+
+char *
+MD5Data (const void *data, unsigned int len, char *buf)
+{
+	MD5_CTX ctx;
+	/* Hash len bytes at data in one shot; MD5End() formats the result into buf. */
+	MD5Init(&ctx);
+	MD5Update(&ctx,data,len);
+	return (MD5End(&ctx, buf));
+}
diff --git a/rdbmp.c b/rdbmp.c
index b05fe2a..ffedeed 100644
--- a/rdbmp.c
+++ b/rdbmp.c
@@ -1,8 +1,11 @@
 /*
  * rdbmp.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Modified 2011 by Siarhei Siamashka.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to read input images in Microsoft "BMP"
@@ -21,7 +24,7 @@
  * This code contributed by James Arthur Boucher.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef BMP_SUPPORTED
 
@@ -30,19 +33,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /* Private version of data source object */
@@ -52,15 +55,15 @@
 typedef struct _bmp_source_struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  j_compress_ptr cinfo;		/* back link saves passing separate parm */
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
 
-  JSAMPARRAY colormap;		/* BMP colormap (converted to my format) */
+  JSAMPARRAY colormap;          /* BMP colormap (converted to my format) */
 
-  jvirt_sarray_ptr whole_image;	/* Needed to reverse row order */
-  JDIMENSION source_row;	/* Current source row number */
-  JDIMENSION row_width;		/* Physical width of scanlines in file */
+  jvirt_sarray_ptr whole_image; /* Needed to reverse row order */
+  JDIMENSION source_row;        /* Current source row number */
+  JDIMENSION row_width;         /* Physical width of scanlines in file */
 
-  int bits_per_pixel;		/* remembers 8- or 24-bit format */
+  int bits_per_pixel;           /* remembers 8-, 24-, or 32-bit format */
 } bmp_source_struct;
 
 
@@ -137,7 +140,7 @@
   outptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     t = GETJSAMPLE(*inptr++);
-    *outptr++ = colormap[0][t];	/* can omit GETJSAMPLE() safely */
+    *outptr++ = colormap[0][t]; /* can omit GETJSAMPLE() safely */
     *outptr++ = colormap[1][t];
     *outptr++ = colormap[2][t];
   }
@@ -167,7 +170,7 @@
   inptr = image_ptr[0];
   outptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
-    outptr[2] = *inptr++;	/* can omit GETJSAMPLE() safely */
+    outptr[2] = *inptr++;       /* can omit GETJSAMPLE() safely */
     outptr[1] = *inptr++;
     outptr[0] = *inptr++;
     outptr += 3;
@@ -177,10 +180,41 @@
 }
 
 
+METHODDEF(JDIMENSION)
+get_32bit_row (j_compress_ptr cinfo, cjpeg_source_ptr sinfo)
+/* This version reads 32-bit pixels: each source pixel contributes three
+ * output samples and its 4th byte (Alpha channel) is discarded. */
+{
+  bmp_source_ptr source = (bmp_source_ptr) sinfo;
+  JSAMPARRAY row_ptr;
+  register JSAMPROW src, dst;
+  register JDIMENSION remaining;
+
+  /* Step the row counter down, then fetch that row from the virtual array */
+  source->source_row--;
+  row_ptr = (*cinfo->mem->access_virt_sarray)
+    ((j_common_ptr) cinfo, source->whole_image,
+     source->source_row, (JDIMENSION) 1, FALSE);
+  src = row_ptr[0];
+  dst = source->pub.buffer[0];
+
+  /* Source samples are in BGR order, so write them out reversed */
+  for (remaining = cinfo->image_width; remaining > 0; remaining--) {
+    dst[0] = src[2];            /* plain sample copy, no GETJSAMPLE() needed */
+    dst[1] = src[1];
+    dst[2] = src[0];
+    src += 4;                   /* 3 color bytes plus the skipped Alpha byte */
+    dst += 3;
+  }
+
+  return 1;
+}
+
+
 /*
  * This method loads the image into whole_image during the first call on
  * get_pixel_rows.  The get_pixel_rows pointer is then adjusted to call
- * get_8bit_row or get_24bit_row on subsequent calls.
+ * get_8bit_row, get_24bit_row, or get_32bit_row on subsequent calls.
  */
 
 METHODDEF(JDIMENSION)
@@ -188,10 +222,9 @@
 {
   bmp_source_ptr source = (bmp_source_ptr) sinfo;
   register FILE *infile = source->pub.input_file;
-  register int c;
   register JSAMPROW out_ptr;
   JSAMPARRAY image_ptr;
-  JDIMENSION row, col;
+  JDIMENSION row;
   cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
 
   /* Read the data into a virtual array in input-file row order. */
@@ -205,11 +238,11 @@
       ((j_common_ptr) cinfo, source->whole_image,
        row, (JDIMENSION) 1, TRUE);
     out_ptr = image_ptr[0];
-    for (col = source->row_width; col > 0; col--) {
-      /* inline copy of read_byte() for speed */
-      if ((c = getc(infile)) == EOF)
-	ERREXIT(cinfo, JERR_INPUT_EOF);
-      *out_ptr++ = (JSAMPLE) c;
+    if (fread(out_ptr, 1, source->row_width, infile) != source->row_width) {
+      if (feof(infile))
+        ERREXIT(cinfo, JERR_INPUT_EOF);
+      else
+        ERREXIT(cinfo, JERR_FILE_READ);
     }
   }
   if (progress != NULL)
@@ -223,6 +256,9 @@
   case 24:
     source->pub.get_pixel_rows = get_24bit_row;
     break;
+  case 32:
+    source->pub.get_pixel_rows = get_32bit_row;
+    break;
   default:
     ERREXIT(cinfo, JERR_BMP_BADDEPTH);
   }
@@ -244,20 +280,20 @@
   U_CHAR bmpfileheader[14];
   U_CHAR bmpinfoheader[64];
 #define GET_2B(array,offset)  ((unsigned int) UCH(array[offset]) + \
-			       (((unsigned int) UCH(array[offset+1])) << 8))
+                               (((unsigned int) UCH(array[offset+1])) << 8))
 #define GET_4B(array,offset)  ((INT32) UCH(array[offset]) + \
-			       (((INT32) UCH(array[offset+1])) << 8) + \
-			       (((INT32) UCH(array[offset+2])) << 16) + \
-			       (((INT32) UCH(array[offset+3])) << 24))
+                               (((INT32) UCH(array[offset+1])) << 8) + \
+                               (((INT32) UCH(array[offset+2])) << 16) + \
+                               (((INT32) UCH(array[offset+3])) << 24))
   INT32 bfOffBits;
   INT32 headerSize;
-  INT32 biWidth = 0;		/* initialize to avoid compiler warning */
-  INT32 biHeight = 0;
+  INT32 biWidth;
+  INT32 biHeight;
   unsigned int biPlanes;
   INT32 biCompression;
   INT32 biXPelsPerMeter,biYPelsPerMeter;
   INT32 biClrUsed = 0;
-  int mapentrysize = 0;		/* 0 indicates no colormap */
+  int mapentrysize = 0;         /* 0 indicates no colormap */
   INT32 bPad;
   JDIMENSION row_width;
 
@@ -289,19 +325,17 @@
     source->bits_per_pixel = (int) GET_2B(bmpinfoheader,10);
 
     switch (source->bits_per_pixel) {
-    case 8:			/* colormapped image */
-      mapentrysize = 3;		/* OS/2 uses RGBTRIPLE colormap */
+    case 8:                     /* colormapped image */
+      mapentrysize = 3;         /* OS/2 uses RGBTRIPLE colormap */
       TRACEMS2(cinfo, 1, JTRC_BMP_OS2_MAPPED, (int) biWidth, (int) biHeight);
       break;
-    case 24:			/* RGB image */
+    case 24:                    /* RGB image */
       TRACEMS2(cinfo, 1, JTRC_BMP_OS2, (int) biWidth, (int) biHeight);
       break;
     default:
       ERREXIT(cinfo, JERR_BMP_BADDEPTH);
       break;
     }
-    if (biPlanes != 1)
-      ERREXIT(cinfo, JERR_BMP_BADPLANES);
     break;
   case 40:
   case 64:
@@ -318,19 +352,20 @@
     /* biSizeImage, biClrImportant fields are ignored */
 
     switch (source->bits_per_pixel) {
-    case 8:			/* colormapped image */
-      mapentrysize = 4;		/* Windows uses RGBQUAD colormap */
+    case 8:                     /* colormapped image */
+      mapentrysize = 4;         /* Windows uses RGBQUAD colormap */
       TRACEMS2(cinfo, 1, JTRC_BMP_MAPPED, (int) biWidth, (int) biHeight);
       break;
-    case 24:			/* RGB image */
+    case 24:                    /* RGB image */
+      TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
+      break;
+    case 32:                    /* RGB image + Alpha channel */
       TRACEMS2(cinfo, 1, JTRC_BMP, (int) biWidth, (int) biHeight);
       break;
     default:
       ERREXIT(cinfo, JERR_BMP_BADDEPTH);
       break;
     }
-    if (biPlanes != 1)
-      ERREXIT(cinfo, JERR_BMP_BADPLANES);
     if (biCompression != 0)
       ERREXIT(cinfo, JERR_BMP_COMPRESSED);
 
@@ -338,21 +373,26 @@
       /* Set JFIF density parameters from the BMP data */
       cinfo->X_density = (UINT16) (biXPelsPerMeter/100); /* 100 cm per meter */
       cinfo->Y_density = (UINT16) (biYPelsPerMeter/100);
-      cinfo->density_unit = 2;	/* dots/cm */
+      cinfo->density_unit = 2;  /* dots/cm */
     }
     break;
   default:
     ERREXIT(cinfo, JERR_BMP_BADHEADER);
-    break;
+    return;
   }
 
+  if (biWidth <= 0 || biHeight <= 0)
+    ERREXIT(cinfo, JERR_BMP_EMPTY);
+  if (biPlanes != 1)
+    ERREXIT(cinfo, JERR_BMP_BADPLANES);
+
   /* Compute distance to bitmap data --- will adjust for colormap below */
   bPad = bfOffBits - (headerSize + 14);
 
   /* Read the colormap, if any */
   if (mapentrysize > 0) {
     if (biClrUsed <= 0)
-      biClrUsed = 256;		/* assume it's 256 */
+      biClrUsed = 256;          /* assume it's 256 */
     else if (biClrUsed > 256)
       ERREXIT(cinfo, JERR_BMP_BADCMAP);
     /* Allocate space to store the colormap */
@@ -366,7 +406,7 @@
   }
 
   /* Skip any remaining pad bytes */
-  if (bPad < 0)			/* incorrect bfOffBits value? */
+  if (bPad < 0)                 /* incorrect bfOffBits value? */
     ERREXIT(cinfo, JERR_BMP_BADHEADER);
   while (--bPad >= 0) {
     (void) read_byte(source);
@@ -375,6 +415,8 @@
   /* Compute row width in file, including padding to 4-byte boundary */
   if (source->bits_per_pixel == 24)
     row_width = (JDIMENSION) (biWidth * 3);
+  else if (source->bits_per_pixel == 32)
+    row_width = (JDIMENSION) (biWidth * 4);
   else
     row_width = (JDIMENSION) biWidth;
   while ((row_width & 3) != 0) row_width++;
@@ -427,8 +469,8 @@
   /* Create module interface object */
   source = (bmp_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(bmp_source_struct));
-  source->cinfo = cinfo;	/* make back link for subroutines */
+                                  sizeof(bmp_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_bmp;
   source->pub.finish_input = finish_input_bmp;
diff --git a/rdcolmap.c b/rdcolmap.c
index 42b3437..ac6f50e 100644
--- a/rdcolmap.c
+++ b/rdcolmap.c
@@ -21,9 +21,9 @@
  * currently implemented.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
-#ifdef QUANT_2PASS_SUPPORTED	/* otherwise can't quantize to supplied map */
+#ifdef QUANT_2PASS_SUPPORTED    /* otherwise can't quantize to supplied map */
 
 /* Portions of this code are based on the PBMPLUS library, which is:
 **
@@ -54,9 +54,9 @@
   /* Check for duplicate color. */
   for (index = 0; index < ncolors; index++) {
     if (GETJSAMPLE(colormap0[index]) == R &&
-	GETJSAMPLE(colormap1[index]) == G &&
-	GETJSAMPLE(colormap2[index]) == B)
-      return;			/* color is already in map */
+        GETJSAMPLE(colormap1[index]) == G &&
+        GETJSAMPLE(colormap2[index]) == B)
+      return;                   /* color is already in map */
   }
 
   /* Check for map overflow. */
@@ -107,9 +107,9 @@
     if (R == EOF || G == EOF || B == EOF)
       ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
     add_map_entry(cinfo,
-		  R << (BITS_IN_JSAMPLE-8),
-		  G << (BITS_IN_JSAMPLE-8),
-		  B << (BITS_IN_JSAMPLE-8));
+                  R << (BITS_IN_JSAMPLE-8),
+                  G << (BITS_IN_JSAMPLE-8),
+                  B << (BITS_IN_JSAMPLE-8));
   }
 }
 
@@ -123,7 +123,7 @@
 /* A comment/newline sequence is returned as a newline */
 {
   register int ch;
-  
+
   ch = getc(infile);
   if (ch == '#') {
     do {
@@ -143,17 +143,17 @@
 {
   register int ch;
   register unsigned int val;
-  
+
   /* Skip any leading whitespace */
   do {
     ch = pbm_getc(infile);
     if (ch == EOF)
       ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
   } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
-  
+
   if (ch < '0' || ch > '9')
     ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
-  
+
   val = ch - '0';
   while ((ch = pbm_getc(infile)) >= '0' && ch <= '9') {
     val *= 10;
@@ -175,7 +175,7 @@
   int R, G, B;
 
   /* Initial 'P' has already been read by read_color_map */
-  c = getc(infile);		/* save format discriminator for a sec */
+  c = getc(infile);             /* save format discriminator for a sec */
 
   /* while we fetch the remaining header info */
   w = read_pbm_integer(cinfo, infile);
@@ -190,26 +190,26 @@
     ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
 
   switch (c) {
-  case '3':			/* it's a text-format PPM file */
+  case '3':                     /* it's a text-format PPM file */
     for (row = 0; row < h; row++) {
       for (col = 0; col < w; col++) {
-	R = read_pbm_integer(cinfo, infile);
-	G = read_pbm_integer(cinfo, infile);
-	B = read_pbm_integer(cinfo, infile);
-	add_map_entry(cinfo, R, G, B);
+        R = read_pbm_integer(cinfo, infile);
+        G = read_pbm_integer(cinfo, infile);
+        B = read_pbm_integer(cinfo, infile);
+        add_map_entry(cinfo, R, G, B);
       }
     }
     break;
 
-  case '6':			/* it's a raw-format PPM file */
+  case '6':                     /* it's a raw-format PPM file */
     for (row = 0; row < h; row++) {
       for (col = 0; col < w; col++) {
-	R = getc(infile);
-	G = getc(infile);
-	B = getc(infile);
-	if (R == EOF || G == EOF || B == EOF)
-	  ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
-	add_map_entry(cinfo, R, G, B);
+        R = getc(infile);
+        G = getc(infile);
+        B = getc(infile);
+        if (R == EOF || G == EOF || B == EOF)
+          ERREXIT(cinfo, JERR_BAD_CMAP_FILE);
+        add_map_entry(cinfo, R, G, B);
       }
     }
     break;
diff --git a/rdgif.c b/rdgif.c
index b27c167..5caad8a 100644
--- a/rdgif.c
+++ b/rdgif.c
@@ -19,7 +19,7 @@
  *    CompuServe Incorporated."
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef GIF_SUPPORTED
 
@@ -32,7 +32,7 @@
 {
   fprintf(stderr, "GIF input is unsupported for legal reasons.  Sorry.\n");
   exit(EXIT_FAILURE);
-  return NULL;			/* keep compiler happy */
+  return NULL;                  /* keep compiler happy */
 }
 
 #endif /* GIF_SUPPORTED */
diff --git a/rdjpgcom.1 b/rdjpgcom.1
index 2bba04e..97611df 100644
--- a/rdjpgcom.1
+++ b/rdjpgcom.1
@@ -1,9 +1,12 @@
-.TH RDJPGCOM 1 "11 October 1997"
+.TH RDJPGCOM 1 "02 April 2009"
 .SH NAME
 rdjpgcom \- display text comments from a JPEG file
 .SH SYNOPSIS
 .B rdjpgcom
 [
+.B \-raw
+]
+[
 .B \-verbose
 ]
 [
@@ -25,6 +28,12 @@
 them as you like in one JPEG file.
 .SH OPTIONS
 .TP
+.B \-raw
+Normally
+.B rdjpgcom
+escapes non-printable characters in comments, for security reasons.
+This option avoids that.
+.TP
 .B \-verbose
 Causes
 .B rdjpgcom
diff --git a/rdjpgcom.c b/rdjpgcom.c
index ffe6fc6..2f0115d 100644
--- a/rdjpgcom.c
+++ b/rdjpgcom.c
@@ -1,8 +1,11 @@
 /*
  * rdjpgcom.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009 by Bill Allombert, Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a very simple stand-alone application that displays
@@ -11,46 +14,41 @@
  * JPEG markers.
  */
 
-#define JPEG_CJPEG_DJPEG	/* to get the command-line config symbols */
-#include "jinclude.h"		/* get auto-config symbols, <stdio.h> */
+#define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
+#include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
-#include <ctype.h>		/* to declare isupper(), tolower() */
+#ifdef HAVE_LOCALE_H
+#include <locale.h>             /* Bill Allombert: use locale for isprint */
+#endif
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
+#include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#endif
+#define READ_BINARY     "rb"
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
-#ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
-#else
 #define EXIT_SUCCESS  0
 #endif
-#endif
 
 
 /*
@@ -58,7 +56,7 @@
  * To reuse this code in another application, you might need to change these.
  */
 
-static FILE * infile;		/* input JPEG file */
+static FILE * infile;           /* input JPEG file */
 
 /* Return next input byte, or EOF if no more */
 #define NEXTBYTE()  getc(infile)
@@ -103,11 +101,11 @@
  * in this program.  (See jdmarker.c for a more complete list.)
  */
 
-#define M_SOF0  0xC0		/* Start Of Frame N */
-#define M_SOF1  0xC1		/* N indicates which compression process */
-#define M_SOF2  0xC2		/* Only SOF0-SOF2 are now in common use */
+#define M_SOF0  0xC0            /* Start Of Frame N */
+#define M_SOF1  0xC1            /* N indicates which compression process */
+#define M_SOF2  0xC2            /* Only SOF0-SOF2 are now in common use */
 #define M_SOF3  0xC3
-#define M_SOF5  0xC5		/* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF5  0xC5            /* NB: codes C4 and CC are NOT SOF markers */
 #define M_SOF6  0xC6
 #define M_SOF7  0xC7
 #define M_SOF9  0xC9
@@ -116,12 +114,12 @@
 #define M_SOF13 0xCD
 #define M_SOF14 0xCE
 #define M_SOF15 0xCF
-#define M_SOI   0xD8		/* Start Of Image (beginning of datastream) */
-#define M_EOI   0xD9		/* End Of Image (end of datastream) */
-#define M_SOS   0xDA		/* Start Of Scan (begins compressed data) */
-#define M_APP0	0xE0		/* Application-specific marker, type N */
-#define M_APP12	0xEC		/* (we don't bother to list all 16 APPn's) */
-#define M_COM   0xFE		/* COMment */
+#define M_SOI   0xD8            /* Start Of Image (beginning of datastream) */
+#define M_EOI   0xD9            /* End Of Image (end of datastream) */
+#define M_SOS   0xDA            /* Start Of Scan (begins compressed data) */
+#define M_APP0  0xE0            /* Application-specific marker, type N */
+#define M_APP12 0xEC            /* (we don't bother to list all 16 APPn's) */
+#define M_COM   0xFE            /* COMment */
 
 
 /*
@@ -218,12 +216,17 @@
  */
 
 static void
-process_COM (void)
+process_COM (int raw)
 {
   unsigned int length;
   int ch;
   int lastch = 0;
 
+  /* Bill Allombert: set locale properly for isprint */
+#ifdef HAVE_LOCALE_H
+  setlocale(LC_CTYPE, "");
+#endif
+
   /* Get the marker parameter length count */
   length = read_2_bytes();
   /* Length includes itself, so must be at least 2 */
@@ -233,16 +236,18 @@
 
   while (length > 0) {
     ch = read_1_byte();
+    if (raw) {
+      putc(ch, stdout);
     /* Emit the character in a readable form.
      * Nonprintables are converted to \nnn form,
      * while \ is converted to \\.
      * Newlines in CR, CR/LF, or LF form will be printed as one newline.
      */
-    if (ch == '\r') {
+    } else if (ch == '\r') {
       printf("\n");
     } else if (ch == '\n') {
       if (lastch != '\r')
-	printf("\n");
+        printf("\n");
     } else if (ch == '\\') {
       printf("\\\\");
     } else if (isprint(ch)) {
@@ -254,6 +259,11 @@
     length--;
   }
   printf("\n");
+
+  /* Bill Allombert: revert to C locale */
+#ifdef HAVE_LOCALE_H
+  setlocale(LC_CTYPE, "C");
+#endif
 }
 
 
@@ -271,7 +281,7 @@
   const char * process;
   int ci;
 
-  length = read_2_bytes();	/* usual parameter length count */
+  length = read_2_bytes();      /* usual parameter length count */
 
   data_precision = read_1_byte();
   image_height = read_2_bytes();
@@ -279,33 +289,33 @@
   num_components = read_1_byte();
 
   switch (marker) {
-  case M_SOF0:	process = "Baseline";  break;
-  case M_SOF1:	process = "Extended sequential";  break;
-  case M_SOF2:	process = "Progressive";  break;
-  case M_SOF3:	process = "Lossless";  break;
-  case M_SOF5:	process = "Differential sequential";  break;
-  case M_SOF6:	process = "Differential progressive";  break;
-  case M_SOF7:	process = "Differential lossless";  break;
-  case M_SOF9:	process = "Extended sequential, arithmetic coding";  break;
-  case M_SOF10:	process = "Progressive, arithmetic coding";  break;
-  case M_SOF11:	process = "Lossless, arithmetic coding";  break;
-  case M_SOF13:	process = "Differential sequential, arithmetic coding";  break;
-  case M_SOF14:	process = "Differential progressive, arithmetic coding"; break;
-  case M_SOF15:	process = "Differential lossless, arithmetic coding";  break;
-  default:	process = "Unknown";  break;
+  case M_SOF0:  process = "Baseline";  break;
+  case M_SOF1:  process = "Extended sequential";  break;
+  case M_SOF2:  process = "Progressive";  break;
+  case M_SOF3:  process = "Lossless";  break;
+  case M_SOF5:  process = "Differential sequential";  break;
+  case M_SOF6:  process = "Differential progressive";  break;
+  case M_SOF7:  process = "Differential lossless";  break;
+  case M_SOF9:  process = "Extended sequential, arithmetic coding";  break;
+  case M_SOF10: process = "Progressive, arithmetic coding";  break;
+  case M_SOF11: process = "Lossless, arithmetic coding";  break;
+  case M_SOF13: process = "Differential sequential, arithmetic coding";  break;
+  case M_SOF14: process = "Differential progressive, arithmetic coding"; break;
+  case M_SOF15: process = "Differential lossless, arithmetic coding";  break;
+  default:      process = "Unknown";  break;
   }
 
   printf("JPEG image is %uw * %uh, %d color components, %d bits per sample\n",
-	 image_width, image_height, num_components, data_precision);
+         image_width, image_height, num_components, data_precision);
   printf("JPEG process: %s\n", process);
 
   if (length != (unsigned int) (8 + num_components * 3))
     ERREXIT("Bogus SOF marker length");
 
   for (ci = 0; ci < num_components; ci++) {
-    (void) read_1_byte();	/* Component ID code */
-    (void) read_1_byte();	/* H, V sampling factors */
-    (void) read_1_byte();	/* Quantization table number */
+    (void) read_1_byte();       /* Component ID code */
+    (void) read_1_byte();       /* H, V sampling factors */
+    (void) read_1_byte();       /* Quantization table number */
   }
 }
 
@@ -321,7 +331,7 @@
  */
 
 static int
-scan_JPEG_header (int verbose)
+scan_JPEG_header (int verbose, int raw)
 {
   int marker;
 
@@ -336,33 +346,33 @@
       /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
        * treated as SOFn.  C4 in particular is actually DHT.
        */
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
-    case M_SOF2:		/* Progressive, Huffman */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_SOF9:		/* Extended sequential, arithmetic */
-    case M_SOF10:		/* Progressive, arithmetic */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_SOF9:                /* Extended sequential, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       if (verbose)
-	process_SOFn(marker);
+        process_SOFn(marker);
       else
-	skip_variable();
+        skip_variable();
       break;
 
-    case M_SOS:			/* stop before hitting compressed data */
+    case M_SOS:                 /* stop before hitting compressed data */
       return marker;
 
-    case M_EOI:			/* in case it's a tables-only JPEG stream */
+    case M_EOI:                 /* in case it's a tables-only JPEG stream */
       return marker;
 
     case M_COM:
-      process_COM();
+      process_COM(raw);
       break;
 
     case M_APP12:
@@ -370,14 +380,14 @@
        * APP12 markers, so we print those out too when in -verbose mode.
        */
       if (verbose) {
-	printf("APP12 contains:\n");
-	process_COM();
+        printf("APP12 contains:\n");
+        process_COM(raw);
       } else
-	skip_variable();
+        skip_variable();
       break;
 
-    default:			/* Anything else just gets skipped */
-      skip_variable();		/* we assume it has a parameter count... */
+    default:                    /* Anything else just gets skipped */
+      skip_variable();          /* we assume it has a parameter count... */
       break;
     }
   } /* end loop */
@@ -386,7 +396,7 @@
 
 /* Command line parsing code */
 
-static const char * progname;	/* program name for error messages */
+static const char * progname;   /* program name for error messages */
 
 
 static void
@@ -398,6 +408,7 @@
   fprintf(stderr, "Usage: %s [switches] [inputfile]\n", progname);
 
   fprintf(stderr, "Switches (names may be abbreviated):\n");
+  fprintf(stderr, "  -raw        Display non-printable characters in comments (unsafe)\n");
   fprintf(stderr, "  -verbose    Also display dimensions of JPEG image\n");
 
   exit(EXIT_FAILURE);
@@ -415,17 +426,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return 0;			/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return 0;                 /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return 0;			/* no good */
-    nmatched++;			/* count matched characters */
+      return 0;                 /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return 0;
-  return 1;			/* A-OK */
+  return 1;                     /* A-OK */
 }
 
 
@@ -438,7 +449,7 @@
 {
   int argn;
   char * arg;
-  int verbose = 0;
+  int verbose = 0, raw = 0;
 
   /* On Mac, fetch a command line. */
 #ifdef USE_CCOMMAND
@@ -447,16 +458,18 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "rdjpgcom";	/* in case C library doesn't provide it */
+    progname = "rdjpgcom";      /* in case C library doesn't provide it */
 
   /* Parse switches, if any */
   for (argn = 1; argn < argc; argn++) {
     arg = argv[argn];
     if (arg[0] != '-')
-      break;			/* not switch, must be file name */
-    arg++;			/* advance over '-' */
+      break;                    /* not switch, must be file name */
+    arg++;                      /* advance over '-' */
     if (keymatch(arg, "verbose", 1)) {
       verbose++;
+    } else if (keymatch(arg, "raw", 1)) {
+      raw = 1;
     } else
       usage();
   }
@@ -474,10 +487,10 @@
     }
   } else {
     /* default input file is stdin */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
     setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
     if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
       fprintf(stderr, "%s: can't open stdin\n", progname);
       exit(EXIT_FAILURE);
@@ -488,9 +501,9 @@
   }
 
   /* Scan the JPEG headers. */
-  (void) scan_JPEG_header(verbose);
+  (void) scan_JPEG_header(verbose, raw);
 
   /* All done. */
   exit(EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/rdppm.c b/rdppm.c
index 1df35c1..f56d5cf 100644
--- a/rdppm.c
+++ b/rdppm.c
@@ -1,8 +1,11 @@
 /*
  * rdppm.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009 by Bill Allombert, Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to read input images in PPM/PGM format.
@@ -18,7 +21,7 @@
  * the file is indeed PPM format).
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef PPM_SUPPORTED
 
@@ -40,30 +43,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
-
-
-/*
- * On most systems, reading individual bytes with getc() is drastically less
- * efficient than buffering a row at a time with fread().  On PCs, we must
- * allocate the buffer in near data space, because we are assuming small-data
- * memory model, wherein fread() can't reach far memory.  If you need to
- * process very wide images on a PC, you might have to compile in large-memory
- * model, or else replace fread() with a getc() loop --- which will be much
- * slower.
- */
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /* Private version of data source object */
@@ -71,10 +63,11 @@
 typedef struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  U_CHAR *iobuffer;		/* non-FAR pointer to I/O buffer */
-  JSAMPROW pixrow;		/* FAR pointer to same */
-  size_t buffer_width;		/* width of I/O buffer */
-  JSAMPLE *rescale;		/* => maxval-remapping array, or NULL */
+  /* Usually these two pointers point to the same place: */
+  U_CHAR *iobuffer;             /* fread's I/O buffer */
+  JSAMPROW pixrow;              /* compressor input buffer */
+  size_t buffer_width;          /* width of I/O buffer */
+  JSAMPLE *rescale;             /* => maxval-remapping array, or NULL */
 } ppm_source_struct;
 
 typedef ppm_source_struct * ppm_source_ptr;
@@ -250,8 +243,8 @@
   bufferptr = source->iobuffer;
   for (col = cinfo->image_width; col > 0; col--) {
     register int temp;
-    temp  = UCH(*bufferptr++);
-    temp |= UCH(*bufferptr++) << 8;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
     *ptr++ = rescale[temp];
   }
   return 1;
@@ -274,14 +267,14 @@
   bufferptr = source->iobuffer;
   for (col = cinfo->image_width; col > 0; col--) {
     register int temp;
-    temp  = UCH(*bufferptr++);
-    temp |= UCH(*bufferptr++) << 8;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
     *ptr++ = rescale[temp];
-    temp  = UCH(*bufferptr++);
-    temp |= UCH(*bufferptr++) << 8;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
     *ptr++ = rescale[temp];
-    temp  = UCH(*bufferptr++);
-    temp |= UCH(*bufferptr++) << 8;
+    temp  = UCH(*bufferptr++) << 8;
+    temp |= UCH(*bufferptr++);
     *ptr++ = rescale[temp];
   }
   return 1;
@@ -307,10 +300,10 @@
 
   /* detect unsupported variants (ie, PBM) before trying to read header */
   switch (c) {
-  case '2':			/* it's a text-format PGM file */
-  case '3':			/* it's a text-format PPM file */
-  case '5':			/* it's a raw-format PGM file */
-  case '6':			/* it's a raw-format PPM file */
+  case '2':                     /* it's a text-format PGM file */
+  case '3':                     /* it's a text-format PPM file */
+  case '5':                     /* it's a raw-format PGM file */
+  case '6':                     /* it's a raw-format PPM file */
     break;
   default:
     ERREXIT(cinfo, JERR_PPM_NOT);
@@ -330,12 +323,12 @@
   cinfo->image_height = (JDIMENSION) h;
 
   /* initialize flags to most common settings */
-  need_iobuffer = TRUE;		/* do we need an I/O buffer? */
-  use_raw_buffer = FALSE;	/* do we map input buffer onto I/O buffer? */
-  need_rescale = TRUE;		/* do we need a rescale array? */
+  need_iobuffer = TRUE;         /* do we need an I/O buffer? */
+  use_raw_buffer = FALSE;       /* do we map input buffer onto I/O buffer? */
+  need_rescale = TRUE;          /* do we need a rescale array? */
 
   switch (c) {
-  case '2':			/* it's a text-format PGM file */
+  case '2':                     /* it's a text-format PGM file */
     cinfo->input_components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM_TEXT, w, h);
@@ -343,7 +336,7 @@
     need_iobuffer = FALSE;
     break;
 
-  case '3':			/* it's a text-format PPM file */
+  case '3':                     /* it's a text-format PPM file */
     cinfo->input_components = 3;
     cinfo->in_color_space = JCS_RGB;
     TRACEMS2(cinfo, 1, JTRC_PPM_TEXT, w, h);
@@ -351,13 +344,13 @@
     need_iobuffer = FALSE;
     break;
 
-  case '5':			/* it's a raw-format PGM file */
+  case '5':                     /* it's a raw-format PGM file */
     cinfo->input_components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     TRACEMS2(cinfo, 1, JTRC_PGM, w, h);
     if (maxval > 255) {
       source->pub.get_pixel_rows = get_word_gray_row;
-    } else if (maxval == MAXJSAMPLE && SIZEOF(JSAMPLE) == SIZEOF(U_CHAR)) {
+    } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) {
       source->pub.get_pixel_rows = get_raw_row;
       use_raw_buffer = TRUE;
       need_rescale = FALSE;
@@ -366,13 +359,13 @@
     }
     break;
 
-  case '6':			/* it's a raw-format PPM file */
+  case '6':                     /* it's a raw-format PPM file */
     cinfo->input_components = 3;
     cinfo->in_color_space = JCS_RGB;
     TRACEMS2(cinfo, 1, JTRC_PPM, w, h);
     if (maxval > 255) {
       source->pub.get_pixel_rows = get_word_rgb_row;
-    } else if (maxval == MAXJSAMPLE && SIZEOF(JSAMPLE) == SIZEOF(U_CHAR)) {
+    } else if (maxval == MAXJSAMPLE && sizeof(JSAMPLE) == sizeof(U_CHAR)) {
       source->pub.get_pixel_rows = get_raw_row;
       use_raw_buffer = TRUE;
       need_rescale = FALSE;
@@ -385,17 +378,16 @@
   /* Allocate space for I/O buffer: 1 or 3 bytes or words/pixel. */
   if (need_iobuffer) {
     source->buffer_width = (size_t) w * cinfo->input_components *
-      ((maxval<=255) ? SIZEOF(U_CHAR) : (2*SIZEOF(U_CHAR)));
+      ((maxval<=255) ? sizeof(U_CHAR) : (2*sizeof(U_CHAR)));
     source->iobuffer = (U_CHAR *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  source->buffer_width);
+                                  source->buffer_width);
   }
 
   /* Create compressor input buffer. */
   if (use_raw_buffer) {
     /* For unscaled raw-input case, we can just map it onto the I/O buffer. */
     /* Synthesize a JSAMPARRAY pointer structure */
-    /* Cast here implies near->far pointer conversion on PCs */
     source->pixrow = (JSAMPROW) source->iobuffer;
     source->pub.buffer = & source->pixrow;
     source->pub.buffer_height = 1;
@@ -414,7 +406,7 @@
     /* On 16-bit-int machines we have to be careful of maxval = 65535 */
     source->rescale = (JSAMPLE *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  (size_t) (((long) maxval + 1L) * SIZEOF(JSAMPLE)));
+                                  (size_t) (((long) maxval + 1L) * sizeof(JSAMPLE)));
     half_maxval = maxval / 2;
     for (val = 0; val <= (INT32) maxval; val++) {
       /* The multiplication here must be done in 32 bits to avoid overflow */
@@ -447,7 +439,7 @@
   /* Create module interface object */
   source = (ppm_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(ppm_source_struct));
+                                  sizeof(ppm_source_struct));
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_ppm;
   source->pub.finish_input = finish_input_ppm;
diff --git a/rdrle.c b/rdrle.c
index 542bc37..8df3ddb 100644
--- a/rdrle.c
+++ b/rdrle.c
@@ -1,8 +1,10 @@
 /*
  * rdrle.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to read input images in Utah RLE format.
@@ -19,7 +21,7 @@
  * with updates from Robert Hutchinson.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef RLE_SUPPORTED
 
@@ -38,7 +40,7 @@
 
 /*
  * We support the following types of RLE files:
- *   
+ *
  *   GRAYSCALE   - 8 bits, no colormap
  *   MAPPEDGRAY  - 8 bits, 1 channel colomap
  *   PSEUDOCOLOR - 8 bits, 3 channel colormap
@@ -66,7 +68,7 @@
 
   rle_kind visual;              /* actual type of input file */
   jvirt_sarray_ptr image;       /* virtual array to hold the image */
-  JDIMENSION row;		/* current row # in the virtual array */
+  JDIMENSION row;               /* current row # in the virtual array */
   rle_hdr header;               /* Input file information */
   rle_pixel** rle_row;          /* holds a row returned by rle_getrow() */
 
@@ -111,10 +113,10 @@
   }
 
   /* Figure out what we have, set private vars and return values accordingly */
-  
+
   width  = source->header.xmax - source->header.xmin + 1;
   height = source->header.ymax - source->header.ymin + 1;
-  source->header.xmin = 0;		/* realign horizontally */
+  source->header.xmin = 0;              /* realign horizontally */
   source->header.xmax = width-1;
 
   cinfo->image_width      = width;
@@ -131,17 +133,17 @@
   } else if (source->header.ncolors == 1 && source->header.ncmap == 3) {
     source->visual     = PSEUDOCOLOR;
     TRACEMS3(cinfo, 1, JTRC_RLE_MAPPED, width, height,
-	     1 << source->header.cmaplen);
+             1 << source->header.cmaplen);
   } else if (source->header.ncolors == 3 && source->header.ncmap == 3) {
     source->visual     = TRUECOLOR;
     TRACEMS3(cinfo, 1, JTRC_RLE_FULLMAP, width, height,
-	     1 << source->header.cmaplen);
+             1 << source->header.cmaplen);
   } else if (source->header.ncolors == 3 && source->header.ncmap == 0) {
     source->visual     = DIRECTCOLOR;
     TRACEMS2(cinfo, 1, JTRC_RLE, width, height);
   } else
     ERREXIT(cinfo, JERR_RLE_UNSUPPORTED);
-  
+
   if (source->visual == GRAYSCALE || source->visual == MAPPEDGRAY) {
     cinfo->in_color_space   = JCS_GRAYSCALE;
     cinfo->input_components = 1;
@@ -254,8 +256,7 @@
   rle_row = source->rle_row;
 
   /* Read the RLE data into our virtual array.
-   * We assume here that (a) rle_pixel is represented the same as JSAMPLE,
-   * and (b) we are not on a machine where FAR pointers differ from regular.
+   * We assume here that rle_pixel is represented the same as JSAMPLE.
    */
   RLE_CLR_BIT(source->header, RLE_ALPHA); /* don't read the alpha channel */
 
@@ -348,7 +349,7 @@
   source->row = cinfo->image_height;
 
   /* And fetch the topmost (bottommost) row */
-  return (*source->pub.get_pixel_rows) (cinfo, sinfo);   
+  return (*source->pub.get_pixel_rows) (cinfo, sinfo);
 }
 
 
@@ -375,7 +376,7 @@
   /* Create module interface object */
   source = (rle_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                  SIZEOF(rle_source_struct));
+                                  sizeof(rle_source_struct));
   /* Fill in method ptrs */
   source->pub.start_input = start_input_rle;
   source->pub.finish_input = finish_input_rle;
diff --git a/rdswitch.c b/rdswitch.c
index 4f4bb4f..a0aa37c 100644
--- a/rdswitch.c
+++ b/rdswitch.c
@@ -1,20 +1,23 @@
 /*
  * rdswitch.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to process some of cjpeg's more complicated
  * command-line switches.  Switches processed here are:
- *	-qtables file		Read quantization tables from text file
- *	-scans file		Read scan script from text file
- *	-qslots N[,N,...]	Set component quantization table selectors
- *	-sample HxV[,HxV,...]	Set component sampling factors
+ *      -qtables file           Read quantization tables from text file
+ *      -scans file             Read scan script from text file
+ *      -quality N[,N,...]      Set quality ratings
+ *      -qslots N[,N,...]       Set component quantization table selectors
+ *      -sample HxV[,HxV,...]   Set component sampling factors
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
-#include <ctype.h>		/* to declare isdigit(), isspace() */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
+#include <ctype.h>              /* to declare isdigit(), isspace() */
 
 
 LOCAL(int)
@@ -23,7 +26,7 @@
 /* A comment/newline sequence is returned as a newline */
 {
   register int ch;
-  
+
   ch = getc(file);
   if (ch == '#') {
     do {
@@ -41,7 +44,7 @@
 {
   register int ch;
   register long val;
-  
+
   /* Skip any leading whitespace, detect EOF */
   do {
     ch = text_getc(file);
@@ -50,7 +53,7 @@
       return FALSE;
     }
   } while (isspace(ch));
-  
+
   if (! isdigit(ch)) {
     *termchar = ch;
     return FALSE;
@@ -69,9 +72,12 @@
 }
 
 
+#if JPEG_LIB_VERSION < 70
+static int q_scale_factor[NUM_QUANT_TBLS] = {100, 100, 100, 100};
+#endif
+
 GLOBAL(boolean)
-read_quant_tables (j_compress_ptr cinfo, char * filename,
-		   int scale_factor, boolean force_baseline)
+read_quant_tables (j_compress_ptr cinfo, char * filename, boolean force_baseline)
 /* Read a set of quantization tables from the specified file.
  * The file is plain ASCII text: decimal numbers with whitespace between.
  * Comments preceded by '#' may be included in the file.
@@ -102,13 +108,19 @@
     table[0] = (unsigned int) val;
     for (i = 1; i < DCTSIZE2; i++) {
       if (! read_text_integer(fp, &val, &termchar)) {
-	fprintf(stderr, "Invalid table data in file %s\n", filename);
-	fclose(fp);
-	return FALSE;
+        fprintf(stderr, "Invalid table data in file %s\n", filename);
+        fclose(fp);
+        return FALSE;
       }
       table[i] = (unsigned int) val;
     }
-    jpeg_add_quant_table(cinfo, tblno, table, scale_factor, force_baseline);
+#if JPEG_LIB_VERSION >= 70
+    jpeg_add_quant_table(cinfo, tblno, table, cinfo->q_scale_factor[tblno],
+                         force_baseline);
+#else
+    jpeg_add_quant_table(cinfo, tblno, table, q_scale_factor[tblno],
+                         force_baseline);
+#endif
     tblno++;
   }
 
@@ -138,7 +150,7 @@
   ch = *termchar;
   while (ch != EOF && isspace(ch))
     ch = text_getc(file);
-  if (isdigit(ch)) {		/* oops, put it back */
+  if (isdigit(ch)) {            /* oops, put it back */
     if (ungetc(ch, file) == EOF)
       return FALSE;
     ch = ' ';
@@ -176,7 +188,7 @@
   int scanno, ncomps, termchar;
   long val;
   jpeg_scan_info * scanptr;
-#define MAX_SCANS  100		/* quite arbitrary limit */
+#define MAX_SCANS  100          /* quite arbitrary limit */
   jpeg_scan_info scans[MAX_SCANS];
 
   if ((fp = fopen(filename, "r")) == NULL) {
@@ -196,29 +208,29 @@
     ncomps = 1;
     while (termchar == ' ') {
       if (ncomps >= MAX_COMPS_IN_SCAN) {
-	fprintf(stderr, "Too many components in one scan in file %s\n",
-		filename);
-	fclose(fp);
-	return FALSE;
+        fprintf(stderr, "Too many components in one scan in file %s\n",
+                filename);
+        fclose(fp);
+        return FALSE;
       }
       if (! read_scan_integer(fp, &val, &termchar))
-	goto bogus;
+        goto bogus;
       scanptr->component_index[ncomps] = (int) val;
       ncomps++;
     }
     scanptr->comps_in_scan = ncomps;
     if (termchar == ':') {
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Ss = (int) val;
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Se = (int) val;
       if (! read_scan_integer(fp, &val, &termchar) || termchar != ' ')
-	goto bogus;
+        goto bogus;
       scanptr->Ah = (int) val;
       if (! read_scan_integer(fp, &val, &termchar))
-	goto bogus;
+        goto bogus;
       scanptr->Al = (int) val;
     } else {
       /* set non-progressive parameters */
@@ -249,8 +261,8 @@
      */
     scanptr = (jpeg_scan_info *)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  scanno * SIZEOF(jpeg_scan_info));
-    MEMCOPY(scanptr, scans, scanno * SIZEOF(jpeg_scan_info));
+                                  scanno * sizeof(jpeg_scan_info));
+    MEMCOPY(scanptr, scans, scanno * sizeof(jpeg_scan_info));
     cinfo->scan_info = scanptr;
     cinfo->num_scans = scanno;
   }
@@ -262,6 +274,84 @@
 #endif /* C_MULTISCAN_FILES_SUPPORTED */
 
 
+#if JPEG_LIB_VERSION < 70
+/* These are the sample quantization tables given in JPEG spec section K.1.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  17,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+LOCAL(void)
+jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline)
+{
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+                       q_scale_factor[0], force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+                       q_scale_factor[1], force_baseline);
+}
+#endif
+
+
+GLOBAL(boolean)
+set_quality_ratings (j_compress_ptr cinfo, char *arg, boolean force_baseline)
+/* Process a quality-ratings parameter string, of the form
+ *     N[,N,...]
+ * If there are more q-table slots than parameters, the last value is replicated.
+ */
+{
+  int val = 75;                 /* default value */
+  int tblno;
+  char ch;
+
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (*arg) {
+      ch = ',';                 /* if not set by sscanf, will be ',' */
+      if (sscanf(arg, "%d%c", &val, &ch) < 1)
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
+      /* Convert user 0-100 rating to percentage scaling */
+#if JPEG_LIB_VERSION >= 70
+      cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#else
+      q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#endif
+      while (*arg && *arg++ != ',') /* advance to next segment of arg string */
+        ;
+    } else {
+      /* reached end of parameter, set remaining factors to last value */
+#if JPEG_LIB_VERSION >= 70
+      cinfo->q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#else
+      q_scale_factor[tblno] = jpeg_quality_scaling(val);
+#endif
+    }
+  }
+  jpeg_default_qtables(cinfo, force_baseline);
+  return TRUE;
+}
+
+
 GLOBAL(boolean)
 set_quant_slots (j_compress_ptr cinfo, char *arg)
 /* Process a quantization-table-selectors parameter string, of the form
@@ -269,25 +359,25 @@
  * If there are more components than parameters, the last value is replicated.
  */
 {
-  int val = 0;			/* default table # */
+  int val = 0;                  /* default table # */
   int ci;
   char ch;
 
   for (ci = 0; ci < MAX_COMPONENTS; ci++) {
     if (*arg) {
-      ch = ',';			/* if not set by sscanf, will be ',' */
+      ch = ',';                 /* if not set by sscanf, will be ',' */
       if (sscanf(arg, "%d%c", &val, &ch) < 1)
-	return FALSE;
-      if (ch != ',')		/* syntax check */
-	return FALSE;
+        return FALSE;
+      if (ch != ',')            /* syntax check */
+        return FALSE;
       if (val < 0 || val >= NUM_QUANT_TBLS) {
-	fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n",
-		NUM_QUANT_TBLS-1);
-	return FALSE;
+        fprintf(stderr, "JPEG quantization tables are numbered 0..%d\n",
+                NUM_QUANT_TBLS-1);
+        return FALSE;
       }
       cinfo->comp_info[ci].quant_tbl_no = val;
       while (*arg && *arg++ != ',') /* advance to next segment of arg string */
-	;
+        ;
     } else {
       /* reached end of parameter, set remaining components to last table */
       cinfo->comp_info[ci].quant_tbl_no = val;
@@ -309,19 +399,19 @@
 
   for (ci = 0; ci < MAX_COMPONENTS; ci++) {
     if (*arg) {
-      ch2 = ',';		/* if not set by sscanf, will be ',' */
+      ch2 = ',';                /* if not set by sscanf, will be ',' */
       if (sscanf(arg, "%d%c%d%c", &val1, &ch1, &val2, &ch2) < 3)
-	return FALSE;
+        return FALSE;
       if ((ch1 != 'x' && ch1 != 'X') || ch2 != ',') /* syntax check */
-	return FALSE;
+        return FALSE;
       if (val1 <= 0 || val1 > 4 || val2 <= 0 || val2 > 4) {
-	fprintf(stderr, "JPEG sampling factors must be 1..4\n");
-	return FALSE;
+        fprintf(stderr, "JPEG sampling factors must be 1..4\n");
+        return FALSE;
       }
       cinfo->comp_info[ci].h_samp_factor = val1;
       cinfo->comp_info[ci].v_samp_factor = val2;
       while (*arg && *arg++ != ',') /* advance to next segment of arg string */
-	;
+        ;
     } else {
       /* reached end of parameter, set remaining components to 1x1 sampling */
       cinfo->comp_info[ci].h_samp_factor = 1;
diff --git a/rdtarga.c b/rdtarga.c
index 4c2cd26..afd17c4 100644
--- a/rdtarga.c
+++ b/rdtarga.c
@@ -1,8 +1,10 @@
 /*
  * rdtarga.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to read input images in Targa format.
@@ -17,7 +19,7 @@
  * Based on code contributed by Lee Daniel Crocker.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef TARGA_SUPPORTED
 
@@ -26,19 +28,19 @@
 
 #ifdef HAVE_UNSIGNED_CHAR
 typedef unsigned char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else /* !HAVE_UNSIGNED_CHAR */
-#ifdef CHAR_IS_UNSIGNED
+#ifdef __CHAR_UNSIGNED__
 typedef char U_CHAR;
-#define UCH(x)	((int) (x))
+#define UCH(x)  ((int) (x))
 #else
 typedef char U_CHAR;
-#define UCH(x)	((int) (x) & 0xFF)
+#define UCH(x)  ((int) (x) & 0xFF)
 #endif
 #endif /* HAVE_UNSIGNED_CHAR */
 
 
-#define	ReadOK(file,buffer,len)	(JFREAD(file,buffer,len) == ((size_t) (len)))
+#define ReadOK(file,buffer,len) (JFREAD(file,buffer,len) == ((size_t) (len)))
 
 
 /* Private version of data source object */
@@ -48,28 +50,27 @@
 typedef struct _tga_source_struct {
   struct cjpeg_source_struct pub; /* public fields */
 
-  j_compress_ptr cinfo;		/* back link saves passing separate parm */
+  j_compress_ptr cinfo;         /* back link saves passing separate parm */
 
-  JSAMPARRAY colormap;		/* Targa colormap (converted to my format) */
+  JSAMPARRAY colormap;          /* Targa colormap (converted to my format) */
 
-  jvirt_sarray_ptr whole_image;	/* Needed if funny input row order */
-  JDIMENSION current_row;	/* Current logical row number to read */
+  jvirt_sarray_ptr whole_image; /* Needed if funny input row order */
+  JDIMENSION current_row;       /* Current logical row number to read */
 
   /* Pointer to routine to extract next Targa pixel from input file */
-  JMETHOD(void, read_pixel, (tga_source_ptr sinfo));
+  void (*read_pixel) (tga_source_ptr sinfo);
 
   /* Result of read_pixel is delivered here: */
   U_CHAR tga_pixel[4];
 
-  int pixel_size;		/* Bytes per Targa pixel (1 to 4) */
+  int pixel_size;               /* Bytes per Targa pixel (1 to 4) */
 
   /* State info for reading RLE-coded pixels; both counts must be init to 0 */
-  int block_count;		/* # of pixels remaining in RLE block */
-  int dup_pixel_count;		/* # of times to duplicate previous pixel */
+  int block_count;              /* # of pixels remaining in RLE block */
+  int dup_pixel_count;          /* # of times to duplicate previous pixel */
 
   /* This saves the correct pixel-row-expansion method for preload_image */
-  JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
+  JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
 } tga_source_struct;
 
 
@@ -148,9 +149,9 @@
   /* Time to read RLE block header? */
   if (--sinfo->block_count < 0) { /* decrement pixels remaining in block */
     i = read_byte(sinfo);
-    if (i & 0x80) {		/* Start of duplicate-pixel block? */
+    if (i & 0x80) {             /* Start of duplicate-pixel block? */
       sinfo->dup_pixel_count = i & 0x7F; /* number of dups after this one */
-      sinfo->block_count = 0;	/* then read new block header */
+      sinfo->block_count = 0;   /* then read new block header */
     } else {
       sinfo->block_count = i & 0x7F; /* number of pixels after this one */
     }
@@ -177,7 +178,7 @@
   tga_source_ptr source = (tga_source_ptr) sinfo;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -215,7 +216,7 @@
   register int t;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -242,7 +243,7 @@
   tga_source_ptr source = (tga_source_ptr) sinfo;
   register JSAMPROW ptr;
   register JDIMENSION col;
-  
+
   ptr = source->pub.buffer[0];
   for (col = cinfo->image_width; col > 0; col--) {
     (*source->read_pixel) (source); /* Load next pixel into tga_pixel */
@@ -338,8 +339,8 @@
   unsigned int width, height, maplen;
   boolean is_bottom_up;
 
-#define GET_2B(offset)	((unsigned int) UCH(targaheader[offset]) + \
-			 (((unsigned int) UCH(targaheader[offset+1])) << 8))
+#define GET_2B(offset)  ((unsigned int) UCH(targaheader[offset]) + \
+                         (((unsigned int) UCH(targaheader[offset+1])) << 8))
 
   if (! ReadOK(source->pub.input_file, targaheader, 18))
     ERREXIT(cinfo, JERR_INPUT_EOF);
@@ -355,17 +356,17 @@
   width = GET_2B(12);
   height = GET_2B(14);
   source->pixel_size = UCH(targaheader[16]) >> 3;
-  flags = UCH(targaheader[17]);	/* Image Descriptor byte */
+  flags = UCH(targaheader[17]); /* Image Descriptor byte */
 
-  is_bottom_up = ((flags & 0x20) == 0);	/* bit 5 set => top-down */
-  interlace_type = flags >> 6;	/* bits 6/7 are interlace code */
+  is_bottom_up = ((flags & 0x20) == 0); /* bit 5 set => top-down */
+  interlace_type = flags >> 6;  /* bits 6/7 are interlace code */
 
-  if (cmaptype > 1 ||		/* cmaptype must be 0 or 1 */
+  if (cmaptype > 1 ||           /* cmaptype must be 0 or 1 */
       source->pixel_size < 1 || source->pixel_size > 4 ||
       (UCH(targaheader[16]) & 7) != 0 || /* bits/pixel must be multiple of 8 */
-      interlace_type != 0)	/* currently don't allow interlaced image */
+      interlace_type != 0)      /* currently don't allow interlaced image */
     ERREXIT(cinfo, JERR_TGA_BADPARMS);
-  
+
   if (subtype > 8) {
     /* It's an RLE-coded file */
     source->read_pixel = read_rle_pixel;
@@ -377,18 +378,18 @@
   }
 
   /* Now should have subtype 1, 2, or 3 */
-  components = 3;		/* until proven different */
+  components = 3;               /* until proven different */
   cinfo->in_color_space = JCS_RGB;
 
   switch (subtype) {
-  case 1:			/* Colormapped image */
+  case 1:                       /* Colormapped image */
     if (source->pixel_size == 1 && cmaptype == 1)
       source->get_pixel_rows = get_8bit_row;
     else
       ERREXIT(cinfo, JERR_TGA_BADPARMS);
     TRACEMS2(cinfo, 1, JTRC_TGA_MAPPED, width, height);
     break;
-  case 2:			/* RGB image */
+  case 2:                       /* RGB image */
     switch (source->pixel_size) {
     case 2:
       source->get_pixel_rows = get_16bit_row;
@@ -405,7 +406,7 @@
     }
     TRACEMS2(cinfo, 1, JTRC_TGA, width, height);
     break;
-  case 3:			/* Grayscale image */
+  case 3:                       /* Grayscale image */
     components = 1;
     cinfo->in_color_space = JCS_GRAYSCALE;
     if (source->pixel_size == 1)
@@ -440,8 +441,8 @@
     source->pub.buffer_height = 1;
     source->pub.get_pixel_rows = source->get_pixel_rows;
   }
-  
-  while (idlen--)		/* Throw away ID field */
+
+  while (idlen--)               /* Throw away ID field */
     (void) read_byte(source);
 
   if (maplen > 0) {
@@ -453,7 +454,7 @@
     /* and read it from the file */
     read_colormap(source, (int) maplen, UCH(targaheader[7]));
   } else {
-    if (cmaptype)		/* but you promised a cmap! */
+    if (cmaptype)               /* but you promised a cmap! */
       ERREXIT(cinfo, JERR_TGA_BADPARMS);
     source->colormap = NULL;
   }
@@ -488,8 +489,8 @@
   /* Create module interface object */
   source = (tga_source_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(tga_source_struct));
-  source->cinfo = cinfo;	/* make back link for subroutines */
+                                  sizeof(tga_source_struct));
+  source->cinfo = cinfo;        /* make back link for subroutines */
   /* Fill in method ptrs, except get_pixel_rows which start_input sets */
   source->pub.start_input = start_input_tga;
   source->pub.finish_input = finish_input_tga;
diff --git a/release/Distribution.xml b/release/Distribution.xml
new file mode 100644
index 0000000..ee73ab0
--- /dev/null
+++ b/release/Distribution.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<installer-gui-script minSpecVersion="1">
+	<title>libjpeg-turbo</title>
+	<welcome file="Welcome.rtf" />
+	<readme file="ReadMe.txt" />
+	<license file="License.rtf" />
+	<domains
+		enable_anywhere="false"
+		enable_currentUserHome="false"
+		enable_localSystem="true"
+	/>
+	<options customize="never" />
+	<choices-outline>
+		<line choice="default">
+			<line choice="com.libjpeg-turbo.libjpeg-turbo"/>
+		</line>
+	</choices-outline>
+	<choice id="default"/>
+	<choice id="com.libjpeg-turbo.libjpeg-turbo" visible="false">
+		<pkg-ref id="com.libjpeg-turbo.libjpeg-turbo"/>
+	</choice>
+	<pkg-ref auth="root"
+		id="com.libjpeg-turbo.libjpeg-turbo">libjpeg-turbo.pkg</pkg-ref>
+</installer-gui-script>
diff --git a/release/License.rtf b/release/License.rtf
new file mode 100755
index 0000000..5073a27
--- /dev/null
+++ b/release/License.rtf
@@ -0,0 +1,20 @@
+{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf350
+{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid1\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}}
+{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}}
+\margl1440\margr1440\vieww9820\viewh8480\viewkind0
+\deftab720
+\pard\pardeftab720
+
+\f0\fs24 \cf0 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\
+\
+\pard\tx220\tx720\pardeftab720\li720\fi-720
+\ls1\ilvl0\cf0 {\listtext	\'95	}Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\
+{\listtext	\'95	}Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\
+{\listtext	\'95	}Neither the name of the libjpeg-turbo Project nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.\
+\pard\pardeftab720\qc
+\cf0 \
+\pard\pardeftab720
+\cf0 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS", AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;  LOSS OF USE, DATA, OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\
+}
\ No newline at end of file
diff --git a/release/ReadMe.txt b/release/ReadMe.txt
new file mode 100644
index 0000000..b9f6ca5
--- /dev/null
+++ b/release/ReadMe.txt
@@ -0,0 +1,5 @@
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines.  In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+
+libjpeg-turbo implements both the traditional libjpeg API and the less powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface.
+
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and VirtualGL projects made numerous enhancements to the codec in 2009, and in early 2010, libjpeg-turbo spun off into an independent project, with the goal of making high-speed JPEG compression/decompression technology available to a broader range of users and developers.
diff --git a/release/Welcome.rtf b/release/Welcome.rtf
new file mode 100755
index 0000000..a570c5b
--- /dev/null
+++ b/release/Welcome.rtf
@@ -0,0 +1,17 @@
+{\rtf1\ansi\ansicpg1252\cocoartf1038\cocoasubrtf360
+{\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\fmodern\fcharset0 CourierNewPSMT;}
+{\colortbl;\red255\green255\blue255;}
+\margl1440\margr1440\vieww9000\viewh8400\viewkind0
+\deftab720
+\pard\pardeftab720\ql\qnatural
+
+\f0\fs24 \cf0 This installer will install the libjpeg-turbo SDK and run-time libraries onto your computer so that you can use libjpeg-turbo to build new applications or accelerate existing ones.  To remove the libjpeg-turbo package, run\
+\
+\pard\pardeftab720\ql\qnatural
+
+\f1 \cf0   /opt/libjpeg-turbo/bin/uninstall\
+\pard\pardeftab720\ql\qnatural
+
+\f0 \cf0 \
+from the command line.\
+}
\ No newline at end of file
diff --git a/release/deb-control.tmpl b/release/deb-control.tmpl
new file mode 100644
index 0000000..510b1d6
--- /dev/null
+++ b/release/deb-control.tmpl
@@ -0,0 +1,31 @@
+Package: {__PKGNAME}
+Version: {__VERSION}-{__BUILD}
+Section: misc
+Priority: optional
+Architecture: {__ARCH}
+Essential: no
+Maintainer: The libjpeg-turbo Project <information@libjpeg-turbo.org>
+Homepage: http://www.libjpeg-turbo.org
+Installed-Size: {__SIZE}
+Description: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
+ libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+ NEON) to accelerate baseline JPEG compression and decompression on x86,
+ x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as
+ fast as libjpeg, all else being equal.  On other types of systems,
+ libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue
+ of its highly-optimized Huffman coding routines.  In many cases, the
+ performance of libjpeg-turbo rivals that of proprietary high-speed JPEG
+ codecs.
+ .
+ libjpeg-turbo implements both the traditional libjpeg API and the less
+ powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+ colorspace extensions that allow it to compress from/decompress to 32-bit and
+ big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+ interface.
+ .
+ libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+ derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+ VirtualGL projects made numerous enhancements to the codec in 2009, and in
+ early 2010, libjpeg-turbo spun off into an independent project, with the goal
+ of making high-speed JPEG compression/decompression technology available to a
+ broader range of users and developers.
diff --git a/release/libjpeg-turbo.nsi.in b/release/libjpeg-turbo.nsi.in
new file mode 100755
index 0000000..0514565
--- /dev/null
+++ b/release/libjpeg-turbo.nsi.in
@@ -0,0 +1,158 @@
+!include x64.nsh
+Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@"
+OutFile "@CMAKE_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe"
+InstallDir @INST_DIR@
+
+SetCompressor bzip2
+
+Page directory
+Page instfiles
+
+UninstPage uninstConfirm
+UninstPage instfiles
+
+Section "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ (required)"
+!ifdef WIN64
+	${If} ${RunningX64}
+	${DisableX64FSRedirection}
+	${Endif}
+!endif
+	SectionIn RO
+!ifdef GCC
+	IfFileExists $SYSDIR/libturbojpeg.dll exists 0
+!else
+	IfFileExists $SYSDIR/turbojpeg.dll exists 0
+!endif
+	goto notexists
+	exists:
+!ifdef GCC
+	MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ is already installed.  Please uninstall it first."
+!else
+	MessageBox MB_OK "An existing version of the @CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@ or the TurboJPEG SDK is already installed.  Please uninstall it first."
+!endif
+	quit
+
+	notexists:
+	SetOutPath $SYSDIR
+!ifdef GCC
+	File "@CMAKE_BINARY_DIR@\libturbojpeg.dll"
+!else
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.dll"
+!endif
+	SetOutPath $INSTDIR\bin
+!ifdef GCC
+	File "@CMAKE_BINARY_DIR@\libturbojpeg.dll"
+!else
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.dll"
+!endif
+!ifdef GCC
+	File "/oname=libjpeg-@DLL_VERSION@.dll" "@CMAKE_BINARY_DIR@\sharedlib\libjpeg-*.dll" 
+!else
+	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpeg@DLL_VERSION@.dll"
+!endif
+	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}cjpeg.exe"
+	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}djpeg.exe"
+	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpegtran.exe"
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}tjbench.exe"
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}rdjpgcom.exe"
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}wrjpgcom.exe"
+	SetOutPath $INSTDIR\lib
+!ifdef GCC
+	File "@CMAKE_BINARY_DIR@\libturbojpeg.dll.a"
+	File "@CMAKE_BINARY_DIR@\libturbojpeg.a"
+	File "@CMAKE_BINARY_DIR@\sharedlib\libjpeg.dll.a"
+	File "@CMAKE_BINARY_DIR@\libjpeg.a"
+!else
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg.lib"
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}turbojpeg-static.lib"
+	File "@CMAKE_BINARY_DIR@\sharedlib\${BUILDDIR}jpeg.lib"
+	File "@CMAKE_BINARY_DIR@\${BUILDDIR}jpeg-static.lib"
+!endif
+!ifdef JAVA
+	SetOutPath $INSTDIR\classes
+	File "@CMAKE_BINARY_DIR@\java\${BUILDDIR}turbojpeg.jar"
+!endif
+	SetOutPath $INSTDIR\include
+	File "@CMAKE_BINARY_DIR@\jconfig.h"
+	File "@CMAKE_SOURCE_DIR@\jerror.h"
+	File "@CMAKE_SOURCE_DIR@\jmorecfg.h"
+	File "@CMAKE_SOURCE_DIR@\jpeglib.h"
+	File "@CMAKE_SOURCE_DIR@\turbojpeg.h"
+	SetOutPath $INSTDIR\doc
+	File "@CMAKE_SOURCE_DIR@\README"
+	File "@CMAKE_SOURCE_DIR@\README-turbo.txt"
+	File "@CMAKE_SOURCE_DIR@\example.c"
+	File "@CMAKE_SOURCE_DIR@\libjpeg.txt"
+	File "@CMAKE_SOURCE_DIR@\structure.txt"
+	File "@CMAKE_SOURCE_DIR@\usage.txt"
+	File "@CMAKE_SOURCE_DIR@\wizard.txt"
+
+	WriteRegStr HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@" "Install_Dir" "$INSTDIR"
+
+	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "DisplayName" "@CMAKE_PROJECT_NAME@ SDK v@VERSION@ for @INST_PLATFORM@"
+	WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "UninstallString" '"$INSTDIR\uninstall_@VERSION@.exe"'
+	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoModify" 1
+	WriteRegDWORD HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@" "NoRepair" 1
+	WriteUninstaller "uninstall_@VERSION@.exe"
+SectionEnd
+
+Section "Uninstall"
+	; Removes the registry keys and every file written by the installer section, then the directories (RMDir without /r only removes an empty directory.)
+!ifdef WIN64
+	${If} ${RunningX64}
+	${DisableX64FSRedirection}
+	${Endif}
+!endif
+	SetShellVarContext all
+	DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\@INST_REG_NAME@ @VERSION@"
+	DeleteRegKey HKLM "SOFTWARE\@INST_REG_NAME@ @VERSION@"
+
+!ifdef GCC
+	Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll
+	Delete $INSTDIR\bin\libturbojpeg.dll
+	Delete $SYSDIR\libturbojpeg.dll
+	Delete $INSTDIR\lib\libturbojpeg.dll.a
+	Delete $INSTDIR\lib\libturbojpeg.a
+	Delete $INSTDIR\lib\libjpeg.dll.a
+	Delete $INSTDIR\lib\libjpeg.a
+!else
+	Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll
+	Delete $INSTDIR\bin\turbojpeg.dll
+	Delete $SYSDIR\turbojpeg.dll
+	Delete $INSTDIR\lib\jpeg.lib
+	Delete $INSTDIR\lib\jpeg-static.lib
+	Delete $INSTDIR\lib\turbojpeg.lib
+	Delete $INSTDIR\lib\turbojpeg-static.lib
+!endif
+!ifdef JAVA
+	Delete $INSTDIR\classes\turbojpeg.jar
+!endif
+	Delete $INSTDIR\bin\cjpeg.exe
+	Delete $INSTDIR\bin\djpeg.exe
+	Delete $INSTDIR\bin\jpegtran.exe
+	Delete $INSTDIR\bin\tjbench.exe
+	Delete $INSTDIR\bin\rdjpgcom.exe
+	Delete $INSTDIR\bin\wrjpgcom.exe
+	Delete $INSTDIR\include\jconfig.h
+	Delete $INSTDIR\include\jerror.h
+	Delete $INSTDIR\include\jmorecfg.h
+	Delete $INSTDIR\include\jpeglib.h
+	Delete $INSTDIR\include\turbojpeg.h
+	Delete $INSTDIR\uninstall_@VERSION@.exe
+	Delete $INSTDIR\doc\README
+	Delete $INSTDIR\doc\README-turbo.txt
+	Delete $INSTDIR\doc\example.c
+	Delete $INSTDIR\doc\libjpeg.txt
+	Delete $INSTDIR\doc\structure.txt
+	Delete $INSTDIR\doc\usage.txt
+	Delete $INSTDIR\doc\wizard.txt
+
+	RMDir "$INSTDIR\include"
+	RMDir "$INSTDIR\lib"
+	RMDir "$INSTDIR\doc"
+!ifdef JAVA
+	RMDir "$INSTDIR\classes"
+!endif
+	RMDir "$INSTDIR\bin"
+	RMDir "$INSTDIR"
+SectionEnd
diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in
new file mode 100644
index 0000000..23793cf
--- /dev/null
+++ b/release/libjpeg-turbo.spec.in
@@ -0,0 +1,150 @@
+# Path under which libjpeg-turbo should be installed
+%define _prefix %{__prefix}
+
+# Path under which executables should be installed
+%define _bindir %{__bindir}
+
+# Path under which Java classes and man pages should be installed
+%define _datadir %{__datadir}
+
+# Path under which docs should be installed
+%define _docdir /usr/share/doc/%{name}-%{version}
+
+# Path under which headers should be installed
+%define _includedir %{__includedir}
+
+# _libdir is set to %{_prefix}/%{_lib} by default
+%ifarch x86_64
+%define _lib lib64
+%else
+%if "%{_prefix}" == "/opt/libjpeg-turbo"
+%define _lib lib32
+%endif
+%endif
+
+# Path under which man pages should be installed
+%define _mandir %{__mandir}
+
+Summary: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
+Name: @PKGNAME@
+Version: @VERSION@
+Vendor: The libjpeg-turbo Project
+URL: http://www.libjpeg-turbo.org
+Group: System Environment/Libraries
+#-->Source0: http://prdownloads.sourceforge.net/libjpeg-turbo/libjpeg-turbo-%{version}.tar.gz
+Release: @BUILD@
+License: BSD-style
+BuildRoot: %{_blddir}/%{name}-buildroot-%{version}-%{release}
+Prereq: /sbin/ldconfig
+%ifarch x86_64
+Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so()(64bit)
+%else
+Provides: %{name} = %{version}-%{release}, @PACKAGE_NAME@ = %{version}-%{release}, libturbojpeg.so
+%endif
+
+%description
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
+NEON) to accelerate baseline JPEG compression and decompression on x86, x86-64,
+and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as fast as
+libjpeg, all else being equal.  On other types of systems, libjpeg-turbo can
+still outperform libjpeg by a significant amount, by virtue of its
+highly-optimized Huffman coding routines.  In many cases, the performance of
+libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+
+libjpeg-turbo implements both the traditional libjpeg API and the less
+powerful but more straightforward TurboJPEG API.  libjpeg-turbo also features
+colorspace extensions that allow it to compress from/decompress to 32-bit and
+big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
+interface.
+
+libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
+derivative of libjpeg v6b developed by Miyasaka Masaru.  The TigerVNC and
+VirtualGL projects made numerous enhancements to the codec in 2009, and in
+early 2010, libjpeg-turbo spun off into an independent project, with the goal
+of making high-speed JPEG compression/decompression technology available to a
+broader range of users and developers.
+
+#-->%prep
+#-->%setup -q -n libjpeg-turbo-%{version}
+
+#-->%build
+#-->./configure prefix=%{_prefix} bindir=%{_bindir} datadir=%{_datadir} \
+#-->	docdir=%{_docdir} includedir=%{_includedir} libdir=%{_libdir} \
+#-->	mandir=%{_mandir} JPEG_LIB_VERSION=@JPEG_LIB_VERSION@ \
+#-->	SO_MAJOR_VERSION=@SO_MAJOR_VERSION@ SO_MINOR_VERSION=@SO_MINOR_VERSION@ \
+#-->	--with-pic @RPM_CONFIG_ARGS@
+#-->make DESTDIR=$RPM_BUILD_ROOT
+
+%install
+
+rm -rf $RPM_BUILD_ROOT
+make install DESTDIR=$RPM_BUILD_ROOT docdir=%{_docdir} exampledir=%{_docdir}
+rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
+/sbin/ldconfig -n $RPM_BUILD_ROOT%{_libdir}
+
+#-->%if 0
+
+LJT_LIBDIR=%{__libdir}
+if [ ! "$LJT_LIBDIR" = "%{_libdir}" ]; then
+	echo ERROR: libjpeg-turbo must be configured with libdir=%{_prefix}/%{_lib} when generating an in-tree RPM for this architecture.
+	exit 1
+fi
+
+#-->%endif
+
+LJT_DOCDIR=%{__docdir}
+if [ "%{_prefix}" = "/opt/libjpeg-turbo" -a "$LJT_DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs %{_docdir} $RPM_BUILD_ROOT/$LJT_DOCDIR
+fi
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%files
+%defattr(-,root,root)
+%dir %{_docdir}
+%doc %{_docdir}/*
+%dir %{_prefix}
+%if "%{_prefix}" == "/opt/libjpeg-turbo" && "%{_docdir}" != "%{_prefix}/doc"
+ %{_prefix}/doc
+%endif
+%dir %{_bindir}
+%{_bindir}/cjpeg
+%{_bindir}/djpeg
+%{_bindir}/jpegtran
+%{_bindir}/tjbench
+%{_bindir}/rdjpgcom
+%{_bindir}/wrjpgcom
+%dir %{_libdir}
+%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@.@SO_AGE@.@SO_MINOR_VERSION@
+%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
+%{_libdir}/libjpeg.so
+%{_libdir}/libjpeg.a
+%{_libdir}/libturbojpeg.so.0.1.0
+%{_libdir}/libturbojpeg.so.0
+%{_libdir}/libturbojpeg.so
+%{_libdir}/libturbojpeg.a
+%dir %{_includedir}
+%{_includedir}/jconfig.h
+%{_includedir}/jerror.h
+%{_includedir}/jmorecfg.h
+%{_includedir}/jpeglib.h
+%{_includedir}/turbojpeg.h
+%dir %{_mandir}
+%dir %{_mandir}/man1
+%{_mandir}/man1/cjpeg.1*
+%{_mandir}/man1/djpeg.1*
+%{_mandir}/man1/jpegtran.1*
+%{_mandir}/man1/rdjpgcom.1*
+%{_mandir}/man1/wrjpgcom.1*
+%if "%{_prefix}" != "%{_datadir}"
+ %dir %{_datadir}
+%endif
+@JAVA_RPM_CONTENTS_1@
+@JAVA_RPM_CONTENTS_2@
+
+%changelog
diff --git a/release/makecygwinpkg.in b/release/makecygwinpkg.in
new file mode 100755
index 0000000..f303546
--- /dev/null
+++ b/release/makecygwinpkg.in
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Stages a "make install" of the current build tree into a temporary
+# directory and archives it as $PACKAGE_NAME-$VERSION-$BUILD.tar.bz2 in the
+# directory from which this script was run.
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+TMPDIR=
+
+# Exit/signal handler: removes the staging directory, if one was created.
+onexit()
+{
+	if [ -n "$TMPDIR" ]; then
+		rm -rf "$TMPDIR"
+	fi
+}
+
+PACKAGE_NAME=@PKGNAME@
+VERSION=@VERSION@
+BUILD=@BUILD@
+SRCDIR=@abs_top_srcdir@
+
+PREFIX=%{__prefix}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
+umask 022
+rm -f $PACKAGE_NAME-$VERSION-$BUILD.tar.bz2
+TMPDIR=`mktemp -d /tmp/ljtbuild.XXXXXX`
+__PWD=`pwd`
+make install DESTDIR=$TMPDIR/pkg docdir=/usr/share/doc/$PACKAGE_NAME-$VERSION \
+	exampledir=/usr/share/doc/$PACKAGE_NAME-$VERSION
+# -f: "set -e" is in effect, so a build that installed no libtool archives
+# (*.la) must not abort the packaging run here.
+rm -f $TMPDIR/pkg$LIBDIR/*.la
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs /usr/share/doc/$PACKAGE_NAME-$VERSION $TMPDIR/pkg$DOCDIR
+fi
+cd $TMPDIR/pkg
+tar cfj ../$PACKAGE_NAME-$VERSION-$BUILD.tar.bz2 *
+cd "$__PWD"
+mv $TMPDIR/*.tar.bz2 .
+
+exit 0
diff --git a/release/makedpkg.in b/release/makedpkg.in
new file mode 100644
index 0000000..448e4cd
--- /dev/null
+++ b/release/makedpkg.in
@@ -0,0 +1,72 @@
+#!/bin/sh
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+TMPDIR=
+
+onexit()
+{
+	if [ ! "$TMPDIR" = "" ]; then
+		sudo rm -rf $TMPDIR
+	fi
+}
+
+makedeb()
+{
+	SUPPLEMENT=$1
+	DIRNAME=$PACKAGE_NAME
+
+	if [ $SUPPLEMENT = 1 ]; then
+		PACKAGE_NAME=$PACKAGE_NAME\32
+		DEBARCH=amd64
+	fi
+
+	umask 022
+	rm -f $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb
+	TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX`
+	mkdir $TMPDIR/DEBIAN
+
+	if [ $SUPPLEMENT = 1 ]; then
+		make install DESTDIR=$TMPDIR bindir=/dummy/bin datadir=/dummy/data \
+			docdir=/dummy/doc includedir=/dummy/include mandir=/dummy/man
+		rm -f $TMPDIR$LIBDIR/*.la
+		rm -rf $TMPDIR/dummy
+	else
+		make install DESTDIR=$TMPDIR docdir=/usr/share/doc/$DIRNAME-$VERSION \
+			exampledir=/usr/share/doc/$DIRNAME-$VERSION
+		rm -f $TMPDIR$LIBDIR/*.la
+		if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+			ln -fs /usr/share/doc/$DIRNAME-$VERSION $TMPDIR$DOCDIR
+		fi
+	fi
+
+	SIZE=`du -s $TMPDIR | cut -f1`
+	(cat $SRCDIR/release/deb-control.tmpl | sed s/{__PKGNAME}/$PACKAGE_NAME/g \
+		| sed s/{__VERSION}/$VERSION/g | sed s/{__BUILD}/$BUILD/g \
+		| sed s/{__ARCH}/$DEBARCH/g | sed s/{__SIZE}/$SIZE/g \
+		> $TMPDIR/DEBIAN/control)
+
+
+	/sbin/ldconfig -n $TMPDIR$LIBDIR
+
+	sudo chown -Rh root:root $TMPDIR/*
+	dpkg -b $TMPDIR $PACKAGE_NAME\_$VERSION\_$DEBARCH.deb
+}
+
+PACKAGE_NAME=@PKGNAME@
+VERSION=@VERSION@
+BUILD=@BUILD@
+DEBARCH=@DEBARCH@
+SRCDIR=@abs_top_srcdir@
+PREFIX=%{__prefix}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
+makedeb 0
+if [ "$DEBARCH" = "i386" ]; then makedeb 1; fi
+
+exit
diff --git a/release/makemacpkg.in b/release/makemacpkg.in
new file mode 100644
index 0000000..16c18de
--- /dev/null
+++ b/release/makemacpkg.in
@@ -0,0 +1,265 @@
+#!/bin/bash
+# bash is required: this script uses [[ ... =~ ... ]], pushd, and popd.
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+TMPDIR=
+# Exit/signal handler: removes the temporary build directory, if one exists.
+onexit()
+{
+	if [ -n "$TMPDIR" ]; then
+		rm -rf "$TMPDIR"
+	fi
+}
+
+usage()
+{
+	echo "$0 [-build32 [32-bit build dir]] [-buildarmv6 [ARM v6 build dir]] [-buildarmv7 [ARM v7 build dir]] [-buildarmv7s [ARM v7s build dir]]"
+	exit 1
+}
+
+PACKAGE_NAME=@PKGNAME@
+VERSION=@VERSION@
+BUILD=@BUILD@
+SRCDIR=@abs_top_srcdir@
+BUILDDIR32=@abs_top_srcdir@/osxx86
+BUILD32=0
+BUILDDIRARMV6=@abs_top_srcdir@/iosarmv6
+BUILDARMV6=0
+BUILDDIRARMV7=@abs_top_srcdir@/iosarmv7
+BUILDARMV7=0
+BUILDDIRARMV7S=@abs_top_srcdir@/iosarmv7s
+BUILDARMV7S=0
+WITH_JAVA=@WITH_JAVA@
+
+PREFIX=%{__prefix}
+BINDIR=%{__bindir}
+DOCDIR=%{__docdir}
+LIBDIR=%{__libdir}
+
+# Parse the command line.  Each -build* switch enables the corresponding
+# architecture and may be followed by a build directory that overrides the
+# default.  The argument after a switch is taken as that directory unless it
+# begins with "-".  The pattern is anchored with ^ because an unanchored -.*
+# matches a hyphen anywhere, which would reject directories such as
+# .../libjpeg-turbo/osxx86.
+while [ $# -gt 0 ]; do
+	case $1 in
+	-h*)
+		usage 0
+		;;
+	-build32)
+		BUILD32=1
+		if [ $# -gt 1 ] && [[ ! "$2" =~ ^- ]]; then
+			BUILDDIR32=$2;  shift
+		fi
+		;;
+	-buildarmv6)
+		BUILDARMV6=1
+		if [ $# -gt 1 ] && [[ ! "$2" =~ ^- ]]; then
+			BUILDDIRARMV6=$2;  shift
+		fi
+		;;
+	-buildarmv7)
+		BUILDARMV7=1
+		if [ $# -gt 1 ] && [[ ! "$2" =~ ^- ]]; then
+			BUILDDIRARMV7=$2;  shift
+		fi
+		;;
+	-buildarmv7s)
+		BUILDARMV7S=1
+		if [ $# -gt 1 ] && [[ ! "$2" =~ ^- ]]; then
+			BUILDDIRARMV7S=$2;  shift
+		fi
+		;;
+	esac
+	shift
+done
+
+if [ -f $PACKAGE_NAME-$VERSION.dmg ]; then
+	rm -f $PACKAGE_NAME-$VERSION.dmg
+fi
+
+umask 022
+TMPDIR=`mktemp -d /tmp/$PACKAGE_NAME-build.XXXXXX`
+PKGROOT=$TMPDIR/pkg/Package_Root
+mkdir -p $PKGROOT
+make install DESTDIR=$PKGROOT docdir=/Library/Documentation/$PACKAGE_NAME \
+	exampledir=/Library/Documentation/$PACKAGE_NAME
+rm -f $PKGROOT$LIBDIR/*.la
+
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$DOCDIR" = "/opt/libjpeg-turbo/doc" ]; then
+	ln -fs /Library/Documentation/$PACKAGE_NAME $PKGROOT$DOCDIR
+fi
+
+if [ $BUILD32 = 1 ]; then
+	if [ ! -d $BUILDDIR32 ]; then
+		echo ERROR: 32-bit build directory $BUILDDIR32 does not exist
+		exit 1
+	fi
+	if [ ! -f $BUILDDIR32/Makefile ]; then
+		echo ERROR: 32-bit build directory $BUILDDIR32 is not configured
+		exit 1
+	fi
+	mkdir -p $TMPDIR/dist.x86
+	pushd $BUILDDIR32
+	make install DESTDIR=$TMPDIR/dist.x86
+	popd
+	if [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib -a \
+		! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib ]; then
+		lipo -create \
+			-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
+			-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib \
+			-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
+	elif [ ! -h $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib -a \
+		! -h $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib ]; then
+		lipo -create \
+			-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
+			-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib \
+			-output $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.0.@SO_MINOR_VERSION@.dylib
+	fi
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libjpeg.a \
+		-arch x86_64 $PKGROOT/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.0.dylib \
+		-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.0.dylib \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$LIBDIR/libturbojpeg.a \
+		-arch x86_64 $PKGROOT/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/cjpeg \
+		-arch x86_64 $PKGROOT/$BINDIR/cjpeg \
+		-output $PKGROOT/$BINDIR/cjpeg
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/djpeg \
+		-arch x86_64 $PKGROOT/$BINDIR/djpeg \
+		-output $PKGROOT/$BINDIR/djpeg
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/jpegtran \
+		-arch x86_64 $PKGROOT/$BINDIR/jpegtran \
+		-output $PKGROOT/$BINDIR/jpegtran
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/tjbench \
+		-arch x86_64 $PKGROOT/$BINDIR/tjbench \
+		-output $PKGROOT/$BINDIR/tjbench
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/rdjpgcom \
+		-arch x86_64 $PKGROOT/$BINDIR/rdjpgcom \
+		-output $PKGROOT/$BINDIR/rdjpgcom
+	lipo -create \
+		-arch i386 $TMPDIR/dist.x86/$BINDIR/wrjpgcom \
+		-arch x86_64 $PKGROOT/$BINDIR/wrjpgcom \
+		-output $PKGROOT/$BINDIR/wrjpgcom
+
+fi
+
+if [ $BUILDARMV6 = 1 ]; then
+	if [ ! -d $BUILDDIRARMV6 ]; then
+		echo ERROR: ARM v6 build directory $BUILDDIRARMV6 does not exist
+		exit 1
+	fi
+	if [ ! -f $BUILDDIRARMV6/Makefile ]; then
+		echo ERROR: ARM v6 build directory $BUILDDIRARMV6 is not configured
+		exit 1
+	fi
+	mkdir -p $TMPDIR/dist.armv6
+	pushd $BUILDDIRARMV6
+	make install DESTDIR=$TMPDIR/dist.armv6
+	popd
+	lipo -create \
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv6/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
+	lipo -create \
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv6/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
+fi
+
+if [ $BUILDARMV7 = 1 ]; then
+	if [ ! -d $BUILDDIRARMV7 ]; then
+		echo ERROR: ARM v7 build directory $BUILDDIRARMV7 does not exist
+		exit 1
+	fi
+	if [ ! -f $BUILDDIRARMV7/Makefile ]; then
+		echo ERROR: ARM v7 build directory $BUILDDIRARMV7 is not configured
+		exit 1
+	fi
+	mkdir -p $TMPDIR/dist.armv7
+	pushd $BUILDDIRARMV7
+	make install DESTDIR=$TMPDIR/dist.armv7
+	popd
+	lipo -create \
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv7/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
+	lipo -create \
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv7/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
+fi
+
+if [ $BUILDARMV7S = 1 ]; then
+	if [ ! -d $BUILDDIRARMV7S ]; then
+		echo ERROR: ARM v7s build directory $BUILDDIRARMV7S does not exist
+		exit 1
+	fi
+	if [ ! -f $BUILDDIRARMV7S/Makefile ]; then
+		echo ERROR: ARM v7s build directory $BUILDDIRARMV7S is not configured
+		exit 1
+	fi
+	mkdir -p $TMPDIR/dist.armv7s
+	pushd $BUILDDIRARMV7S
+	make install DESTDIR=$TMPDIR/dist.armv7s
+	popd
+	lipo -create \
+		$PKGROOT/$LIBDIR/libjpeg.a \
+		-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libjpeg.a \
+		-output $PKGROOT/$LIBDIR/libjpeg.a
+	lipo -create \
+		$PKGROOT/$LIBDIR/libturbojpeg.a \
+		-arch arm $TMPDIR/dist.armv7s/$LIBDIR/libturbojpeg.a \
+		-output $PKGROOT/$LIBDIR/libturbojpeg.a
+fi
+
+install_name_tool -id $LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib $PKGROOT/$LIBDIR/libjpeg.@SO_MAJOR_VERSION@.dylib
+install_name_tool -id $LIBDIR/libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.0.dylib
+
+if [ $WITH_JAVA = 1 ]; then
+	ln -fs libturbojpeg.0.dylib $PKGROOT/$LIBDIR/libturbojpeg.jnilib
+fi
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then
+	if [ ! -h $PKGROOT/$PREFIX/lib32 ]; then
+		ln -fs lib $PKGROOT/$PREFIX/lib32
+	fi
+	if [ ! -h $PKGROOT/$PREFIX/lib64 ]; then
+		ln -fs lib $PKGROOT/$PREFIX/lib64
+	fi
+fi
+
+mkdir -p $TMPDIR/pkg
+
+install -m 755 pkgscripts/uninstall $PKGROOT/$BINDIR/
+
+find $PKGROOT -type f | while read file; do xattr -c $file; done
+
+cp $SRCDIR/release/License.rtf $SRCDIR/release/Welcome.rtf $SRCDIR/release/ReadMe.txt $TMPDIR/pkg/
+
+mkdir $TMPDIR/dmg
+pkgbuild --root $PKGROOT --version $VERSION.$BUILD \
+	--identifier com.libjpeg-turbo.libjpeg-turbo $TMPDIR/pkg/$PACKAGE_NAME.pkg
+productbuild --distribution $SRCDIR/release/Distribution.xml \
+	--package-path $TMPDIR/pkg/ --resources $TMPDIR/pkg/ \
+	$TMPDIR/dmg/$PACKAGE_NAME.pkg
+hdiutil create -fs HFS+ -volname $PACKAGE_NAME-$VERSION \
+	-srcfolder "$TMPDIR/dmg" $TMPDIR/$PACKAGE_NAME-$VERSION.dmg 
+cp $TMPDIR/$PACKAGE_NAME-$VERSION.dmg . 
+
+exit
diff --git a/release/uninstall.in b/release/uninstall.in
new file mode 100644
index 0000000..f167bbd
--- /dev/null
+++ b/release/uninstall.in
@@ -0,0 +1,109 @@
+#!/bin/bash
+# Copyright (C)2009-2011, 2013 D. R. Commander.  All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the libjpeg-turbo Project nor the names of its
+#   contributors may be used to endorse or promote products derived from this
+#   software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+if [ ! "`id -u`" = "0" ]; then
+	echo "ERROR: This script must be executed as root"
+	exit -1
+fi
+
+PACKAGE=@PKGNAME@
+MACPACKAGE=com.$PACKAGE.$PACKAGE
+RECEIPT=/Library/Receipts/$PACKAGE.pkg
+
+LSBOM=
+if [ -d $RECEIPT ]; then
+	LSBOM='lsbom -s -f -l '$RECEIPT'/Contents/Archive.bom'
+else
+	LSBOM='pkgutil --files '$MACPACKAGE
+fi
+
+mylsbom()
+{
+	$LSBOM || (echo "ERROR: Could not list package contents"; exit -1)
+}
+
+echo Removing package files ...
+EXITSTATUS=0
+pushd /
+mylsbom | while read file; do
+	if [ ! -d "$file" ]; then rm "$file" 2>&1 || EXITSTATUS=-1; fi
+done
+popd
+
+echo Removing package directories ...
+PREFIX=%{__prefix}
+BINDIR=%{__bindir}
+DATADIR=%{__datadir}
+INCLUDEDIR=%{__includedir}
+LIBDIR=%{__libdir}
+MANDIR=%{__mandir}
+
+if [ -d $BINDIR ]; then
+	rmdir $BINDIR 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $LIBDIR ]; then
+	rmdir $LIBDIR 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $INCLUDEDIR ]; then
+	rmdir $INCLUDEDIR 2>&1 || EXITSTATUS=-1
+fi
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a "$LIBDIR" = "/opt/libjpeg-turbo/lib" ]; then
+	if [ -h $LIBDIR\32 ]; then
+		rm $LIBDIR\32 2>&1 || EXITSTATUS=-1
+	fi
+	if [ -h $LIBDIR\64 ]; then
+		rm $LIBDIR\64 2>&1 || EXITSTATUS=-1
+	fi
+fi
+if [ -d $MANDIR/man1 ]; then
+	rmdir $MANDIR/man1 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $MANDIR ]; then
+	rmdir $MANDIR 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $DATADIR/classes ]; then
+	rmdir $DATADIR/classes 2>&1 || EXITSTATUS=-1
+fi
+if [ -d $DATADIR -a "$DATADIR" != "$PREFIX" ]; then
+	rmdir $DATADIR 2>&1 || EXITSTATUS=-1
+fi
+if [ "$PREFIX" = "/opt/libjpeg-turbo" -a -h "$PREFIX/doc" ]; then
+	rm $PREFIX/doc 2>&1 || EXITSTATUS=-1
+fi
+rmdir $PREFIX 2>&1 || EXITSTATUS=-1
+rmdir /Library/Documentation/$PACKAGE 2>&1 || EXITSTATUS=-1
+
+if [ -d $RECEIPT ]; then
+	echo Removing package receipt ...
+	rm -r $RECEIPT 2>&1 || EXITSTATUS=-1
+else
+	echo Forgetting package $MACPACKAGE ...
+	pkgutil --forget $MACPACKAGE
+fi
+
+exit $EXITSTATUS
diff --git a/sharedlib/CMakeLists.txt b/sharedlib/CMakeLists.txt
new file mode 100755
index 0000000..d423cce
--- /dev/null
+++ b/sharedlib/CMakeLists.txt
@@ -0,0 +1,73 @@
+# Anything that must be linked against the shared C library on Windows must
+# be built in this subdirectory, because CMake doesn't allow us to override
+# the compiler flags for each build type except at directory scope.  Note
+# to CMake developers:  Add a COMPILE_FLAGS_<CONFIG> target property, or
+# better yet, provide a friendly way of configuring a Windows target to use the
+# static C library.
+
+if(MSVC)
+  # Build all configurations against shared C library
+  foreach(var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+    CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
+    if(${var} MATCHES "/MT")
+      string(REGEX REPLACE "/MT" "/MD" ${var} "${${var}}")
+    endif()
+  endforeach()
+endif()
+
+foreach(src ${JPEG_SOURCES})
+  set(JPEG_SRCS ${JPEG_SRCS} ${CMAKE_SOURCE_DIR}/${src})
+endforeach()
+
+if(WITH_SIMD)
+  # This tells CMake that the "source" files haven't been generated yet
+  set_source_files_properties(${SIMD_OBJS} PROPERTIES GENERATED 1)
+endif()
+
+if(WITH_MEM_SRCDST AND NOT WITH_JPEG8)
+  add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS}
+    ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}-memsrcdst.def)
+else()
+  add_library(jpeg SHARED ${JPEG_SRCS} ${SIMD_OBJS}
+    ${CMAKE_SOURCE_DIR}/win/jpeg${DLL_VERSION}.def)
+endif()
+set_target_properties(jpeg PROPERTIES SOVERSION ${DLL_VERSION}
+  VERSION ${FULLVERSION})
+if(MSVC)
+  set_target_properties(jpeg PROPERTIES SUFFIX ${DLL_VERSION}.dll)
+elseif(MINGW OR CYGWIN)
+  set_target_properties(jpeg PROPERTIES SUFFIX -${DLL_VERSION}.dll)
+endif(MSVC)
+if(WITH_SIMD)
+  add_dependencies(jpeg simd)
+endif()
+
+if(WITH_12BIT)  # 12-bit builds leave out the BMP and Targa readers/writers
+  set(COMPILE_FLAGS "-DGIF_SUPPORTED -DPPM_SUPPORTED -DUSE_SETMODE")
+else()
+  set(COMPILE_FLAGS "-DBMP_SUPPORTED -DGIF_SUPPORTED -DPPM_SUPPORTED -DTARGA_SUPPORTED -DUSE_SETMODE")
+  set(CJPEG_BMP_SOURCES ../rdbmp.c ../rdtarga.c)  # extra cjpeg sources
+  set(DJPEG_BMP_SOURCES ../wrbmp.c ../wrtarga.c)  # extra djpeg sources
+endif()
+
+add_executable(cjpeg ../cjpeg.c ../cdjpeg.c ../rdgif.c ../rdppm.c
+  ../rdswitch.c ${CJPEG_BMP_SOURCES})
+set_property(TARGET cjpeg PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+target_link_libraries(cjpeg jpeg)
+
+add_executable(djpeg ../djpeg.c ../cdjpeg.c ../rdcolmap.c ../rdswitch.c
+  ../wrgif.c ../wrppm.c ${DJPEG_BMP_SOURCES})
+set_property(TARGET djpeg PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+target_link_libraries(djpeg jpeg)
+
+add_executable(jpegtran ../jpegtran.c ../cdjpeg.c ../rdswitch.c ../transupp.c)
+target_link_libraries(jpegtran jpeg)
+set_property(TARGET jpegtran PROPERTY COMPILE_FLAGS "-DUSE_SETMODE")
+
+add_executable(jcstest ../jcstest.c)
+target_link_libraries(jcstest jpeg)
+
+install(TARGETS jpeg cjpeg djpeg jpegtran
+  ARCHIVE DESTINATION lib
+  LIBRARY DESTINATION lib
+  RUNTIME DESTINATION bin)
diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt
new file mode 100755
index 0000000..25bd290
--- /dev/null
+++ b/simd/CMakeLists.txt
@@ -0,0 +1,78 @@
+if(NOT DEFINED NASM)  # keep any value already supplied (e.g. -DNASM=...)
+  set(NASM nasm CACHE FILEPATH "Path to NASM/YASM executable")
+endif()
+
+if(SIMD_X86_64)
+  set(NAFLAGS -fwin64 -DWIN64 -D__x86_64__)
+else()
+  if(BORLAND)
+    set(NAFLAGS -fobj -DOBJ32)
+  else()
+    set(NAFLAGS -fwin32 -DWIN32)
+  endif()
+endif()
+set(NAFLAGS ${NAFLAGS} -I${CMAKE_SOURCE_DIR}/win/ -I${CMAKE_CURRENT_SOURCE_DIR}/)
+
+# This only works if building from the command line.  There is currently no way
+# to set a variable's value based on the build type when using the MSVC IDE.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug"
+  OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+  set(NAFLAGS ${NAFLAGS} -g)
+endif()
+
+if(SIMD_X86_64)
+  set(SIMD_BASENAMES jfdctflt-sse-64 jccolor-sse2-64 jcgray-sse2-64
+    jcsample-sse2-64 jdcolor-sse2-64 jdmerge-sse2-64 jdsample-sse2-64
+    jfdctfst-sse2-64 jfdctint-sse2-64 jidctflt-sse2-64 jidctfst-sse2-64
+    jidctint-sse2-64 jidctred-sse2-64 jquantf-sse2-64 jquanti-sse2-64)
+  message(STATUS "Building x86_64 SIMD extensions")
+else()
+  set(SIMD_BASENAMES jsimdcpu jfdctflt-3dn jidctflt-3dn jquant-3dn jccolor-mmx
+    jcgray-mmx jcsample-mmx jdcolor-mmx jdmerge-mmx jdsample-mmx jfdctfst-mmx
+    jfdctint-mmx jidctfst-mmx jidctint-mmx jidctred-mmx jquant-mmx jfdctflt-sse
+    jidctflt-sse jquant-sse jccolor-sse2 jcgray-sse2 jcsample-sse2 jdcolor-sse2
+    jdmerge-sse2 jdsample-sse2 jfdctfst-sse2 jfdctint-sse2 jidctflt-sse2
+    jidctfst-sse2 jidctint-sse2 jidctred-sse2 jquantf-sse2 jquanti-sse2)
+  message(STATUS "Building i386 SIMD extensions")
+endif()
+
+if(MSVC_IDE)
+  set(OBJDIR "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}")
+else()
+  set(OBJDIR ${CMAKE_CURRENT_BINARY_DIR})
+endif()
+
+file(GLOB INC_FILES *.inc)
+
+foreach(file ${SIMD_BASENAMES})  # one NASM custom command per SIMD source
+  set(DEPFILE "")  # extra dependency; only set for the four cases below
+  set(SIMD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/${file}.asm)
+  if(${file} MATCHES jccolor)  # jccolor-* also depends on jccolext-*.asm
+    set(DEPFILE ${file})
+    string(REGEX REPLACE "jccolor" "jccolext" DEPFILE ${DEPFILE})
+    set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm)
+  endif()
+  if(${file} MATCHES jcgray)  # jcgray-* also depends on jcgryext-*.asm
+    set(DEPFILE ${file})
+    string(REGEX REPLACE "jcgray" "jcgryext" DEPFILE ${DEPFILE})
+    set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm)
+  endif()
+  if(${file} MATCHES jdcolor)  # jdcolor-* also depends on jdcolext-*.asm
+    set(DEPFILE ${file})
+    string(REGEX REPLACE "jdcolor" "jdcolext" DEPFILE ${DEPFILE})
+    set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm)
+  endif()
+  if(${file} MATCHES jdmerge)  # jdmerge-* also depends on jdmrgext-*.asm
+    set(DEPFILE ${file})
+    string(REGEX REPLACE "jdmerge" "jdmrgext" DEPFILE ${DEPFILE})
+    set(DEPFILE ${CMAKE_CURRENT_SOURCE_DIR}/${DEPFILE}.asm)
+  endif()
+  set(SIMD_OBJ ${OBJDIR}/${file}.obj)  # assembled object is written under OBJDIR
+  add_custom_command(OUTPUT ${SIMD_OBJ}
+    DEPENDS ${SIMD_SRC} ${DEPFILE} ${INC_FILES}
+    COMMAND ${NASM} ${NAFLAGS} ${SIMD_SRC} -o${SIMD_OBJ})
+  set(SIMD_OBJS ${SIMD_OBJS} ${SIMD_OBJ})  # collected for the simd target below
+endforeach()
+
+set(SIMD_OBJS ${SIMD_OBJS} PARENT_SCOPE)
+add_custom_target(simd DEPENDS ${SIMD_OBJS})
diff --git a/simd/Makefile.am b/simd/Makefile.am
new file mode 100644
index 0000000..72d6366
--- /dev/null
+++ b/simd/Makefile.am
@@ -0,0 +1,79 @@
+noinst_LTLIBRARIES = libsimd.la
+
+BUILT_SOURCES = jsimdcfg.inc
+
+EXTRA_DIST = nasm_lt.sh CMakeLists.txt \
+	jccolext-mmx.asm   jcgryext-mmx.asm   jdcolext-mmx.asm   jdmrgext-mmx.asm \
+	jccolext-sse2.asm  jcgryext-sse2.asm  jdcolext-sse2.asm  jdmrgext-sse2.asm \
+	jccolext-sse2-64.asm  jcgryext-sse2-64.asm  jdcolext-sse2-64.asm \
+	jdmrgext-sse2-64.asm
+
+if SIMD_X86_64
+
+libsimd_la_SOURCES = jsimd_x86_64.c jsimd.h jsimdcfg.inc.h jsimdext.inc \
+	jcolsamp.inc jdct.inc jfdctflt-sse-64.asm \
+	jccolor-sse2-64.asm   jcgray-sse2-64.asm    jcsample-sse2-64.asm \
+	jdcolor-sse2-64.asm   jdmerge-sse2-64.asm   jdsample-sse2-64.asm \
+	jfdctfst-sse2-64.asm  jfdctint-sse2-64.asm  jidctflt-sse2-64.asm \
+	jidctfst-sse2-64.asm  jidctint-sse2-64.asm  jidctred-sse2-64.asm  \
+	jquantf-sse2-64.asm   jquanti-sse2-64.asm
+
+jccolor-sse2-64.lo:  jccolext-sse2-64.asm
+jcgray-sse2-64.lo:   jcgryext-sse2-64.asm
+jdcolor-sse2-64.lo:  jdcolext-sse2-64.asm
+jdmerge-sse2-64.lo:  jdmrgext-sse2-64.asm
+
+endif
+
+if SIMD_I386
+
+libsimd_la_SOURCES = jsimd_i386.c jsimd.h jsimdcfg.inc.h jsimdext.inc \
+	jcolsamp.inc jdct.inc jsimdcpu.asm \
+	jfdctflt-3dn.asm   jidctflt-3dn.asm   jquant-3dn.asm \
+	jccolor-mmx.asm    jcgray-mmx.asm     jcsample-mmx.asm \
+	jdcolor-mmx.asm    jdmerge-mmx.asm    jdsample-mmx.asm \
+	jfdctfst-mmx.asm   jfdctint-mmx.asm   jidctfst-mmx.asm \
+	jidctint-mmx.asm   jidctred-mmx.asm   jquant-mmx.asm \
+	jfdctflt-sse.asm   jidctflt-sse.asm   jquant-sse.asm \
+	jccolor-sse2.asm   jcgray-sse2.asm    jcsample-sse2.asm \
+	jdcolor-sse2.asm   jdmerge-sse2.asm   jdsample-sse2.asm \
+	jfdctfst-sse2.asm  jfdctint-sse2.asm  jidctflt-sse2.asm \
+	jidctfst-sse2.asm  jidctint-sse2.asm  jidctred-sse2.asm  \
+	jquantf-sse2.asm   jquanti-sse2.asm
+# Extra prerequisites: rebuild each object when its matching *ext-*.asm file changes.
+jccolor-mmx.lo:   jccolext-mmx.asm
+jcgray-mmx.lo:    jcgryext-mmx.asm
+jdcolor-mmx.lo:   jdcolext-mmx.asm
+jdmerge-mmx.lo:   jdmrgext-mmx.asm
+jccolor-sse2.lo:  jccolext-sse2.asm
+jcgray-sse2.lo:   jcgryext-sse2.asm
+jdcolor-sse2.lo:  jdcolext-sse2.asm
+jdmerge-sse2.lo:  jdmrgext-sse2.asm
+
+endif
+
+if SIMD_ARM
+
+libsimd_la_SOURCES = jsimd_arm.c jsimd_arm_neon.S
+
+endif
+
+if SIMD_ARM_64
+
+libsimd_la_SOURCES = jsimd_arm64.c jsimd_arm_neon_64.S
+
+endif
+
+if SIMD_MIPS
+
+libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S
+
+endif
+
+AM_CPPFLAGS = -I$(top_srcdir)
+
+.asm.lo:
+	$(LIBTOOL) --mode=compile --tag NASM $(srcdir)/nasm_lt.sh $(NASM) $(NAFLAGS) -I$(srcdir) -I. $< -o $@
+
+jsimdcfg.inc: $(srcdir)/jsimdcfg.inc.h ../jpeglib.h ../jconfig.h ../jmorecfg.h
+	$(CPP) -I$(top_builddir) -I$(top_builddir)/simd $(srcdir)/jsimdcfg.inc.h | $(EGREP) "^[\;%]|^\ %" | sed 's%_cpp_protection_%%' | sed 's@% define@%define@g' > $@
diff --git a/simd/jccolext-mmx.asm b/simd/jccolext-mmx.asm
new file mode 100644
index 0000000..d3d47a5
--- /dev/null
+++ b/simd/jccolext-mmx.asm
@@ -0,0 +1,477 @@
+;
+; jccolext.asm - colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width,
+;                           JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                           JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
+; The argument macros above take a base b; the routine passes eax (= esp after "push ebp").
+%define original_ebp    ebp+0           ; slot where the prologue stores that pre-alignment esp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          8
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_rgb_ycc_convert_mmx)
+
+EXTN(jsimd_rgb_ycc_convert_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [img_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
+
+        push    ecx
+
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+        pop     ecx
+
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
+.rowloop:
+        pushpic eax
+        push    edx
+        push    ebx
+        push    edi
+        push    esi
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        mov     ebx, JSAMPROW [ebx]     ; outptr1
+        mov     edx, JSAMPROW [edx]     ; outptr2
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     short .columnloop
+        alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        xor     eax,eax
+        mov     al, BYTE [esi+ecx]
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        xor     edx,edx
+        mov     dx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
+.column_ld4:
+        movd    mmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    mmG, DWORD [esi+ecx]
+        psllq   mmA, DWORD_BIT
+        por     mmA,mmG
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        movq    mmG,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        mov     ecx, SIZEOF_MMWORD
+        jmp     short .rgb_ycc_cnv
+.column_ld16:
+        test    cl, 2*SIZEOF_MMWORD
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_ycc_cnv
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
+
+.columnloop:
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+
+.rgb_ycc_cnv:
+        ; mmA=(00 10 20 01 11 21 02 12)
+        ; mmG=(22 03 13 23 04 14 24 05)
+        ; mmF=(15 25 06 16 26 07 17 27)
+
+        movq      mmD,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 10 20 01)
+        psrlq     mmD,4*BYTE_BIT        ; mmD=(11 21 02 12 -- -- -- --)
+
+        punpckhbw mmA,mmG               ; mmA=(00 04 10 14 20 24 01 05)
+        psllq     mmG,4*BYTE_BIT        ; mmG=(-- -- -- -- 22 03 13 23)
+
+        punpcklbw mmD,mmF               ; mmD=(11 15 21 25 02 06 12 16)
+        punpckhbw mmG,mmF               ; mmG=(22 26 03 07 13 17 23 27)
+
+        movq      mmE,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 04 10 14)
+        psrlq     mmE,4*BYTE_BIT        ; mmE=(20 24 01 05 -- -- -- --)
+
+        punpckhbw mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        psllq     mmD,4*BYTE_BIT        ; mmD=(-- -- -- -- 11 15 21 25)
+
+        punpcklbw mmE,mmG               ; mmE=(20 22 24 26 01 03 05 07)
+        punpckhbw mmD,mmG               ; mmD=(11 13 15 17 21 23 25 27)
+
+        pxor      mmH,mmH
+
+        movq      mmC,mmA
+        punpcklbw mmA,mmH               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmH               ; mmC=(10 12 14 16)
+
+        movq      mmB,mmE
+        punpcklbw mmE,mmH               ; mmE=(20 22 24 26)
+        punpckhbw mmB,mmH               ; mmB=(01 03 05 07)
+
+        movq      mmF,mmD
+        punpcklbw mmD,mmH               ; mmD=(11 13 15 17)
+        punpckhbw mmF,mmH               ; mmF=(21 23 25 27)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+        test    cl, SIZEOF_MMWORD/8
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_MMWORD/8
+        movd    mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+        test    cl, SIZEOF_MMWORD/4
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_MMWORD/4
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld4:
+        test    cl, SIZEOF_MMWORD/2
+        mov     ecx, SIZEOF_MMWORD
+        jz      short .rgb_ycc_cnv
+        movq    mmD,mmA
+        movq    mmC,mmF
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
+
+.columnloop:
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+
+.rgb_ycc_cnv:
+        ; mmA=(00 10 20 30 01 11 21 31)
+        ; mmF=(02 12 22 32 03 13 23 33)
+        ; mmD=(04 14 24 34 05 15 25 35)
+        ; mmC=(06 16 26 36 07 17 27 37)
+
+        movq      mmB,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 10 12 20 22 30 32)
+        punpckhbw mmB,mmF               ; mmB=(01 03 11 13 21 23 31 33)
+
+        movq      mmG,mmD
+        punpcklbw mmD,mmC               ; mmD=(04 06 14 16 24 26 34 36)
+        punpckhbw mmG,mmC               ; mmG=(05 07 15 17 25 27 35 37)
+
+        movq      mmE,mmA
+        punpcklwd mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        punpckhwd mmE,mmD               ; mmE=(20 22 24 26 30 32 34 36)
+
+        movq      mmH,mmB
+        punpcklwd mmB,mmG               ; mmB=(01 03 05 07 11 13 15 17)
+        punpckhwd mmH,mmG               ; mmH=(21 23 25 27 31 33 35 37)
+
+        pxor      mmF,mmF
+
+        movq      mmC,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmF               ; mmC=(10 12 14 16)
+
+        movq      mmD,mmB
+        punpcklbw mmB,mmF               ; mmB=(01 03 05 07)
+        punpckhbw mmD,mmF               ; mmD=(11 13 15 17)
+
+        movq      mmG,mmE
+        punpcklbw mmE,mmF               ; mmE=(20 22 24 26)
+        punpckhbw mmG,mmF               ; mmG=(30 32 34 36)
+
+        punpcklbw mmF,mmH
+        punpckhbw mmH,mmH
+        psrlw     mmF,BYTE_BIT          ; mmF=(21 23 25 27)
+        psrlw     mmH,BYTE_BIT          ; mmH=(31 33 35 37)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+        ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+
+        movq      MMWORD [wk(0)], mm0   ; wk(0)=RE
+        movq      MMWORD [wk(1)], mm1   ; wk(1)=RO
+        movq      MMWORD [wk(2)], mm4   ; wk(2)=BE
+        movq      MMWORD [wk(3)], mm5   ; wk(3)=BO
+
+        movq      mm6,mm1
+        punpcklwd mm1,mm3
+        punpckhwd mm6,mm3
+        movq      mm7,mm1
+        movq      mm4,mm6
+        pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+
+        movq      MMWORD [wk(4)], mm1   ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+        movq      MMWORD [wk(5)], mm6   ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        pxor      mm1,mm1
+        pxor      mm6,mm6
+        punpcklwd mm1,mm5               ; mm1=BOL
+        punpckhwd mm6,mm5               ; mm6=BOH
+        psrld     mm1,1                 ; mm1=BOL*FIX(0.500)
+        psrld     mm6,1                 ; mm6=BOH*FIX(0.500)
+
+        movq      mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
+
+        paddd     mm7,mm1
+        paddd     mm4,mm6
+        paddd     mm7,mm5
+        paddd     mm4,mm5
+        psrld     mm7,SCALEBITS         ; mm7=CbOL
+        psrld     mm4,SCALEBITS         ; mm4=CbOH
+        packssdw  mm7,mm4               ; mm7=CbO
+
+        movq      mm1, MMWORD [wk(2)]   ; mm1=BE
+
+        movq      mm6,mm0
+        punpcklwd mm0,mm2
+        punpckhwd mm6,mm2
+        movq      mm5,mm0
+        movq      mm4,mm6
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+
+        movq      MMWORD [wk(6)], mm0   ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movq      MMWORD [wk(7)], mm6   ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        pxor      mm0,mm0
+        pxor      mm6,mm6
+        punpcklwd mm0,mm1               ; mm0=BEL
+        punpckhwd mm6,mm1               ; mm6=BEH
+        psrld     mm0,1                 ; mm0=BEL*FIX(0.500)
+        psrld     mm6,1                 ; mm6=BEH*FIX(0.500)
+
+        movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+
+        paddd     mm5,mm0
+        paddd     mm4,mm6
+        paddd     mm5,mm1
+        paddd     mm4,mm1
+        psrld     mm5,SCALEBITS         ; mm5=CbEL
+        psrld     mm4,SCALEBITS         ; mm4=CbEH
+        packssdw  mm5,mm4               ; mm5=CbE
+
+        psllw     mm7,BYTE_BIT
+        por       mm5,mm7               ; mm5=Cb
+        movq      MMWORD [ebx], mm5     ; Save Cb
+
+        movq      mm0, MMWORD [wk(3)]   ; mm0=BO
+        movq      mm6, MMWORD [wk(2)]   ; mm6=BE
+        movq      mm1, MMWORD [wk(1)]   ; mm1=RO
+
+        movq      mm4,mm0
+        punpcklwd mm0,mm3
+        punpckhwd mm4,mm3
+        movq      mm7,mm0
+        movq      mm5,mm4
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+
+        movq      mm3,[GOTOFF(eax,PD_ONEHALF)]  ; mm3=[PD_ONEHALF]
+
+        paddd     mm0, MMWORD [wk(4)]
+        paddd     mm4, MMWORD [wk(5)]
+        paddd     mm0,mm3
+        paddd     mm4,mm3
+        psrld     mm0,SCALEBITS         ; mm0=YOL
+        psrld     mm4,SCALEBITS         ; mm4=YOH
+        packssdw  mm0,mm4               ; mm0=YO
+
+        pxor      mm3,mm3
+        pxor      mm4,mm4
+        punpcklwd mm3,mm1               ; mm3=ROL
+        punpckhwd mm4,mm1               ; mm4=ROH
+        psrld     mm3,1                 ; mm3=ROL*FIX(0.500)
+        psrld     mm4,1                 ; mm4=ROH*FIX(0.500)
+
+        movq      mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
+
+        paddd     mm7,mm3
+        paddd     mm5,mm4
+        paddd     mm7,mm1
+        paddd     mm5,mm1
+        psrld     mm7,SCALEBITS         ; mm7=CrOL
+        psrld     mm5,SCALEBITS         ; mm5=CrOH
+        packssdw  mm7,mm5               ; mm7=CrO
+
+        movq      mm3, MMWORD [wk(0)]   ; mm3=RE
+
+        movq      mm4,mm6
+        punpcklwd mm6,mm2
+        punpckhwd mm4,mm2
+        movq      mm1,mm6
+        movq      mm5,mm4
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+
+        movq      mm2,[GOTOFF(eax,PD_ONEHALF)]  ; mm2=[PD_ONEHALF]
+
+        paddd     mm6, MMWORD [wk(6)]
+        paddd     mm4, MMWORD [wk(7)]
+        paddd     mm6,mm2
+        paddd     mm4,mm2
+        psrld     mm6,SCALEBITS         ; mm6=YEL
+        psrld     mm4,SCALEBITS         ; mm4=YEH
+        packssdw  mm6,mm4               ; mm6=YE
+
+        psllw     mm0,BYTE_BIT
+        por       mm6,mm0               ; mm6=Y
+        movq      MMWORD [edi], mm6     ; Save Y
+
+        pxor      mm2,mm2
+        pxor      mm4,mm4
+        punpcklwd mm2,mm3               ; mm2=REL
+        punpckhwd mm4,mm3               ; mm4=REH
+        psrld     mm2,1                 ; mm2=REL*FIX(0.500)
+        psrld     mm4,1                 ; mm4=REH*FIX(0.500)
+
+        movq      mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
+
+        paddd     mm1,mm2
+        paddd     mm5,mm4
+        paddd     mm1,mm0
+        paddd     mm5,mm0
+        psrld     mm1,SCALEBITS         ; mm1=CrEL
+        psrld     mm5,SCALEBITS         ; mm5=CrEH
+        packssdw  mm1,mm5               ; mm1=CrE
+
+        psllw     mm7,BYTE_BIT
+        por       mm1,mm7               ; mm1=Cr
+        movq      MMWORD [edx], mm1     ; Save Cr
+
+        sub     ecx, byte SIZEOF_MMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; inptr
+        add     edi, byte SIZEOF_MMWORD                 ; outptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; outptr1
+        add     edx, byte SIZEOF_MMWORD                 ; outptr2
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
+
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        pop     ebx
+        pop     edx
+        poppic  eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jccolext-sse2-64.asm b/simd/jccolext-sse2-64.asm
new file mode 100644
index 0000000..079953a
--- /dev/null
+++ b/simd/jccolext-sse2-64.asm
@@ -0,0 +1,464 @@
+;
+; jccolext-sse2-64.asm - colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2009, 2014, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
+;                             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                             JDIMENSION output_row, int num_rows);
+;
+
+; r10 = JDIMENSION img_width
+; r11 = JSAMPARRAY input_buf
+; r12 = JSAMPIMAGE output_buf
+; r13 = JDIMENSION output_row
+; r14 = int num_rows
+
+        align   16
+
+        global  EXTN(jsimd_rgb_ycc_convert_sse2)
+
+EXTN(jsimd_rgb_ycc_convert_sse2):
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args                    ; macro defined elsewhere; presumably fills r10-r14 as listed above
+        push    rbx
+        ; img_width/output_row/num_rows are 32-bit; only the low dwords of r10/r13/r14 are read.
+        mov     ecx, r10d               ; rcx = img_width (a 32-bit write zero-extends into rcx)
+        test    rcx,rcx
+        jz      near .return            ; zero-width image: nothing to convert
+
+        push    rcx                     ; park img_width while rcx is reused for output_row
+
+        mov     rsi, r12                ; rsi = output_buf
+        mov     ecx, r13d               ; rcx = output_row, zero-extended
+        mov     rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]       ; rdi = output_buf[0]
+        mov     rbx, JSAMPARRAY [rsi+1*SIZEOF_JSAMPARRAY]       ; rbx = output_buf[1]
+        mov     rdx, JSAMPARRAY [rsi+2*SIZEOF_JSAMPARRAY]       ; rdx = output_buf[2]
+        lea     rdi, [rdi+rcx*SIZEOF_JSAMPROW]  ; rdi = &output_buf[0][output_row]
+        lea     rbx, [rbx+rcx*SIZEOF_JSAMPROW]  ; rbx = &output_buf[1][output_row]
+        lea     rdx, [rdx+rcx*SIZEOF_JSAMPROW]  ; rdx = &output_buf[2][output_row]
+
+        pop     rcx                     ; rcx = img_width again
+
+        mov     rsi, r11                ; rsi = input_buf
+        mov     eax, r14d               ; rax = num_rows (zero-extended, so test the dword)
+        test    eax,eax                 ; signed 32-bit test: rejects num_rows <= 0
+        jle     near .return
+.rowloop:                               ; one pass per row; rax holds the remaining row count
+        push    rdx                     ; save the four row-array cursors; they are
+        push    rbx                     ; popped again once this row is finished
+        push    rdi
+        push    rsi
+        push    rcx                     ; col
+        ; Replace each row-array cursor with the row pointer it currently addresses.
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr0
+        mov     rbx, JSAMPROW [rbx]     ; outptr1
+        mov     rdx, JSAMPROW [rdx]     ; outptr2
+        ; At least SIZEOF_XMMWORD columns: full-block loop.  Otherwise fall into .column_ld1.
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:                            ; partial block: fewer than SIZEOF_XMMWORD columns remain
+        push    rax                     ; rax (row counter) and rdx (outptr2) are borrowed
+        push    rdx                     ; as scratch until .column_ld4
+        lea     rcx,[rcx+rcx*2]         ; rcx *= 3 (RGB_PIXELSIZE): column count -> byte count
+        test    cl, SIZEOF_BYTE         ; each test below checks one bit of the byte count;
+        jz      short .column_ld2       ; pieces are taken from the end of the row backwards
+        sub     rcx, byte SIZEOF_BYTE
+        movzx   rax, BYTE [rsi+rcx]     ; last byte of the row
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_WORD
+        movzx   rdx, WORD [rsi+rcx]
+        shl     rax, WORD_BIT           ; bytes gathered so far move up ...
+        or      rax,rdx                 ; ... and the earlier-in-memory word goes below them
+.column_ld4:
+        movd    xmmA,eax                ; NOTE(review): eax is stale where no BYTE/WORD piece was read; looks like those lanes lie past the valid bytes - confirm
+        pop     rdx
+        pop     rax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_DWORD      ; same merge pattern, now inside the XMM register
+        por     xmmA,xmmF
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     rcx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
+.column_ld16:
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA               ; gathered tail becomes the second input register
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; first XMMWORD of the row remainder
+        mov     rcx, SIZEOF_XMMWORD     ; one block left, so the `sub` after the stores reaches 0
+        jmp     short .rgb_ycc_cnv      ; xmmB is not loaded on this path
+.column_ld32:
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     rcx, SIZEOF_XMMWORD     ; mov does not alter the flags set by test
+        jz      short .rgb_ycc_cnv      ; no whole XMMWORD left: the tail stays in xmmA
+        movdqa  xmmB,xmmA               ; gathered tail becomes the third input register
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+
+.columnloop:                            ; full block: three unaligned XMMWORD loads from inptr
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+        ; Lane labels read as <byte-within-pixel><pixel index>.  Three shift+unpack rounds regroup them.
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+        pxor      xmmH,xmmH     ; xmmH=0; unpacking against it zero-extends bytes to words
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:                            ; partial block: fewer than SIZEOF_XMMWORD columns remain
+        test    cl, SIZEOF_XMMWORD/16   ; rcx stays a column count here; addresses scale it
+        jz      short .column_ld2       ; by RGB_PIXELSIZE.  Pieces are taken from the row end.
+        sub     rcx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE] ; one DWORD piece
+.column_ld2:
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]        ; one MMWORD piece
+        pslldq  xmmA, SIZEOF_MMWORD     ; already-gathered (later) data moves up ...
+        por     xmmA,xmmE               ; ... and the earlier-in-memory piece goes below it
+.column_ld4:
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA               ; gathered tail becomes the second input register
+        movdqu  xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]   ; one XMMWORD piece
+.column_ld8:
+        test    cl, SIZEOF_XMMWORD/2
+        mov     rcx, SIZEOF_XMMWORD     ; one block left (mov does not alter the flags from test)
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmF,xmmA               ; gathered data becomes the third and fourth
+        movdqa  xmmH,xmmE               ; input registers
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+
+.columnloop:                            ; full block: four unaligned XMMWORD loads from inptr
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+        ; Lane labels read as <byte-within-pixel><pixel index>.  Unpack rounds regroup them.
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+        pxor      xmmF,xmmF     ; xmmF=0; unpacking against it zero-extends bytes to words
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
+        ; No zero register is left: put each byte in the high half of a word, then shift it down.
+        punpcklbw xmmF,xmmH     ; xmmF (still zero) supplies the low byte of every word
+        punpckhbw xmmH,xmmH     ; each byte is paired with itself
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ; xmm0/xmm1/xmm4/xmm5 are overwritten below, so RE/RO/BE/BO are copied to xmm8-xmm11 first.
+        movdqa    xmm8,xmm0             ; xmm8=RE
+        movdqa    xmm9,xmm1             ; xmm9=RO
+        movdqa    xmm10,xmm4            ; xmm10=BE
+        movdqa    xmm11,xmm5            ; xmm11=BO
+
+        movdqa    xmm13,xmm1            ; second copy of RO for the high-half interleave
+        punpcklwd xmm1,xmm3             ; low four (RO,GO) word pairs
+        punpckhwd xmm13,xmm3            ; high four (RO,GO) word pairs
+        movdqa    xmm7,xmm1             ; the same pairs feed both the Y and the Cb products
+        movdqa    xmm4,xmm13
+        pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm13,[rel PW_F0299_F0337] ; xmm13=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   xmm7,[rel PW_MF016_MF033] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+        ; A zero word under each B word gives B<<16; psrld 1 then gives B*FIX(0.5) (assumes SCALEBITS is 16 - TODO confirm).
+        pxor      xmm12,xmm12
+        pxor      xmm6,xmm6
+        punpcklwd xmm12,xmm5            ; xmm12=BOL
+        punpckhwd xmm6,xmm5             ; xmm6=BOH
+        psrld     xmm12,1               ; xmm12=BOL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BOH*FIX(0.500)
+
+        movdqa    xmm5,[rel PD_ONEHALFM1_CJ] ; xmm5=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm7,xmm12
+        paddd     xmm4,xmm6
+        paddd     xmm7,xmm5
+        paddd     xmm4,xmm5
+        psrld     xmm7,SCALEBITS        ; xmm7=CbOL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbOH
+        packssdw  xmm7,xmm4             ; xmm7=CbO
+
+        movdqa    xmm15,xmm0            ; second copy of RE for the high-half interleave
+        punpcklwd xmm0,xmm2             ; low four (RE,GE) word pairs
+        punpckhwd xmm15,xmm2            ; high four (RE,GE) word pairs
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm15
+        pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm15,[rel PW_F0299_F0337] ; xmm15=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   xmm5,[rel PW_MF016_MF033] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   xmm4,[rel PW_MF016_MF033] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+
+        pxor      xmm14,xmm14
+        pxor      xmm6,xmm6
+        punpcklwd xmm14,xmm10           ; xmm14=BEL
+        punpckhwd xmm6,xmm10            ; xmm6=BEH
+        psrld     xmm14,1               ; xmm14=BEL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BEH*FIX(0.500)
+
+        movdqa    xmm12,[rel PD_ONEHALFM1_CJ] ; xmm12=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm5,xmm14
+        paddd     xmm4,xmm6
+        paddd     xmm5,xmm12
+        paddd     xmm4,xmm12
+        psrld     xmm5,SCALEBITS        ; xmm5=CbEL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbEH
+        packssdw  xmm5,xmm4             ; xmm5=CbE
+        ; Odd-column words move to the high byte and are OR-ed with the even-column words: 16 packed bytes.
+        psllw     xmm7,BYTE_BIT
+        por       xmm5,xmm7             ; xmm5=Cb
+        movdqa    XMMWORD [rbx], xmm5   ; Save Cb
+
+        movdqa    xmm4,xmm11            ; second copy of BO for the high-half interleave
+        punpcklwd xmm11,xmm3            ; low four (BO,GO) word pairs
+        punpckhwd xmm4,xmm3             ; high four (BO,GO) word pairs
+        movdqa    xmm7,xmm11            ; the same pairs feed both the Y and the Cr products
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm11,[rel PW_F0114_F0250] ; xmm11=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   xmm7,[rel PW_MF008_MF041] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+
+        movdqa    xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
+
+        paddd     xmm11, xmm1           ; add the R/G partial sums computed earlier
+        paddd     xmm4, xmm13
+        paddd     xmm11,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm11,SCALEBITS       ; xmm11=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm11,xmm4            ; xmm11=YO
+
+        pxor      xmm3,xmm3
+        pxor      xmm4,xmm4
+        punpcklwd xmm3,xmm9             ; xmm3=ROL
+        punpckhwd xmm4,xmm9             ; xmm4=ROH
+        psrld     xmm3,1                ; xmm3=ROL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=ROH*FIX(0.500)
+
+        movdqa    xmm1,[rel PD_ONEHALFM1_CJ] ; xmm1=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm7,xmm3
+        paddd     xmm5,xmm4
+        paddd     xmm7,xmm1
+        paddd     xmm5,xmm1
+        psrld     xmm7,SCALEBITS        ; xmm7=CrOL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrOH
+        packssdw  xmm7,xmm5             ; xmm7=CrO
+
+        movdqa    xmm3, xmm8 ; xmm3=RE
+
+        movdqa    xmm4,xmm10            ; second copy of BE for the high-half interleave
+        punpcklwd xmm10,xmm2            ; low four (BE,GE) word pairs
+        punpckhwd xmm4,xmm2             ; high four (BE,GE) word pairs
+        movdqa    xmm1,xmm10
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm10,[rel PW_F0114_F0250] ; xmm10=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   xmm1,[rel PW_MF008_MF041] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   xmm5,[rel PW_MF008_MF041] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+
+        movdqa    xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
+
+        paddd     xmm10, xmm0           ; add the R/G partial sums computed earlier
+        paddd     xmm4, xmm15
+        paddd     xmm10,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm10,SCALEBITS       ; xmm10=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm10,xmm4            ; xmm10=YE
+        ; Stores are whole aligned XMMWORDs (movdqa), even when this block came from the partial loader.
+        psllw     xmm11,BYTE_BIT
+        por       xmm10,xmm11           ; xmm10=Y
+        movdqa    XMMWORD [rdi], xmm10  ; Save Y
+
+        pxor      xmm2,xmm2
+        pxor      xmm4,xmm4
+        punpcklwd xmm2,xmm3             ; xmm2=REL
+        punpckhwd xmm4,xmm3             ; xmm4=REH
+        psrld     xmm2,1                ; xmm2=REL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=REH*FIX(0.500)
+
+        movdqa    xmm0,[rel PD_ONEHALFM1_CJ] ; xmm0=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm1,xmm2
+        paddd     xmm5,xmm4
+        paddd     xmm1,xmm0
+        paddd     xmm5,xmm0
+        psrld     xmm1,SCALEBITS        ; xmm1=CrEL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrEH
+        packssdw  xmm1,xmm5             ; xmm1=CrE
+        ; Same odd/even byte merge as for Cb and Y.
+        psllw     xmm7,BYTE_BIT
+        por       xmm1,xmm7             ; xmm1=Cr
+        movdqa    XMMWORD [rdx], xmm1   ; Save Cr
+
+        sub     rcx, byte SIZEOF_XMMWORD        ; one block of columns consumed
+        add     rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     rdi, byte SIZEOF_XMMWORD                ; outptr0
+        add     rbx, byte SIZEOF_XMMWORD                ; outptr1
+        add     rdx, byte SIZEOF_XMMWORD                ; outptr2
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop                ; another full block remains
+        test    rcx,rcx
+        jnz     near .column_ld1                ; 1..SIZEOF_XMMWORD-1 columns left: partial loader
+        ; Row finished: restore the values pushed at .rowloop (reverse order).
+        pop     rcx                     ; col
+        pop     rsi
+        pop     rdi
+        pop     rbx
+        pop     rdx
+        ; Step every row-array cursor to its next JSAMPROW entry.
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     rdi, byte SIZEOF_JSAMPROW
+        add     rbx, byte SIZEOF_JSAMPROW
+        add     rdx, byte SIZEOF_JSAMPROW
+        dec     rax                             ; num_rows
+        jg      near .rowloop                   ; loop while rows remain
+
+.return:                                ; also the target of the early exits in the prologue
+        pop     rbx                     ; matches `push rbx` in the prologue
+        uncollect_args                  ; macro defined elsewhere; presumably undoes collect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jccolext-sse2.asm b/simd/jccolext-sse2.asm
new file mode 100644
index 0000000..cc38e98
--- /dev/null
+++ b/simd/jccolext-sse2.asm
@@ -0,0 +1,503 @@
+;
+; jccolext-sse2.asm - colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
+;                             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                             JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
+
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          8
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+
+        global  EXTN(jsimd_rgb_ycc_convert_sse2)
+
+EXTN(jsimd_rgb_ycc_convert_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [img_width(eax)]
+        test    ecx,ecx
+        jz      near .return
+
+        push    ecx
+
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+        pop     ecx
+
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
+.rowloop:
+        pushpic eax
+        push    edx
+        push    ebx
+        push    edi
+        push    esi
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        mov     ebx, JSAMPROW [ebx]     ; outptr1
+        mov     edx, JSAMPROW [edx]     ; outptr2
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        movzx   eax, BYTE [esi+ecx]
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        movzx   edx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
+.column_ld4:
+        movd    xmmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     ecx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
+.column_ld16:
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .rgb_ycc_cnv
+.column_ld32:
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+        pxor      xmmH,xmmH
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
+.column_ld4:
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld8:
+        test    cl, SIZEOF_XMMWORD/2
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_ycc_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_ycc_cnv
+        alignx  16,7
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+
+.rgb_ycc_cnv:
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+        pxor      xmmF,xmmF
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+        ; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
+        ; Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
+
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=RE
+        movdqa    XMMWORD [wk(1)], xmm1 ; wk(1)=RO
+        movdqa    XMMWORD [wk(2)], xmm4 ; wk(2)=BE
+        movdqa    XMMWORD [wk(3)], xmm5 ; wk(3)=BO
+
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        movdqa    xmm7,xmm1
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
+
+        movdqa    XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
+        movdqa    XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        pxor      xmm1,xmm1
+        pxor      xmm6,xmm6
+        punpcklwd xmm1,xmm5             ; xmm1=BOL
+        punpckhwd xmm6,xmm5             ; xmm6=BOH
+        psrld     xmm1,1                ; xmm1=BOL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BOH*FIX(0.500)
+
+        movdqa    xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm7,xmm1
+        paddd     xmm4,xmm6
+        paddd     xmm7,xmm5
+        paddd     xmm4,xmm5
+        psrld     xmm7,SCALEBITS        ; xmm7=CbOL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbOH
+        packssdw  xmm7,xmm4             ; xmm7=CbO
+
+        movdqa    xmm1, XMMWORD [wk(2)] ; xmm1=BE
+
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm6
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
+
+        movdqa    XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        pxor      xmm0,xmm0
+        pxor      xmm6,xmm6
+        punpcklwd xmm0,xmm1             ; xmm0=BEL
+        punpckhwd xmm6,xmm1             ; xmm6=BEH
+        psrld     xmm0,1                ; xmm0=BEL*FIX(0.500)
+        psrld     xmm6,1                ; xmm6=BEH*FIX(0.500)
+
+        movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm5,xmm0
+        paddd     xmm4,xmm6
+        paddd     xmm5,xmm1
+        paddd     xmm4,xmm1
+        psrld     xmm5,SCALEBITS        ; xmm5=CbEL
+        psrld     xmm4,SCALEBITS        ; xmm4=CbEH
+        packssdw  xmm5,xmm4             ; xmm5=CbE
+
+        psllw     xmm7,BYTE_BIT
+        por       xmm5,xmm7             ; xmm5=Cb
+        movdqa    XMMWORD [ebx], xmm5   ; Save Cb
+
+        movdqa    xmm0, XMMWORD [wk(3)] ; xmm0=BO
+        movdqa    xmm6, XMMWORD [wk(2)] ; xmm6=BE
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=RO
+
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        movdqa    xmm7,xmm0
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
+
+        movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
+
+        paddd     xmm0, XMMWORD [wk(4)]
+        paddd     xmm4, XMMWORD [wk(5)]
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
+
+        pxor      xmm3,xmm3
+        pxor      xmm4,xmm4
+        punpcklwd xmm3,xmm1             ; xmm3=ROL
+        punpckhwd xmm4,xmm1             ; xmm4=ROH
+        psrld     xmm3,1                ; xmm3=ROL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=ROH*FIX(0.500)
+
+        movdqa    xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm7,xmm3
+        paddd     xmm5,xmm4
+        paddd     xmm7,xmm1
+        paddd     xmm5,xmm1
+        psrld     xmm7,SCALEBITS        ; xmm7=CrOL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrOH
+        packssdw  xmm7,xmm5             ; xmm7=CrO
+
+        movdqa    xmm3, XMMWORD [wk(0)] ; xmm3=RE
+
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm5,xmm4
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+        pmaddwd   xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
+
+        movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
+
+        paddd     xmm6, XMMWORD [wk(6)]
+        paddd     xmm4, XMMWORD [wk(7)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
+
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [edi], xmm6   ; Save Y
+
+        pxor      xmm2,xmm2
+        pxor      xmm4,xmm4
+        punpcklwd xmm2,xmm3             ; xmm2=REL
+        punpckhwd xmm4,xmm3             ; xmm4=REH
+        psrld     xmm2,1                ; xmm2=REL*FIX(0.500)
+        psrld     xmm4,1                ; xmm4=REH*FIX(0.500)
+
+        movdqa    xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
+
+        paddd     xmm1,xmm2
+        paddd     xmm5,xmm4
+        paddd     xmm1,xmm0
+        paddd     xmm5,xmm0
+        psrld     xmm1,SCALEBITS        ; xmm1=CrEL
+        psrld     xmm5,SCALEBITS        ; xmm5=CrEH
+        packssdw  xmm1,xmm5             ; xmm1=CrE
+
+        psllw     xmm7,BYTE_BIT
+        por       xmm1,xmm7             ; xmm1=Cr
+        movdqa    XMMWORD [edx], xmm1   ; Save Cr
+
+        sub     ecx, byte SIZEOF_XMMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     edi, byte SIZEOF_XMMWORD                ; outptr0
+        add     ebx, byte SIZEOF_XMMWORD                ; outptr1
+        add     edx, byte SIZEOF_XMMWORD                ; outptr2
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
+
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        pop     ebx
+        pop     edx
+        poppic  eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jccolor-mmx.asm b/simd/jccolor-mmx.asm
new file mode 100644
index 0000000..c5d3764
--- /dev/null
+++ b/simd/jccolor-mmx.asm
@@ -0,0 +1,123 @@
+;
+; jccolor.asm - colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_mmx)
+
+EXTN(jconst_rgb_ycc_convert_mmx):       ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_* = 32-bit rounding/offset terms.  Each table is 8 bytes.
+PW_F0299_F0337  times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 2 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 2 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 2 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 2 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 2 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 in fixed point; PD_ONEHALFM1_CJ = (0.5 - 1 LSB) + CENTERJSAMPLE, same scale.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jccolext-mmx.asm"
+; EXT_RGB layout; jsimd_rgb_ycc_convert_mmx is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx
+%include "jccolext-mmx.asm"
+; EXT_RGBX layout; jsimd_rgb_ycc_convert_mmx is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx
+%include "jccolext-mmx.asm"
+; EXT_BGR layout; jsimd_rgb_ycc_convert_mmx is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx
+%include "jccolext-mmx.asm"
+; EXT_BGRX layout; jsimd_rgb_ycc_convert_mmx is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx
+%include "jccolext-mmx.asm"
+; EXT_XBGR layout; jsimd_rgb_ycc_convert_mmx is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx
+%include "jccolext-mmx.asm"
+; EXT_XRGB layout; jsimd_rgb_ycc_convert_mmx is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx
+%include "jccolext-mmx.asm"
diff --git a/simd/jccolor-sse2-64.asm b/simd/jccolor-sse2-64.asm
new file mode 100644
index 0000000..55c7e12
--- /dev/null
+++ b/simd/jccolor-sse2-64.asm
@@ -0,0 +1,120 @@
+;
+; jccolor.asm - colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2009, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_sse2)
+
+EXTN(jconst_rgb_ycc_convert_sse2):      ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_* = 32-bit rounding/offset terms.  Each table is 16 bytes.
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 4 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 4 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 in fixed point; PD_ONEHALFM1_CJ = (0.5 - 1 LSB) + CENTERJSAMPLE, same scale.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jccolext-sse2-64.asm"
+; EXT_RGB layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
+; EXT_RGBX layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
+; EXT_BGR layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
+; EXT_BGRX layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
+; EXT_XBGR layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
+; EXT_XRGB layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
+%include "jccolext-sse2-64.asm"
diff --git a/simd/jccolor-sse2.asm b/simd/jccolor-sse2.asm
new file mode 100644
index 0000000..890e2a3
--- /dev/null
+++ b/simd/jccolor-sse2.asm
@@ -0,0 +1,120 @@
+;
+; jccolor.asm - colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2009, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_081 equ      5329                   ; FIX(0.08131)
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_168 equ     11059                   ; FIX(0.16874)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_331 equ     21709                   ; FIX(0.33126)
+F_0_418 equ     27439                   ; FIX(0.41869)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_ycc_convert_sse2)
+
+EXTN(jconst_rgb_ycc_convert_sse2):      ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_* = 32-bit rounding/offset terms.  Each table is 16 bytes.
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PW_MF016_MF033  times 4 dw -F_0_168,-F_0_331
+PW_MF008_MF041  times 4 dw -F_0_081,-F_0_418
+PD_ONEHALFM1_CJ times 4 dd  (1 << (SCALEBITS-1)) - 1 + (CENTERJSAMPLE << SCALEBITS)
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 in fixed point; PD_ONEHALFM1_CJ = (0.5 - 1 LSB) + CENTERJSAMPLE, same scale.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jccolext-sse2.asm"
+; EXT_RGB layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
+%include "jccolext-sse2.asm"
+; EXT_RGBX layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
+%include "jccolext-sse2.asm"
+; EXT_BGR layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
+%include "jccolext-sse2.asm"
+; EXT_BGRX layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
+%include "jccolext-sse2.asm"
+; EXT_XBGR layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
+%include "jccolext-sse2.asm"
+; EXT_XRGB layout; jsimd_rgb_ycc_convert_sse2 is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
+%include "jccolext-sse2.asm"
diff --git a/simd/jcgray-mmx.asm b/simd/jcgray-mmx.asm
new file mode 100644
index 0000000..b2708ad
--- /dev/null
+++ b/simd/jcgray-mmx.asm
@@ -0,0 +1,116 @@
+;
+; jcgray.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_mmx)
+
+EXTN(jconst_rgb_gray_convert_mmx):      ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_ONEHALF = 32-bit rounding term.  Each table is 8 bytes.
+PW_F0299_F0337  times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 2 dw  F_0_114, F_0_250
+PD_ONEHALF      times 2 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 expressed in SCALEBITS fixed point.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jcgryext-mmx.asm"
+; EXT_RGB layout; jsimd_rgb_gray_convert_mmx is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx
+%include "jcgryext-mmx.asm"
+; EXT_RGBX layout; jsimd_rgb_gray_convert_mmx is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx
+%include "jcgryext-mmx.asm"
+; EXT_BGR layout; jsimd_rgb_gray_convert_mmx is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx
+%include "jcgryext-mmx.asm"
+; EXT_BGRX layout; jsimd_rgb_gray_convert_mmx is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx
+%include "jcgryext-mmx.asm"
+; EXT_XBGR layout; jsimd_rgb_gray_convert_mmx is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx
+%include "jcgryext-mmx.asm"
+; EXT_XRGB layout; jsimd_rgb_gray_convert_mmx is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx
+%include "jcgryext-mmx.asm"
diff --git a/simd/jcgray-sse2-64.asm b/simd/jcgray-sse2-64.asm
new file mode 100644
index 0000000..dfc0577
--- /dev/null
+++ b/simd/jcgray-sse2-64.asm
@@ -0,0 +1,113 @@
+;
+; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):     ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_ONEHALF = 32-bit rounding term.  Each table is 16 bytes.
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 expressed in SCALEBITS fixed point.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jcgryext-sse2-64.asm"
+; EXT_RGB layout; jsimd_rgb_gray_convert_sse2 is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
+; EXT_RGBX layout; jsimd_rgb_gray_convert_sse2 is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
+; EXT_BGR layout; jsimd_rgb_gray_convert_sse2 is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
+; EXT_BGRX layout; jsimd_rgb_gray_convert_sse2 is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
+; EXT_XBGR layout; jsimd_rgb_gray_convert_sse2 is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
+; EXT_XRGB layout; jsimd_rgb_gray_convert_sse2 is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryext-sse2-64.asm"
diff --git a/simd/jcgray-sse2.asm b/simd/jcgray-sse2.asm
new file mode 100644
index 0000000..5fa7273
--- /dev/null
+++ b/simd/jcgray-sse2.asm
@@ -0,0 +1,113 @@
+;
+; jcgray.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) below = round(x * 2^SCALEBITS)
+
+F_0_114 equ      7471                   ; FIX(0.11400)
+F_0_250 equ     16384                   ; FIX(0.25000)
+F_0_299 equ     19595                   ; FIX(0.29900)
+F_0_587 equ     38470                   ; FIX(0.58700)
+F_0_337 equ     (F_0_587 - F_0_250)     ; FIX(0.58700) - FIX(0.25000)
+; NOTE(review): F_0_587 exceeds 32767; presumably split so each part fits a signed word.
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):     ; exported label at the start of the tables
+; PW_* = 16-bit coefficient pairs; PD_ONEHALF = 32-bit rounding term.  Each table is 16 bytes.
+PW_F0299_F0337  times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250  times 4 dw  F_0_114, F_0_250
+PD_ONEHALF      times 4 dd  (1 << (SCALEBITS-1))
+; PD_ONEHALF = 0.5 expressed in SCALEBITS fixed point.
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Assemble the template once per pixel layout; this first copy keeps the current RGB_* macros.
+%include "jcgryext-sse2.asm"
+; EXT_RGB layout; jsimd_rgb_gray_convert_sse2 is remapped to the extrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryext-sse2.asm"
+; EXT_RGBX layout; jsimd_rgb_gray_convert_sse2 is remapped to the extrgbx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryext-sse2.asm"
+; EXT_BGR layout; jsimd_rgb_gray_convert_sse2 is remapped to the extbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryext-sse2.asm"
+; EXT_BGRX layout; jsimd_rgb_gray_convert_sse2 is remapped to the extbgrx name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryext-sse2.asm"
+; EXT_XBGR layout; jsimd_rgb_gray_convert_sse2 is remapped to the extxbgr name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryext-sse2.asm"
+; EXT_XRGB layout; jsimd_rgb_gray_convert_sse2 is remapped to the extxrgb name.
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryext-sse2.asm"
diff --git a/simd/jcgryext-mmx.asm b/simd/jcgryext-mmx.asm
new file mode 100644
index 0000000..13b9600
--- /dev/null
+++ b/simd/jcgryext-mmx.asm
@@ -0,0 +1,357 @@
+;
+; jcgryext-mmx.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width,
+;                             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                             JDIMENSION output_row, int num_rows);
+; Writes only Y (luminance) to output_buf[0]; no-op if img_width==0 or num_rows<=0.
+
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment esp (-> saved ebp)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots below ebp
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_rgb_gray_convert_mmx)
+
+EXTN(jsimd_rgb_gray_convert_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax -> saved ebp (arguments start at eax+8)
+        sub     esp, byte 4                     ; room for the saved stack pointer
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; keep pre-alignment esp for the epilogue
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[WK_NUM] below ebp
+        pushpic eax             ; make room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [img_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return                    ; zero-width image: nothing to convert
+
+        push    ecx                             ; keep num_cols; ecx is reused just below
+
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]       ; edi = output_buf[0]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]  ; edi = &output_buf[0][output_row]
+
+        pop     ecx                             ; ecx = num_cols again
+
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]        ; eax = remaining-row counter from here on
+        test    eax,eax
+        jle     near .return                    ; num_rows <= 0: nothing to convert
+        alignx  16,7
+.rowloop:
+        pushpic eax                     ; presumably saves the row counter in PIC builds (eax is reloaded below) - confirm
+        push    edi
+        push    esi
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     short .columnloop       ; at least SIZEOF_MMWORD columns: take the full-width path
+        alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:                            ; fewer than SIZEOF_MMWORD columns left: load the tail piecewise
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        xor     eax,eax
+        mov     al, BYTE [esi+ecx]      ; eax = trailing byte, zero-extended
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        xor     edx,edx
+        mov     dx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx                 ; eax = (eax << WORD_BIT) | word
+.column_ld4:
+        movd    mmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    mmG, DWORD [esi+ecx]
+        psllq   mmA, DWORD_BIT
+        por     mmA,mmG                 ; mmA = (mmA << DWORD_BIT) | dword
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        movq    mmG,mmA                 ; gathered tail becomes the second input mmword
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        mov     ecx, SIZEOF_MMWORD      ; so the column loop ends after this pass
+        jmp     short .rgb_gray_cnv
+.column_ld16:
+        test    cl, 2*SIZEOF_MMWORD
+        mov     ecx, SIZEOF_MMWORD      ; mov leaves the flags set by the test untouched
+        jz      short .rgb_gray_cnv
+        movq    mmF,mmA                 ; gathered tail becomes the third input mmword
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
+
+.columnloop:
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+        ; mmA=(00 10 20 01 11 21 02 12)
+        ; mmG=(22 03 13 23 04 14 24 05)
+        ; mmF=(15 25 06 16 26 07 17 27)
+
+        movq      mmD,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 10 20 01)
+        psrlq     mmD,4*BYTE_BIT        ; mmD=(11 21 02 12 -- -- -- --)
+
+        punpckhbw mmA,mmG               ; mmA=(00 04 10 14 20 24 01 05)
+        psllq     mmG,4*BYTE_BIT        ; mmG=(-- -- -- -- 22 03 13 23)
+
+        punpcklbw mmD,mmF               ; mmD=(11 15 21 25 02 06 12 16)
+        punpckhbw mmG,mmF               ; mmG=(22 26 03 07 13 17 23 27)
+
+        movq      mmE,mmA
+        psllq     mmA,4*BYTE_BIT        ; mmA=(-- -- -- -- 00 04 10 14)
+        psrlq     mmE,4*BYTE_BIT        ; mmE=(20 24 01 05 -- -- -- --)
+
+        punpckhbw mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        psllq     mmD,4*BYTE_BIT        ; mmD=(-- -- -- -- 11 15 21 25)
+
+        punpcklbw mmE,mmG               ; mmE=(20 22 24 26 01 03 05 07)
+        punpckhbw mmD,mmG               ; mmD=(11 13 15 17 21 23 25 27)
+
+        pxor      mmH,mmH
+
+        movq      mmC,mmA
+        punpcklbw mmA,mmH               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmH               ; mmC=(10 12 14 16)
+
+        movq      mmB,mmE
+        punpcklbw mmE,mmH               ; mmE=(20 22 24 26)
+        punpckhbw mmB,mmH               ; mmB=(01 03 05 07)
+
+        movq      mmF,mmD
+        punpcklbw mmD,mmH               ; mmD=(11 13 15 17)
+        punpckhbw mmF,mmH               ; mmF=(21 23 25 27)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:                            ; fewer than SIZEOF_MMWORD columns left: load the tail piecewise
+        test    cl, SIZEOF_MMWORD/8
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_MMWORD/8
+        movd    mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+        test    cl, SIZEOF_MMWORD/4
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_MMWORD/4
+        movq    mmF,mmA
+        movq    mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld4:
+        test    cl, SIZEOF_MMWORD/2
+        mov     ecx, SIZEOF_MMWORD      ; mov leaves the flags set by the test untouched
+        jz      short .rgb_gray_cnv
+        movq    mmD,mmA
+        movq    mmC,mmF
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
+
+.columnloop:
+        movq    mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+        movq    mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+        ; mmA=(00 10 20 30 01 11 21 31)
+        ; mmF=(02 12 22 32 03 13 23 33)
+        ; mmD=(04 14 24 34 05 15 25 35)
+        ; mmC=(06 16 26 36 07 17 27 37)
+
+        movq      mmB,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 10 12 20 22 30 32)
+        punpckhbw mmB,mmF               ; mmB=(01 03 11 13 21 23 31 33)
+
+        movq      mmG,mmD
+        punpcklbw mmD,mmC               ; mmD=(04 06 14 16 24 26 34 36)
+        punpckhbw mmG,mmC               ; mmG=(05 07 15 17 25 27 35 37)
+
+        movq      mmE,mmA
+        punpcklwd mmA,mmD               ; mmA=(00 02 04 06 10 12 14 16)
+        punpckhwd mmE,mmD               ; mmE=(20 22 24 26 30 32 34 36)
+
+        movq      mmH,mmB
+        punpcklwd mmB,mmG               ; mmB=(01 03 05 07 11 13 15 17)
+        punpckhwd mmH,mmG               ; mmH=(21 23 25 27 31 33 35 37)
+
+        pxor      mmF,mmF
+
+        movq      mmC,mmA
+        punpcklbw mmA,mmF               ; mmA=(00 02 04 06)
+        punpckhbw mmC,mmF               ; mmC=(10 12 14 16)
+
+        movq      mmD,mmB
+        punpcklbw mmB,mmF               ; mmB=(01 03 05 07)
+        punpckhbw mmD,mmF               ; mmD=(11 13 15 17)
+
+        movq      mmG,mmE
+        punpcklbw mmE,mmF               ; mmE=(20 22 24 26)
+        punpckhbw mmG,mmF               ; mmG=(30 32 34 36)
+
+        punpcklbw mmF,mmH
+        punpckhbw mmH,mmH
+        psrlw     mmF,BYTE_BIT          ; mmF=(21 23 25 27)
+        psrlw     mmH,BYTE_BIT          ; mmH=(31 33 35 37)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+        ; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+        movq      mm6,mm1
+        punpcklwd mm1,mm3
+        punpckhwd mm6,mm3
+        pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movq      mm7, mm6      ; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movq      mm6,mm0
+        punpcklwd mm0,mm2
+        punpckhwd mm6,mm2
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        movq      MMWORD [wk(0)], mm0   ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movq      MMWORD [wk(1)], mm6   ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        movq      mm0, mm5      ; mm0=BO
+        movq      mm6, mm4      ; mm6=BE
+
+        movq      mm4,mm0
+        punpcklwd mm0,mm3
+        punpckhwd mm4,mm3
+        pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+        movq      mm3,[GOTOFF(eax,PD_ONEHALF)]  ; mm3=[PD_ONEHALF]
+
+        paddd     mm0, mm1              ; add the odd-column R/G partial sums
+        paddd     mm4, mm7
+        paddd     mm0,mm3               ; add PD_ONEHALF before the shift
+        paddd     mm4,mm3
+        psrld     mm0,SCALEBITS         ; mm0=YOL
+        psrld     mm4,SCALEBITS         ; mm4=YOH
+        packssdw  mm0,mm4               ; mm0=YO
+
+        movq      mm4,mm6
+        punpcklwd mm6,mm2
+        punpckhwd mm4,mm2
+        pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+        movq      mm2,[GOTOFF(eax,PD_ONEHALF)]  ; mm2=[PD_ONEHALF]
+
+        paddd     mm6, MMWORD [wk(0)]   ; add the even-column R/G partial sums saved above
+        paddd     mm4, MMWORD [wk(1)]
+        paddd     mm6,mm2               ; add PD_ONEHALF before the shift
+        paddd     mm4,mm2
+        psrld     mm6,SCALEBITS         ; mm6=YEL
+        psrld     mm4,SCALEBITS         ; mm4=YEH
+        packssdw  mm6,mm4               ; mm6=YE
+
+        psllw     mm0,BYTE_BIT          ; odd-column Y into the upper byte of each word
+        por       mm6,mm0               ; mm6=Y
+        movq      MMWORD [edi], mm6     ; Save Y
+
+        sub     ecx, byte SIZEOF_MMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; inptr
+        add     edi, byte SIZEOF_MMWORD                 ; outptr0
+        cmp     ecx, byte SIZEOF_MMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1        ; leftover columns: take the partial-load path
+
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        poppic  eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW       ; next output row pointer
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jcgryext-sse2-64.asm b/simd/jcgryext-sse2-64.asm
new file mode 100644
index 0000000..abdd9a0
--- /dev/null
+++ b/simd/jcgryext-sse2-64.asm
@@ -0,0 +1,349 @@
+;
+; jcgryext-sse2-64.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, 2014, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+;                              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                              JDIMENSION output_row, int num_rows);
+; Writes only Y (luminance) to output_buf[0]; no-op if img_width==0 or num_rows<=0.
+
+; r10 = JDIMENSION img_width    (32-bit value: only r10d is meaningful)
+; r11 = JSAMPARRAY input_buf
+; r12 = JSAMPIMAGE output_buf
+; r13 = JDIMENSION output_row   (32-bit value: only r13d is meaningful)
+; r14 = int num_rows            (32-bit value: only r14d is meaningful)
+
+        align   16
+
+        global  EXTN(jsimd_rgb_gray_convert_sse2)
+
+EXTN(jsimd_rgb_gray_convert_sse2):
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
+        push    rbx
+
+        mov     ecx, r10d               ; num_cols; 32-bit move zero-extends, dropping undefined upper bits
+        test    rcx,rcx
+        jz      near .return            ; zero-width image: nothing to convert
+
+        push    rcx                     ; keep num_cols; rcx is reused just below
+
+        mov rsi, r12
+        mov     ecx, r13d               ; output_row; upper half of r13 is not defined for a 32-bit arg
+        mov     rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]       ; rdi = output_buf[0]
+        lea     rdi, [rdi+rcx*SIZEOF_JSAMPROW]  ; rdi = &output_buf[0][output_row]
+
+        pop     rcx                     ; rcx = num_cols again
+
+        mov rsi, r11
+        mov     eax, r14d               ; rax = remaining-row counter (zero-extended)
+        test    eax,eax                 ; signed 32-bit test, so num_rows < 0 returns as well
+        jle     near .return
+.rowloop:
+        push    rdi
+        push    rsi
+        push    rcx                     ; col
+
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr0
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop        ; at least SIZEOF_XMMWORD columns: take the full-width path
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:                            ; fewer than SIZEOF_XMMWORD columns left: load the tail piecewise
+        push    rax
+        push    rdx
+        lea     rcx,[rcx+rcx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_BYTE
+        movzx   rax, BYTE [rsi+rcx]     ; rax = trailing byte, zero-extended
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_WORD
+        movzx   rdx, WORD [rsi+rcx]
+        shl     rax, WORD_BIT
+        or      rax,rdx                 ; rax = (rax << WORD_BIT) | word
+.column_ld4:
+        movd    xmmA,eax
+        pop     rdx
+        pop     rax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF               ; xmmA = (xmmA << SIZEOF_DWORD bytes) | dword
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     rcx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [rsi+rcx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB               ; xmmA = (xmmA << SIZEOF_MMWORD bytes) | qword
+.column_ld16:
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA               ; gathered tail becomes the second input xmmword
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        mov     rcx, SIZEOF_XMMWORD     ; so the column loop ends after this pass
+        jmp     short .rgb_gray_cnv
+.column_ld32:
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     rcx, SIZEOF_XMMWORD     ; mov leaves the flags set by the test untouched
+        jz      short .rgb_gray_cnv
+        movdqa  xmmB,xmmA               ; gathered tail becomes the third input xmmword
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+        pxor      xmmH,xmmH
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:                            ; fewer than SIZEOF_XMMWORD columns left: load the tail piecewise
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     rcx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld2:
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
+.column_ld4:
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     rcx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld8:
+        test    cl, SIZEOF_XMMWORD/2
+        mov     rcx, SIZEOF_XMMWORD     ; mov leaves the flags set by the test untouched
+        jz      short .rgb_gray_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+        pxor      xmmF,xmmF
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movdqa    xmm7, xmm6            ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        movdqa    xmm12,xmm5            ; NOTE(review): xmm12 is callee-saved on Win64 - confirm collect_args preserves it
+        punpcklwd xmm5,xmm3
+        punpckhwd xmm12,xmm3
+        pmaddwd   xmm5,[rel PW_F0114_F0250] ; xmm5=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm12,[rel PW_F0114_F0250] ; xmm12=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+        movdqa    xmm3,[rel PD_ONEHALF] ; xmm3=[PD_ONEHALF]
+
+        paddd     xmm5, xmm1            ; add the odd-column R/G partial sums
+        paddd     xmm12, xmm7
+        paddd     xmm5,xmm3             ; add PD_ONEHALF before the shift
+        paddd     xmm12,xmm3
+        psrld     xmm5,SCALEBITS        ; xmm5=YOL
+        psrld     xmm12,SCALEBITS       ; xmm12=YOH
+        packssdw  xmm5,xmm12            ; xmm5=YO
+
+        movdqa    xmm12,xmm4
+        punpcklwd xmm4,xmm2
+        punpckhwd xmm12,xmm2
+        pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm12,[rel PW_F0114_F0250] ; xmm12=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+        movdqa    xmm2,[rel PD_ONEHALF] ; xmm2=[PD_ONEHALF]
+
+        paddd     xmm4,xmm0             ; xmm4+=REL*FIX(0.299)+GEL*FIX(0.337)
+        paddd     xmm12,xmm6            ; xmm12+=REH*FIX(0.299)+GEH*FIX(0.337)
+        paddd     xmm4,xmm2             ; add PD_ONEHALF before the shift
+        paddd     xmm12,xmm2
+        psrld     xmm4,SCALEBITS        ; xmm4=YEL
+        psrld     xmm12,SCALEBITS       ; xmm12=YEH
+        packssdw  xmm4,xmm12            ; xmm4=YE
+
+        psllw     xmm5,BYTE_BIT         ; odd-column Y into the upper byte of each word
+        por       xmm4,xmm5             ; xmm4=Y
+        movdqa    XMMWORD [rdi], xmm4   ; Save Y
+
+        sub     rcx, byte SIZEOF_XMMWORD
+        add     rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     rdi, byte SIZEOF_XMMWORD                ; outptr0
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    rcx,rcx
+        jnz     near .column_ld1        ; leftover columns: take the partial-load path
+
+        pop     rcx                     ; col
+        pop     rsi
+        pop     rdi
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     rdi, byte SIZEOF_JSAMPROW       ; next output row pointer
+        dec     rax                             ; num_rows
+        jg      near .rowloop
+
+.return:
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jcgryext-sse2.asm b/simd/jcgryext-sse2.asm
new file mode 100644
index 0000000..1097b29
--- /dev/null
+++ b/simd/jcgryext-sse2.asm
@@ -0,0 +1,383 @@
+;
+; jcgryext-sse2.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+;                              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                              JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b)    (b)+8           ; JDIMENSION img_width
+%define input_buf(b)    (b)+12          ; JSAMPARRAY input_buf
+%define output_buf(b)   (b)+16          ; JSAMPIMAGE output_buf
+%define output_row(b)   (b)+20          ; JDIMENSION output_row
+%define num_rows(b)     (b)+24          ; int num_rows
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment esp (-> saved ebp)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2               ; number of xmmword scratch slots below ebp
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+
+        global  EXTN(jsimd_rgb_gray_convert_sse2)
+
+EXTN(jsimd_rgb_gray_convert_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [img_width(eax)]
+        test    ecx,ecx
+        jz      near .return
+
+        push    ecx
+
+        mov     esi, JSAMPIMAGE [output_buf(eax)]
+        mov     ecx, JDIMENSION [output_row(eax)]
+        mov     edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+        lea     edi, [edi+ecx*SIZEOF_JSAMPROW]
+
+        pop     ecx
+
+        mov     esi, JSAMPARRAY [input_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
+.rowloop:
+        pushpic eax
+        push    edi
+        push    esi
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr0
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        alignx  16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+        push    eax
+        push    edx
+        lea     ecx,[ecx+ecx*2]         ; imul ecx,RGB_PIXELSIZE
+        test    cl, SIZEOF_BYTE
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_BYTE
+        movzx   eax, BYTE [esi+ecx]
+.column_ld2:
+        test    cl, SIZEOF_WORD
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_WORD
+        movzx   edx, WORD [esi+ecx]
+        shl     eax, WORD_BIT
+        or      eax,edx
+.column_ld4:
+        movd    xmmA,eax
+        pop     edx
+        pop     eax
+        test    cl, SIZEOF_DWORD
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_DWORD
+        movd    xmmF, XMM_DWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_DWORD
+        por     xmmA,xmmF
+.column_ld8:
+        test    cl, SIZEOF_MMWORD
+        jz      short .column_ld16
+        sub     ecx, byte SIZEOF_MMWORD
+        movq    xmmB, XMM_MMWORD [esi+ecx]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmB
+.column_ld16:
+        test    cl, SIZEOF_XMMWORD
+        jz      short .column_ld32
+        movdqa  xmmF,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        mov     ecx, SIZEOF_XMMWORD
+        jmp     short .rgb_gray_cnv
+.column_ld32:
+        test    cl, 2*SIZEOF_XMMWORD
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmB,xmmA
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+        ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        ; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        ; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        movdqa    xmmG,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+        psrldq    xmmG,8        ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmF     ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+        pslldq    xmmF,8        ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+        punpcklbw xmmG,xmmB     ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+        punpckhbw xmmF,xmmB     ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+        movdqa    xmmD,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+        psrldq    xmmD,8        ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmG     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+        pslldq    xmmG,8        ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+        punpcklbw xmmD,xmmF     ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+        punpckhbw xmmG,xmmF     ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+        movdqa    xmmE,xmmA
+        pslldq    xmmA,8        ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+        psrldq    xmmE,8        ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+        punpckhbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        pslldq    xmmD,8        ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+        punpcklbw xmmE,xmmG     ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmG     ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+        pxor      xmmH,xmmH
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmH     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmH     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmB,xmmE
+        punpcklbw xmmE,xmmH     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmB,xmmH     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+        movdqa    xmmF,xmmD
+        punpcklbw xmmD,xmmH     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmF,xmmH     ; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+        test    cl, SIZEOF_XMMWORD/16
+        jz      short .column_ld2
+        sub     ecx, byte SIZEOF_XMMWORD/16
+        movd    xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+        test    cl, SIZEOF_XMMWORD/8
+        jz      short .column_ld4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        movq    xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+        pslldq  xmmA, SIZEOF_MMWORD
+        por     xmmA,xmmE
+.column_ld4:
+        test    cl, SIZEOF_XMMWORD/4
+        jz      short .column_ld8
+        sub     ecx, byte SIZEOF_XMMWORD/4
+        movdqa  xmmE,xmmA
+        movdqu  xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld8:
+        test    cl, SIZEOF_XMMWORD/2
+        mov     ecx, SIZEOF_XMMWORD
+        jz      short .rgb_gray_cnv
+        movdqa  xmmF,xmmA
+        movdqa  xmmH,xmmE
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        jmp     short .rgb_gray_cnv
+        alignx  16,7
+
+.columnloop:
+        movdqu  xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqu  xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        movdqu  xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+        movdqu  xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+        ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        ; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        ; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpcklbw xmmA,xmmE     ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+        punpckhbw xmmD,xmmE     ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+        movdqa    xmmC,xmmF
+        punpcklbw xmmF,xmmH     ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+        punpckhbw xmmC,xmmH     ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+        movdqa    xmmB,xmmA
+        punpcklwd xmmA,xmmF     ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+        punpckhwd xmmB,xmmF     ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+        movdqa    xmmG,xmmD
+        punpcklwd xmmD,xmmC     ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+        punpckhwd xmmG,xmmC     ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+        movdqa    xmmE,xmmA
+        punpcklbw xmmA,xmmD     ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+        punpckhbw xmmE,xmmD     ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+        movdqa    xmmH,xmmB
+        punpcklbw xmmB,xmmG     ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+        punpckhbw xmmH,xmmG     ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+        pxor      xmmF,xmmF
+
+        movdqa    xmmC,xmmA
+        punpcklbw xmmA,xmmF     ; xmmA=(00 02 04 06 08 0A 0C 0E)
+        punpckhbw xmmC,xmmF     ; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+        movdqa    xmmD,xmmB
+        punpcklbw xmmB,xmmF     ; xmmB=(01 03 05 07 09 0B 0D 0F)
+        punpckhbw xmmD,xmmF     ; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+        movdqa    xmmG,xmmE
+        punpcklbw xmmE,xmmF     ; xmmE=(20 22 24 26 28 2A 2C 2E)
+        punpckhbw xmmG,xmmF     ; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+        punpcklbw xmmF,xmmH
+        punpckhbw xmmH,xmmH
+        psrlw     xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
+        psrlw     xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        ; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+        ; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+        ; (Original)
+        ; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+        ;
+        ; (This implementation)
+        ; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+        movdqa    xmm6,xmm1
+        punpcklwd xmm1,xmm3
+        punpckhwd xmm6,xmm3
+        pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movdqa    xmm7, xmm6    ; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+        movdqa    xmm6,xmm0
+        punpcklwd xmm0,xmm2
+        punpckhwd xmm6,xmm2
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        movdqa    XMMWORD [wk(0)], xmm0 ; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+        movdqa    XMMWORD [wk(1)], xmm6 ; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+        movdqa    xmm0, xmm5    ; xmm0=BO
+        movdqa    xmm6, xmm4    ; xmm6=BE
+
+        movdqa    xmm4,xmm0
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm4,xmm3
+        pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+        movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
+
+        paddd     xmm0, xmm1
+        paddd     xmm4, xmm7
+        paddd     xmm0,xmm3
+        paddd     xmm4,xmm3
+        psrld     xmm0,SCALEBITS        ; xmm0=YOL
+        psrld     xmm4,SCALEBITS        ; xmm4=YOH
+        packssdw  xmm0,xmm4             ; xmm0=YO
+
+        movdqa    xmm4,xmm6
+        punpcklwd xmm6,xmm2
+        punpckhwd xmm4,xmm2
+        pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+        pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+        movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
+
+        paddd     xmm6, XMMWORD [wk(0)]
+        paddd     xmm4, XMMWORD [wk(1)]
+        paddd     xmm6,xmm2
+        paddd     xmm4,xmm2
+        psrld     xmm6,SCALEBITS        ; xmm6=YEL
+        psrld     xmm4,SCALEBITS        ; xmm4=YEH
+        packssdw  xmm6,xmm4             ; xmm6=YE
+
+        psllw     xmm0,BYTE_BIT
+        por       xmm6,xmm0             ; xmm6=Y
+        movdqa    XMMWORD [edi], xmm6   ; Save Y
+
+        sub     ecx, byte SIZEOF_XMMWORD
+        add     esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; inptr
+        add     edi, byte SIZEOF_XMMWORD                ; outptr0
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .column_ld1
+
+        pop     ecx                     ; col
+        pop     esi
+        pop     edi
+        poppic  eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf
+        add     edi, byte SIZEOF_JSAMPROW
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jcolsamp.inc b/simd/jcolsamp.inc
new file mode 100644
index 0000000..79751b7
--- /dev/null
+++ b/simd/jcolsamp.inc
@@ -0,0 +1,105 @@
+;
+; jcolsamp.inc - private declarations for color conversion & up/downsampling
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; [TAB8]
+
+; --------------------------------------------------------------------------
+
+; Pseudo-registers that make the ordering of RGB configurable (R owns (x)mm0/1,
+; G 2/3, B 4/5, leftover 6/7).  A/B = whichever of RGB_RED/GREEN/BLUE is 0.
+%if RGB_RED == 0
+%define  mmA  mm0
+%define  mmB  mm1
+%define xmmA xmm0
+%define xmmB xmm1
+%elif RGB_GREEN == 0
+%define  mmA  mm2
+%define  mmB  mm3
+%define xmmA xmm2
+%define xmmB xmm3
+%elif RGB_BLUE == 0
+%define  mmA  mm4
+%define  mmB  mm5
+%define xmmA xmm4
+%define xmmB xmm5
+%else
+%define  mmA  mm6
+%define  mmB  mm7
+%define xmmA xmm6
+%define xmmB xmm7
+%endif
+; C/D = whichever of RGB_RED/RGB_GREEN/RGB_BLUE is 1, else (x)mm6/7.
+%if RGB_RED == 1
+%define  mmC  mm0
+%define  mmD  mm1
+%define xmmC xmm0
+%define xmmD xmm1
+%elif RGB_GREEN == 1
+%define  mmC  mm2
+%define  mmD  mm3
+%define xmmC xmm2
+%define xmmD xmm3
+%elif RGB_BLUE == 1
+%define  mmC  mm4
+%define  mmD  mm5
+%define xmmC xmm4
+%define xmmD xmm5
+%else
+%define  mmC  mm6
+%define  mmD  mm7
+%define xmmC xmm6
+%define xmmD xmm7
+%endif
+; E/F = whichever of RGB_RED/RGB_GREEN/RGB_BLUE is 2, else (x)mm6/7.
+%if RGB_RED == 2
+%define  mmE  mm0
+%define  mmF  mm1
+%define xmmE xmm0
+%define xmmF xmm1
+%elif RGB_GREEN == 2
+%define  mmE  mm2
+%define  mmF  mm3
+%define xmmE xmm2
+%define xmmF xmm3
+%elif RGB_BLUE == 2
+%define  mmE  mm4
+%define  mmF  mm5
+%define xmmE xmm4
+%define xmmF xmm5
+%else
+%define  mmE  mm6
+%define  mmF  mm7
+%define xmmE xmm6
+%define xmmF xmm7
+%endif
+; G/H = whichever of RGB_RED/RGB_GREEN/RGB_BLUE is 3, else (x)mm6/7.
+%if RGB_RED == 3
+%define  mmG  mm0
+%define  mmH  mm1
+%define xmmG xmm0
+%define xmmH xmm1
+%elif RGB_GREEN == 3
+%define  mmG  mm2
+%define  mmH  mm3
+%define xmmG xmm2
+%define xmmH xmm3
+%elif RGB_BLUE == 3
+%define  mmG  mm4
+%define  mmH  mm5
+%define xmmG xmm4
+%define xmmH xmm5
+%else
+%define  mmG  mm6
+%define  mmH  mm7
+%define xmmG xmm6
+%define xmmH xmm7
+%endif
+
+; --------------------------------------------------------------------------
diff --git a/simd/jcsample-mmx.asm b/simd/jcsample-mmx.asm
new file mode 100644
index 0000000..6881a56
--- /dev/null
+++ b/simd/jcsample-mmx.asm
@@ -0,0 +1,324 @@
+;
+; jcsample.asm - downsampling (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Downsample one component 2:1 horizontally and 1:1 vertically, no smoothing.
+; Up to max_v_samp_factor input rows are first padded on the right with their
+; last sample until they hold 16*width_blocks samples.  Then, for each of
+; v_samp_factor rows, every pair of adjacent input samples is averaged into
+; one output sample; the rounding bias alternates 0,1 along the row.
+; GLOBAL(void)
+; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_mmx)
+
+EXTN(jsimd_h2v1_downsample_mmx):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, JDIMENSION [img_width(ebp)]        ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = padding samples per row
+        jle     short .expand_end               ; rows are already wide enough
+
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = rows left to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must advance edi upward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is overwritten below
+        push    ecx                             ; rep stosb counts ecx down
+
+        mov     edi, JSAMPROW [esi]             ; edi = start of this row
+        add     edi,edx                         ; edi = first sample to fill
+        mov     al, JSAMPLE [edi-1]             ; al = last real sample
+
+        rep stosb                               ; store al into ecx bytes
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v1_downsample
+
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov       edx, 0x00010000       ; bias pattern
+        movd      mm7,edx
+        pcmpeqw   mm6,mm6
+        punpckldq mm7,mm7               ; mm7={0, 1, 0, 1}
+        psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx                             ; the column loop consumes ecx
+        push    edi                             ; keep the row-pointer cursors
+        push    esi
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm1, MMWORD [esi+1*SIZEOF_MMWORD]
+        movq    mm2,mm0
+        movq    mm3,mm1
+
+        pand    mm0,mm6                 ; even-numbered samples, as words
+        psrlw   mm2,BYTE_BIT            ; odd-numbered samples, as words
+        pand    mm1,mm6
+        psrlw   mm3,BYTE_BIT
+
+        paddw   mm0,mm2                 ; sum of each horizontal pair
+        paddw   mm1,mm3
+        paddw   mm0,mm7                 ; add the rounding bias
+        paddw   mm1,mm7
+        psrlw   mm0,1                   ; divide by 2
+        psrlw   mm1,1
+
+        packuswb mm0,mm1                ; narrow the 8 averages to bytes
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     short .columnloop       ; ecx is a multiple of 8 (set by shl 3)
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      short .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample one component 2:1 horizontally and 2:1 vertically, no smoothing.
+; Up to max_v_samp_factor input rows are first padded on the right with their
+; last sample until they hold 16*width_blocks samples.  Then each of the
+; v_samp_factor output rows is built from two input rows: every 2x2 group of
+; samples is averaged, with a rounding bias alternating 1,2 along the row.
+; GLOBAL(void)
+; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
+;                            JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                            JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_mmx)
+
+EXTN(jsimd_h2v2_downsample_mmx):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, JDIMENSION [img_width(ebp)]        ; edx = image_width
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = padding samples per row
+        jle     short .expand_end               ; rows are already wide enough
+
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = rows left to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; stosb must advance edi upward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                             ; al is overwritten below
+        push    ecx                             ; rep stosb counts ecx down
+
+        mov     edi, JSAMPROW [esi]             ; edi = start of this row
+        add     edi,edx                         ; edi = first sample to fill
+        mov     al, JSAMPLE [edi-1]             ; al = last real sample
+
+        rep stosb                               ; store al into ecx bytes
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v2_downsample
+
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov       edx, 0x00020001       ; bias pattern
+        movd      mm7,edx
+        pcmpeqw   mm6,mm6
+        punpckldq mm7,mm7               ; mm7={1, 2, 1, 2}
+        psrlw     mm6,BYTE_BIT          ; mm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx                             ; the column loop consumes ecx
+        push    edi                             ; keep the row-pointer cursors
+        push    esi
+
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [edx+0*SIZEOF_MMWORD]
+        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm2, MMWORD [edx+1*SIZEOF_MMWORD]
+        movq    mm3, MMWORD [esi+1*SIZEOF_MMWORD]
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pand    mm0,mm6                 ; even-numbered samples, as words
+        psrlw   mm4,BYTE_BIT            ; odd-numbered samples, as words
+        pand    mm1,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm0,mm4                 ; row 0: sum of each horizontal pair
+        paddw   mm1,mm5                 ; row 1: sum of each horizontal pair
+
+        movq    mm4,mm2                 ; same steps for the second 8 bytes
+        movq    mm5,mm3
+        pand    mm2,mm6
+        psrlw   mm4,BYTE_BIT
+        pand    mm3,mm6
+        psrlw   mm5,BYTE_BIT
+        paddw   mm2,mm4
+        paddw   mm3,mm5
+
+        paddw   mm0,mm1                 ; add the two rows: 2x2 sums
+        paddw   mm2,mm3
+        paddw   mm0,mm7                 ; add the rounding bias
+        paddw   mm2,mm7
+        psrlw   mm0,2                   ; divide by 4
+        psrlw   mm2,2
+
+        packuswb mm0,mm2                ; narrow the 8 averages to bytes
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+
+        add     edx, byte 2*SIZEOF_MMWORD       ; inptr0
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr1
+        add     edi, byte 1*SIZEOF_MMWORD       ; outptr
+        sub     ecx, byte SIZEOF_MMWORD         ; outcol
+        jnz     near .columnloop        ; ecx is a multiple of 8 (set by shl 3)
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jcsample-sse2-64.asm b/simd/jcsample-sse2-64.asm
new file mode 100644
index 0000000..f32fb4f
--- /dev/null
+++ b/simd/jcsample-sse2-64.asm
@@ -0,0 +1,330 @@
+;
+; jcsample.asm - downsampling (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Downsample one component 2:1 horizontally and 1:1 vertically, no smoothing.
+; Input rows are first padded on the right with their last sample up to
+; 16*width_blocks samples; then each pair of adjacent input samples is
+; averaged into one output sample (rounding bias alternates 0,1).
+; GLOBAL(void)
+; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
+;                             JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
+; The int/JDIMENSION arguments are 32-bit values: they are read through the
+; 32-bit register names because the upper register halves are undefined.
+; r10 = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12 = JDIMENSION v_samp_factor
+; r13 = JDIMENSION width_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_sse2)
+
+EXTN(jsimd_h2v1_downsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably for collect_args
+        mov     rbp,rsp
+        collect_args            ; arguments become available in r10-r15
+
+        mov     ecx, r13d               ; 32-bit move zero-extends into rcx
+        shl     rcx,3                   ; imul rcx,DCTSIZE (rcx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, r10d               ; rdx = image_width, zero-extended
+
+        ; -- expand_right_edge
+
+        push    rcx                             ; save output_cols
+        shl     rcx,1                           ; output_cols * 2
+        sub     rcx,rdx                         ; rcx = padding samples per row
+        jle     short .expand_end               ; rows are already wide enough
+
+        mov     eax, r11d                       ; int max_v_samp_factor
+        test    eax,eax                         ; signed 32-bit test
+        jle     short .expand_end
+
+        cld                                     ; stosb must advance rdi upward
+        mov     rsi, r14        ; input_data
+.expandloop:
+        push    rax                             ; al is overwritten below
+        push    rcx                             ; rep stosb counts rcx down
+
+        mov     rdi, JSAMPROW [rsi]             ; rdi = start of this row
+        add     rdi,rdx                         ; rdi = first sample to fill
+        mov     al, JSAMPLE [rdi-1]             ; al = last real sample
+
+        rep stosb                               ; store al into rcx bytes
+
+        pop     rcx
+        pop     rax
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     rax                             ; rax is a positive count here
+        jg      short .expandloop
+
+.expand_end:
+        pop     rcx                             ; output_cols
+
+        ; -- h2v1_downsample
+
+        mov     eax, r12d       ; rowctr (zero-extended into rax)
+        test    eax,eax
+        jle     near .return
+
+        mov     rdx, 0x00010000         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     rsi, r14        ; input_data
+        mov     rdi, r15        ; output_data
+.rowloop:
+        push    rcx                             ; the column loop consumes rcx
+        push    rdi                             ; keep the row-pointer cursors
+        push    rsi
+
+        mov     rsi, JSAMPROW [rsi]             ; inptr
+        mov     rdi, JSAMPROW [rdi]             ; outptr
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+
+.columnloop_r8:                         ; fewer than 16 output columns remain
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        pxor    xmm1,xmm1               ; second input half is taken as zero
+        mov     rcx, SIZEOF_XMMWORD     ; makes this the final pass of the row
+        jmp     short .downsample
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+.downsample:
+        movdqa  xmm2,xmm0
+        movdqa  xmm3,xmm1
+
+        pand    xmm0,xmm6               ; even-numbered samples, as words
+        psrlw   xmm2,BYTE_BIT           ; odd-numbered samples, as words
+        pand    xmm1,xmm6
+        psrlw   xmm3,BYTE_BIT
+
+        paddw   xmm0,xmm2               ; sum of each horizontal pair
+        paddw   xmm1,xmm3
+        paddw   xmm0,xmm7               ; add the rounding bias
+        paddw   xmm1,xmm7
+        psrlw   xmm0,1                  ; divide by 2
+        psrlw   xmm1,1
+
+        packuswb xmm0,xmm1              ; narrow the 16 averages to bytes
+
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     rcx, byte SIZEOF_XMMWORD        ; outcol
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        test    rcx,rcx
+        jnz     short .columnloop_r8
+
+        pop     rsi
+        pop     rdi
+        pop     rcx
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        uncollect_args
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample one component 2:1 horizontally and 2:1 vertically, no smoothing.
+; Input rows are first padded on the right with their last sample up to
+; 16*width_blocks samples; then every 2x2 group of input samples is averaged
+; into one output sample (rounding bias alternates 1,2 along the row).
+; GLOBAL(void)
+; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
+;                             JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
+; The int/JDIMENSION arguments are 32-bit values: they are read through the
+; 32-bit register names because the upper register halves are undefined.
+; r10 = JDIMENSION image_width
+; r11 = int max_v_samp_factor
+; r12 = JDIMENSION v_samp_factor
+; r13 = JDIMENSION width_blocks
+; r14 = JSAMPARRAY input_data
+; r15 = JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_sse2)
+
+EXTN(jsimd_h2v2_downsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably for collect_args
+        mov     rbp,rsp
+        collect_args            ; arguments become available in r10-r15
+
+        mov     ecx, r13d               ; 32-bit move zero-extends into rcx
+        shl     rcx,3                   ; imul rcx,DCTSIZE (rcx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, r10d               ; rdx = image_width, zero-extended
+
+        ; -- expand_right_edge
+
+        push    rcx                             ; save output_cols
+        shl     rcx,1                           ; output_cols * 2
+        sub     rcx,rdx                         ; rcx = padding samples per row
+        jle     short .expand_end               ; rows are already wide enough
+
+        mov     eax, r11d                       ; int max_v_samp_factor
+        test    eax,eax                         ; signed 32-bit test
+        jle     short .expand_end
+
+        cld                                     ; stosb must advance rdi upward
+        mov     rsi, r14        ; input_data
+.expandloop:
+        push    rax                             ; al is overwritten below
+        push    rcx                             ; rep stosb counts rcx down
+
+        mov     rdi, JSAMPROW [rsi]             ; rdi = start of this row
+        add     rdi,rdx                         ; rdi = first sample to fill
+        mov     al, JSAMPLE [rdi-1]             ; al = last real sample
+
+        rep stosb                               ; store al into rcx bytes
+
+        pop     rcx
+        pop     rax
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; next row pointer
+        dec     rax                             ; rax is a positive count here
+        jg      short .expandloop
+
+.expand_end:
+        pop     rcx                             ; output_cols
+
+        ; -- h2v2_downsample
+
+        mov     eax, r12d       ; rowctr (zero-extended into rax)
+        test    eax,eax
+        jle     near .return
+
+        mov     rdx, 0x00020001         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     rsi, r14        ; input_data
+        mov     rdi, r15        ; output_data
+.rowloop:
+        push    rcx                             ; the column loop consumes rcx
+        push    rdi                             ; keep the row-pointer cursors
+        push    rsi
+
+        mov     rdx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     rdi, JSAMPROW [rdi]                     ; outptr
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+
+.columnloop_r8:                         ; fewer than 16 output columns remain
+        movdqa  xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        pxor    xmm2,xmm2               ; second input half is taken as zero
+        pxor    xmm3,xmm3
+        mov     rcx, SIZEOF_XMMWORD     ; makes this the final pass of the row
+        jmp     short .downsample
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+.downsample:
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        pand    xmm0,xmm6               ; even-numbered samples, as words
+        psrlw   xmm4,BYTE_BIT           ; odd-numbered samples, as words
+        pand    xmm1,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm0,xmm4               ; row 0: sum of each horizontal pair
+        paddw   xmm1,xmm5               ; row 1: sum of each horizontal pair
+
+        movdqa  xmm4,xmm2               ; same steps for the second 16 bytes
+        movdqa  xmm5,xmm3
+        pand    xmm2,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm3,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm2,xmm4
+        paddw   xmm3,xmm5
+
+        paddw   xmm0,xmm1               ; add the two rows: 2x2 sums
+        paddw   xmm2,xmm3
+        paddw   xmm0,xmm7               ; add the rounding bias
+        paddw   xmm2,xmm7
+        psrlw   xmm0,2                  ; divide by 4
+        psrlw   xmm2,2
+
+        packuswb xmm0,xmm2              ; narrow the 16 averages to bytes
+
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     rcx, byte SIZEOF_XMMWORD        ; outcol
+        add     rdx, byte 2*SIZEOF_XMMWORD      ; inptr0
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr1
+        add     rdi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    rcx,rcx
+        jnz     near .columnloop_r8
+
+        pop     rsi
+        pop     rdi
+        pop     rcx
+
+        add     rsi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     rax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jcsample-sse2.asm b/simd/jcsample-sse2.asm
new file mode 100644
index 0000000..11202db
--- /dev/null
+++ b/simd/jcsample-sse2.asm
@@ -0,0 +1,351 @@
+;
+; jcsample.asm - downsampling (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Downsample pixel values of a single component: 2:1 horizontally, 1:1
+; vertically, without smoothing.  Each output sample is the average of two
+; adjacent input samples; the rounding bias alternates 0,1 across columns.
+;
+; GLOBAL(void)
+; jsimd_h2v1_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
+;                             JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
+;
+
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v1_downsample_sse2)
+
+EXTN(jsimd_h2v1_downsample_sse2):
+        push    ebp
+        mov     ebp,esp                 ; arguments are read at ebp+8 and up
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, JDIMENSION [img_width(ebp)]
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = samples of padding needed
+        jle     short .expand_end               ; row is already wide enough
+
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = number of rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; rep stosb must store forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                     ; al is reused as the fill value
+        push    ecx                     ; rep stosb consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                 ; edi = row + image_width
+        mov     al, JSAMPLE [edi-1]     ; al = last real sample of the row
+
+        rep stosb                       ; replicate it ecx times past the edge
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; next input row pointer
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v1_downsample
+
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov     edx, 0x00010000         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx                     ; output_cols (counted down per row)
+        push    edi                     ; output_data position
+        push    esi                     ; input_data position
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop       ; a full output vector is available
+        alignx  16,7
+
+.columnloop_r8:                         ; fewer than SIZEOF_XMMWORD columns left
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm1,xmm1               ; second input vector treated as zero
+        mov     ecx, SIZEOF_XMMWORD     ; makes the loop exit after this pass
+        jmp     short .downsample
+        alignx  16,7
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+.downsample:
+        movdqa  xmm2,xmm0
+        movdqa  xmm3,xmm1
+
+        pand    xmm0,xmm6               ; even-indexed samples, widened to words
+        psrlw   xmm2,BYTE_BIT           ; odd-indexed samples, widened to words
+        pand    xmm1,xmm6
+        psrlw   xmm3,BYTE_BIT
+
+        paddw   xmm0,xmm2               ; even + odd
+        paddw   xmm1,xmm3
+        paddw   xmm0,xmm7               ; + alternating bias
+        paddw   xmm1,xmm7
+        psrlw   xmm0,1                  ; / 2
+        psrlw   xmm1,1
+
+        packuswb xmm0,xmm1              ; narrow both halves back to bytes
+
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop
+        test    ecx,ecx
+        jnz     short .columnloop_r8    ; a partial vector remains
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Downsample pixel values of a single component: 2:1 horizontally and 2:1
+; vertically, without smoothing.  Each output sample is the average of a 2x2
+; group of input samples; the rounding bias alternates 1,2 across columns.
+;
+; GLOBAL(void)
+; jsimd_h2v2_downsample_sse2 (JDIMENSION image_width, int max_v_samp_factor,
+;                             JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+;                             JSAMPARRAY input_data, JSAMPARRAY output_data);
+;
+
+%define img_width(b)    (b)+8           ; JDIMENSION image_width
+%define max_v_samp(b)   (b)+12          ; int max_v_samp_factor
+%define v_samp(b)       (b)+16          ; JDIMENSION v_samp_factor
+%define width_blks(b)   (b)+20          ; JDIMENSION width_blocks
+%define input_data(b)   (b)+24          ; JSAMPARRAY input_data
+%define output_data(b)  (b)+28          ; JSAMPARRAY output_data
+
+        align   16
+        global  EXTN(jsimd_h2v2_downsample_sse2)
+
+EXTN(jsimd_h2v2_downsample_sse2):
+        push    ebp
+        mov     ebp,esp                 ; arguments are read at ebp+8 and up
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     ecx, JDIMENSION [width_blks(ebp)]
+        shl     ecx,3                   ; imul ecx,DCTSIZE (ecx = output_cols)
+        jz      near .return            ; no output columns: nothing to do
+
+        mov     edx, JDIMENSION [img_width(ebp)]
+
+        ; -- expand_right_edge
+
+        push    ecx                             ; save output_cols
+        shl     ecx,1                           ; output_cols * 2
+        sub     ecx,edx                         ; ecx = samples of padding needed
+        jle     short .expand_end               ; rows are already wide enough
+
+        mov     eax, INT [max_v_samp(ebp)]      ; eax = number of rows to pad
+        test    eax,eax
+        jle     short .expand_end
+
+        cld                                     ; rep stosb must store forward
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        alignx  16,7
+.expandloop:
+        push    eax                     ; al is reused as the fill value
+        push    ecx                     ; rep stosb consumes ecx
+
+        mov     edi, JSAMPROW [esi]
+        add     edi,edx                 ; edi = row + image_width
+        mov     al, JSAMPLE [edi-1]     ; al = last real sample of the row
+
+        rep stosb                       ; replicate it ecx times past the edge
+
+        pop     ecx
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; next input row pointer
+        dec     eax
+        jg      short .expandloop
+
+.expand_end:
+        pop     ecx                             ; output_cols
+
+        ; -- h2v2_downsample
+
+        mov     eax, JDIMENSION [v_samp(ebp)]   ; rowctr
+        test    eax,eax
+        jle     near .return
+
+        mov     edx, 0x00020001         ; bias pattern
+        movd    xmm7,edx
+        pcmpeqw xmm6,xmm6
+        pshufd  xmm7,xmm7,0x00          ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, JSAMPARRAY [output_data(ebp)]      ; output_data
+        alignx  16,7
+.rowloop:
+        push    ecx                     ; output_cols (counted down per row)
+        push    edi                     ; output_data position
+        push    esi                     ; input_data position
+
+        mov     edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1
+        mov     edi, JSAMPROW [edi]                     ; outptr
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     short .columnloop       ; a full output vector is available
+        alignx  16,7
+
+.columnloop_r8:                         ; fewer than SIZEOF_XMMWORD columns left
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        pxor    xmm2,xmm2               ; second vector of row 0 treated as zero
+        pxor    xmm3,xmm3               ; second vector of row 1 treated as zero
+        mov     ecx, SIZEOF_XMMWORD     ; makes the loop exit after this pass
+        jmp     short .downsample
+        alignx  16,7
+
+.columnloop:
+        movdqa  xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]
+
+.downsample:
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        pand    xmm0,xmm6               ; even-indexed samples, widened to words
+        psrlw   xmm4,BYTE_BIT           ; odd-indexed samples, widened to words
+        pand    xmm1,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm0,xmm4               ; row 0: even + odd
+        paddw   xmm1,xmm5               ; row 1: even + odd
+
+        movdqa  xmm4,xmm2
+        movdqa  xmm5,xmm3
+        pand    xmm2,xmm6
+        psrlw   xmm4,BYTE_BIT
+        pand    xmm3,xmm6
+        psrlw   xmm5,BYTE_BIT
+        paddw   xmm2,xmm4               ; same pairing for the second vectors
+        paddw   xmm3,xmm5
+
+        paddw   xmm0,xmm1               ; row 0 + row 1 = sum of four samples
+        paddw   xmm2,xmm3
+        paddw   xmm0,xmm7               ; + alternating bias
+        paddw   xmm2,xmm7
+        psrlw   xmm0,2                  ; / 4
+        psrlw   xmm2,2
+
+        packuswb xmm0,xmm2              ; narrow both halves back to bytes
+
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+
+        sub     ecx, byte SIZEOF_XMMWORD        ; outcol
+        add     edx, byte 2*SIZEOF_XMMWORD      ; inptr0
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr1
+        add     edi, byte 1*SIZEOF_XMMWORD      ; outptr
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jae     near .columnloop
+        test    ecx,ecx
+        jnz     near .columnloop_r8     ; a partial vector remains
+
+        pop     esi
+        pop     edi
+        pop     ecx
+
+        add     esi, byte 2*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 1*SIZEOF_JSAMPROW     ; output_data
+        dec     eax                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdcolext-mmx.asm b/simd/jdcolext-mmx.asm
new file mode 100644
index 0000000..de1f00f
--- /dev/null
+++ b/simd/jdcolext-mmx.asm
@@ -0,0 +1,405 @@
+;
+; jdcolext.asm - colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace (YCbCr -> RGB, MMX).
+;
+; GLOBAL(void)
+; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width,
+;                            JSAMPIMAGE input_buf, JDIMENSION input_row,
+;                            JSAMPARRAY output_buf, int num_rows)
+;
+
+%define out_width(b)    (b)+8           ; JDIMENSION out_width
+%define input_buf(b)    (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)    (b)+16          ; JDIMENSION input_row
+%define output_buf(b)   (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)     (b)+24          ; int num_rows
+
+%define original_ebp    ebp+0           ; slot holding the unaligned frame ptr
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_mmx)
+
+EXTN(jsimd_ycc_rgb_convert_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room for the saved frame ptr
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; [original_ebp] = eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[] below aligned ebp
+        pushpic eax             ; make room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [out_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
+
+        push    ecx                     ; keep num_cols; ecx is reused below
+
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [input_row(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; &input_buf[0][input_row]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]  ; &input_buf[1][input_row]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]  ; &input_buf[2][input_row]
+
+        pop     ecx                     ; ecx = num_cols again
+
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
+.rowloop:
+        push    eax                     ; num_rows counter
+        push    edi                     ; output_buf position
+        push    edx                     ; input_buf[2] position
+        push    ebx                     ; input_buf[1] position
+        push    esi                     ; input_buf[0] position
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr0
+        mov     ebx, JSAMPROW [ebx]     ; inptr1
+        mov     edx, JSAMPROW [edx]     ; inptr2
+        mov     edi, JSAMPROW [edi]     ; outptr
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+        alignx  16,7
+.columnloop:                            ; one MMWORD of Y/Cb/Cr samples per pass
+
+        movq    mm5, MMWORD [ebx]       ; mm5=Cb(01234567)
+        movq    mm1, MMWORD [edx]       ; mm1=Cr(01234567)
+
+        pcmpeqw mm4,mm4
+        pcmpeqw mm7,mm7
+        psrlw   mm4,BYTE_BIT
+        psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+        movq    mm0,mm4                 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
+
+        pand    mm4,mm5                 ; mm4=Cb(0246)=CbE
+        psrlw   mm5,BYTE_BIT            ; mm5=Cb(1357)=CbO
+        pand    mm0,mm1                 ; mm0=Cr(0246)=CrE
+        psrlw   mm1,BYTE_BIT            ; mm1=Cr(1357)=CrO
+
+        paddw   mm4,mm7                 ; CbE -= 128 (0xFF80 == -128)
+        paddw   mm5,mm7                 ; CbO -= 128
+        paddw   mm0,mm7                 ; CrE -= 128
+        paddw   mm1,mm7                 ; CrO -= 128
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movq    mm2,mm4                 ; mm2=CbE
+        movq    mm3,mm5                 ; mm3=CbO
+        paddw   mm4,mm4                 ; mm4=2*CbE
+        paddw   mm5,mm5                 ; mm5=2*CbO
+        movq    mm6,mm0                 ; mm6=CrE
+        movq    mm7,mm1                 ; mm7=CrO
+        paddw   mm0,mm0                 ; mm0=2*CrE
+        paddw   mm1,mm1                 ; mm1=2*CrO
+
+        pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbE * -FIX(0.22800))
+        pmulhw  mm5,[GOTOFF(eax,PW_MF0228)]     ; mm5=(2*CbO * -FIX(0.22800))
+        pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrE * FIX(0.40200))
+        pmulhw  mm1,[GOTOFF(eax,PW_F0402)]      ; mm1=(2*CrO * FIX(0.40200))
+
+        paddw   mm4,[GOTOFF(eax,PW_ONE)]        ; +1 so the >>1 below rounds
+        paddw   mm5,[GOTOFF(eax,PW_ONE)]
+        psraw   mm4,1                   ; mm4=(CbE * -FIX(0.22800))
+        psraw   mm5,1                   ; mm5=(CbO * -FIX(0.22800))
+        paddw   mm0,[GOTOFF(eax,PW_ONE)]
+        paddw   mm1,[GOTOFF(eax,PW_ONE)]
+        psraw   mm0,1                   ; mm0=(CrE * FIX(0.40200))
+        psraw   mm1,1                   ; mm1=(CrO * FIX(0.40200))
+
+        paddw   mm4,mm2
+        paddw   mm5,mm3
+        paddw   mm4,mm2                 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   mm5,mm3                 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   mm0,mm6                 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   mm1,mm7                 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(B-Y)E
+        movq    MMWORD [wk(1)], mm5     ; wk(1)=(B-Y)O
+
+        movq      mm4,mm2
+        movq      mm5,mm3
+        punpcklwd mm2,mm6               ; interleave Cb,Cr words for pmaddwd
+        punpckhwd mm4,mm6
+        pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm4,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd mm3,mm7
+        punpckhwd mm5,mm7
+        pmaddwd   mm3,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+
+        paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]  ; round before descaling
+        paddd     mm4,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm2,SCALEBITS
+        psrad     mm4,SCALEBITS
+        paddd     mm3,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm3,SCALEBITS
+        psrad     mm5,SCALEBITS
+
+        packssdw  mm2,mm4       ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  mm3,mm5       ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     mm2,mm6       ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     mm3,mm7       ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+
+        movq      mm5, MMWORD [esi]     ; mm5=Y(01234567)
+
+        pcmpeqw   mm4,mm4
+        psrlw     mm4,BYTE_BIT          ; mm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      mm4,mm5               ; mm4=Y(0246)=YE
+        psrlw     mm5,BYTE_BIT          ; mm5=Y(1357)=YO
+
+        paddw     mm0,mm4               ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
+        paddw     mm1,mm5               ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
+        packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+        packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+        paddw     mm2,mm4               ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
+        paddw     mm3,mm5               ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
+        packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+        packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+        paddw     mm4, MMWORD [wk(0)]   ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
+        paddw     mm5, MMWORD [wk(1)]   ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
+        packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+        packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+        punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
+
+        movq      mmG,mmA
+        movq      mmH,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+        punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
+
+        psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+        psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
+
+        movq      mmC,mmD
+        movq      mmB,mmD
+        punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+        punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
+
+        psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
+
+        movq      mmF,mmE
+        punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+        punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
+
+        punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+        punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+        punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .nextrow          ; row finished exactly
+
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st16:                           ; partial store; ecx becomes a byte count
+        lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_MMWORD
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    mmA,mmC                 ; remaining bytes now live in mmA
+        sub     ecx, byte 2*SIZEOF_MMWORD
+        add     edi, byte 2*SIZEOF_MMWORD
+        jmp     short .column_st4
+.column_st8:
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmE                 ; remaining bytes now live in mmA
+        sub     ecx, byte SIZEOF_MMWORD
+        add     edi, byte SIZEOF_MMWORD
+.column_st4:
+        movd    eax,mmA                 ; eax (GOT pointer) is no longer needed
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st2
+        mov     DWORD [edi+0*SIZEOF_DWORD], eax
+        psrlq   mmA,DWORD_BIT
+        movd    eax,mmA                 ; eax = next four pending bytes
+        sub     ecx, byte SIZEOF_DWORD
+        add     edi, byte SIZEOF_DWORD
+.column_st2:
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi+0*SIZEOF_WORD], ax
+        shr     eax,WORD_BIT
+        sub     ecx, byte SIZEOF_WORD
+        add     edi, byte SIZEOF_WORD
+.column_st1:
+        cmp     ecx, byte SIZEOF_BYTE
+        jb      short .nextrow
+        mov     BYTE [edi+0*SIZEOF_BYTE], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+        pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+        punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+        punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
+
+        movq      mmC,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+        punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+        movq      mmG,mmB
+        punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+        punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
+
+        movq      mmD,mmA
+        punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+        punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+        movq      mmH,mmC
+        punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+        punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .nextrow          ; row finished exactly
+
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st16:                           ; partial store; ecx stays a pixel count
+        cmp     ecx, byte SIZEOF_MMWORD/2
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    mmA,mmC                 ; shift the pending pixels down
+        movq    mmD,mmH
+        sub     ecx, byte SIZEOF_MMWORD/2
+        add     edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+        cmp     ecx, byte SIZEOF_MMWORD/4
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmD                 ; shift the pending pixels down
+        sub     ecx, byte SIZEOF_MMWORD/4
+        add     edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+        cmp     ecx, byte SIZEOF_MMWORD/8
+        jb      short .nextrow
+        movd    DWORD [edi+0*SIZEOF_DWORD], mmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        alignx  16,7
+
+.nextrow:
+        pop     ecx                     ; restore in reverse order of .rowloop
+        pop     esi
+        pop     ebx
+        pop     edx
+        pop     edi
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_buf[0] row pointer
+        add     ebx, byte SIZEOF_JSAMPROW       ; input_buf[1] row pointer
+        add     edx, byte SIZEOF_JSAMPROW       ; input_buf[2] row pointer
+        add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdcolext-sse2-64.asm b/simd/jdcolext-sse2-64.asm
new file mode 100644
index 0000000..bfd1f35
--- /dev/null
+++ b/simd/jdcolext-sse2-64.asm
@@ -0,0 +1,441 @@
+;
+; jdcolext-sse2-64.asm - colorspace conversion (64-bit SSE2)
+;
+; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009, 2012 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
+;                             JSAMPIMAGE input_buf, JDIMENSION input_row,
+;                             JSAMPARRAY output_buf, int num_rows)
+;
+
+; r10 = JDIMENSION out_width
+; r11 = JSAMPIMAGE input_buf
+; r12 = JDIMENSION input_row
+; r13 = JSAMPARRAY output_buf
+; r14 = int num_rows
+
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_sse2)
+
+EXTN(jsimd_ycc_rgb_convert_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = rsp after the push (points at saved rbp)
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; keep the pre-alignment rsp for the epilogue
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]                    ; rsp = &wk[0] (room for wk[WK_NUM] below rbp)
+        collect_args                            ; arguments are then read from r10-r14 (see list above)
+        push    rbx
+
+        mov     ecx, r10d       ; num_cols (32-bit JDIMENSION; ecx write zero-extends into rcx)
+        test    rcx,rcx
+        jz      near .return    ; nothing to do for a zero-width image
+
+        push    rcx             ; rcx is reused as the row index below
+
+        mov     rdi, r11
+        mov     ecx, r12d       ; input_row (32-bit JDIMENSION; ecx write zero-extends into rcx)
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+        lea     rsi, [rsi+rcx*SIZEOF_JSAMPROW]  ; rsi = &input_buf[0][input_row]
+        lea     rbx, [rbx+rcx*SIZEOF_JSAMPROW]  ; rbx = &input_buf[1][input_row]
+        lea     rdx, [rdx+rcx*SIZEOF_JSAMPROW]  ; rdx = &input_buf[2][input_row]
+
+        pop     rcx             ; rcx = num_cols again
+
+        mov     rdi, r13
+        mov     eax, r14d       ; num_rows (int); the eax write clears bits 63:32 of rax
+        test    eax,eax         ; signed 32-bit test so that a negative count is rejected too
+        jle     near .return
+.rowloop:
+        push    rax             ; saved here, restored in reverse order at .nextrow
+        push    rdi
+        push    rdx
+        push    rbx
+        push    rsi
+        push    rcx                     ; col
+
+        mov     rsi, JSAMPROW [rsi]     ; inptr0
+        mov     rbx, JSAMPROW [rbx]     ; inptr1
+        mov     rdx, JSAMPROW [rdx]     ; inptr2
+        mov     rdi, JSAMPROW [rdi]     ; outptr
+.columnloop:
+
+        movdqa  xmm5, XMMWORD [rbx]     ; xmm5=Cb(0123456789ABCDEF)
+        movdqa  xmm1, XMMWORD [rdx]     ; xmm1=Cr(0123456789ABCDEF)
+
+        pcmpeqw xmm4,xmm4
+        pcmpeqw xmm7,xmm7
+        psrlw   xmm4,BYTE_BIT
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        movdqa  xmm0,xmm4               ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
+
+        pand    xmm4,xmm5               ; xmm4=Cb(02468ACE)=CbE
+        psrlw   xmm5,BYTE_BIT           ; xmm5=Cb(13579BDF)=CbO
+        pand    xmm0,xmm1               ; xmm0=Cr(02468ACE)=CrE
+        psrlw   xmm1,BYTE_BIT           ; xmm1=Cr(13579BDF)=CrO
+
+        paddw   xmm4,xmm7               ; adding 0xFF80 subtracts 128 from each word
+        paddw   xmm5,xmm7
+        paddw   xmm0,xmm7
+        paddw   xmm1,xmm7
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movdqa  xmm2,xmm4               ; xmm2=CbE
+        movdqa  xmm3,xmm5               ; xmm3=CbO
+        paddw   xmm4,xmm4               ; xmm4=2*CbE
+        paddw   xmm5,xmm5               ; xmm5=2*CbO
+        movdqa  xmm6,xmm0               ; xmm6=CrE
+        movdqa  xmm7,xmm1               ; xmm7=CrO
+        paddw   xmm0,xmm0               ; xmm0=2*CrE
+        paddw   xmm1,xmm1               ; xmm1=2*CrO
+
+        pmulhw  xmm4,[rel PW_MF0228]    ; xmm4=(2*CbE * -FIX(0.22800))
+        pmulhw  xmm5,[rel PW_MF0228]    ; xmm5=(2*CbO * -FIX(0.22800))
+        pmulhw  xmm0,[rel PW_F0402]     ; xmm0=(2*CrE * FIX(0.40200))
+        pmulhw  xmm1,[rel PW_F0402]     ; xmm1=(2*CrO * FIX(0.40200))
+
+        paddw   xmm4,[rel PW_ONE]       ; add PW_ONE before the >>1 below
+        paddw   xmm5,[rel PW_ONE]
+        psraw   xmm4,1                  ; xmm4=(CbE * -FIX(0.22800))
+        psraw   xmm5,1                  ; xmm5=(CbO * -FIX(0.22800))
+        paddw   xmm0,[rel PW_ONE]
+        paddw   xmm1,[rel PW_ONE]
+        psraw   xmm0,1                  ; xmm0=(CrE * FIX(0.40200))
+        psraw   xmm1,1                  ; xmm1=(CrO * FIX(0.40200))
+
+        paddw   xmm4,xmm2
+        paddw   xmm5,xmm3
+        paddw   xmm4,xmm2               ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   xmm5,xmm3               ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   xmm0,xmm6               ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   xmm1,xmm7               ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=(B-Y)E
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(B-Y)O
+
+        movdqa    xmm4,xmm2
+        movdqa    xmm5,xmm3
+        punpcklwd xmm2,xmm6             ; interleave Cb/Cr words as pmaddwd operand pairs
+        punpckhwd xmm4,xmm6
+        pmaddwd   xmm2,[rel PW_MF0344_F0285]
+        pmaddwd   xmm4,[rel PW_MF0344_F0285]
+        punpcklwd xmm3,xmm7
+        punpckhwd xmm5,xmm7
+        pmaddwd   xmm3,[rel PW_MF0344_F0285]
+        pmaddwd   xmm5,[rel PW_MF0344_F0285]
+
+        paddd     xmm2,[rel PD_ONEHALF]
+        paddd     xmm4,[rel PD_ONEHALF]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm4,SCALEBITS
+        paddd     xmm3,[rel PD_ONEHALF]
+        paddd     xmm5,[rel PD_ONEHALF]
+        psrad     xmm3,SCALEBITS
+        psrad     xmm5,SCALEBITS
+
+        packssdw  xmm2,xmm4     ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  xmm3,xmm5     ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     xmm2,xmm6     ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     xmm3,xmm7     ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+
+        movdqa    xmm5, XMMWORD [rsi]   ; xmm5=Y(0123456789ABCDEF)
+
+        pcmpeqw   xmm4,xmm4
+        psrlw     xmm4,BYTE_BIT         ; xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      xmm4,xmm5             ; xmm4=Y(02468ACE)=YE
+        psrlw     xmm5,BYTE_BIT         ; xmm5=Y(13579BDF)=YO
+
+        paddw     xmm0,xmm4             ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm5             ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+        paddw     xmm2,xmm4             ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm5             ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+        paddw     xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
+        paddw     xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32      ; fewer than SIZEOF_XMMWORD columns left: partial store
+
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+.out0:
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
+
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+
+.column_st32:
+        lea     rcx, [rcx+rcx*2]                ; rcx = remaining columns * 3 = output bytes left
+        cmp     rcx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     rcx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
+.column_st16:
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD
+.column_st15:
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_MMWORD
+        sub     rcx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
+.column_st7:
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_DWORD
+        sub     rcx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
+.column_st3:
+        ; Store the lower 2 bytes of rax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     rcx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [rdi], ax
+        add     rdi, byte SIZEOF_WORD
+        sub     rcx, byte SIZEOF_WORD
+        shr     rax, 16
+.column_st1:
+        ; Store the lower 1 byte of rax to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .nextrow
+        mov     BYTE [rdi], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32      ; fewer than SIZEOF_XMMWORD columns left: partial store
+
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+.out0:
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
+
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+
+.column_st32:
+        cmp     rcx, byte SIZEOF_XMMWORD/2      ; rcx still counts columns (pixels) here
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     rcx, byte SIZEOF_XMMWORD/2
+.column_st16:
+        cmp     rcx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD/4
+.column_st15:
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_XMMWORD/8*4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .nextrow
+        movd    XMM_DWORD [rdi], xmmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.nextrow:
+        pop     rcx             ; restore the values pushed at .rowloop (reverse order)
+        pop     rsi
+        pop     rbx
+        pop     rdx
+        pop     rdi
+        pop     rax
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; next row pointer, component 0
+        add     rbx, byte SIZEOF_JSAMPROW       ; next row pointer, component 1
+        add     rdx, byte SIZEOF_JSAMPROW       ; next row pointer, component 2
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     rax                             ; num_rows
+        jg      near .rowloop
+
+        sfence          ; flush the write buffer (orders the movntdq stores above)
+
+.return:
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- pre-alignment rsp saved in the prologue
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdcolext-sse2.asm b/simd/jdcolext-sse2.asm
new file mode 100644
index 0000000..54ae4db
--- /dev/null
+++ b/simd/jdcolext-sse2.asm
@@ -0,0 +1,460 @@
+;
+; jdcolext-sse2.asm - colorspace conversion (SSE2)
+;
+; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2012 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
+;                             JSAMPIMAGE input_buf, JDIMENSION input_row,
+;                             JSAMPARRAY output_buf, int num_rows)
+;
+
+%define out_width(b)    (b)+8           ; JDIMENSION out_width
+%define input_buf(b)    (b)+12          ; JSAMPIMAGE input_buf
+%define input_row(b)    (b)+16          ; JDIMENSION input_row
+%define output_buf(b)   (b)+20          ; JSAMPARRAY output_buf
+%define num_rows(b)     (b)+24          ; int num_rows
+
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_ycc_rgb_convert_sse2)
+
+EXTN(jsimd_ycc_rgb_convert_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic eax             ; make a room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [out_width(eax)]        ; num_cols
+        test    ecx,ecx
+        jz      near .return
+
+        push    ecx
+
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [input_row(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]
+        lea     ebx, [ebx+ecx*SIZEOF_JSAMPROW]
+        lea     edx, [edx+ecx*SIZEOF_JSAMPROW]
+
+        pop     ecx
+
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     eax, INT [num_rows(eax)]
+        test    eax,eax
+        jle     near .return
+        alignx  16,7
+.rowloop:
+        push    eax
+        push    edi
+        push    edx
+        push    ebx
+        push    esi
+        push    ecx                     ; col
+
+        mov     esi, JSAMPROW [esi]     ; inptr0
+        mov     ebx, JSAMPROW [ebx]     ; inptr1
+        mov     edx, JSAMPROW [edx]     ; inptr2
+        mov     edi, JSAMPROW [edi]     ; outptr
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+        alignx  16,7
+.columnloop:
+
+        movdqa  xmm5, XMMWORD [ebx]     ; xmm5=Cb(0123456789ABCDEF)
+        movdqa  xmm1, XMMWORD [edx]     ; xmm1=Cr(0123456789ABCDEF)
+
+        pcmpeqw xmm4,xmm4
+        pcmpeqw xmm7,xmm7
+        psrlw   xmm4,BYTE_BIT
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+        movdqa  xmm0,xmm4               ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
+
+        pand    xmm4,xmm5               ; xmm4=Cb(02468ACE)=CbE
+        psrlw   xmm5,BYTE_BIT           ; xmm5=Cb(13579BDF)=CbO
+        pand    xmm0,xmm1               ; xmm0=Cr(02468ACE)=CrE
+        psrlw   xmm1,BYTE_BIT           ; xmm1=Cr(13579BDF)=CrO
+
+        paddw   xmm4,xmm7
+        paddw   xmm5,xmm7
+        paddw   xmm0,xmm7
+        paddw   xmm1,xmm7
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movdqa  xmm2,xmm4               ; xmm2=CbE
+        movdqa  xmm3,xmm5               ; xmm3=CbO
+        paddw   xmm4,xmm4               ; xmm4=2*CbE
+        paddw   xmm5,xmm5               ; xmm5=2*CbO
+        movdqa  xmm6,xmm0               ; xmm6=CrE
+        movdqa  xmm7,xmm1               ; xmm7=CrO
+        paddw   xmm0,xmm0               ; xmm0=2*CrE
+        paddw   xmm1,xmm1               ; xmm1=2*CrO
+
+        pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbE * -FIX(0.22800))
+        pmulhw  xmm5,[GOTOFF(eax,PW_MF0228)]    ; xmm5=(2*CbO * -FIX(0.22800))
+        pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrE * FIX(0.40200))
+        pmulhw  xmm1,[GOTOFF(eax,PW_F0402)]     ; xmm1=(2*CrO * FIX(0.40200))
+
+        paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm5,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm4,1                  ; xmm4=(CbE * -FIX(0.22800))
+        psraw   xmm5,1                  ; xmm5=(CbO * -FIX(0.22800))
+        paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm1,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm0,1                  ; xmm0=(CrE * FIX(0.40200))
+        psraw   xmm1,1                  ; xmm1=(CrO * FIX(0.40200))
+
+        paddw   xmm4,xmm2
+        paddw   xmm5,xmm3
+        paddw   xmm4,xmm2               ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
+        paddw   xmm5,xmm3               ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
+        paddw   xmm0,xmm6               ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
+        paddw   xmm1,xmm7               ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=(B-Y)E
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(B-Y)O
+
+        movdqa    xmm4,xmm2
+        movdqa    xmm5,xmm3
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm4,xmm6
+        pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd xmm3,xmm7
+        punpckhwd xmm5,xmm7
+        pmaddwd   xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+
+        paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm4,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm4,SCALEBITS
+        paddd     xmm3,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm3,SCALEBITS
+        psrad     xmm5,SCALEBITS
+
+        packssdw  xmm2,xmm4     ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
+        packssdw  xmm3,xmm5     ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
+        psubw     xmm2,xmm6     ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
+        psubw     xmm3,xmm7     ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
+
+        movdqa    xmm5, XMMWORD [esi]   ; xmm5=Y(0123456789ABCDEF)
+
+        pcmpeqw   xmm4,xmm4
+        psrlw     xmm4,BYTE_BIT         ; xmm4={0xFF 0x00 0xFF 0x00 ..}
+        pand      xmm4,xmm5             ; xmm4=Y(02468ACE)=YE
+        psrlw     xmm5,BYTE_BIT         ; xmm5=Y(13579BDF)=YO
+
+        paddw     xmm0,xmm4             ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm5             ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+        paddw     xmm2,xmm4             ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm5             ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+        paddw     xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
+        paddw     xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
+
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+.out0:
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
+
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st32:
+        lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     ecx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
+.column_st16:
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD
+.column_st15:
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_MMWORD
+        sub     ecx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
+.column_st7:
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [edi], xmmA
+        add     edi, byte SIZEOF_DWORD
+        sub     ecx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
+.column_st3:
+        ; Store the lower 2 bytes of eax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi], ax
+        add     edi, byte SIZEOF_WORD
+        sub     ecx, byte SIZEOF_WORD
+        shr     eax, 16
+.column_st1:
+        ; Store the lower 1 byte of eax to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .nextrow
+        mov     BYTE [edi], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        cmp     ecx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
+
+        test    edi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+.out0:
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD
+        jz      near .nextrow
+
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st32:
+        cmp     ecx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     ecx, byte SIZEOF_XMMWORD/2
+.column_st16:
+        cmp     ecx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD/4
+.column_st15:
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_XMMWORD/8*4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .nextrow
+        movd    XMM_DWORD [edi], xmmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+        alignx  16,7
+
+.nextrow:
+        pop     ecx
+        pop     esi
+        pop     ebx
+        pop     edx
+        pop     edi
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW
+        add     ebx, byte SIZEOF_JSAMPROW
+        add     edx, byte SIZEOF_JSAMPROW
+        add     edi, byte SIZEOF_JSAMPROW       ; output_buf
+        dec     eax                             ; num_rows
+        jg      near .rowloop
+
+        sfence          ; flush the write buffer
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdcolor-mmx.asm b/simd/jdcolor-mmx.asm
new file mode 100644
index 0000000..6730e48
--- /dev/null
+++ b/simd/jdcolor-mmx.asm
@@ -0,0 +1,120 @@
+;
+; jdcolor.asm - colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_mmx)
+
+EXTN(jconst_ycc_rgb_convert_mmx):
+; Table of 8-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 4 dw  F_0_402             ; 4 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 4 dw -F_0_228             ; 4 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285    ; 2 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 4 dw  1                   ; 4 x word  1
+PD_ONEHALF      times 2 dd  1 << (SCALEBITS-1)  ; 2 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Template: included as-is first, then per EXT_* layout with RGB_*/name rebound.
+%include "jdcolext-mmx.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx
+%include "jdcolext-mmx.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx
+%include "jdcolext-mmx.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx
+%include "jdcolext-mmx.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx
+%include "jdcolext-mmx.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx
+%include "jdcolext-mmx.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx
+%include "jdcolext-mmx.asm"
diff --git a/simd/jdcolor-sse2-64.asm b/simd/jdcolor-sse2-64.asm
new file mode 100644
index 0000000..e9277f1
--- /dev/null
+++ b/simd/jdcolor-sse2-64.asm
@@ -0,0 +1,120 @@
+;
+; jdcolor.asm - colorspace conversion (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_sse2)
+
+EXTN(jconst_ycc_rgb_convert_sse2):
+; Table of 16-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 8 dw  F_0_402             ; 8 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 8 dw -F_0_228             ; 8 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285    ; 4 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 8 dw  1                   ; 8 x word  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)  ; 4 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+; Template: included as-is first, then per EXT_* layout with RGB_*/name rebound.
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
+%include "jdcolext-sse2-64.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
+%include "jdcolext-sse2-64.asm"
diff --git a/simd/jdcolor-sse2.asm b/simd/jdcolor-sse2.asm
new file mode 100644
index 0000000..c122cc7
--- /dev/null
+++ b/simd/jdcolor-sse2.asm
@@ -0,0 +1,120 @@
+;
+; jdcolor.asm - colorspace conversion (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_ycc_rgb_convert_sse2)
+
+EXTN(jconst_ycc_rgb_convert_sse2):
+; Table of 16-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 8 dw  F_0_402             ; 8 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 8 dw -F_0_228             ; 8 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285    ; 4 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 8 dw  1                   ; 8 x word  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)  ; 4 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Template: included as-is first, then per EXT_* layout with RGB_*/name rebound.
+%include "jdcolext-sse2.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
+%include "jdcolext-sse2.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
+%include "jdcolext-sse2.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
+%include "jdcolext-sse2.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
+%include "jdcolext-sse2.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
+%include "jdcolext-sse2.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
+%include "jdcolext-sse2.asm"
diff --git a/simd/jdct.inc b/simd/jdct.inc
new file mode 100644
index 0000000..ad5890c
--- /dev/null
+++ b/simd/jdct.inc
@@ -0,0 +1,28 @@
+;
+; jdct.inc - private declarations for forward & reverse DCT subsystems
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; [TAB8]
+
+; Each IDCT routine is responsible for range-limiting its results and
+; converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+; be quite far out of range if the input data is corrupt, so a bulletproof
+; range-limiting step is required.  We use a mask-and-table-lookup method
+; to do the combined operations quickly.
+;
+%define RANGE_MASK  (MAXJSAMPLE * 4 + 3)  ; 2 bits wider than legal samples
+; Address arithmetic helpers (b = base, n/m = indices, s = step size):
+%define ROW(n,b,s)              ((b)+(n)*(s))          ; b + n steps of s
+%define COL(n,b,s)              ((b)+(n)*(s)*DCTSIZE)  ; b + n steps of s*DCTSIZE
+; xBLOCK(m,n,b,s): b + m steps of DCTSIZE*s, plus n dword/mmword/xmmword units.
+%define DWBLOCK(m,n,b,s)        ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
+%define MMBLOCK(m,n,b,s)        ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
+%define XMMBLOCK(m,n,b,s)       ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
+
+; --------------------------------------------------------------------------
diff --git a/simd/jdmerge-mmx.asm b/simd/jdmerge-mmx.asm
new file mode 100644
index 0000000..2daa7fa
--- /dev/null
+++ b/simd/jdmerge-mmx.asm
@@ -0,0 +1,126 @@
+;
+; jdmerge.asm - merged upsampling/color conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_merged_upsample_mmx)
+
+EXTN(jconst_merged_upsample_mmx):
+; Table of 8-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 4 dw  F_0_402             ; 4 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 4 dw -F_0_228             ; 4 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285    ; 2 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 4 dw  1                   ; 4 x word  1
+PD_ONEHALF      times 2 dd  1 << (SCALEBITS-1)  ; 2 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Template: included as-is first, then per EXT_* layout with RGB_*/names rebound.
+%include "jdmrgext-mmx.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx
+%define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx
+%include "jdmrgext-mmx.asm"
diff --git a/simd/jdmerge-sse2-64.asm b/simd/jdmerge-sse2-64.asm
new file mode 100644
index 0000000..8f953c7
--- /dev/null
+++ b/simd/jdmerge-sse2-64.asm
@@ -0,0 +1,126 @@
+;
+; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_merged_upsample_sse2)
+
+EXTN(jconst_merged_upsample_sse2):
+; Table of 16-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 8 dw  F_0_402             ; 8 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 8 dw -F_0_228             ; 8 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285    ; 4 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 8 dw  1                   ; 8 x word  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)  ; 4 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+; Template: included as-is first, then per EXT_* layout with RGB_*/names rebound.
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
+%include "jdmrgext-sse2-64.asm"
diff --git a/simd/jdmerge-sse2.asm b/simd/jdmerge-sse2.asm
new file mode 100644
index 0000000..d22e828
--- /dev/null
+++ b/simd/jdmerge-sse2.asm
@@ -0,0 +1,126 @@
+;
+; jdmerge.asm - merged upsampling/color conversion (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS       16              ; FIX(x) = round(x * 2^SCALEBITS)
+; The three derived values at the end each fit in a signed 16-bit word.
+F_0_344 equ      22554                  ; FIX(0.34414)
+F_0_714 equ      46802                  ; FIX(0.71414)
+F_1_402 equ      91881                  ; FIX(1.40200)
+F_1_772 equ     116130                  ; FIX(1.77200)
+F_0_402 equ     (F_1_402 - 65536)       ; FIX(1.40200) - FIX(1)
+F_0_285 equ     ( 65536 - F_0_714)      ; FIX(1) - FIX(0.71414)
+F_0_228 equ     (131072 - F_1_772)      ; FIX(2) - FIX(1.77200)
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_merged_upsample_sse2)
+
+EXTN(jconst_merged_upsample_sse2):
+; Table of 16-byte entries (packed words/dwords) built from the F_* constants:
+PW_F0402        times 8 dw  F_0_402             ; 8 x word  FIX(1.402) - FIX(1)
+PW_MF0228       times 8 dw -F_0_228             ; 8 x word  -(FIX(2) - FIX(1.772))
+PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285    ; 4 x word pair (-F_0_344, F_0_285)
+PW_ONE          times 8 dw  1                   ; 8 x word  1
+PD_ONEHALF      times 4 dd  1 << (SCALEBITS-1)  ; 4 x dword 0.5 in SCALEBITS fixed point
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+; Template: included as-is first, then per EXT_* layout with RGB_*/names rebound.
+%include "jdmrgext-sse2.asm"
+; ---- EXT_RGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_RGBX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_BGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_BGRX variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_XBGR variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
+; ---- EXT_XRGB variant ----
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
+%define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
+%include "jdmrgext-sse2.asm"
diff --git a/simd/jdmrgext-mmx.asm b/simd/jdmrgext-mmx.asm
new file mode 100644
index 0000000..a92e934
--- /dev/null
+++ b/simd/jdmrgext-mmx.asm
@@ -0,0 +1,464 @@
+;
+; jdmrgext-mmx.asm - merged upsampling/color conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_mmx (JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
+
+%define original_ebp    ebp+0                   ; slot where the prologue stores the unaligned frame base
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          3                       ; wk(0)=(B-Y)H, wk(1)=(R-Y)H, wk(2)=(G-Y)H
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_mmx)
+
+EXTN(jsimd_h2v1_merged_upsample_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original (unaligned) frame base; args are read via eax+8..
+        sub     esp, byte 4                     ; leave room for the dword stored at [esp] below
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; keep the unaligned frame base for the epilogue's "pop esp"
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[WK_NUM] below the aligned ebp
+        pushpic eax             ; make room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [output_width(eax)]     ; col
+        test    ecx,ecx
+        jz      near .return                    ; zero-width row: nothing to convert
+
+        push    ecx                             ; park col; ecx doubles as the row index below
+
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     edi, JSAMPROW [edi]                             ; outptr
+
+        pop     ecx                     ; col
+
+        alignx  16,7
+.columnloop:
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        movq      mm6, MMWORD [ebx]     ; mm6=Cb(01234567)
+        movq      mm7, MMWORD [edx]     ; mm7=Cr(01234567)
+
+        pxor      mm1,mm1               ; mm1=(all 0's)
+        pcmpeqw   mm3,mm3               ; mm3=(all 1's)
+        psllw     mm3,7                 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
+
+        movq      mm4,mm6
+        punpckhbw mm6,mm1               ; mm6=Cb(4567)=CbH
+        punpcklbw mm4,mm1               ; mm4=Cb(0123)=CbL
+        movq      mm0,mm7
+        punpckhbw mm7,mm1               ; mm7=Cr(4567)=CrH
+        punpcklbw mm0,mm1               ; mm0=Cr(0123)=CrL
+
+        paddw     mm6,mm3               ; Cb/Cr -= 128 (0xFF80 == -128 as a signed word)
+        paddw     mm4,mm3
+        paddw     mm7,mm3
+        paddw     mm0,mm3
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movq    mm5,mm6                 ; mm5=CbH
+        movq    mm2,mm4                 ; mm2=CbL
+        paddw   mm6,mm6                 ; mm6=2*CbH
+        paddw   mm4,mm4                 ; mm4=2*CbL
+        movq    mm1,mm7                 ; mm1=CrH
+        movq    mm3,mm0                 ; mm3=CrL
+        paddw   mm7,mm7                 ; mm7=2*CrH
+        paddw   mm0,mm0                 ; mm0=2*CrL
+
+        pmulhw  mm6,[GOTOFF(eax,PW_MF0228)]     ; mm6=(2*CbH * -FIX(0.22800))
+        pmulhw  mm4,[GOTOFF(eax,PW_MF0228)]     ; mm4=(2*CbL * -FIX(0.22800))
+        pmulhw  mm7,[GOTOFF(eax,PW_F0402)]      ; mm7=(2*CrH * FIX(0.40200))
+        pmulhw  mm0,[GOTOFF(eax,PW_F0402)]      ; mm0=(2*CrL * FIX(0.40200))
+
+        paddw   mm6,[GOTOFF(eax,PW_ONE)]        ; bias added before the >>1 below (PW_ONE is defined elsewhere)
+        paddw   mm4,[GOTOFF(eax,PW_ONE)]
+        psraw   mm6,1                   ; mm6=(CbH * -FIX(0.22800))
+        psraw   mm4,1                   ; mm4=(CbL * -FIX(0.22800))
+        paddw   mm7,[GOTOFF(eax,PW_ONE)]
+        paddw   mm0,[GOTOFF(eax,PW_ONE)]
+        psraw   mm7,1                   ; mm7=(CrH * FIX(0.40200))
+        psraw   mm0,1                   ; mm0=(CrL * FIX(0.40200))
+
+        paddw   mm6,mm5                 ; first of the two "+ Cb" terms
+        paddw   mm4,mm2
+        paddw   mm6,mm5                 ; mm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   mm4,mm2                 ; mm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   mm7,mm1                 ; mm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   mm0,mm3                 ; mm0=(CrL * FIX(1.40200))=(R-Y)L
+
+        movq    MMWORD [wk(0)], mm6     ; wk(0)=(B-Y)H
+        movq    MMWORD [wk(1)], mm7     ; wk(1)=(R-Y)H
+
+        movq      mm6,mm5               ; mm6=CbH
+        movq      mm7,mm2               ; mm7=CbL
+        punpcklwd mm5,mm1               ; mm5=(CbH,CrH) word pairs, low half
+        punpckhwd mm6,mm1               ; mm6=(CbH,CrH) word pairs, high half
+        pmaddwd   mm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm6,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd mm2,mm3               ; mm2=(CbL,CrL) word pairs, low half
+        punpckhwd mm7,mm3               ; mm7=(CbL,CrL) word pairs, high half
+        pmaddwd   mm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   mm7,[GOTOFF(eax,PW_MF0344_F0285)]
+
+        paddd     mm5,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm6,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm5,SCALEBITS
+        psrad     mm6,SCALEBITS
+        paddd     mm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     mm7,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     mm2,SCALEBITS
+        psrad     mm7,SCALEBITS
+
+        packssdw  mm5,mm6       ; mm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  mm2,mm7       ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     mm5,mm1       ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     mm2,mm3       ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+        movq    MMWORD [wk(2)], mm5     ; wk(2)=(G-Y)H
+
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st        ; 1st pass: mm0/mm2/mm4 still hold the (R-Y)L/(G-Y)L/(B-Y)L halves
+        alignx  16,7
+
+.Yloop_2nd:
+        movq    mm0, MMWORD [wk(1)]     ; mm0=(R-Y)H
+        movq    mm2, MMWORD [wk(2)]     ; mm2=(G-Y)H
+        movq    mm4, MMWORD [wk(0)]     ; mm4=(B-Y)H
+        alignx  16,7
+
+.Yloop_1st:
+        movq    mm7, MMWORD [esi]       ; mm7=Y(01234567)
+
+        pcmpeqw mm6,mm6                 ; mm6=(all 1's)
+        psrlw   mm6,BYTE_BIT            ; mm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    mm6,mm7                 ; mm6=Y(0246)=YE
+        psrlw   mm7,BYTE_BIT            ; mm7=Y(1357)=YO
+
+        movq    mm1,mm0                 ; mm1=mm0=(R-Y)(L/H)
+        movq    mm3,mm2                 ; mm3=mm2=(G-Y)(L/H)
+        movq    mm5,mm4                 ; mm5=mm4=(B-Y)(L/H)
+
+        paddw     mm0,mm6               ; mm0=((R-Y)+YE)=RE=(R0 R2 R4 R6)
+        paddw     mm1,mm7               ; mm1=((R-Y)+YO)=RO=(R1 R3 R5 R7)
+        packuswb  mm0,mm0               ; mm0=(R0 R2 R4 R6 ** ** ** **)
+        packuswb  mm1,mm1               ; mm1=(R1 R3 R5 R7 ** ** ** **)
+
+        paddw     mm2,mm6               ; mm2=((G-Y)+YE)=GE=(G0 G2 G4 G6)
+        paddw     mm3,mm7               ; mm3=((G-Y)+YO)=GO=(G1 G3 G5 G7)
+        packuswb  mm2,mm2               ; mm2=(G0 G2 G4 G6 ** ** ** **)
+        packuswb  mm3,mm3               ; mm3=(G1 G3 G5 G7 ** ** ** **)
+
+        paddw     mm4,mm6               ; mm4=((B-Y)+YE)=BE=(B0 B2 B4 B6)
+        paddw     mm5,mm7               ; mm5=((B-Y)+YO)=BO=(B1 B3 B5 B7)
+        packuswb  mm4,mm4               ; mm4=(B0 B2 B4 B6 ** ** ** **)
+        packuswb  mm5,mm5               ; mm5=(B1 B3 B5 B7 ** ** ** **)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
+
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmB               ; mmE=(20 01 22 03 24 05 26 07)
+        punpcklbw mmD,mmF               ; mmD=(11 21 13 23 15 25 17 27)
+
+        movq      mmG,mmA
+        movq      mmH,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 01 02 12 22 03)
+        punpckhwd mmG,mmE               ; mmG=(04 14 24 05 06 16 26 07)
+
+        psrlq     mmH,2*BYTE_BIT        ; mmH=(02 12 04 14 06 16 -- --)
+        psrlq     mmE,2*BYTE_BIT        ; mmE=(22 03 24 05 26 07 -- --)
+
+        movq      mmC,mmD
+        movq      mmB,mmD
+        punpcklwd mmD,mmH               ; mmD=(11 21 02 12 13 23 04 14)
+        punpckhwd mmC,mmH               ; mmC=(15 25 06 16 17 27 -- --)
+
+        psrlq     mmB,2*BYTE_BIT        ; mmB=(13 23 15 25 17 27 -- --)
+
+        movq      mmF,mmE
+        punpcklwd mmE,mmB               ; mmE=(22 03 13 23 24 05 15 25)
+        punpckhwd mmF,mmB               ; mmF=(26 07 17 27 -- -- -- --)
+
+        punpckldq mmA,mmD               ; mmA=(00 10 20 01 11 21 02 12)
+        punpckldq mmE,mmG               ; mmE=(22 03 13 23 04 14 24 05)
+        punpckldq mmC,mmF               ; mmC=(15 25 06 16 26 07 17 27)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left: partial store
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      near .endcolumn         ; row ended exactly on this group
+
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd         ; 2nd pass reuses this chroma group's H halves from wk[]
+
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st16:
+        lea     ecx, [ecx+ecx*2]        ; imul ecx, RGB_PIXELSIZE (ecx = bytes left)
+        cmp     ecx, byte 2*SIZEOF_MMWORD
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmE
+        movq    mmA,mmC                 ; third mmword becomes the pending data
+        sub     ecx, byte 2*SIZEOF_MMWORD
+        add     edi, byte 2*SIZEOF_MMWORD
+        jmp     short .column_st4
+.column_st8:
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmE                 ; second mmword becomes the pending data
+        sub     ecx, byte SIZEOF_MMWORD
+        add     edi, byte SIZEOF_MMWORD
+.column_st4:
+        movd    eax,mmA                 ; low dword of pending data (al/Yctr is dead: this path only exits)
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st2
+        mov     DWORD [edi+0*SIZEOF_DWORD], eax
+        psrlq   mmA,DWORD_BIT
+        movd    eax,mmA                 ; next dword of pending data
+        sub     ecx, byte SIZEOF_DWORD
+        add     edi, byte SIZEOF_DWORD
+.column_st2:
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi+0*SIZEOF_WORD], ax
+        shr     eax,WORD_BIT            ; expose the next byte in al
+        sub     ecx, byte SIZEOF_WORD
+        add     edi, byte SIZEOF_WORD
+.column_st1:
+        cmp     ecx, byte SIZEOF_BYTE
+        jb      short .endcolumn
+        mov     BYTE [edi+0*SIZEOF_BYTE], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pcmpeqb   mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%else
+        pxor      mm6,mm6               ; mm6=(X0 X2 X4 X6 ** ** ** **)
+        pxor      mm7,mm7               ; mm7=(X1 X3 X5 X7 ** ** ** **)
+%endif
+        ; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
+        ; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
+        ; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
+        ; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
+
+        punpcklbw mmA,mmC               ; mmA=(00 10 02 12 04 14 06 16)
+        punpcklbw mmE,mmG               ; mmE=(20 30 22 32 24 34 26 36)
+        punpcklbw mmB,mmD               ; mmB=(01 11 03 13 05 15 07 17)
+        punpcklbw mmF,mmH               ; mmF=(21 31 23 33 25 35 27 37)
+
+        movq      mmC,mmA
+        punpcklwd mmA,mmE               ; mmA=(00 10 20 30 02 12 22 32)
+        punpckhwd mmC,mmE               ; mmC=(04 14 24 34 06 16 26 36)
+        movq      mmG,mmB
+        punpcklwd mmB,mmF               ; mmB=(01 11 21 31 03 13 23 33)
+        punpckhwd mmG,mmF               ; mmG=(05 15 25 35 07 17 27 37)
+
+        movq      mmD,mmA
+        punpckldq mmA,mmB               ; mmA=(00 10 20 30 01 11 21 31)
+        punpckhdq mmD,mmB               ; mmD=(02 12 22 32 03 13 23 33)
+        movq      mmH,mmC
+        punpckldq mmC,mmG               ; mmC=(04 14 24 34 05 15 25 35)
+        punpckhdq mmH,mmG               ; mmH=(06 16 26 36 07 17 27 37)
+
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st16      ; fewer than SIZEOF_MMWORD pixels left: partial store
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mmC
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mmH
+
+        sub     ecx, byte SIZEOF_MMWORD
+        jz      short .endcolumn        ; row ended exactly on this group
+
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD   ; outptr
+        add     esi, byte SIZEOF_MMWORD                 ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd         ; 2nd pass reuses this chroma group's H halves from wk[]
+
+        add     ebx, byte SIZEOF_MMWORD                 ; inptr1
+        add     edx, byte SIZEOF_MMWORD                 ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+
+.column_st16:
+        cmp     ecx, byte SIZEOF_MMWORD/2       ; ecx still counts pixels here; each mmword holds two pixels
+        jb      short .column_st8
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mmD
+        movq    mmA,mmC                 ; pixels 4-5 become the pending data
+        movq    mmD,mmH                 ; pixels 6-7 follow them
+        sub     ecx, byte SIZEOF_MMWORD/2
+        add     edi, byte 2*SIZEOF_MMWORD
+.column_st8:
+        cmp     ecx, byte SIZEOF_MMWORD/4
+        jb      short .column_st4
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mmA
+        movq    mmA,mmD                 ; next pixel pair becomes the pending data
+        sub     ecx, byte SIZEOF_MMWORD/4
+        add     edi, byte 1*SIZEOF_MMWORD
+.column_st4:
+        cmp     ecx, byte SIZEOF_MMWORD/8
+        jb      short .endcolumn
+        movd    DWORD [edi+0*SIZEOF_DWORD], mmA ; final single pixel
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_mmx (JDIMENSION output_width,
+;                                 JSAMPIMAGE input_buf,
+;                                 JDIMENSION in_row_group_ctr,
+;                                 JSAMPARRAY output_buf);
+;
+
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
+
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_mmx)
+
+EXTN(jsimd_h2v2_merged_upsample_mmx):
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     eax, JDIMENSION [output_width(ebp)]
+
+        mov     edi, JSAMPIMAGE [input_buf(ebp)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(ebp)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; pre-bias plane 0: the h2v1 routine adds ecx again, reaching row 2*ctr
+
+        push    edx                     ; inptr2
+        push    ebx                     ; inptr1
+        push    esi                     ; inptr00
+        mov     ebx,esp                 ; ebx -> temporary 3-entry input_buf array built on the stack
+
+        push    edi                     ; output_buf (outptr0)
+        push    ecx                     ; in_row_group_ctr
+        push    ebx                     ; input_buf
+        push    eax                     ; output_width
+
+        call    near EXTN(jsimd_h2v1_merged_upsample_mmx)       ; first output row
+
+        add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+        add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+        mov     POINTER [ebx+0*SIZEOF_POINTER], esi     ; patch entry 0 of the temporary array
+        mov     POINTER [ebx-1*SIZEOF_POINTER], edi     ; patch the pushed output_buf argument in place
+
+        call    near EXTN(jsimd_h2v1_merged_upsample_mmx)       ; second output row, same stack arguments
+
+        add     esp, byte 7*SIZEOF_DWORD        ; drop 4 arguments + the 3-entry temporary array
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdmrgext-sse2-64.asm b/simd/jdmrgext-sse2-64.asm
new file mode 100644
index 0000000..ff127b5
--- /dev/null
+++ b/simd/jdmrgext-sse2-64.asm
@@ -0,0 +1,538 @@
+;
+; jdmrgext-sse2-64.asm - merged upsampling/color conversion (64-bit SSE2)
+;
+; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009, 2012 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width,
+;                                  JSAMPIMAGE input_buf,
+;                                  JDIMENSION in_row_group_ctr,
+;                                  JSAMPARRAY output_buf);
+;
+
+; r10 = JDIMENSION output_width
+; r11 = JSAMPIMAGE input_buf
+; r12 = JDIMENSION in_row_group_ctr
+; r13 = JSAMPARRAY output_buf
+
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          3
+
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_sse2)
+
+EXTN(jsimd_h2v1_merged_upsample_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
+
+        mov     rcx, r10        ; col
+        test    rcx,rcx
+        jz      near .return
+
+        push    rcx
+
+        mov     rdi, r11
+        mov     rcx, r12
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+        mov     rdi, r13
+        mov     rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     rdi, JSAMPROW [rdi]                             ; outptr
+
+        pop     rcx                     ; col
+
+.columnloop:
+
+        movdqa    xmm6, XMMWORD [rbx]   ; xmm6=Cb(0123456789ABCDEF)
+        movdqa    xmm7, XMMWORD [rdx]   ; xmm7=Cr(0123456789ABCDEF)
+
+        pxor      xmm1,xmm1             ; xmm1=(all 0's)
+        pcmpeqw   xmm3,xmm3
+        psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+        movdqa    xmm4,xmm6
+        punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+        punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+        movdqa    xmm0,xmm7
+        punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+        punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
+
+        paddw     xmm6,xmm3
+        paddw     xmm4,xmm3
+        paddw     xmm7,xmm3
+        paddw     xmm0,xmm3
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movdqa  xmm5,xmm6               ; xmm5=CbH
+        movdqa  xmm2,xmm4               ; xmm2=CbL
+        paddw   xmm6,xmm6               ; xmm6=2*CbH
+        paddw   xmm4,xmm4               ; xmm4=2*CbL
+        movdqa  xmm1,xmm7               ; xmm1=CrH
+        movdqa  xmm3,xmm0               ; xmm3=CrL
+        paddw   xmm7,xmm7               ; xmm7=2*CrH
+        paddw   xmm0,xmm0               ; xmm0=2*CrL
+
+        pmulhw  xmm6,[rel PW_MF0228]    ; xmm6=(2*CbH * -FIX(0.22800))
+        pmulhw  xmm4,[rel PW_MF0228]    ; xmm4=(2*CbL * -FIX(0.22800))
+        pmulhw  xmm7,[rel PW_F0402]     ; xmm7=(2*CrH * FIX(0.40200))
+        pmulhw  xmm0,[rel PW_F0402]     ; xmm0=(2*CrL * FIX(0.40200))
+
+        paddw   xmm6,[rel PW_ONE]
+        paddw   xmm4,[rel PW_ONE]
+        psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+        psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+        paddw   xmm7,[rel PW_ONE]
+        paddw   xmm0,[rel PW_ONE]
+        psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+        psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
+
+        paddw   xmm6,xmm5
+        paddw   xmm4,xmm2
+        paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
+
+        movdqa    xmm6,xmm5
+        movdqa    xmm7,xmm2
+        punpcklwd xmm5,xmm1
+        punpckhwd xmm6,xmm1
+        pmaddwd   xmm5,[rel PW_MF0344_F0285]
+        pmaddwd   xmm6,[rel PW_MF0344_F0285]
+        punpcklwd xmm2,xmm3
+        punpckhwd xmm7,xmm3
+        pmaddwd   xmm2,[rel PW_MF0344_F0285]
+        pmaddwd   xmm7,[rel PW_MF0344_F0285]
+
+        paddd     xmm5,[rel PD_ONEHALF]
+        paddd     xmm6,[rel PD_ONEHALF]
+        psrad     xmm5,SCALEBITS
+        psrad     xmm6,SCALEBITS
+        paddd     xmm2,[rel PD_ONEHALF]
+        paddd     xmm7,[rel PD_ONEHALF]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm7,SCALEBITS
+
+        packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+        movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
+
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st
+
+.Yloop_2nd:
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
+
+.Yloop_1st:
+        movdqa  xmm7, XMMWORD [rsi]     ; xmm7=Y(0123456789ABCDEF)
+
+        pcmpeqw xmm6,xmm6
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+        psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
+
+        movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+        movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+        movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
+
+        paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+        paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+        paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+        paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
+
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmF
+.out0:
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
+
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
+
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+
+.column_st32:
+        lea     rcx, [rcx+rcx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     rcx, byte 2*SIZEOF_XMMWORD
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF
+        sub     rcx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
+.column_st16:
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD
+.column_st15:
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_MMWORD
+        sub     rcx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD
+.column_st7:
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_DWORD
+        sub     rcx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD
+.column_st3:
+        ; Store the lower 2 bytes of rax to the output when it has enough
+        ; space.
+        movd    eax, xmmA
+        cmp     rcx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [rdi], ax
+        add     rdi, byte SIZEOF_WORD
+        sub     rcx, byte SIZEOF_WORD
+        shr     rax, 16
+.column_st1:
+        ; Store the lower 1 byte of rax to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .endcolumn
+        mov     BYTE [rdi], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        cmp     rcx, byte SIZEOF_XMMWORD
+        jb      short .column_st32
+
+        test    rdi, SIZEOF_XMMWORD-1
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmmH
+.out0:
+        add     rdi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     rcx, byte SIZEOF_XMMWORD
+        jz      near .endcolumn
+
+        add     rsi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
+
+        add     rbx, byte SIZEOF_XMMWORD        ; inptr1
+        add     rdx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+
+.column_st32:
+        cmp     rcx, byte SIZEOF_XMMWORD/2
+        jb      short .column_st16
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmmD
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC
+        movdqa  xmmD,xmmH
+        sub     rcx, byte SIZEOF_XMMWORD/2
+.column_st16:
+        cmp     rcx, byte SIZEOF_XMMWORD/4
+        jb      short .column_st15
+        movdqu  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmmA
+        add     rdi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     rcx, byte SIZEOF_XMMWORD/4
+.column_st15:
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     rcx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [rdi], xmmA
+        add     rdi, byte SIZEOF_XMMWORD/8*4
+        sub     rcx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    rcx, rcx
+        jz      short .endcolumn
+        movd    XMM_DWORD [rdi], xmmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+        sfence          ; flush the write buffer
+
+.return:
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width,
+;                                  JSAMPIMAGE input_buf,
+;                                  JDIMENSION in_row_group_ctr,
+;                                  JSAMPARRAY output_buf);
+;
+; Emits two output rows by calling the h2v1 routine once per luma row.
+; r10 = JDIMENSION output_width      (32-bit value: read as r10d)
+; r11 = JSAMPIMAGE input_buf
+; r12 = JDIMENSION in_row_group_ctr  (32-bit value: read as r12d)
+; r13 = JSAMPARRAY output_buf
+
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_sse2)
+
+EXTN(jsimd_h2v2_merged_upsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably read by collect_args
+        mov     rbp,rsp
+        collect_args
+        push    rbx
+        ; JDIMENSION args are 32-bit; the upper register halves are undefined.
+        mov     eax, r10d       ; output_width, zero-extended into rax
+
+        mov     rdi, r11
+        mov     ecx, r12d       ; in_row_group_ctr, zero-extended into rcx
+        mov     rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
+        mov     rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
+        mov     rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY]
+        mov     rdi, r13
+        lea     rsi, [rsi+rcx*SIZEOF_JSAMPROW]  ; &input_buf[0][in_row_group_ctr]
+        ; Build a temporary 3-entry input_buf array on the stack.
+        push    rdx                     ; inptr2
+        push    rbx                     ; inptr1
+        push    rsi                     ; inptr00
+        mov     rbx,rsp                 ; rbx = address of the temporary array
+        ; Save the values needed again for the second call.
+        push    rdi
+        push    rcx
+        push    rax
+        ; Load the four arguments into the registers of the active ABI.
+        %ifdef WIN64
+        mov     r8, rcx                 ; in_row_group_ctr
+        mov     r9, rdi                 ; output_buf
+        mov     rcx, rax                ; output_width
+        mov     rdx, rbx                ; input_buf (temporary array)
+        %else
+        mov     rdx, rcx                ; in_row_group_ctr
+        mov     rcx, rdi                ; output_buf
+        mov     rdi, rax                ; output_width
+        mov     rsi, rbx                ; input_buf (temporary array)
+        %endif
+
+        call    EXTN(jsimd_h2v1_merged_upsample_sse2)
+        ; Reload everything from the stack rather than trusting the callee.
+        pop     rax
+        pop     rcx
+        pop     rdi
+        pop     rsi
+        pop     rbx
+        pop     rdx
+        ; Advance to the second luma row and the second output row.
+        add     rdi, byte SIZEOF_JSAMPROW       ; outptr1
+        add     rsi, byte SIZEOF_JSAMPROW       ; inptr01
+
+        push    rdx                     ; inptr2
+        push    rbx                     ; inptr1
+        push    rsi                     ; inptr00
+        mov     rbx,rsp                 ; rbx = address of the temporary array
+
+        push    rdi
+        push    rcx
+        push    rax
+
+        %ifdef WIN64
+        mov     r8, rcx                 ; in_row_group_ctr
+        mov     r9, rdi                 ; output_buf
+        mov     rcx, rax                ; output_width
+        mov     rdx, rbx                ; input_buf (temporary array)
+        %else
+        mov     rdx, rcx                ; in_row_group_ctr
+        mov     rcx, rdi                ; output_buf
+        mov     rdi, rax                ; output_width
+        mov     rsi, rbx                ; input_buf (temporary array)
+        %endif
+
+        call    EXTN(jsimd_h2v1_merged_upsample_sse2)
+        ; Unwind the six values pushed for the second call.
+        pop     rax
+        pop     rcx
+        pop     rdi
+        pop     rsi
+        pop     rbx
+        pop     rdx
+
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdmrgext-sse2.asm b/simd/jdmrgext-sse2.asm
new file mode 100644
index 0000000..c47916f
--- /dev/null
+++ b/simd/jdmrgext-sse2.asm
@@ -0,0 +1,519 @@
+;
+; jdmrgext.asm - merged upsampling/color conversion (SSE2)
+;
+; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2012 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width,
+;                                  JSAMPIMAGE input_buf,
+;                                  JDIMENSION in_row_group_ctr,
+;                                  JSAMPARRAY output_buf);
+;
+
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
+; (b) is the stack pointer just after "push ebp": +0 old ebp, +4 return addr.
+%define original_ebp    ebp+0                   ; slot holding that pointer
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          3
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_h2v1_merged_upsample_sse2)
+
+EXTN(jsimd_h2v1_merged_upsample_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room for the saved eax
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; [original_ebp] = eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; allocate wk[WK_NUM] below ebp
+        pushpic eax             ; make room for the GOT address
+        push    ebx             ; ebx, esi, edi are restored before returning
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     ecx, JDIMENSION [output_width(eax)]     ; col
+        test    ecx,ecx
+        jz      near .return            ; zero width: nothing to convert
+
+        push    ecx                     ; save col; ecx is reused as row index
+
+        mov     edi, JSAMPIMAGE [input_buf(eax)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(eax)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; input_buf[0]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; input_buf[1]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; input_buf[2]
+        mov     edi, JSAMPARRAY [output_buf(eax)]
+        mov     esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW]         ; inptr0
+        mov     ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW]         ; inptr1
+        mov     edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW]         ; inptr2
+        mov     edi, JSAMPROW [edi]                             ; outptr
+
+        pop     ecx                     ; col
+        ; Each iteration reads 16 Cb and 16 Cr samples and up to 2x16 Y samples.
+        alignx  16,7
+.columnloop:
+        movpic  eax, POINTER [gotptr]   ; load GOT address (eax)
+
+        movdqa    xmm6, XMMWORD [ebx]   ; xmm6=Cb(0123456789ABCDEF)
+        movdqa    xmm7, XMMWORD [edx]   ; xmm7=Cr(0123456789ABCDEF)
+
+        pxor      xmm1,xmm1             ; xmm1=(all 0's)
+        pcmpeqw   xmm3,xmm3             ; xmm3=(all 1's)
+        psllw     xmm3,7                ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+        movdqa    xmm4,xmm6
+        punpckhbw xmm6,xmm1             ; xmm6=Cb(89ABCDEF)=CbH
+        punpcklbw xmm4,xmm1             ; xmm4=Cb(01234567)=CbL
+        movdqa    xmm0,xmm7
+        punpckhbw xmm7,xmm1             ; xmm7=Cr(89ABCDEF)=CrH
+        punpcklbw xmm0,xmm1             ; xmm0=Cr(01234567)=CrL
+
+        paddw     xmm6,xmm3             ; xmm6=CbH-128 (0xFF80 is -128)
+        paddw     xmm4,xmm3             ; xmm4=CbL-128
+        paddw     xmm7,xmm3             ; xmm7=CrH-128
+        paddw     xmm0,xmm3             ; xmm0=CrL-128
+
+        ; (Original)
+        ; R = Y                + 1.40200 * Cr
+        ; G = Y - 0.34414 * Cb - 0.71414 * Cr
+        ; B = Y + 1.77200 * Cb
+        ;
+        ; (This implementation)
+        ; R = Y                + 0.40200 * Cr + Cr
+        ; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
+        ; B = Y - 0.22800 * Cb + Cb + Cb
+
+        movdqa  xmm5,xmm6               ; xmm5=CbH
+        movdqa  xmm2,xmm4               ; xmm2=CbL
+        paddw   xmm6,xmm6               ; xmm6=2*CbH
+        paddw   xmm4,xmm4               ; xmm4=2*CbL
+        movdqa  xmm1,xmm7               ; xmm1=CrH
+        movdqa  xmm3,xmm0               ; xmm3=CrL
+        paddw   xmm7,xmm7               ; xmm7=2*CrH
+        paddw   xmm0,xmm0               ; xmm0=2*CrL
+
+        pmulhw  xmm6,[GOTOFF(eax,PW_MF0228)]    ; xmm6=(2*CbH * -FIX(0.22800))
+        pmulhw  xmm4,[GOTOFF(eax,PW_MF0228)]    ; xmm4=(2*CbL * -FIX(0.22800))
+        pmulhw  xmm7,[GOTOFF(eax,PW_F0402)]     ; xmm7=(2*CrH * FIX(0.40200))
+        pmulhw  xmm0,[GOTOFF(eax,PW_F0402)]     ; xmm0=(2*CrL * FIX(0.40200))
+        ; Add PW_ONE before the shift by 1 so that the halving rounds.
+        paddw   xmm6,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm4,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm6,1                  ; xmm6=(CbH * -FIX(0.22800))
+        psraw   xmm4,1                  ; xmm4=(CbL * -FIX(0.22800))
+        paddw   xmm7,[GOTOFF(eax,PW_ONE)]
+        paddw   xmm0,[GOTOFF(eax,PW_ONE)]
+        psraw   xmm7,1                  ; xmm7=(CrH * FIX(0.40200))
+        psraw   xmm0,1                  ; xmm0=(CrL * FIX(0.40200))
+
+        paddw   xmm6,xmm5
+        paddw   xmm4,xmm2
+        paddw   xmm6,xmm5               ; xmm6=(CbH * FIX(1.77200))=(B-Y)H
+        paddw   xmm4,xmm2               ; xmm4=(CbL * FIX(1.77200))=(B-Y)L
+        paddw   xmm7,xmm1               ; xmm7=(CrH * FIX(1.40200))=(R-Y)H
+        paddw   xmm0,xmm3               ; xmm0=(CrL * FIX(1.40200))=(R-Y)L
+
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=(B-Y)H
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(R-Y)H
+        ; Interleave Cb and Cr words so pmaddwd sums one (Cb,Cr) pair per dword.
+        movdqa    xmm6,xmm5             ; xmm6=CbH
+        movdqa    xmm7,xmm2             ; xmm7=CbL
+        punpcklwd xmm5,xmm1             ; xmm5=(CbH,CrH) pairs, lower 4
+        punpckhwd xmm6,xmm1             ; xmm6=(CbH,CrH) pairs, upper 4
+        pmaddwd   xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm6,[GOTOFF(eax,PW_MF0344_F0285)]
+        punpcklwd xmm2,xmm3             ; xmm2=(CbL,CrL) pairs, lower 4
+        punpckhwd xmm7,xmm3             ; xmm7=(CbL,CrL) pairs, upper 4
+        pmaddwd   xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
+        pmaddwd   xmm7,[GOTOFF(eax,PW_MF0344_F0285)]
+        ; Add PD_ONEHALF, then shift right by SCALEBITS to descale with rounding.
+        paddd     xmm5,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm6,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm5,SCALEBITS
+        psrad     xmm6,SCALEBITS
+        paddd     xmm2,[GOTOFF(eax,PD_ONEHALF)]
+        paddd     xmm7,[GOTOFF(eax,PD_ONEHALF)]
+        psrad     xmm2,SCALEBITS
+        psrad     xmm7,SCALEBITS
+
+        packssdw  xmm5,xmm6     ; xmm5=CbH*-FIX(0.344)+CrH*FIX(0.285)
+        packssdw  xmm2,xmm7     ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
+        psubw     xmm5,xmm1     ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
+        psubw     xmm2,xmm3     ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
+
+        movdqa  XMMWORD [wk(2)], xmm5   ; wk(2)=(G-Y)H
+        ; First pass uses the L halves still held in xmm0/xmm2/xmm4.
+        mov     al,2                    ; Yctr
+        jmp     short .Yloop_1st
+        alignx  16,7
+        ; Second pass reloads the H halves saved in wk(0..2).
+.Yloop_2nd:
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(R-Y)H
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(G-Y)H
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(B-Y)H
+        alignx  16,7
+
+.Yloop_1st:
+        movdqa  xmm7, XMMWORD [esi]     ; xmm7=Y(0123456789ABCDEF)
+
+        pcmpeqw xmm6,xmm6
+        psrlw   xmm6,BYTE_BIT           ; xmm6={0xFF 0x00 0xFF 0x00 ..}
+        pand    xmm6,xmm7               ; xmm6=Y(02468ACE)=YE
+        psrlw   xmm7,BYTE_BIT           ; xmm7=Y(13579BDF)=YO
+
+        movdqa  xmm1,xmm0               ; xmm1=xmm0=(R-Y)(L/H)
+        movdqa  xmm3,xmm2               ; xmm3=xmm2=(G-Y)(L/H)
+        movdqa  xmm5,xmm4               ; xmm5=xmm4=(B-Y)(L/H)
+
+        paddw     xmm0,xmm6             ; xmm0=((R-Y)+YE)=RE=R(02468ACE)
+        paddw     xmm1,xmm7             ; xmm1=((R-Y)+YO)=RO=R(13579BDF)
+        packuswb  xmm0,xmm0             ; xmm0=R(02468ACE********)
+        packuswb  xmm1,xmm1             ; xmm1=R(13579BDF********)
+
+        paddw     xmm2,xmm6             ; xmm2=((G-Y)+YE)=GE=G(02468ACE)
+        paddw     xmm3,xmm7             ; xmm3=((G-Y)+YO)=GO=G(13579BDF)
+        packuswb  xmm2,xmm2             ; xmm2=G(02468ACE********)
+        packuswb  xmm3,xmm3             ; xmm3=G(13579BDF********)
+
+        paddw     xmm4,xmm6             ; xmm4=((B-Y)+YE)=BE=B(02468ACE)
+        paddw     xmm5,xmm7             ; xmm5=((B-Y)+YO)=BO=B(13579BDF)
+        packuswb  xmm4,xmm4             ; xmm4=B(02468ACE********)
+        packuswb  xmm5,xmm5             ; xmm5=B(13579BDF********)
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+        ; NOTE(review): xmmA..xmmH are aliases defined outside this block.
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmB     ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
+        punpcklbw xmmD,xmmF     ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
+
+        movdqa    xmmG,xmmA
+        movdqa    xmmH,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
+        punpckhwd xmmG,xmmE     ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
+
+        psrldq    xmmH,2        ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
+        psrldq    xmmE,2        ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
+
+        movdqa    xmmC,xmmD
+        movdqa    xmmB,xmmD
+        punpcklwd xmmD,xmmH     ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
+        punpckhwd xmmC,xmmH     ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
+
+        psrldq    xmmB,2        ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
+
+        movdqa    xmmF,xmmE
+        punpcklwd xmmE,xmmB     ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
+        punpckhwd xmmF,xmmB     ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
+
+        pshufd    xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
+        movdqa    xmmB,xmmE
+        punpckldq xmmA,xmmD     ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
+        punpckldq xmmE,xmmH     ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
+        punpckhdq xmmD,xmmB     ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
+
+        pshufd    xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
+        movdqa    xmmB,xmmF
+        punpckldq xmmG,xmmC     ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
+        punpckldq xmmF,xmmH     ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
+        punpckhdq xmmC,xmmB     ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
+
+        punpcklqdq xmmA,xmmE    ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+        punpcklqdq xmmD,xmmG    ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+        punpcklqdq xmmF,xmmC    ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+        cmp     ecx, byte SIZEOF_XMMWORD        ; fewer than 16 pixels left?
+        jb      short .column_st32              ; yes: partial store
+
+        test    edi, SIZEOF_XMMWORD-1           ; is outptr 16-byte aligned?
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
+.out0:
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD        ; col -= 16 pixels
+        jz      near .endcolumn
+
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
+
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+        ; Partial store: from here on ecx counts remaining bytes, not pixels.
+.column_st32:
+        lea     ecx, [ecx+ecx*2]                ; imul ecx, RGB_PIXELSIZE
+        cmp     ecx, byte 2*SIZEOF_XMMWORD      ; at least 32 bytes left?
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmF                       ; remaining bytes are in xmmF
+        sub     ecx, byte 2*SIZEOF_XMMWORD
+        jmp     short .column_st15
+.column_st16:
+        cmp     ecx, byte SIZEOF_XMMWORD        ; at least 16 bytes left?
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD                       ; remaining bytes are in xmmD
+        sub     ecx, byte SIZEOF_XMMWORD
+.column_st15:
+        ; Store the lower 8 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_MMWORD
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_MMWORD
+        sub     ecx, byte SIZEOF_MMWORD
+        psrldq  xmmA, SIZEOF_MMWORD             ; drop the bytes just stored
+.column_st7:
+        ; Store the lower 4 bytes of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_DWORD
+        jb      short .column_st3
+        movd    XMM_DWORD [edi], xmmA
+        add     edi, byte SIZEOF_DWORD
+        sub     ecx, byte SIZEOF_DWORD
+        psrldq  xmmA, SIZEOF_DWORD              ; drop the bytes just stored
+.column_st3:
+        ; Store the lower 2 bytes of eax to the output when it has enough
+        ; space.
+        movd    eax, xmmA                       ; eax = last (up to) 4 bytes
+        cmp     ecx, byte SIZEOF_WORD
+        jb      short .column_st1
+        mov     WORD [edi], ax
+        add     edi, byte SIZEOF_WORD
+        sub     ecx, byte SIZEOF_WORD
+        shr     eax, 16                         ; drop the bytes just stored
+.column_st1:
+        ; Store the lower 1 byte of eax to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .endcolumn
+        mov     BYTE [edi], al
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+%ifdef RGBX_FILLER_0XFF
+        pcmpeqb   xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pcmpeqb   xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%else
+        pxor      xmm6,xmm6             ; xmm6=XE=X(02468ACE********)
+        pxor      xmm7,xmm7             ; xmm7=XO=X(13579BDF********)
+%endif
+        ; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
+        ; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
+        ; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
+        ; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
+
+        punpcklbw xmmA,xmmC     ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
+        punpcklbw xmmE,xmmG     ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
+        punpcklbw xmmB,xmmD     ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
+        punpcklbw xmmF,xmmH     ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
+
+        movdqa    xmmC,xmmA
+        punpcklwd xmmA,xmmE     ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
+        punpckhwd xmmC,xmmE     ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
+        movdqa    xmmG,xmmB
+        punpcklwd xmmB,xmmF     ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
+        punpckhwd xmmG,xmmF     ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
+
+        movdqa    xmmD,xmmA
+        punpckldq xmmA,xmmB     ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+        punpckhdq xmmD,xmmB     ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+        movdqa    xmmH,xmmC
+        punpckldq xmmC,xmmG     ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+        punpckhdq xmmH,xmmG     ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+        cmp     ecx, byte SIZEOF_XMMWORD        ; fewer than 16 pixels left?
+        jb      short .column_st32              ; yes: partial store
+
+        test    edi, SIZEOF_XMMWORD-1           ; is outptr 16-byte aligned?
+        jnz     short .out1
+        ; --(aligned)-------------------
+        movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+        jmp     short .out0
+.out1:  ; --(unaligned)-----------------
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        movdqu  XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
+        movdqu  XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
+.out0:
+        add     edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD  ; outptr
+        sub     ecx, byte SIZEOF_XMMWORD        ; col -= 16 pixels
+        jz      near .endcolumn
+
+        add     esi, byte SIZEOF_XMMWORD        ; inptr0
+        dec     al                      ; Yctr
+        jnz     near .Yloop_2nd
+
+        add     ebx, byte SIZEOF_XMMWORD        ; inptr1
+        add     edx, byte SIZEOF_XMMWORD        ; inptr2
+        jmp     near .columnloop
+        alignx  16,7
+        ; Partial store: ecx still counts pixels (4 bytes each) in this branch.
+.column_st32:
+        cmp     ecx, byte SIZEOF_XMMWORD/2      ; at least 8 pixels left?
+        jb      short .column_st16
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        movdqu  XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        movdqa  xmmA,xmmC                       ; shift pixels 8..F down
+        movdqa  xmmD,xmmH
+        sub     ecx, byte SIZEOF_XMMWORD/2
+.column_st16:
+        cmp     ecx, byte SIZEOF_XMMWORD/4      ; at least 4 pixels left?
+        jb      short .column_st15
+        movdqu  XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
+        add     edi, byte SIZEOF_XMMWORD        ; outptr
+        movdqa  xmmA,xmmD
+        sub     ecx, byte SIZEOF_XMMWORD/4
+.column_st15:
+        ; Store two pixels (8 bytes) of xmmA to the output when it has enough
+        ; space.
+        cmp     ecx, byte SIZEOF_XMMWORD/8
+        jb      short .column_st7
+        movq    XMM_MMWORD [edi], xmmA
+        add     edi, byte SIZEOF_XMMWORD/8*4
+        sub     ecx, byte SIZEOF_XMMWORD/8
+        psrldq  xmmA, SIZEOF_XMMWORD/8*4        ; drop the bytes just stored
+.column_st7:
+        ; Store one pixel (4 bytes) of xmmA to the output when it has enough
+        ; space.
+        test    ecx, ecx
+        jz      short .endcolumn
+        movd    XMM_DWORD [edi], xmmA
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+.endcolumn:
+        sfence          ; flush the write buffer
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+;
+; GLOBAL(void)
+; jsimd_h2v2_merged_upsample_sse2 (JDIMENSION output_width,
+;                                  JSAMPIMAGE input_buf,
+;                                  JDIMENSION in_row_group_ctr,
+;                                  JSAMPARRAY output_buf);
+;
+
+%define output_width(b) (b)+8                   ; JDIMENSION output_width
+%define input_buf(b)            (b)+12          ; JSAMPIMAGE input_buf
+%define in_row_group_ctr(b)     (b)+16          ; JDIMENSION in_row_group_ctr
+%define output_buf(b)           (b)+20          ; JSAMPARRAY output_buf
+; (b) is the frame pointer: +0 saved ebp, +4 return address, +8 first argument.
+        align   16
+        global  EXTN(jsimd_h2v2_merged_upsample_sse2)
+
+EXTN(jsimd_h2v2_merged_upsample_sse2):
+        push    ebp
+        mov     ebp,esp
+        push    ebx             ; ebx, esi, edi are restored before returning
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     eax, POINTER [output_width(ebp)]        ; output_width
+
+        mov     edi, JSAMPIMAGE [input_buf(ebp)]
+        mov     ecx, JDIMENSION [in_row_group_ctr(ebp)]
+        mov     esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]       ; input_buf[0]
+        mov     ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]       ; input_buf[1]
+        mov     edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]       ; input_buf[2]
+        mov     edi, JSAMPARRAY [output_buf(ebp)]
+        lea     esi, [esi+ecx*SIZEOF_JSAMPROW]  ; &input_buf[0][in_row_group_ctr]
+        ; Build a temporary 3-entry input_buf array on the stack.
+        push    edx                     ; inptr2
+        push    ebx                     ; inptr1
+        push    esi                     ; inptr00
+        mov     ebx,esp                 ; ebx = address of the temporary array
+        ; Push the four h2v1 arguments, last argument first.
+        push    edi                     ; output_buf (outptr0)
+        push    ecx                     ; in_row_group_ctr
+        push    ebx                     ; input_buf
+        push    eax                     ; output_width
+
+        call    near EXTN(jsimd_h2v1_merged_upsample_sse2)
+        ; The h2v1 routine pushes and pops ebx, esi and edi, so they are intact.
+        add     esi, byte SIZEOF_JSAMPROW       ; inptr01
+        add     edi, byte SIZEOF_JSAMPROW       ; outptr1
+        mov     POINTER [ebx+0*SIZEOF_POINTER], esi     ; patch the inptr00 slot
+        mov     POINTER [ebx-1*SIZEOF_POINTER], edi     ; patch the output_buf arg
+        ; Second call reuses the argument slots that are still on the stack.
+        call    near EXTN(jsimd_h2v1_merged_upsample_sse2)
+
+        add     esp, byte 7*SIZEOF_DWORD        ; drop 4 args + 3 array entries
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdsample-mmx.asm b/simd/jdsample-mmx.asm
new file mode 100644
index 0000000..88564e4
--- /dev/null
+++ b/simd/jdsample-mmx.asm
@@ -0,0 +1,737 @@
+;
+; jdsample.asm - upsampling (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_mmx)
+
+EXTN(jconst_fancy_upsample_mmx):       ; constant table for the fancy upsamplers; each entry is 4 identical 16-bit words
+
+PW_ONE          times 4 dw  1          ; rounding bias for the even outputs of the h2v1 fancy filter
+PW_TWO          times 4 dw  2          ; rounding bias for the odd outputs of the h2v1 fancy filter
+PW_THREE        times 4 dw  3          ; weight applied to the nearer sample in both fancy filters
+PW_SEVEN        times 4 dw  7          ; rounding bias for the odd outputs of the h2v2 fancy filter
+PW_EIGHT        times 4 dw  8          ; rounding bias for the even outputs of the h2v2 fancy filter
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter".  This is a good compromise between
+; speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+;
+; GLOBAL(void)
+; jsimd_h2v1_fancy_upsample_mmx (int max_v_samp_factor,
+;                                JDIMENSION downsampled_width,
+;                                JSAMPARRAY input_data,
+;                                JSAMPARRAY * output_data_ptr);
+;
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_mmx)
+
+EXTN(jsimd_h2v1_fancy_upsample_mmx):   ; h2v1 triangle-filter upsampling: 8 input samples -> 16 output samples per pass
+        push    ebp
+        mov     ebp,esp
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        mov     eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
+        test    eax,eax
+        jz      near .return    ; zero-width input: nothing to do
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return    ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    eax                     ; colctr
+        push    edi                     ; save output_data row cursor
+        push    esi                     ; save input_data row cursor
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr
+
+        test    eax, SIZEOF_MMWORD-1
+        jz      short .skip             ; width already a multiple of the block size
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]        ; dl = last real sample of the row
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+.skip:
+        pxor    mm0,mm0                 ; mm0=(all 0's)
+        pcmpeqb mm7,mm7                 ; mm7=(all 1's)
+        psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mask keeping only the lowest byte
+        pand    mm7, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm7=( 0 - - - - - - -): sample 0 stands in for sample -1
+
+        add     eax, byte SIZEOF_MMWORD-1
+        and     eax, byte -SIZEOF_MMWORD        ; round colctr up to a multiple of SIZEOF_MMWORD
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      short .columnloop               ; more than one block left; otherwise fall through
+        alignx  16,7
+
+.columnloop_last:
+        pcmpeqb mm6,mm6
+        psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mask keeping only the highest byte
+        pand    mm6, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm6=( - - - - - - - 7): sample 7 stands in for sample 8
+        jmp     short .upsample
+        alignx  16,7
+
+.columnloop:
+        movq    mm6, MMWORD [esi+1*SIZEOF_MMWORD]
+        psllq   mm6,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm6=( - - - - - - - 8): first sample of the next block
+
+.upsample:
+        movq    mm1, MMWORD [esi+0*SIZEOF_MMWORD]
+        movq    mm2,mm1
+        movq    mm3,mm1                 ; mm1=( 0 1 2 3 4 5 6 7)
+        psllq   mm2,BYTE_BIT            ; mm2=( - 0 1 2 3 4 5 6)
+        psrlq   mm3,BYTE_BIT            ; mm3=( 1 2 3 4 5 6 7 -)
+
+        por     mm2,mm7                 ; mm2=(-1 0 1 2 3 4 5 6)
+        por     mm3,mm6                 ; mm3=( 1 2 3 4 5 6 7 8)
+
+        movq    mm7,mm1
+        psrlq   mm7,(SIZEOF_MMWORD-1)*BYTE_BIT  ; mm7=( 7 - - - - - - -)
+
+        movq      mm4,mm1
+        punpcklbw mm1,mm0               ; mm1=( 0 1 2 3)
+        punpckhbw mm4,mm0               ; mm4=( 4 5 6 7)
+        movq      mm5,mm2
+        punpcklbw mm2,mm0               ; mm2=(-1 0 1 2)
+        punpckhbw mm5,mm0               ; mm5=( 3 4 5 6)
+        movq      mm6,mm3
+        punpcklbw mm3,mm0               ; mm3=( 1 2 3 4)
+        punpckhbw mm6,mm0               ; mm6=( 5 6 7 8)
+
+        pmullw  mm1,[GOTOFF(ebx,PW_THREE)]      ; mm1=3*( 0 1 2 3)
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; mm4=3*( 4 5 6 7)
+        paddw   mm2,[GOTOFF(ebx,PW_ONE)]        ; left neighbours + 1
+        paddw   mm5,[GOTOFF(ebx,PW_ONE)]
+        paddw   mm3,[GOTOFF(ebx,PW_TWO)]        ; right neighbours + 2
+        paddw   mm6,[GOTOFF(ebx,PW_TWO)]
+
+        paddw   mm2,mm1
+        paddw   mm5,mm4
+        psrlw   mm2,2                   ; mm2=OutLE=( 0  2  4  6)
+        psrlw   mm5,2                   ; mm5=OutHE=( 8 10 12 14)
+        paddw   mm3,mm1
+        paddw   mm6,mm4
+        psrlw   mm3,2                   ; mm3=OutLO=( 1  3  5  7)
+        psrlw   mm6,2                   ; mm6=OutHO=( 9 11 13 15)
+
+        psllw   mm3,BYTE_BIT            ; move the odd outputs into the high byte of each word
+        psllw   mm6,BYTE_BIT
+        por     mm2,mm3                 ; mm2=OutL=( 0  1  2  3  4  5  6  7)
+        por     mm5,mm6                 ; mm5=OutH=( 8  9 10 11 12 13 14 15)
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm5
+
+        sub     eax, byte SIZEOF_MMWORD         ; one input block consumed
+        add     esi, byte 1*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 2*SIZEOF_MMWORD       ; outptr
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      near .columnloop                ; more than one block left
+        test    eax,eax
+        jnz     near .columnloop_last           ; exactly one block left
+
+        pop     esi                     ; restore input_data row cursor
+        pop     edi                     ; restore output_data row cursor
+        pop     eax                     ; restore colctr
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jsimd_h2v2_fancy_upsample_mmx (int max_v_samp_factor,
+;                                JDIMENSION downsampled_width,
+;                                JSAMPARRAY input_data,
+;                                JSAMPARRAY * output_data_ptr);
+;
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          4
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_mmx)
+
+EXTN(jsimd_h2v2_fancy_upsample_mmx):   ; h2v2 triangle-filter upsampling: vertical 3:1 blend first, then horizontal 3:1 blend
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; reserve the slot that will hold the original ebp
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; stored at [original_ebp] so .return can undo the alignment
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve the WK_NUM mmword scratch slots below ebp
+        pushpic eax             ; make room for the GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     edx,eax                         ; edx = original ebp
+        mov     eax, JDIMENSION [downsamp_width(edx)]  ; colctr
+        test    eax,eax
+        jz      near .return            ; zero-width input: nothing to do
+
+        mov     ecx, INT [max_v_samp(edx)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return            ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(edx)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    eax                                     ; colctr
+        push    ecx                                     ; rowctr (ecx is reused as a row pointer below)
+        push    edi                                     ; save output_data row cursor
+        push    esi                                     ; save input_data row cursor
+
+        mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+        test    eax, SIZEOF_MMWORD-1
+        jz      short .skip             ; width already a multiple of the block size
+        push    edx                     ; dl is needed as a byte temporary
+        mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     edx                     ; restore outptr0
+.skip:
+        ; -- process the first column block
+
+        movq    mm0, MMWORD [ebx+0*SIZEOF_MMWORD]       ; mm0=row[ 0][0]
+        movq    mm1, MMWORD [ecx+0*SIZEOF_MMWORD]       ; mm1=row[-1][0]
+        movq    mm2, MMWORD [esi+0*SIZEOF_MMWORD]       ; mm2=row[+1][0]
+
+        pushpic ebx                     ; ebx (inptr0) doubles as the GOT register
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pxor      mm3,mm3               ; mm3=(all 0's)
+        movq      mm4,mm0
+        punpcklbw mm0,mm3               ; mm0=row[ 0][0]( 0 1 2 3)
+        punpckhbw mm4,mm3               ; mm4=row[ 0][0]( 4 5 6 7)
+        movq      mm5,mm1
+        punpcklbw mm1,mm3               ; mm1=row[-1][0]( 0 1 2 3)
+        punpckhbw mm5,mm3               ; mm5=row[-1][0]( 4 5 6 7)
+        movq      mm6,mm2
+        punpcklbw mm2,mm3               ; mm2=row[+1][0]( 0 1 2 3)
+        punpckhbw mm6,mm3               ; mm6=row[+1][0]( 4 5 6 7)
+
+        pmullw  mm0,[GOTOFF(ebx,PW_THREE)]      ; 3 * current row, low half
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * current row, high half
+
+        pcmpeqb mm7,mm7
+        psrlq   mm7,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mask keeping only the lowest word
+
+        paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+        paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+        paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+        paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
+
+        movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1       ; temporarily save
+        movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5       ; the intermediate data
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm6
+
+        pand    mm1,mm7                 ; mm1=( 0 - - -)
+        pand    mm2,mm7                 ; mm2=( 0 - - -)
+
+        movq    MMWORD [wk(0)], mm1     ; upper row: column 0 stands in for column -1
+        movq    MMWORD [wk(1)], mm2     ; lower row: column 0 stands in for column -1
+
+        poppic  ebx                     ; ebx = inptr0 again
+
+        add     eax, byte SIZEOF_MMWORD-1
+        and     eax, byte -SIZEOF_MMWORD        ; round colctr up to a multiple of SIZEOF_MMWORD
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      short .columnloop               ; more than one block left; otherwise fall through
+        alignx  16,7
+
+.columnloop_last:
+        ; -- process the last column block
+
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pcmpeqb mm1,mm1
+        psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mask keeping only the highest word
+        movq    mm2,mm1
+
+        pand    mm1, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm1=( - - - 7)
+        pand    mm2, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm2=( - - - 7)
+
+        movq    MMWORD [wk(2)], mm1     ; upper row: column 7 stands in for column 8
+        movq    MMWORD [wk(3)], mm2     ; lower row: column 7 stands in for column 8
+
+        jmp     short .upsample
+        alignx  16,7
+
+.columnloop:
+        ; -- process the next column block
+
+        movq    mm0, MMWORD [ebx+1*SIZEOF_MMWORD]       ; mm0=row[ 0][1]
+        movq    mm1, MMWORD [ecx+1*SIZEOF_MMWORD]       ; mm1=row[-1][1]
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]       ; mm2=row[+1][1]
+
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pxor      mm3,mm3               ; mm3=(all 0's)
+        movq      mm4,mm0
+        punpcklbw mm0,mm3               ; mm0=row[ 0][1]( 0 1 2 3)
+        punpckhbw mm4,mm3               ; mm4=row[ 0][1]( 4 5 6 7)
+        movq      mm5,mm1
+        punpcklbw mm1,mm3               ; mm1=row[-1][1]( 0 1 2 3)
+        punpckhbw mm5,mm3               ; mm5=row[-1][1]( 4 5 6 7)
+        movq      mm6,mm2
+        punpcklbw mm2,mm3               ; mm2=row[+1][1]( 0 1 2 3)
+        punpckhbw mm6,mm3               ; mm6=row[+1][1]( 4 5 6 7)
+
+        pmullw  mm0,[GOTOFF(ebx,PW_THREE)]      ; 3 * current row, low half
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * current row, high half
+
+        paddw   mm1,mm0                 ; mm1=Int0L=( 0 1 2 3)
+        paddw   mm5,mm4                 ; mm5=Int0H=( 4 5 6 7)
+        paddw   mm2,mm0                 ; mm2=Int1L=( 0 1 2 3)
+        paddw   mm6,mm4                 ; mm6=Int1H=( 4 5 6 7)
+
+        movq    MMWORD [edx+2*SIZEOF_MMWORD], mm1       ; temporarily save
+        movq    MMWORD [edx+3*SIZEOF_MMWORD], mm5       ; the intermediate data
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm6
+
+        psllq   mm1,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm1=( - - - 0)
+        psllq   mm2,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm2=( - - - 0)
+
+        movq    MMWORD [wk(2)], mm1     ; upper row: right neighbour "8" for the current block
+        movq    MMWORD [wk(3)], mm2     ; lower row: right neighbour "8" for the current block
+
+.upsample:
+        ; -- process the upper row
+
+        movq    mm7, MMWORD [edx+0*SIZEOF_MMWORD]       ; mm7=Int0L=( 0 1 2 3)
+        movq    mm3, MMWORD [edx+1*SIZEOF_MMWORD]       ; mm3=Int0H=( 4 5 6 7)
+
+        movq    mm0,mm7
+        movq    mm4,mm3
+        psrlq   mm0,2*BYTE_BIT                  ; mm0=( 1 2 3 -)
+        psllq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( - - - 4)
+        movq    mm5,mm7
+        movq    mm6,mm3
+        psrlq   mm5,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm5=( 3 - - -)
+        psllq   mm6,2*BYTE_BIT                  ; mm6=( - 4 5 6)
+
+        por     mm0,mm4                         ; mm0=( 1 2 3 4)
+        por     mm5,mm6                         ; mm5=( 3 4 5 6)
+
+        movq    mm1,mm7
+        movq    mm2,mm3
+        psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+        psrlq   mm2,2*BYTE_BIT                  ; mm2=( 5 6 7 -)
+        movq    mm4,mm3
+        psrlq   mm4,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm4=( 7 - - -)
+
+        por     mm1, MMWORD [wk(0)]             ; mm1=(-1 0 1 2)
+        por     mm2, MMWORD [wk(2)]             ; mm2=( 5 6 7 8)
+
+        movq    MMWORD [wk(0)], mm4             ; column 7 becomes the next block's "-1"
+
+        pmullw  mm7,[GOTOFF(ebx,PW_THREE)]      ; 3 * centre columns, low half
+        pmullw  mm3,[GOTOFF(ebx,PW_THREE)]      ; 3 * centre columns, high half
+        paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]      ; left neighbours + 8
+        paddw   mm5,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm0,[GOTOFF(ebx,PW_SEVEN)]      ; right neighbours + 7
+        paddw   mm2,[GOTOFF(ebx,PW_SEVEN)]
+
+        paddw   mm1,mm7
+        paddw   mm5,mm3
+        psrlw   mm1,4                   ; mm1=Out0LE=( 0  2  4  6)
+        psrlw   mm5,4                   ; mm5=Out0HE=( 8 10 12 14)
+        paddw   mm0,mm7
+        paddw   mm2,mm3
+        psrlw   mm0,4                   ; mm0=Out0LO=( 1  3  5  7)
+        psrlw   mm2,4                   ; mm2=Out0HO=( 9 11 13 15)
+
+        psllw   mm0,BYTE_BIT            ; move the odd outputs into the high byte of each word
+        psllw   mm2,BYTE_BIT
+        por     mm1,mm0                 ; mm1=Out0L=( 0  1  2  3  4  5  6  7)
+        por     mm5,mm2                 ; mm5=Out0H=( 8  9 10 11 12 13 14 15)
+
+        movq    MMWORD [edx+0*SIZEOF_MMWORD], mm1       ; final samples overwrite the saved intermediates
+        movq    MMWORD [edx+1*SIZEOF_MMWORD], mm5
+
+        ; -- process the lower row
+
+        movq    mm6, MMWORD [edi+0*SIZEOF_MMWORD]       ; mm6=Int1L=( 0 1 2 3)
+        movq    mm4, MMWORD [edi+1*SIZEOF_MMWORD]       ; mm4=Int1H=( 4 5 6 7)
+
+        movq    mm7,mm6
+        movq    mm3,mm4
+        psrlq   mm7,2*BYTE_BIT                  ; mm7=( 1 2 3 -)
+        psllq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( - - - 4)
+        movq    mm0,mm6
+        movq    mm2,mm4
+        psrlq   mm0,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm0=( 3 - - -)
+        psllq   mm2,2*BYTE_BIT                  ; mm2=( - 4 5 6)
+
+        por     mm7,mm3                         ; mm7=( 1 2 3 4)
+        por     mm0,mm2                         ; mm0=( 3 4 5 6)
+
+        movq    mm1,mm6
+        movq    mm5,mm4
+        psllq   mm1,2*BYTE_BIT                  ; mm1=( - 0 1 2)
+        psrlq   mm5,2*BYTE_BIT                  ; mm5=( 5 6 7 -)
+        movq    mm3,mm4
+        psrlq   mm3,(SIZEOF_MMWORD-2)*BYTE_BIT  ; mm3=( 7 - - -)
+
+        por     mm1, MMWORD [wk(1)]             ; mm1=(-1 0 1 2)
+        por     mm5, MMWORD [wk(3)]             ; mm5=( 5 6 7 8)
+
+        movq    MMWORD [wk(1)], mm3             ; column 7 becomes the next block's "-1"
+
+        pmullw  mm6,[GOTOFF(ebx,PW_THREE)]      ; 3 * centre columns, low half
+        pmullw  mm4,[GOTOFF(ebx,PW_THREE)]      ; 3 * centre columns, high half
+        paddw   mm1,[GOTOFF(ebx,PW_EIGHT)]      ; left neighbours + 8
+        paddw   mm0,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   mm7,[GOTOFF(ebx,PW_SEVEN)]      ; right neighbours + 7
+        paddw   mm5,[GOTOFF(ebx,PW_SEVEN)]
+
+        paddw   mm1,mm6
+        paddw   mm0,mm4
+        psrlw   mm1,4                   ; mm1=Out1LE=( 0  2  4  6)
+        psrlw   mm0,4                   ; mm0=Out1HE=( 8 10 12 14)
+        paddw   mm7,mm6
+        paddw   mm5,mm4
+        psrlw   mm7,4                   ; mm7=Out1LO=( 1  3  5  7)
+        psrlw   mm5,4                   ; mm5=Out1HO=( 9 11 13 15)
+
+        psllw   mm7,BYTE_BIT            ; move the odd outputs into the high byte of each word
+        psllw   mm5,BYTE_BIT
+        por     mm1,mm7                 ; mm1=Out1L=( 0  1  2  3  4  5  6  7)
+        por     mm0,mm5                 ; mm0=Out1H=( 8  9 10 11 12 13 14 15)
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm1       ; final samples overwrite the saved intermediates
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm0
+
+        poppic  ebx                     ; ebx = inptr0 again
+
+        sub     eax, byte SIZEOF_MMWORD         ; one input block consumed
+        add     ecx, byte 1*SIZEOF_MMWORD       ; inptr1(above)
+        add     ebx, byte 1*SIZEOF_MMWORD       ; inptr0
+        add     esi, byte 1*SIZEOF_MMWORD       ; inptr1(below)
+        add     edx, byte 2*SIZEOF_MMWORD       ; outptr0
+        add     edi, byte 2*SIZEOF_MMWORD       ; outptr1
+        cmp     eax, byte SIZEOF_MMWORD
+        ja      near .columnloop                ; more than one block left
+        test    eax,eax
+        jnz     near .columnloop_last           ; exactly one block left
+
+        pop     esi                     ; restore input_data row cursor
+        pop     edi                     ; restore output_data row cursor
+        pop     ecx                     ; restore rowctr
+        pop     eax                     ; restore colctr
+
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      near .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jsimd_h2v1_upsample_mmx (int max_v_samp_factor,
+;                          JDIMENSION output_width,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_mmx)
+
+EXTN(jsimd_h2v1_upsample_mmx):         ; h2v1 box-filter upsampling: every input byte is written twice
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_MMWORD)-1
+        and     edx, byte -(2*SIZEOF_MMWORD)    ; edx = output_width rounded up to a multiple of 2*SIZEOF_MMWORD
+        jz      short .return                   ; zero width: nothing to do
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return                   ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    edi                             ; save output_data row cursor
+        push    esi                             ; save input_data row cursor
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        mov     eax,edx                         ; colctr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]       ; load one mmword of input samples
+
+        movq      mm1,mm0
+        punpcklbw mm0,mm0               ; duplicate each byte of the low half
+        punpckhbw mm1,mm1               ; duplicate each byte of the high half
+
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
+
+        sub     eax, byte 2*SIZEOF_MMWORD       ; two output mmwords written
+        jz      short .nextrow
+
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]       ; second (unrolled) input mmword
+
+        movq      mm3,mm2
+        punpcklbw mm2,mm2
+        punpckhbw mm3,mm3
+
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
+
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
+
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     edi, byte 4*SIZEOF_MMWORD       ; outptr
+        jmp     short .columnloop
+        alignx  16,7
+
+.nextrow:
+        pop     esi                             ; restore input_data row cursor
+        pop     edi                             ; restore output_data row cursor
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      short .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jsimd_h2v2_upsample_mmx (int max_v_samp_factor,
+;                          JDIMENSION output_width,
+;                          JSAMPARRAY input_data,
+;                          JSAMPARRAY * output_data_ptr);
+;
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_mmx)
+
+EXTN(jsimd_h2v2_upsample_mmx):         ; h2v2 box-filter upsampling: every input byte is written twice into each of two output rows
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_MMWORD)-1
+        and     edx, byte -(2*SIZEOF_MMWORD)    ; edx = output_width rounded up to a multiple of 2*SIZEOF_MMWORD
+        jz      near .return                    ; zero width: nothing to do
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return                   ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    edi                                     ; save output_data row cursor
+        push    esi                                     ; save input_data row cursor
+
+        mov     esi, JSAMPROW [esi]                     ; inptr
+        mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     eax,edx                                 ; colctr
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [esi+0*SIZEOF_MMWORD]       ; load one mmword of input samples
+
+        movq      mm1,mm0
+        punpcklbw mm0,mm0               ; duplicate each byte of the low half
+        punpckhbw mm1,mm1               ; duplicate each byte of the high half
+
+        movq    MMWORD [ebx+0*SIZEOF_MMWORD], mm0       ; same data goes to both output rows
+        movq    MMWORD [ebx+1*SIZEOF_MMWORD], mm1
+        movq    MMWORD [edi+0*SIZEOF_MMWORD], mm0
+        movq    MMWORD [edi+1*SIZEOF_MMWORD], mm1
+
+        sub     eax, byte 2*SIZEOF_MMWORD       ; two output mmwords written per row
+        jz      short .nextrow
+
+        movq    mm2, MMWORD [esi+1*SIZEOF_MMWORD]       ; second (unrolled) input mmword
+
+        movq      mm3,mm2
+        punpcklbw mm2,mm2
+        punpckhbw mm3,mm3
+
+        movq    MMWORD [ebx+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [ebx+3*SIZEOF_MMWORD], mm3
+        movq    MMWORD [edi+2*SIZEOF_MMWORD], mm2
+        movq    MMWORD [edi+3*SIZEOF_MMWORD], mm3
+
+        sub     eax, byte 2*SIZEOF_MMWORD
+        jz      short .nextrow
+
+        add     esi, byte 2*SIZEOF_MMWORD       ; inptr
+        add     ebx, byte 4*SIZEOF_MMWORD       ; outptr0
+        add     edi, byte 4*SIZEOF_MMWORD       ; outptr1
+        jmp     short .columnloop
+        alignx  16,7
+
+.nextrow:
+        pop     esi                             ; restore input_data row cursor
+        pop     edi                             ; restore output_data row cursor
+
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      short .rowloop
+
+        emms            ; empty MMX state
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdsample-sse2-64.asm b/simd/jdsample-sse2-64.asm
new file mode 100644
index 0000000..335ce2a
--- /dev/null
+++ b/simd/jdsample-sse2-64.asm
@@ -0,0 +1,671 @@
+;
+; jdsample-sse2-64.asm - upsampling (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+; Word constants used by the fancy (triangle-filter) upsamplers in this file.
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_sse2)
+
+EXTN(jconst_fancy_upsample_sse2):
+
+PW_ONE          times 8 dw  1   ; h2v1: rounding term for even outputs (before >>2)
+PW_TWO          times 8 dw  2   ; h2v1: rounding term for odd outputs (before >>2)
+PW_THREE        times 8 dw  3   ; weight applied to the nearer sample
+PW_SEVEN        times 8 dw  7   ; h2v2: rounding term for odd outputs (before >>4)
+PW_EIGHT        times 8 dw  8   ; h2v2: rounding term for even outputs (before >>4)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter".  This is a good compromise between
+; speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+;
+; GLOBAL(void)
+; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor,
+;                                 JDIMENSION downsampled_width,
+;                                 JSAMPARRAY input_data,
+;                                 JSAMPARRAY * output_data_ptr);
+;
+
+; r10 = int max_v_samp_factor          (32-bit value; only r10d is read)
+; r11 = JDIMENSION downsampled_width   (32-bit value; only r11d is read)
+; r12 = JSAMPARRAY input_data
+; r13 = JSAMPARRAY * output_data_ptr
+; Each input sample yields two outputs: (3*cur+left+1)>>2 and (3*cur+right+2)>>2.
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_sse2)
+
+EXTN(jsimd_h2v1_fancy_upsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably read by collect_args; confirm
+        mov     rbp,rsp
+        collect_args
+
+        mov     eax, r11d       ; colctr (zero-extend: upper half of r11 is undefined)
+        test    rax,rax
+        jz      near .return
+
+        movsxd  rcx, r10d       ; rowctr (sign-extend: upper half of r10 is undefined)
+        test    rcx,rcx
+        jz      near .return
+
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
+.rowloop:
+        push    rax                     ; colctr
+        push    rdi
+        push    rsi
+
+        mov     rsi, JSAMPROW [rsi]     ; inptr
+        mov     rdi, JSAMPROW [rdi]     ; outptr
+
+        test    rax, SIZEOF_XMMWORD-1
+        jz      short .skip             ; width already a whole number of XMMWORDs
+        mov     dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+.skip:
+        pxor    xmm0,xmm0               ; xmm0=(all 0's)
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=mask keeping only the lowest byte
+        pand    xmm7, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; sample 0 doubles as sample "-1"
+
+        add     rax, byte SIZEOF_XMMWORD-1
+        and     rax, byte -SIZEOF_XMMWORD       ; round colctr up to whole XMMWORDs
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
+
+.columnloop_last:
+        pcmpeqb xmm6,xmm6
+        pslldq  xmm6,(SIZEOF_XMMWORD-1) ; xmm6=mask keeping only the highest byte
+        pand    xmm6, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; sample 15 doubles as sample "16"
+        jmp     short .upsample
+
+.columnloop:
+        movdqa  xmm6, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+        pslldq  xmm6,(SIZEOF_XMMWORD-1) ; xmm6=(-- -- -- ... -- -- 16)
+
+.upsample:
+        movdqa  xmm1, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2,xmm1
+        movdqa  xmm3,xmm1               ; xmm1=( 0  1  2 ... 13 14 15)
+        pslldq  xmm2,1                  ; xmm2=(--  0  1 ... 12 13 14)
+        psrldq  xmm3,1                  ; xmm3=( 1  2  3 ... 14 15 --)
+
+        por     xmm2,xmm7               ; xmm2=(-1  0  1 ... 12 13 14)
+        por     xmm3,xmm6               ; xmm3=( 1  2  3 ... 14 15 16)
+
+        movdqa  xmm7,xmm1
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
+        ; xmm7 now carries the left neighbor for the next column block
+        movdqa    xmm4,xmm1
+        punpcklbw xmm1,xmm0             ; xmm1=( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm0             ; xmm4=( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm2
+        punpcklbw xmm2,xmm0             ; xmm2=(-1  0  1  2  3  4  5  6)
+        punpckhbw xmm5,xmm0             ; xmm5=( 7  8  9 10 11 12 13 14)
+        movdqa    xmm6,xmm3
+        punpcklbw xmm3,xmm0             ; xmm3=( 1  2  3  4  5  6  7  8)
+        punpckhbw xmm6,xmm0             ; xmm6=( 9 10 11 12 13 14 15 16)
+
+        pmullw  xmm1,[rel PW_THREE]     ; 3 * current sample
+        pmullw  xmm4,[rel PW_THREE]
+        paddw   xmm2,[rel PW_ONE]       ; left neighbor + 1
+        paddw   xmm5,[rel PW_ONE]
+        paddw   xmm3,[rel PW_TWO]       ; right neighbor + 2
+        paddw   xmm6,[rel PW_TWO]
+
+        paddw   xmm2,xmm1
+        paddw   xmm5,xmm4
+        psrlw   xmm2,2                  ; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,2                  ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
+        paddw   xmm3,xmm1
+        paddw   xmm6,xmm4
+        psrlw   xmm3,2                  ; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm6,2                  ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
+        ; move odd outputs into the high byte of each word, then merge with even
+        psllw   xmm3,BYTE_BIT
+        psllw   xmm6,BYTE_BIT
+        por     xmm2,xmm3               ; xmm2=OutL=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm6               ; xmm5=OutH=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm5
+
+        sub     rax, byte SIZEOF_XMMWORD
+        add     rsi, byte 1*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      near .columnloop        ; more than one block remains
+        test    rax,rax
+        jnz     near .columnloop_last   ; exactly one block remains
+
+        pop     rsi
+        pop     rdi
+        pop     rax
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rcx                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        uncollect_args
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+;
+; GLOBAL(void)
+; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor,
+;                                 JDIMENSION downsampled_width,
+;                                 JSAMPARRAY input_data,
+;                                 JSAMPARRAY * output_data_ptr);
+;
+
+; r10 = int max_v_samp_factor          (32-bit value; only r10d is read)
+; r11 = JDIMENSION downsampled_width   (32-bit value; only r11d is read)
+; r12 = JSAMPARRAY input_data
+; r13 = JSAMPARRAY * output_data_ptr
+; wk(0)/wk(1) = left-neighbor word, wk(2)/wk(3) = right-neighbor word (upper/lower row)
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          4
+
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_sse2)
+
+EXTN(jsimd_h2v2_fancy_upsample_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+        push    rbx
+
+        mov     eax, r11d       ; colctr (zero-extend: upper half of r11 is undefined)
+        test    rax,rax
+        jz      near .return
+
+        movsxd  rcx, r10d       ; rowctr (sign-extend: upper half of r10 is undefined)
+        test    rcx,rcx
+        jz      near .return
+
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
+.rowloop:
+        push    rax                                     ; colctr
+        push    rcx
+        push    rdi
+        push    rsi
+
+        mov     rcx, JSAMPROW [rsi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     rsi, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+        test    rax, SIZEOF_XMMWORD-1
+        jz      short .skip
+        push    rdx                     ; dl is used as scratch below
+        mov     dl, JSAMPLE [rcx+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rcx+rax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [rbx+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rbx+rax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [rsi+(rax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [rsi+rax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     rdx
+.skip:
+        ; -- process the first column block
+
+        movdqa  xmm0, XMMWORD [rbx+0*SIZEOF_XMMWORD]    ; xmm0=row[ 0][0]
+        movdqa  xmm1, XMMWORD [rcx+0*SIZEOF_XMMWORD]    ; xmm1=row[-1][0]
+        movdqa  xmm2, XMMWORD [rsi+0*SIZEOF_XMMWORD]    ; xmm2=row[+1][0]
+
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+
+        pmullw  xmm0,[rel PW_THREE]     ; 3 * row[0]
+        pmullw  xmm4,[rel PW_THREE]
+
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-2) ; xmm7=mask keeping only the lowest word
+        ; vertical pass: Int0 = 3*row[0]+row[-1], Int1 = 3*row[0]+row[+1]
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+
+        movdqa  XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm6
+
+        pand    xmm1,xmm7               ; xmm1=( 0 -- -- -- -- -- -- --)
+        pand    xmm2,xmm7               ; xmm2=( 0 -- -- -- -- -- -- --)
+        ; column 0 doubles as its own left neighbor ("-1") in the first block
+        movdqa  XMMWORD [wk(0)], xmm1
+        movdqa  XMMWORD [wk(1)], xmm2
+
+        add     rax, byte SIZEOF_XMMWORD-1
+        and     rax, byte -SIZEOF_XMMWORD       ; round colctr up to whole XMMWORDs
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
+
+.columnloop_last:
+        ; -- process the last column block
+
+        pcmpeqb xmm1,xmm1
+        pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=mask keeping only the highest word
+        movdqa  xmm2,xmm1
+
+        pand    xmm1, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+        pand    xmm2, XMMWORD [rdi+1*SIZEOF_XMMWORD]
+        ; column 15 doubles as its own right neighbor ("16") in the last block
+        movdqa  XMMWORD [wk(2)], xmm1   ; xmm1=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
+
+        jmp     near .upsample
+
+.columnloop:
+        ; -- process the next column block
+
+        movdqa  xmm0, XMMWORD [rbx+1*SIZEOF_XMMWORD]    ; xmm0=row[ 0][1]
+        movdqa  xmm1, XMMWORD [rcx+1*SIZEOF_XMMWORD]    ; xmm1=row[-1][1]
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]    ; xmm2=row[+1][1]
+
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+
+        pmullw  xmm0,[rel PW_THREE]     ; 3 * row[0]
+        pmullw  xmm4,[rel PW_THREE]
+
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+
+        movdqa  XMMWORD [rdx+2*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [rdx+3*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm6
+
+        pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- --  0)
+        pslldq  xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- --  0)
+        ; first column of the next block is the right neighbor of this block
+        movdqa  XMMWORD [wk(2)], xmm1
+        movdqa  XMMWORD [wk(3)], xmm2
+
+.upsample:
+        ; -- process the upper row
+
+        movdqa  xmm7, XMMWORD [rdx+0*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [rdx+1*SIZEOF_XMMWORD]
+
+        movdqa  xmm0,xmm7               ; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm4,xmm3               ; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm0,2                  ; xmm0=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- --  8)
+        movdqa  xmm5,xmm7
+        movdqa  xmm6,xmm3
+        psrldq  xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm6,2                  ; xmm6=(--  8  9 10 11 12 13 14)
+
+        por     xmm0,xmm4               ; xmm0=( 1  2  3  4  5  6  7  8)
+        por     xmm5,xmm6               ; xmm5=( 7  8  9 10 11 12 13 14)
+
+        movdqa  xmm1,xmm7
+        movdqa  xmm2,xmm3
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm2,2                  ; xmm2=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm4,xmm3
+        psrldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
+
+        por     xmm1, XMMWORD [wk(0)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm2, XMMWORD [wk(2)]   ; xmm2=( 9 10 11 12 13 14 15 16)
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; left neighbor for the next column block
+        ; horizontal pass: even=(3*cur+left+8)>>4, odd=(3*cur+right+7)>>4
+        pmullw  xmm7,[rel PW_THREE]
+        pmullw  xmm3,[rel PW_THREE]
+        paddw   xmm1,[rel PW_EIGHT]
+        paddw   xmm5,[rel PW_EIGHT]
+        paddw   xmm0,[rel PW_SEVEN]
+        paddw   xmm2,[rel PW_SEVEN]
+
+        paddw   xmm1,xmm7
+        paddw   xmm5,xmm3
+        psrlw   xmm1,4                  ; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,4                  ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm0,xmm7
+        paddw   xmm2,xmm3
+        psrlw   xmm0,4                  ; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm2,4                  ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
+
+        psllw   xmm0,BYTE_BIT
+        psllw   xmm2,BYTE_BIT
+        por     xmm1,xmm0               ; xmm1=Out0L=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm2               ; xmm5=Out0H=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [rdx+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdx+1*SIZEOF_XMMWORD], xmm5
+
+        ; -- process the lower row
+
+        movdqa  xmm6, XMMWORD [rdi+0*SIZEOF_XMMWORD]
+        movdqa  xmm4, XMMWORD [rdi+1*SIZEOF_XMMWORD]
+
+        movdqa  xmm7,xmm6               ; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm3,xmm4               ; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm7,2                  ; xmm7=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- --  8)
+        movdqa  xmm0,xmm6
+        movdqa  xmm2,xmm4
+        psrldq  xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm2,2                  ; xmm2=(--  8  9 10 11 12 13 14)
+
+        por     xmm7,xmm3               ; xmm7=( 1  2  3  4  5  6  7  8)
+        por     xmm0,xmm2               ; xmm0=( 7  8  9 10 11 12 13 14)
+
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm4
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm5,2                  ; xmm5=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm3,xmm4
+        psrldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
+
+        por     xmm1, XMMWORD [wk(1)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm5, XMMWORD [wk(3)]   ; xmm5=( 9 10 11 12 13 14 15 16)
+
+        movdqa  XMMWORD [wk(1)], xmm3   ; left neighbor for the next column block
+
+        pmullw  xmm6,[rel PW_THREE]
+        pmullw  xmm4,[rel PW_THREE]
+        paddw   xmm1,[rel PW_EIGHT]
+        paddw   xmm0,[rel PW_EIGHT]
+        paddw   xmm7,[rel PW_SEVEN]
+        paddw   xmm5,[rel PW_SEVEN]
+
+        paddw   xmm1,xmm6
+        paddw   xmm0,xmm4
+        psrlw   xmm1,4                  ; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm0,4                  ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm7,xmm6
+        paddw   xmm5,xmm4
+        psrlw   xmm7,4                  ; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm5,4                  ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
+
+        psllw   xmm7,BYTE_BIT
+        psllw   xmm5,BYTE_BIT
+        por     xmm1,xmm7               ; xmm1=Out1L=( 0  1  2 ... 13 14 15)
+        por     xmm0,xmm5               ; xmm0=Out1H=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm0
+
+        sub     rax, byte SIZEOF_XMMWORD
+        add     rcx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+        add     rbx, byte 1*SIZEOF_XMMWORD      ; inptr0
+        add     rsi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+        add     rdx, byte 2*SIZEOF_XMMWORD      ; outptr0
+        add     rdi, byte 2*SIZEOF_XMMWORD      ; outptr1
+        cmp     rax, byte SIZEOF_XMMWORD
+        ja      near .columnloop        ; more than one block remains
+        test    rax,rax
+        jnz     near .columnloop_last   ; exactly one block remains
+
+        pop     rsi
+        pop     rdi
+        pop     rcx
+        pop     rax
+
+        add     rsi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     rcx, byte 2                     ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor,
+;                           JDIMENSION output_width,
+;                           JSAMPARRAY input_data,
+;                           JSAMPARRAY * output_data_ptr);
+;
+
+; r10 = int max_v_samp_factor          (32-bit value; only r10d is read)
+; r11 = JDIMENSION output_width        (32-bit value; only r11d is read)
+; r12 = JSAMPARRAY input_data
+; r13 = JSAMPARRAY * output_data_ptr
+; Each input byte is written twice in a row (pixel replication), one output row per input row.
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_sse2)
+
+EXTN(jsimd_h2v1_upsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably read by collect_args; confirm
+        mov     rbp,rsp
+        collect_args
+
+        mov     edx, r11d       ; zero-extend: upper half of r11 is undefined
+        add     rdx, byte (2*SIZEOF_XMMWORD)-1
+        and     rdx, byte -(2*SIZEOF_XMMWORD)   ; output width rounded up to 2 XMMWORDs
+        jz      near .return
+
+        movsxd  rcx, r10d       ; rowctr (sign-extend: upper half of r10 is undefined)
+        test    rcx,rcx
+        jz      short .return
+
+        mov     rsi, r12 ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
+.rowloop:
+        push    rdi
+        push    rsi
+
+        mov     rsi, JSAMPROW [rsi]             ; inptr
+        mov     rdi, JSAMPROW [rdi]             ; outptr
+        mov     rax,rdx                         ; colctr
+.columnloop:
+        ; the loop body is unrolled twice; colctr counts output bytes
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0             ; duplicate each of the low 8 bytes
+        punpckhbw xmm1,xmm1             ; duplicate each of the high 8 bytes
+
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
+
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
+
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2
+        punpckhbw xmm3,xmm3
+
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
+
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
+
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rdi, byte 4*SIZEOF_XMMWORD      ; outptr
+        jmp     short .columnloop
+
+.nextrow:
+        pop     rsi
+        pop     rdi
+
+        add     rsi, byte SIZEOF_JSAMPROW       ; input_data
+        add     rdi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     rcx                             ; rowctr
+        jg      short .rowloop
+
+.return:
+        uncollect_args
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter.
+;
+; GLOBAL(void)
+; jsimd_h2v2_upsample_sse2 (int max_v_samp_factor,
+;                           JDIMENSION output_width,
+;                           JSAMPARRAY input_data,
+;                           JSAMPARRAY * output_data_ptr);
+;
+
+; r10 = int max_v_samp_factor          (32-bit value; only r10d is read)
+; r11 = JDIMENSION output_width        (32-bit value; only r11d is read)
+; r12 = JSAMPARRAY input_data
+; r13 = JSAMPARRAY * output_data_ptr
+; Each input byte is written twice in a row, into two consecutive output rows.
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_sse2)
+
+EXTN(jsimd_h2v2_upsample_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably read by collect_args; confirm
+        mov     rbp,rsp
+        collect_args
+        push    rbx
+
+        mov     edx, r11d       ; zero-extend: upper half of r11 is undefined
+        add     rdx, byte (2*SIZEOF_XMMWORD)-1
+        and     rdx, byte -(2*SIZEOF_XMMWORD)   ; output width rounded up to 2 XMMWORDs
+        jz      near .return
+
+        movsxd  rcx, r10d       ; rowctr (sign-extend: upper half of r10 is undefined)
+        test    rcx,rcx
+        jz      near .return
+
+        mov     rsi, r12        ; input_data
+        mov     rdi, r13
+        mov     rdi, JSAMPARRAY [rdi]                   ; output_data
+.rowloop:
+        push    rdi
+        push    rsi
+
+        mov     rsi, JSAMPROW [rsi]                     ; inptr
+        mov     rbx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     rdi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     rax,rdx                                 ; colctr
+.columnloop:
+        ; the loop body is unrolled twice; colctr counts output bytes per row
+        movdqa  xmm0, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0             ; duplicate each of the low 8 bytes
+        punpckhbw xmm1,xmm1             ; duplicate each of the high 8 bytes
+
+        movdqa  XMMWORD [rbx+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rbx+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [rdi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [rdi+1*SIZEOF_XMMWORD], xmm1
+
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
+
+        movdqa  xmm2, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2
+        punpckhbw xmm3,xmm3
+
+        movdqa  XMMWORD [rbx+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rbx+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [rdi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [rdi+3*SIZEOF_XMMWORD], xmm3
+
+        sub     rax, byte 2*SIZEOF_XMMWORD
+        jz      short .nextrow
+
+        add     rsi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     rbx, byte 4*SIZEOF_XMMWORD      ; outptr0
+        add     rdi, byte 4*SIZEOF_XMMWORD      ; outptr1
+        jmp     short .columnloop
+
+.nextrow:
+        pop     rsi
+        pop     rdi
+
+        add     rsi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     rdi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     rcx, byte 2                     ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jdsample-sse2.asm b/simd/jdsample-sse2.asm
new file mode 100644
index 0000000..51176d4
--- /dev/null
+++ b/simd/jdsample-sse2.asm
@@ -0,0 +1,729 @@
+;
+; jdsample-sse2.asm - upsampling (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+; Word constants used by the fancy (triangle-filter) upsamplers in this file.
+        alignz  16
+        global  EXTN(jconst_fancy_upsample_sse2)
+
+EXTN(jconst_fancy_upsample_sse2):
+
+PW_ONE          times 8 dw  1   ; h2v1: rounding term for even outputs (before >>2)
+PW_TWO          times 8 dw  2   ; h2v1: rounding term for odd outputs (before >>2)
+PW_THREE        times 8 dw  3   ; weight applied to the nearer sample
+PW_SEVEN        times 8 dw  7   ; presumably the h2v2 odd-output rounding term; confirm
+PW_EIGHT        times 8 dw  8   ; presumably the h2v2 even-output rounding term; confirm
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+;
+; The upsampling algorithm is linear interpolation between pixel centers,
+; also known as a "triangle filter".  This is a good compromise between
+; speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+; of the way between input pixel centers.
+;
+; GLOBAL(void)
+; jsimd_h2v1_fancy_upsample_sse2 (int max_v_samp_factor,
+;                                 JDIMENSION downsampled_width,
+;                                 JSAMPARRAY input_data,
+;                                 JSAMPARRAY * output_data_ptr);
+;
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+; Each input sample yields two outputs: (3*cur+left+1)>>2 and (3*cur+right+2)>>2.
+        align   16
+        global  EXTN(jsimd_h2v1_fancy_upsample_sse2)
+
+EXTN(jsimd_h2v1_fancy_upsample_sse2):
+        push    ebp
+        mov     ebp,esp
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        mov     eax, JDIMENSION [downsamp_width(ebp)]  ; colctr
+        test    eax,eax
+        jz      near .return
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    eax                     ; colctr
+        push    edi
+        push    esi
+
+        mov     esi, JSAMPROW [esi]     ; inptr
+        mov     edi, JSAMPROW [edi]     ; outptr
+
+        test    eax, SIZEOF_XMMWORD-1
+        jz      short .skip             ; width already a whole number of XMMWORDs
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+.skip:
+        pxor    xmm0,xmm0               ; xmm0=(all 0's)
+        pcmpeqb xmm7,xmm7
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=mask keeping only the lowest byte
+        pand    xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; sample 0 doubles as sample "-1"
+
+        add     eax, byte SIZEOF_XMMWORD-1
+        and     eax, byte -SIZEOF_XMMWORD       ; round colctr up to whole XMMWORDs
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      short .columnloop
+        alignx  16,7
+
+.columnloop_last:
+        pcmpeqb xmm6,xmm6
+        pslldq  xmm6,(SIZEOF_XMMWORD-1) ; xmm6=mask keeping only the highest byte
+        pand    xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; sample 15 doubles as sample "16"
+        jmp     short .upsample
+        alignx  16,7
+
+.columnloop:
+        movdqa  xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
+        pslldq  xmm6,(SIZEOF_XMMWORD-1) ; xmm6=(-- -- -- ... -- -- 16)
+
+.upsample:
+        movdqa  xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
+        movdqa  xmm2,xmm1
+        movdqa  xmm3,xmm1               ; xmm1=( 0  1  2 ... 13 14 15)
+        pslldq  xmm2,1                  ; xmm2=(--  0  1 ... 12 13 14)
+        psrldq  xmm3,1                  ; xmm3=( 1  2  3 ... 14 15 --)
+
+        por     xmm2,xmm7               ; xmm2=(-1  0  1 ... 12 13 14)
+        por     xmm3,xmm6               ; xmm3=( 1  2  3 ... 14 15 16)
+
+        movdqa  xmm7,xmm1
+        psrldq  xmm7,(SIZEOF_XMMWORD-1) ; xmm7=(15 -- -- ... -- -- --)
+        ; xmm7 now carries the left neighbor for the next column block
+        movdqa    xmm4,xmm1
+        punpcklbw xmm1,xmm0             ; xmm1=( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm0             ; xmm4=( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm2
+        punpcklbw xmm2,xmm0             ; xmm2=(-1  0  1  2  3  4  5  6)
+        punpckhbw xmm5,xmm0             ; xmm5=( 7  8  9 10 11 12 13 14)
+        movdqa    xmm6,xmm3
+        punpcklbw xmm3,xmm0             ; xmm3=( 1  2  3  4  5  6  7  8)
+        punpckhbw xmm6,xmm0             ; xmm6=( 9 10 11 12 13 14 15 16)
+
+        pmullw  xmm1,[GOTOFF(ebx,PW_THREE)]     ; 3 * current sample
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]
+        paddw   xmm2,[GOTOFF(ebx,PW_ONE)]       ; left neighbor + 1
+        paddw   xmm5,[GOTOFF(ebx,PW_ONE)]
+        paddw   xmm3,[GOTOFF(ebx,PW_TWO)]       ; right neighbor + 2
+        paddw   xmm6,[GOTOFF(ebx,PW_TWO)]
+
+        paddw   xmm2,xmm1
+        paddw   xmm5,xmm4
+        psrlw   xmm2,2                  ; xmm2=OutLE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,2                  ; xmm5=OutHE=(16 18 20 22 24 26 28 30)
+        paddw   xmm3,xmm1
+        paddw   xmm6,xmm4
+        psrlw   xmm3,2                  ; xmm3=OutLO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm6,2                  ; xmm6=OutHO=(17 19 21 23 25 27 29 31)
+        ; move odd outputs into the high byte of each word, then merge with even
+        psllw   xmm3,BYTE_BIT
+        psllw   xmm6,BYTE_BIT
+        por     xmm2,xmm3               ; xmm2=OutL=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm6               ; xmm5=OutH=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm5
+
+        sub     eax, byte SIZEOF_XMMWORD
+        add     esi, byte 1*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      near .columnloop        ; more than one block remains
+        test    eax,eax
+        jnz     near .columnloop_last   ; exactly one block remains
+
+        pop     esi
+        pop     edi
+        pop     eax
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+; Again a triangle filter; see comments for h2v1 case, above.
+; Each row-loop pass reads input rows -1, 0, +1 and writes two output rows.
+; GLOBAL(void)
+; jsimd_h2v2_fancy_upsample_sse2 (int max_v_samp_factor,
+;                                 JDIMENSION downsampled_width,
+;                                 JSAMPARRAY input_data,
+;                                 JSAMPARRAY * output_data_ptr);
+; Returns at once if downsampled_width or max_v_samp_factor is zero.
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define downsamp_width(b)       (b)+12          ; JDIMENSION downsampled_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment esp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          4               ; number of xmmword scratch slots
+%define gotptr          wk(0)-SIZEOF_POINTER    ; void * gotptr
+
+        align   16
+        global  EXTN(jsimd_h2v2_fancy_upsample_sse2)
+
+EXTN(jsimd_h2v2_fancy_upsample_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room for the saved stack pointer
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; reloaded by "pop esp" at .return
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; esp = lowest wk[] slot
+        pushpic eax             ; make room for GOT address
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx                     ; get GOT address
+        movpic  POINTER [gotptr], ebx   ; save GOT address
+
+        mov     edx,eax                         ; edx = original ebp
+        mov     eax, JDIMENSION [downsamp_width(edx)]  ; colctr
+        test    eax,eax
+        jz      near .return                    ; zero width: nothing to do
+
+        mov     ecx, INT [max_v_samp(edx)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return                    ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(edx)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(edx)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    eax                                     ; colctr
+        push    ecx                                     ; rowctr
+        push    edi                                     ; output_data
+        push    esi                                     ; input_data
+
+        mov     ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW]   ; inptr1(above)
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; inptr0
+        mov     esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; inptr1(below)
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+
+        test    eax, SIZEOF_XMMWORD-1                   ; any low bits set in colctr?
+        jz      short .skip                             ; no: skip the padding sample
+        push    edx                                     ; free dl as a byte temporary
+        mov     dl, JSAMPLE [ecx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ecx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [ebx+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [ebx+eax*SIZEOF_JSAMPLE], dl
+        mov     dl, JSAMPLE [esi+(eax-1)*SIZEOF_JSAMPLE]
+        mov     JSAMPLE [esi+eax*SIZEOF_JSAMPLE], dl    ; insert a dummy sample
+        pop     edx                                     ; restore outptr0
+.skip:
+        ; -- process the first column block
+
+        movdqa  xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD]    ; xmm0=row[ 0][0]
+        movdqa  xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD]    ; xmm1=row[-1][0]
+        movdqa  xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; xmm2=row[+1][0]
+
+        pushpic ebx                     ; ebx (inptr0) is reused for the GOT below
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+
+        pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]     ; xmm0=row[ 0]L * PW_THREE
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]     ; xmm4=row[ 0]H * PW_THREE
+
+        pcmpeqb xmm7,xmm7                       ; xmm7=(all 1's)
+        psrldq  xmm7,(SIZEOF_XMMWORD-2)         ; xmm7=mask keeping the lowest word
+
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+
+        movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6
+
+        pand    xmm1,xmm7               ; xmm1=( 0 -- -- -- -- -- -- --)
+        pand    xmm2,xmm7               ; xmm2=( 0 -- -- -- -- -- -- --)
+
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=left neighbour, upper row
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=left neighbour, lower row
+
+        poppic  ebx
+
+        add     eax, byte SIZEOF_XMMWORD-1      ; round colctr up to a
+        and     eax, byte -SIZEOF_XMMWORD       ; multiple of SIZEOF_XMMWORD
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      short .columnloop               ; more than one block remains
+        alignx  16,7
+
+.columnloop_last:
+        ; -- process the last column block
+
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pcmpeqb xmm1,xmm1                       ; xmm1=(all 1's)
+        pslldq  xmm1,(SIZEOF_XMMWORD-2)         ; xmm1=mask keeping the highest word
+        movdqa  xmm2,xmm1
+
+        pand    xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD]    ; last word of saved Int0H
+        pand    xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD]    ; last word of saved Int1H
+
+        movdqa  XMMWORD [wk(2)], xmm1   ; xmm1=(-- -- -- -- -- -- -- 15)
+        movdqa  XMMWORD [wk(3)], xmm2   ; xmm2=(-- -- -- -- -- -- -- 15)
+
+        jmp     near .upsample          ; wk(2)/wk(3) hold the right-edge neighbours
+        alignx  16,7
+
+.columnloop:
+        ; -- process the next column block
+
+        movdqa  xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD]    ; xmm0=row[ 0][1]
+        movdqa  xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD]    ; xmm1=row[-1][1]
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; xmm2=row[+1][1]
+
+        pushpic ebx
+        movpic  ebx, POINTER [gotptr]   ; load GOT address
+
+        pxor      xmm3,xmm3             ; xmm3=(all 0's)
+        movdqa    xmm4,xmm0
+        punpcklbw xmm0,xmm3             ; xmm0=row[ 0]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm4,xmm3             ; xmm4=row[ 0]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm5,xmm1
+        punpcklbw xmm1,xmm3             ; xmm1=row[-1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm5,xmm3             ; xmm5=row[-1]( 8  9 10 11 12 13 14 15)
+        movdqa    xmm6,xmm2
+        punpcklbw xmm2,xmm3             ; xmm2=row[+1]( 0  1  2  3  4  5  6  7)
+        punpckhbw xmm6,xmm3             ; xmm6=row[+1]( 8  9 10 11 12 13 14 15)
+
+        pmullw  xmm0,[GOTOFF(ebx,PW_THREE)]     ; xmm0=row[ 0]L * PW_THREE
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]     ; xmm4=row[ 0]H * PW_THREE
+
+        paddw   xmm1,xmm0               ; xmm1=Int0L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm5,xmm4               ; xmm5=Int0H=( 8  9 10 11 12 13 14 15)
+        paddw   xmm2,xmm0               ; xmm2=Int1L=( 0  1  2  3  4  5  6  7)
+        paddw   xmm6,xmm4               ; xmm6=Int1H=( 8  9 10 11 12 13 14 15)
+
+        movdqa  XMMWORD [edx+2*SIZEOF_XMMWORD], xmm1    ; temporarily save
+        movdqa  XMMWORD [edx+3*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm6
+
+        pslldq  xmm1,(SIZEOF_XMMWORD-2) ; xmm1=(-- -- -- -- -- -- --  0)
+        pslldq  xmm2,(SIZEOF_XMMWORD-2) ; xmm2=(-- -- -- -- -- -- --  0)
+
+        movdqa  XMMWORD [wk(2)], xmm1   ; wk(2)=right neighbour, upper row
+        movdqa  XMMWORD [wk(3)], xmm2   ; wk(3)=right neighbour, lower row
+
+.upsample:
+        ; -- process the upper row
+
+        movdqa  xmm7, XMMWORD [edx+0*SIZEOF_XMMWORD]
+        movdqa  xmm3, XMMWORD [edx+1*SIZEOF_XMMWORD]
+
+        movdqa  xmm0,xmm7               ; xmm7=Int0L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm4,xmm3               ; xmm3=Int0H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm0,2                  ; xmm0=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(-- -- -- -- -- -- --  8)
+        movdqa  xmm5,xmm7
+        movdqa  xmm6,xmm3
+        psrldq  xmm5,(SIZEOF_XMMWORD-2) ; xmm5=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm6,2                  ; xmm6=(--  8  9 10 11 12 13 14)
+
+        por     xmm0,xmm4               ; xmm0=( 1  2  3  4  5  6  7  8)
+        por     xmm5,xmm6               ; xmm5=( 7  8  9 10 11 12 13 14)
+
+        movdqa  xmm1,xmm7
+        movdqa  xmm2,xmm3
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm2,2                  ; xmm2=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm4,xmm3
+        psrldq  xmm4,(SIZEOF_XMMWORD-2) ; xmm4=(15 -- -- -- -- -- -- --)
+
+        por     xmm1, XMMWORD [wk(0)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm2, XMMWORD [wk(2)]   ; xmm2=( 9 10 11 12 13 14 15 16)
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=left neighbour for the next block
+
+        pmullw  xmm7,[GOTOFF(ebx,PW_THREE)]     ; xmm7=Int0L * PW_THREE
+        pmullw  xmm3,[GOTOFF(ebx,PW_THREE)]     ; xmm3=Int0H * PW_THREE
+        paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]     ; bias for the even outputs
+        paddw   xmm5,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm0,[GOTOFF(ebx,PW_SEVEN)]     ; bias for the odd outputs
+        paddw   xmm2,[GOTOFF(ebx,PW_SEVEN)]
+
+        paddw   xmm1,xmm7
+        paddw   xmm5,xmm3
+        psrlw   xmm1,4                  ; xmm1=Out0LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm5,4                  ; xmm5=Out0HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm0,xmm7
+        paddw   xmm2,xmm3
+        psrlw   xmm0,4                  ; xmm0=Out0LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm2,4                  ; xmm2=Out0HO=(17 19 21 23 25 27 29 31)
+
+        psllw   xmm0,BYTE_BIT           ; move odd samples into the high bytes
+        psllw   xmm2,BYTE_BIT
+        por     xmm1,xmm0               ; xmm1=Out0L=( 0  1  2 ... 13 14 15)
+        por     xmm5,xmm2               ; xmm5=Out0H=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [edx+0*SIZEOF_XMMWORD], xmm1    ; final samples replace
+        movdqa  XMMWORD [edx+1*SIZEOF_XMMWORD], xmm5    ; the intermediate data
+
+        ; -- process the lower row
+
+        movdqa  xmm6, XMMWORD [edi+0*SIZEOF_XMMWORD]
+        movdqa  xmm4, XMMWORD [edi+1*SIZEOF_XMMWORD]
+
+        movdqa  xmm7,xmm6               ; xmm6=Int1L=( 0  1  2  3  4  5  6  7)
+        movdqa  xmm3,xmm4               ; xmm4=Int1H=( 8  9 10 11 12 13 14 15)
+        psrldq  xmm7,2                  ; xmm7=( 1  2  3  4  5  6  7 --)
+        pslldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(-- -- -- -- -- -- --  8)
+        movdqa  xmm0,xmm6
+        movdqa  xmm2,xmm4
+        psrldq  xmm0,(SIZEOF_XMMWORD-2) ; xmm0=( 7 -- -- -- -- -- -- --)
+        pslldq  xmm2,2                  ; xmm2=(--  8  9 10 11 12 13 14)
+
+        por     xmm7,xmm3               ; xmm7=( 1  2  3  4  5  6  7  8)
+        por     xmm0,xmm2               ; xmm0=( 7  8  9 10 11 12 13 14)
+
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm4
+        pslldq  xmm1,2                  ; xmm1=(--  0  1  2  3  4  5  6)
+        psrldq  xmm5,2                  ; xmm5=( 9 10 11 12 13 14 15 --)
+        movdqa  xmm3,xmm4
+        psrldq  xmm3,(SIZEOF_XMMWORD-2) ; xmm3=(15 -- -- -- -- -- -- --)
+
+        por     xmm1, XMMWORD [wk(1)]   ; xmm1=(-1  0  1  2  3  4  5  6)
+        por     xmm5, XMMWORD [wk(3)]   ; xmm5=( 9 10 11 12 13 14 15 16)
+
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=left neighbour for the next block
+
+        pmullw  xmm6,[GOTOFF(ebx,PW_THREE)]     ; xmm6=Int1L * PW_THREE
+        pmullw  xmm4,[GOTOFF(ebx,PW_THREE)]     ; xmm4=Int1H * PW_THREE
+        paddw   xmm1,[GOTOFF(ebx,PW_EIGHT)]     ; bias for the even outputs
+        paddw   xmm0,[GOTOFF(ebx,PW_EIGHT)]
+        paddw   xmm7,[GOTOFF(ebx,PW_SEVEN)]     ; bias for the odd outputs
+        paddw   xmm5,[GOTOFF(ebx,PW_SEVEN)]
+
+        paddw   xmm1,xmm6
+        paddw   xmm0,xmm4
+        psrlw   xmm1,4                  ; xmm1=Out1LE=( 0  2  4  6  8 10 12 14)
+        psrlw   xmm0,4                  ; xmm0=Out1HE=(16 18 20 22 24 26 28 30)
+        paddw   xmm7,xmm6
+        paddw   xmm5,xmm4
+        psrlw   xmm7,4                  ; xmm7=Out1LO=( 1  3  5  7  9 11 13 15)
+        psrlw   xmm5,4                  ; xmm5=Out1HO=(17 19 21 23 25 27 29 31)
+
+        psllw   xmm7,BYTE_BIT           ; move odd samples into the high bytes
+        psllw   xmm5,BYTE_BIT
+        por     xmm1,xmm7               ; xmm1=Out1L=( 0  1  2 ... 13 14 15)
+        por     xmm0,xmm5               ; xmm0=Out1H=(16 17 18 ... 29 30 31)
+
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1    ; final samples replace
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0    ; the intermediate data
+
+        poppic  ebx
+
+        sub     eax, byte SIZEOF_XMMWORD        ; one column block consumed
+        add     ecx, byte 1*SIZEOF_XMMWORD      ; inptr1(above)
+        add     ebx, byte 1*SIZEOF_XMMWORD      ; inptr0
+        add     esi, byte 1*SIZEOF_XMMWORD      ; inptr1(below)
+        add     edx, byte 2*SIZEOF_XMMWORD      ; outptr0
+        add     edi, byte 2*SIZEOF_XMMWORD      ; outptr1
+        cmp     eax, byte SIZEOF_XMMWORD
+        ja      near .columnloop                ; more than one block remains
+        test    eax,eax
+        jnz     near .columnloop_last           ; exactly one block remains
+
+        pop     esi                             ; input_data
+        pop     edi                             ; output_data
+        pop     ecx                             ; rowctr
+        pop     eax                             ; colctr
+
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      near .rowloop
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+; It's still a box filter.
+; Every input byte is written twice, side by side, into the output row.
+; GLOBAL(void)
+; jsimd_h2v1_upsample_sse2 (int max_v_samp_factor,
+;                           JDIMENSION output_width,
+;                           JSAMPARRAY input_data,
+;                           JSAMPARRAY * output_data_ptr);
+; output_width is rounded up to a multiple of 2*SIZEOF_XMMWORD before use.
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+        align   16
+        global  EXTN(jsimd_h2v1_upsample_sse2)
+
+EXTN(jsimd_h2v1_upsample_sse2):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_XMMWORD)-1  ; round output_width up to a
+        and     edx, byte -(2*SIZEOF_XMMWORD)   ; multiple of 2*SIZEOF_XMMWORD
+        jz      short .return                   ; rounded width is zero
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      short .return                   ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    edi                             ; output_data
+        push    esi                             ; input_data
+
+        mov     esi, JSAMPROW [esi]             ; inptr
+        mov     edi, JSAMPROW [edi]             ; outptr
+        mov     eax,edx                         ; colctr
+        alignx  16,7
+.columnloop:
+
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; first input xmmword
+
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0             ; xmm0=low-half bytes, each doubled
+        punpckhbw xmm1,xmm1             ; xmm1=high-half bytes, each doubled
+
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1
+
+        sub     eax, byte 2*SIZEOF_XMMWORD      ; two output xmmwords written
+        jz      short .nextrow                  ; row complete
+
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; second input xmmword
+
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2             ; xmm2=low-half bytes, each doubled
+        punpckhbw xmm3,xmm3             ; xmm3=high-half bytes, each doubled
+
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
+
+        sub     eax, byte 2*SIZEOF_XMMWORD      ; two output xmmwords written
+        jz      short .nextrow                  ; row complete
+
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     edi, byte 4*SIZEOF_XMMWORD      ; outptr
+        jmp     short .columnloop
+        alignx  16,7
+
+.nextrow:
+        pop     esi                             ; input_data
+        pop     edi                             ; output_data
+
+        add     esi, byte SIZEOF_JSAMPROW       ; input_data
+        add     edi, byte SIZEOF_JSAMPROW       ; output_data
+        dec     ecx                             ; rowctr
+        jg      short .rowloop                  ; loop while rowctr > 0
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+; It's still a box filter.
+; Each byte-doubled input row is stored to two consecutive output rows.
+; GLOBAL(void)
+; jsimd_h2v2_upsample_sse2 (int max_v_samp_factor,
+;                           JDIMENSION output_width,
+;                           JSAMPARRAY input_data,
+;                           JSAMPARRAY * output_data_ptr);
+; output_width is rounded up to a multiple of 2*SIZEOF_XMMWORD before use.
+
+%define max_v_samp(b)           (b)+8           ; int max_v_samp_factor
+%define output_width(b)         (b)+12          ; JDIMENSION output_width
+%define input_data(b)           (b)+16          ; JSAMPARRAY input_data
+%define output_data_ptr(b)      (b)+20          ; JSAMPARRAY * output_data_ptr
+
+        align   16
+        global  EXTN(jsimd_h2v2_upsample_sse2)
+
+EXTN(jsimd_h2v2_upsample_sse2):
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     edx, JDIMENSION [output_width(ebp)]
+        add     edx, byte (2*SIZEOF_XMMWORD)-1  ; round output_width up to a
+        and     edx, byte -(2*SIZEOF_XMMWORD)   ; multiple of 2*SIZEOF_XMMWORD
+        jz      near .return                    ; rounded width is zero
+
+        mov     ecx, INT [max_v_samp(ebp)]      ; rowctr
+        test    ecx,ecx
+        jz      near .return                    ; zero rows: nothing to do
+
+        mov     esi, JSAMPARRAY [input_data(ebp)]       ; input_data
+        mov     edi, POINTER [output_data_ptr(ebp)]
+        mov     edi, JSAMPARRAY [edi]                   ; output_data
+        alignx  16,7
+.rowloop:
+        push    edi                                     ; output_data
+        push    esi                                     ; input_data
+
+        mov     esi, JSAMPROW [esi]                     ; inptr
+        mov     ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; outptr0
+        mov     edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; outptr1
+        mov     eax,edx                                 ; colctr
+        alignx  16,7
+.columnloop:
+
+        movdqa  xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]    ; first input xmmword
+
+        movdqa    xmm1,xmm0
+        punpcklbw xmm0,xmm0             ; xmm0=low-half bytes, each doubled
+        punpckhbw xmm1,xmm1             ; xmm1=high-half bytes, each doubled
+
+        movdqa  XMMWORD [ebx+0*SIZEOF_XMMWORD], xmm0
+        movdqa  XMMWORD [ebx+1*SIZEOF_XMMWORD], xmm1
+        movdqa  XMMWORD [edi+0*SIZEOF_XMMWORD], xmm0    ; same data goes to
+        movdqa  XMMWORD [edi+1*SIZEOF_XMMWORD], xmm1    ; the second output row
+
+        sub     eax, byte 2*SIZEOF_XMMWORD      ; two output xmmwords per row written
+        jz      short .nextrow                  ; row pair complete
+
+        movdqa  xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD]    ; second input xmmword
+
+        movdqa    xmm3,xmm2
+        punpcklbw xmm2,xmm2             ; xmm2=low-half bytes, each doubled
+        punpckhbw xmm3,xmm3             ; xmm3=high-half bytes, each doubled
+
+        movdqa  XMMWORD [ebx+2*SIZEOF_XMMWORD], xmm2
+        movdqa  XMMWORD [ebx+3*SIZEOF_XMMWORD], xmm3
+        movdqa  XMMWORD [edi+2*SIZEOF_XMMWORD], xmm2    ; same data goes to
+        movdqa  XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3    ; the second output row
+
+        sub     eax, byte 2*SIZEOF_XMMWORD      ; two output xmmwords per row written
+        jz      short .nextrow                  ; row pair complete
+
+        add     esi, byte 2*SIZEOF_XMMWORD      ; inptr
+        add     ebx, byte 4*SIZEOF_XMMWORD      ; outptr0
+        add     edi, byte 4*SIZEOF_XMMWORD      ; outptr1
+        jmp     short .columnloop
+        alignx  16,7
+
+.nextrow:
+        pop     esi                             ; input_data
+        pop     edi                             ; output_data
+
+        add     esi, byte 1*SIZEOF_JSAMPROW     ; input_data
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; output_data
+        sub     ecx, byte 2                     ; rowctr
+        jg      short .rowloop                  ; loop while rowctr > 0
+
+.return:
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctflt-3dn.asm b/simd/jfdctflt-3dn.asm
new file mode 100644
index 0000000..e23c521
--- /dev/null
+++ b/simd/jfdctflt-3dn.asm
@@ -0,0 +1,320 @@
+;
+; jfdctflt.asm - floating-point FDCT (3DNow!)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_float_3dnow)
+
+EXTN(jconst_fdct_float_3dnow):
+
+PD_0_382        times 2 dd  0.382683432365089771728460  ; sin(pi/8), x2
+PD_0_707        times 2 dd  0.707106781186547524400844  ; sqrt(1/2), x2
+PD_0_541        times 2 dd  0.541196100146196984399723  ; sqrt(2)*sin(pi/8), x2
+PD_1_306        times 2 dd  1.306562964876376527856643  ; sqrt(2)*cos(pi/8), x2
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the forward DCT on one block of samples.
+; The block is transformed in place: pass 1 over rows, pass 2 over columns.
+; GLOBAL(void)
+; jsimd_fdct_float_3dnow (FAST_FLOAT * data)
+; Each pass loops DCTSIZE/2 times and ends with femms before returning.
+
+%define data(b)         (b)+8           ; FAST_FLOAT * data
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment esp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots
+
+        align   16
+        global  EXTN(jsimd_fdct_float_3dnow)
+
+EXTN(jsimd_fdct_float_3dnow):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room for the saved stack pointer
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; reloaded by "pop esp" on exit
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; esp = lowest wk[] slot
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop counter
+        alignx  16,7
+.rowloop:
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+        ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
+
+        movq      mm4,mm0               ; transpose coefficients
+        punpckldq mm0,mm1               ; mm0=(00 10)=data0
+        punpckhdq mm4,mm1               ; mm4=(01 11)=data1
+        movq      mm5,mm2               ; transpose coefficients
+        punpckldq mm2,mm3               ; mm2=(06 16)=data6
+        punpckhdq mm5,mm3               ; mm5=(07 17)=data7
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+        pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+        pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+        pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
+
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+
+        ; mm1=(02 03), mm3=(12 13), mm2=(04 05), mm5=(14 15)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
+
+        movq      mm4,mm1               ; transpose coefficients
+        punpckldq mm1,mm3               ; mm1=(02 12)=data2
+        punpckhdq mm4,mm3               ; mm4=(03 13)=data3
+        movq      mm0,mm2               ; transpose coefficients
+        punpckldq mm2,mm5               ; mm2=(04 14)=data4
+        punpckhdq mm0,mm5               ; mm0=(05 15)=data5
+
+        movq    mm3,mm4
+        movq    mm5,mm1
+        pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+        pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+        pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+        pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm2,mm7
+        movq    mm0,mm6
+        pfsub   mm7,mm4                 ; mm7=tmp13
+        pfsub   mm6,mm1                 ; mm6=tmp12
+        pfadd   mm2,mm4                 ; mm2=tmp10
+        pfadd   mm0,mm1                 ; mm0=tmp11
+
+        pfadd   mm6,mm7                 ; mm6=tmp12+tmp13
+        pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+        movq    mm4,mm2
+        movq    mm1,mm7
+        pfsub   mm2,mm0                 ; mm2=data4
+        pfsub   mm7,mm6                 ; mm7=data6
+        pfadd   mm4,mm0                 ; mm4=data0
+        pfadd   mm1,mm6                 ; mm1=data2
+
+        movq    MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], mm1
+
+        ; -- Odd part
+
+        movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
+
+        pfadd   mm3,mm5                 ; mm3=tmp10
+        pfadd   mm5,mm0                 ; mm5=tmp11
+        pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
+
+        pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+        movq    mm2,mm3                 ; mm2=tmp10
+        pfsub   mm3,mm0                 ; mm3=tmp10-tmp12
+        pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+        pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+        pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+        pfadd   mm2,mm3                 ; mm2=z2
+        pfadd   mm0,mm3                 ; mm0=z4
+
+        movq    mm7,mm6
+        pfsub   mm6,mm5                 ; mm6=z13
+        pfadd   mm7,mm5                 ; mm7=z11
+
+        movq    mm4,mm6
+        movq    mm1,mm7
+        pfsub   mm6,mm2                 ; mm6=data3
+        pfsub   mm7,mm0                 ; mm7=data7
+        pfadd   mm4,mm2                 ; mm4=data5
+        pfadd   mm1,mm0                 ; mm1=data1
+
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+        add     edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; advance 2*DCTSIZE floats
+        dec     ecx
+        jnz     near .rowloop
+
+        ; ---- Pass 2: process columns.
+
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop counter
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+
+        ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
+
+        movq      mm4,mm0               ; transpose coefficients
+        punpckldq mm0,mm1               ; mm0=(00 01)=data0
+        punpckhdq mm4,mm1               ; mm4=(10 11)=data1
+        movq      mm5,mm2               ; transpose coefficients
+        punpckldq mm2,mm3               ; mm2=(60 61)=data6
+        punpckhdq mm5,mm3               ; mm5=(70 71)=data7
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm2                 ; mm4=data1-data6=tmp6
+        pfsub   mm0,mm5                 ; mm0=data0-data7=tmp7
+        pfadd   mm6,mm2                 ; mm6=data1+data6=tmp1
+        pfadd   mm7,mm5                 ; mm7=data0+data7=tmp0
+
+        movq    mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+
+        ; mm1=(20 30), mm3=(21 31), mm2=(40 50), mm5=(41 51)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=tmp7
+
+        movq      mm4,mm1               ; transpose coefficients
+        punpckldq mm1,mm3               ; mm1=(20 21)=data2
+        punpckhdq mm4,mm3               ; mm4=(30 31)=data3
+        movq      mm0,mm2               ; transpose coefficients
+        punpckldq mm2,mm5               ; mm2=(40 41)=data4
+        punpckhdq mm0,mm5               ; mm0=(50 51)=data5
+
+        movq    mm3,mm4
+        movq    mm5,mm1
+        pfadd   mm4,mm2                 ; mm4=data3+data4=tmp3
+        pfadd   mm1,mm0                 ; mm1=data2+data5=tmp2
+        pfsub   mm3,mm2                 ; mm3=data3-data4=tmp4
+        pfsub   mm5,mm0                 ; mm5=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm2,mm7
+        movq    mm0,mm6
+        pfsub   mm7,mm4                 ; mm7=tmp13
+        pfsub   mm6,mm1                 ; mm6=tmp12
+        pfadd   mm2,mm4                 ; mm2=tmp10
+        pfadd   mm0,mm1                 ; mm0=tmp11
+
+        pfadd   mm6,mm7                 ; mm6=tmp12+tmp13
+        pfmul   mm6,[GOTOFF(ebx,PD_0_707)] ; mm6=z1
+
+        movq    mm4,mm2
+        movq    mm1,mm7
+        pfsub   mm2,mm0                 ; mm2=data4
+        pfsub   mm7,mm6                 ; mm7=data6
+        pfadd   mm4,mm0                 ; mm4=data0
+        pfadd   mm1,mm6                 ; mm1=data2
+
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+        ; -- Odd part
+
+        movq    mm0, MMWORD [wk(0)]     ; mm0=tmp6
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp7
+
+        pfadd   mm3,mm5                 ; mm3=tmp10
+        pfadd   mm5,mm0                 ; mm5=tmp11
+        pfadd   mm0,mm6                 ; mm0=tmp12, mm6=tmp7
+
+        pfmul   mm5,[GOTOFF(ebx,PD_0_707)] ; mm5=z3
+
+        movq    mm2,mm3                 ; mm2=tmp10
+        pfsub   mm3,mm0                 ; mm3=tmp10-tmp12
+        pfmul   mm3,[GOTOFF(ebx,PD_0_382)] ; mm3=z5
+        pfmul   mm2,[GOTOFF(ebx,PD_0_541)] ; mm2=MULTIPLY(tmp10,FIX_0_54119610)
+        pfmul   mm0,[GOTOFF(ebx,PD_1_306)] ; mm0=MULTIPLY(tmp12,FIX_1_30656296)
+        pfadd   mm2,mm3                 ; mm2=z2
+        pfadd   mm0,mm3                 ; mm0=z4
+
+        movq    mm7,mm6
+        pfsub   mm6,mm5                 ; mm6=z13
+        pfadd   mm7,mm5                 ; mm7=z11
+
+        movq    mm4,mm6
+        movq    mm1,mm7
+        pfsub   mm6,mm2                 ; mm6=data3
+        pfsub   mm7,mm0                 ; mm7=data7
+        pfadd   mm4,mm2                 ; mm4=data5
+        pfadd   mm1,mm0                 ; mm1=data1
+
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], mm1
+
+        add     edx, byte 2*SIZEOF_FAST_FLOAT   ; advance two floats
+        dec     ecx
+        jnz     near .columnloop
+
+        femms           ; empty MMX/3DNow! state
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctflt-sse-64.asm b/simd/jfdctflt-sse-64.asm
new file mode 100644
index 0000000..4184e95
--- /dev/null
+++ b/simd/jfdctflt-sse-64.asm
@@ -0,0 +1,358 @@
+;
+; jfdctflt.asm - floating-point FDCT (64-bit SSE)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see jfdctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44      ; imm8 0x44 = 01_00_01_00b: result = (%1[0], %1[1], %2[0], %2[1])
+%endmacro
+
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE      ; imm8 0xEE = 11_10_11_10b: result = (%1[2], %1[3], %2[2], %2[3])
+%endmacro
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_float_sse)
+
+EXTN(jconst_fdct_float_sse):    ; multiplier table for the float FDCT; each entry is one value replicated 4 times
+
+PD_0_382        times 4 dd  0.382683432365089771728460      ; cos(3*pi/8)
+PD_0_707        times 4 dd  0.707106781186547524400844      ; cos(pi/4)
+PD_0_541        times 4 dd  0.541196100146196984399723      ; cos(pi/8) - cos(3*pi/8)
+PD_1_306        times 4 dd  1.306562964876376527856643      ; cos(pi/8) + cos(3*pi/8)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_float_sse (FAST_FLOAT * data)
+;
+
+; r10 = FAST_FLOAT * data
+
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2       ; number of XMMWORD scratch slots: wk(0) and wk(1), located just below rbp
+
+        align   16
+        global  EXTN(jsimd_fdct_float_sse)
+
+EXTN(jsimd_fdct_float_sse):     ; in-place forward DCT of the FAST_FLOAT block at r10: pass 1 over rows, pass 2 over columns
+        push    rbp                             ; save caller's rbp
+        mov     rax,rsp                         ; rax = rsp after the push (address of the saved rbp)
+        sub     rsp, byte 4                     ; NOTE(review): 4 bytes suffice for the 8-byte store below only if rsp is 8-byte aligned here -- confirm
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; keep the unaligned rsp at [aligned rbp] for the epilogue
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]                    ; rsp = &wk[0]: reserves the wk[] scratch slots below rbp
+        collect_args                            ; macro defined outside this file; the data pointer is read from r10 below
+
+        ; ---- Pass 1: process rows.
+
+        mov     rdx, r10        ; (FAST_FLOAT *)
+        mov     rcx, DCTSIZE/4  ; loop counter: each iteration handles 4 rows
+.rowloop:
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
+        ; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
+
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(20 30 21 31)
+        unpckhps xmm4,xmm1              ; xmm4=(22 32 23 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(24 34 25 35)
+        unpckhps xmm5,xmm3              ; xmm5=(26 36 27 37)
+
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
+        ; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
+
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 32 23 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(24 34 25 35)
+
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 10 01 11)
+        unpckhps xmm4,xmm7              ; xmm4=(02 12 03 13)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(04 14 05 15)
+        unpckhps xmm2,xmm3              ; xmm2=(06 16 07 17)
+
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 10 20 30)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(01 11 21 31)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(06 16 26 36)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(07 17 27 37)=data7
+
+        movaps  xmm0,xmm7               ; xmm0=data1
+        movaps  xmm5,xmm6               ; xmm5=data0
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 32 23 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(02 12 22 32)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(03 13 23 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(04 14 24 34)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(05 15 25 35)=data5
+
+        movaps  xmm2,xmm7               ; xmm2=data3
+        movaps  xmm3,xmm4               ; xmm3=data2
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movaps  xmm1,xmm5               ; xmm1=tmp0
+        movaps  xmm6,xmm0               ; xmm6=tmp1
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
+
+        addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+        mulps   xmm0,[rel PD_0_707] ; xmm0=z1
+
+        movaps  xmm7,xmm1               ; xmm7=tmp10
+        movaps  xmm4,xmm5               ; xmm4=tmp13
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
+
+        movaps  XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)], xmm1     ; (04 05 06 07) <- data4 (not transposed back)
+        movaps  XMMWORD [XMMBLOCK(2,1,rdx,SIZEOF_FAST_FLOAT)], xmm5     ; (24 25 26 27) <- data6
+        movaps  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7     ; (00 01 02 03) <- data0
+        movaps  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4     ; (20 21 22 23) <- data2
+
+        ; -- Odd part
+
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+        mulps   xmm3,[rel PD_0_707] ; xmm3=z3
+
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+        mulps   xmm2,[rel PD_0_382] ; xmm2=z5
+        mulps   xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
+
+        movaps  xmm5,xmm0               ; xmm5=tmp7
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
+
+        movaps  xmm7,xmm0               ; xmm7=z13
+        movaps  xmm4,xmm5               ; xmm4=z11
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
+
+        movaps  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0     ; (30 31 32 33) <- data3
+        movaps  XMMWORD [XMMBLOCK(3,1,rdx,SIZEOF_FAST_FLOAT)], xmm5     ; (34 35 36 37) <- data7
+        movaps  XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)], xmm7     ; (14 15 16 17) <- data5
+        movaps  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4     ; (10 11 12 13) <- data1
+
+        add     rdx, 4*DCTSIZE*SIZEOF_FAST_FLOAT        ; advance to the next 4 rows
+        dec     rcx
+        jnz     near .rowloop
+
+        ; ---- Pass 2: process columns (pass 1 left its results as transposed 4x4 tiles; the transposes below undo that).
+
+        mov     rdx, r10        ; (FAST_FLOAT *)
+        mov     rcx, DCTSIZE/4  ; loop counter: each iteration handles 4 columns
+.columnloop:
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
+        ; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
+
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(02 03 12 13)
+        unpckhps xmm4,xmm1              ; xmm4=(22 23 32 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(42 43 52 53)
+        unpckhps xmm5,xmm3              ; xmm5=(62 63 72 73)
+
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
+        ; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
+
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 23 32 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(42 43 52 53)
+
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 01 10 11)
+        unpckhps xmm4,xmm7              ; xmm4=(20 21 30 31)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(40 41 50 51)
+        unpckhps xmm2,xmm3              ; xmm2=(60 61 70 71)
+
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 01 02 03)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(10 11 12 13)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(60 61 62 63)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(70 71 72 73)=data7
+
+        movaps  xmm0,xmm7               ; xmm0=data1
+        movaps  xmm5,xmm6               ; xmm5=data0
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 23 32 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(20 21 22 23)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(30 31 32 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(40 41 42 43)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(50 51 52 53)=data5
+
+        movaps  xmm2,xmm7               ; xmm2=data3
+        movaps  xmm3,xmm4               ; xmm3=data2
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movaps  xmm1,xmm5               ; xmm1=tmp0
+        movaps  xmm6,xmm0               ; xmm6=tmp1
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
+
+        addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+        mulps   xmm0,[rel PD_0_707] ; xmm0=z1
+
+        movaps  xmm7,xmm1               ; xmm7=tmp10
+        movaps  xmm4,xmm5               ; xmm4=tmp13
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
+
+        movaps  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FAST_FLOAT)], xmm1     ; store data4
+        movaps  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FAST_FLOAT)], xmm5     ; store data6
+        movaps  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)], xmm7     ; store data0
+        movaps  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)], xmm4     ; store data2
+
+        ; -- Odd part
+
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+        mulps   xmm3,[rel PD_0_707] ; xmm3=z3
+
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+        mulps   xmm2,[rel PD_0_382] ; xmm2=z5
+        mulps   xmm1,[rel PD_0_541] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[rel PD_1_306] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
+
+        movaps  xmm5,xmm0               ; xmm5=tmp7
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
+
+        movaps  xmm7,xmm0               ; xmm7=z13
+        movaps  xmm4,xmm5               ; xmm4=z11
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
+
+        movaps  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FAST_FLOAT)], xmm0     ; store data3
+        movaps  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5     ; store data7
+        movaps  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7     ; store data5
+        movaps  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4     ; store data1
+
+        add     rdx, byte 4*SIZEOF_FAST_FLOAT   ; advance to the next 4 columns
+        dec     rcx
+        jnz     near .columnloop
+
+        uncollect_args          ; counterpart of collect_args (macro defined outside this file)
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- unaligned rsp saved by the prologue (address of the saved rbp)
+        pop     rbp             ; restore caller's rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctflt-sse.asm b/simd/jfdctflt-sse.asm
new file mode 100644
index 0000000..477f5c8
--- /dev/null
+++ b/simd/jfdctflt-sse.asm
@@ -0,0 +1,370 @@
+;
+; jfdctflt.asm - floating-point FDCT (SSE)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the forward DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jfdctflt.c; see jfdctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44      ; imm8 0x44 = 01_00_01_00b: result = (%1[0], %1[1], %2[0], %2[1])
+%endmacro
+
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE      ; imm8 0xEE = 11_10_11_10b: result = (%1[2], %1[3], %2[2], %2[3])
+%endmacro
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_float_sse)
+
+EXTN(jconst_fdct_float_sse):    ; multiplier table for the float FDCT; each entry is one value replicated 4 times
+
+PD_0_382        times 4 dd  0.382683432365089771728460      ; cos(3*pi/8)
+PD_0_707        times 4 dd  0.707106781186547524400844      ; cos(pi/4)
+PD_0_541        times 4 dd  0.541196100146196984399723      ; cos(pi/8) - cos(3*pi/8)
+PD_1_306        times 4 dd  1.306562964876376527856643      ; cos(pi/8) + cos(3*pi/8)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_float_sse (FAST_FLOAT * data)
+;
+
+%define data(b)         (b)+8           ; FAST_FLOAT * data; used with b = esp as it is right after "push ebp"
+
+%define original_ebp    ebp+0           ; slot at the aligned ebp where the prologue stores the unaligned esp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2               ; number of XMMWORD scratch slots: wk(0) and wk(1), located just below ebp
+
+        align   16
+        global  EXTN(jsimd_fdct_float_sse)
+
+EXTN(jsimd_fdct_float_sse):     ; in-place forward DCT of the FAST_FLOAT block passed on the stack: pass 1 over rows, pass 2 over columns
+        push    ebp                             ; save caller's ebp
+        mov     eax,esp                         ; eax = esp after the push (address of the saved ebp)
+        sub     esp, byte 4                     ; leave room for the 4-byte store below
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; keep the unaligned esp at [original_ebp] for the epilogue
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; esp = &wk[0]: reserves the wk[] scratch slots below ebp
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/4                  ; loop counter: each iteration handles 4 rows
+        alignx  16,7
+.rowloop:
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
+        ; xmm1=(30 31 32 33), xmm3=(34 35 36 37)
+
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(20 30 21 31)
+        unpckhps xmm4,xmm1              ; xmm4=(22 32 23 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(24 34 25 35)
+        unpckhps xmm5,xmm3              ; xmm5=(26 36 27 37)
+
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm6=(00 01 02 03), xmm1=(04 05 06 07)
+        ; xmm7=(10 11 12 13), xmm3=(14 15 16 17)
+
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 32 23 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(24 34 25 35)
+
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 10 01 11)
+        unpckhps xmm4,xmm7              ; xmm4=(02 12 03 13)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(04 14 05 15)
+        unpckhps xmm2,xmm3              ; xmm2=(06 16 07 17)
+
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 10 20 30)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(01 11 21 31)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(06 16 26 36)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(07 17 27 37)=data7
+
+        movaps  xmm0,xmm7               ; xmm0=data1
+        movaps  xmm5,xmm6               ; xmm5=data0
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 32 23 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(24 34 25 35)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(02 12 22 32)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(03 13 23 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(04 14 24 34)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(05 15 25 35)=data5
+
+        movaps  xmm2,xmm7               ; xmm2=data3
+        movaps  xmm3,xmm4               ; xmm3=data2
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movaps  xmm1,xmm5               ; xmm1=tmp0
+        movaps  xmm6,xmm0               ; xmm6=tmp1
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
+
+        addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+        mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+
+        movaps  xmm7,xmm1               ; xmm7=tmp10
+        movaps  xmm4,xmm5               ; xmm4=tmp13
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
+
+        movaps  XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)], xmm1     ; (04 05 06 07) <- data4 (not transposed back)
+        movaps  XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)], xmm5     ; (24 25 26 27) <- data6
+        movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7     ; (00 01 02 03) <- data0
+        movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4     ; (20 21 22 23) <- data2
+
+        ; -- Odd part
+
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+        mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+        mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+        mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
+
+        movaps  xmm5,xmm0               ; xmm5=tmp7
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
+
+        movaps  xmm7,xmm0               ; xmm7=z13
+        movaps  xmm4,xmm5               ; xmm4=z11
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
+
+        movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0     ; (30 31 32 33) <- data3
+        movaps  XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)], xmm5     ; (34 35 36 37) <- data7
+        movaps  XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)], xmm7     ; (14 15 16 17) <- data5
+        movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4     ; (10 11 12 13) <- data1
+
+        add     edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT        ; advance to the next 4 rows
+        dec     ecx
+        jnz     near .rowloop
+
+        ; ---- Pass 2: process columns (pass 1 left its results as transposed 4x4 tiles; the transposes below undo that).
+
+        mov     edx, POINTER [data(eax)]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/4                  ; loop counter: each iteration handles 4 columns
+        alignx  16,7
+.columnloop:
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
+        ; xmm1=(03 13 23 33), xmm3=(43 53 63 73)
+
+        movaps   xmm4,xmm0              ; transpose coefficients(phase 1)
+        unpcklps xmm0,xmm1              ; xmm0=(02 03 12 13)
+        unpckhps xmm4,xmm1              ; xmm4=(22 23 32 33)
+        movaps   xmm5,xmm2              ; transpose coefficients(phase 1)
+        unpcklps xmm2,xmm3              ; xmm2=(42 43 52 53)
+        unpckhps xmm5,xmm3              ; xmm5=(62 63 72 73)
+
+        movaps  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm7, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)]
+
+        ; xmm6=(00 10 20 30), xmm1=(40 50 60 70)
+        ; xmm7=(01 11 21 31), xmm3=(41 51 61 71)
+
+        movaps  XMMWORD [wk(0)], xmm4   ; wk(0)=(22 23 32 33)
+        movaps  XMMWORD [wk(1)], xmm2   ; wk(1)=(42 43 52 53)
+
+        movaps   xmm4,xmm6              ; transpose coefficients(phase 1)
+        unpcklps xmm6,xmm7              ; xmm6=(00 01 10 11)
+        unpckhps xmm4,xmm7              ; xmm4=(20 21 30 31)
+        movaps   xmm2,xmm1              ; transpose coefficients(phase 1)
+        unpcklps xmm1,xmm3              ; xmm1=(40 41 50 51)
+        unpckhps xmm2,xmm3              ; xmm2=(60 61 70 71)
+
+        movaps    xmm7,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm0             ; xmm6=(00 01 02 03)=data0
+        unpckhps2 xmm7,xmm0             ; xmm7=(10 11 12 13)=data1
+        movaps    xmm3,xmm2             ; transpose coefficients(phase 2)
+        unpcklps2 xmm2,xmm5             ; xmm2=(60 61 62 63)=data6
+        unpckhps2 xmm3,xmm5             ; xmm3=(70 71 72 73)=data7
+
+        movaps  xmm0,xmm7               ; xmm0=data1
+        movaps  xmm5,xmm6               ; xmm5=data0
+        subps   xmm7,xmm2               ; xmm7=data1-data6=tmp6
+        subps   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        addps   xmm0,xmm2               ; xmm0=data1+data6=tmp1
+        addps   xmm5,xmm3               ; xmm5=data0+data7=tmp0
+
+        movaps  xmm2, XMMWORD [wk(0)]   ; xmm2=(22 23 32 33)
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53)
+        movaps  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp6
+        movaps  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+        movaps    xmm7,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(20 21 22 23)=data2
+        unpckhps2 xmm7,xmm2             ; xmm7=(30 31 32 33)=data3
+        movaps    xmm6,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm3             ; xmm1=(40 41 42 43)=data4
+        unpckhps2 xmm6,xmm3             ; xmm6=(50 51 52 53)=data5
+
+        movaps  xmm2,xmm7               ; xmm2=data3
+        movaps  xmm3,xmm4               ; xmm3=data2
+        addps   xmm7,xmm1               ; xmm7=data3+data4=tmp3
+        addps   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        subps   xmm2,xmm1               ; xmm2=data3-data4=tmp4
+        subps   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movaps  xmm1,xmm5               ; xmm1=tmp0
+        movaps  xmm6,xmm0               ; xmm6=tmp1
+        subps   xmm5,xmm7               ; xmm5=tmp13
+        subps   xmm0,xmm4               ; xmm0=tmp12
+        addps   xmm1,xmm7               ; xmm1=tmp10
+        addps   xmm6,xmm4               ; xmm6=tmp11
+
+        addps   xmm0,xmm5               ; xmm0=tmp12+tmp13
+        mulps   xmm0,[GOTOFF(ebx,PD_0_707)] ; xmm0=z1
+
+        movaps  xmm7,xmm1               ; xmm7=tmp10
+        movaps  xmm4,xmm5               ; xmm4=tmp13
+        subps   xmm1,xmm6               ; xmm1=data4
+        subps   xmm5,xmm0               ; xmm5=data6
+        addps   xmm7,xmm6               ; xmm7=data0
+        addps   xmm4,xmm0               ; xmm4=data2
+
+        movaps  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FAST_FLOAT)], xmm1     ; store data4
+        movaps  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)], xmm5     ; store data6
+        movaps  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)], xmm7     ; store data0
+        movaps  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)], xmm4     ; store data2
+
+        ; -- Odd part
+
+        movaps  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp6
+        movaps  xmm0, XMMWORD [wk(1)]   ; xmm0=tmp7
+
+        addps   xmm2,xmm3               ; xmm2=tmp10
+        addps   xmm3,xmm6               ; xmm3=tmp11
+        addps   xmm6,xmm0               ; xmm6=tmp12, xmm0=tmp7
+
+        mulps   xmm3,[GOTOFF(ebx,PD_0_707)] ; xmm3=z3
+
+        movaps  xmm1,xmm2               ; xmm1=tmp10
+        subps   xmm2,xmm6               ; xmm2=tmp10-tmp12
+        mulps   xmm2,[GOTOFF(ebx,PD_0_382)] ; xmm2=z5
+        mulps   xmm1,[GOTOFF(ebx,PD_0_541)] ; xmm1=MULTIPLY(tmp10,FIX_0_541196)
+        mulps   xmm6,[GOTOFF(ebx,PD_1_306)] ; xmm6=MULTIPLY(tmp12,FIX_1_306562)
+        addps   xmm1,xmm2               ; xmm1=z2
+        addps   xmm6,xmm2               ; xmm6=z4
+
+        movaps  xmm5,xmm0               ; xmm5=tmp7
+        subps   xmm0,xmm3               ; xmm0=z13
+        addps   xmm5,xmm3               ; xmm5=z11
+
+        movaps  xmm7,xmm0               ; xmm7=z13
+        movaps  xmm4,xmm5               ; xmm4=z11
+        subps   xmm0,xmm1               ; xmm0=data3
+        subps   xmm5,xmm6               ; xmm5=data7
+        addps   xmm7,xmm1               ; xmm7=data5
+        addps   xmm4,xmm6               ; xmm4=data1
+
+        movaps  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)], xmm0     ; store data3
+        movaps  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)], xmm5     ; store data7
+        movaps  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FAST_FLOAT)], xmm7     ; store data5
+        movaps  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)], xmm4     ; store data1
+
+        add     edx, byte 4*SIZEOF_FAST_FLOAT   ; advance to the next 4 columns
+        dec     ecx
+        jnz     near .columnloop
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- unaligned esp saved by the prologue (address of the saved ebp)
+        pop     ebp             ; restore caller's ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctfst-mmx.asm b/simd/jfdctfst-mmx.asm
new file mode 100644
index 0000000..2c2d20c
--- /dev/null
+++ b/simd/jfdctfst-mmx.asm
@@ -0,0 +1,397 @@
+;
+; jfdctfst-mmx.asm - fast integer FDCT (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      8       ; fraction bits of the F_* constants; 14 is also OK.
+
+%if CONST_BITS == 8
+F_0_382 equ      98             ; FIX(0.382683433) = round(0.382683433 * 256)
+F_0_541 equ     139             ; FIX(0.541196100) = round(0.541196100 * 256)
+F_0_707 equ     181             ; FIX(0.707106781) = round(0.707106781 * 256)
+F_1_306 equ     334             ; FIX(1.306562965) = round(1.306562965 * 256)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants; the values below are pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))       ; x / 2^n, rounded to nearest
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+; (pmulhw keeps the high 16 bits of each signed 16x16-bit product, i.e. >>16.)
+%define PRE_MULTIPLY_SCALE_BITS   2     ; left shift applied to operands (psllw) before pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)    ; left shift applied to the F_* table entries
+
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_mmx)
+
+EXTN(jconst_fdct_ifast_mmx):
+
+PW_F0707        times 4 dw  F_0_707 << CONST_SHIFT      ; 0.707106781 in each of 4 word lanes
+PW_F0382        times 4 dw  F_0_382 << CONST_SHIFT      ; 0.382683433 in each of 4 word lanes
+PW_F0541        times 4 dw  F_0_541 << CONST_SHIFT      ; 0.541196100 in each of 4 word lanes
+PW_F1306        times 4 dw  F_1_306 << CONST_SHIFT      ; 1.306562965 in each of 4 word lanes
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_ifast_mmx (DCTELEM * data)
+; (data is read and overwritten in place: pass 1 on rows, pass 2 on columns)
+
+%define data(b)         (b)+8           ; DCTELEM * data: first stack argument, b = original ebp
+
+%define original_ebp    ebp+0           ; slot at the aligned ebp that holds the original ebp
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots: wk(0), wk(1)
+
+        align   16
+        global  EXTN(jsimd_fdct_ifast_mmx)
+
+EXTN(jsimd_fdct_ifast_mmx):
+        push    ebp                             ; save caller's ebp
+        mov     eax,esp                         ; eax = original ebp (esp right after the push)
+        sub     esp, byte 4                     ; room for one dword: the saved original ebp
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; [original_ebp] = eax, popped back into esp on exit
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[WK_NUM] just below the aligned ebp
+        pushpic ebx                             ; NOTE(review): macro defined outside this file; presumably saves ebx for PIC builds
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; edx = data (DCTELEM *)
+        mov     ecx, DCTSIZE/4                  ; loop counter: 4 rows per iteration
+        alignx  16,7
+.rowloop:
+
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+
+        ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+        ; mm1=(30 31 32 33), mm3=(34 35 36 37)
+
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+        punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
+
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+
+        ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+        ; mm7=(10 11 12 13), mm3=(14 15 16 17)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
+
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+        punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+        punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+        punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
+
+        movq    mm0,mm7                 ; mm0=data1 (copy for the sum)
+        movq    mm5,mm6                 ; mm5=data0 (copy for the sum)
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+        punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+        punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
+
+        movq    mm2,mm7                 ; mm2=data3 (copy for the difference)
+        movq    mm3,mm4                 ; mm3=data2 (copy for the difference)
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm1,mm5
+        movq    mm6,mm0
+        psubw   mm5,mm7                 ; mm5=tmp13
+        psubw   mm0,mm4                 ; mm0=tmp12
+        paddw   mm1,mm7                 ; mm1=tmp10
+        paddw   mm6,mm4                 ; mm6=tmp11
+
+        paddw   mm0,mm5                 ; mm0=tmp12+tmp13
+        psllw   mm0,PRE_MULTIPLY_SCALE_BITS     ; pre-scale for pmulhw (see CONST_SHIFT)
+        pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+
+        movq    mm7,mm1
+        movq    mm4,mm5
+        psubw   mm1,mm6                 ; mm1=data4
+        psubw   mm5,mm0                 ; mm5=data6
+        paddw   mm7,mm6                 ; mm7=data0
+        paddw   mm4,mm0                 ; mm4=data2
+        ; Per the lane comments, outputs stay transposed within each 4x4 tile; pass 2 transposes again.
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm1   ; data4
+        movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm5   ; data6
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7   ; data0
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4   ; data2
+
+        ; -- Odd part
+
+        movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
+
+        paddw   mm2,mm3                 ; mm2=tmp10
+        paddw   mm3,mm6                 ; mm3=tmp11
+        paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
+
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp10 for pmulhw
+        psllw   mm6,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp12 for pmulhw
+
+        psllw   mm3,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp11 for pmulhw
+        pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+
+        movq    mm1,mm2                 ; mm1=tmp10
+        psubw   mm2,mm6                 ; mm2=tmp10-tmp12
+        pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+        pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+        pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+        paddw   mm1,mm2                 ; mm1=z2
+        paddw   mm6,mm2                 ; mm6=z4
+
+        movq    mm5,mm0
+        psubw   mm0,mm3                 ; mm0=z13
+        paddw   mm5,mm3                 ; mm5=z11
+
+        movq    mm7,mm0
+        movq    mm4,mm5
+        psubw   mm0,mm1                 ; mm0=data3
+        psubw   mm5,mm6                 ; mm5=data7
+        paddw   mm7,mm1                 ; mm7=data5
+        paddw   mm4,mm6                 ; mm4=data1
+
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0   ; data3
+        movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm5   ; data7
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm7   ; data5
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4   ; data1
+
+        add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; advance to the next 4 rows
+        dec     ecx
+        jnz     near .rowloop
+
+        ; ---- Pass 2: process columns.
+
+        mov     edx, POINTER [data(eax)]        ; edx = data (DCTELEM *), back at the block start
+        mov     ecx, DCTSIZE/4                  ; loop counter: 4 columns per iteration
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+        ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+        ; mm1=(03 13 23 33), mm3=(43 53 63 73)
+
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+        punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+        punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
+
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+
+        ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+        ; mm7=(01 11 21 31), mm3=(41 51 61 71)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
+
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+        punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+        punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+        punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
+
+        movq    mm0,mm7                 ; mm0=data1 (copy for the sum)
+        movq    mm5,mm6                 ; mm5=data0 (copy for the sum)
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+        punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+        punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
+
+        movq    mm2,mm7                 ; mm2=data3 (copy for the difference)
+        movq    mm3,mm4                 ; mm3=data2 (copy for the difference)
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm1,mm5
+        movq    mm6,mm0
+        psubw   mm5,mm7                 ; mm5=tmp13
+        psubw   mm0,mm4                 ; mm0=tmp12
+        paddw   mm1,mm7                 ; mm1=tmp10
+        paddw   mm6,mm4                 ; mm6=tmp11
+
+        paddw   mm0,mm5                 ; mm0=tmp12+tmp13
+        psllw   mm0,PRE_MULTIPLY_SCALE_BITS     ; pre-scale for pmulhw (see CONST_SHIFT)
+        pmulhw  mm0,[GOTOFF(ebx,PW_F0707)] ; mm0=z1
+
+        movq    mm7,mm1
+        movq    mm4,mm5
+        psubw   mm1,mm6                 ; mm1=data4
+        psubw   mm5,mm0                 ; mm5=data6
+        paddw   mm7,mm6                 ; mm7=data0
+        paddw   mm4,mm0                 ; mm4=data2
+
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm1   ; data4
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm5   ; data6
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm7   ; data0
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4   ; data2
+
+        ; -- Odd part
+
+        movq    mm6, MMWORD [wk(0)]     ; mm6=tmp6
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp7
+
+        paddw   mm2,mm3                 ; mm2=tmp10
+        paddw   mm3,mm6                 ; mm3=tmp11
+        paddw   mm6,mm0                 ; mm6=tmp12, mm0=tmp7
+
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp10 for pmulhw
+        psllw   mm6,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp12 for pmulhw
+
+        psllw   mm3,PRE_MULTIPLY_SCALE_BITS     ; pre-scale tmp11 for pmulhw
+        pmulhw  mm3,[GOTOFF(ebx,PW_F0707)] ; mm3=z3
+
+        movq    mm1,mm2                 ; mm1=tmp10
+        psubw   mm2,mm6                 ; mm2=tmp10-tmp12
+        pmulhw  mm2,[GOTOFF(ebx,PW_F0382)] ; mm2=z5
+        pmulhw  mm1,[GOTOFF(ebx,PW_F0541)] ; mm1=MULTIPLY(tmp10,FIX_0_54119610)
+        pmulhw  mm6,[GOTOFF(ebx,PW_F1306)] ; mm6=MULTIPLY(tmp12,FIX_1_30656296)
+        paddw   mm1,mm2                 ; mm1=z2
+        paddw   mm6,mm2                 ; mm6=z4
+
+        movq    mm5,mm0
+        psubw   mm0,mm3                 ; mm0=z13
+        paddw   mm5,mm3                 ; mm5=z11
+
+        movq    mm7,mm0
+        movq    mm4,mm5
+        psubw   mm0,mm1                 ; mm0=data3
+        psubw   mm5,mm6                 ; mm5=data7
+        paddw   mm7,mm1                 ; mm7=data5
+        paddw   mm4,mm6                 ; mm4=data1
+
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm0   ; data3
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm5   ; data7
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm7   ; data5
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm4   ; data1
+
+        add     edx, byte 4*SIZEOF_DCTELEM      ; advance to the next 4 columns
+        dec     ecx
+        jnz     near .columnloop
+
+        emms            ; empty MMX state
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx             ; counterpart of pushpic in the prologue
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp             ; restore caller's ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctfst-sse2-64.asm b/simd/jfdctfst-sse2-64.asm
new file mode 100644
index 0000000..fe87fcc
--- /dev/null
+++ b/simd/jfdctfst-sse2-64.asm
@@ -0,0 +1,354 @@
+;
+; jfdctfst-sse2-64.asm - fast integer FDCT (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009, 2014, D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      8       ; fraction bits of the F_* constants; 14 is also OK.
+
+%if CONST_BITS == 8
+F_0_382 equ      98             ; FIX(0.382683433) = round(0.382683433 * 256)
+F_0_541 equ     139             ; FIX(0.541196100) = round(0.541196100 * 256)
+F_0_707 equ     181             ; FIX(0.707106781) = round(0.707106781 * 256)
+F_1_306 equ     334             ; FIX(1.306562965) = round(1.306562965 * 256)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants; the values below are pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))       ; x / 2^n, rounded to nearest
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781)
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+; (pmulhw keeps the high 16 bits of each signed 16x16-bit product, i.e. >>16.)
+%define PRE_MULTIPLY_SCALE_BITS   2     ; left shift applied to operands (psllw) before pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)    ; left shift applied to the F_* table entries
+
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_sse2)
+
+EXTN(jconst_fdct_ifast_sse2):
+
+PW_F0707        times 8 dw  F_0_707 << CONST_SHIFT      ; 0.707106781 in each of 8 word lanes
+PW_F0382        times 8 dw  F_0_382 << CONST_SHIFT      ; 0.382683433 in each of 8 word lanes
+PW_F0541        times 8 dw  F_0_541 << CONST_SHIFT      ; 0.541196100 in each of 8 word lanes
+PW_F1306        times 8 dw  F_1_306 << CONST_SHIFT      ; 1.306562965 in each of 8 word lanes
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_ifast_sse2 (DCTELEM * data)
+; (data is read once, kept in xmm registers across both passes, and overwritten in place)
+
+; r10 = DCTELEM * data  (NOTE(review): r10 is not set in this file; presumably by collect_args)
+
+        align   16
+        global  EXTN(jsimd_fdct_ifast_sse2)
+
+EXTN(jsimd_fdct_ifast_sse2):
+        push    rbp                     ; save caller's rbp
+        mov     rax,rsp                 ; NOTE(review): rax is not read again below; presumably for collect_args
+        mov     rbp,rsp
+        collect_args
+        ; NOTE(review): xmm8-xmm15 are overwritten below; confirm the arg macros cover ABIs where they are callee-saved.
+        ; ---- Pass 1: process rows.
+
+        mov     rdx, r10        ; (DCTELEM *)
+
+        movdqa  xmm8, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]        ; movdqa: aligned 128-bit load
+        movdqa  xmm9, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm11, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm13, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm15, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
+
+        ; xmm8=(00 01 02 03 04 05 06 07), xmm9=(10 11 12 13 14 15 16 17)
+        ; xmm2=(20 21 22 23 24 25 26 27), xmm11=(30 31 32 33 34 35 36 37)
+        ; xmm4=(40 41 42 43 44 45 46 47), xmm13=(50 51 52 53 54 55 56 57)
+        ; xmm5=(60 61 62 63 64 65 66 67), xmm15=(70 71 72 73 74 75 76 77)
+
+        movdqa    xmm12,xmm8            ; transpose coefficients(phase 1)
+        punpcklwd xmm8,xmm9             ; xmm8=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm12,xmm9            ; xmm12=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm11            ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm1,xmm11            ; xmm1=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm13            ; xmm4=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm0,xmm13            ; xmm0=(44 54 45 55 46 56 47 57)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm15            ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm3,xmm15            ; xmm3=(64 74 65 75 66 76 67 77)
+
+        movdqa    xmm10,xmm8            ; transpose coefficients(phase 2)
+        punpckldq xmm8,xmm2             ; xmm8=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm10,xmm2            ; xmm10=(02 12 22 32 03 13 23 33)
+        movdqa    xmm14,xmm12           ; transpose coefficients(phase 2)
+        punpckldq xmm12,xmm1            ; xmm12=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm14,xmm1            ; xmm14=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm6,xmm5             ; xmm6=(42 52 62 72 43 53 63 73)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm7,xmm3             ; xmm7=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm9,xmm8            ; transpose coefficients(phase 3)
+        punpcklqdq xmm8,xmm4            ; xmm8=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm9,xmm4            ; xmm9=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm11,xmm10          ; transpose coefficients(phase 3)
+        punpcklqdq xmm10,xmm6           ; xmm10=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm11,xmm6           ; xmm11=(03 13 23 33 43 53 63 73)=data3
+
+        movdqa     xmm13,xmm12          ; transpose coefficients(phase 3)
+        punpcklqdq xmm12,xmm0           ; xmm12=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm13,xmm0           ; xmm13=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm15,xmm14          ; transpose coefficients(phase 3)
+        punpcklqdq xmm14,xmm7           ; xmm14=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm15,xmm7           ; xmm15=(07 17 27 37 47 57 67 77)=data7
+
+        movdqa  xmm0,xmm8
+        paddw   xmm0,xmm15              ; xmm0=data0+data7=tmp0
+        movdqa  xmm1,xmm9
+        paddw   xmm1,xmm14              ; xmm1=data1+data6=tmp1
+        movdqa  xmm2,xmm10
+        paddw   xmm2,xmm13              ; xmm2=data2+data5=tmp2
+        movdqa  xmm3,xmm11
+        paddw   xmm3,xmm12              ; xmm3=data3+data4=tmp3
+
+        psubw   xmm11,xmm12             ; xmm11=data3-data4=tmp4
+        psubw   xmm10,xmm13             ; xmm10=data2-data5=tmp5
+        psubw   xmm9,xmm14              ; xmm9=data1-data6=tmp6
+        psubw   xmm8,xmm15              ; xmm8=data0-data7=tmp7
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm0
+        paddw   xmm4,xmm3               ; xmm4=tmp0+tmp3=tmp10
+        movdqa  xmm5,xmm1
+        paddw   xmm5,xmm2               ; xmm5=tmp1+tmp2=tmp11
+        psubw   xmm1,xmm2               ; xmm1=tmp1-tmp2=tmp12
+        psubw   xmm0,xmm3               ; xmm0=tmp0-tmp3=tmp13
+
+        paddw   xmm1,xmm0               ; xmm1=tmp12+tmp13
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS    ; pre-scale for pmulhw (see CONST_SHIFT)
+        pmulhw  xmm1,[rel PW_F0707]     ; xmm1=z1
+
+        movdqa  xmm12,xmm4
+        paddw   xmm12,xmm5              ; xmm12=tmp10+tmp11=out0
+        movdqa  xmm14,xmm0
+        paddw   xmm14,xmm1              ; xmm14=tmp13+z1=out2
+        psubw   xmm4,xmm5               ; xmm4=tmp10-tmp11=out4
+        psubw   xmm0,xmm1               ; xmm0=tmp13-z1=out6
+
+        ; -- Odd part
+
+        paddw   xmm11,xmm10             ; xmm11=tmp4+tmp5=tmp10
+        paddw   xmm10,xmm9              ; xmm10=tmp5+tmp6=tmp11
+        paddw   xmm9,xmm8               ; xmm9=tmp6+tmp7=tmp12
+
+        psllw   xmm11,PRE_MULTIPLY_SCALE_BITS   ; pre-scale tmp10 for pmulhw
+        psllw   xmm10,PRE_MULTIPLY_SCALE_BITS   ; pre-scale tmp11 for pmulhw
+        psllw   xmm9,PRE_MULTIPLY_SCALE_BITS    ; pre-scale tmp12 for pmulhw
+
+        movdqa  xmm5,xmm11
+        psubw   xmm5,xmm9               ; xmm5=tmp10-tmp12
+        pmulhw  xmm5,[rel PW_F0382]     ; xmm5=z5
+
+        pmulhw  xmm11,[rel PW_F0541]    ; xmm11=MULTIPLY(tmp10,FIX_0_541196)
+        paddw   xmm11,xmm5              ;       +z5=z2
+
+        pmulhw  xmm9,[rel PW_F1306]     ; xmm9=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm9,xmm5               ;      +z5=z4
+
+        pmulhw  xmm10,[rel PW_F0707]    ; xmm10=MULTIPLY(tmp11,FIX_0_707106)=z3
+
+        movdqa  xmm1,xmm8
+        paddw   xmm1,xmm10              ; xmm1=tmp7+z3=z11
+        psubw   xmm8,xmm10              ; xmm8=tmp7-z3=z13
+
+        movdqa  xmm13,xmm8
+        paddw   xmm13,xmm11             ; xmm13=z13+z2=out5
+        movdqa  xmm15,xmm1
+        psubw   xmm15,xmm9              ; xmm15=z11-z4=out7
+        paddw   xmm9,xmm1               ; xmm9=z11+z4=out1
+        psubw   xmm8,xmm11              ; xmm8=z13-z2=out3
+
+        ; ---- Pass 2: process columns.
+
+        ; Re-order registers so we can reuse the same transpose code
+        movdqa    xmm11,xmm8            ; xmm11=out3
+        movdqa    xmm8,xmm12            ; xmm8=out0
+        movdqa    xmm2,xmm14            ; xmm2=out2
+        movdqa    xmm5,xmm0             ; xmm5=out6
+
+        ; xmm8=(00 10 20 30 40 50 60 70), xmm9=(01 11 21 31 41 51 61 71)
+        ; xmm2=(02 12 22 32 42 52 62 72), xmm11=(03 13 23 33 43 53 63 73)
+        ; xmm4=(04 14 24 34 44 54 64 74), xmm13=(05 15 25 35 45 55 65 75)
+        ; xmm5=(06 16 26 36 46 56 66 76), xmm15=(07 17 27 37 47 57 67 77)
+
+        movdqa    xmm12,xmm8            ; transpose coefficients(phase 1)
+        punpcklwd xmm8,xmm9             ; xmm8=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm12,xmm9            ; xmm12=(40 41 50 51 60 61 70 71)
+        movdqa    xmm1,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm11            ; xmm2=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm1,xmm11            ; xmm1=(42 43 52 53 62 63 72 73)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm13            ; xmm4=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm0,xmm13            ; xmm0=(44 45 54 55 64 65 74 75)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm15            ; xmm5=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm3,xmm15            ; xmm3=(46 47 56 57 66 67 76 77)
+
+        movdqa    xmm10,xmm8            ; transpose coefficients(phase 2)
+        punpckldq xmm8,xmm2             ; xmm8=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm10,xmm2            ; xmm10=(20 21 22 23 30 31 32 33)
+        movdqa    xmm14,xmm12           ; transpose coefficients(phase 2)
+        punpckldq xmm12,xmm1            ; xmm12=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm14,xmm1            ; xmm14=(60 61 62 63 70 71 72 73)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm6,xmm5             ; xmm6=(24 25 26 27 34 35 36 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
+
+        movdqa     xmm9,xmm8            ; transpose coefficients(phase 3)
+        punpcklqdq xmm8,xmm4            ; xmm8=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm9,xmm4            ; xmm9=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm11,xmm10          ; transpose coefficients(phase 3)
+        punpcklqdq xmm10,xmm6           ; xmm10=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm11,xmm6           ; xmm11=(30 31 32 33 34 35 36 37)=data3
+
+        movdqa     xmm13,xmm12          ; transpose coefficients(phase 3)
+        punpcklqdq xmm12,xmm0           ; xmm12=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm13,xmm0           ; xmm13=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm15,xmm14          ; transpose coefficients(phase 3)
+        punpcklqdq xmm14,xmm7           ; xmm14=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm15,xmm7           ; xmm15=(70 71 72 73 74 75 76 77)=data7
+
+        movdqa  xmm0,xmm8
+        paddw   xmm0,xmm15              ; xmm0=data0+data7=tmp0
+        movdqa  xmm1,xmm9
+        paddw   xmm1,xmm14              ; xmm1=data1+data6=tmp1
+        movdqa  xmm2,xmm10
+        paddw   xmm2,xmm13              ; xmm2=data2+data5=tmp2
+        movdqa  xmm3,xmm11
+        paddw   xmm3,xmm12              ; xmm3=data3+data4=tmp3
+
+        psubw   xmm11,xmm12             ; xmm11=data3-data4=tmp4
+        psubw   xmm10,xmm13             ; xmm10=data2-data5=tmp5
+        psubw   xmm9,xmm14              ; xmm9=data1-data6=tmp6
+        psubw   xmm8,xmm15              ; xmm8=data0-data7=tmp7
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm0
+        paddw   xmm4,xmm3               ; xmm4=tmp0+tmp3=tmp10
+        movdqa  xmm5,xmm1
+        paddw   xmm5,xmm2               ; xmm5=tmp1+tmp2=tmp11
+        psubw   xmm1,xmm2               ; xmm1=tmp1-tmp2=tmp12
+        psubw   xmm0,xmm3               ; xmm0=tmp0-tmp3=tmp13
+
+        paddw   xmm1,xmm0               ; xmm1=tmp12+tmp13
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS    ; pre-scale for pmulhw (see CONST_SHIFT)
+        pmulhw  xmm1,[rel PW_F0707]     ; xmm1=z1
+
+        movdqa  xmm12,xmm4
+        paddw   xmm12,xmm5              ; xmm12=tmp10+tmp11=out0
+        movdqa  xmm14,xmm0
+        paddw   xmm14,xmm1              ; xmm14=tmp13+z1=out2
+        psubw   xmm4,xmm5               ; xmm4=tmp10-tmp11=out4
+        psubw   xmm0,xmm1               ; xmm0=tmp13-z1=out6
+
+        ; -- Odd part
+
+        paddw   xmm11,xmm10             ; xmm11=tmp4+tmp5=tmp10
+        paddw   xmm10,xmm9              ; xmm10=tmp5+tmp6=tmp11
+        paddw   xmm9,xmm8               ; xmm9=tmp6+tmp7=tmp12
+
+        psllw   xmm11,PRE_MULTIPLY_SCALE_BITS   ; pre-scale tmp10 for pmulhw
+        psllw   xmm10,PRE_MULTIPLY_SCALE_BITS   ; pre-scale tmp11 for pmulhw
+        psllw   xmm9,PRE_MULTIPLY_SCALE_BITS    ; pre-scale tmp12 for pmulhw
+
+        movdqa  xmm5,xmm11
+        psubw   xmm5,xmm9               ; xmm5=tmp10-tmp12
+        pmulhw  xmm5,[rel PW_F0382]     ; xmm5=z5
+
+        pmulhw  xmm11,[rel PW_F0541]    ; xmm11=MULTIPLY(tmp10,FIX_0_541196)
+        paddw   xmm11,xmm5              ;       +z5=z2
+
+        pmulhw  xmm9,[rel PW_F1306]     ; xmm9=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm9,xmm5               ;      +z5=z4
+
+        pmulhw  xmm10,[rel PW_F0707]    ; xmm10=MULTIPLY(tmp11,FIX_0_707106)=z3
+
+        movdqa  xmm1,xmm8
+        paddw   xmm1,xmm10              ; xmm1=tmp7+z3=z11
+        psubw   xmm8,xmm10              ; xmm8=tmp7-z3=z13
+
+        movdqa  xmm13,xmm8
+        paddw   xmm13,xmm11             ; xmm13=z13+z2=out5
+        movdqa  xmm15,xmm1
+        psubw   xmm15,xmm9              ; xmm15=z11-z4=out7
+        paddw   xmm9,xmm1               ; xmm9=z11+z4=out1
+        psubw   xmm8,xmm11              ; xmm8=z13-z2=out3
+
+        ; -- Write result
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm12       ; out0
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm9        ; out1
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm14       ; out2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm8        ; out3
+        movdqa  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4        ; out4
+        movdqa  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm13       ; out5
+        movdqa  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm0        ; out6
+        movdqa  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm15       ; out7
+
+        uncollect_args          ; counterpart of collect_args in the prologue
+        pop     rbp             ; restore caller's rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctfst-sse2.asm b/simd/jfdctfst-sse2.asm
new file mode 100644
index 0000000..e6e4b72
--- /dev/null
+++ b/simd/jfdctfst-sse2.asm
@@ -0,0 +1,404 @@
+;
+; jfdctfst-sse2.asm - fast integer FDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, but less accurate, integer implementation of
+; the forward DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jfdctfst.c; see that file
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      8       ; fraction bits of the F_* constants (14 is also OK)
+; With CONST_BITS == 8 the values are precomputed: FIX(x) = round(x * 2^8).
+%if CONST_BITS == 8
+F_0_382 equ      98             ; FIX(0.382683433)
+F_0_541 equ     139             ; FIX(0.541196100)
+F_0_707 equ     181             ; FIX(0.707106781)
+F_1_306 equ     334             ; FIX(1.306562965)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; x >> n, rounded to nearest
+F_0_382 equ     DESCALE( 410903207,30-CONST_BITS)       ; FIX(0.382683433), input is x * 2^30
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100), input is x * 2^30
+F_0_707 equ     DESCALE( 759250124,30-CONST_BITS)       ; FIX(0.707106781), input is x * 2^30
+F_1_306 equ     DESCALE(1402911301,30-CONST_BITS)       ; FIX(1.306562965), input is x * 2^30
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+; so that pmulhw's implicit >>16 leaves (value * F_x) >> CONST_BITS.
+%define PRE_MULTIPLY_SCALE_BITS   2     ; left shift applied to the data before pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)     ; left shift applied to F_*
+
+        alignz  16
+        global  EXTN(jconst_fdct_ifast_sse2)
+
+EXTN(jconst_fdct_ifast_sse2):
+
+PW_F0707        times 8 dw  F_0_707 << CONST_SHIFT      ; 8 words of FIX(0.707106781)
+PW_F0382        times 8 dw  F_0_382 << CONST_SHIFT      ; 8 words of FIX(0.382683433)
+PW_F0541        times 8 dw  F_0_541 << CONST_SHIFT      ; 8 words of FIX(0.541196100)
+PW_F1306        times 8 dw  F_1_306 << CONST_SHIFT      ; 8 words of FIX(1.306562965)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the fast integer forward DCT on one block of samples, in place.
+;
+; GLOBAL(void)
+; jsimd_fdct_ifast_sse2 (DCTELEM * data)
+; data: block to transform; accessed with movdqa (presumably 16-byte aligned).
+
+%define data(b)         (b)+8           ; DCTELEM * data
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment frame pointer
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_fdct_ifast_sse2)
+
+EXTN(jsimd_fdct_ifast_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; room for the saved frame pointer
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; [original_ebp] = eax, reloaded by "pop esp"
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[0..WK_NUM-1] below ebp
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
+
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+        ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+        ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
+
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
+
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=(44 54 64 74 45 55 65 75)
+
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
+
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
+
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
+
+        movdqa  xmm2, XMMWORD [wk(0)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
+
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
+
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        psubw   xmm3,xmm1               ; xmm3=tmp13
+        psubw   xmm6,xmm7               ; xmm6=tmp12
+        paddw   xmm4,xmm1               ; xmm4=tmp10
+        paddw   xmm0,xmm7               ; xmm0=tmp11
+
+        paddw   xmm6,xmm3               ; xmm6=tmp12+tmp13
+        psllw   xmm6,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm6,[GOTOFF(ebx,PW_F0707)] ; xmm6=z1
+
+        movdqa  xmm1,xmm4
+        movdqa  xmm7,xmm3
+        psubw   xmm4,xmm0               ; xmm4=data4
+        psubw   xmm3,xmm6               ; xmm3=data6
+        paddw   xmm1,xmm0               ; xmm1=data0
+        paddw   xmm7,xmm6               ; xmm7=data2
+
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=tmp6
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp7
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=data4
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=data6
+
+        ; -- Odd part
+
+        paddw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm5,xmm0               ; xmm5=tmp11
+        paddw   xmm0,xmm6               ; xmm0=tmp12, xmm6=tmp7
+
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z3
+
+        movdqa  xmm4,xmm2               ; xmm4=tmp10
+        psubw   xmm2,xmm0               ; xmm2=tmp10-tmp12
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F0382)] ; xmm2=z5
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F1306)] ; xmm0=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm2               ; xmm4=z2
+        paddw   xmm0,xmm2               ; xmm0=z4
+
+        movdqa  xmm3,xmm6
+        psubw   xmm6,xmm5               ; xmm6=z13
+        paddw   xmm3,xmm5               ; xmm3=z11
+
+        movdqa  xmm2,xmm6
+        movdqa  xmm5,xmm3
+        psubw   xmm6,xmm4               ; xmm6=data3
+        psubw   xmm3,xmm0               ; xmm3=data7
+        paddw   xmm2,xmm4               ; xmm2=data5
+        paddw   xmm5,xmm0               ; xmm5=data1
+
+        ; ---- Pass 2: process columns.
+
+;       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+        ; xmm1=(00 10 20 30 40 50 60 70), xmm7=(02 12 22 32 42 52 62 72)
+        ; xmm5=(01 11 21 31 41 51 61 71), xmm6=(03 13 23 33 43 53 63 73)
+
+        movdqa    xmm4,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm4,xmm5             ; xmm4=(40 41 50 51 60 61 70 71)
+        movdqa    xmm0,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm6             ; xmm7=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm0,xmm6             ; xmm0=(42 43 52 53 62 63 72 73)
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=col4
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=col6
+
+        ; xmm5=(04 14 24 34 44 54 64 74), xmm6=(06 16 26 36 46 56 66 76)
+        ; xmm2=(05 15 25 35 45 55 65 75), xmm3=(07 17 27 37 47 57 67 77)
+
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(42 43 52 53 62 63 72 73)
+
+        movdqa    xmm7,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm2             ; xmm5=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm7,xmm2             ; xmm7=(44 45 54 55 64 65 74 75)
+        movdqa    xmm0,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm3             ; xmm6=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm0,xmm3             ; xmm0=(46 47 56 57 66 67 76 77)
+
+        movdqa    xmm2,xmm5             ; transpose coefficients(phase 2)
+        punpckldq xmm5,xmm6             ; xmm5=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm2,xmm6             ; xmm2=(24 25 26 27 34 35 36 37)
+        movdqa    xmm3,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm0             ; xmm7=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm3,xmm0             ; xmm3=(64 65 66 67 74 75 76 77)
+
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=(02 03 12 13 22 23 32 33)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=(44 45 46 47 54 55 56 57)
+
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm6             ; xmm1=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm2,xmm6             ; xmm2=(20 21 22 23 30 31 32 33)
+        movdqa    xmm7,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm7,xmm0             ; xmm7=(60 61 62 63 70 71 72 73)
+
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm5            ; xmm1=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm6,xmm5            ; xmm6=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm0,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm3            ; xmm7=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm0,xmm3            ; xmm0=(70 71 72 73 74 75 76 77)=data7
+
+        movdqa  xmm5,xmm6
+        movdqa  xmm3,xmm1
+        psubw   xmm6,xmm7               ; xmm6=data1-data6=tmp6
+        psubw   xmm1,xmm0               ; xmm1=data0-data7=tmp7
+        paddw   xmm5,xmm7               ; xmm5=data1+data6=tmp1
+        paddw   xmm3,xmm0               ; xmm3=data0+data7=tmp0
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(24 25 26 27 34 35 36 37)
+        movdqa  xmm0, XMMWORD [wk(1)]   ; xmm0=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm6   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=tmp7
+
+        movdqa     xmm6,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm7            ; xmm2=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm6,xmm7            ; xmm6=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm1,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm1,xmm0            ; xmm1=(50 51 52 53 54 55 56 57)=data5
+
+        movdqa  xmm7,xmm6
+        movdqa  xmm0,xmm2
+        paddw   xmm6,xmm4               ; xmm6=data3+data4=tmp3
+        paddw   xmm2,xmm1               ; xmm2=data2+data5=tmp2
+        psubw   xmm7,xmm4               ; xmm7=data3-data4=tmp4
+        psubw   xmm0,xmm1               ; xmm0=data2-data5=tmp5
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm3
+        movdqa  xmm1,xmm5
+        psubw   xmm3,xmm6               ; xmm3=tmp13
+        psubw   xmm5,xmm2               ; xmm5=tmp12
+        paddw   xmm4,xmm6               ; xmm4=tmp10
+        paddw   xmm1,xmm2               ; xmm1=tmp11
+
+        paddw   xmm5,xmm3               ; xmm5=tmp12+tmp13
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F0707)] ; xmm5=z1
+
+        movdqa  xmm6,xmm4
+        movdqa  xmm2,xmm3
+        psubw   xmm4,xmm1               ; xmm4=data4
+        psubw   xmm3,xmm5               ; xmm3=data6
+        paddw   xmm6,xmm1               ; xmm6=data0
+        paddw   xmm2,xmm5               ; xmm2=data2
+
+        movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm2
+
+        ; -- Odd part
+
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
+
+        paddw   xmm7,xmm0               ; xmm7=tmp10
+        paddw   xmm0,xmm1               ; xmm0=tmp11
+        paddw   xmm1,xmm5               ; xmm1=tmp12, xmm5=tmp7
+
+        psllw   xmm7,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F0707)] ; xmm0=z3
+
+        movdqa  xmm4,xmm7               ; xmm4=tmp10
+        psubw   xmm7,xmm1               ; xmm7=tmp10-tmp12
+        pmulhw  xmm7,[GOTOFF(ebx,PW_F0382)] ; xmm7=z5
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F0541)] ; xmm4=MULTIPLY(tmp10,FIX_0_541196)
+        pmulhw  xmm1,[GOTOFF(ebx,PW_F1306)] ; xmm1=MULTIPLY(tmp12,FIX_1_306562)
+        paddw   xmm4,xmm7               ; xmm4=z2
+        paddw   xmm1,xmm7               ; xmm1=z4
+
+        movdqa  xmm3,xmm5
+        psubw   xmm5,xmm0               ; xmm5=z13
+        paddw   xmm3,xmm0               ; xmm3=z11
+
+        movdqa  xmm6,xmm5
+        movdqa  xmm2,xmm3
+        psubw   xmm5,xmm4               ; xmm5=data3
+        psubw   xmm3,xmm1               ; xmm3=data7
+        paddw   xmm6,xmm4               ; xmm6=data5
+        paddw   xmm2,xmm1               ; xmm2=data1
+
+        movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3
+        movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6
+        movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctint-mmx.asm b/simd/jfdctint-mmx.asm
new file mode 100644
index 0000000..8536ae2
--- /dev/null
+++ b/simd/jfdctint-mmx.asm
@@ -0,0 +1,622 @@
+;
+; jfdctint-mmx.asm - accurate integer FDCT (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see that file for
+; more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13      ; fraction bits of the F_* fixed-point constants
+%define PASS1_BITS      2       ; left shift applied to data0/data4 in pass 1
+
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS) ; psrad count used in pass 1
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS) ; presumably the pass 2 counterpart
+
+%if CONST_BITS == 13
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; x >> n, rounded to nearest
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_islow_mmx)
+
+EXTN(jconst_fdct_islow_mmx):
+; Each PW_* table is one MMWORD: a word pair (a,b) stored twice, used with pmaddwd.
+PW_F130_F054    times 2 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130   times 2 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117   times 2 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078    times 2 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089  times 2 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060   times 2 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256  times 2 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050   times 2 dw -F_2_562, (F_3_072-F_2_562)
+PD_DESCALE_P1   times 2 dd  1 << (DESCALE_P1-1)         ; rounding bias added before psrad DESCALE_P1
+PD_DESCALE_P2   times 2 dd  1 << (DESCALE_P2-1)         ; presumably the same for DESCALE_P2
+PW_DESCALE_P2X  times 4 dw  1 << (PASS1_BITS-1)         ; presumably a word-lane bias for a PASS1_BITS shift
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_islow_mmx (DCTELEM * data)
+;
+
+%define data(b)         (b)+8           ; DCTELEM * data
+
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_fdct_islow_mmx)
+
+EXTN(jsimd_fdct_islow_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
+.rowloop:
+
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
+
+        ; mm0=(20 21 22 23), mm2=(24 25 26 27)
+        ; mm1=(30 31 32 33), mm3=(34 35 36 37)
+
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(24 34 25 35)
+        punpckhwd mm5,mm3               ; mm5=(26 36 27 37)
+
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)]
+
+        ; mm6=(00 01 02 03), mm1=(04 05 06 07)
+        ; mm7=(10 11 12 13), mm3=(14 15 16 17)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 32 23 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(24 34 25 35)
+
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm4,mm7               ; mm4=(02 12 03 13)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(04 14 05 15)
+        punpckhwd mm2,mm3               ; mm2=(06 16 07 17)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 10 20 30)=data0
+        punpckhdq mm7,mm0               ; mm7=(01 11 21 31)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(06 16 26 36)=data6
+        punpckhdq mm3,mm5               ; mm3=(07 17 27 37)=data7
+
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 32 23 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(24 34 25 35)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(02 12 22 32)=data2
+        punpckhdq mm7,mm2               ; mm7=(03 13 23 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(04 14 24 34)=data4
+        punpckhdq mm6,mm3               ; mm6=(05 15 25 35)=data5
+
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm1,mm5
+        movq    mm6,mm0
+        paddw   mm5,mm7                 ; mm5=tmp10
+        paddw   mm0,mm4                 ; mm0=tmp11
+        psubw   mm1,mm7                 ; mm1=tmp13
+        psubw   mm6,mm4                 ; mm6=tmp12
+
+        movq    mm7,mm5
+        paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+        psubw   mm7,mm0                 ; mm7=tmp10-tmp11
+
+        psllw   mm5,PASS1_BITS          ; mm5=data0
+        psllw   mm7,PASS1_BITS          ; mm7=data4
+
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(0,1,edx,SIZEOF_DCTELEM)], mm7
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        movq      mm4,mm1               ; mm1=tmp13
+        movq      mm0,mm1
+        punpcklwd mm4,mm6               ; mm6=tmp12
+        punpckhwd mm0,mm6
+        movq      mm1,mm4
+        movq      mm6,mm0
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
+
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm4,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm1,DESCALE_P1
+        psrad   mm6,DESCALE_P1
+
+        packssdw  mm4,mm0               ; mm4=data2
+        packssdw  mm1,mm6               ; mm1=data6
+
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)], mm1
+
+        ; -- Odd part
+
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
+
+        movq    mm0,mm2                 ; mm2=tmp4
+        movq    mm6,mm3                 ; mm3=tmp5
+        paddw   mm0,mm5                 ; mm0=z3
+        paddw   mm6,mm7                 ; mm6=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movq      mm4,mm0
+        movq      mm1,mm0
+        punpcklwd mm4,mm6
+        punpckhwd mm1,mm6
+        movq      mm0,mm4
+        movq      mm6,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+        movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+        movq      mm4,mm2
+        movq      mm1,mm2
+        punpcklwd mm4,mm7
+        punpckhwd mm1,mm7
+        movq      mm2,mm4
+        movq      mm7,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
+
+        paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+        paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+        paddd   mm2,mm0                 ; mm2=data1L
+        paddd   mm7,mm6                 ; mm7=data1H
+
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm4,DESCALE_P1
+        psrad   mm1,DESCALE_P1
+        paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm2,DESCALE_P1
+        psrad   mm7,DESCALE_P1
+
+        packssdw  mm4,mm1               ; mm4=data7
+        packssdw  mm2,mm7               ; mm2=data1
+
+        movq    MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+
+        movq      mm1,mm3
+        movq      mm7,mm3
+        punpcklwd mm1,mm5
+        punpckhwd mm7,mm5
+        movq      mm3,mm1
+        movq      mm5,mm7
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
+
+        paddd   mm1,mm0                 ; mm1=data5L
+        paddd   mm7,mm6                 ; mm7=data5H
+        paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+        paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
+
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm1,DESCALE_P1
+        psrad   mm7,DESCALE_P1
+        paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   mm3,DESCALE_P1
+        psrad   mm5,DESCALE_P1
+
+        packssdw  mm1,mm7               ; mm1=data5
+        packssdw  mm3,mm5               ; mm3=data3
+
+        movq    MMWORD [MMBLOCK(1,1,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+
+        add     edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .rowloop
+
+        ; ---- Pass 2: process columns.
+
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4
+        alignx  16,7
+.columnloop:
+
+        movq    mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+        movq    mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+        ; mm0=(02 12 22 32), mm2=(42 52 62 72)
+        ; mm1=(03 13 23 33), mm3=(43 53 63 73)
+
+        movq      mm4,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm1               ; mm0=(02 03 12 13)
+        punpckhwd mm4,mm1               ; mm4=(22 23 32 33)
+        movq      mm5,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm3               ; mm2=(42 43 52 53)
+        punpckhwd mm5,mm3               ; mm5=(62 63 72 73)
+
+        movq    mm6, MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movq    mm7, MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movq    mm1, MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+
+        ; mm6=(00 10 20 30), mm1=(40 50 60 70)
+        ; mm7=(01 11 21 31), mm3=(41 51 61 71)
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=(22 23 32 33)
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=(42 43 52 53)
+
+        movq      mm4,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 01 10 11)
+        punpckhwd mm4,mm7               ; mm4=(20 21 30 31)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm3               ; mm1=(40 41 50 51)
+        punpckhwd mm2,mm3               ; mm2=(60 61 70 71)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03)=data0
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13)=data1
+        movq      mm3,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm5               ; mm2=(60 61 62 63)=data6
+        punpckhdq mm3,mm5               ; mm3=(70 71 72 73)=data7
+
+        movq    mm0,mm7
+        movq    mm5,mm6
+        psubw   mm7,mm2                 ; mm7=data1-data6=tmp6
+        psubw   mm6,mm3                 ; mm6=data0-data7=tmp7
+        paddw   mm0,mm2                 ; mm0=data1+data6=tmp1
+        paddw   mm5,mm3                 ; mm5=data0+data7=tmp0
+
+        movq    mm2, MMWORD [wk(0)]     ; mm2=(22 23 32 33)
+        movq    mm3, MMWORD [wk(1)]     ; mm3=(42 43 52 53)
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp6
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=tmp7
+
+        movq      mm7,mm4               ; transpose coefficients(phase 2)
+        punpckldq mm4,mm2               ; mm4=(20 21 22 23)=data2
+        punpckhdq mm7,mm2               ; mm7=(30 31 32 33)=data3
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm3               ; mm1=(40 41 42 43)=data4
+        punpckhdq mm6,mm3               ; mm6=(50 51 52 53)=data5
+
+        movq    mm2,mm7
+        movq    mm3,mm4
+        paddw   mm7,mm1                 ; mm7=data3+data4=tmp3
+        paddw   mm4,mm6                 ; mm4=data2+data5=tmp2
+        psubw   mm2,mm1                 ; mm2=data3-data4=tmp4
+        psubw   mm3,mm6                 ; mm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movq    mm1,mm5
+        movq    mm6,mm0
+        paddw   mm5,mm7                 ; mm5=tmp10
+        paddw   mm0,mm4                 ; mm0=tmp11
+        psubw   mm1,mm7                 ; mm1=tmp13
+        psubw   mm6,mm4                 ; mm6=tmp12
+
+        movq    mm7,mm5
+        paddw   mm5,mm0                 ; mm5=tmp10+tmp11
+        psubw   mm7,mm0                 ; mm7=tmp10-tmp11
+
+        paddw   mm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        paddw   mm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        psraw   mm5,PASS1_BITS          ; mm5=data0
+        psraw   mm7,PASS1_BITS          ; mm7=data4
+
+        movq    MMWORD [MMBLOCK(0,0,edx,SIZEOF_DCTELEM)], mm5
+        movq    MMWORD [MMBLOCK(4,0,edx,SIZEOF_DCTELEM)], mm7
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        movq      mm4,mm1               ; mm1=tmp13
+        movq      mm0,mm1
+        punpcklwd mm4,mm6               ; mm6=tmp12
+        punpckhwd mm0,mm6
+        movq      mm1,mm4
+        movq      mm6,mm0
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=data2L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F130_F054)]        ; mm0=data2H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=data6L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F054_MF130)]       ; mm6=data6H
+
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm4,DESCALE_P2
+        psrad   mm0,DESCALE_P2
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm1,DESCALE_P2
+        psrad   mm6,DESCALE_P2
+
+        packssdw  mm4,mm0               ; mm4=data2
+        packssdw  mm1,mm6               ; mm1=data6
+
+        movq    MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)], mm1
+
+        ; -- Odd part
+
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp6
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp7
+
+        movq    mm0,mm2                 ; mm2=tmp4
+        movq    mm6,mm3                 ; mm3=tmp5
+        paddw   mm0,mm5                 ; mm0=z3
+        paddw   mm6,mm7                 ; mm6=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movq      mm4,mm0
+        movq      mm1,mm0
+        punpcklwd mm4,mm6
+        punpckhwd mm1,mm6
+        movq      mm0,mm4
+        movq      mm6,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF078_F117)]       ; mm4=z3L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF078_F117)]       ; mm1=z3H
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F117_F078)]        ; mm0=z4L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_F117_F078)]        ; mm6=z4H
+
+        movq    MMWORD [wk(0)], mm4     ; wk(0)=z3L
+        movq    MMWORD [wk(1)], mm1     ; wk(1)=z3H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+        movq      mm4,mm2
+        movq      mm1,mm2
+        punpcklwd mm4,mm7
+        punpckhwd mm1,mm7
+        movq      mm2,mm4
+        movq      mm7,mm1
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm4=tmp4L
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm1=tmp4H
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF089_F060)]       ; mm2=tmp7L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF089_F060)]       ; mm7=tmp7H
+
+        paddd   mm4, MMWORD [wk(0)]     ; mm4=data7L
+        paddd   mm1, MMWORD [wk(1)]     ; mm1=data7H
+        paddd   mm2,mm0                 ; mm2=data1L
+        paddd   mm7,mm6                 ; mm7=data1H
+
+        paddd   mm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm4,DESCALE_P2
+        psrad   mm1,DESCALE_P2
+        paddd   mm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm2,DESCALE_P2
+        psrad   mm7,DESCALE_P2
+
+        packssdw  mm4,mm1               ; mm4=data7
+        packssdw  mm2,mm7               ; mm2=data1
+
+        movq    MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edx,SIZEOF_DCTELEM)], mm2
+
+        movq      mm1,mm3
+        movq      mm7,mm3
+        punpcklwd mm1,mm5
+        punpckhwd mm7,mm5
+        movq      mm3,mm1
+        movq      mm5,mm7
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm1=tmp5L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm7=tmp5H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF256_F050)]       ; mm3=tmp6L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_MF256_F050)]       ; mm5=tmp6H
+
+        paddd   mm1,mm0                 ; mm1=data5L
+        paddd   mm7,mm6                 ; mm7=data5H
+        paddd   mm3, MMWORD [wk(0)]     ; mm3=data3L
+        paddd   mm5, MMWORD [wk(1)]     ; mm5=data3H
+
+        paddd   mm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm1,DESCALE_P2
+        psrad   mm7,DESCALE_P2
+        paddd   mm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   mm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   mm3,DESCALE_P2
+        psrad   mm5,DESCALE_P2
+
+        packssdw  mm1,mm7               ; mm1=data5
+        packssdw  mm3,mm5               ; mm3=data3
+
+        movq    MMWORD [MMBLOCK(5,0,edx,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)], mm3
+
+        add     edx, byte 4*SIZEOF_DCTELEM
+        dec     ecx
+        jnz     near .columnloop
+
+        emms            ; empty MMX state
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctint-sse2-64.asm b/simd/jfdctint-sse2-64.asm
new file mode 100644
index 0000000..996a53b
--- /dev/null
+++ b/simd/jfdctint-sse2-64.asm
@@ -0,0 +1,563 @@
+;
+; jfdctint-sse2-64.asm - accurate integer FDCT (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009, 2014, D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see jfdctint.c for
+; more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13      ; fractional bits carried by the F_* multipliers below
+%define PASS1_BITS      2       ; pass 1 results are kept scaled up by 2^PASS1_BITS
+
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; psrad count for pass 1 products
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS)         ; psrad count for pass 2 products
+; Fixed-point multipliers: FIX(x) is x * 2^CONST_BITS rounded to the nearest integer.
+%if CONST_BITS == 13
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value below is given pre-scaled by 2^30 and DESCALE() rounds it down to CONST_BITS fractional bits.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_islow_sse2)
+
+EXTN(jconst_fdct_islow_sse2):
+; Each PW_* row holds 4 (c0,c1) word pairs; pmaddwd on interleaved (a,b) words yields a*c0 + b*c1 per dword.
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541         ; (tmp13,tmp12) -> out2
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)         ; (tmp13,tmp12) -> out6
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175         ; (z3,z4) -> z3
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)         ; (z3,z4) -> z4
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899         ; (tmp4,tmp7) -> tmp4
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)         ; (tmp4,tmp7) -> tmp7
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562         ; (tmp5,tmp6) -> tmp5
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)         ; (tmp5,tmp6) -> tmp6
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)                ; rounding bias added before psrad by DESCALE_P1
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)                ; rounding bias added before psrad by DESCALE_P2
+PW_DESCALE_P2X  times 8 dw  1 << (PASS1_BITS-1)                ; rounding bias added before psraw by PASS1_BITS (pass 2 out0/out4)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_islow_sse2 (DCTELEM * data)
+;
+
+; r10 = DCTELEM * data
+
+        align   16
+        global  EXTN(jsimd_fdct_islow_sse2)
+
+EXTN(jsimd_fdct_islow_sse2):
+        push    rbp
+        mov     rax,rsp
+        mov     rbp,rsp
+        collect_args
+
+        ; ---- Pass 1: process rows.
+
+        mov     rdx, r10        ; (DCTELEM *)
+
+        movdqa  xmm8, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm9, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm11, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm4, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm13, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)]
+        movdqa  xmm15, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)]
+
+        ; xmm8=(00 01 02 03 04 05 06 07), xmm9=(10 11 12 13 14 15 16 17)
+        ; xmm2=(20 21 22 23 24 25 26 27), xmm11=(30 31 32 33 34 35 36 37)
+        ; xmm4=(40 41 42 43 44 45 46 47), xmm13=(50 51 52 53 54 55 56 57)
+        ; xmm5=(60 61 62 63 64 65 66 67), xmm15=(70 71 72 73 74 75 76 77)
+
+        movdqa    xmm12,xmm8            ; transpose coefficients(phase 1)
+        punpcklwd xmm8,xmm9             ; xmm8=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm12,xmm9            ; xmm12=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm11            ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm1,xmm11            ; xmm1=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm13            ; xmm4=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm0,xmm13            ; xmm0=(44 54 45 55 46 56 47 57)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm15            ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm3,xmm15            ; xmm3=(64 74 65 75 66 76 67 77)
+
+        movdqa    xmm10,xmm8            ; transpose coefficients(phase 2)
+        punpckldq xmm8,xmm2             ; xmm8=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm10,xmm2            ; xmm10=(02 12 22 32 03 13 23 33)
+        movdqa    xmm14,xmm12           ; transpose coefficients(phase 2)
+        punpckldq xmm12,xmm1            ; xmm12=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm14,xmm1            ; xmm14=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm6,xmm5             ; xmm6=(42 52 62 72 43 53 63 73)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm7,xmm3             ; xmm7=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm9,xmm8            ; transpose coefficients(phase 3)
+        punpcklqdq xmm8,xmm4            ; xmm8=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm9,xmm4            ; xmm9=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm11,xmm10          ; transpose coefficients(phase 3)
+        punpcklqdq xmm10,xmm6           ; xmm10=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm11,xmm6           ; xmm11=(03 13 23 33 43 53 63 73)=data3
+
+        movdqa     xmm13,xmm12          ; transpose coefficients(phase 3)
+        punpcklqdq xmm12,xmm0           ; xmm12=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm13,xmm0           ; xmm13=(05 15 25 35 45 55 65 75)=data5
+        movdqa     xmm15,xmm14          ; transpose coefficients(phase 3)
+        punpcklqdq xmm14,xmm7           ; xmm14=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm15,xmm7           ; xmm15=(07 17 27 37 47 57 67 77)=data7
+
+        movdqa  xmm0,xmm8
+        paddw   xmm0,xmm15              ; xmm0=data0+data7=tmp0
+        movdqa  xmm1,xmm9
+        paddw   xmm1,xmm14              ; xmm1=data1+data6=tmp1
+        movdqa  xmm2,xmm10
+        paddw   xmm2,xmm13              ; xmm2=data2+data5=tmp2
+        movdqa  xmm3,xmm11
+        paddw   xmm3,xmm12              ; xmm3=data3+data4=tmp3
+
+        psubw   xmm11,xmm12             ; xmm11=data3-data4=tmp4
+        psubw   xmm10,xmm13             ; xmm10=data2-data5=tmp5
+        psubw   xmm9,xmm14              ; xmm9=data1-data6=tmp6
+        psubw   xmm8,xmm15              ; xmm8=data0-data7=tmp7
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm0
+        paddw   xmm4,xmm3               ; xmm4=tmp0+tmp3=tmp10
+        movdqa  xmm5,xmm1
+        paddw   xmm5,xmm2               ; xmm5=tmp1+tmp2=tmp11
+        psubw   xmm1,xmm2               ; xmm1=tmp1-tmp2=tmp12
+        psubw   xmm0,xmm3               ; xmm0=tmp0-tmp3=tmp13
+
+        movdqa  xmm12,xmm4
+        paddw   xmm12,xmm5              ; xmm12=tmp10+tmp11
+        psubw   xmm4,xmm5               ; xmm4=tmp10-tmp11
+
+        psllw   xmm12,PASS1_BITS        ; xmm12=out0
+        psllw   xmm4,PASS1_BITS         ; xmm4=out4
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; out2 = z1 + tmp13 * 0.765366865;
+        ; out6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; out2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; out6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        movdqa    xmm2,xmm0             ; xmm0=tmp13
+        movdqa    xmm6,xmm0
+        punpcklwd xmm2,xmm1             ; xmm1=tmp12
+        punpckhwd xmm6,xmm1
+        movdqa    xmm5,xmm2
+        movdqa    xmm0,xmm6
+        pmaddwd   xmm2,[rel PW_F130_F054]       ; xmm2=out2L
+        pmaddwd   xmm6,[rel PW_F130_F054]       ; xmm6=out2H
+        pmaddwd   xmm5,[rel PW_F054_MF130]      ; xmm5=out6L
+        pmaddwd   xmm0,[rel PW_F054_MF130]      ; xmm0=out6H
+
+        paddd   xmm2,[rel PD_DESCALE_P1]
+        paddd   xmm6,[rel PD_DESCALE_P1]
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+        paddd   xmm5,[rel PD_DESCALE_P1]
+        paddd   xmm0,[rel PD_DESCALE_P1]
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm2,xmm6             ; xmm2=out2
+        packssdw  xmm5,xmm0             ; xmm5=out6
+
+        ; -- Odd part
+
+        movdqa  xmm6,xmm11              ; xmm11=tmp4
+        movdqa  xmm0,xmm10              ; xmm10=tmp5
+        paddw   xmm6,xmm9               ; xmm6=tmp4+tmp6=z3
+        paddw   xmm0,xmm8               ; xmm0=tmp5+tmp7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm7,xmm6
+        movdqa    xmm14,xmm6
+        punpcklwd xmm7,xmm0
+        punpckhwd xmm14,xmm0
+        movdqa    xmm6,xmm7
+        movdqa    xmm0,xmm14
+        pmaddwd   xmm7,[rel PW_MF078_F117]      ; xmm7=z3L
+        pmaddwd   xmm14,[rel PW_MF078_F117]     ; xmm14=z3H
+        pmaddwd   xmm6,[rel PW_F117_F078]       ; xmm6=z4L
+        pmaddwd   xmm0,[rel PW_F117_F078]       ; xmm0=z4H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; out7 = tmp4 + z1 + z3;  out5 = tmp5 + z2 + z4;
+        ; out3 = tmp6 + z2 + z3;  out1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; out7 = tmp4 + z3;  out5 = tmp5 + z4;
+        ; out3 = tmp6 + z3;  out1 = tmp7 + z4;
+
+        movdqa    xmm15,xmm11           ; xmm11=tmp4
+        movdqa    xmm13,xmm11
+        punpcklwd xmm15,xmm8            ; xmm8=tmp7
+        punpckhwd xmm13,xmm8
+        movdqa    xmm8,xmm15
+        movdqa    xmm1,xmm13
+        pmaddwd   xmm15,[rel PW_MF060_MF089]    ; xmm15=tmp4L
+        pmaddwd   xmm13,[rel PW_MF060_MF089]    ; xmm13=tmp4H
+        pmaddwd   xmm8,[rel PW_MF089_F060]      ; xmm8=tmp7L
+        pmaddwd   xmm1,[rel PW_MF089_F060]      ; xmm1=tmp7H
+
+        paddd   xmm15,xmm7              ; xmm15=out7L
+        paddd   xmm13,xmm14             ; xmm13=out7H
+        paddd   xmm8,xmm6               ; xmm8=out1L
+        paddd   xmm1,xmm0               ; xmm1=out1H
+
+        paddd   xmm15,[rel PD_DESCALE_P1]
+        paddd   xmm13,[rel PD_DESCALE_P1]
+        psrad   xmm15,DESCALE_P1
+        psrad   xmm13,DESCALE_P1
+        paddd   xmm8,[rel PD_DESCALE_P1]
+        paddd   xmm1,[rel PD_DESCALE_P1]
+        psrad   xmm8,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+
+        packssdw  xmm15,xmm13           ; xmm15=out7
+        packssdw  xmm8,xmm1             ; xmm8=out1
+
+        movdqa    xmm13,xmm10           ; xmm10=tmp5
+        movdqa    xmm1,xmm10
+        punpcklwd xmm13,xmm9            ; xmm9=tmp6
+        punpckhwd xmm1,xmm9
+        movdqa    xmm11,xmm13
+        movdqa    xmm3,xmm1
+        pmaddwd   xmm13,[rel PW_MF050_MF256]    ; xmm13=tmp5L
+        pmaddwd   xmm1,[rel PW_MF050_MF256]     ; xmm1=tmp5H
+        pmaddwd   xmm11,[rel PW_MF256_F050]     ; xmm11=tmp6L
+        pmaddwd   xmm3,[rel PW_MF256_F050]      ; xmm3=tmp6H
+
+        paddd   xmm13,xmm6              ; xmm13=out5L
+        paddd   xmm1,xmm0               ; xmm1=out5H
+        paddd   xmm11,xmm7              ; xmm11=out3L
+        paddd   xmm3,xmm14              ; xmm3=out3H
+
+        paddd   xmm13,[rel PD_DESCALE_P1]
+        paddd   xmm1,[rel PD_DESCALE_P1]
+        psrad   xmm13,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+        paddd   xmm11,[rel PD_DESCALE_P1]
+        paddd   xmm3,[rel PD_DESCALE_P1]
+        psrad   xmm11,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+
+        packssdw  xmm13,xmm1            ; xmm13=out5
+        packssdw  xmm11,xmm3            ; xmm11=out3
+
+        ; ---- Pass 2: process columns.
+
+        ; Re-order registers so we can reuse the same transpose code
+        movdqa    xmm9,xmm8
+        movdqa    xmm8,xmm12
+
+        ; xmm8=(00 10 20 30 40 50 60 70), xmm9=(01 11 21 31 41 51 61 71)
+        ; xmm2=(02 12 22 32 42 52 62 72), xmm11=(03 13 23 33 43 53 63 73)
+        ; xmm4=(04 14 24 34 44 54 64 74), xmm13=(05 15 25 35 45 55 65 75)
+        ; xmm5=(06 16 26 36 46 56 66 76), xmm15=(07 17 27 37 47 57 67 77)
+
+        movdqa    xmm12,xmm8            ; transpose coefficients(phase 1)
+        punpcklwd xmm8,xmm9             ; xmm8=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm12,xmm9            ; xmm12=(40 41 50 51 60 61 70 71)
+        movdqa    xmm1,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm11            ; xmm2=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm1,xmm11            ; xmm1=(42 43 52 53 62 63 72 73)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm13            ; xmm4=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm0,xmm13            ; xmm0=(44 45 54 55 64 65 74 75)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm15            ; xmm5=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm3,xmm15            ; xmm3=(46 47 56 57 66 67 76 77)
+
+        movdqa    xmm10,xmm8            ; transpose coefficients(phase 2)
+        punpckldq xmm8,xmm2             ; xmm8=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm10,xmm2            ; xmm10=(20 21 22 23 30 31 32 33)
+        movdqa    xmm14,xmm12           ; transpose coefficients(phase 2)
+        punpckldq xmm12,xmm1            ; xmm12=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm14,xmm1            ; xmm14=(60 61 62 63 70 71 72 73)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm6,xmm5             ; xmm6=(24 25 26 27 34 35 36 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
+
+        movdqa     xmm9,xmm8            ; transpose coefficients(phase 3)
+        punpcklqdq xmm8,xmm4            ; xmm8=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm9,xmm4            ; xmm9=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm11,xmm10          ; transpose coefficients(phase 3)
+        punpcklqdq xmm10,xmm6           ; xmm10=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm11,xmm6           ; xmm11=(30 31 32 33 34 35 36 37)=data3
+
+        movdqa     xmm13,xmm12          ; transpose coefficients(phase 3)
+        punpcklqdq xmm12,xmm0           ; xmm12=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm13,xmm0           ; xmm13=(50 51 52 53 54 55 56 57)=data5
+        movdqa     xmm15,xmm14          ; transpose coefficients(phase 3)
+        punpcklqdq xmm14,xmm7           ; xmm14=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm15,xmm7           ; xmm15=(70 71 72 73 74 75 76 77)=data7
+
+        movdqa  xmm0,xmm8
+        paddw   xmm0,xmm15              ; xmm0=data0+data7=tmp0
+        movdqa  xmm1,xmm9
+        paddw   xmm1,xmm14              ; xmm1=data1+data6=tmp1
+        movdqa  xmm2,xmm10
+        paddw   xmm2,xmm13              ; xmm2=data2+data5=tmp2
+        movdqa  xmm3,xmm11
+        paddw   xmm3,xmm12              ; xmm3=data3+data4=tmp3
+
+        psubw   xmm11,xmm12             ; xmm11=data3-data4=tmp4
+        psubw   xmm10,xmm13             ; xmm10=data2-data5=tmp5
+        psubw   xmm9,xmm14              ; xmm9=data1-data6=tmp6
+        psubw   xmm8,xmm15              ; xmm8=data0-data7=tmp7
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm0
+        paddw   xmm4,xmm3               ; xmm4=tmp0+tmp3=tmp10
+        movdqa  xmm5,xmm1
+        paddw   xmm5,xmm2               ; xmm5=tmp1+tmp2=tmp11
+        psubw   xmm1,xmm2               ; xmm1=tmp1-tmp2=tmp12
+        psubw   xmm0,xmm3               ; xmm0=tmp0-tmp3=tmp13
+
+        movdqa  xmm12,xmm4
+        paddw   xmm12,xmm5              ; xmm12=tmp10+tmp11
+        psubw   xmm4,xmm5               ; xmm4=tmp10-tmp11
+
+        paddw   xmm12,[rel PW_DESCALE_P2X]
+        paddw   xmm4,[rel PW_DESCALE_P2X]
+        psraw   xmm12,PASS1_BITS        ; xmm12=out0
+        psraw   xmm4,PASS1_BITS         ; xmm4=out4
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; out2 = z1 + tmp13 * 0.765366865;
+        ; out6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; out2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; out6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        movdqa    xmm2,xmm0             ; xmm0=tmp13
+        movdqa    xmm6,xmm0
+        punpcklwd xmm2,xmm1             ; xmm1=tmp12
+        punpckhwd xmm6,xmm1
+        movdqa    xmm5,xmm2
+        movdqa    xmm0,xmm6
+        pmaddwd   xmm2,[rel PW_F130_F054]       ; xmm2=out2L
+        pmaddwd   xmm6,[rel PW_F130_F054]       ; xmm6=out2H
+        pmaddwd   xmm5,[rel PW_F054_MF130]      ; xmm5=out6L
+        pmaddwd   xmm0,[rel PW_F054_MF130]      ; xmm0=out6H
+
+        paddd   xmm2,[rel PD_DESCALE_P2]
+        paddd   xmm6,[rel PD_DESCALE_P2]
+        psrad   xmm2,DESCALE_P2
+        psrad   xmm6,DESCALE_P2
+        paddd   xmm5,[rel PD_DESCALE_P2]
+        paddd   xmm0,[rel PD_DESCALE_P2]
+        psrad   xmm5,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
+
+        packssdw  xmm2,xmm6             ; xmm2=out2
+        packssdw  xmm5,xmm0             ; xmm5=out6
+
+        ; -- Odd part
+
+        movdqa  xmm6,xmm11              ; xmm11=tmp4
+        movdqa  xmm0,xmm10              ; xmm10=tmp5
+        paddw   xmm6,xmm9               ; xmm6=tmp4+tmp6=z3
+        paddw   xmm0,xmm8               ; xmm0=tmp5+tmp7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm7,xmm6
+        movdqa    xmm14,xmm6
+        punpcklwd xmm7,xmm0
+        punpckhwd xmm14,xmm0
+        movdqa    xmm6,xmm7
+        movdqa    xmm0,xmm14
+        pmaddwd   xmm7,[rel PW_MF078_F117]      ; xmm7=z3L
+        pmaddwd   xmm14,[rel PW_MF078_F117]     ; xmm14=z3H
+        pmaddwd   xmm6,[rel PW_F117_F078]       ; xmm6=z4L
+        pmaddwd   xmm0,[rel PW_F117_F078]       ; xmm0=z4H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; out7 = tmp4 + z1 + z3;  out5 = tmp5 + z2 + z4;
+        ; out3 = tmp6 + z2 + z3;  out1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; out7 = tmp4 + z3;  out5 = tmp5 + z4;
+        ; out3 = tmp6 + z3;  out1 = tmp7 + z4;
+
+        movdqa    xmm15,xmm11           ; xmm11=tmp4
+        movdqa    xmm13,xmm11
+        punpcklwd xmm15,xmm8            ; xmm8=tmp7
+        punpckhwd xmm13,xmm8
+        movdqa    xmm8,xmm15
+        movdqa    xmm1,xmm13
+        pmaddwd   xmm15,[rel PW_MF060_MF089]    ; xmm15=tmp4L
+        pmaddwd   xmm13,[rel PW_MF060_MF089]    ; xmm13=tmp4H
+        pmaddwd   xmm8,[rel PW_MF089_F060]      ; xmm8=tmp7L
+        pmaddwd   xmm1,[rel PW_MF089_F060]      ; xmm1=tmp7H
+
+        paddd   xmm15,xmm7              ; xmm15=out7L
+        paddd   xmm13,xmm14             ; xmm13=out7H
+        paddd   xmm8,xmm6               ; xmm8=out1L
+        paddd   xmm1,xmm0               ; xmm1=out1H
+
+        paddd   xmm15,[rel PD_DESCALE_P2]
+        paddd   xmm13,[rel PD_DESCALE_P2]
+        psrad   xmm15,DESCALE_P2
+        psrad   xmm13,DESCALE_P2
+        paddd   xmm8,[rel PD_DESCALE_P2]
+        paddd   xmm1,[rel PD_DESCALE_P2]
+        psrad   xmm8,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+
+        packssdw  xmm15,xmm13           ; xmm15=out7
+        packssdw  xmm8,xmm1             ; xmm8=out1
+
+        movdqa    xmm13,xmm10           ; xmm10=tmp5
+        movdqa    xmm1,xmm10
+        punpcklwd xmm13,xmm9            ; xmm9=tmp6
+        punpckhwd xmm1,xmm9
+        movdqa    xmm11,xmm13
+        movdqa    xmm3,xmm1
+        pmaddwd   xmm13,[rel PW_MF050_MF256]    ; xmm13=tmp5L
+        pmaddwd   xmm1,[rel PW_MF050_MF256]     ; xmm1=tmp5H
+        pmaddwd   xmm11,[rel PW_MF256_F050]     ; xmm11=tmp6L
+        pmaddwd   xmm3,[rel PW_MF256_F050]      ; xmm3=tmp6H
+
+        paddd   xmm13,xmm6               ; xmm13=out5L
+        paddd   xmm1,xmm0                ; xmm1=out5H
+        paddd   xmm11,xmm7               ; xmm11=out3L
+        paddd   xmm3,xmm14               ; xmm3=out3H
+
+        paddd   xmm13,[rel PD_DESCALE_P2]
+        paddd   xmm1,[rel PD_DESCALE_P2]
+        psrad   xmm13,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm11,[rel PD_DESCALE_P2]
+        paddd   xmm3,[rel PD_DESCALE_P2]
+        psrad   xmm11,DESCALE_P2
+        psrad   xmm3,DESCALE_P2
+
+        packssdw  xmm13,xmm1            ; xmm13=out5
+        packssdw  xmm11,xmm3            ; xmm11=out3
+
+        ; -- Write result
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)], xmm12
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm8
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm11
+        movdqa  XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm13
+        movdqa  XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_DCTELEM)], xmm5
+        movdqa  XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm15
+
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jfdctint-sse2.asm b/simd/jfdctint-sse2.asm
new file mode 100644
index 0000000..85133f8
--- /dev/null
+++ b/simd/jfdctint-sse2.asm
@@ -0,0 +1,634 @@
+;
+; jfdctint.asm - accurate integer FDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; forward DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jfdctint.c; see jfdctint.c for more
+; details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+; Fixed-point configuration.
+;   CONST_BITS  - number of fractional bits carried by the F_* multipliers
+;                 below (F_x = round(x * 2^CONST_BITS)).
+;   PASS1_BITS  - extra fractional bits kept in the pass-1 (row) results;
+;                 pass 1 scales its outputs up by 2^PASS1_BITS and pass 2
+;                 removes that scaling again.
+%define CONST_BITS      13
+%define PASS1_BITS      2
+
+; Right-shift counts applied to the 32-bit pmaddwd products:
+;   DESCALE_P1 is used in pass 1 (leaves PASS1_BITS of extra precision),
+;   DESCALE_P2 is used in pass 2 (removes CONST_BITS and PASS1_BITS).
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS)
+
+; FIX(x) multipliers.  For the default CONST_BITS the values are spelled out
+; as precomputed integers.
+%if CONST_BITS == 13
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; Instead, each constant is given as an integer scaled by 2^30 and is
+; rounded down to CONST_BITS fractional bits with DESCALE(x,n), which adds
+; half of the discarded range (1 << (n-1)) before shifting right by n.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_fdct_islow_sse2)
+
+; Constant table for jsimd_fdct_islow_sse2.  Every entry is exactly one
+; xmmword (16 bytes).
+;
+; PW_* entries hold a pair of 16-bit multipliers (c0, c1) repeated four
+; times.  They are used as the memory operand of pmaddwd against a register
+; of interleaved word pairs (a, b) produced by punpcklwd/punpckhwd, so each
+; 32-bit result lane is a*c0 + b*c1.
+;
+; Naming: "Fnnn" is a positive and "MFnnn" a negative multiplier, where nnn
+; are the leading digits of the combined constant, e.g.
+;   F130  = 0.541 + 0.765 =  1.306      MF130 = 0.541 - 1.847 = -1.306
+;   MF078 = 1.175 - 1.961 = -0.786      F078  = 1.175 - 0.390 =  0.785
+;   MF060 = 0.298 - 0.899 = -0.601      F060  = 1.501 - 0.899 =  0.601
+;   MF050 = 2.053 - 2.562 = -0.510      F050  = 3.072 - 2.562 =  0.510
+;
+; PD_DESCALE_P1/P2 are the 32-bit rounding terms (half of the value shifted
+; out) added before "psrad reg,DESCALE_Pn".  PW_DESCALE_P2X is the 16-bit
+; rounding term added before "psraw reg,PASS1_BITS" in pass 2.
+
+EXTN(jconst_fdct_islow_sse2):
+
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)
+PW_DESCALE_P2X  times 8 dw  1 << (PASS1_BITS-1)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform the forward DCT on one block of samples.
+;
+; GLOBAL(void)
+; jsimd_fdct_islow_sse2 (DCTELEM * data)
+;
+; The block is transformed in place: pass 1 reads all eight rows of 'data',
+; transposes them and runs the 1-D DCT, keeping its results in registers and
+; in the wk() stack slots (scaled up by 2^PASS1_BITS).  Pass 2 transposes
+; again, runs the same 1-D DCT with the final descaling and stores the eight
+; result rows back through the same pointer.
+;
+; Rows are accessed with movdqa (the aligned move), so 'data' is presumably
+; required to be 16-byte aligned -- confirm against XMMBLOCK in jsimdext.inc.
+;
+; Registers: edx = data pointer for the whole function; eax = stack pointer
+; value from before alignment (used only to fetch the argument); ebx = base
+; for GOTOFF() constant addressing, set up by get_GOT; xmm0-xmm7 are all
+; overwritten.
+;
+
+; 'b' is the stack pointer right after "push ebp": [b+0] = saved ebp,
+; [b+4] = return address, [b+8] = first argument.
+%define data(b)         (b)+8           ; DCTELEM * data
+
+; wk(0)..wk(WK_NUM-1) are xmmword scratch slots located directly below the
+; 16-byte-aligned ebp; [ebp+0] holds the pre-alignment stack pointer.
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          6
+
+        align   16
+        global  EXTN(jsimd_fdct_islow_sse2)
+
+EXTN(jsimd_fdct_islow_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = esp after the push (points at saved ebp)
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; remember the unaligned esp at [aligned esp]
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve the WK_NUM work slots
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+;       push    edi             ; unused
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process rows.
+
+        mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
+
+        ; xmm0=(00 01 02 03 04 05 06 07), xmm2=(20 21 22 23 24 25 26 27)
+        ; xmm1=(10 11 12 13 14 15 16 17), xmm3=(30 31 32 33 34 35 36 37)
+
+        movdqa    xmm4,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm1             ; xmm0=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm4,xmm1             ; xmm4=(04 14 05 15 06 16 07 17)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm3             ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm5,xmm3             ; xmm5=(24 34 25 35 26 36 27 37)
+
+        movdqa  xmm6, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
+
+        ; xmm6=(40 41 42 43 44 45 46 47), xmm1=(60 61 62 63 64 65 66 67)
+        ; xmm7=(50 51 52 53 54 55 56 57), xmm3=(70 71 72 73 74 75 76 77)
+
+        ; Only eight xmm registers exist in 32-bit mode, so partially
+        ; transposed rows are parked in the wk() slots.
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=(20 30 21 31 22 32 23 33)
+        movdqa  XMMWORD [wk(1)], xmm5   ; wk(1)=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm2,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm7             ; xmm2=(44 54 45 55 46 56 47 57)
+        movdqa    xmm5,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm3             ; xmm1=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm5,xmm3             ; xmm5=(64 74 65 75 66 76 67 77)
+
+        movdqa    xmm7,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm1             ; xmm6=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm7,xmm1             ; xmm7=(42 52 62 72 43 53 63 73)
+        movdqa    xmm3,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm3,xmm5             ; xmm3=(46 56 66 76 47 57 67 77)
+
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(20 30 21 31 22 32 23 33)
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=(24 34 25 35 26 36 27 37)
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=(42 52 62 72 43 53 63 73)
+        movdqa  XMMWORD [wk(3)], xmm2   ; wk(3)=(44 54 64 74 45 55 65 75)
+
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm1             ; xmm0=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm7,xmm1             ; xmm7=(02 12 22 32 03 13 23 33)
+        movdqa    xmm2,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm5             ; xmm4=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm2,xmm5             ; xmm2=(06 16 26 36 07 17 27 37)
+
+        movdqa     xmm1,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=(00 10 20 30 40 50 60 70)=data0
+        punpckhqdq xmm1,xmm6            ; xmm1=(01 11 21 31 41 51 61 71)=data1
+        movdqa     xmm5,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm3            ; xmm2=(06 16 26 36 46 56 66 76)=data6
+        punpckhqdq xmm5,xmm3            ; xmm5=(07 17 27 37 47 57 67 77)=data7
+
+        movdqa  xmm6,xmm1
+        movdqa  xmm3,xmm0
+        psubw   xmm1,xmm2               ; xmm1=data1-data6=tmp6
+        psubw   xmm0,xmm5               ; xmm0=data0-data7=tmp7
+        paddw   xmm6,xmm2               ; xmm6=data1+data6=tmp1
+        paddw   xmm3,xmm5               ; xmm3=data0+data7=tmp0
+
+        movdqa  xmm2, XMMWORD [wk(2)]   ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa  xmm5, XMMWORD [wk(3)]   ; xmm5=(44 54 64 74 45 55 65 75)
+        movdqa  XMMWORD [wk(0)], xmm1   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp7
+
+        movdqa     xmm1,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm2            ; xmm7=(02 12 22 32 42 52 62 72)=data2
+        punpckhqdq xmm1,xmm2            ; xmm1=(03 13 23 33 43 53 63 73)=data3
+        movdqa     xmm0,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm5            ; xmm4=(04 14 24 34 44 54 64 74)=data4
+        punpckhqdq xmm0,xmm5            ; xmm0=(05 15 25 35 45 55 65 75)=data5
+
+        movdqa  xmm2,xmm1
+        movdqa  xmm5,xmm7
+        paddw   xmm1,xmm4               ; xmm1=data3+data4=tmp3
+        paddw   xmm7,xmm0               ; xmm7=data2+data5=tmp2
+        psubw   xmm2,xmm4               ; xmm2=data3-data4=tmp4
+        psubw   xmm5,xmm0               ; xmm5=data2-data5=tmp5
+
+        ; -- Even part
+
+        movdqa  xmm4,xmm3
+        movdqa  xmm0,xmm6
+        paddw   xmm3,xmm1               ; xmm3=tmp10
+        paddw   xmm6,xmm7               ; xmm6=tmp11
+        psubw   xmm4,xmm1               ; xmm4=tmp13
+        psubw   xmm0,xmm7               ; xmm0=tmp12
+
+        movdqa  xmm1,xmm3
+        paddw   xmm3,xmm6               ; xmm3=tmp10+tmp11
+        psubw   xmm1,xmm6               ; xmm1=tmp10-tmp11
+
+        ; data0/data4 need no multiplication; scale them up by 2^PASS1_BITS
+        ; so they match the other pass-1 outputs (descaled by DESCALE_P1).
+        psllw   xmm3,PASS1_BITS         ; xmm3=data0
+        psllw   xmm1,PASS1_BITS         ; xmm1=data4
+
+        movdqa  XMMWORD [wk(2)], xmm3   ; wk(2)=data0
+        movdqa  XMMWORD [wk(3)], xmm1   ; wk(3)=data4
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        ; Interleave (tmp13,tmp12) word pairs so that one pmaddwd per output
+        ; half computes the two-term sum above as 32-bit values.
+        movdqa    xmm7,xmm4             ; xmm4=tmp13
+        movdqa    xmm6,xmm4
+        punpcklwd xmm7,xmm0             ; xmm0=tmp12
+        punpckhwd xmm6,xmm0
+        movdqa    xmm4,xmm7
+        movdqa    xmm0,xmm6
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F130_F054)]       ; xmm7=data2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=data2H
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm4=data6L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm0=data6H
+
+        ; Round and descale the 32-bit products, then pack back to words.
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm7,xmm6             ; xmm7=data2
+        packssdw  xmm4,xmm0             ; xmm4=data6
+
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=data2
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=data6
+
+        ; -- Odd part
+
+        movdqa  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp6
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp7
+
+        movdqa  xmm6,xmm2               ; xmm2=tmp4
+        movdqa  xmm0,xmm5               ; xmm5=tmp5
+        paddw   xmm6,xmm3               ; xmm6=z3
+        paddw   xmm0,xmm1               ; xmm0=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm7,xmm6
+        movdqa    xmm4,xmm6
+        punpcklwd xmm7,xmm0
+        punpckhwd xmm4,xmm0
+        movdqa    xmm6,xmm7
+        movdqa    xmm0,xmm4
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3H
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F117_F078)]       ; xmm0=z4H
+
+        ; z3 is needed twice (data7 and data3) but its registers are reused
+        ; below, so it is spilled; z4 stays live in xmm6/xmm0.
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=z3H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+        movdqa    xmm7,xmm2             ; xmm2=tmp4
+        movdqa    xmm4,xmm2
+        punpcklwd xmm7,xmm1             ; xmm1=tmp7
+        punpckhwd xmm4,xmm1
+        movdqa    xmm2,xmm7
+        movdqa    xmm1,xmm4
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp4L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm2=tmp7L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp7H
+
+        paddd   xmm7, XMMWORD [wk(0)]   ; xmm7=data7L
+        paddd   xmm4, XMMWORD [wk(1)]   ; xmm4=data7H
+        paddd   xmm2,xmm6               ; xmm2=data1L
+        paddd   xmm1,xmm0               ; xmm1=data1H
+
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+
+        packssdw  xmm7,xmm4             ; xmm7=data7
+        packssdw  xmm2,xmm1             ; xmm2=data1
+
+        movdqa    xmm4,xmm5             ; xmm5=tmp5
+        movdqa    xmm1,xmm5
+        punpcklwd xmm4,xmm3             ; xmm3=tmp6
+        punpckhwd xmm1,xmm3
+        movdqa    xmm5,xmm4
+        movdqa    xmm3,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm4=tmp5L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm5=tmp6L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6H
+
+        paddd   xmm4,xmm6               ; xmm4=data5L
+        paddd   xmm1,xmm0               ; xmm1=data5H
+        paddd   xmm5, XMMWORD [wk(0)]   ; xmm5=data3L
+        paddd   xmm3, XMMWORD [wk(1)]   ; xmm3=data3H
+
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm1,DESCALE_P1
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P1)]
+        paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+
+        packssdw  xmm4,xmm1             ; xmm4=data5
+        packssdw  xmm5,xmm3             ; xmm5=data3
+
+        ; ---- Pass 2: process columns.
+        ;
+        ; Pass-1 results at this point: data1=xmm2, data3=xmm5, data5=xmm4,
+        ; data7=xmm7, data0=wk(2), data4=wk(3), data2=wk(4), data6=wk(5).
+        ; edx still holds the data pointer, so it is not reloaded.
+
+;       mov     edx, POINTER [data(eax)]        ; (DCTELEM *)
+
+        movdqa  xmm6, XMMWORD [wk(2)]   ; xmm6=col0
+        movdqa  xmm0, XMMWORD [wk(4)]   ; xmm0=col2
+
+        ; xmm6=(00 10 20 30 40 50 60 70), xmm0=(02 12 22 32 42 52 62 72)
+        ; xmm2=(01 11 21 31 41 51 61 71), xmm5=(03 13 23 33 43 53 63 73)
+
+        movdqa    xmm1,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm2             ; xmm6=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm1,xmm2             ; xmm1=(40 41 50 51 60 61 70 71)
+        movdqa    xmm3,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(02 03 12 13 22 23 32 33)
+        punpckhwd xmm3,xmm5             ; xmm3=(42 43 52 53 62 63 72 73)
+
+        movdqa  xmm2, XMMWORD [wk(3)]   ; xmm2=col4
+        movdqa  xmm5, XMMWORD [wk(5)]   ; xmm5=col6
+
+        ; xmm2=(04 14 24 34 44 54 64 74), xmm5=(06 16 26 36 46 56 66 76)
+        ; xmm4=(05 15 25 35 45 55 65 75), xmm7=(07 17 27 37 47 57 67 77)
+
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=(02 03 12 13 22 23 32 33)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(42 43 52 53 62 63 72 73)
+
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm4             ; xmm2=(04 05 14 15 24 25 34 35)
+        punpckhwd xmm0,xmm4             ; xmm0=(44 45 54 55 64 65 74 75)
+        movdqa    xmm3,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm7             ; xmm5=(06 07 16 17 26 27 36 37)
+        punpckhwd xmm3,xmm7             ; xmm3=(46 47 56 57 66 67 76 77)
+
+        movdqa    xmm4,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm5             ; xmm2=(04 05 06 07 14 15 16 17)
+        punpckhdq xmm4,xmm5             ; xmm4=(24 25 26 27 34 35 36 37)
+        movdqa    xmm7,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(44 45 46 47 54 55 56 57)
+        punpckhdq xmm7,xmm3             ; xmm7=(64 65 66 67 74 75 76 77)
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=(02 03 12 13 22 23 32 33)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(42 43 52 53 62 63 72 73)
+        movdqa  XMMWORD [wk(2)], xmm4   ; wk(2)=(24 25 26 27 34 35 36 37)
+        movdqa  XMMWORD [wk(3)], xmm0   ; wk(3)=(44 45 46 47 54 55 56 57)
+
+        movdqa    xmm4,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm5             ; xmm6=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm4,xmm5             ; xmm4=(20 21 22 23 30 31 32 33)
+        movdqa    xmm0,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm3             ; xmm1=(40 41 42 43 50 51 52 53)
+        punpckhdq xmm0,xmm3             ; xmm0=(60 61 62 63 70 71 72 73)
+
+        movdqa     xmm5,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm2            ; xmm6=(00 01 02 03 04 05 06 07)=data0
+        punpckhqdq xmm5,xmm2            ; xmm5=(10 11 12 13 14 15 16 17)=data1
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm7            ; xmm0=(60 61 62 63 64 65 66 67)=data6
+        punpckhqdq xmm3,xmm7            ; xmm3=(70 71 72 73 74 75 76 77)=data7
+
+        movdqa  xmm2,xmm5
+        movdqa  xmm7,xmm6
+        psubw   xmm5,xmm0               ; xmm5=data1-data6=tmp6
+        psubw   xmm6,xmm3               ; xmm6=data0-data7=tmp7
+        paddw   xmm2,xmm0               ; xmm2=data1+data6=tmp1
+        paddw   xmm7,xmm3               ; xmm7=data0+data7=tmp0
+
+        movdqa  xmm0, XMMWORD [wk(2)]   ; xmm0=(24 25 26 27 34 35 36 37)
+        movdqa  xmm3, XMMWORD [wk(3)]   ; xmm3=(44 45 46 47 54 55 56 57)
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=tmp6
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp7
+
+        movdqa     xmm5,xmm4            ; transpose coefficients(phase 3)
+        punpcklqdq xmm4,xmm0            ; xmm4=(20 21 22 23 24 25 26 27)=data2
+        punpckhqdq xmm5,xmm0            ; xmm5=(30 31 32 33 34 35 36 37)=data3
+        movdqa     xmm6,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm3            ; xmm1=(40 41 42 43 44 45 46 47)=data4
+        punpckhqdq xmm6,xmm3            ; xmm6=(50 51 52 53 54 55 56 57)=data5
+
+        movdqa  xmm0,xmm5
+        movdqa  xmm3,xmm4
+        paddw   xmm5,xmm1               ; xmm5=data3+data4=tmp3
+        paddw   xmm4,xmm6               ; xmm4=data2+data5=tmp2
+        psubw   xmm0,xmm1               ; xmm0=data3-data4=tmp4
+        psubw   xmm3,xmm6               ; xmm3=data2-data5=tmp5
+
+        ; -- Even part
+
+        movdqa  xmm1,xmm7
+        movdqa  xmm6,xmm2
+        paddw   xmm7,xmm5               ; xmm7=tmp10
+        paddw   xmm2,xmm4               ; xmm2=tmp11
+        psubw   xmm1,xmm5               ; xmm1=tmp13
+        psubw   xmm6,xmm4               ; xmm6=tmp12
+
+        movdqa  xmm5,xmm7
+        paddw   xmm7,xmm2               ; xmm7=tmp10+tmp11
+        psubw   xmm5,xmm2               ; xmm5=tmp10-tmp11
+
+        ; data0/data4 carry only the 2^PASS1_BITS scaling from pass 1:
+        ; round and shift it out directly on the 16-bit values.
+        paddw   xmm7,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        paddw   xmm5,[GOTOFF(ebx,PW_DESCALE_P2X)]
+        psraw   xmm7,PASS1_BITS         ; xmm7=data0
+        psraw   xmm5,PASS1_BITS         ; xmm5=data4
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], xmm7
+        movdqa  XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], xmm5
+
+        ; (Original)
+        ; z1 = (tmp12 + tmp13) * 0.541196100;
+        ; data2 = z1 + tmp13 * 0.765366865;
+        ; data6 = z1 + tmp12 * -1.847759065;
+        ;
+        ; (This implementation)
+        ; data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100;
+        ; data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065);
+
+        movdqa    xmm4,xmm1             ; xmm1=tmp13
+        movdqa    xmm2,xmm1
+        punpcklwd xmm4,xmm6             ; xmm6=tmp12
+        punpckhwd xmm2,xmm6
+        movdqa    xmm1,xmm4
+        movdqa    xmm6,xmm2
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=data2L
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F130_F054)]       ; xmm2=data2H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=data6L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm6=data6H
+
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm6,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm6,DESCALE_P2
+
+        packssdw  xmm4,xmm2             ; xmm4=data2
+        packssdw  xmm1,xmm6             ; xmm1=data6
+
+        movdqa  XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], xmm1
+
+        ; -- Odd part
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp6
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp7
+
+        movdqa  xmm2,xmm0               ; xmm0=tmp4
+        movdqa  xmm6,xmm3               ; xmm3=tmp5
+        paddw   xmm2,xmm7               ; xmm2=z3
+        paddw   xmm6,xmm5               ; xmm6=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm4,xmm2
+        movdqa    xmm1,xmm2
+        punpcklwd xmm4,xmm6
+        punpckhwd xmm1,xmm6
+        movdqa    xmm2,xmm4
+        movdqa    xmm6,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm4=z3L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm1=z3H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F117_F078)]       ; xmm2=z4L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F117_F078)]       ; xmm6=z4H
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=z3L
+        movdqa  XMMWORD [wk(1)], xmm1   ; wk(1)=z3H
+
+        ; (Original)
+        ; z1 = tmp4 + tmp7;  z2 = tmp5 + tmp6;
+        ; tmp4 = tmp4 * 0.298631336;  tmp5 = tmp5 * 2.053119869;
+        ; tmp6 = tmp6 * 3.072711026;  tmp7 = tmp7 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; data7 = tmp4 + z1 + z3;  data5 = tmp5 + z2 + z4;
+        ; data3 = tmp6 + z2 + z3;  data1 = tmp7 + z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223;
+        ; tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447;
+        ; tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447);
+        ; tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223);
+        ; data7 = tmp4 + z3;  data5 = tmp5 + z4;
+        ; data3 = tmp6 + z3;  data1 = tmp7 + z4;
+
+        movdqa    xmm4,xmm0             ; xmm0=tmp4
+        movdqa    xmm1,xmm0
+        punpcklwd xmm4,xmm5             ; xmm5=tmp7
+        punpckhwd xmm1,xmm5
+        movdqa    xmm0,xmm4
+        movdqa    xmm5,xmm1
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm4=tmp4L
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm1=tmp4H
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm0=tmp7L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm5=tmp7H
+
+        paddd   xmm4, XMMWORD [wk(0)]   ; xmm4=data7L
+        paddd   xmm1, XMMWORD [wk(1)]   ; xmm1=data7H
+        paddd   xmm0,xmm2               ; xmm0=data1L
+        paddd   xmm5,xmm6               ; xmm5=data1H
+
+        paddd   xmm4,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm0,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
+
+        packssdw  xmm4,xmm1             ; xmm4=data7
+        packssdw  xmm0,xmm5             ; xmm0=data1
+
+        movdqa  XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm4
+        movdqa  XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm0
+
+        movdqa    xmm1,xmm3             ; xmm3=tmp5
+        movdqa    xmm5,xmm3
+        punpcklwd xmm1,xmm7             ; xmm7=tmp6
+        punpckhwd xmm5,xmm7
+        movdqa    xmm3,xmm1
+        movdqa    xmm7,xmm5
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm1=tmp5L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm5=tmp5H
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm3=tmp6L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm7=tmp6H
+
+        paddd   xmm1,xmm2               ; xmm1=data5L
+        paddd   xmm5,xmm6               ; xmm5=data5H
+        paddd   xmm3, XMMWORD [wk(0)]   ; xmm3=data3L
+        paddd   xmm7, XMMWORD [wk(1)]   ; xmm7=data3H
+
+        paddd   xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm1,DESCALE_P2
+        psrad   xmm5,DESCALE_P2
+        paddd   xmm3,[GOTOFF(ebx,PD_DESCALE_P2)]
+        paddd   xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
+
+        packssdw  xmm1,xmm5             ; xmm1=data5
+        packssdw  xmm3,xmm7             ; xmm3=data3
+
+        movdqa  XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3
+
+;       pop     edi             ; unused
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- pre-alignment esp (points at saved ebp)
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctflt-3dn.asm b/simd/jidctflt-3dn.asm
new file mode 100644
index 0000000..ea2e270
--- /dev/null
+++ b/simd/jidctflt-3dn.asm
@@ -0,0 +1,452 @@
+;
+; jidctflt-3dn.asm - floating-point IDCT (3DNow! & MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_float_3dnow)
+
+EXTN(jconst_idct_float_3dnow):
+
+PD_1_414        times 2 dd  1.414213562373095048801689 ; sqrt(2), replicated into both float lanes
+PD_1_847        times 2 dd  1.847759065022573512256366 ; 2*cos(pi/8)
+PD_1_082        times 2 dd  1.082392200292393968799446 ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_2_613        times 2 dd  2.613125929752753055713286 ; 2*(cos(pi/8)+cos(3*pi/8))
+PD_RNDINT_MAGIC times 2 dd  100663296.0 ; (float)(0x00C00000 << 3); added in pass 2 to round data/8 to integer
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE ; byte-added (paddb) to the packed output samples in pass 2
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_float_3dnow (void * dct_table, JCOEFPTR coef_block,
+;                         JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table (b = original ebp)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot where the prologue stores the unaligned frame pointer
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots in wk[]
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2], located just below wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_float_3dnow)
+
+EXTN(jsimd_idct_float_3dnow):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp (address of the saved ebp)
+        sub     esp, byte 4                     ; leave room to store that pointer
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; becomes [original_ebp] once ebp=esp
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]                ; reserve wk[] and workspace[] below ebp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/2                          ; ctr (two columns per iteration)
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]      ; cheap first test: rows 1 and 2
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT                ; any nonzero AC term -> full IDCT
+
+        pushpic ebx             ; save GOT address
+        mov     ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]      ; ebx doubles as scratch here
+        mov     eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        or      ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        or      ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        or      eax,ebx         ; ZF=1 iff rows 3..7 are all zero
+        poppic  ebx             ; restore GOT address
+        jnz     short .columnDCT        ; relies on poppic leaving the flags of "or" intact
+
+        ; -- AC terms all zero
+
+        movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]    ; mm0=(00 01 -- --)
+
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0=(00 01), sign-extended to dwords
+        pi2fd     mm0,mm0                       ; packed int32 -> float
+
+        pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize the DC terms
+
+        movq      mm1,mm0
+        punpckldq mm0,mm0                       ; mm0=(00 00)
+        punpckhdq mm1,mm1                       ; mm1=(01 01)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm0        ; replicate DC of 1st column
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1        ; replicate DC of 2nd column
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movd      mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movd      mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movd      mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movd      mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        punpcklwd mm1,mm1                       ; mm1=(20 20 21 21)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0=(00 01)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in2=(20 21)
+        pi2fd     mm0,mm0                       ; packed int32 -> float
+        pi2fd     mm1,mm1
+
+        pfmul     mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in0
+        pfmul     mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in2
+
+        punpcklwd mm2,mm2                       ; mm2=(40 40 41 41)
+        punpcklwd mm3,mm3                       ; mm3=(60 60 61 61)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in4=(40 41)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in6=(60 61)
+        pi2fd     mm2,mm2
+        pi2fd     mm3,mm3
+
+        pfmul     mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in4
+        pfmul     mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in6
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pfsub   mm0,mm2                 ; mm0=tmp11
+        pfsub   mm1,mm3                 ; mm1=in2-in6
+        pfadd   mm4,mm2                 ; mm4=tmp10
+        pfadd   mm5,mm3                 ; mm5=tmp13
+
+        pfmul   mm1,[GOTOFF(ebx,PD_1_414)]      ; mm1=(in2-in6)*1.414213562
+        pfsub   mm1,mm5                 ; mm1=tmp12
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm5                 ; mm4=tmp3
+        pfsub   mm0,mm1                 ; mm0=tmp2
+        pfadd   mm6,mm5                 ; mm6=tmp0
+        pfadd   mm7,mm1                 ; mm7=tmp1
+
+        movq    MMWORD [wk(1)], mm4     ; tmp3
+        movq    MMWORD [wk(0)], mm0     ; tmp2
+
+        ; -- Odd part
+
+        movd      mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movd      mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movd      mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movd      mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        punpcklwd mm2,mm2                       ; mm2=(10 10 11 11)
+        punpcklwd mm3,mm3                       ; mm3=(30 30 31 31)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in1=(10 11)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in3=(30 31)
+        pi2fd     mm2,mm2
+        pi2fd     mm3,mm3
+
+        pfmul     mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in1
+        pfmul     mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in3
+
+        punpcklwd mm5,mm5                       ; mm5=(50 50 51 51)
+        punpcklwd mm1,mm1                       ; mm1=(70 70 71 71)
+        psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in5=(50 51)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in7=(70 71)
+        pi2fd     mm5,mm5
+        pi2fd     mm1,mm1
+
+        pfmul     mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in5
+        pfmul     mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)] ; dequantize in7
+
+        movq    mm4,mm2
+        movq    mm0,mm5
+        pfadd   mm2,mm1                 ; mm2=z11 (in1+in7)
+        pfadd   mm5,mm3                 ; mm5=z13 (in5+in3)
+        pfsub   mm4,mm1                 ; mm4=z12 (in1-in7)
+        pfsub   mm0,mm3                 ; mm0=z10 (in5-in3)
+
+        movq    mm1,mm2
+        pfsub   mm2,mm5                 ; mm2=z11-z13
+        pfadd   mm1,mm5                 ; mm1=tmp7
+
+        pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
+
+        movq    mm3,mm0                 ; mm3=z10
+        pfadd   mm0,mm4                 ; mm0=z10+z12
+        pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+        pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+        pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+        pfsubr  mm3,mm0                 ; mm3=tmp12 (reverse subtract: z5 - mm3)
+        pfsub   mm4,mm0                 ; mm4=tmp10 (mm4 - z5)
+
+        ; -- Final output stage
+
+        pfsub   mm3,mm1                 ; mm3=tmp6
+        movq    mm5,mm6
+        movq    mm0,mm7
+        pfadd   mm6,mm1                 ; mm6=data0=(00 01)
+        pfadd   mm7,mm3                 ; mm7=data1=(10 11)
+        pfsub   mm5,mm1                 ; mm5=data7=(70 71)
+        pfsub   mm0,mm3                 ; mm0=data6=(60 61)
+        pfsub   mm2,mm3                 ; mm2=tmp5
+
+        movq      mm1,mm6               ; transpose coefficients
+        punpckldq mm6,mm7               ; mm6=(00 10)
+        punpckhdq mm1,mm7               ; mm1=(01 11)
+        movq      mm3,mm0               ; transpose coefficients
+        punpckldq mm0,mm5               ; mm0=(60 70)
+        punpckhdq mm3,mm5               ; mm3=(61 71)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm3
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+        movq    mm5, MMWORD [wk(1)]     ; mm5=tmp3
+
+        pfadd   mm4,mm2                 ; mm4=tmp4
+        movq    mm6,mm7
+        movq    mm1,mm5
+        pfadd   mm7,mm2                 ; mm7=data2=(20 21)
+        pfadd   mm5,mm4                 ; mm5=data4=(40 41)
+        pfsub   mm6,mm2                 ; mm6=data5=(50 51)
+        pfsub   mm1,mm4                 ; mm1=data3=(30 31)
+
+        movq      mm0,mm7               ; transpose coefficients
+        punpckldq mm7,mm1               ; mm7=(20 30)
+        punpckhdq mm0,mm1               ; mm0=(21 31)
+        movq      mm3,mm5               ; transpose coefficients
+        punpckldq mm5,mm6               ; mm5=(40 50)
+        punpckhdq mm3,mm6               ; mm3=(41 51)
+
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm7
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm0
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm3
+
+.nextcolumn:
+        add     esi, byte 2*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 2*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; -- Prefetch the next coefficient block
+
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetch [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]                     ; eax was clobbered in pass 1; reload
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]       ; eax = output_col from here on
+        mov     ecx, DCTSIZE/2                          ; ctr (two rows per iteration)
+        alignx  16,7
+.rowloop:
+
+        ; -- Even part
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pfsub   mm0,mm2                 ; mm0=tmp11
+        pfsub   mm1,mm3                 ; mm1=(block 2)-(block 6)
+        pfadd   mm4,mm2                 ; mm4=tmp10
+        pfadd   mm5,mm3                 ; mm5=tmp13
+
+        pfmul   mm1,[GOTOFF(ebx,PD_1_414)]      ; scale the difference by 1.414213562
+        pfsub   mm1,mm5                 ; mm1=tmp12
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        pfsub   mm4,mm5                 ; mm4=tmp3
+        pfsub   mm0,mm1                 ; mm0=tmp2
+        pfadd   mm6,mm5                 ; mm6=tmp0
+        pfadd   mm7,mm1                 ; mm7=tmp1
+
+        movq    MMWORD [wk(1)], mm4     ; tmp3
+        movq    MMWORD [wk(0)], mm0     ; tmp2
+
+        ; -- Odd part
+
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movq    mm4,mm2
+        movq    mm0,mm5
+        pfadd   mm2,mm1                 ; mm2=z11
+        pfadd   mm5,mm3                 ; mm5=z13
+        pfsub   mm4,mm1                 ; mm4=z12
+        pfsub   mm0,mm3                 ; mm0=z10
+
+        movq    mm1,mm2
+        pfsub   mm2,mm5                 ; mm2=z11-z13
+        pfadd   mm1,mm5                 ; mm1=tmp7
+
+        pfmul   mm2,[GOTOFF(ebx,PD_1_414)]      ; mm2=tmp11
+
+        movq    mm3,mm0                 ; mm3=z10
+        pfadd   mm0,mm4                 ; mm0=z10+z12
+        pfmul   mm0,[GOTOFF(ebx,PD_1_847)]      ; mm0=z5
+        pfmul   mm3,[GOTOFF(ebx,PD_2_613)]      ; mm3=(z10 * 2.613125930)
+        pfmul   mm4,[GOTOFF(ebx,PD_1_082)]      ; mm4=(z12 * 1.082392200)
+        pfsubr  mm3,mm0                 ; mm3=tmp12 (reverse subtract: z5 - mm3)
+        pfsub   mm4,mm0                 ; mm4=tmp10 (mm4 - z5)
+
+        ; -- Final output stage
+
+        pfsub   mm3,mm1                 ; mm3=tmp6
+        movq    mm5,mm6
+        movq    mm0,mm7
+        pfadd   mm6,mm1                 ; mm6=data0=(00 10)
+        pfadd   mm7,mm3                 ; mm7=data1=(01 11)
+        pfsub   mm5,mm1                 ; mm5=data7=(07 17)
+        pfsub   mm0,mm3                 ; mm0=data6=(06 16)
+        pfsub   mm2,mm3                 ; mm2=tmp5
+
+        movq    mm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm1=[PD_RNDINT_MAGIC]
+        pcmpeqd mm3,mm3                 ; mm3=all ones
+        psrld   mm3,WORD_BIT            ; mm3={0xFFFF 0x0000 0xFFFF 0x0000}
+
+        pfadd   mm6,mm1                 ; mm6=roundint(data0/8)=(00 ** 10 **)
+        pfadd   mm7,mm1                 ; mm7=roundint(data1/8)=(01 ** 11 **)
+        pfadd   mm0,mm1                 ; mm0=roundint(data6/8)=(06 ** 16 **)
+        pfadd   mm5,mm1                 ; mm5=roundint(data7/8)=(07 ** 17 **)
+
+        pand    mm6,mm3                 ; mm6=(00 -- 10 --)
+        pslld   mm7,WORD_BIT            ; mm7=(-- 01 -- 11)
+        pand    mm0,mm3                 ; mm0=(06 -- 16 --)
+        pslld   mm5,WORD_BIT            ; mm5=(-- 07 -- 17)
+        por     mm6,mm7                 ; mm6=(00 01 10 11)
+        por     mm0,mm5                 ; mm0=(06 07 16 17)
+
+        movq    mm1, MMWORD [wk(0)]     ; mm1=tmp2
+        movq    mm3, MMWORD [wk(1)]     ; mm3=tmp3
+
+        pfadd   mm4,mm2                 ; mm4=tmp4
+        movq    mm7,mm1
+        movq    mm5,mm3
+        pfadd   mm1,mm2                 ; mm1=data2=(02 12)
+        pfadd   mm3,mm4                 ; mm3=data4=(04 14)
+        pfsub   mm7,mm2                 ; mm7=data5=(05 15)
+        pfsub   mm5,mm4                 ; mm5=data3=(03 13)
+
+        movq    mm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]       ; mm2=[PD_RNDINT_MAGIC]
+        pcmpeqd mm4,mm4                 ; mm4=all ones
+        psrld   mm4,WORD_BIT            ; mm4={0xFFFF 0x0000 0xFFFF 0x0000}
+
+        pfadd   mm3,mm2                 ; mm3=roundint(data4/8)=(04 ** 14 **)
+        pfadd   mm7,mm2                 ; mm7=roundint(data5/8)=(05 ** 15 **)
+        pfadd   mm1,mm2                 ; mm1=roundint(data2/8)=(02 ** 12 **)
+        pfadd   mm5,mm2                 ; mm5=roundint(data3/8)=(03 ** 13 **)
+
+        pand    mm3,mm4                 ; mm3=(04 -- 14 --)
+        pslld   mm7,WORD_BIT            ; mm7=(-- 05 -- 15)
+        pand    mm1,mm4                 ; mm1=(02 -- 12 --)
+        pslld   mm5,WORD_BIT            ; mm5=(-- 03 -- 13)
+        por     mm3,mm7                 ; mm3=(04 05 14 15)
+        por     mm1,mm5                 ; mm1=(02 03 12 13)
+
+        movq      mm2,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm2=[PB_CENTERJSAMP]
+
+        packsswb  mm6,mm3               ; mm6=(00 01 10 11 04 05 14 15)
+        packsswb  mm1,mm0               ; mm1=(02 03 12 13 06 07 16 17)
+        paddb     mm6,mm2               ; add CENTERJSAMPLE to every byte
+        paddb     mm1,mm2
+
+        movq      mm4,mm6               ; transpose coefficients(phase 2)
+        punpcklwd mm6,mm1               ; mm6=(00 01 02 03 10 11 12 13)
+        punpckhwd mm4,mm1               ; mm4=(04 05 06 07 14 15 16 17)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 3)
+        punpckldq mm6,mm4               ; mm6=(00 01 02 03 04 05 06 07)
+        punpckhdq mm7,mm4               ; mm7=(10 11 12 13 14 15 16 17)
+
+        pushpic ebx                     ; save GOT address
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; edx = first output row pointer
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; ebx = second output row pointer (scratch use)
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6    ; store 8 samples at output_col
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+
+        poppic  ebx                     ; restore GOT address
+
+        add     esi, byte 2*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 2*SIZEOF_JSAMPROW     ; advance to the next pair of output rows
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
+
+        femms           ; empty MMX/3DNow! state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp             ; restore the caller's ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctflt-sse.asm b/simd/jidctflt-sse.asm
new file mode 100644
index 0000000..6160e41
--- /dev/null
+++ b/simd/jidctflt-sse.asm
@@ -0,0 +1,572 @@
+;
+; jidctflt-sse.asm - floating-point IDCT (SSE & MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44      ; selectors 0,1,0,1: low float pair of dst, then low pair of src
+%endmacro
+
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE      ; selectors 2,3,2,3: high float pair of dst, then high pair of src
+%endmacro
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_float_sse)
+
+EXTN(jconst_idct_float_sse):
+
+PD_1_414        times 4 dd  1.414213562373095048801689 ; sqrt(2), replicated into all four float lanes
+PD_1_847        times 4 dd  1.847759065022573512256366 ; 2*cos(pi/8)
+PD_1_082        times 4 dd  1.082392200292393968799446 ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_M2_613       times 4 dd -2.613125929752753055713286 ; -2*(cos(pi/8)+cos(3*pi/8)); negated so addps can be used
+PD_0_125        times 4 dd  0.125       ; 1/8
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE ; eight bytes of CENTERJSAMPLE (one mmword)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_float_sse (void * dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table (b = original ebp)
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot where the prologue stores the unaligned frame pointer
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2               ; number of xmmword scratch slots in wk[]
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2], located just below wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_float_sse)
+
+EXTN(jsimd_idct_float_sse):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+
+        punpckhwd mm1,mm0                       ; mm1=(** 02 ** 03)
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in0H=(02 03)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+        cvtpi2ps  xmm3,mm1                      ; xmm3=(02 03 ** **)
+        cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+        movlhps   xmm0,xmm3                     ; xmm0=in0=(00 01 02 03)
+
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
+
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movq      mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq      mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq      mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        punpckhwd mm4,mm0                       ; mm4=(** 02 ** 03)
+        punpcklwd mm0,mm0                       ; mm0=(00 00 01 01)
+        punpckhwd mm5,mm1                       ; mm5=(** 22 ** 23)
+        punpcklwd mm1,mm1                       ; mm1=(20 20 21 21)
+
+        psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in0H=(02 03)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in0L=(00 01)
+        cvtpi2ps  xmm4,mm4                      ; xmm4=(02 03 ** **)
+        cvtpi2ps  xmm0,mm0                      ; xmm0=(00 01 ** **)
+        psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in2H=(22 23)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in2L=(20 21)
+        cvtpi2ps  xmm5,mm5                      ; xmm5=(22 23 ** **)
+        cvtpi2ps  xmm1,mm1                      ; xmm1=(20 21 ** **)
+
+        punpckhwd mm6,mm2                       ; mm6=(** 42 ** 43)
+        punpcklwd mm2,mm2                       ; mm2=(40 40 41 41)
+        punpckhwd mm7,mm3                       ; mm7=(** 62 ** 63)
+        punpcklwd mm3,mm3                       ; mm3=(60 60 61 61)
+
+        psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in4H=(42 43)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in4L=(40 41)
+        cvtpi2ps  xmm6,mm6                      ; xmm6=(42 43 ** **)
+        cvtpi2ps  xmm2,mm2                      ; xmm2=(40 41 ** **)
+        psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in6H=(62 63)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in6L=(60 61)
+        cvtpi2ps  xmm7,mm7                      ; xmm7=(62 63 ** **)
+        cvtpi2ps  xmm3,mm3                      ; xmm3=(60 61 ** **)
+
+        movlhps   xmm0,xmm4                     ; xmm0=in0=(00 01 02 03)
+        movlhps   xmm1,xmm5                     ; xmm1=in2=(20 21 22 23)
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movlhps   xmm2,xmm6                     ; xmm2=in4=(40 41 42 43)
+        movlhps   xmm3,xmm7                     ; xmm3=in6=(60 61 62 63)
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movq      mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq      mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq      mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq      mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        punpckhwd mm6,mm4                       ; mm6=(** 12 ** 13)
+        punpcklwd mm4,mm4                       ; mm4=(10 10 11 11)
+        punpckhwd mm2,mm0                       ; mm2=(** 32 ** 33)
+        punpcklwd mm0,mm0                       ; mm0=(30 30 31 31)
+
+        psrad     mm6,(DWORD_BIT-WORD_BIT)      ; mm6=in1H=(12 13)
+        psrad     mm4,(DWORD_BIT-WORD_BIT)      ; mm4=in1L=(10 11)
+        cvtpi2ps  xmm4,mm6                      ; xmm4=(12 13 ** **)
+        cvtpi2ps  xmm2,mm4                      ; xmm2=(10 11 ** **)
+        psrad     mm2,(DWORD_BIT-WORD_BIT)      ; mm2=in3H=(32 33)
+        psrad     mm0,(DWORD_BIT-WORD_BIT)      ; mm0=in3L=(30 31)
+        cvtpi2ps  xmm0,mm2                      ; xmm0=(32 33 ** **)
+        cvtpi2ps  xmm3,mm0                      ; xmm3=(30 31 ** **)
+
+        punpckhwd mm7,mm5                       ; mm7=(** 52 ** 53)
+        punpcklwd mm5,mm5                       ; mm5=(50 50 51 51)
+        punpckhwd mm3,mm1                       ; mm3=(** 72 ** 73)
+        punpcklwd mm1,mm1                       ; mm1=(70 70 71 71)
+
+        movlhps   xmm2,xmm4                     ; xmm2=in1=(10 11 12 13)
+        movlhps   xmm3,xmm0                     ; xmm3=in3=(30 31 32 33)
+
+        psrad     mm7,(DWORD_BIT-WORD_BIT)      ; mm7=in5H=(52 53)
+        psrad     mm5,(DWORD_BIT-WORD_BIT)      ; mm5=in5L=(50 51)
+        cvtpi2ps  xmm4,mm7                      ; xmm4=(52 53 ** **)
+        cvtpi2ps  xmm5,mm5                      ; xmm5=(50 51 ** **)
+        psrad     mm3,(DWORD_BIT-WORD_BIT)      ; mm3=in7H=(72 73)
+        psrad     mm1,(DWORD_BIT-WORD_BIT)      ; mm1=in7L=(70 71)
+        cvtpi2ps  xmm0,mm3                      ; xmm0=(72 73 ** **)
+        cvtpi2ps  xmm1,mm1                      ; xmm1=(70 71 ** **)
+
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movlhps   xmm5,xmm4                     ; xmm5=in5=(50 51 52 53)
+        movlhps   xmm1,xmm0                     ; xmm1=in7=(70 71 72 73)
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
+
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
+
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.rowloop:
+
+        ; -- Even part
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps  xmm1,[GOTOFF(ebx,PD_0_125)]     ; xmm1=[PD_0_125]
+
+        mulps   xmm6,xmm1               ; descale(1/8)
+        mulps   xmm7,xmm1               ; descale(1/8)
+        mulps   xmm5,xmm1               ; descale(1/8)
+        mulps   xmm0,xmm1               ; descale(1/8)
+
+        movhlps   xmm3,xmm6
+        movhlps   xmm1,xmm7
+        cvtps2pi  mm0,xmm6              ; round to int32, mm0=data0L=(00 10)
+        cvtps2pi  mm1,xmm7              ; round to int32, mm1=data1L=(01 11)
+        cvtps2pi  mm2,xmm3              ; round to int32, mm2=data0H=(20 30)
+        cvtps2pi  mm3,xmm1              ; round to int32, mm3=data1H=(21 31)
+        packssdw  mm0,mm2               ; mm0=data0=(00 10 20 30)
+        packssdw  mm1,mm3               ; mm1=data1=(01 11 21 31)
+
+        movhlps   xmm6,xmm5
+        movhlps   xmm7,xmm0
+        cvtps2pi  mm4,xmm5              ; round to int32, mm4=data7L=(07 17)
+        cvtps2pi  mm5,xmm0              ; round to int32, mm5=data6L=(06 16)
+        cvtps2pi  mm6,xmm6              ; round to int32, mm6=data7H=(27 37)
+        cvtps2pi  mm7,xmm7              ; round to int32, mm7=data6H=(26 36)
+        packssdw  mm4,mm6               ; mm4=data7=(07 17 27 37)
+        packssdw  mm5,mm7               ; mm5=data6=(06 16 26 36)
+
+        packsswb  mm0,mm5               ; mm0=(00 10 20 30 06 16 26 36)
+        packsswb  mm1,mm4               ; mm1=(01 11 21 31 07 17 27 37)
+
+        movaps  xmm3, XMMWORD [wk(0)]   ; xmm3=tmp2
+        movaps  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
+
+        movaps  xmm6,[GOTOFF(ebx,PD_0_125)]     ; xmm6=[PD_0_125]
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm5,xmm3
+        movaps  xmm0,xmm1
+        addps   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+        addps   xmm1,xmm4               ; xmm1=data4=(04 14 24 34)
+        subps   xmm5,xmm2               ; xmm5=data5=(05 15 25 35)
+        subps   xmm0,xmm4               ; xmm0=data3=(03 13 23 33)
+
+        mulps   xmm3,xmm6               ; descale(1/8)
+        mulps   xmm1,xmm6               ; descale(1/8)
+        mulps   xmm5,xmm6               ; descale(1/8)
+        mulps   xmm0,xmm6               ; descale(1/8)
+
+        movhlps   xmm7,xmm3
+        movhlps   xmm2,xmm1
+        cvtps2pi  mm2,xmm3              ; round to int32, mm2=data2L=(02 12)
+        cvtps2pi  mm3,xmm1              ; round to int32, mm3=data4L=(04 14)
+        cvtps2pi  mm6,xmm7              ; round to int32, mm6=data2H=(22 32)
+        cvtps2pi  mm7,xmm2              ; round to int32, mm7=data4H=(24 34)
+        packssdw  mm2,mm6               ; mm2=data2=(02 12 22 32)
+        packssdw  mm3,mm7               ; mm3=data4=(04 14 24 34)
+
+        movhlps   xmm4,xmm5
+        movhlps   xmm6,xmm0
+        cvtps2pi  mm5,xmm5              ; round to int32, mm5=data5L=(05 15)
+        cvtps2pi  mm4,xmm0              ; round to int32, mm4=data3L=(03 13)
+        cvtps2pi  mm6,xmm4              ; round to int32, mm6=data5H=(25 35)
+        cvtps2pi  mm7,xmm6              ; round to int32, mm7=data3H=(23 33)
+        packssdw  mm5,mm6               ; mm5=data5=(05 15 25 35)
+        packssdw  mm4,mm7               ; mm4=data3=(03 13 23 33)
+
+        movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
+
+        packsswb  mm2,mm3               ; mm2=(02 12 22 32 04 14 24 34)
+        packsswb  mm4,mm5               ; mm4=(03 13 23 33 05 15 25 35)
+
+        paddb     mm0,mm6
+        paddb     mm1,mm6
+        paddb     mm2,mm6
+        paddb     mm4,mm6
+
+        movq      mm7,mm0               ; transpose coefficients(phase 1)
+        punpcklbw mm0,mm1               ; mm0=(00 01 10 11 20 21 30 31)
+        punpckhbw mm7,mm1               ; mm7=(06 07 16 17 26 27 36 37)
+        movq      mm3,mm2               ; transpose coefficients(phase 1)
+        punpcklbw mm2,mm4               ; mm2=(02 03 12 13 22 23 32 33)
+        punpckhbw mm3,mm4               ; mm3=(04 05 14 15 24 25 34 35)
+
+        movq      mm5,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm2               ; mm0=(00 01 02 03 10 11 12 13)
+        punpckhwd mm5,mm2               ; mm5=(20 21 22 23 30 31 32 33)
+        movq      mm6,mm3               ; transpose coefficients(phase 2)
+        punpcklwd mm3,mm7               ; mm3=(04 05 06 07 14 15 16 17)
+        punpckhwd mm6,mm7               ; mm6=(24 25 26 27 34 35 36 37)
+
+        movq      mm1,mm0               ; transpose coefficients(phase 3)
+        punpckldq mm0,mm3               ; mm0=(00 01 02 03 04 05 06 07)
+        punpckhdq mm1,mm3               ; mm1=(10 11 12 13 14 15 16 17)
+        movq      mm4,mm5               ; transpose coefficients(phase 3)
+        punpckldq mm5,mm6               ; mm5=(20 21 22 23 24 25 26 27)
+        punpckhdq mm4,mm6               ; mm4=(30 31 32 33 34 35 36 37)
+
+        pushpic ebx                     ; save GOT address
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+
+        poppic  ebx                     ; restore GOT address
+
+        add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctflt-sse2-64.asm b/simd/jidctflt-sse2-64.asm
new file mode 100644
index 0000000..32e4ec2
--- /dev/null
+++ b/simd/jidctflt-sse2-64.asm
@@ -0,0 +1,483 @@
+;
+; jidctflt-sse2-64.asm - floating-point IDCT (64-bit SSE & SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro  unpcklps2 2     ; in: %1=(a0 a1 a2 a3), %2=(b0 b1 b2 b3) / out: %1=(a0 a1 b0 b1)
+        shufps  %1,%2,(1<<6)|(0<<4)|(1<<2)|(0<<0)       ; selector = 0x44
+%endmacro
+
+%macro  unpckhps2 2     ; in: %1=(a0 a1 a2 a3), %2=(b0 b1 b2 b3) / out: %1=(a2 a3 b2 b3)
+        shufps  %1,%2,(3<<6)|(2<<4)|(3<<2)|(2<<0)       ; selector = 0xEE
+%endmacro
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_float_sse2)
+
+EXTN(jconst_idct_float_sse2):
+
+PD_1_414        times 4 dd  1.414213562373095048801689  ; sqrt(2), replicated in 4 lanes
+PD_1_847        times 4 dd  1.847759065022573512256366  ; 2*cos(pi/8)
+PD_1_082        times 4 dd  1.082392200292393968799446  ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_M2_613       times 4 dd -2.613125929752753055713286  ; -2*(cos(pi/8)+cos(3*pi/8))
+PD_RNDINT_MAGIC times 4 dd  100663296.0 ; (float)(0x00C00000 << 3) = 1.5 * 2^26
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE       ; byte bias added to the packed output (paddb)
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_float_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+; Pass 1: columns -> float workspace; pass 2: rows -> rounded, recentered bytes.
+
+; r10 = void * dct_table
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13 = JDIMENSION output_col (32-bit value; only r13d is consumed)
+
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2]
+
+        align   16
+        global  EXTN(jsimd_idct_float_sse2)
+
+EXTN(jsimd_idct_float_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = address of saved rbp ("original rbp")
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; stash it at [original_rbp] for the epilogue
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [workspace]                ; reserve wk[] and workspace[] below rbp
+        collect_args
+        push    rbx                             ; rbx serves as a row pointer in pass 2
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+        lea     rdi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     rcx, DCTSIZE/4                          ; ctr
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+
+        movq    xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm2, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm4, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm6, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        movq    xmm7, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm2
+        por     xmm3,xmm4
+        por     xmm5,xmm6
+        por     xmm1,xmm3
+        por     xmm5,xmm7
+        por     xmm1,xmm5               ; xmm1 = OR of rows 1..7 (4 columns)
+        packsswb xmm1,xmm1              ; a nonzero word saturates to a nonzero byte
+        movd    eax,xmm1                ; writing eax also clears the upper half of rax
+        test    rax,rax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
+
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpcklwd xmm1,xmm1             ; xmm1=(20 20 21 21 22 22 23 23)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in2=(20 21 22 23)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in2=(20 21 22 23)
+
+        punpcklwd xmm2,xmm2             ; xmm2=(40 40 41 41 42 42 43 43)
+        punpcklwd xmm3,xmm3             ; xmm3=(60 60 61 61 62 62 63 63)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in4=(40 41 42 43)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in6=(60 61 62 63)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in4=(40 41 42 43)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in6=(60 61 62 63)
+
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[rel PD_1_414]
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm2,xmm2             ; xmm2=(10 10 11 11 12 12 13 13)
+        punpcklwd xmm3,xmm3             ; xmm3=(30 30 31 31 32 32 33 33)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in1=(10 11 12 13)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in3=(30 31 32 33)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in1=(10 11 12 13)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in3=(30 31 32 33)
+
+        punpcklwd xmm5,xmm5             ; xmm5=(50 50 51 51 52 52 53 53)
+        punpcklwd xmm1,xmm1             ; xmm1=(70 70 71 71 72 72 73 73)
+        psrad     xmm5,(DWORD_BIT-WORD_BIT)     ; xmm5=in5=(50 51 52 53)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in7=(70 71 72 73)
+        cvtdq2ps  xmm5,xmm5                     ; xmm5=in5=(50 51 52 53)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in7=(70 71 72 73)
+
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FLOAT_MULT_TYPE)]
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[rel PD_1_414]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[rel PD_1_847]     ; xmm0=z5
+        mulps   xmm3,[rel PD_M2_613]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[rel PD_1_082]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
+
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
+
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
+
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+        add     rsi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     rdx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     rdi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     rcx                                     ; ctr
+        jnz     near .columnloop
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        ; (arguments live in r10-r13, so nothing is reloaded from the frame here)
+        lea     rsi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     eax, r13d       ; output_col is 32-bit: zero-extend, ignore r13's upper half
+        mov     rcx, DCTSIZE/4                          ; ctr
+.rowloop:
+
+        ; -- Even part
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[rel PD_1_414]
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[rel PD_1_414]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0
+        addps   xmm0,xmm4
+        mulps   xmm0,[rel PD_1_847]     ; xmm0=z5
+        mulps   xmm3,[rel PD_M2_613]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[rel PD_1_082]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps  xmm1,[rel PD_RNDINT_MAGIC]      ; xmm1=[rel PD_RNDINT_MAGIC]
+        pcmpeqd xmm3,xmm3
+        psrld   xmm3,WORD_BIT           ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+        addps   xmm6,xmm1       ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+        addps   xmm7,xmm1       ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+        addps   xmm0,xmm1       ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+        addps   xmm5,xmm1       ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+
+        pand    xmm6,xmm3               ; xmm6=(00 -- 10 -- 20 -- 30 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+        pand    xmm0,xmm3               ; xmm0=(06 -- 16 -- 26 -- 36 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+        por     xmm6,xmm7               ; xmm6=(00 01 10 11 20 21 30 31)
+        por     xmm0,xmm5               ; xmm0=(06 07 16 17 26 27 36 37)
+
+        movaps  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm7,xmm1
+        movaps  xmm5,xmm3
+        addps   xmm1,xmm2               ; xmm1=data2=(02 12 22 32)
+        addps   xmm3,xmm4               ; xmm3=data4=(04 14 24 34)
+        subps   xmm7,xmm2               ; xmm7=data5=(05 15 25 35)
+        subps   xmm5,xmm4               ; xmm5=data3=(03 13 23 33)
+
+        movaps  xmm2,[rel PD_RNDINT_MAGIC]      ; xmm2=[rel PD_RNDINT_MAGIC]
+        pcmpeqd xmm4,xmm4
+        psrld   xmm4,WORD_BIT           ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+        addps   xmm3,xmm2       ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+        addps   xmm7,xmm2       ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+        addps   xmm1,xmm2       ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+        addps   xmm5,xmm2       ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+
+        pand    xmm3,xmm4               ; xmm3=(04 -- 14 -- 24 -- 34 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+        pand    xmm1,xmm4               ; xmm1=(02 -- 12 -- 22 -- 32 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+        por     xmm3,xmm7               ; xmm3=(04 05 14 15 24 25 34 35)
+        por     xmm1,xmm5               ; xmm1=(02 03 12 13 22 23 32 33)
+
+        movdqa    xmm2,[rel PB_CENTERJSAMP]     ; xmm2=[rel PB_CENTERJSAMP]
+
+        packsswb  xmm6,xmm3     ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+        packsswb  xmm1,xmm0     ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+        paddb     xmm6,xmm2
+        paddb     xmm1,xmm2
+
+        movdqa    xmm4,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm1     ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm1     ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+
+        movdqa    xmm7,xmm6     ; transpose coefficients(phase 3)
+        punpckldq xmm6,xmm4     ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm7,xmm4     ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+
+        pshufd  xmm5,xmm6,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm3,xmm7,0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rbx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm7
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rbx, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
+
+        add     rsi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     rdi, byte 4*SIZEOF_JSAMPROW
+        dec     rcx                             ; ctr
+        jnz     near .rowloop
+
+        pop     rbx
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctflt-sse2.asm b/simd/jidctflt-sse2.asm
new file mode 100644
index 0000000..de2cd1f
--- /dev/null
+++ b/simd/jidctflt-sse2.asm
@@ -0,0 +1,498 @@
+;
+; jidctflt.asm - floating-point IDCT (SSE & SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a floating-point implementation of the inverse DCT
+; (Discrete Cosine Transform). The following code is based directly on
+; the IJG's original jidctflt.c; see the jidctflt.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%macro  unpcklps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
+        shufps  %1,%2,0x44      ; result = low two floats of dest, then low two floats of src
+%endmacro
+
+%macro  unpckhps2 2     ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
+        shufps  %1,%2,0xEE      ; result = high two floats of dest, then high two floats of src
+%endmacro
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_float_sse2)
+
+EXTN(jconst_idct_float_sse2):
+
+PD_1_414        times 4 dd  1.414213562373095048801689  ; sqrt(2)
+PD_1_847        times 4 dd  1.847759065022573512256366  ; 2*cos(pi/8)
+PD_1_082        times 4 dd  1.082392200292393968799446  ; 2*(cos(pi/8)-cos(3*pi/8))
+PD_M2_613       times 4 dd -2.613125929752753055713286  ; -2*(cos(pi/8)+cos(3*pi/8))
+PD_RNDINT_MAGIC times 4 dd  100663296.0 ; (float)(0x00C00000 << 3) = 1.5*2^26; adding it leaves roundint(x/8) in the low mantissa bits
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE       ; added bytewise to the packed samples in pass 2
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_float_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment frame base (argument base)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2               ; number of xmmword scratch slots just below aligned ebp
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT
+                                        ; FAST_FLOAT workspace[DCTSIZE2], located below wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_float_sse2)
+
+EXTN(jsimd_idct_float_sse2):
+        push    ebp                             ; save caller's ebp
+        mov     eax,esp                         ; eax = original ebp (arguments live at eax+8..)
+        sub     esp, byte 4                     ; room for the saved copy of eax
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax                       ; [original_ebp] = eax, reloaded before pass 2
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]                ; reserve wk[] and workspace[] below ebp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]     ; skipped: eax is expected to still hold it from the prologue
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr (4 columns per iteration)
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT         ; cheap early exit to the full IDCT
+
+        movq    xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    xmm4, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    xmm6, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        movq    xmm7, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm2
+        por     xmm3,xmm4
+        por     xmm5,xmm6
+        por     xmm1,xmm3
+        por     xmm5,xmm7
+        por     xmm1,xmm5               ; xmm1=OR of rows 1..7 for these 4 columns
+        packsswb xmm1,xmm1              ; words -> bytes; a nonzero word stays nonzero
+        movd    eax,xmm1                ; eax=the 4 packed bytes (one per column)
+        test    eax,eax
+        jnz     short .columnDCT        ; some AC term is nonzero -> full IDCT
+
+        ; -- AC terms all zero
+
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]        ; dequantize
+
+        movaps  xmm1,xmm0
+        movaps  xmm2,xmm0
+        movaps  xmm3,xmm0
+
+        shufps  xmm0,xmm0,0x00                  ; xmm0=(00 00 00 00)
+        shufps  xmm1,xmm1,0x55                  ; xmm1=(01 01 01 01)
+        shufps  xmm2,xmm2,0xAA                  ; xmm2=(02 02 02 02)
+        shufps  xmm3,xmm3,0xFF                  ; xmm3=(03 03 03 03)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0     ; each DC value fills
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0     ; both xmmwords of its
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1     ; (transposed) workspace row
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movq      xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq      xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpcklwd xmm1,xmm1             ; xmm1=(20 20 21 21 22 22 23 23)
+        psrad     xmm0,(DWORD_BIT-WORD_BIT)     ; xmm0=in0=(00 01 02 03)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in2=(20 21 22 23)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=in0=(00 01 02 03)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in2=(20 21 22 23)
+
+        punpcklwd xmm2,xmm2             ; xmm2=(40 40 41 41 42 42 43 43)
+        punpcklwd xmm3,xmm3             ; xmm3=(60 60 61 61 62 62 63 63)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in4=(40 41 42 43)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in6=(60 61 62 63)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in4=(40 41 42 43)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in6=(60 61 62 63)
+
+        mulps     xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in0
+        mulps     xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in2
+        mulps     xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in4
+        mulps     xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in6
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3               ; xmm1=in2-in6
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]     ; xmm1=(in2-in6)*1.414213562
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movq      xmm2, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq      xmm3, XMM_MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq      xmm5, XMM_MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq      xmm1, XMM_MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        punpcklwd xmm2,xmm2             ; xmm2=(10 10 11 11 12 12 13 13)
+        punpcklwd xmm3,xmm3             ; xmm3=(30 30 31 31 32 32 33 33)
+        psrad     xmm2,(DWORD_BIT-WORD_BIT)     ; xmm2=in1=(10 11 12 13)
+        psrad     xmm3,(DWORD_BIT-WORD_BIT)     ; xmm3=in3=(30 31 32 33)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=in1=(10 11 12 13)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=in3=(30 31 32 33)
+
+        punpcklwd xmm5,xmm5             ; xmm5=(50 50 51 51 52 52 53 53)
+        punpcklwd xmm1,xmm1             ; xmm1=(70 70 71 71 72 72 73 73)
+        psrad     xmm5,(DWORD_BIT-WORD_BIT)     ; xmm5=in5=(50 51 52 53)
+        psrad     xmm1,(DWORD_BIT-WORD_BIT)     ; xmm1=in7=(70 71 72 73)
+        cvtdq2ps  xmm5,xmm5                     ; xmm5=in5=(50 51 52 53)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=in7=(70 71 72 73)
+
+        mulps     xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in1
+        mulps     xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in3
+        mulps     xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in5
+        mulps     xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FLOAT_MULT_TYPE)]      ; dequantize in7
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5               ; xmm2=z11-z13
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0               ; xmm3=z10
+        addps   xmm0,xmm4               ; xmm0=z10+z12
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 01 02 03)
+        addps   xmm7,xmm3               ; xmm7=data1=(10 11 12 13)
+        subps   xmm5,xmm1               ; xmm5=data7=(70 71 72 73)
+        subps   xmm0,xmm3               ; xmm0=data6=(60 61 62 63)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps    xmm1,xmm6             ; transpose coefficients(phase 1)
+        unpcklps  xmm6,xmm7             ; xmm6=(00 10 01 11)
+        unpckhps  xmm1,xmm7             ; xmm1=(02 12 03 13)
+        movaps    xmm3,xmm0             ; transpose coefficients(phase 1)
+        unpcklps  xmm0,xmm5             ; xmm0=(60 70 61 71)
+        unpckhps  xmm3,xmm5             ; xmm3=(62 72 63 73)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movaps  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp3
+
+        movaps  XMMWORD [wk(0)], xmm0   ; wk(0)=(60 70 61 71)
+        movaps  XMMWORD [wk(1)], xmm3   ; wk(1)=(62 72 63 73)
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm0,xmm7
+        movaps  xmm3,xmm5
+        addps   xmm7,xmm2               ; xmm7=data2=(20 21 22 23)
+        addps   xmm5,xmm4               ; xmm5=data4=(40 41 42 43)
+        subps   xmm0,xmm2               ; xmm0=data5=(50 51 52 53)
+        subps   xmm3,xmm4               ; xmm3=data3=(30 31 32 33)
+
+        movaps    xmm2,xmm7             ; transpose coefficients(phase 1)
+        unpcklps  xmm7,xmm3             ; xmm7=(20 30 21 31)
+        unpckhps  xmm2,xmm3             ; xmm2=(22 32 23 33)
+        movaps    xmm4,xmm5             ; transpose coefficients(phase 1)
+        unpcklps  xmm5,xmm0             ; xmm5=(40 50 41 51)
+        unpckhps  xmm4,xmm0             ; xmm4=(42 52 43 53)
+
+        movaps    xmm3,xmm6             ; transpose coefficients(phase 2)
+        unpcklps2 xmm6,xmm7             ; xmm6=(00 10 20 30)
+        unpckhps2 xmm3,xmm7             ; xmm3=(01 11 21 31)
+        movaps    xmm0,xmm1             ; transpose coefficients(phase 2)
+        unpcklps2 xmm1,xmm2             ; xmm1=(02 12 22 32)
+        unpckhps2 xmm0,xmm2             ; xmm0=(03 13 23 33)
+
+        movaps  xmm7, XMMWORD [wk(0)]   ; xmm7=(60 70 61 71)
+        movaps  xmm2, XMMWORD [wk(1)]   ; xmm2=(62 72 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
+        movaps  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
+        movaps  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
+
+        movaps    xmm6,xmm5             ; transpose coefficients(phase 2)
+        unpcklps2 xmm5,xmm7             ; xmm5=(40 50 60 70)
+        unpckhps2 xmm6,xmm7             ; xmm6=(41 51 61 71)
+        movaps    xmm3,xmm4             ; transpose coefficients(phase 2)
+        unpcklps2 xmm4,xmm2             ; xmm4=(42 52 62 72)
+        unpckhps2 xmm3,xmm2             ; xmm3=(43 53 63 73)
+
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
+        movaps  XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
+        movaps  XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
+
+.nextcolumn:
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_FLOAT_MULT_TYPE      ; quantptr
+        add     edi,      4*DCTSIZE*SIZEOF_FAST_FLOAT   ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]    ; esi = coef_block + DCTSIZE*SIZEOF_JCOEF here
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]                     ; eax = argument base saved by the prologue
+        lea     esi, [workspace]                        ; FAST_FLOAT * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]       ; eax = output_col from here on
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.rowloop:
+
+        ; -- Even part
+
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm0
+        movaps  xmm5,xmm1
+        subps   xmm0,xmm2               ; xmm0=tmp11
+        subps   xmm1,xmm3               ; xmm1=(workspace block 2)-(workspace block 6)
+        addps   xmm4,xmm2               ; xmm4=tmp10
+        addps   xmm5,xmm3               ; xmm5=tmp13
+
+        mulps   xmm1,[GOTOFF(ebx,PD_1_414)]     ; scale the difference by 1.414213562
+        subps   xmm1,xmm5               ; xmm1=tmp12
+
+        movaps  xmm6,xmm4
+        movaps  xmm7,xmm0
+        subps   xmm4,xmm5               ; xmm4=tmp3
+        subps   xmm0,xmm1               ; xmm0=tmp2
+        addps   xmm6,xmm5               ; xmm6=tmp0
+        addps   xmm7,xmm1               ; xmm7=tmp1
+
+        movaps  XMMWORD [wk(1)], xmm4   ; tmp3
+        movaps  XMMWORD [wk(0)], xmm0   ; tmp2
+
+        ; -- Odd part
+
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_FAST_FLOAT)]
+
+        movaps  xmm4,xmm2
+        movaps  xmm0,xmm5
+        addps   xmm2,xmm1               ; xmm2=z11
+        addps   xmm5,xmm3               ; xmm5=z13
+        subps   xmm4,xmm1               ; xmm4=z12
+        subps   xmm0,xmm3               ; xmm0=z10
+
+        movaps  xmm1,xmm2
+        subps   xmm2,xmm5               ; xmm2=z11-z13
+        addps   xmm1,xmm5               ; xmm1=tmp7
+
+        mulps   xmm2,[GOTOFF(ebx,PD_1_414)]     ; xmm2=tmp11
+
+        movaps  xmm3,xmm0               ; xmm3=z10
+        addps   xmm0,xmm4               ; xmm0=z10+z12
+        mulps   xmm0,[GOTOFF(ebx,PD_1_847)]     ; xmm0=z5
+        mulps   xmm3,[GOTOFF(ebx,PD_M2_613)]    ; xmm3=(z10 * -2.613125930)
+        mulps   xmm4,[GOTOFF(ebx,PD_1_082)]     ; xmm4=(z12 * 1.082392200)
+        addps   xmm3,xmm0               ; xmm3=tmp12
+        subps   xmm4,xmm0               ; xmm4=tmp10
+
+        ; -- Final output stage
+
+        subps   xmm3,xmm1               ; xmm3=tmp6
+        movaps  xmm5,xmm6
+        movaps  xmm0,xmm7
+        addps   xmm6,xmm1               ; xmm6=data0=(00 10 20 30)
+        addps   xmm7,xmm3               ; xmm7=data1=(01 11 21 31)
+        subps   xmm5,xmm1               ; xmm5=data7=(07 17 27 37)
+        subps   xmm0,xmm3               ; xmm0=data6=(06 16 26 36)
+        subps   xmm2,xmm3               ; xmm2=tmp5
+
+        movaps  xmm1,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm1=[PD_RNDINT_MAGIC]
+        pcmpeqd xmm3,xmm3               ; xmm3=all ones
+        psrld   xmm3,WORD_BIT           ; xmm3={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+        addps   xmm6,xmm1       ; xmm6=roundint(data0/8)=(00 ** 10 ** 20 ** 30 **)
+        addps   xmm7,xmm1       ; xmm7=roundint(data1/8)=(01 ** 11 ** 21 ** 31 **)
+        addps   xmm0,xmm1       ; xmm0=roundint(data6/8)=(06 ** 16 ** 26 ** 36 **)
+        addps   xmm5,xmm1       ; xmm5=roundint(data7/8)=(07 ** 17 ** 27 ** 37 **)
+
+        pand    xmm6,xmm3               ; xmm6=(00 -- 10 -- 20 -- 30 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 01 -- 11 -- 21 -- 31)
+        pand    xmm0,xmm3               ; xmm0=(06 -- 16 -- 26 -- 36 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 07 -- 17 -- 27 -- 37)
+        por     xmm6,xmm7               ; xmm6=(00 01 10 11 20 21 30 31)
+        por     xmm0,xmm5               ; xmm0=(06 07 16 17 26 27 36 37)
+
+        movaps  xmm1, XMMWORD [wk(0)]   ; xmm1=tmp2
+        movaps  xmm3, XMMWORD [wk(1)]   ; xmm3=tmp3
+
+        addps   xmm4,xmm2               ; xmm4=tmp4
+        movaps  xmm7,xmm1
+        movaps  xmm5,xmm3
+        addps   xmm1,xmm2               ; xmm1=data2=(02 12 22 32)
+        addps   xmm3,xmm4               ; xmm3=data4=(04 14 24 34)
+        subps   xmm7,xmm2               ; xmm7=data5=(05 15 25 35)
+        subps   xmm5,xmm4               ; xmm5=data3=(03 13 23 33)
+
+        movaps  xmm2,[GOTOFF(ebx,PD_RNDINT_MAGIC)]      ; xmm2=[PD_RNDINT_MAGIC]
+        pcmpeqd xmm4,xmm4               ; xmm4=all ones
+        psrld   xmm4,WORD_BIT           ; xmm4={0xFFFF 0x0000 0xFFFF 0x0000 ..}
+
+        addps   xmm3,xmm2       ; xmm3=roundint(data4/8)=(04 ** 14 ** 24 ** 34 **)
+        addps   xmm7,xmm2       ; xmm7=roundint(data5/8)=(05 ** 15 ** 25 ** 35 **)
+        addps   xmm1,xmm2       ; xmm1=roundint(data2/8)=(02 ** 12 ** 22 ** 32 **)
+        addps   xmm5,xmm2       ; xmm5=roundint(data3/8)=(03 ** 13 ** 23 ** 33 **)
+
+        pand    xmm3,xmm4               ; xmm3=(04 -- 14 -- 24 -- 34 --)
+        pslld   xmm7,WORD_BIT           ; xmm7=(-- 05 -- 15 -- 25 -- 35)
+        pand    xmm1,xmm4               ; xmm1=(02 -- 12 -- 22 -- 32 --)
+        pslld   xmm5,WORD_BIT           ; xmm5=(-- 03 -- 13 -- 23 -- 33)
+        por     xmm3,xmm7               ; xmm3=(04 05 14 15 24 25 34 35)
+        por     xmm1,xmm5               ; xmm1=(02 03 12 13 22 23 32 33)
+
+        movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
+
+        packsswb  xmm6,xmm3     ; xmm6=(00 01 10 11 20 21 30 31 04 05 14 15 24 25 34 35)
+        packsswb  xmm1,xmm0     ; xmm1=(02 03 12 13 22 23 32 33 06 07 16 17 26 27 36 37)
+        paddb     xmm6,xmm2     ; re-center: add CENTERJSAMPLE to every byte
+        paddb     xmm1,xmm2     ; re-center: add CENTERJSAMPLE to every byte
+
+        movdqa    xmm4,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm1     ; xmm6=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm1     ; xmm4=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+
+        movdqa    xmm7,xmm6     ; transpose coefficients(phase 3)
+        punpckldq xmm6,xmm4     ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm7,xmm4     ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+
+        pshufd  xmm5,xmm6,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm3,xmm7,0x4E  ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+
+        pushpic ebx                     ; save GOT address
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; edx=output row 0 of this group
+        mov     ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]   ; ebx=output row 2 (ebx reused as scratch)
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6       ; store (00..07)
+        movq    XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7       ; store (20..27)
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; edx=output row 1
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]   ; ebx=output row 3
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5       ; store (10..17)
+        movq    XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3       ; store (30..37)
+
+        poppic  ebx                     ; restore GOT address
+
+        add     esi, byte 4*SIZEOF_FAST_FLOAT   ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW     ; advance to the next 4 output rows
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctfst-mmx.asm b/simd/jidctfst-mmx.asm
new file mode 100644
index 0000000..d97c02a
--- /dev/null
+++ b/simd/jidctfst-mmx.asm
@@ -0,0 +1,500 @@
+;
+; jidctfst.asm - fast integer IDCT (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see the jidctfst.c
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      8       ; fractional bits of the FIX() multipliers; 14 is also OK.
+%define PASS1_BITS      2       ; must equal IFAST_SCALE_BITS (checked below)
+
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+
+%if CONST_BITS == 8
+F_1_082 equ     277             ; FIX(1.082392200)
+F_1_414 equ     362             ; FIX(1.414213562)
+F_1_847 equ     473             ; FIX(1.847759065)
+F_2_613 equ     669             ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value below is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))        ; right shift by n with round-to-nearest
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw, which keeps the high 16 bits)
+
+%define PRE_MULTIPLY_SCALE_BITS   2     ; left shift applied to an operand before each pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)     ; left shift applied to the multipliers
+
+        alignz  16
+        global  EXTN(jconst_idct_ifast_mmx)
+
+EXTN(jconst_idct_ifast_mmx):
+
+PW_F1414        times 4 dw  F_1_414 << CONST_SHIFT
+PW_F1847        times 4 dw  F_1_847 << CONST_SHIFT
+PW_MF1613       times 4 dw -F_1_613 << CONST_SHIFT      ; -(2.613125930 - 1); the remaining -z10 is a separate psubw
+PW_F1082        times 4 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_ifast_mmx (void * dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot holding the pre-alignment frame base (argument base)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; number of mmword scratch slots just below aligned ebp
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2], located below wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_ifast_mmx)
+
+EXTN(jsimd_idct_ifast_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        psubw   mm0,mm2                 ; mm0=tmp11
+        psubw   mm1,mm3
+        paddw   mm4,mm2                 ; mm4=tmp10
+        paddw   mm5,mm3                 ; mm5=tmp13
+
+        psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   mm1,mm5                 ; mm1=tmp12
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        psubw   mm4,mm5                 ; mm4=tmp3
+        psubw   mm0,mm1                 ; mm0=tmp2
+        paddw   mm6,mm5                 ; mm6=tmp0
+        paddw   mm7,mm1                 ; mm7=tmp1
+
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+        movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
+
+        ; -- Odd part
+
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm5, MMWORD [MMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movq    mm4,mm2
+        movq    mm0,mm5
+        psubw   mm2,mm1                 ; mm2=z12
+        psubw   mm5,mm3                 ; mm5=z10
+        paddw   mm4,mm1                 ; mm4=z11
+        paddw   mm0,mm3                 ; mm0=z13
+
+        movq    mm1,mm5                 ; mm1=z10(unscaled)
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm5,PRE_MULTIPLY_SCALE_BITS
+
+        movq    mm3,mm4
+        psubw   mm4,mm0
+        paddw   mm3,mm0                 ; mm3=tmp7
+
+        psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movq    mm0,mm5
+        paddw   mm5,mm2
+        pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+        pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   mm0,mm1
+        psubw   mm2,mm5                 ; mm2=tmp10
+        paddw   mm0,mm5                 ; mm0=tmp12
+
+        ; -- Final output stage
+
+        psubw   mm0,mm3                 ; mm0=tmp6
+        movq    mm1,mm6
+        movq    mm5,mm7
+        paddw   mm6,mm3                 ; mm6=data0=(00 01 02 03)
+        paddw   mm7,mm0                 ; mm7=data1=(10 11 12 13)
+        psubw   mm1,mm3                 ; mm1=data7=(70 71 72 73)
+        psubw   mm5,mm0                 ; mm5=data6=(60 61 62 63)
+        psubw   mm4,mm0                 ; mm4=tmp5
+
+        movq      mm3,mm6               ; transpose coefficients(phase 1)
+        punpcklwd mm6,mm7               ; mm6=(00 10 01 11)
+        punpckhwd mm3,mm7               ; mm3=(02 12 03 13)
+        movq      mm0,mm5               ; transpose coefficients(phase 1)
+        punpcklwd mm5,mm1               ; mm5=(60 70 61 71)
+        punpckhwd mm0,mm1               ; mm0=(62 72 63 73)
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp2
+        movq    mm1, MMWORD [wk(1)]     ; mm1=tmp3
+
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(60 70 61 71)
+        movq    MMWORD [wk(1)], mm0     ; wk(1)=(62 72 63 73)
+
+        paddw   mm2,mm4                 ; mm2=tmp4
+        movq    mm5,mm7
+        movq    mm0,mm1
+        paddw   mm7,mm4                 ; mm7=data2=(20 21 22 23)
+        paddw   mm1,mm2                 ; mm1=data4=(40 41 42 43)
+        psubw   mm5,mm4                 ; mm5=data5=(50 51 52 53)
+        psubw   mm0,mm2                 ; mm0=data3=(30 31 32 33)
+
+        movq      mm4,mm7               ; transpose coefficients(phase 1)
+        punpcklwd mm7,mm0               ; mm7=(20 30 21 31)
+        punpckhwd mm4,mm0               ; mm4=(22 32 23 33)
+        movq      mm2,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm5               ; mm1=(40 50 41 51)
+        punpckhwd mm2,mm5               ; mm2=(42 52 43 53)
+
+        movq      mm0,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm7               ; mm6=(00 10 20 30)
+        punpckhdq mm0,mm7               ; mm0=(01 11 21 31)
+        movq      mm5,mm3               ; transpose coefficients(phase 2)
+        punpckldq mm3,mm4               ; mm3=(02 12 22 32)
+        punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=(60 70 61 71)
+        movq    mm4, MMWORD [wk(1)]     ; mm4=(62 72 63 73)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+
+        movq      mm6,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm7               ; mm1=(40 50 60 70)
+        punpckhdq mm6,mm7               ; mm6=(41 51 61 71)
+        movq      mm0,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm4               ; mm2=(42 52 62 72)
+        punpckhdq mm0,mm4               ; mm0=(43 53 63 73)
+
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm0
+
+.nextcolumn:
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_IFAST_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.rowloop:
+
+        ; -- Even part
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        movq    mm4,mm0
+        movq    mm5,mm1
+        psubw   mm0,mm2                 ; mm0=tmp11
+        psubw   mm1,mm3
+        paddw   mm4,mm2                 ; mm4=tmp10
+        paddw   mm5,mm3                 ; mm5=tmp13
+
+        psllw   mm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   mm1,mm5                 ; mm1=tmp12
+
+        movq    mm6,mm4
+        movq    mm7,mm0
+        psubw   mm4,mm5                 ; mm4=tmp3
+        psubw   mm0,mm1                 ; mm0=tmp2
+        paddw   mm6,mm5                 ; mm6=tmp0
+        paddw   mm7,mm1                 ; mm7=tmp1
+
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=tmp3
+        movq    MMWORD [wk(0)], mm0     ; wk(0)=tmp2
+
+        ; -- Odd part
+
+        movq    mm2, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        movq    mm4,mm2
+        movq    mm0,mm5
+        psubw   mm2,mm1                 ; mm2=z12
+        psubw   mm5,mm3                 ; mm5=z10
+        paddw   mm4,mm1                 ; mm4=z11
+        paddw   mm0,mm3                 ; mm0=z13
+
+        movq    mm1,mm5                 ; mm1=z10(unscaled)
+        psllw   mm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   mm5,PRE_MULTIPLY_SCALE_BITS
+
+        movq    mm3,mm4
+        psubw   mm4,mm0
+        paddw   mm3,mm0                 ; mm3=tmp7
+
+        psllw   mm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  mm4,[GOTOFF(ebx,PW_F1414)]      ; mm4=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movq    mm0,mm5
+        paddw   mm5,mm2
+        pmulhw  mm5,[GOTOFF(ebx,PW_F1847)]      ; mm5=z5
+        pmulhw  mm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  mm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   mm0,mm1
+        psubw   mm2,mm5                 ; mm2=tmp10
+        paddw   mm0,mm5                 ; mm0=tmp12
+
+        ; -- Final output stage
+
+        psubw   mm0,mm3                 ; mm0=tmp6
+        movq    mm1,mm6
+        movq    mm5,mm7
+        paddw   mm6,mm3                 ; mm6=data0=(00 10 20 30)
+        paddw   mm7,mm0                 ; mm7=data1=(01 11 21 31)
+        psraw   mm6,(PASS1_BITS+3)      ; descale
+        psraw   mm7,(PASS1_BITS+3)      ; descale
+        psubw   mm1,mm3                 ; mm1=data7=(07 17 27 37)
+        psubw   mm5,mm0                 ; mm5=data6=(06 16 26 36)
+        psraw   mm1,(PASS1_BITS+3)      ; descale
+        psraw   mm5,(PASS1_BITS+3)      ; descale
+        psubw   mm4,mm0                 ; mm4=tmp5
+
+        packsswb  mm6,mm5               ; mm6=(00 10 20 30 06 16 26 36)
+        packsswb  mm7,mm1               ; mm7=(01 11 21 31 07 17 27 37)
+
+        movq    mm3, MMWORD [wk(0)]     ; mm3=tmp2
+        movq    mm0, MMWORD [wk(1)]     ; mm0=tmp3
+
+        paddw   mm2,mm4                 ; mm2=tmp4
+        movq    mm5,mm3
+        movq    mm1,mm0
+        paddw   mm3,mm4                 ; mm3=data2=(02 12 22 32)
+        paddw   mm0,mm2                 ; mm0=data4=(04 14 24 34)
+        psraw   mm3,(PASS1_BITS+3)      ; descale
+        psraw   mm0,(PASS1_BITS+3)      ; descale
+        psubw   mm5,mm4                 ; mm5=data5=(05 15 25 35)
+        psubw   mm1,mm2                 ; mm1=data3=(03 13 23 33)
+        psraw   mm5,(PASS1_BITS+3)      ; descale
+        psraw   mm1,(PASS1_BITS+3)      ; descale
+
+        movq      mm4,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm4=[PB_CENTERJSAMP]
+
+        packsswb  mm3,mm0               ; mm3=(02 12 22 32 04 14 24 34)
+        packsswb  mm1,mm5               ; mm1=(03 13 23 33 05 15 25 35)
+
+        paddb     mm6,mm4
+        paddb     mm7,mm4
+        paddb     mm3,mm4
+        paddb     mm1,mm4
+
+        movq      mm2,mm6               ; transpose coefficients(phase 1)
+        punpcklbw mm6,mm7               ; mm6=(00 01 10 11 20 21 30 31)
+        punpckhbw mm2,mm7               ; mm2=(06 07 16 17 26 27 36 37)
+        movq      mm0,mm3               ; transpose coefficients(phase 1)
+        punpcklbw mm3,mm1               ; mm3=(02 03 12 13 22 23 32 33)
+        punpckhbw mm0,mm1               ; mm0=(04 05 14 15 24 25 34 35)
+
+        movq      mm5,mm6               ; transpose coefficients(phase 2)
+        punpcklwd mm6,mm3               ; mm6=(00 01 02 03 10 11 12 13)
+        punpckhwd mm5,mm3               ; mm5=(20 21 22 23 30 31 32 33)
+        movq      mm4,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm2               ; mm0=(04 05 06 07 14 15 16 17)
+        punpckhwd mm4,mm2               ; mm4=(24 25 26 27 34 35 36 37)
+
+        movq      mm7,mm6               ; transpose coefficients(phase 3)
+        punpckldq mm6,mm0               ; mm6=(00 01 02 03 04 05 06 07)
+        punpckhdq mm7,mm0               ; mm7=(10 11 12 13 14 15 16 17)
+        movq      mm1,mm5               ; transpose coefficients(phase 3)
+        punpckldq mm5,mm4               ; mm5=(20 21 22 23 24 25 26 27)
+        punpckhdq mm1,mm4               ; mm1=(30 31 32 33 34 35 36 37)
+
+        pushpic ebx                     ; save GOT address
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
+
+        poppic  ebx                     ; restore GOT address
+
+        add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctfst-sse2-64.asm b/simd/jidctfst-sse2-64.asm
new file mode 100644
index 0000000..8566952
--- /dev/null
+++ b/simd/jidctfst-sse2-64.asm
@@ -0,0 +1,492 @@
+;
+; jidctfst.asm - fast integer IDCT (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, not so accurate integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see the jidctfst.c
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+; Number of fractional bits carried by the F_* multiplier constants below.
+%define CONST_BITS      8       ; 14 is also OK.
+; Pass 2 of the IDCT in this file descales its results by (PASS1_BITS+3) bits.
+%define PASS1_BITS      2
+
+; The dequantization table must have been scaled with the same number of bits.
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+
+; Fixed-point multipliers, FIX(x) = round(x * 2^CONST_BITS).
+%if CONST_BITS == 8
+; Precomputed values for the default 8 fractional bits.
+F_1_082 equ     277             ; FIX(1.082392200)
+F_1_414 equ     362             ; FIX(1.414213562)
+F_1_847 equ     473             ; FIX(1.847759065)
+F_2_613 equ     669             ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants.
+; The literals below are the multipliers scaled by 2^30; DESCALE rounds them
+; down to CONST_BITS fractional bits (adds half an LSB, then shifts right).
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+; Read-only constant table used by jsimd_idct_ifast_sse2.
+;
+; Multiplications are done with pmulhw, which keeps only the high 16 bits of
+; each signed 16x16-bit product.  The operand is therefore pre-shifted left
+; by PRE_MULTIPLY_SCALE_BITS and the constant by CONST_SHIFT, so that the
+; shifts together with CONST_BITS add up to the 16 bits pmulhw discards.
+;
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+
+%define PRE_MULTIPLY_SCALE_BITS   2
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+        alignz  16
+        global  EXTN(jconst_idct_ifast_sse2)
+
+EXTN(jconst_idct_ifast_sse2):
+
+; Each PW_* entry is one multiplier replicated into 8 words (one XMM
+; register wide); PW_MF1613 holds the negated F_1_613.  PB_CENTERJSAMP is
+; CENTERJSAMPLE replicated into 16 bytes.
+PW_F1414        times 8 dw  F_1_414 << CONST_SHIFT
+PW_F1847        times 8 dw  F_1_847 << CONST_SHIFT
+PW_MF1613       times 8 dw -F_1_613 << CONST_SHIFT
+PW_F1082        times 8 dw  F_1_082 << CONST_SHIFT
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_ifast_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Pass 1 dequantizes and transforms all eight columns at once and leaves the
+; transposed result in registers (col1 and col3 are spilled to wk[]).
+; Pass 2 transforms the rows, descales by (PASS1_BITS+3), packs the words to
+; bytes with signed saturation, adds CENTERJSAMPLE and stores eight 8-byte
+; rows at output_buf[0..7] + output_col.
+;
+
+; r10 = void * dct_table  (copied to rdx and used as quantptr)
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13 = JDIMENSION output_col  (32-bit value; only r13d is read)
+
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_idct_ifast_sse2)
+
+EXTN(jsimd_idct_ifast_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
+        ; Cheap early-out: nonzero bits in these two dwords already prove
+        ; that the full transform is needed.
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+
+        ; OR rows 1..7 together.  The two saturating packs fold all eight
+        ; words into the low dword without ever turning a nonzero value into
+        ; zero, so eax is nonzero iff any AC coefficient is nonzero.
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    rax,rax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        ; Only row 0 contributes: dequantize it and broadcast each of its
+        ; eight values across a whole register, giving col0..col7 directly.
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movdqa    xmm7,xmm0             ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm7,xmm7             ; xmm7=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm6,xmm0,0x00          ; xmm6=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm2,xmm0,0x55          ; xmm2=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm5,xmm0,0xAA          ; xmm5=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm0,xmm0,0xFF          ; xmm0=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm1,xmm7,0x00          ; xmm1=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm4,xmm7,0x55          ; xmm4=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm3,xmm7,0xAA          ; xmm3=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm7,xmm7,0xFF          ; xmm7=col7=(07 07 07 07 07 07 07 07)
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=col3
+        jmp     near .column_end
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        psubw   xmm0,xmm2               ; xmm0=tmp11
+        psubw   xmm1,xmm3
+        paddw   xmm4,xmm2               ; xmm4=tmp10
+        paddw   xmm5,xmm3               ; xmm5=tmp13
+
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm1,[rel PW_F1414]
+        psubw   xmm1,xmm5               ; xmm1=tmp12
+
+        movdqa  xmm6,xmm4
+        movdqa  xmm7,xmm0
+        psubw   xmm4,xmm5               ; xmm4=tmp3
+        psubw   xmm0,xmm1               ; xmm0=tmp2
+        paddw   xmm6,xmm5               ; xmm6=tmp0
+        paddw   xmm7,xmm1               ; xmm7=tmp1
+
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=tmp3
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=tmp2
+
+        ; -- Odd part
+
+        movdqa  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movdqa  xmm4,xmm2
+        movdqa  xmm0,xmm5
+        psubw   xmm2,xmm1               ; xmm2=z12
+        psubw   xmm5,xmm3               ; xmm5=z10
+        paddw   xmm4,xmm1               ; xmm4=z11
+        paddw   xmm0,xmm3               ; xmm0=z13
+
+        movdqa  xmm1,xmm5               ; xmm1=z10(unscaled)
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+
+        movdqa  xmm3,xmm4
+        psubw   xmm4,xmm0
+        paddw   xmm3,xmm0               ; xmm3=tmp7
+
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm4,[rel PW_F1414]     ; xmm4=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movdqa  xmm0,xmm5
+        paddw   xmm5,xmm2
+        pmulhw  xmm5,[rel PW_F1847]     ; xmm5=z5
+        pmulhw  xmm0,[rel PW_MF1613]
+        pmulhw  xmm2,[rel PW_F1082]
+        psubw   xmm0,xmm1
+        psubw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm0,xmm5               ; xmm0=tmp12
+
+        ; -- Final output stage
+
+        psubw   xmm0,xmm3               ; xmm0=tmp6
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm7
+        paddw   xmm6,xmm3               ; xmm6=data0=(00 01 02 03 04 05 06 07)
+        paddw   xmm7,xmm0               ; xmm7=data1=(10 11 12 13 14 15 16 17)
+        psubw   xmm1,xmm3               ; xmm1=data7=(70 71 72 73 74 75 76 77)
+        psubw   xmm5,xmm0               ; xmm5=data6=(60 61 62 63 64 65 66 67)
+        psubw   xmm4,xmm0               ; xmm4=tmp5
+
+        movdqa    xmm3,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm3,xmm7             ; xmm3=(04 14 05 15 06 16 07 17)
+        movdqa    xmm0,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm1             ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm0,xmm1             ; xmm0=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(64 74 65 75 66 76 67 77)
+
+        paddw   xmm2,xmm4               ; xmm2=tmp4
+        movdqa  xmm5,xmm7
+        movdqa  xmm0,xmm1
+        paddw   xmm7,xmm4               ; xmm7=data2=(20 21 22 23 24 25 26 27)
+        paddw   xmm1,xmm2               ; xmm1=data4=(40 41 42 43 44 45 46 47)
+        psubw   xmm5,xmm4               ; xmm5=data5=(50 51 52 53 54 55 56 57)
+        psubw   xmm0,xmm2               ; xmm0=data3=(30 31 32 33 34 35 36 37)
+
+        movdqa    xmm4,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm0             ; xmm7=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm0             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm5             ; xmm2=(44 54 45 55 46 56 47 57)
+
+        movdqa    xmm0,xmm3             ; transpose coefficients(phase 2)
+        punpckldq xmm3,xmm4             ; xmm3=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm0,xmm4             ; xmm0=(06 16 26 36 07 17 27 37)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7             ; xmm6=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm5,xmm7             ; xmm5=(02 12 22 32 03 13 23 33)
+
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(60 70 61 71 62 72 63 73)
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=(64 74 65 75 66 76 67 77)
+
+        movdqa  XMMWORD [wk(0)], xmm3   ; wk(0)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm3,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm4             ; xmm1=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm3,xmm4             ; xmm3=(42 52 62 72 43 53 63 73)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm7             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm0,xmm7             ; xmm0=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm4,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm1            ; xmm6=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm4,xmm1            ; xmm4=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm7,xmm5            ; transpose coefficients(phase 3)
+        punpcklqdq xmm5,xmm3            ; xmm5=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm7,xmm3            ; xmm7=col3=(03 13 23 33 43 53 63 73)
+
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(04 14 24 34 05 15 25 35)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(06 16 26 36 07 17 27 37)
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=col3
+
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm4,xmm2            ; xmm4=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm7,xmm3            ; transpose coefficients(phase 3)
+        punpcklqdq xmm3,xmm0            ; xmm3=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm7,xmm0            ; xmm7=col7=(07 17 27 37 47 57 67 77)
+.column_end:
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     eax, r13d       ; output_col is a 32-bit JDIMENSION: the
+                                ; 32-bit move zero-extends into rax, so any
+                                ; stale upper bits of r13 cannot reach the
+                                ; store addresses computed below
+
+        ; -- Even part
+
+        ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
+
+        movdqa  xmm2,xmm6
+        movdqa  xmm0,xmm5
+        psubw   xmm6,xmm1               ; xmm6=tmp11
+        psubw   xmm5,xmm3
+        paddw   xmm2,xmm1               ; xmm2=tmp10
+        paddw   xmm0,xmm3               ; xmm0=tmp13
+
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[rel PW_F1414]
+        psubw   xmm5,xmm0               ; xmm5=tmp12
+
+        movdqa  xmm1,xmm2
+        movdqa  xmm3,xmm6
+        psubw   xmm2,xmm0               ; xmm2=tmp3
+        psubw   xmm6,xmm5               ; xmm6=tmp2
+        paddw   xmm1,xmm0               ; xmm1=tmp0
+        paddw   xmm3,xmm5               ; xmm3=tmp1
+
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=col1
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=col3
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp3
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp2
+
+        ; -- Odd part
+
+        ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
+
+        movdqa  xmm2,xmm0
+        movdqa  xmm6,xmm4
+        psubw   xmm0,xmm7               ; xmm0=z12
+        psubw   xmm4,xmm5               ; xmm4=z10
+        paddw   xmm2,xmm7               ; xmm2=z11
+        paddw   xmm6,xmm5               ; xmm6=z13
+
+        movdqa  xmm7,xmm4               ; xmm7=z10(unscaled)
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+
+        movdqa  xmm5,xmm2
+        psubw   xmm2,xmm6
+        paddw   xmm5,xmm6               ; xmm5=tmp7
+
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm2,[rel PW_F1414]     ; xmm2=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movdqa  xmm6,xmm4
+        paddw   xmm4,xmm0
+        pmulhw  xmm4,[rel PW_F1847]     ; xmm4=z5
+        pmulhw  xmm6,[rel PW_MF1613]
+        pmulhw  xmm0,[rel PW_F1082]
+        psubw   xmm6,xmm7
+        psubw   xmm0,xmm4               ; xmm0=tmp10
+        paddw   xmm6,xmm4               ; xmm6=tmp12
+
+        ; -- Final output stage
+
+        psubw   xmm6,xmm5               ; xmm6=tmp6
+        movdqa  xmm7,xmm1
+        movdqa  xmm4,xmm3
+        paddw   xmm1,xmm5               ; xmm1=data0=(00 10 20 30 40 50 60 70)
+        paddw   xmm3,xmm6               ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        psraw   xmm1,(PASS1_BITS+3)     ; descale
+        psraw   xmm3,(PASS1_BITS+3)     ; descale
+        psubw   xmm7,xmm5               ; xmm7=data7=(07 17 27 37 47 57 67 77)
+        psubw   xmm4,xmm6               ; xmm4=data6=(06 16 26 36 46 56 66 76)
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psubw   xmm2,xmm6               ; xmm2=tmp5
+
+        packsswb  xmm1,xmm4     ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm7     ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp2
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp3
+
+        paddw   xmm0,xmm2               ; xmm0=tmp4
+        movdqa  xmm4,xmm5
+        movdqa  xmm7,xmm6
+        paddw   xmm5,xmm2               ; xmm5=data2=(02 12 22 32 42 52 62 72)
+        paddw   xmm6,xmm0               ; xmm6=data4=(04 14 24 34 44 54 64 74)
+        psraw   xmm5,(PASS1_BITS+3)     ; descale
+        psraw   xmm6,(PASS1_BITS+3)     ; descale
+        psubw   xmm4,xmm2               ; xmm4=data5=(05 15 25 35 45 55 65 75)
+        psubw   xmm7,xmm0               ; xmm7=data3=(03 13 23 33 43 53 63 73)
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+
+        movdqa    xmm2,[rel PB_CENTERJSAMP]     ; xmm2=[rel PB_CENTERJSAMP]
+
+        packsswb  xmm5,xmm6     ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm7,xmm4     ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+
+        ; Shift the saturated signed bytes into the unsigned sample range.
+        paddb     xmm1,xmm2
+        paddb     xmm3,xmm2
+        paddb     xmm5,xmm2
+        paddb     xmm7,xmm2
+
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 1)
+        punpcklbw xmm1,xmm3     ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm3     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm6,xmm5     ; transpose coefficients(phase 1)
+        punpcklbw xmm5,xmm7     ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm6,xmm7     ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+        movdqa    xmm4,xmm1     ; transpose coefficients(phase 2)
+        punpcklwd xmm1,xmm5     ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm5     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm0     ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm2,xmm0     ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+        movdqa    xmm3,xmm1     ; transpose coefficients(phase 3)
+        punpckldq xmm1,xmm6     ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm3,xmm6     ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm2     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm7,xmm2     ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+        ; Swap the 64-bit halves so the odd rows sit in the low quadword,
+        ; where the 8-byte movq stores below can reach them.
+        pshufd  xmm5,xmm1,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm3,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm6,xmm4,0x4E  ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm2,xmm7,0x4E  ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+
+        ; Even output rows 0, 2, 4, 6
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+        mov     rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
+
+        ; Odd output rows 1, 3, 5, 7
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
+        mov     rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
+
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctfst-sse2.asm b/simd/jidctfst-sse2.asm
new file mode 100644
index 0000000..4658be3
--- /dev/null
+++ b/simd/jidctfst-sse2.asm
@@ -0,0 +1,502 @@
+;
+; jidctfst.asm - fast integer IDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a fast, but less accurate, integer implementation of
+; the inverse DCT (Discrete Cosine Transform). The following code is
+; based directly on the IJG's original jidctfst.c; see jidctfst.c
+; for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      8       ; fractional bits of the F_x_xxx constants; 14 is also OK.
+%define PASS1_BITS      2       ; must equal IFAST_SCALE_BITS (enforced just below)
+
+%if IFAST_SCALE_BITS != PASS1_BITS
+%error "'IFAST_SCALE_BITS' must be equal to 'PASS1_BITS'."
+%endif
+; FIX(x) = x * 2^CONST_BITS rounded to nearest; the values below are precomputed for 8 bits.
+%if CONST_BITS == 8
+F_1_082 equ     277             ; FIX(1.082392200)
+F_1_414 equ     362             ; FIX(1.414213562)
+F_1_847 equ     473             ; FIX(1.847759065)
+F_2_613 equ     669             ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - 256) ; FIX(2.613125930) - FIX(1)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each constant is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_1_082 equ     DESCALE(1162209775,30-CONST_BITS)       ; FIX(1.082392200)
+F_1_414 equ     DESCALE(1518500249,30-CONST_BITS)       ; FIX(1.414213562)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_613 equ     DESCALE(2805822602,30-CONST_BITS)       ; FIX(2.613125930)
+F_1_613 equ     (F_2_613 - (1 << CONST_BITS))   ; FIX(2.613125930) - FIX(1)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+; Scaling budget for the pmulhw multiplies (pmulhw keeps the upper 16 bits of each product):
+; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
+; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
+
+%define PRE_MULTIPLY_SCALE_BITS   2       ; left shift applied to the data operand before each pmulhw
+%define CONST_SHIFT     (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
+
+        alignz  16
+        global  EXTN(jconst_idct_ifast_sse2)
+
+EXTN(jconst_idct_ifast_sse2):
+; Each PW_* entry is one multiplier replicated into all 8 word lanes.
+PW_F1414        times 8 dw  F_1_414 << CONST_SHIFT      ; 1.414213562
+PW_F1847        times 8 dw  F_1_847 << CONST_SHIFT      ; 1.847759065
+PW_MF1613       times 8 dw -F_1_613 << CONST_SHIFT      ; -(2.613125930 - 1); the remaining -1 * z10 is a separate psubw
+PW_F1082        times 8 dw  F_1_082 << CONST_SHIFT      ; 1.082392200
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE               ; added bytewise to the packed samples at the end of pass 2
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_ifast_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Stack-argument offsets relative to b = esp right after "push ebp" (b+0: saved ebp, b+4: return address):
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; [ebp+0] holds the unaligned frame pointer saved by the prologue
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2               ; number of XMMWORD scratch slots below the aligned ebp
+
+        align   16
+        global  EXTN(jsimd_idct_ifast_sse2)
+; Saves/restores esi and edi (and ebx via pushpic/poppic); eax, edx and xmm0-xmm7 are overwritten.
+EXTN(jsimd_idct_ifast_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]                    ; reserve wk[WK_NUM] below the aligned ebp
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+        ; Cheap dword test found nothing: OR together rows 1..7 across all eight columns.
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1              ; signed saturation keeps non-zero lanes non-zero: 8 words -> 8 bytes
+        packsswb xmm1,xmm1              ; ... -> 4 bytes, so the low dword covers all eight columns
+        movd    eax,xmm1
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        ; (quantptr is indexed with the IFAST element size here, matching the accesses in .columnDCT)
+        movdqa    xmm7,xmm0             ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0             ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm7,xmm7             ; xmm7=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm6,xmm0,0x00          ; xmm6=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm2,xmm0,0x55          ; xmm2=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm5,xmm0,0xAA          ; xmm5=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm0,xmm0,0xFF          ; xmm0=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm1,xmm7,0x00          ; xmm1=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm4,xmm7,0x55          ; xmm4=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm3,xmm7,0xAA          ; xmm3=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm7,xmm7,0xFF          ; xmm7=col7=(07 07 07 07 07 07 07 07)
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=col3
+        jmp     near .column_end
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,xmm1
+        psubw   xmm0,xmm2               ; xmm0=tmp11
+        psubw   xmm1,xmm3
+        paddw   xmm4,xmm2               ; xmm4=tmp10
+        paddw   xmm5,xmm3               ; xmm5=tmp13
+
+        psllw   xmm1,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm1,[GOTOFF(ebx,PW_F1414)]
+        psubw   xmm1,xmm5               ; xmm1=tmp12
+
+        movdqa  xmm6,xmm4
+        movdqa  xmm7,xmm0
+        psubw   xmm4,xmm5               ; xmm4=tmp3
+        psubw   xmm0,xmm1               ; xmm0=tmp2
+        paddw   xmm6,xmm5               ; xmm6=tmp0
+        paddw   xmm7,xmm1               ; xmm7=tmp1
+
+        movdqa  XMMWORD [wk(1)], xmm4   ; wk(1)=tmp3
+        movdqa  XMMWORD [wk(0)], xmm0   ; wk(0)=tmp2
+
+        ; -- Odd part
+
+        movdqa  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_IFAST_MULT_TYPE)]
+
+        movdqa  xmm4,xmm2
+        movdqa  xmm0,xmm5
+        psubw   xmm2,xmm1               ; xmm2=z12
+        psubw   xmm5,xmm3               ; xmm5=z10
+        paddw   xmm4,xmm1               ; xmm4=z11
+        paddw   xmm0,xmm3               ; xmm0=z13
+
+        movdqa  xmm1,xmm5               ; xmm1=z10(unscaled)
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+
+        movdqa  xmm3,xmm4
+        psubw   xmm4,xmm0
+        paddw   xmm3,xmm0               ; xmm3=tmp7
+
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F1414)]     ; xmm4=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movdqa  xmm0,xmm5
+        paddw   xmm5,xmm2
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F1847)]     ; xmm5=z5
+        pmulhw  xmm0,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F1082)]
+        psubw   xmm0,xmm1
+        psubw   xmm2,xmm5               ; xmm2=tmp10
+        paddw   xmm0,xmm5               ; xmm0=tmp12
+
+        ; -- Final output stage
+
+        psubw   xmm0,xmm3               ; xmm0=tmp6
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,xmm7
+        paddw   xmm6,xmm3               ; xmm6=data0=(00 01 02 03 04 05 06 07)
+        paddw   xmm7,xmm0               ; xmm7=data1=(10 11 12 13 14 15 16 17)
+        psubw   xmm1,xmm3               ; xmm1=data7=(70 71 72 73 74 75 76 77)
+        psubw   xmm5,xmm0               ; xmm5=data6=(60 61 62 63 64 65 66 67)
+        psubw   xmm4,xmm0               ; xmm4=tmp5
+
+        movdqa    xmm3,xmm6             ; transpose coefficients(phase 1)
+        punpcklwd xmm6,xmm7             ; xmm6=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm3,xmm7             ; xmm3=(04 14 05 15 06 16 07 17)
+        movdqa    xmm0,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm1             ; xmm5=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm0,xmm1             ; xmm0=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp2
+        movdqa  xmm1, XMMWORD [wk(1)]   ; xmm1=tmp3
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(64 74 65 75 66 76 67 77)
+
+        paddw   xmm2,xmm4               ; xmm2=tmp4
+        movdqa  xmm5,xmm7
+        movdqa  xmm0,xmm1
+        paddw   xmm7,xmm4               ; xmm7=data2=(20 21 22 23 24 25 26 27)
+        paddw   xmm1,xmm2               ; xmm1=data4=(40 41 42 43 44 45 46 47)
+        psubw   xmm5,xmm4               ; xmm5=data5=(50 51 52 53 54 55 56 57)
+        psubw   xmm0,xmm2               ; xmm0=data3=(30 31 32 33 34 35 36 37)
+
+        movdqa    xmm4,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm0             ; xmm7=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm0             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm2,xmm1             ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm5             ; xmm1=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm2,xmm5             ; xmm2=(44 54 45 55 46 56 47 57)
+
+        movdqa    xmm0,xmm3             ; transpose coefficients(phase 2)
+        punpckldq xmm3,xmm4             ; xmm3=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm0,xmm4             ; xmm0=(06 16 26 36 07 17 27 37)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7             ; xmm6=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm5,xmm7             ; xmm5=(02 12 22 32 03 13 23 33)
+
+        movdqa  xmm4, XMMWORD [wk(0)]   ; xmm4=(60 70 61 71 62 72 63 73)
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=(64 74 65 75 66 76 67 77)
+
+        movdqa  XMMWORD [wk(0)], xmm3   ; wk(0)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm3,xmm1             ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm4             ; xmm1=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm3,xmm4             ; xmm3=(42 52 62 72 43 53 63 73)
+        movdqa    xmm0,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm7             ; xmm2=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm0,xmm7             ; xmm0=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm4,xmm6            ; transpose coefficients(phase 3)
+        punpcklqdq xmm6,xmm1            ; xmm6=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm4,xmm1            ; xmm4=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm7,xmm5            ; transpose coefficients(phase 3)
+        punpcklqdq xmm5,xmm3            ; xmm5=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm7,xmm3            ; xmm7=col3=(03 13 23 33 43 53 63 73)
+
+        movdqa  xmm1, XMMWORD [wk(0)]   ; xmm1=(04 14 24 34 05 15 25 35)
+        movdqa  xmm3, XMMWORD [wk(1)]   ; xmm3=(06 16 26 36 07 17 27 37)
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=col1
+        movdqa  XMMWORD [wk(1)], xmm7   ; wk(1)=col3
+
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm4,xmm2            ; xmm4=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm7,xmm3            ; transpose coefficients(phase 3)
+        punpcklqdq xmm3,xmm0            ; xmm3=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm7,xmm0            ; xmm7=col7=(07 17 27 37 47 57 67 77)
+.column_end:
+        ; Both paths arrive with col0/2/4/6 in xmm6/5/1/3, col5/7 in xmm4/7, col1/3 in wk(0)/wk(1).
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+
+        ; -- Even part
+
+        ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
+
+        movdqa  xmm2,xmm6
+        movdqa  xmm0,xmm5
+        psubw   xmm6,xmm1               ; xmm6=tmp11
+        psubw   xmm5,xmm3
+        paddw   xmm2,xmm1               ; xmm2=tmp10
+        paddw   xmm0,xmm3               ; xmm0=tmp13
+
+        psllw   xmm5,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm5,[GOTOFF(ebx,PW_F1414)]
+        psubw   xmm5,xmm0               ; xmm5=tmp12
+
+        movdqa  xmm1,xmm2
+        movdqa  xmm3,xmm6
+        psubw   xmm2,xmm0               ; xmm2=tmp3
+        psubw   xmm6,xmm5               ; xmm6=tmp2
+        paddw   xmm1,xmm0               ; xmm1=tmp0
+        paddw   xmm3,xmm5               ; xmm3=tmp1
+
+        movdqa  xmm0, XMMWORD [wk(0)]   ; xmm0=col1
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=col3
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp3
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=tmp2
+
+        ; -- Odd part
+
+        ; xmm0=col1, xmm5=col3, xmm4=col5, xmm7=col7
+
+        movdqa  xmm2,xmm0
+        movdqa  xmm6,xmm4
+        psubw   xmm0,xmm7               ; xmm0=z12
+        psubw   xmm4,xmm5               ; xmm4=z10
+        paddw   xmm2,xmm7               ; xmm2=z11
+        paddw   xmm6,xmm5               ; xmm6=z13
+
+        movdqa  xmm7,xmm4               ; xmm7=z10(unscaled)
+        psllw   xmm0,PRE_MULTIPLY_SCALE_BITS
+        psllw   xmm4,PRE_MULTIPLY_SCALE_BITS
+
+        movdqa  xmm5,xmm2
+        psubw   xmm2,xmm6
+        paddw   xmm5,xmm6               ; xmm5=tmp7
+
+        psllw   xmm2,PRE_MULTIPLY_SCALE_BITS
+        pmulhw  xmm2,[GOTOFF(ebx,PW_F1414)]     ; xmm2=tmp11
+
+        ; To avoid overflow...
+        ;
+        ; (Original)
+        ; tmp12 = -2.613125930 * z10 + z5;
+        ;
+        ; (This implementation)
+        ; tmp12 = (-1.613125930 - 1) * z10 + z5;
+        ;       = -1.613125930 * z10 - z10 + z5;
+
+        movdqa  xmm6,xmm4
+        paddw   xmm4,xmm0
+        pmulhw  xmm4,[GOTOFF(ebx,PW_F1847)]     ; xmm4=z5
+        pmulhw  xmm6,[GOTOFF(ebx,PW_MF1613)]
+        pmulhw  xmm0,[GOTOFF(ebx,PW_F1082)]
+        psubw   xmm6,xmm7
+        psubw   xmm0,xmm4               ; xmm0=tmp10
+        paddw   xmm6,xmm4               ; xmm6=tmp12
+
+        ; -- Final output stage
+
+        psubw   xmm6,xmm5               ; xmm6=tmp6
+        movdqa  xmm7,xmm1
+        movdqa  xmm4,xmm3
+        paddw   xmm1,xmm5               ; xmm1=data0=(00 10 20 30 40 50 60 70)
+        paddw   xmm3,xmm6               ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        psraw   xmm1,(PASS1_BITS+3)     ; descale
+        psraw   xmm3,(PASS1_BITS+3)     ; descale
+        psubw   xmm7,xmm5               ; xmm7=data7=(07 17 27 37 47 57 67 77)
+        psubw   xmm4,xmm6               ; xmm4=data6=(06 16 26 36 46 56 66 76)
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psubw   xmm2,xmm6               ; xmm2=tmp5
+
+        packsswb  xmm1,xmm4     ; xmm1=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm7     ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm5, XMMWORD [wk(1)]   ; xmm5=tmp2
+        movdqa  xmm6, XMMWORD [wk(0)]   ; xmm6=tmp3
+
+        paddw   xmm0,xmm2               ; xmm0=tmp4
+        movdqa  xmm4,xmm5
+        movdqa  xmm7,xmm6
+        paddw   xmm5,xmm2               ; xmm5=data2=(02 12 22 32 42 52 62 72)
+        paddw   xmm6,xmm0               ; xmm6=data4=(04 14 24 34 44 54 64 74)
+        psraw   xmm5,(PASS1_BITS+3)     ; descale
+        psraw   xmm6,(PASS1_BITS+3)     ; descale
+        psubw   xmm4,xmm2               ; xmm4=data5=(05 15 25 35 45 55 65 75)
+        psubw   xmm7,xmm0               ; xmm7=data3=(03 13 23 33 43 53 63 73)
+        psraw   xmm4,(PASS1_BITS+3)     ; descale
+        psraw   xmm7,(PASS1_BITS+3)     ; descale
+
+        movdqa    xmm2,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm2=[PB_CENTERJSAMP]
+
+        packsswb  xmm5,xmm6     ; xmm5=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm7,xmm4     ; xmm7=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+        ; Samples are now signed-saturated bytes; adding CENTERJSAMPLE bytewise (paddb wraps) re-centers them.
+        paddb     xmm1,xmm2
+        paddb     xmm3,xmm2
+        paddb     xmm5,xmm2
+        paddb     xmm7,xmm2
+
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 1)
+        punpcklbw xmm1,xmm3     ; xmm1=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm3     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm6,xmm5     ; transpose coefficients(phase 1)
+        punpcklbw xmm5,xmm7     ; xmm5=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm6,xmm7     ; xmm6=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+        movdqa    xmm4,xmm1     ; transpose coefficients(phase 2)
+        punpcklwd xmm1,xmm5     ; xmm1=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm5     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 2)
+        punpcklwd xmm6,xmm0     ; xmm6=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm2,xmm0     ; xmm2=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+        movdqa    xmm3,xmm1     ; transpose coefficients(phase 3)
+        punpckldq xmm1,xmm6     ; xmm1=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm3,xmm6     ; xmm3=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm7,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm2     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm7,xmm2     ; xmm7=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+        pshufd  xmm5,xmm1,0x4E  ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm3,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm6,xmm4,0x4E  ; xmm6=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm2,xmm7,0x4E  ; xmm2=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+        ; movq stores the low 8 bytes only: even rows come from xmm1/3/4/7, odd rows from the half-swapped copies.
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+        mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7
+
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+        mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctint-mmx.asm b/simd/jidctint-mmx.asm
new file mode 100644
index 0000000..7e25b82
--- /dev/null
+++ b/simd/jidctint-mmx.asm
@@ -0,0 +1,852 @@
+;
+; jidctint.asm - accurate integer IDCT (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see jidctint.c for
+; more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13      ; fractional bits of the F_x_xxx constants
+%define PASS1_BITS      2       ; pass 1 stores its results scaled up by 2^PASS1_BITS
+
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)        ; presumably the pass-1 descale shift; see PD_DESCALE_P1
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)      ; presumably the pass-2 descale shift; see PD_DESCALE_P2
+; FIX(x) = x * 2^CONST_BITS rounded to nearest; the values below are precomputed for 13 bits.
+%if CONST_BITS == 13
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each constant is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_islow_mmx)
+
+EXTN(jconst_idct_islow_mmx):
+; Each PW_* entry is a pair of word multipliers (repeated twice) for pmaddwd on interleaved operands.
+PW_F130_F054    times 2 dw  (F_0_541+F_0_765), F_0_541  ; tmp3 = z2*(0.541+0.765) + z3*0.541
+PW_F054_MF130   times 2 dw  F_0_541, (F_0_541-F_1_847)  ; tmp2 = z2*0.541 + z3*(0.541-1.848)
+PW_MF078_F117   times 2 dw  (F_1_175-F_1_961), F_1_175  ; z3 = z3*(1.176-1.962) + z4*1.176
+PW_F117_F078    times 2 dw  F_1_175, (F_1_175-F_0_390)  ; z4 = z3*1.176 + z4*(1.176-0.390)
+PW_MF060_MF089  times 2 dw  (F_0_298-F_0_899),-F_0_899  ; (0.299-0.900), -0.900
+PW_MF089_F060   times 2 dw -F_0_899, (F_1_501-F_0_899)  ; -0.900, (1.501-0.900)
+PW_MF050_MF256  times 2 dw  (F_2_053-F_2_562),-F_2_562  ; (2.053-2.563), -2.563
+PW_MF256_F050   times 2 dw -F_2_562, (F_3_072-F_2_562)  ; -2.563, (3.073-2.563)
+PD_DESCALE_P1   times 2 dd  1 << (DESCALE_P1-1)         ; half of 2^DESCALE_P1 (presumably the rounding term for that shift)
+PD_DESCALE_P2   times 2 dd  1 << (DESCALE_P2-1)         ; half of 2^DESCALE_P2 (presumably the rounding term for that shift)
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE               ; CENTERJSAMPLE replicated into all 8 bytes
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_islow_mmx (void * dct_table, JCOEFPTR coef_block,
+;                       JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Stack-argument offsets relative to b = esp right after "push ebp" (b+0: saved ebp, b+4: return address):
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; [ebp+0] holds the unaligned frame pointer saved by the prologue
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          12              ; number of MMWORD scratch slots below the aligned ebp
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2], located directly below wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_islow_mmx)
+
+EXTN(jsimd_idct_islow_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm1,mm0
+        packsswb mm1,mm1
+        movd    eax,mm1
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   mm0,PASS1_BITS
+
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movq      mm4,mm1               ; mm1=in2=z2
+        movq      mm5,mm1
+        punpcklwd mm4,mm3               ; mm3=in6=z3
+        punpckhwd mm5,mm3
+        movq      mm1,mm4
+        movq      mm3,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
+
+        movq      mm6,mm0
+        paddw     mm0,mm2               ; mm0=in0+in4
+        psubw     mm6,mm2               ; mm6=in0-in4
+
+        pxor      mm7,mm7
+        pxor      mm2,mm2
+        punpcklwd mm7,mm0               ; mm7=tmp0L
+        punpckhwd mm2,mm0               ; mm2=tmp0H
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+        psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
+
+        movq    mm0,mm7
+        paddd   mm7,mm4                 ; mm7=tmp10L
+        psubd   mm0,mm4                 ; mm0=tmp13L
+        movq    mm4,mm2
+        paddd   mm2,mm5                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp13H
+
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+        movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+        movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
+
+        pxor      mm5,mm5
+        pxor      mm7,mm7
+        punpcklwd mm5,mm6               ; mm5=tmp1L
+        punpckhwd mm7,mm6               ; mm7=tmp1H
+        psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+
+        movq    mm2,mm5
+        paddd   mm5,mm1                 ; mm5=tmp11L
+        psubd   mm2,mm1                 ; mm2=tmp12L
+        movq    mm0,mm7
+        paddd   mm7,mm3                 ; mm7=tmp11H
+        psubd   mm0,mm3                 ; mm0=tmp12H
+
+        movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+        movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+        movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+        movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm4, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm6, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm1, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movq    mm5,mm6
+        movq    mm7,mm4
+        paddw   mm5,mm3                 ; mm5=z3
+        paddw   mm7,mm1                 ; mm7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movq      mm2,mm5
+        movq      mm0,mm5
+        punpcklwd mm2,mm7
+        punpckhwd mm0,mm7
+        movq      mm5,mm2
+        movq      mm7,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
+
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movq      mm2,mm3
+        movq      mm0,mm3
+        punpcklwd mm2,mm4
+        punpckhwd mm0,mm4
+        movq      mm3,mm2
+        movq      mm4,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
+
+        paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+        paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+        paddd   mm3,mm5                 ; mm3=tmp3L
+        paddd   mm4,mm7                 ; mm4=tmp3H
+
+        movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+        movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
+
+        movq      mm2,mm1
+        movq      mm0,mm1
+        punpcklwd mm2,mm6
+        punpckhwd mm0,mm6
+        movq      mm1,mm2
+        movq      mm6,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
+
+        paddd   mm2,mm5                 ; mm2=tmp1L
+        paddd   mm0,mm7                 ; mm0=tmp1H
+        paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+        paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
+
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
+
+        movq    mm2,mm5
+        movq    mm0,mm7
+        paddd   mm5,mm3                 ; mm5=data0L
+        paddd   mm7,mm4                 ; mm7=data0H
+        psubd   mm2,mm3                 ; mm2=data7L
+        psubd   mm0,mm4                 ; mm0=data7H
+
+        movq    mm3,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm3=[PD_DESCALE_P1]
+
+        paddd   mm5,mm3
+        paddd   mm7,mm3
+        psrad   mm5,DESCALE_P1
+        psrad   mm7,DESCALE_P1
+        paddd   mm2,mm3
+        paddd   mm0,mm3
+        psrad   mm2,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+
+        packssdw  mm5,mm7               ; mm5=data0=(00 01 02 03)
+        packssdw  mm2,mm0               ; mm2=data7=(70 71 72 73)
+
+        movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+        movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
+
+        movq    mm7,mm4
+        movq    mm0,mm3
+        paddd   mm4,mm1                 ; mm4=data1L
+        paddd   mm3,mm6                 ; mm3=data1H
+        psubd   mm7,mm1                 ; mm7=data6L
+        psubd   mm0,mm6                 ; mm0=data6H
+
+        movq    mm1,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm1=[PD_DESCALE_P1]
+
+        paddd   mm4,mm1
+        paddd   mm3,mm1
+        psrad   mm4,DESCALE_P1
+        psrad   mm3,DESCALE_P1
+        paddd   mm7,mm1
+        paddd   mm0,mm1
+        psrad   mm7,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+
+        packssdw  mm4,mm3               ; mm4=data1=(10 11 12 13)
+        packssdw  mm7,mm0               ; mm7=data6=(60 61 62 63)
+
+        movq      mm6,mm5               ; transpose coefficients(phase 1)
+        punpcklwd mm5,mm4               ; mm5=(00 10 01 11)
+        punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+        movq      mm1,mm7               ; transpose coefficients(phase 1)
+        punpcklwd mm7,mm2               ; mm7=(60 70 61 71)
+        punpckhwd mm1,mm2               ; mm1=(62 72 63 73)
+
+        movq    mm3, MMWORD [wk(6)]     ; mm3=tmp12L
+        movq    mm0, MMWORD [wk(7)]     ; mm0=tmp12H
+        movq    mm4, MMWORD [wk(10)]    ; mm4=tmp1L
+        movq    mm2, MMWORD [wk(11)]    ; mm2=tmp1H
+
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 01 11)
+        movq    MMWORD [wk(1)], mm6     ; wk(1)=(02 12 03 13)
+        movq    MMWORD [wk(4)], mm7     ; wk(4)=(60 70 61 71)
+        movq    MMWORD [wk(5)], mm1     ; wk(5)=(62 72 63 73)
+
+        movq    mm5,mm3
+        movq    mm6,mm0
+        paddd   mm3,mm4                 ; mm3=data2L
+        paddd   mm0,mm2                 ; mm0=data2H
+        psubd   mm5,mm4                 ; mm5=data5L
+        psubd   mm6,mm2                 ; mm6=data5H
+
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm7=[PD_DESCALE_P1]
+
+        paddd   mm3,mm7
+        paddd   mm0,mm7
+        psrad   mm3,DESCALE_P1
+        psrad   mm0,DESCALE_P1
+        paddd   mm5,mm7
+        paddd   mm6,mm7
+        psrad   mm5,DESCALE_P1
+        psrad   mm6,DESCALE_P1
+
+        packssdw  mm3,mm0               ; mm3=data2=(20 21 22 23)
+        packssdw  mm5,mm6               ; mm5=data5=(50 51 52 53)
+
+        movq    mm1, MMWORD [wk(2)]     ; mm1=tmp13L
+        movq    mm4, MMWORD [wk(3)]     ; mm4=tmp13H
+        movq    mm2, MMWORD [wk(8)]     ; mm2=tmp0L
+        movq    mm7, MMWORD [wk(9)]     ; mm7=tmp0H
+
+        movq    mm0,mm1
+        movq    mm6,mm4
+        paddd   mm1,mm2                 ; mm1=data3L
+        paddd   mm4,mm7                 ; mm4=data3H
+        psubd   mm0,mm2                 ; mm0=data4L
+        psubd   mm6,mm7                 ; mm6=data4H
+
+        movq    mm2,[GOTOFF(ebx,PD_DESCALE_P1)] ; mm2=[PD_DESCALE_P1]
+
+        paddd   mm1,mm2
+        paddd   mm4,mm2
+        psrad   mm1,DESCALE_P1
+        psrad   mm4,DESCALE_P1
+        paddd   mm0,mm2
+        paddd   mm6,mm2
+        psrad   mm0,DESCALE_P1
+        psrad   mm6,DESCALE_P1
+
+        packssdw  mm1,mm4               ; mm1=data3=(30 31 32 33)
+        packssdw  mm0,mm6               ; mm0=data4=(40 41 42 43)
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=(00 10 01 11)
+        movq    mm2, MMWORD [wk(1)]     ; mm2=(02 12 03 13)
+
+        movq      mm4,mm3               ; transpose coefficients(phase 1)
+        punpcklwd mm3,mm1               ; mm3=(20 30 21 31)
+        punpckhwd mm4,mm1               ; mm4=(22 32 23 33)
+        movq      mm6,mm0               ; transpose coefficients(phase 1)
+        punpcklwd mm0,mm5               ; mm0=(40 50 41 51)
+        punpckhwd mm6,mm5               ; mm6=(42 52 43 53)
+
+        movq      mm1,mm7               ; transpose coefficients(phase 2)
+        punpckldq mm7,mm3               ; mm7=(00 10 20 30)
+        punpckhdq mm1,mm3               ; mm1=(01 11 21 31)
+        movq      mm5,mm2               ; transpose coefficients(phase 2)
+        punpckldq mm2,mm4               ; mm2=(02 12 22 32)
+        punpckhdq mm5,mm4               ; mm5=(03 13 23 33)
+
+        movq    mm3, MMWORD [wk(4)]     ; mm3=(60 70 61 71)
+        movq    mm4, MMWORD [wk(5)]     ; mm4=(62 72 63 73)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm7
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm5
+
+        movq      mm7,mm0               ; transpose coefficients(phase 2)
+        punpckldq mm0,mm3               ; mm0=(40 50 60 70)
+        punpckhdq mm7,mm3               ; mm7=(41 51 61 71)
+        movq      mm1,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm4               ; mm6=(42 52 62 72)
+        punpckhdq mm1,mm4               ; mm1=(43 53 63 73)
+
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm7
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm1
+
+.nextcolumn:
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+        mov     ecx, DCTSIZE/4                          ; ctr
+        alignx  16,7
+.rowloop:
+
+        ; -- Even part
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movq      mm4,mm1               ; mm1=in2=z2
+        movq      mm5,mm1
+        punpcklwd mm4,mm3               ; mm3=in6=z3
+        punpckhwd mm5,mm3
+        movq      mm1,mm4
+        movq      mm3,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F130_F054)]        ; mm4=tmp3L
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F130_F054)]        ; mm5=tmp3H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F054_MF130)]       ; mm1=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F054_MF130)]       ; mm3=tmp2H
+
+        movq      mm6,mm0
+        paddw     mm0,mm2               ; mm0=in0+in4
+        psubw     mm6,mm2               ; mm6=in0-in4
+
+        pxor      mm7,mm7
+        pxor      mm2,mm2
+        punpcklwd mm7,mm0               ; mm7=tmp0L
+        punpckhwd mm2,mm0               ; mm2=tmp0H
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+        psrad     mm2,(16-CONST_BITS)   ; psrad mm2,16 & pslld mm2,CONST_BITS
+
+        movq    mm0,mm7
+        paddd   mm7,mm4                 ; mm7=tmp10L
+        psubd   mm0,mm4                 ; mm0=tmp13L
+        movq    mm4,mm2
+        paddd   mm2,mm5                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp13H
+
+        movq    MMWORD [wk(0)], mm7     ; wk(0)=tmp10L
+        movq    MMWORD [wk(1)], mm2     ; wk(1)=tmp10H
+        movq    MMWORD [wk(2)], mm0     ; wk(2)=tmp13L
+        movq    MMWORD [wk(3)], mm4     ; wk(3)=tmp13H
+
+        pxor      mm5,mm5
+        pxor      mm7,mm7
+        punpcklwd mm5,mm6               ; mm5=tmp1L
+        punpckhwd mm7,mm6               ; mm7=tmp1H
+        psrad     mm5,(16-CONST_BITS)   ; psrad mm5,16 & pslld mm5,CONST_BITS
+        psrad     mm7,(16-CONST_BITS)   ; psrad mm7,16 & pslld mm7,CONST_BITS
+
+        movq    mm2,mm5
+        paddd   mm5,mm1                 ; mm5=tmp11L
+        psubd   mm2,mm1                 ; mm2=tmp12L
+        movq    mm0,mm7
+        paddd   mm7,mm3                 ; mm7=tmp11H
+        psubd   mm0,mm3                 ; mm0=tmp12H
+
+        movq    MMWORD [wk(4)], mm5     ; wk(4)=tmp11L
+        movq    MMWORD [wk(5)], mm7     ; wk(5)=tmp11H
+        movq    MMWORD [wk(6)], mm2     ; wk(6)=tmp12L
+        movq    MMWORD [wk(7)], mm0     ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movq    mm4, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm6, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        movq    mm5,mm6
+        movq    mm7,mm4
+        paddw   mm5,mm3                 ; mm5=z3
+        paddw   mm7,mm1                 ; mm7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movq      mm2,mm5
+        movq      mm0,mm5
+        punpcklwd mm2,mm7
+        punpckhwd mm0,mm7
+        movq      mm5,mm2
+        movq      mm7,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF078_F117)]       ; mm2=z3L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF078_F117)]       ; mm0=z3H
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F117_F078)]        ; mm5=z4L
+        pmaddwd   mm7,[GOTOFF(ebx,PW_F117_F078)]        ; mm7=z4H
+
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=z3L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movq      mm2,mm3
+        movq      mm0,mm3
+        punpcklwd mm2,mm4
+        punpckhwd mm0,mm4
+        movq      mm3,mm2
+        movq      mm4,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm2=tmp0L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF060_MF089)]      ; mm0=tmp0H
+        pmaddwd   mm3,[GOTOFF(ebx,PW_MF089_F060)]       ; mm3=tmp3L
+        pmaddwd   mm4,[GOTOFF(ebx,PW_MF089_F060)]       ; mm4=tmp3H
+
+        paddd   mm2, MMWORD [wk(10)]    ; mm2=tmp0L
+        paddd   mm0, MMWORD [wk(11)]    ; mm0=tmp0H
+        paddd   mm3,mm5                 ; mm3=tmp3L
+        paddd   mm4,mm7                 ; mm4=tmp3H
+
+        movq    MMWORD [wk(8)], mm2     ; wk(8)=tmp0L
+        movq    MMWORD [wk(9)], mm0     ; wk(9)=tmp0H
+
+        movq      mm2,mm1
+        movq      mm0,mm1
+        punpcklwd mm2,mm6
+        punpckhwd mm0,mm6
+        movq      mm1,mm2
+        movq      mm6,mm0
+        pmaddwd   mm2,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm2=tmp1L
+        pmaddwd   mm0,[GOTOFF(ebx,PW_MF050_MF256)]      ; mm0=tmp1H
+        pmaddwd   mm1,[GOTOFF(ebx,PW_MF256_F050)]       ; mm1=tmp2L
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF256_F050)]       ; mm6=tmp2H
+
+        paddd   mm2,mm5                 ; mm2=tmp1L
+        paddd   mm0,mm7                 ; mm0=tmp1H
+        paddd   mm1, MMWORD [wk(10)]    ; mm1=tmp2L
+        paddd   mm6, MMWORD [wk(11)]    ; mm6=tmp2H
+
+        movq    MMWORD [wk(10)], mm2    ; wk(10)=tmp1L
+        movq    MMWORD [wk(11)], mm0    ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movq    mm5, MMWORD [wk(0)]     ; mm5=tmp10L
+        movq    mm7, MMWORD [wk(1)]     ; mm7=tmp10H
+
+        movq    mm2,mm5
+        movq    mm0,mm7
+        paddd   mm5,mm3                 ; mm5=data0L
+        paddd   mm7,mm4                 ; mm7=data0H
+        psubd   mm2,mm3                 ; mm2=data7L
+        psubd   mm0,mm4                 ; mm0=data7H
+
+        movq    mm3,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm3=[PD_DESCALE_P2]
+
+        paddd   mm5,mm3
+        paddd   mm7,mm3
+        psrad   mm5,DESCALE_P2
+        psrad   mm7,DESCALE_P2
+        paddd   mm2,mm3
+        paddd   mm0,mm3
+        psrad   mm2,DESCALE_P2
+        psrad   mm0,DESCALE_P2
+
+        packssdw  mm5,mm7               ; mm5=data0=(00 10 20 30)
+        packssdw  mm2,mm0               ; mm2=data7=(07 17 27 37)
+
+        movq    mm4, MMWORD [wk(4)]     ; mm4=tmp11L
+        movq    mm3, MMWORD [wk(5)]     ; mm3=tmp11H
+
+        movq    mm7,mm4
+        movq    mm0,mm3
+        paddd   mm4,mm1                 ; mm4=data1L
+        paddd   mm3,mm6                 ; mm3=data1H
+        psubd   mm7,mm1                 ; mm7=data6L
+        psubd   mm0,mm6                 ; mm0=data6H
+
+        movq    mm1,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm1=[PD_DESCALE_P2]
+
+        paddd   mm4,mm1
+        paddd   mm3,mm1
+        psrad   mm4,DESCALE_P2
+        psrad   mm3,DESCALE_P2
+        paddd   mm7,mm1
+        paddd   mm0,mm1
+        psrad   mm7,DESCALE_P2
+        psrad   mm0,DESCALE_P2
+
+        packssdw  mm4,mm3               ; mm4=data1=(01 11 21 31)
+        packssdw  mm7,mm0               ; mm7=data6=(06 16 26 36)
+
+        packsswb  mm5,mm7               ; mm5=(00 10 20 30 06 16 26 36)
+        packsswb  mm4,mm2               ; mm4=(01 11 21 31 07 17 27 37)
+
+        movq    mm6, MMWORD [wk(6)]     ; mm6=tmp12L
+        movq    mm1, MMWORD [wk(7)]     ; mm1=tmp12H
+        movq    mm3, MMWORD [wk(10)]    ; mm3=tmp1L
+        movq    mm0, MMWORD [wk(11)]    ; mm0=tmp1H
+
+        movq    MMWORD [wk(0)], mm5     ; wk(0)=(00 10 20 30 06 16 26 36)
+        movq    MMWORD [wk(1)], mm4     ; wk(1)=(01 11 21 31 07 17 27 37)
+
+        movq    mm7,mm6
+        movq    mm2,mm1
+        paddd   mm6,mm3                 ; mm6=data2L
+        paddd   mm1,mm0                 ; mm1=data2H
+        psubd   mm7,mm3                 ; mm7=data5L
+        psubd   mm2,mm0                 ; mm2=data5H
+
+        movq    mm5,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm5=[PD_DESCALE_P2]
+
+        paddd   mm6,mm5
+        paddd   mm1,mm5
+        psrad   mm6,DESCALE_P2
+        psrad   mm1,DESCALE_P2
+        paddd   mm7,mm5
+        paddd   mm2,mm5
+        psrad   mm7,DESCALE_P2
+        psrad   mm2,DESCALE_P2
+
+        packssdw  mm6,mm1               ; mm6=data2=(02 12 22 32)
+        packssdw  mm7,mm2               ; mm7=data5=(05 15 25 35)
+
+        movq    mm4, MMWORD [wk(2)]     ; mm4=tmp13L
+        movq    mm3, MMWORD [wk(3)]     ; mm3=tmp13H
+        movq    mm0, MMWORD [wk(8)]     ; mm0=tmp0L
+        movq    mm5, MMWORD [wk(9)]     ; mm5=tmp0H
+
+        movq    mm1,mm4
+        movq    mm2,mm3
+        paddd   mm4,mm0                 ; mm4=data3L
+        paddd   mm3,mm5                 ; mm3=data3H
+        psubd   mm1,mm0                 ; mm1=data4L
+        psubd   mm2,mm5                 ; mm2=data4H
+
+        movq    mm0,[GOTOFF(ebx,PD_DESCALE_P2)] ; mm0=[PD_DESCALE_P2]
+
+        paddd   mm4,mm0
+        paddd   mm3,mm0
+        psrad   mm4,DESCALE_P2
+        psrad   mm3,DESCALE_P2
+        paddd   mm1,mm0
+        paddd   mm2,mm0
+        psrad   mm1,DESCALE_P2
+        psrad   mm2,DESCALE_P2
+
+        movq      mm5,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm5=[PB_CENTERJSAMP]
+
+        packssdw  mm4,mm3               ; mm4=data3=(03 13 23 33)
+        packssdw  mm1,mm2               ; mm1=data4=(04 14 24 34)
+
+        movq      mm0, MMWORD [wk(0)]   ; mm0=(00 10 20 30 06 16 26 36)
+        movq      mm3, MMWORD [wk(1)]   ; mm3=(01 11 21 31 07 17 27 37)
+
+        packsswb  mm6,mm1               ; mm6=(02 12 22 32 04 14 24 34)
+        packsswb  mm4,mm7               ; mm4=(03 13 23 33 05 15 25 35)
+
+        paddb     mm0,mm5
+        paddb     mm3,mm5
+        paddb     mm6,mm5
+        paddb     mm4,mm5
+
+        movq      mm2,mm0               ; transpose coefficients(phase 1)
+        punpcklbw mm0,mm3               ; mm0=(00 01 10 11 20 21 30 31)
+        punpckhbw mm2,mm3               ; mm2=(06 07 16 17 26 27 36 37)
+        movq      mm1,mm6               ; transpose coefficients(phase 1)
+        punpcklbw mm6,mm4               ; mm6=(02 03 12 13 22 23 32 33)
+        punpckhbw mm1,mm4               ; mm1=(04 05 14 15 24 25 34 35)
+
+        movq      mm7,mm0               ; transpose coefficients(phase 2)
+        punpcklwd mm0,mm6               ; mm0=(00 01 02 03 10 11 12 13)
+        punpckhwd mm7,mm6               ; mm7=(20 21 22 23 30 31 32 33)
+        movq      mm5,mm1               ; transpose coefficients(phase 2)
+        punpcklwd mm1,mm2               ; mm1=(04 05 06 07 14 15 16 17)
+        punpckhwd mm5,mm2               ; mm5=(24 25 26 27 34 35 36 37)
+
+        movq      mm3,mm0               ; transpose coefficients(phase 3)
+        punpckldq mm0,mm1               ; mm0=(00 01 02 03 04 05 06 07)
+        punpckhdq mm3,mm1               ; mm3=(10 11 12 13 14 15 16 17)
+        movq      mm4,mm7               ; transpose coefficients(phase 3)
+        punpckldq mm7,mm5               ; mm7=(20 21 22 23 24 25 26 27)
+        punpckhdq mm4,mm5               ; mm4=(30 31 32 33 34 35 36 37)
+
+        pushpic ebx                     ; save GOT address
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
+        movq    MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
+
+        poppic  ebx                     ; restore GOT address
+
+        add     esi, byte 4*SIZEOF_JCOEF        ; wsptr
+        add     edi, byte 4*SIZEOF_JSAMPROW
+        dec     ecx                             ; ctr
+        jnz     near .rowloop
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctint-sse2-64.asm b/simd/jidctint-sse2-64.asm
new file mode 100644
index 0000000..32bbfd8
--- /dev/null
+++ b/simd/jidctint-sse2-64.asm
@@ -0,0 +1,848 @@
+;
+; jidctint-sse2-64.asm - accurate integer IDCT (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it can
+; *not* be assembled with Microsoft's MASM or any compatible assembler
+; (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see the jidctint.c for
+; more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13     ; fractional bits in the F_* fixed-point constants
+%define PASS1_BITS      2      ; scaling bits applied via psllw on the all-zero-AC column path
+
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; shift count paired with PD_DESCALE_P1
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)       ; shift count paired with PD_DESCALE_P2
+; F_a_bcd = FIX(a.bcd...) = the real constant scaled by 2^CONST_BITS and rounded.
+%if CONST_BITS == 13
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so the literals below are pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))         ; right shift by n with round-to-nearest
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_islow_sse2)
+
+EXTN(jconst_idct_islow_sse2):
+; Each PW_* row is one (word,word) coefficient pair repeated 4x = 16 bytes, used as a pmaddwd operand.
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541         ; (~ 1.3066, ~ 0.5412)
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)         ; (~ 0.5412, ~-1.3066)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175         ; (~-0.7857, ~ 1.1759)
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)         ; (~ 1.1759, ~ 0.7857)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899         ; (~-0.6013, ~-0.9000)
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)         ; (~-0.9000, ~ 0.6013)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562         ; (~-0.5098, ~-2.5629)
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)         ; (~-2.5629, ~ 0.5098)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)                ; half of 1<<DESCALE_P1: rounding bias for that shift
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)                ; half of 1<<DESCALE_P2: rounding bias for that shift
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE                      ; CENTERJSAMPLE replicated into all 16 bytes
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_islow_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+; r10 = void * dct_table
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13 = JDIMENSION output_col
+
+%define original_rbp    rbp+0          ; [rbp] holds the stack pointer saved before alignment
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]; wk(0) is lowest, wk(WK_NUM-1) ends at rbp
+%define WK_NUM          12             ; number of XMMWORD scratch slots in wk[]
+
+        align   16
+        global  EXTN(jsimd_idct_islow_sse2)
+
+EXTN(jsimd_idct_islow_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax = original rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]
+        collect_args
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    rax,rax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movdqa  xmm5, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   xmm5,PASS1_BITS
+
+        movdqa    xmm4,xmm5             ; xmm5=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm5,xmm5             ; xmm5=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm4,xmm4             ; xmm4=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm7,xmm5,0x00          ; xmm7=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm6,xmm5,0x55          ; xmm6=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm1,xmm5,0xAA          ; xmm1=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm5,xmm5,0xFF          ; xmm5=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm0,xmm4,0x00          ; xmm0=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm3,xmm4,0x55          ; xmm3=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm2,xmm4,0xAA          ; xmm2=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm4,xmm4,0xFF          ; xmm4=col7=(07 07 07 07 07 07 07 07)
+
+        movdqa  XMMWORD [wk(8)], xmm6   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm5   ; wk(9)=col3
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+        jmp     near .column_end
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movdqa    xmm4,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm4,xmm3             ; xmm3=in6=z3
+        punpckhwd xmm5,xmm3
+        movdqa    xmm1,xmm4
+        movdqa    xmm3,xmm5
+        pmaddwd   xmm4,[rel PW_F130_F054]       ; xmm4=tmp3L
+        pmaddwd   xmm5,[rel PW_F130_F054]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[rel PW_F054_MF130]      ; xmm1=tmp2L
+        pmaddwd   xmm3,[rel PW_F054_MF130]      ; xmm3=tmp2H
+
+        movdqa    xmm6,xmm0
+        paddw     xmm0,xmm2             ; xmm0=in0+in4
+        psubw     xmm6,xmm2             ; xmm6=in0-in4
+
+        pxor      xmm7,xmm7
+        pxor      xmm2,xmm2
+        punpcklwd xmm7,xmm0             ; xmm7=tmp0L
+        punpckhwd xmm2,xmm0             ; xmm2=tmp0H
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        psrad     xmm2,(16-CONST_BITS)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS
+
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm4               ; xmm7=tmp10L
+        psubd   xmm0,xmm4               ; xmm0=tmp13L
+        movdqa  xmm4,xmm2
+        paddd   xmm2,xmm5               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp13H
+
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm0   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm4   ; wk(3)=tmp13H
+
+        pxor      xmm5,xmm5
+        pxor      xmm7,xmm7
+        punpcklwd xmm5,xmm6             ; xmm5=tmp1L
+        punpckhwd xmm7,xmm6             ; xmm7=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+
+        movdqa  xmm2,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm2,xmm1               ; xmm2=tmp12L
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm3               ; xmm7=tmp11H
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm7   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm0   ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movdqa  xmm4, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movdqa  xmm5,xmm6
+        movdqa  xmm7,xmm4
+        paddw   xmm5,xmm3               ; xmm5=z3
+        paddw   xmm7,xmm1               ; xmm7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm2,xmm5
+        movdqa    xmm0,xmm5
+        punpcklwd xmm2,xmm7
+        punpckhwd xmm0,xmm7
+        movdqa    xmm5,xmm2
+        movdqa    xmm7,xmm0
+        pmaddwd   xmm2,[rel PW_MF078_F117]      ; xmm2=z3L
+        pmaddwd   xmm0,[rel PW_MF078_F117]      ; xmm0=z3H
+        pmaddwd   xmm5,[rel PW_F117_F078]       ; xmm5=z4L
+        pmaddwd   xmm7,[rel PW_F117_F078]       ; xmm7=z4H
+
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movdqa    xmm2,xmm3
+        movdqa    xmm0,xmm3
+        punpcklwd xmm2,xmm4
+        punpckhwd xmm0,xmm4
+        movdqa    xmm3,xmm2
+        movdqa    xmm4,xmm0
+        pmaddwd   xmm2,[rel PW_MF060_MF089]     ; xmm2=tmp0L
+        pmaddwd   xmm0,[rel PW_MF060_MF089]     ; xmm0=tmp0H
+        pmaddwd   xmm3,[rel PW_MF089_F060]      ; xmm3=tmp3L
+        pmaddwd   xmm4,[rel PW_MF089_F060]      ; xmm4=tmp3H
+
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp0L
+        paddd   xmm0, XMMWORD [wk(11)]  ; xmm0=tmp0H
+        paddd   xmm3,xmm5               ; xmm3=tmp3L
+        paddd   xmm4,xmm7               ; xmm4=tmp3H
+
+        movdqa  XMMWORD [wk(8)], xmm2   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm0   ; wk(9)=tmp0H
+
+        movdqa    xmm2,xmm1
+        movdqa    xmm0,xmm1
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm0,xmm6
+        movdqa    xmm1,xmm2
+        movdqa    xmm6,xmm0
+        pmaddwd   xmm2,[rel PW_MF050_MF256]     ; xmm2=tmp1L
+        pmaddwd   xmm0,[rel PW_MF050_MF256]     ; xmm0=tmp1H
+        pmaddwd   xmm1,[rel PW_MF256_F050]      ; xmm1=tmp2L
+        pmaddwd   xmm6,[rel PW_MF256_F050]      ; xmm6=tmp2H
+
+        paddd   xmm2,xmm5               ; xmm2=tmp1L
+        paddd   xmm0,xmm7               ; xmm0=tmp1H
+        paddd   xmm1, XMMWORD [wk(10)]  ; xmm1=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=tmp10H
+
+        movdqa  xmm2,xmm5
+        movdqa  xmm0,xmm7
+        paddd   xmm5,xmm3               ; xmm5=data0L
+        paddd   xmm7,xmm4               ; xmm7=data0H
+        psubd   xmm2,xmm3               ; xmm2=data7L
+        psubd   xmm0,xmm4               ; xmm0=data7H
+
+        movdqa  xmm3,[rel PD_DESCALE_P1]        ; xmm3=[rel PD_DESCALE_P1]
+
+        paddd   xmm5,xmm3
+        paddd   xmm7,xmm3
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm7,DESCALE_P1
+        paddd   xmm2,xmm3
+        paddd   xmm0,xmm3
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm5,xmm7             ; xmm5=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm2,xmm0             ; xmm2=data7=(70 71 72 73 74 75 76 77)
+
+        movdqa  xmm4, XMMWORD [wk(4)]   ; xmm4=tmp11L
+        movdqa  xmm3, XMMWORD [wk(5)]   ; xmm3=tmp11H
+
+        movdqa  xmm7,xmm4
+        movdqa  xmm0,xmm3
+        paddd   xmm4,xmm1               ; xmm4=data1L
+        paddd   xmm3,xmm6               ; xmm3=data1H
+        psubd   xmm7,xmm1               ; xmm7=data6L
+        psubd   xmm0,xmm6               ; xmm0=data6H
+
+        movdqa  xmm1,[rel PD_DESCALE_P1]        ; xmm1=[rel PD_DESCALE_P1]
+
+        paddd   xmm4,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+        paddd   xmm7,xmm1
+        paddd   xmm0,xmm1
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm4,xmm3             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm7,xmm0             ; xmm7=data6=(60 61 62 63 64 65 66 67)
+
+        movdqa    xmm6,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm4             ; xmm5=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4             ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm2             ; xmm7=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm1,xmm2             ; xmm1=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm3, XMMWORD [wk(6)]   ; xmm3=tmp12L
+        movdqa  xmm0, XMMWORD [wk(7)]   ; xmm0=tmp12H
+        movdqa  xmm4, XMMWORD [wk(10)]  ; xmm4=tmp1L
+        movdqa  xmm2, XMMWORD [wk(11)]  ; xmm2=tmp1H
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 01 11 02 12 03 13)
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=(04 14 05 15 06 16 07 17)
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(5)], xmm1   ; wk(5)=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm5,xmm3
+        movdqa  xmm6,xmm0
+        paddd   xmm3,xmm4               ; xmm3=data2L
+        paddd   xmm0,xmm2               ; xmm0=data2H
+        psubd   xmm5,xmm4               ; xmm5=data5L
+        psubd   xmm6,xmm2               ; xmm6=data5H
+
+        movdqa  xmm7,[rel PD_DESCALE_P1]        ; xmm7=[rel PD_DESCALE_P1]
+
+        paddd   xmm3,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm3,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+        paddd   xmm5,xmm7
+        paddd   xmm6,xmm7
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+
+        packssdw  xmm3,xmm0             ; xmm3=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm5,xmm6             ; xmm5=data5=(50 51 52 53 54 55 56 57)
+
+        movdqa  xmm1, XMMWORD [wk(2)]   ; xmm1=tmp13L
+        movdqa  xmm4, XMMWORD [wk(3)]   ; xmm4=tmp13H
+        movdqa  xmm2, XMMWORD [wk(8)]   ; xmm2=tmp0L
+        movdqa  xmm7, XMMWORD [wk(9)]   ; xmm7=tmp0H
+
+        movdqa  xmm0,xmm1
+        movdqa  xmm6,xmm4
+        paddd   xmm1,xmm2               ; xmm1=data3L
+        paddd   xmm4,xmm7               ; xmm4=data3H
+        psubd   xmm0,xmm2               ; xmm0=data4L
+        psubd   xmm6,xmm7               ; xmm6=data4H
+
+        movdqa  xmm2,[rel PD_DESCALE_P1]        ; xmm2=[rel PD_DESCALE_P1]
+
+        paddd   xmm1,xmm2
+        paddd   xmm4,xmm2
+        psrad   xmm1,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm0,xmm2
+        paddd   xmm6,xmm2
+        psrad   xmm0,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+
+        packssdw  xmm1,xmm4             ; xmm1=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm0,xmm6             ; xmm0=data4=(40 41 42 43 44 45 46 47)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(00 10 01 11 02 12 03 13)
+        movdqa  xmm2, XMMWORD [wk(1)]   ; xmm2=(04 14 05 15 06 16 07 17)
+
+        movdqa    xmm4,xmm3             ; transpose coefficients(phase 1)
+        punpcklwd xmm3,xmm1             ; xmm3=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm1             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm6,xmm5             ; xmm6=(44 54 45 55 46 56 47 57)
+
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm3             ; xmm7=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm1,xmm3             ; xmm1=(02 12 22 32 03 13 23 33)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm4             ; xmm2=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm5,xmm4             ; xmm5=(06 16 26 36 07 17 27 37)
+
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=(60 70 61 71 62 72 63 73)
+        movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(64 74 65 75 66 76 67 77)
+
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(7)], xmm5   ; wk(7)=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm2,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm2,xmm3             ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm4             ; xmm6=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm5,xmm4             ; xmm5=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm3,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm0            ; xmm7=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm3,xmm0            ; xmm3=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm4,xmm2            ; xmm4=col3=(03 13 23 33 43 53 63 73)
+
+        movdqa  xmm0, XMMWORD [wk(6)]   ; xmm0=(04 14 24 34 05 15 25 35)
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=(06 16 26 36 07 17 27 37)
+
+        movdqa  XMMWORD [wk(8)], xmm3   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm4   ; wk(9)=col3
+
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm3,xmm6            ; xmm3=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm4,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm5            ; xmm2=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm4,xmm5            ; xmm4=col7=(07 17 27 37 47 57 67 77)
+
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+.column_end:
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     rax, [original_rbp]
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     rax, r13
+
+        ; -- Even part
+
+        ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movdqa    xmm6,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm6,xmm2             ; xmm2=in6=z3
+        punpckhwd xmm5,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm2,xmm5
+        pmaddwd   xmm6,[rel PW_F130_F054]       ; xmm6=tmp3L
+        pmaddwd   xmm5,[rel PW_F130_F054]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[rel PW_F054_MF130]      ; xmm1=tmp2L
+        pmaddwd   xmm2,[rel PW_F054_MF130]      ; xmm2=tmp2H
+
+        movdqa    xmm3,xmm7
+        paddw     xmm7,xmm0             ; xmm7=in0+in4
+        psubw     xmm3,xmm0             ; xmm3=in0-in4
+
+        pxor      xmm4,xmm4
+        pxor      xmm0,xmm0
+        punpcklwd xmm4,xmm7             ; xmm4=tmp0L
+        punpckhwd xmm0,xmm7             ; xmm0=tmp0H
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        psrad     xmm0,(16-CONST_BITS)  ; psrad xmm0,16 & pslld xmm0,CONST_BITS
+
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm6               ; xmm4=tmp10L
+        psubd   xmm7,xmm6               ; xmm7=tmp13L
+        movdqa  xmm6,xmm0
+        paddd   xmm0,xmm5               ; xmm0=tmp10H
+        psubd   xmm6,xmm5               ; xmm6=tmp13H
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm6   ; wk(3)=tmp13H
+
+        pxor      xmm5,xmm5
+        pxor      xmm4,xmm4
+        punpcklwd xmm5,xmm3             ; xmm5=tmp1L
+        punpckhwd xmm4,xmm3             ; xmm4=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+
+        movdqa  xmm0,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm0,xmm1               ; xmm0=tmp12L
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm2               ; xmm4=tmp11H
+        psubd   xmm7,xmm2               ; xmm7=tmp12H
+
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm0   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm7   ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movdqa  xmm6, XMMWORD [wk(9)]   ; xmm6=col3
+        movdqa  xmm3, XMMWORD [wk(8)]   ; xmm3=col1
+        movdqa  xmm1, XMMWORD [wk(11)]  ; xmm1=col7
+        movdqa  xmm2, XMMWORD [wk(10)]  ; xmm2=col5
+
+        movdqa  xmm5,xmm6
+        movdqa  xmm4,xmm3
+        paddw   xmm5,xmm1               ; xmm5=z3
+        paddw   xmm4,xmm2               ; xmm4=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm0,xmm5
+        movdqa    xmm7,xmm5
+        punpcklwd xmm0,xmm4
+        punpckhwd xmm7,xmm4
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm7
+        pmaddwd   xmm0,[rel PW_MF078_F117]      ; xmm0=z3L
+        pmaddwd   xmm7,[rel PW_MF078_F117]      ; xmm7=z3H
+        pmaddwd   xmm5,[rel PW_F117_F078]       ; xmm5=z4L
+        pmaddwd   xmm4,[rel PW_F117_F078]       ; xmm4=z4H
+
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movdqa    xmm0,xmm1
+        movdqa    xmm7,xmm1
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm1,xmm0
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm0,[rel PW_MF060_MF089]     ; xmm0=tmp0L
+        pmaddwd   xmm7,[rel PW_MF060_MF089]     ; xmm7=tmp0H
+        pmaddwd   xmm1,[rel PW_MF089_F060]      ; xmm1=tmp3L
+        pmaddwd   xmm3,[rel PW_MF089_F060]      ; xmm3=tmp3H
+
+        paddd   xmm0, XMMWORD [wk(10)]  ; xmm0=tmp0L
+        paddd   xmm7, XMMWORD [wk(11)]  ; xmm7=tmp0H
+        paddd   xmm1,xmm5               ; xmm1=tmp3L
+        paddd   xmm3,xmm4               ; xmm3=tmp3H
+
+        movdqa  XMMWORD [wk(8)], xmm0   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm7   ; wk(9)=tmp0H
+
+        movdqa    xmm0,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm0,xmm6
+        punpckhwd xmm7,xmm6
+        movdqa    xmm2,xmm0
+        movdqa    xmm6,xmm7
+        pmaddwd   xmm0,[rel PW_MF050_MF256]     ; xmm0=tmp1L
+        pmaddwd   xmm7,[rel PW_MF050_MF256]     ; xmm7=tmp1H
+        pmaddwd   xmm2,[rel PW_MF256_F050]      ; xmm2=tmp2L
+        pmaddwd   xmm6,[rel PW_MF256_F050]      ; xmm6=tmp2H
+
+        paddd   xmm0,xmm5               ; xmm0=tmp1L
+        paddd   xmm7,xmm4               ; xmm7=tmp1H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm4, XMMWORD [wk(1)]   ; xmm4=tmp10H
+
+        movdqa  xmm0,xmm5
+        movdqa  xmm7,xmm4
+        paddd   xmm5,xmm1               ; xmm5=data0L
+        paddd   xmm4,xmm3               ; xmm4=data0H
+        psubd   xmm0,xmm1               ; xmm0=data7L
+        psubd   xmm7,xmm3               ; xmm7=data7H
+
+        movdqa  xmm1,[rel PD_DESCALE_P2]        ; xmm1=[rel PD_DESCALE_P2]
+
+        paddd   xmm5,xmm1
+        paddd   xmm4,xmm1
+        psrad   xmm5,DESCALE_P2
+        psrad   xmm4,DESCALE_P2
+        paddd   xmm0,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
+
+        packssdw  xmm5,xmm4             ; xmm5=data0=(00 10 20 30 40 50 60 70)
+        packssdw  xmm0,xmm7             ; xmm0=data7=(07 17 27 37 47 57 67 77)
+
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=tmp11L
+        movdqa  xmm1, XMMWORD [wk(5)]   ; xmm1=tmp11H
+
+        movdqa  xmm4,xmm3
+        movdqa  xmm7,xmm1
+        paddd   xmm3,xmm2               ; xmm3=data1L
+        paddd   xmm1,xmm6               ; xmm1=data1H
+        psubd   xmm4,xmm2               ; xmm4=data6L
+        psubd   xmm7,xmm6               ; xmm7=data6H
+
+        movdqa  xmm2,[rel PD_DESCALE_P2]        ; xmm2=[rel PD_DESCALE_P2]
+
+        paddd   xmm3,xmm2
+        paddd   xmm1,xmm2
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm4,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
+
+        packssdw  xmm3,xmm1             ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        packssdw  xmm4,xmm7             ; xmm4=data6=(06 16 26 36 46 56 66 76)
+
+        packsswb  xmm5,xmm4             ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm0             ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm6, XMMWORD [wk(6)]   ; xmm6=tmp12L
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=tmp12H
+        movdqa  xmm1, XMMWORD [wk(10)]  ; xmm1=tmp1L
+        movdqa  xmm7, XMMWORD [wk(11)]  ; xmm7=tmp1H
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm4,xmm6
+        movdqa  xmm0,xmm2
+        paddd   xmm6,xmm1               ; xmm6=data2L
+        paddd   xmm2,xmm7               ; xmm2=data2H
+        psubd   xmm4,xmm1               ; xmm4=data5L
+        psubd   xmm0,xmm7               ; xmm0=data5H
+
+        movdqa  xmm5,[rel PD_DESCALE_P2]        ; xmm5=[rel PD_DESCALE_P2]
+
+        paddd   xmm6,xmm5
+        paddd   xmm2,xmm5
+        psrad   xmm6,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm4,xmm5
+        paddd   xmm0,xmm5
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
+
+        packssdw  xmm6,xmm2             ; xmm6=data2=(02 12 22 32 42 52 62 72)
+        packssdw  xmm4,xmm0             ; xmm4=data5=(05 15 25 35 45 55 65 75)
+
+        movdqa  xmm3, XMMWORD [wk(2)]   ; xmm3=tmp13L
+        movdqa  xmm1, XMMWORD [wk(3)]   ; xmm1=tmp13H
+        movdqa  xmm7, XMMWORD [wk(8)]   ; xmm7=tmp0L
+        movdqa  xmm5, XMMWORD [wk(9)]   ; xmm5=tmp0H
+
+        movdqa  xmm2,xmm3
+        movdqa  xmm0,xmm1
+        paddd   xmm3,xmm7               ; xmm3=data3L
+        paddd   xmm1,xmm5               ; xmm1=data3H
+        psubd   xmm2,xmm7               ; xmm2=data4L
+        psubd   xmm0,xmm5               ; xmm0=data4H
+
+        movdqa  xmm7,[rel PD_DESCALE_P2]        ; xmm7=[rel PD_DESCALE_P2]
+
+        paddd   xmm3,xmm7
+        paddd   xmm1,xmm7
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm2,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm2,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
+
+        movdqa    xmm5,[rel PB_CENTERJSAMP]     ; xmm5=[rel PB_CENTERJSAMP]
+
+        packssdw  xmm3,xmm1             ; xmm3=data3=(03 13 23 33 43 53 63 73)
+        packssdw  xmm2,xmm0             ; xmm2=data4=(04 14 24 34 44 54 64 74)
+
+        movdqa    xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        packsswb  xmm6,xmm2             ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm3,xmm4             ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+
+        paddb     xmm7,xmm5
+        paddb     xmm1,xmm5
+        paddb     xmm6,xmm5
+        paddb     xmm3,xmm5
+
+        movdqa    xmm0,xmm7     ; transpose coefficients(phase 1)
+        punpcklbw xmm7,xmm1     ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm1     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 1)
+        punpcklbw xmm6,xmm3     ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm2,xmm3     ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+        movdqa    xmm4,xmm7     ; transpose coefficients(phase 2)
+        punpcklwd xmm7,xmm6     ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm6     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm5,xmm2     ; transpose coefficients(phase 2)
+        punpcklwd xmm2,xmm0     ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm5,xmm0     ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+        movdqa    xmm1,xmm7     ; transpose coefficients(phase 3)
+        punpckldq xmm7,xmm2     ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm1,xmm2     ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm3,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm5     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm3,xmm5     ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+        pshufd  xmm6,xmm7,0x4E  ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm1,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm2,xmm4,0x4E  ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm5,xmm3,0x4E  ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm7
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm1
+        mov     rdx, JSAMPROW [rdi+4*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+
+        mov     rdx, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
+        mov     rdx, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
+        movq    XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
+
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16              ; pad the end of this file's code to a 16-byte boundary
diff --git a/simd/jidctint-sse2.asm b/simd/jidctint-sse2.asm
new file mode 100644
index 0000000..4a35f3d
--- /dev/null
+++ b/simd/jidctint-sse2.asm
@@ -0,0 +1,859 @@
+;
+; jidctint.asm - accurate integer IDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains a slow-but-accurate integer implementation of the
+; inverse DCT (Discrete Cosine Transform). The following code is based
+; directly on the IJG's original jidctint.c; see jidctint.c for
+; more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13      ; fractional bits of the F_* fixed-point constants: FIX(x) = x * 2^CONST_BITS
+%define PASS1_BITS      2       ; extra scaling bits left in the pass-1 (column) results
+
+%define DESCALE_P1      (CONST_BITS-PASS1_BITS)         ; psrad count used on pass-1 results
+%define DESCALE_P2      (CONST_BITS+PASS1_BITS+3)       ; psrad count used on pass-2 results
+
+%if CONST_BITS == 13            ; precomputed table for the default precision
+F_0_298 equ      2446           ; FIX(0.298631336)
+F_0_390 equ      3196           ; FIX(0.390180644)
+F_0_541 equ      4433           ; FIX(0.541196100)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_175 equ      9633           ; FIX(1.175875602)
+F_1_501 equ     12299           ; FIX(1.501321110)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_1_961 equ     16069           ; FIX(1.961570560)
+F_2_053 equ     16819           ; FIX(2.053119869)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_072 equ     25172           ; FIX(3.072711026)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value below is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))          ; right shift by n with round-to-nearest bias
+F_0_298 equ     DESCALE( 320652955,30-CONST_BITS)       ; FIX(0.298631336)
+F_0_390 equ     DESCALE( 418953276,30-CONST_BITS)       ; FIX(0.390180644)
+F_0_541 equ     DESCALE( 581104887,30-CONST_BITS)       ; FIX(0.541196100)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_175 equ     DESCALE(1262586813,30-CONST_BITS)       ; FIX(1.175875602)
+F_1_501 equ     DESCALE(1612031267,30-CONST_BITS)       ; FIX(1.501321110)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_1_961 equ     DESCALE(2106220350,30-CONST_BITS)       ; FIX(1.961570560)
+F_2_053 equ     DESCALE(2204520673,30-CONST_BITS)       ; FIX(2.053119869)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_072 equ     DESCALE(3299298341,30-CONST_BITS)       ; FIX(3.072711026)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16              ; table entries are read with movdqa/pmaddwd memory operands
+        global  EXTN(jconst_idct_islow_sse2)
+
+EXTN(jconst_idct_islow_sse2):   ; constants for jsimd_idct_islow_sse2; each PW_* row is a word pair repeated 4x for pmaddwd
+
+PW_F130_F054    times 4 dw  (F_0_541+F_0_765), F_0_541   ; tmp3 = z2*(0.541+0.765) + z3*0.541
+PW_F054_MF130   times 4 dw  F_0_541, (F_0_541-F_1_847)   ; tmp2 = z2*0.541 + z3*(0.541-1.847)
+PW_MF078_F117   times 4 dw  (F_1_175-F_1_961), F_1_175   ; z3 = z3*(1.175-1.961) + z4*1.175
+PW_F117_F078    times 4 dw  F_1_175, (F_1_175-F_0_390)   ; z4 = z3*1.175 + z4*(1.175-0.390)
+PW_MF060_MF089  times 4 dw  (F_0_298-F_0_899),-F_0_899   ; tmp0 = tmp0*(0.298-0.899) + tmp3*(-0.899)
+PW_MF089_F060   times 4 dw -F_0_899, (F_1_501-F_0_899)   ; tmp3 = tmp0*(-0.899) + tmp3*(1.501-0.899)
+PW_MF050_MF256  times 4 dw  (F_2_053-F_2_562),-F_2_562   ; tmp1 = tmp1*(2.053-2.562) + tmp2*(-2.562)
+PW_MF256_F050   times 4 dw -F_2_562, (F_3_072-F_2_562)   ; tmp2 = tmp1*(-2.562) + tmp2*(3.072-2.562)
+PD_DESCALE_P1   times 4 dd  1 << (DESCALE_P1-1)          ; rounding bias added before psrad by DESCALE_P1
+PD_DESCALE_P2   times 4 dd  1 << (DESCALE_P2-1)          ; rounding bias added before psrad by DESCALE_P2
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE                ; added with paddb to the packed pass-2 output bytes
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients.
+;
+; GLOBAL(void)
+; jsimd_idct_islow_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                        JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot where the prologue stores esp as it was right after `push ebp` (base for the argument macros)
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          12              ; number of xmmword work slots, wk(0)..wk(11)
+
+        align   16
+        global  EXTN(jsimd_idct_islow_sse2)
+
+EXTN(jsimd_idct_islow_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     near .columnDCT
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm1,xmm0
+        packsswb xmm1,xmm1
+        packsswb xmm1,xmm1
+        movd    eax,xmm1
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movdqa  xmm5, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   xmm5,PASS1_BITS
+
+        movdqa    xmm4,xmm5             ; xmm5=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm5,xmm5             ; xmm5=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm4,xmm4             ; xmm4=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm7,xmm5,0x00          ; xmm7=col0=(00 00 00 00 00 00 00 00)
+        pshufd  xmm6,xmm5,0x55          ; xmm6=col1=(01 01 01 01 01 01 01 01)
+        pshufd  xmm1,xmm5,0xAA          ; xmm1=col2=(02 02 02 02 02 02 02 02)
+        pshufd  xmm5,xmm5,0xFF          ; xmm5=col3=(03 03 03 03 03 03 03 03)
+        pshufd  xmm0,xmm4,0x00          ; xmm0=col4=(04 04 04 04 04 04 04 04)
+        pshufd  xmm3,xmm4,0x55          ; xmm3=col5=(05 05 05 05 05 05 05 05)
+        pshufd  xmm2,xmm4,0xAA          ; xmm2=col6=(06 06 06 06 06 06 06 06)
+        pshufd  xmm4,xmm4,0xFF          ; xmm4=col7=(07 07 07 07 07 07 07 07)
+
+        movdqa  XMMWORD [wk(8)], xmm6   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm5   ; wk(9)=col3
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+        jmp     near .column_end
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Even part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movdqa    xmm4,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm4,xmm3             ; xmm3=in6=z3
+        punpckhwd xmm5,xmm3
+        movdqa    xmm1,xmm4
+        movdqa    xmm3,xmm5
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F130_F054)]       ; xmm4=tmp3L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm3=tmp2H
+
+        movdqa    xmm6,xmm0
+        paddw     xmm0,xmm2             ; xmm0=in0+in4
+        psubw     xmm6,xmm2             ; xmm6=in0-in4
+
+        pxor      xmm7,xmm7
+        pxor      xmm2,xmm2
+        punpcklwd xmm7,xmm0             ; xmm7=tmp0L
+        punpckhwd xmm2,xmm0             ; xmm2=tmp0H
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+        psrad     xmm2,(16-CONST_BITS)  ; psrad xmm2,16 & pslld xmm2,CONST_BITS
+
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm4               ; xmm7=tmp10L
+        psubd   xmm0,xmm4               ; xmm0=tmp13L
+        movdqa  xmm4,xmm2
+        paddd   xmm2,xmm5               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp13H
+
+        movdqa  XMMWORD [wk(0)], xmm7   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm2   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm0   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm4   ; wk(3)=tmp13H
+
+        pxor      xmm5,xmm5
+        pxor      xmm7,xmm7
+        punpcklwd xmm5,xmm6             ; xmm5=tmp1L
+        punpckhwd xmm7,xmm6             ; xmm7=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm7,(16-CONST_BITS)  ; psrad xmm7,16 & pslld xmm7,CONST_BITS
+
+        movdqa  xmm2,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm2,xmm1               ; xmm2=tmp12L
+        movdqa  xmm0,xmm7
+        paddd   xmm7,xmm3               ; xmm7=tmp11H
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm7   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm0   ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movdqa  xmm4, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movdqa  xmm5,xmm6
+        movdqa  xmm7,xmm4
+        paddw   xmm5,xmm3               ; xmm5=z3
+        paddw   xmm7,xmm1               ; xmm7=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm2,xmm5
+        movdqa    xmm0,xmm5
+        punpcklwd xmm2,xmm7
+        punpckhwd xmm0,xmm7
+        movdqa    xmm5,xmm2
+        movdqa    xmm7,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm2=z3L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F117_F078)]       ; xmm7=z4H
+
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movdqa    xmm2,xmm3
+        movdqa    xmm0,xmm3
+        punpcklwd xmm2,xmm4
+        punpckhwd xmm0,xmm4
+        movdqa    xmm3,xmm2
+        movdqa    xmm4,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm2=tmp0L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0H
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm4=tmp3H
+
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp0L
+        paddd   xmm0, XMMWORD [wk(11)]  ; xmm0=tmp0H
+        paddd   xmm3,xmm5               ; xmm3=tmp3L
+        paddd   xmm4,xmm7               ; xmm4=tmp3H
+
+        movdqa  XMMWORD [wk(8)], xmm2   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm0   ; wk(9)=tmp0H
+
+        movdqa    xmm2,xmm1
+        movdqa    xmm0,xmm1
+        punpcklwd xmm2,xmm6
+        punpckhwd xmm0,xmm6
+        movdqa    xmm1,xmm2
+        movdqa    xmm6,xmm0
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm2=tmp1L
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm1=tmp2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
+
+        paddd   xmm2,xmm5               ; xmm2=tmp1L
+        paddd   xmm0,xmm7               ; xmm0=tmp1H
+        paddd   xmm1, XMMWORD [wk(10)]  ; xmm1=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+        movdqa  XMMWORD [wk(10)], xmm2  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm0  ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm7, XMMWORD [wk(1)]   ; xmm7=tmp10H
+
+        movdqa  xmm2,xmm5
+        movdqa  xmm0,xmm7
+        paddd   xmm5,xmm3               ; xmm5=data0L
+        paddd   xmm7,xmm4               ; xmm7=data0H
+        psubd   xmm2,xmm3               ; xmm2=data7L
+        psubd   xmm0,xmm4               ; xmm0=data7H
+
+        movdqa  xmm3,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm3=[PD_DESCALE_P1]
+
+        paddd   xmm5,xmm3
+        paddd   xmm7,xmm3
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm7,DESCALE_P1
+        paddd   xmm2,xmm3
+        paddd   xmm0,xmm3
+        psrad   xmm2,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm5,xmm7             ; xmm5=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm2,xmm0             ; xmm2=data7=(70 71 72 73 74 75 76 77)
+
+        movdqa  xmm4, XMMWORD [wk(4)]   ; xmm4=tmp11L
+        movdqa  xmm3, XMMWORD [wk(5)]   ; xmm3=tmp11H
+
+        movdqa  xmm7,xmm4
+        movdqa  xmm0,xmm3
+        paddd   xmm4,xmm1               ; xmm4=data1L
+        paddd   xmm3,xmm6               ; xmm3=data1H
+        psubd   xmm7,xmm1               ; xmm7=data6L
+        psubd   xmm0,xmm6               ; xmm0=data6H
+
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm1=[PD_DESCALE_P1]
+
+        paddd   xmm4,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm4,DESCALE_P1
+        psrad   xmm3,DESCALE_P1
+        paddd   xmm7,xmm1
+        paddd   xmm0,xmm1
+        psrad   xmm7,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+
+        packssdw  xmm4,xmm3             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm7,xmm0             ; xmm7=data6=(60 61 62 63 64 65 66 67)
+
+        movdqa    xmm6,xmm5             ; transpose coefficients(phase 1)
+        punpcklwd xmm5,xmm4             ; xmm5=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4             ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 1)
+        punpcklwd xmm7,xmm2             ; xmm7=(60 70 61 71 62 72 63 73)
+        punpckhwd xmm1,xmm2             ; xmm1=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm3, XMMWORD [wk(6)]   ; xmm3=tmp12L
+        movdqa  xmm0, XMMWORD [wk(7)]   ; xmm0=tmp12H
+        movdqa  xmm4, XMMWORD [wk(10)]  ; xmm4=tmp1L
+        movdqa  xmm2, XMMWORD [wk(11)]  ; xmm2=tmp1H
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 01 11 02 12 03 13)
+        movdqa  XMMWORD [wk(1)], xmm6   ; wk(1)=(04 14 05 15 06 16 07 17)
+        movdqa  XMMWORD [wk(4)], xmm7   ; wk(4)=(60 70 61 71 62 72 63 73)
+        movdqa  XMMWORD [wk(5)], xmm1   ; wk(5)=(64 74 65 75 66 76 67 77)
+
+        movdqa  xmm5,xmm3
+        movdqa  xmm6,xmm0
+        paddd   xmm3,xmm4               ; xmm3=data2L
+        paddd   xmm0,xmm2               ; xmm0=data2H
+        psubd   xmm5,xmm4               ; xmm5=data5L
+        psubd   xmm6,xmm2               ; xmm6=data5H
+
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm7=[PD_DESCALE_P1]
+
+        paddd   xmm3,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm3,DESCALE_P1
+        psrad   xmm0,DESCALE_P1
+        paddd   xmm5,xmm7
+        paddd   xmm6,xmm7
+        psrad   xmm5,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+
+        packssdw  xmm3,xmm0             ; xmm3=data2=(20 21 22 23 24 25 26 27)
+        packssdw  xmm5,xmm6             ; xmm5=data5=(50 51 52 53 54 55 56 57)
+
+        movdqa  xmm1, XMMWORD [wk(2)]   ; xmm1=tmp13L
+        movdqa  xmm4, XMMWORD [wk(3)]   ; xmm4=tmp13H
+        movdqa  xmm2, XMMWORD [wk(8)]   ; xmm2=tmp0L
+        movdqa  xmm7, XMMWORD [wk(9)]   ; xmm7=tmp0H
+
+        movdqa  xmm0,xmm1
+        movdqa  xmm6,xmm4
+        paddd   xmm1,xmm2               ; xmm1=data3L
+        paddd   xmm4,xmm7               ; xmm4=data3H
+        psubd   xmm0,xmm2               ; xmm0=data4L
+        psubd   xmm6,xmm7               ; xmm6=data4H
+
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1)]        ; xmm2=[PD_DESCALE_P1]
+
+        paddd   xmm1,xmm2
+        paddd   xmm4,xmm2
+        psrad   xmm1,DESCALE_P1
+        psrad   xmm4,DESCALE_P1
+        paddd   xmm0,xmm2
+        paddd   xmm6,xmm2
+        psrad   xmm0,DESCALE_P1
+        psrad   xmm6,DESCALE_P1
+
+        packssdw  xmm1,xmm4             ; xmm1=data3=(30 31 32 33 34 35 36 37)
+        packssdw  xmm0,xmm6             ; xmm0=data4=(40 41 42 43 44 45 46 47)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=(00 10 01 11 02 12 03 13)
+        movdqa  xmm2, XMMWORD [wk(1)]   ; xmm2=(04 14 05 15 06 16 07 17)
+
+        movdqa    xmm4,xmm3             ; transpose coefficients(phase 1)
+        punpcklwd xmm3,xmm1             ; xmm3=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm4,xmm1             ; xmm4=(24 34 25 35 26 36 27 37)
+        movdqa    xmm6,xmm0             ; transpose coefficients(phase 1)
+        punpcklwd xmm0,xmm5             ; xmm0=(40 50 41 51 42 52 43 53)
+        punpckhwd xmm6,xmm5             ; xmm6=(44 54 45 55 46 56 47 57)
+
+        movdqa    xmm1,xmm7             ; transpose coefficients(phase 2)
+        punpckldq xmm7,xmm3             ; xmm7=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm1,xmm3             ; xmm1=(02 12 22 32 03 13 23 33)
+        movdqa    xmm5,xmm2             ; transpose coefficients(phase 2)
+        punpckldq xmm2,xmm4             ; xmm2=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm5,xmm4             ; xmm5=(06 16 26 36 07 17 27 37)
+
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=(60 70 61 71 62 72 63 73)
+        movdqa  xmm4, XMMWORD [wk(5)]   ; xmm4=(64 74 65 75 66 76 67 77)
+
+        movdqa  XMMWORD [wk(6)], xmm2   ; wk(6)=(04 14 24 34 05 15 25 35)
+        movdqa  XMMWORD [wk(7)], xmm5   ; wk(7)=(06 16 26 36 07 17 27 37)
+
+        movdqa    xmm2,xmm0             ; transpose coefficients(phase 2)
+        punpckldq xmm0,xmm3             ; xmm0=(40 50 60 70 41 51 61 71)
+        punpckhdq xmm2,xmm3             ; xmm2=(42 52 62 72 43 53 63 73)
+        movdqa    xmm5,xmm6             ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm4             ; xmm6=(44 54 64 74 45 55 65 75)
+        punpckhdq xmm5,xmm4             ; xmm5=(46 56 66 76 47 57 67 77)
+
+        movdqa     xmm3,xmm7            ; transpose coefficients(phase 3)
+        punpcklqdq xmm7,xmm0            ; xmm7=col0=(00 10 20 30 40 50 60 70)
+        punpckhqdq xmm3,xmm0            ; xmm3=col1=(01 11 21 31 41 51 61 71)
+        movdqa     xmm4,xmm1            ; transpose coefficients(phase 3)
+        punpcklqdq xmm1,xmm2            ; xmm1=col2=(02 12 22 32 42 52 62 72)
+        punpckhqdq xmm4,xmm2            ; xmm4=col3=(03 13 23 33 43 53 63 73)
+
+        movdqa  xmm0, XMMWORD [wk(6)]   ; xmm0=(04 14 24 34 05 15 25 35)
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=(06 16 26 36 07 17 27 37)
+
+        movdqa  XMMWORD [wk(8)], xmm3   ; wk(8)=col1
+        movdqa  XMMWORD [wk(9)], xmm4   ; wk(9)=col3
+
+        movdqa     xmm3,xmm0            ; transpose coefficients(phase 3)
+        punpcklqdq xmm0,xmm6            ; xmm0=col4=(04 14 24 34 44 54 64 74)
+        punpckhqdq xmm3,xmm6            ; xmm3=col5=(05 15 25 35 45 55 65 75)
+        movdqa     xmm4,xmm2            ; transpose coefficients(phase 3)
+        punpcklqdq xmm2,xmm5            ; xmm2=col6=(06 16 26 36 46 56 66 76)
+        punpckhqdq xmm4,xmm5            ; xmm4=col7=(07 17 27 37 47 57 67 77)
+
+        movdqa  XMMWORD [wk(10)], xmm3  ; wk(10)=col5
+        movdqa  XMMWORD [wk(11)], xmm4  ; wk(11)=col7
+.column_end:
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+
+        ; -- Even part
+
+        ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
+
+        ; (Original)
+        ; z1 = (z2 + z3) * 0.541196100;
+        ; tmp2 = z1 + z3 * -1.847759065;
+        ; tmp3 = z1 + z2 * 0.765366865;
+        ;
+        ; (This implementation)
+        ; tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065);
+        ; tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100;
+
+        movdqa    xmm6,xmm1             ; xmm1=in2=z2
+        movdqa    xmm5,xmm1
+        punpcklwd xmm6,xmm2             ; xmm2=in6=z3
+        punpckhwd xmm5,xmm2
+        movdqa    xmm1,xmm6
+        movdqa    xmm2,xmm5
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_F130_F054)]       ; xmm6=tmp3L
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F130_F054)]       ; xmm5=tmp3H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm1=tmp2L
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F054_MF130)]      ; xmm2=tmp2H
+
+        movdqa    xmm3,xmm7
+        paddw     xmm7,xmm0             ; xmm7=in0+in4
+        psubw     xmm3,xmm0             ; xmm3=in0-in4
+
+        pxor      xmm4,xmm4
+        pxor      xmm0,xmm0
+        punpcklwd xmm4,xmm7             ; xmm4=tmp0L
+        punpckhwd xmm0,xmm7             ; xmm0=tmp0H
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+        psrad     xmm0,(16-CONST_BITS)  ; psrad xmm0,16 & pslld xmm0,CONST_BITS
+
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm6               ; xmm4=tmp10L
+        psubd   xmm7,xmm6               ; xmm7=tmp13L
+        movdqa  xmm6,xmm0
+        paddd   xmm0,xmm5               ; xmm0=tmp10H
+        psubd   xmm6,xmm5               ; xmm6=tmp13H
+
+        movdqa  XMMWORD [wk(0)], xmm4   ; wk(0)=tmp10L
+        movdqa  XMMWORD [wk(1)], xmm0   ; wk(1)=tmp10H
+        movdqa  XMMWORD [wk(2)], xmm7   ; wk(2)=tmp13L
+        movdqa  XMMWORD [wk(3)], xmm6   ; wk(3)=tmp13H
+
+        pxor      xmm5,xmm5
+        pxor      xmm4,xmm4
+        punpcklwd xmm5,xmm3             ; xmm5=tmp1L
+        punpckhwd xmm4,xmm3             ; xmm4=tmp1H
+        psrad     xmm5,(16-CONST_BITS)  ; psrad xmm5,16 & pslld xmm5,CONST_BITS
+        psrad     xmm4,(16-CONST_BITS)  ; psrad xmm4,16 & pslld xmm4,CONST_BITS
+
+        movdqa  xmm0,xmm5
+        paddd   xmm5,xmm1               ; xmm5=tmp11L
+        psubd   xmm0,xmm1               ; xmm0=tmp12L
+        movdqa  xmm7,xmm4
+        paddd   xmm4,xmm2               ; xmm4=tmp11H
+        psubd   xmm7,xmm2               ; xmm7=tmp12H
+
+        movdqa  XMMWORD [wk(4)], xmm5   ; wk(4)=tmp11L
+        movdqa  XMMWORD [wk(5)], xmm4   ; wk(5)=tmp11H
+        movdqa  XMMWORD [wk(6)], xmm0   ; wk(6)=tmp12L
+        movdqa  XMMWORD [wk(7)], xmm7   ; wk(7)=tmp12H
+
+        ; -- Odd part
+
+        movdqa  xmm6, XMMWORD [wk(9)]   ; xmm6=col3
+        movdqa  xmm3, XMMWORD [wk(8)]   ; xmm3=col1
+        movdqa  xmm1, XMMWORD [wk(11)]  ; xmm1=col7
+        movdqa  xmm2, XMMWORD [wk(10)]  ; xmm2=col5
+
+        movdqa  xmm5,xmm6
+        movdqa  xmm4,xmm3
+        paddw   xmm5,xmm1               ; xmm5=z3
+        paddw   xmm4,xmm2               ; xmm4=z4
+
+        ; (Original)
+        ; z5 = (z3 + z4) * 1.175875602;
+        ; z3 = z3 * -1.961570560;  z4 = z4 * -0.390180644;
+        ; z3 += z5;  z4 += z5;
+        ;
+        ; (This implementation)
+        ; z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602;
+        ; z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644);
+
+        movdqa    xmm0,xmm5
+        movdqa    xmm7,xmm5
+        punpcklwd xmm0,xmm4
+        punpckhwd xmm7,xmm4
+        movdqa    xmm5,xmm0
+        movdqa    xmm4,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm0=z3L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF078_F117)]      ; xmm7=z3H
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F117_F078)]       ; xmm5=z4L
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F117_F078)]       ; xmm4=z4H
+
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=z3L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=z3H
+
+        ; (Original)
+        ; z1 = tmp0 + tmp3;  z2 = tmp1 + tmp2;
+        ; tmp0 = tmp0 * 0.298631336;  tmp1 = tmp1 * 2.053119869;
+        ; tmp2 = tmp2 * 3.072711026;  tmp3 = tmp3 * 1.501321110;
+        ; z1 = z1 * -0.899976223;  z2 = z2 * -2.562915447;
+        ; tmp0 += z1 + z3;  tmp1 += z2 + z4;
+        ; tmp2 += z2 + z3;  tmp3 += z1 + z4;
+        ;
+        ; (This implementation)
+        ; tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223;
+        ; tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447;
+        ; tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447);
+        ; tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223);
+        ; tmp0 += z3;  tmp1 += z4;
+        ; tmp2 += z3;  tmp3 += z4;
+
+        movdqa    xmm0,xmm1
+        movdqa    xmm7,xmm1
+        punpcklwd xmm0,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm1,xmm0
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm0=tmp0L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF089)]     ; xmm7=tmp0H
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm1=tmp3L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_MF089_F060)]      ; xmm3=tmp3H
+
+        paddd   xmm0, XMMWORD [wk(10)]  ; xmm0=tmp0L
+        paddd   xmm7, XMMWORD [wk(11)]  ; xmm7=tmp0H
+        paddd   xmm1,xmm5               ; xmm1=tmp3L
+        paddd   xmm3,xmm4               ; xmm3=tmp3H
+
+        movdqa  XMMWORD [wk(8)], xmm0   ; wk(8)=tmp0L
+        movdqa  XMMWORD [wk(9)], xmm7   ; wk(9)=tmp0H
+
+        movdqa    xmm0,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm0,xmm6
+        punpckhwd xmm7,xmm6
+        movdqa    xmm2,xmm0
+        movdqa    xmm6,xmm7
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm0=tmp1L
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF050_MF256)]     ; xmm7=tmp1H
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm2=tmp2L
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF256_F050)]      ; xmm6=tmp2H
+
+        paddd   xmm0,xmm5               ; xmm0=tmp1L
+        paddd   xmm7,xmm4               ; xmm7=tmp1H
+        paddd   xmm2, XMMWORD [wk(10)]  ; xmm2=tmp2L
+        paddd   xmm6, XMMWORD [wk(11)]  ; xmm6=tmp2H
+
+        movdqa  XMMWORD [wk(10)], xmm0  ; wk(10)=tmp1L
+        movdqa  XMMWORD [wk(11)], xmm7  ; wk(11)=tmp1H
+
+        ; -- Final output stage
+
+        movdqa  xmm5, XMMWORD [wk(0)]   ; xmm5=tmp10L
+        movdqa  xmm4, XMMWORD [wk(1)]   ; xmm4=tmp10H
+
+        movdqa  xmm0,xmm5
+        movdqa  xmm7,xmm4
+        paddd   xmm5,xmm1               ; xmm5=data0L
+        paddd   xmm4,xmm3               ; xmm4=data0H
+        psubd   xmm0,xmm1               ; xmm0=data7L
+        psubd   xmm7,xmm3               ; xmm7=data7H
+
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm1=[PD_DESCALE_P2]
+
+        paddd   xmm5,xmm1
+        paddd   xmm4,xmm1
+        psrad   xmm5,DESCALE_P2
+        psrad   xmm4,DESCALE_P2
+        paddd   xmm0,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm0,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
+
+        packssdw  xmm5,xmm4             ; xmm5=data0=(00 10 20 30 40 50 60 70)
+        packssdw  xmm0,xmm7             ; xmm0=data7=(07 17 27 37 47 57 67 77)
+
+        movdqa  xmm3, XMMWORD [wk(4)]   ; xmm3=tmp11L
+        movdqa  xmm1, XMMWORD [wk(5)]   ; xmm1=tmp11H
+
+        movdqa  xmm4,xmm3
+        movdqa  xmm7,xmm1
+        paddd   xmm3,xmm2               ; xmm3=data1L
+        paddd   xmm1,xmm6               ; xmm1=data1H
+        psubd   xmm4,xmm2               ; xmm4=data6L
+        psubd   xmm7,xmm6               ; xmm7=data6H
+
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm2=[PD_DESCALE_P2]
+
+        paddd   xmm3,xmm2
+        paddd   xmm1,xmm2
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm4,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm7,DESCALE_P2
+
+        packssdw  xmm3,xmm1             ; xmm3=data1=(01 11 21 31 41 51 61 71)
+        packssdw  xmm4,xmm7             ; xmm4=data6=(06 16 26 36 46 56 66 76)
+
+        packsswb  xmm5,xmm4             ; xmm5=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        packsswb  xmm3,xmm0             ; xmm3=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm6, XMMWORD [wk(6)]   ; xmm6=tmp12L
+        movdqa  xmm2, XMMWORD [wk(7)]   ; xmm2=tmp12H
+        movdqa  xmm1, XMMWORD [wk(10)]  ; xmm1=tmp1L
+        movdqa  xmm7, XMMWORD [wk(11)]  ; xmm7=tmp1H
+
+        movdqa  XMMWORD [wk(0)], xmm5   ; wk(0)=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        movdqa  xmm4,xmm6
+        movdqa  xmm0,xmm2
+        paddd   xmm6,xmm1               ; xmm6=data2L
+        paddd   xmm2,xmm7               ; xmm2=data2H
+        psubd   xmm4,xmm1               ; xmm4=data5L
+        psubd   xmm0,xmm7               ; xmm0=data5H
+
+        movdqa  xmm5,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm5=[PD_DESCALE_P2]
+
+        paddd   xmm6,xmm5
+        paddd   xmm2,xmm5
+        psrad   xmm6,DESCALE_P2
+        psrad   xmm2,DESCALE_P2
+        paddd   xmm4,xmm5
+        paddd   xmm0,xmm5
+        psrad   xmm4,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
+
+        packssdw  xmm6,xmm2             ; xmm6=data2=(02 12 22 32 42 52 62 72)
+        packssdw  xmm4,xmm0             ; xmm4=data5=(05 15 25 35 45 55 65 75)
+
+        movdqa  xmm3, XMMWORD [wk(2)]   ; xmm3=tmp13L
+        movdqa  xmm1, XMMWORD [wk(3)]   ; xmm1=tmp13H
+        movdqa  xmm7, XMMWORD [wk(8)]   ; xmm7=tmp0L
+        movdqa  xmm5, XMMWORD [wk(9)]   ; xmm5=tmp0H
+
+        movdqa  xmm2,xmm3
+        movdqa  xmm0,xmm1
+        paddd   xmm3,xmm7               ; xmm3=data3L
+        paddd   xmm1,xmm5               ; xmm1=data3H
+        psubd   xmm2,xmm7               ; xmm2=data4L
+        psubd   xmm0,xmm5               ; xmm0=data4H
+
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P2)]        ; xmm7=[PD_DESCALE_P2]
+
+        paddd   xmm3,xmm7
+        paddd   xmm1,xmm7
+        psrad   xmm3,DESCALE_P2
+        psrad   xmm1,DESCALE_P2
+        paddd   xmm2,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm2,DESCALE_P2
+        psrad   xmm0,DESCALE_P2
+
+        movdqa    xmm5,[GOTOFF(ebx,PB_CENTERJSAMP)]     ; xmm5=[PB_CENTERJSAMP]
+
+        packssdw  xmm3,xmm1             ; xmm3=data3=(03 13 23 33 43 53 63 73)
+        packssdw  xmm2,xmm0             ; xmm2=data4=(04 14 24 34 44 54 64 74)
+
+        movdqa    xmm7, XMMWORD [wk(0)] ; xmm7=(00 10 20 30 40 50 60 70 06 16 26 36 46 56 66 76)
+        movdqa    xmm1, XMMWORD [wk(1)] ; xmm1=(01 11 21 31 41 51 61 71 07 17 27 37 47 57 67 77)
+
+        packsswb  xmm6,xmm2             ; xmm6=(02 12 22 32 42 52 62 72 04 14 24 34 44 54 64 74)
+        packsswb  xmm3,xmm4             ; xmm3=(03 13 23 33 43 53 63 73 05 15 25 35 45 55 65 75)
+
+        paddb     xmm7,xmm5
+        paddb     xmm1,xmm5
+        paddb     xmm6,xmm5
+        paddb     xmm3,xmm5
+
+        movdqa    xmm0,xmm7     ; transpose coefficients(phase 1)
+        punpcklbw xmm7,xmm1     ; xmm7=(00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71)
+        punpckhbw xmm0,xmm1     ; xmm0=(06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77)
+        movdqa    xmm2,xmm6     ; transpose coefficients(phase 1)
+        punpcklbw xmm6,xmm3     ; xmm6=(02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73)
+        punpckhbw xmm2,xmm3     ; xmm2=(04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75)
+
+        movdqa    xmm4,xmm7     ; transpose coefficients(phase 2)
+        punpcklwd xmm7,xmm6     ; xmm7=(00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33)
+        punpckhwd xmm4,xmm6     ; xmm4=(40 41 42 43 50 51 52 53 60 61 62 63 70 71 72 73)
+        movdqa    xmm5,xmm2     ; transpose coefficients(phase 2)
+        punpcklwd xmm2,xmm0     ; xmm2=(04 05 06 07 14 15 16 17 24 25 26 27 34 35 36 37)
+        punpckhwd xmm5,xmm0     ; xmm5=(44 45 46 47 54 55 56 57 64 65 66 67 74 75 76 77)
+
+        movdqa    xmm1,xmm7     ; transpose coefficients(phase 3)
+        punpckldq xmm7,xmm2     ; xmm7=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
+        punpckhdq xmm1,xmm2     ; xmm1=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
+        movdqa    xmm3,xmm4     ; transpose coefficients(phase 3)
+        punpckldq xmm4,xmm5     ; xmm4=(40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57)
+        punpckhdq xmm3,xmm5     ; xmm3=(60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77)
+
+        pshufd  xmm6,xmm7,0x4E  ; xmm6=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
+        pshufd  xmm0,xmm1,0x4E  ; xmm0=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
+        pshufd  xmm2,xmm4,0x4E  ; xmm2=(50 51 52 53 54 55 56 57 40 41 42 43 44 45 46 47)
+        pshufd  xmm5,xmm3,0x4E  ; xmm5=(70 71 72 73 74 75 76 77 60 61 62 63 64 65 66 67)
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm7
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm1
+        mov     edx, JSAMPROW [edi+4*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+6*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm0
+        mov     edx, JSAMPROW [edi+5*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
+        movq    XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2
+        movq    XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctred-mmx.asm b/simd/jidctred-mmx.asm
new file mode 100644
index 0000000..1c93901
--- /dev/null
+++ b/simd/jidctred-mmx.asm
@@ -0,0 +1,706 @@
+;
+; jidctred.asm - reduced-size IDCT (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13              ; fraction bits of the F_x_xxx constants
+%define PASS1_BITS      2               ; extra scale bits kept after pass 1
+
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)       ; 4x4: pass 1 shift
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)     ; 4x4: pass 2 shift
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)       ; 2x2: pass 1 shift
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)     ; 2x2: pass 2 shift
+
+%if CONST_BITS == 13
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so each value below is given pre-scaled by 2^30.
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n)) ; right shift by n with rounding
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+
+        alignz  16
+        global  EXTN(jconst_idct_red_mmx)
+
+EXTN(jconst_idct_red_mmx):
+
+PW_F184_MF076   times 2 dw  F_1_847,-F_0_765    ; 4x4 even part: pmaddwd on (in2,in6) pairs
+PW_F256_F089    times 2 dw  F_2_562, F_0_899    ; 4x4 odd part:  pmaddwd on (in1,in3) pairs, tmp2
+PW_F106_MF217   times 2 dw  F_1_061,-F_2_172    ; 4x4 odd part:  pmaddwd on (in1,in3) pairs, tmp0
+PW_MF060_MF050  times 2 dw -F_0_601,-F_0_509    ; 4x4 odd part:  pmaddwd on (in5,in7) pairs, tmp2
+PW_F145_MF021   times 2 dw  F_1_451,-F_0_211    ; 4x4 odd part:  pmaddwd on (in5,in7) pairs, tmp0
+PW_F362_MF127   times 2 dw  F_3_624,-F_1_272    ; 2x2 odd part:  pmaddwd on (in1,in3) pairs
+PW_F085_MF072   times 2 dw  F_0_850,-F_0_720    ; 2x2 odd part:  pmaddwd on (in5,in7) pairs
+PD_DESCALE_P1_4 times 2 dd  1 << (DESCALE_P1_4-1)       ; rounding term added before psrad DESCALE_P1_4
+PD_DESCALE_P2_4 times 2 dd  1 << (DESCALE_P2_4-1)       ; rounding term added before psrad DESCALE_P2_4
+PD_DESCALE_P1_2 times 2 dd  1 << (DESCALE_P1_2-1)       ; rounding term added before psrad DESCALE_P1_2
+PD_DESCALE_P2_2 times 2 dd  1 << (DESCALE_P2_2-1)       ; rounding term added before psrad DESCALE_P2_2
+PB_CENTERJSAMP  times 8 db  CENTERJSAMPLE       ; added to every packed output byte before the store
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_4x4_mmx (void * dct_table, JCOEFPTR coef_block,
+;                     JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0           ; slot written by "mov [esp],eax" in the prologue
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_MMWORD  ; mmword wk[WK_NUM]
+%define WK_NUM          2               ; wk(0)/wk(1) hold tmp0L/tmp0H across the even part
+%define workspace       wk(0)-DCTSIZE2*SIZEOF_JCOEF
+                                        ; JCOEF workspace[DCTSIZE2]
+
+        align   16
+        global  EXTN(jsimd_idct_4x4_mmx)
+
+EXTN(jsimd_idct_4x4_mmx):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4                     ; slot for the saved eax
+        and     esp, byte (-SIZEOF_MMWORD)      ; align to 64 bits
+        mov     [esp],eax                       ; this slot is [original_ebp]
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [workspace]                ; esp = lowest address of wk[] + workspace[]
+        pushpic ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input, store into work array.
+
+;       mov     eax, [original_ebp]     ; (not needed: eax still holds it from the prologue)
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+        lea     edi, [workspace]                        ; JCOEF * wsptr
+        mov     ecx, DCTSIZE/4                          ; ctr (4 columns per iteration)
+        alignx  16,7
+.columnloop:
+%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT                ; early out of the zero test
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     mm0, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]     ; row 4 is skipped: it is never loaded below either
+        por     mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     mm1, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     mm0,mm1
+        packsswb mm0,mm0                        ; fold the 4 words into the low 32 bits
+        movd    eax,mm0
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   mm0,PASS1_BITS
+
+        movq      mm2,mm0               ; mm0=in0=(00 01 02 03)
+        punpcklwd mm0,mm0               ; mm0=(00 00 01 01)
+        punpckhwd mm2,mm2               ; mm2=(02 02 03 03)
+
+        movq      mm1,mm0
+        punpckldq mm0,mm0               ; mm0=(00 00 00 00)
+        punpckhdq mm1,mm1               ; mm1=(01 01 01 01)
+        movq      mm3,mm2
+        punpckldq mm2,mm2               ; mm2=(02 02 02 02)
+        punpckhdq mm3,mm3               ; mm3=(03 03 03 03)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+        jmp     near .nextcolumn
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Odd part
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movq      mm4,mm0
+        movq      mm5,mm0
+        punpcklwd mm4,mm1               ; interleave rows 1 and 3 for pmaddwd
+        punpckhwd mm5,mm1
+        movq      mm0,mm4
+        movq      mm1,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
+
+        movq      mm6,mm2
+        movq      mm7,mm2
+        punpcklwd mm6,mm3               ; interleave rows 5 and 7 for pmaddwd
+        punpckhwd mm7,mm3
+        movq      mm2,mm6
+        movq      mm3,mm7
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
+
+        paddd   mm6,mm4                 ; mm6=tmp2L
+        paddd   mm7,mm5                 ; mm7=tmp2H
+        paddd   mm2,mm0                 ; mm2=tmp0L
+        paddd   mm3,mm1                 ; mm3=tmp0H
+
+        movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+        movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
+
+        ; -- Even part
+
+        movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm4, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm0, MMWORD [MMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        pxor      mm1,mm1
+        pxor      mm2,mm2
+        punpcklwd mm1,mm4               ; mm1=tmp0L
+        punpckhwd mm2,mm4               ; mm2=tmp0H
+        psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+        psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
+
+        movq      mm3,mm5               ; mm5=in2=z2
+        punpcklwd mm5,mm0               ; mm0=in6=z3
+        punpckhwd mm3,mm0
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
+
+        movq    mm4,mm1
+        movq    mm0,mm2
+        paddd   mm1,mm5                 ; mm1=tmp10L
+        paddd   mm2,mm3                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp12L
+        psubd   mm0,mm3                 ; mm0=tmp12H
+
+        ; -- Final output stage
+
+        movq    mm5,mm1
+        movq    mm3,mm2
+        paddd   mm1,mm6                 ; mm1=data0L
+        paddd   mm2,mm7                 ; mm2=data0H
+        psubd   mm5,mm6                 ; mm5=data3L
+        psubd   mm3,mm7                 ; mm3=data3H
+
+        movq    mm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm6=[PD_DESCALE_P1_4]
+
+        paddd   mm1,mm6
+        paddd   mm2,mm6
+        psrad   mm1,DESCALE_P1_4
+        psrad   mm2,DESCALE_P1_4
+        paddd   mm5,mm6
+        paddd   mm3,mm6
+        psrad   mm5,DESCALE_P1_4
+        psrad   mm3,DESCALE_P1_4
+
+        packssdw  mm1,mm2               ; mm1=data0=(00 01 02 03)
+        packssdw  mm5,mm3               ; mm5=data3=(30 31 32 33)
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
+
+        movq    mm2,mm4
+        movq    mm3,mm0
+        paddd   mm4,mm7                 ; mm4=data1L
+        paddd   mm0,mm6                 ; mm0=data1H
+        psubd   mm2,mm7                 ; mm2=data2L
+        psubd   mm3,mm6                 ; mm3=data2H
+
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]       ; mm7=[PD_DESCALE_P1_4]
+
+        paddd   mm4,mm7
+        paddd   mm0,mm7
+        psrad   mm4,DESCALE_P1_4
+        psrad   mm0,DESCALE_P1_4
+        paddd   mm2,mm7
+        paddd   mm3,mm7
+        psrad   mm2,DESCALE_P1_4
+        psrad   mm3,DESCALE_P1_4
+
+        packssdw  mm4,mm0               ; mm4=data1=(10 11 12 13)
+        packssdw  mm2,mm3               ; mm2=data2=(20 21 22 23)
+
+        movq      mm6,mm1               ; transpose coefficients(phase 1)
+        punpcklwd mm1,mm4               ; mm1=(00 10 01 11)
+        punpckhwd mm6,mm4               ; mm6=(02 12 03 13)
+        movq      mm7,mm2               ; transpose coefficients(phase 1)
+        punpcklwd mm2,mm5               ; mm2=(20 30 21 31)
+        punpckhwd mm7,mm5               ; mm7=(22 32 23 33)
+
+        movq      mm0,mm1               ; transpose coefficients(phase 2)
+        punpckldq mm1,mm2               ; mm1=(00 10 20 30)
+        punpckhdq mm0,mm2               ; mm0=(01 11 21 31)
+        movq      mm3,mm6               ; transpose coefficients(phase 2)
+        punpckldq mm6,mm7               ; mm6=(02 12 22 32)
+        punpckhdq mm3,mm7               ; mm3=(03 13 23 33)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
+
+.nextcolumn:
+        add     esi, byte 4*SIZEOF_JCOEF                ; coef_block
+        add     edx, byte 4*SIZEOF_ISLOW_MULT_TYPE      ; quantptr
+        add     edi, byte 4*DCTSIZE*SIZEOF_JCOEF        ; wsptr
+        dec     ecx                                     ; ctr
+        jnz     near .columnloop
+
+        ; ---- Pass 2: process rows from work array, store into output array.
+
+        mov     eax, [original_ebp]                     ; reload: eax was clobbered in pass 1
+        lea     esi, [workspace]                        ; JCOEF * wsptr
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+
+        ; -- Odd part
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+
+        movq      mm4,mm0
+        movq      mm5,mm0
+        punpcklwd mm4,mm1
+        punpckhwd mm5,mm1
+        movq      mm0,mm4
+        movq      mm1,mm5
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F256_F089)]        ; mm4=(tmp2L)
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F256_F089)]        ; mm5=(tmp2H)
+        pmaddwd   mm0,[GOTOFF(ebx,PW_F106_MF217)]       ; mm0=(tmp0L)
+        pmaddwd   mm1,[GOTOFF(ebx,PW_F106_MF217)]       ; mm1=(tmp0H)
+
+        movq      mm6,mm2
+        movq      mm7,mm2
+        punpcklwd mm6,mm3
+        punpckhwd mm7,mm3
+        movq      mm2,mm6
+        movq      mm3,mm7
+        pmaddwd   mm6,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm6=(tmp2L)
+        pmaddwd   mm7,[GOTOFF(ebx,PW_MF060_MF050)]      ; mm7=(tmp2H)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F145_MF021)]       ; mm2=(tmp0L)
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F145_MF021)]       ; mm3=(tmp0H)
+
+        paddd   mm6,mm4                 ; mm6=tmp2L
+        paddd   mm7,mm5                 ; mm7=tmp2H
+        paddd   mm2,mm0                 ; mm2=tmp0L
+        paddd   mm3,mm1                 ; mm3=tmp0H
+
+        movq    MMWORD [wk(0)], mm2     ; wk(0)=tmp0L
+        movq    MMWORD [wk(1)], mm3     ; wk(1)=tmp0H
+
+        ; -- Even part
+
+        movq    mm4, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movq    mm0, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+
+        pxor      mm1,mm1
+        pxor      mm2,mm2
+        punpcklwd mm1,mm4               ; mm1=tmp0L
+        punpckhwd mm2,mm4               ; mm2=tmp0H
+        psrad     mm1,(16-CONST_BITS-1) ; psrad mm1,16 & pslld mm1,CONST_BITS+1
+        psrad     mm2,(16-CONST_BITS-1) ; psrad mm2,16 & pslld mm2,CONST_BITS+1
+
+        movq      mm3,mm5               ; mm5=in2=z2
+        punpcklwd mm5,mm0               ; mm0=in6=z3
+        punpckhwd mm3,mm0
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F184_MF076)]       ; mm5=tmp2L
+        pmaddwd   mm3,[GOTOFF(ebx,PW_F184_MF076)]       ; mm3=tmp2H
+
+        movq    mm4,mm1
+        movq    mm0,mm2
+        paddd   mm1,mm5                 ; mm1=tmp10L
+        paddd   mm2,mm3                 ; mm2=tmp10H
+        psubd   mm4,mm5                 ; mm4=tmp12L
+        psubd   mm0,mm3                 ; mm0=tmp12H
+
+        ; -- Final output stage
+
+        movq    mm5,mm1
+        movq    mm3,mm2
+        paddd   mm1,mm6                 ; mm1=data0L
+        paddd   mm2,mm7                 ; mm2=data0H
+        psubd   mm5,mm6                 ; mm5=data3L
+        psubd   mm3,mm7                 ; mm3=data3H
+
+        movq    mm6,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm6=[PD_DESCALE_P2_4]
+
+        paddd   mm1,mm6
+        paddd   mm2,mm6
+        psrad   mm1,DESCALE_P2_4
+        psrad   mm2,DESCALE_P2_4
+        paddd   mm5,mm6
+        paddd   mm3,mm6
+        psrad   mm5,DESCALE_P2_4
+        psrad   mm3,DESCALE_P2_4
+
+        packssdw  mm1,mm2               ; mm1=data0=(00 10 20 30)
+        packssdw  mm5,mm3               ; mm5=data3=(03 13 23 33)
+
+        movq    mm7, MMWORD [wk(0)]     ; mm7=tmp0L
+        movq    mm6, MMWORD [wk(1)]     ; mm6=tmp0H
+
+        movq    mm2,mm4
+        movq    mm3,mm0
+        paddd   mm4,mm7                 ; mm4=data1L
+        paddd   mm0,mm6                 ; mm0=data1H
+        psubd   mm2,mm7                 ; mm2=data2L
+        psubd   mm3,mm6                 ; mm3=data2H
+
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P2_4)]       ; mm7=[PD_DESCALE_P2_4]
+
+        paddd   mm4,mm7
+        paddd   mm0,mm7
+        psrad   mm4,DESCALE_P2_4
+        psrad   mm0,DESCALE_P2_4
+        paddd   mm2,mm7
+        paddd   mm3,mm7
+        psrad   mm2,DESCALE_P2_4
+        psrad   mm3,DESCALE_P2_4
+
+        packssdw  mm4,mm0               ; mm4=data1=(01 11 21 31)
+        packssdw  mm2,mm3               ; mm2=data2=(02 12 22 32)
+
+        movq      mm6,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; mm6=[PB_CENTERJSAMP]
+
+        packsswb  mm1,mm2               ; mm1=(00 10 20 30 02 12 22 32)
+        packsswb  mm4,mm5               ; mm4=(01 11 21 31 03 13 23 33)
+        paddb     mm1,mm6
+        paddb     mm4,mm6
+
+        movq      mm7,mm1               ; transpose coefficients(phase 1)
+        punpcklbw mm1,mm4               ; mm1=(00 01 10 11 20 21 30 31)
+        punpckhbw mm7,mm4               ; mm7=(02 03 12 13 22 23 32 33)
+
+        movq      mm0,mm1               ; transpose coefficients(phase 2)
+        punpcklwd mm1,mm7               ; mm1=(00 01 02 03 10 11 12 13)
+        punpckhwd mm0,mm7               ; mm0=(20 21 22 23 30 31 32 33)
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1     ; output row 0 <- (00 01 02 03)
+        movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0     ; output row 2 <- (20 21 22 23)
+
+        psrlq   mm1,4*BYTE_BIT          ; mm1=(10 11 12 13 -- -- -- --)
+        psrlq   mm0,4*BYTE_BIT          ; mm0=(30 31 32 33 -- -- -- --)
+
+        mov     edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movd    DWORD [edx+eax*SIZEOF_JSAMPLE], mm1     ; output row 1 <- (10 11 12 13)
+        movd    DWORD [esi+eax*SIZEOF_JSAMPLE], mm0     ; output row 3 <- (30 31 32 33)
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_2x2_mmx (void * dct_table, JCOEFPTR coef_block,
+;                     JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+        align   16
+        global  EXTN(jsimd_idct_2x2_mmx)
+
+EXTN(jsimd_idct_2x2_mmx):
+        push    ebp
+        mov     ebp,esp
+        push    ebx             ; always saved: ebx is reused as scratch before the final store
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     edx, POINTER [dct_table(ebp)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
+
+        ; -- Odd part
+
+        movq    mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm2, MMWORD [MMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movq    mm3, MMWORD [MMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  mm2, MMWORD [MMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; mm0=(10 11 ** 13), mm1=(30 31 ** 33)
+        ; mm2=(50 51 ** 53), mm3=(70 71 ** 73)
+
+        pcmpeqd   mm7,mm7               ; mm7=all ones
+        pslld     mm7,WORD_BIT          ; mm7={0x0000 0xFFFF 0x0000 0xFFFF}
+
+        movq      mm4,mm0               ; mm4=(10 11 ** 13)
+        movq      mm5,mm2               ; mm5=(50 51 ** 53)
+        punpcklwd mm4,mm1               ; mm4=(10 30 11 31)
+        punpcklwd mm5,mm3               ; mm5=(50 70 51 71)
+        pmaddwd   mm4,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+
+        psrld   mm0,WORD_BIT            ; mm0=(11 -- 13 --)
+        pand    mm1,mm7                 ; mm1=(-- 31 -- 33)
+        psrld   mm2,WORD_BIT            ; mm2=(51 -- 53 --)
+        pand    mm3,mm7                 ; mm3=(-- 71 -- 73)
+        por     mm0,mm1                 ; mm0=(11 31 13 33)
+        por     mm2,mm3                 ; mm2=(51 71 53 73)
+        pmaddwd mm0,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd mm2,[GOTOFF(ebx,PW_F085_MF072)]
+
+        paddd   mm4,mm5                 ; mm4=tmp0[col0 col1]
+
+        movq    mm6, MMWORD [MMBLOCK(1,1,esi,SIZEOF_JCOEF)]
+        movq    mm1, MMWORD [MMBLOCK(3,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm6, MMWORD [MMBLOCK(1,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm1, MMWORD [MMBLOCK(3,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movq    mm3, MMWORD [MMBLOCK(5,1,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(7,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm3, MMWORD [MMBLOCK(5,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(7,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; mm6=(** 15 ** 17), mm1=(** 35 ** 37)
+        ; mm3=(** 55 ** 57), mm5=(** 75 ** 77)
+
+        psrld   mm6,WORD_BIT            ; mm6=(15 -- 17 --)
+        pand    mm1,mm7                 ; mm1=(-- 35 -- 37)
+        psrld   mm3,WORD_BIT            ; mm3=(55 -- 57 --)
+        pand    mm5,mm7                 ; mm5=(-- 75 -- 77)
+        por     mm6,mm1                 ; mm6=(15 35 17 37)
+        por     mm3,mm5                 ; mm3=(55 75 57 77)
+        pmaddwd mm6,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd mm3,[GOTOFF(ebx,PW_F085_MF072)]
+
+        paddd   mm0,mm2                 ; mm0=tmp0[col1 col3]
+        paddd   mm6,mm3                 ; mm6=tmp0[col5 col7]
+
+        ; -- Even part
+
+        movq    mm1, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movq    mm5, MMWORD [MMBLOCK(0,1,esi,SIZEOF_JCOEF)]
+        pmullw  mm1, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  mm5, MMWORD [MMBLOCK(0,1,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; mm1=(00 01 ** 03), mm5=(** 05 ** 07)
+
+        movq    mm2,mm1                         ; mm2=(00 01 ** 03)
+        pslld   mm1,WORD_BIT                    ; mm1=(-- 00 -- **)
+        psrad   mm1,(WORD_BIT-CONST_BITS-2)     ; mm1=tmp10[col0 ****]
+
+        pand    mm2,mm7                         ; mm2=(-- 01 -- 03)
+        pand    mm5,mm7                         ; mm5=(-- 05 -- 07)
+        psrad   mm2,(WORD_BIT-CONST_BITS-2)     ; mm2=tmp10[col1 col3]
+        psrad   mm5,(WORD_BIT-CONST_BITS-2)     ; mm5=tmp10[col5 col7]
+
+        ; -- Final output stage
+
+        movq      mm3,mm1
+        paddd     mm1,mm4               ; mm1=data0[col0 ****]=(A0 **)
+        psubd     mm3,mm4               ; mm3=data1[col0 ****]=(B0 **)
+        punpckldq mm1,mm3               ; mm1=(A0 B0)
+
+        movq    mm7,[GOTOFF(ebx,PD_DESCALE_P1_2)]       ; mm7=[PD_DESCALE_P1_2]
+
+        movq    mm4,mm2
+        movq    mm3,mm5
+        paddd   mm2,mm0                 ; mm2=data0[col1 col3]=(A1 A3)
+        paddd   mm5,mm6                 ; mm5=data0[col5 col7]=(A5 A7)
+        psubd   mm4,mm0                 ; mm4=data1[col1 col3]=(B1 B3)
+        psubd   mm3,mm6                 ; mm3=data1[col5 col7]=(B5 B7)
+
+        paddd   mm1,mm7
+        psrad   mm1,DESCALE_P1_2
+
+        paddd   mm2,mm7
+        paddd   mm5,mm7
+        psrad   mm2,DESCALE_P1_2
+        psrad   mm5,DESCALE_P1_2
+        paddd   mm4,mm7
+        paddd   mm3,mm7
+        psrad   mm4,DESCALE_P1_2
+        psrad   mm3,DESCALE_P1_2
+
+        ; ---- Pass 2: process rows, store into output array.
+
+        mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(ebp)]
+
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
+
+        ; -- Odd part
+
+        packssdw  mm2,mm4               ; mm2=(A1 A3 B1 B3)
+        packssdw  mm5,mm3               ; mm5=(A5 A7 B5 B7)
+        pmaddwd   mm2,[GOTOFF(ebx,PW_F362_MF127)]
+        pmaddwd   mm5,[GOTOFF(ebx,PW_F085_MF072)]
+
+        paddd     mm2,mm5               ; mm2=tmp0[row0 row1]
+
+        ; -- Even part
+
+        pslld     mm1,(CONST_BITS+2)    ; mm1=tmp10[row0 row1]
+
+        ; -- Final output stage
+
+        movq      mm0,[GOTOFF(ebx,PD_DESCALE_P2_2)]     ; mm0=[PD_DESCALE_P2_2]
+
+        movq      mm6,mm1
+        paddd     mm1,mm2               ; mm1=data0[row0 row1]=(C0 C1)
+        psubd     mm6,mm2               ; mm6=data1[row0 row1]=(D0 D1)
+
+        paddd     mm1,mm0
+        paddd     mm6,mm0
+        psrad     mm1,DESCALE_P2_2
+        psrad     mm6,DESCALE_P2_2
+
+        movq      mm7,mm1               ; transpose coefficients
+        punpckldq mm1,mm6               ; mm1=(C0 D0)
+        punpckhdq mm7,mm6               ; mm7=(C1 D1)
+
+        packssdw  mm1,mm7               ; mm1=(C0 D0 C1 D1)
+        packsswb  mm1,mm1               ; mm1=(C0 D0 C1 D1 C0 D0 C1 D1)
+        paddb     mm1,[GOTOFF(ebx,PB_CENTERJSAMP)]      ; last use of ebx as the GOT base
+
+        movd    ecx,mm1
+        movd    ebx,mm1                 ; ebx=(C0 D0 C1 D1)
+        shr     ecx,2*BYTE_BIT          ; ecx=(C1 D1 -- --)
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx       ; output row 0 <- (C0 D0)
+        mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx       ; output row 1 <- (C1 D1)
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctred-sse2-64.asm b/simd/jidctred-sse2-64.asm
new file mode 100644
index 0000000..dad43d9
--- /dev/null
+++ b/simd/jidctred-sse2-64.asm
@@ -0,0 +1,576 @@
+;
+; jidctred.asm - reduced-size IDCT (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13
+%define PASS1_BITS      2
+; Right-shift counts for descaling: P1/P2 = pass 1/pass 2, _4/_2 = 4x4/2x2 IDCT.
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)
+; F_x_yyy = FIX(x.yyy): the real constant scaled by 2^CONST_BITS and rounded.
+%if CONST_BITS == 13
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so the
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+; (In the %else branch the literals are the constants pre-scaled by 2^30.)
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+; PW_* are 16-bit pairs fed to pmaddwd; PD_* are dword rounding terms 1<<(n-1).
+        alignz  16
+        global  EXTN(jconst_idct_red_sse2)
+
+EXTN(jconst_idct_red_sse2):
+
+PW_F184_MF076   times 4 dw  F_1_847,-F_0_765
+PW_F256_F089    times 4 dw  F_2_562, F_0_899
+PW_F106_MF217   times 4 dw  F_1_061,-F_2_172
+PW_MF060_MF050  times 4 dw -F_0_601,-F_0_509
+PW_F145_MF021   times 4 dw  F_1_451,-F_0_211
+PW_F362_MF127   times 4 dw  F_3_624,-F_1_272
+PW_F085_MF072   times 4 dw  F_0_850,-F_0_720
+PD_DESCALE_P1_4 times 4 dd  1 << (DESCALE_P1_4-1)
+PD_DESCALE_P2_4 times 4 dd  1 << (DESCALE_P2_4-1)
+PD_DESCALE_P1_2 times 4 dd  1 << (DESCALE_P1_2-1)
+PD_DESCALE_P2_2 times 4 dd  1 << (DESCALE_P2_2-1)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_4x4_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Arguments as used below (presumably placed there by collect_args):
+; r10 = void * dct_table
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13 = JDIMENSION output_col
+; Only r13d is read: JDIMENSION is assumed to be 32 bits wide (TODO confirm).
+%define original_rbp    rbp+0
+%define wk(i)           rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_idct_4x4_sse2)
+
+EXTN(jsimd_idct_4x4_sse2):
+        push    rbp
+        mov     rax,rsp                         ; rax -> saved (original) rbp
+        sub     rsp, byte 4
+        and     rsp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [rsp],rax                       ; [aligned rbp] = original frame ptr
+        mov     rbp,rsp                         ; rbp = aligned rbp
+        lea     rsp, [wk(0)]                    ; reserve wk[WK_NUM] below rbp
+        collect_args
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
+        mov     eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
+        ; OR together coefficient blocks 1,2,3,5,6,7 (block 4 is never read here)
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        por     xmm0,xmm1
+        packsswb xmm0,xmm0
+        packsswb xmm0,xmm0
+        movd    eax,xmm0                ; writing eax zero-extends into rax
+        test    rax,rax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   xmm0,PASS1_BITS
+
+        movdqa    xmm3,xmm0     ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0     ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm3,xmm3     ; xmm3=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm1,xmm0,0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+        pshufd  xmm0,xmm0,0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+        pshufd  xmm6,xmm3,0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+        pshufd  xmm3,xmm3,0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+
+        jmp     near .column_end
+%endif
+.columnDCT:
+
+        ; -- Odd part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movdqa    xmm4,xmm0
+        movdqa    xmm5,xmm0
+        punpcklwd xmm4,xmm1
+        punpckhwd xmm5,xmm1
+        movdqa    xmm0,xmm4
+        movdqa    xmm1,xmm5
+        pmaddwd   xmm4,[rel PW_F256_F089]       ; xmm4=(tmp2L)
+        pmaddwd   xmm5,[rel PW_F256_F089]       ; xmm5=(tmp2H)
+        pmaddwd   xmm0,[rel PW_F106_MF217]      ; xmm0=(tmp0L)
+        pmaddwd   xmm1,[rel PW_F106_MF217]      ; xmm1=(tmp0H)
+
+        movdqa    xmm6,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm6,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm2,xmm6
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm6,[rel PW_MF060_MF050]     ; xmm6=(tmp2L)
+        pmaddwd   xmm7,[rel PW_MF060_MF050]     ; xmm7=(tmp2H)
+        pmaddwd   xmm2,[rel PW_F145_MF021]      ; xmm2=(tmp0L)
+        pmaddwd   xmm3,[rel PW_F145_MF021]      ; xmm3=(tmp0H)
+
+        paddd   xmm6,xmm4               ; xmm6=tmp2L
+        paddd   xmm7,xmm5               ; xmm7=tmp2H
+        paddd   xmm2,xmm0               ; xmm2=tmp0L
+        paddd   xmm3,xmm1               ; xmm3=tmp0H
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp0L
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=tmp0H
+
+        ; -- Even part
+
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(6,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(6,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        pxor      xmm1,xmm1
+        pxor      xmm2,xmm2
+        punpcklwd xmm1,xmm4             ; xmm1=tmp0L
+        punpckhwd xmm2,xmm4             ; xmm2=tmp0H
+        psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+        psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+
+        movdqa    xmm3,xmm5             ; xmm5=in2=z2
+        punpcklwd xmm5,xmm0             ; xmm0=in6=z3
+        punpckhwd xmm3,xmm0
+        pmaddwd   xmm5,[rel PW_F184_MF076]      ; xmm5=tmp2L
+        pmaddwd   xmm3,[rel PW_F184_MF076]      ; xmm3=tmp2H
+
+        movdqa  xmm4,xmm1
+        movdqa  xmm0,xmm2
+        paddd   xmm1,xmm5               ; xmm1=tmp10L
+        paddd   xmm2,xmm3               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp12L
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+        ; -- Final output stage
+
+        movdqa  xmm5,xmm1
+        movdqa  xmm3,xmm2
+        paddd   xmm1,xmm6               ; xmm1=data0L
+        paddd   xmm2,xmm7               ; xmm2=data0H
+        psubd   xmm5,xmm6               ; xmm5=data3L
+        psubd   xmm3,xmm7               ; xmm3=data3H
+
+        movdqa  xmm6,[rel PD_DESCALE_P1_4]      ; xmm6=[rel PD_DESCALE_P1_4]
+
+        paddd   xmm1,xmm6
+        paddd   xmm2,xmm6
+        psrad   xmm1,DESCALE_P1_4
+        psrad   xmm2,DESCALE_P1_4
+        paddd   xmm5,xmm6
+        paddd   xmm3,xmm6
+        psrad   xmm5,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
+
+        packssdw  xmm1,xmm2             ; xmm1=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm5,xmm3             ; xmm5=data3=(30 31 32 33 34 35 36 37)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp0L
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp0H
+
+        movdqa  xmm2,xmm4
+        movdqa  xmm3,xmm0
+        paddd   xmm4,xmm7               ; xmm4=data1L
+        paddd   xmm0,xmm6               ; xmm0=data1H
+        psubd   xmm2,xmm7               ; xmm2=data2L
+        psubd   xmm3,xmm6               ; xmm3=data2H
+
+        movdqa  xmm7,[rel PD_DESCALE_P1_4]      ; xmm7=[rel PD_DESCALE_P1_4]
+
+        paddd   xmm4,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm4,DESCALE_P1_4
+        psrad   xmm0,DESCALE_P1_4
+        paddd   xmm2,xmm7
+        paddd   xmm3,xmm7
+        psrad   xmm2,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
+
+        packssdw  xmm4,xmm0             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm2,xmm3             ; xmm2=data2=(20 21 22 23 24 25 26 27)
+
+        movdqa    xmm6,xmm1     ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm4     ; xmm1=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4     ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm7,xmm2     ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm5     ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm7,xmm5     ; xmm7=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm2     ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm0,xmm2     ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+        movdqa    xmm3,xmm6     ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7     ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm3,xmm7     ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+.column_end:
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows, store into output array.
+
+        ; (no reload of [original_rbp] needed: the arguments are read from r12/r13)
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     eax, r13d       ; rax = output_col, zero-extended from 32 bits
+
+        ; -- Even part
+
+        pxor      xmm4,xmm4
+        punpcklwd xmm4,xmm1             ; xmm4=tmp0
+        psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+
+        ; -- Odd part
+
+        punpckhwd xmm1,xmm0
+        punpckhwd xmm6,xmm3
+        movdqa    xmm5,xmm1
+        movdqa    xmm2,xmm6
+        pmaddwd   xmm1,[rel PW_F256_F089]       ; xmm1=(tmp2)
+        pmaddwd   xmm6,[rel PW_MF060_MF050]     ; xmm6=(tmp2)
+        pmaddwd   xmm5,[rel PW_F106_MF217]      ; xmm5=(tmp0)
+        pmaddwd   xmm2,[rel PW_F145_MF021]      ; xmm2=(tmp0)
+
+        paddd     xmm6,xmm1             ; xmm6=tmp2
+        paddd     xmm2,xmm5             ; xmm2=tmp0
+
+        ; -- Even part
+
+        punpcklwd xmm0,xmm3
+        pmaddwd   xmm0,[rel PW_F184_MF076]      ; xmm0=tmp2
+
+        movdqa    xmm7,xmm4
+        paddd     xmm4,xmm0             ; xmm4=tmp10
+        psubd     xmm7,xmm0             ; xmm7=tmp12
+
+        ; -- Final output stage
+
+        movdqa  xmm1,[rel PD_DESCALE_P2_4]      ; xmm1=[rel PD_DESCALE_P2_4]
+
+        movdqa  xmm5,xmm4
+        movdqa  xmm3,xmm7
+        paddd   xmm4,xmm6               ; xmm4=data0=(00 10 20 30)
+        paddd   xmm7,xmm2               ; xmm7=data1=(01 11 21 31)
+        psubd   xmm5,xmm6               ; xmm5=data3=(03 13 23 33)
+        psubd   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+
+        paddd   xmm4,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm4,DESCALE_P2_4
+        psrad   xmm7,DESCALE_P2_4
+        paddd   xmm5,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm5,DESCALE_P2_4
+        psrad   xmm3,DESCALE_P2_4
+
+        packssdw  xmm4,xmm3             ; xmm4=(00 10 20 30 02 12 22 32)
+        packssdw  xmm7,xmm5             ; xmm7=(01 11 21 31 03 13 23 33)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm7             ; xmm4=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm0,xmm7             ; xmm0=(02 03 12 13 22 23 32 33)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm6,xmm0             ; xmm6=(20 21 22 23 30 31 32 33)
+
+        packsswb  xmm4,xmm6             ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+        paddb     xmm4,[rel PB_CENTERJSAMP]
+
+        pshufd    xmm2,xmm4,0x39        ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+        pshufd    xmm1,xmm4,0x4E        ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+        pshufd    xmm3,xmm4,0x93        ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
+        movd    XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
+        mov     rdx, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
+        movd    XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
+
+        uncollect_args
+        mov     rsp,rbp         ; rsp <- aligned rbp
+        pop     rsp             ; rsp <- original rbp
+        pop     rbp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_2x2_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+; Arguments as used below (presumably placed there by collect_args):
+; r10 = void * dct_table
+; r11 = JCOEFPTR coef_block
+; r12 = JSAMPARRAY output_buf
+; r13 = JDIMENSION output_col
+; Only r13d is read: JDIMENSION is assumed to be 32 bits wide (TODO confirm).
+        align   16
+        global  EXTN(jsimd_idct_2x2_sse2)
+
+EXTN(jsimd_idct_2x2_sse2):
+        push    rbp
+        mov     rax,rsp         ; NOTE(review): presumably read by collect_args
+        mov     rbp,rsp
+        collect_args
+        push    rbx             ; rbx is used as scratch for pextrw below
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     rdx, r10                ; quantptr
+        mov     rsi, r11                ; inptr
+
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
+
+        ; -- Odd part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,rsi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+        ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+
+        pcmpeqd   xmm7,xmm7
+        pslld     xmm7,WORD_BIT         ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+
+        movdqa    xmm4,xmm0             ; xmm4=(10 11 ** 13 ** 15 ** 17)
+        movdqa    xmm5,xmm2             ; xmm5=(50 51 ** 53 ** 55 ** 57)
+        punpcklwd xmm4,xmm1             ; xmm4=(10 30 11 31 ** ** 13 33)
+        punpcklwd xmm5,xmm3             ; xmm5=(50 70 51 71 ** ** 53 73)
+        pmaddwd   xmm4,[rel PW_F362_MF127]
+        pmaddwd   xmm5,[rel PW_F085_MF072]
+
+        psrld   xmm0,WORD_BIT           ; xmm0=(11 -- 13 -- 15 -- 17 --)
+        pand    xmm1,xmm7               ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+        psrld   xmm2,WORD_BIT           ; xmm2=(51 -- 53 -- 55 -- 57 --)
+        pand    xmm3,xmm7               ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+        por     xmm0,xmm1               ; xmm0=(11 31 13 33 15 35 17 37)
+        por     xmm2,xmm3               ; xmm2=(51 71 53 73 55 75 57 77)
+        pmaddwd xmm0,[rel PW_F362_MF127]
+        pmaddwd xmm2,[rel PW_F085_MF072]
+
+        paddd   xmm4,xmm5               ; xmm4=tmp0[col0 col1 **** col3]
+        paddd   xmm0,xmm2               ; xmm0=tmp0[col1 col3 col5 col7]
+
+        ; -- Even part
+
+        movdqa  xmm6, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_JCOEF)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; xmm6=(00 01 ** 03 ** 05 ** 07)
+
+        movdqa  xmm1,xmm6               ; xmm1=(00 01 ** 03 ** 05 ** 07)
+        pslld   xmm6,WORD_BIT           ; xmm6=(-- 00 -- ** -- ** -- **)
+        pand    xmm1,xmm7               ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+        psrad   xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
+        psrad   xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
+
+        ; -- Final output stage
+
+        movdqa  xmm3,xmm6
+        movdqa  xmm5,xmm1
+        paddd   xmm6,xmm4       ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+        paddd   xmm1,xmm0       ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+        psubd   xmm3,xmm4       ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+        psubd   xmm5,xmm0       ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+
+        movdqa  xmm2,[rel PD_DESCALE_P1_2]      ; xmm2=[rel PD_DESCALE_P1_2]
+
+        punpckldq  xmm6,xmm3            ; xmm6=(A0 B0 ** **)
+
+        movdqa     xmm7,xmm1
+        punpcklqdq xmm1,xmm5            ; xmm1=(A1 A3 B1 B3)
+        punpckhqdq xmm7,xmm5            ; xmm7=(A5 A7 B5 B7)
+
+        paddd   xmm6,xmm2
+        psrad   xmm6,DESCALE_P1_2
+
+        paddd   xmm1,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm1,DESCALE_P1_2
+        psrad   xmm7,DESCALE_P1_2
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows, store into output array.
+
+        mov     rdi, r12        ; (JSAMPROW *)
+        mov     eax, r13d       ; rax = output_col, zero-extended from 32 bits
+
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
+
+        ; -- Odd part
+
+        packssdw  xmm1,xmm1             ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+        packssdw  xmm7,xmm7             ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+        pmaddwd   xmm1,[rel PW_F362_MF127]
+        pmaddwd   xmm7,[rel PW_F085_MF072]
+
+        paddd     xmm1,xmm7             ; xmm1=tmp0[row0 row1 row0 row1]
+
+        ; -- Even part
+
+        pslld     xmm6,(CONST_BITS+2)   ; xmm6=tmp10[row0 row1 **** ****]
+
+        ; -- Final output stage
+
+        movdqa    xmm4,xmm6
+        paddd     xmm6,xmm1     ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+        psubd     xmm4,xmm1     ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+
+        punpckldq xmm6,xmm4     ; xmm6=(C0 D0 C1 D1)
+
+        paddd     xmm6,[rel PD_DESCALE_P2_2]
+        psrad     xmm6,DESCALE_P2_2
+
+        packssdw  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+        packsswb  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+        paddb     xmm6,[rel PB_CENTERJSAMP]
+
+        pextrw  ebx,xmm6,0x00           ; ebx=(C0 D0 -- --)
+        pextrw  ecx,xmm6,0x01           ; ecx=(C1 D1 -- --)
+
+        mov     rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
+        mov     rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
+        mov     WORD [rdx+rax*SIZEOF_JSAMPLE], bx
+        mov     WORD [rsi+rax*SIZEOF_JSAMPLE], cx
+
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jidctred-sse2.asm b/simd/jidctred-sse2.asm
new file mode 100644
index 0000000..06dade8
--- /dev/null
+++ b/simd/jidctred-sse2.asm
@@ -0,0 +1,594 @@
+;
+; jidctred.asm - reduced-size IDCT (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler); it
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; This file contains inverse-DCT routines that produce reduced-size
+; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
+; The following code is based directly on the IJG's original jidctred.c;
+; see jidctred.c for more details.
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+
+%define CONST_BITS      13
+%define PASS1_BITS      2
+; Descale shift counts; 1 << (n-1) of each is stored below as PD_DESCALE_*.
+%define DESCALE_P1_4    (CONST_BITS-PASS1_BITS+1)
+%define DESCALE_P2_4    (CONST_BITS+PASS1_BITS+3+1)
+%define DESCALE_P1_2    (CONST_BITS-PASS1_BITS+2)
+%define DESCALE_P2_2    (CONST_BITS+PASS1_BITS+3+2)
+; F_x_yyy = FIX(x.yyy): the real constant scaled by 2^CONST_BITS and rounded.
+%if CONST_BITS == 13
+F_0_211 equ      1730           ; FIX(0.211164243)
+F_0_509 equ      4176           ; FIX(0.509795579)
+F_0_601 equ      4926           ; FIX(0.601344887)
+F_0_720 equ      5906           ; FIX(0.720959822)
+F_0_765 equ      6270           ; FIX(0.765366865)
+F_0_850 equ      6967           ; FIX(0.850430095)
+F_0_899 equ      7373           ; FIX(0.899976223)
+F_1_061 equ      8697           ; FIX(1.061594337)
+F_1_272 equ     10426           ; FIX(1.272758580)
+F_1_451 equ     11893           ; FIX(1.451774981)
+F_1_847 equ     15137           ; FIX(1.847759065)
+F_2_172 equ     17799           ; FIX(2.172734803)
+F_2_562 equ     20995           ; FIX(2.562915447)
+F_3_624 equ     29692           ; FIX(3.624509785)
+%else
+; NASM cannot do compile-time arithmetic on floating-point constants, so the
+%define DESCALE(x,n)  (((x)+(1<<((n)-1)))>>(n))
+F_0_211 equ     DESCALE( 226735879,30-CONST_BITS)       ; FIX(0.211164243)
+F_0_509 equ     DESCALE( 547388834,30-CONST_BITS)       ; FIX(0.509795579)
+F_0_601 equ     DESCALE( 645689155,30-CONST_BITS)       ; FIX(0.601344887)
+F_0_720 equ     DESCALE( 774124714,30-CONST_BITS)       ; FIX(0.720959822)
+F_0_765 equ     DESCALE( 821806413,30-CONST_BITS)       ; FIX(0.765366865)
+F_0_850 equ     DESCALE( 913142361,30-CONST_BITS)       ; FIX(0.850430095)
+F_0_899 equ     DESCALE( 966342111,30-CONST_BITS)       ; FIX(0.899976223)
+F_1_061 equ     DESCALE(1139878239,30-CONST_BITS)       ; FIX(1.061594337)
+F_1_272 equ     DESCALE(1366614119,30-CONST_BITS)       ; FIX(1.272758580)
+F_1_451 equ     DESCALE(1558831516,30-CONST_BITS)       ; FIX(1.451774981)
+F_1_847 equ     DESCALE(1984016188,30-CONST_BITS)       ; FIX(1.847759065)
+F_2_172 equ     DESCALE(2332956230,30-CONST_BITS)       ; FIX(2.172734803)
+F_2_562 equ     DESCALE(2751909506,30-CONST_BITS)       ; FIX(2.562915447)
+F_3_624 equ     DESCALE(3891787747,30-CONST_BITS)       ; FIX(3.624509785)
+%endif
+; (In the %else branch the literals are the constants pre-scaled by 2^30.)
+; --------------------------------------------------------------------------
+        SECTION SEG_CONST
+; PW_* are 16-bit pairs fed to pmaddwd; PD_* are dword rounding terms 1<<(n-1).
+        alignz  16
+        global  EXTN(jconst_idct_red_sse2)
+
+EXTN(jconst_idct_red_sse2):
+
+PW_F184_MF076   times 4 dw  F_1_847,-F_0_765
+PW_F256_F089    times 4 dw  F_2_562, F_0_899
+PW_F106_MF217   times 4 dw  F_1_061,-F_2_172
+PW_MF060_MF050  times 4 dw -F_0_601,-F_0_509
+PW_F145_MF021   times 4 dw  F_1_451,-F_0_211
+PW_F362_MF127   times 4 dw  F_3_624,-F_1_272
+PW_F085_MF072   times 4 dw  F_0_850,-F_0_720
+PD_DESCALE_P1_4 times 4 dd  1 << (DESCALE_P1_4-1)
+PD_DESCALE_P2_4 times 4 dd  1 << (DESCALE_P2_4-1)
+PD_DESCALE_P1_2 times 4 dd  1 << (DESCALE_P1_2-1)
+PD_DESCALE_P2_2 times 4 dd  1 << (DESCALE_P2_2-1)
+PB_CENTERJSAMP  times 16 db CENTERJSAMPLE
+
+        alignz  16
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 4x4 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_4x4_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+;
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+%define original_ebp    ebp+0
+%define wk(i)           ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
+%define WK_NUM          2
+
+        align   16
+        global  EXTN(jsimd_idct_4x4_sse2)
+
+EXTN(jsimd_idct_4x4_sse2):
+        push    ebp
+        mov     eax,esp                         ; eax = original ebp
+        sub     esp, byte 4
+        and     esp, byte (-SIZEOF_XMMWORD)     ; align to 128 bits
+        mov     [esp],eax
+        mov     ebp,esp                         ; ebp = aligned ebp
+        lea     esp, [wk(0)]
+        pushpic ebx
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input.
+
+;       mov     eax, [original_ebp]
+        mov     edx, POINTER [dct_table(eax)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(eax)]         ; inptr
+
+%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
+        mov     eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        or      eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        jnz     short .columnDCT
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        por     xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        por     xmm1, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        por     xmm0,xmm1
+        packsswb xmm0,xmm0
+        packsswb xmm0,xmm0
+        movd    eax,xmm0
+        test    eax,eax
+        jnz     short .columnDCT
+
+        ; -- AC terms all zero
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        psllw   xmm0,PASS1_BITS
+
+        movdqa    xmm3,xmm0     ; xmm0=in0=(00 01 02 03 04 05 06 07)
+        punpcklwd xmm0,xmm0     ; xmm0=(00 00 01 01 02 02 03 03)
+        punpckhwd xmm3,xmm3     ; xmm3=(04 04 05 05 06 06 07 07)
+
+        pshufd  xmm1,xmm0,0x50  ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
+        pshufd  xmm0,xmm0,0xFA  ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
+        pshufd  xmm6,xmm3,0x50  ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
+        pshufd  xmm3,xmm3,0xFA  ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
+
+        jmp     near .column_end
+        alignx  16,7
+%endif
+.columnDCT:
+
+        ; -- Odd part
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        movdqa    xmm4,xmm0
+        movdqa    xmm5,xmm0
+        punpcklwd xmm4,xmm1
+        punpckhwd xmm5,xmm1
+        movdqa    xmm0,xmm4
+        movdqa    xmm1,xmm5
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F256_F089)]       ; xmm4=(tmp2L)
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F256_F089)]       ; xmm5=(tmp2H)
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm0=(tmp0L)
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm1=(tmp0H)
+
+        movdqa    xmm6,xmm2
+        movdqa    xmm7,xmm2
+        punpcklwd xmm6,xmm3
+        punpckhwd xmm7,xmm3
+        movdqa    xmm2,xmm6
+        movdqa    xmm3,xmm7
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2L)
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm7=(tmp2H)
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0L)
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm3=(tmp0H)
+
+        paddd   xmm6,xmm4               ; xmm6=tmp2L
+        paddd   xmm7,xmm5               ; xmm7=tmp2H
+        paddd   xmm2,xmm0               ; xmm2=tmp0L
+        paddd   xmm3,xmm1               ; xmm3=tmp0H
+
+        movdqa  XMMWORD [wk(0)], xmm2   ; wk(0)=tmp0L
+        movdqa  XMMWORD [wk(1)], xmm3   ; wk(1)=tmp0H
+
+        ; -- Even part
+
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm5, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm0, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm4, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm5, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        pxor      xmm1,xmm1
+        pxor      xmm2,xmm2
+        punpcklwd xmm1,xmm4             ; xmm1=tmp0L
+        punpckhwd xmm2,xmm4             ; xmm2=tmp0H
+        psrad     xmm1,(16-CONST_BITS-1) ; psrad xmm1,16 & pslld xmm1,CONST_BITS+1
+        psrad     xmm2,(16-CONST_BITS-1) ; psrad xmm2,16 & pslld xmm2,CONST_BITS+1
+
+        movdqa    xmm3,xmm5             ; xmm5=in2=z2
+        punpcklwd xmm5,xmm0             ; xmm0=in6=z3
+        punpckhwd xmm3,xmm0
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm5=tmp2L
+        pmaddwd   xmm3,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm3=tmp2H
+
+        movdqa  xmm4,xmm1
+        movdqa  xmm0,xmm2
+        paddd   xmm1,xmm5               ; xmm1=tmp10L
+        paddd   xmm2,xmm3               ; xmm2=tmp10H
+        psubd   xmm4,xmm5               ; xmm4=tmp12L
+        psubd   xmm0,xmm3               ; xmm0=tmp12H
+
+        ; -- Final output stage
+
+        movdqa  xmm5,xmm1
+        movdqa  xmm3,xmm2
+        paddd   xmm1,xmm6               ; xmm1=data0L
+        paddd   xmm2,xmm7               ; xmm2=data0H
+        psubd   xmm5,xmm6               ; xmm5=data3L
+        psubd   xmm3,xmm7               ; xmm3=data3H
+
+        movdqa  xmm6,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm6=[PD_DESCALE_P1_4]
+
+        paddd   xmm1,xmm6
+        paddd   xmm2,xmm6
+        psrad   xmm1,DESCALE_P1_4
+        psrad   xmm2,DESCALE_P1_4
+        paddd   xmm5,xmm6
+        paddd   xmm3,xmm6
+        psrad   xmm5,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
+
+        packssdw  xmm1,xmm2             ; xmm1=data0=(00 01 02 03 04 05 06 07)
+        packssdw  xmm5,xmm3             ; xmm5=data3=(30 31 32 33 34 35 36 37)
+
+        movdqa  xmm7, XMMWORD [wk(0)]   ; xmm7=tmp0L
+        movdqa  xmm6, XMMWORD [wk(1)]   ; xmm6=tmp0H
+
+        movdqa  xmm2,xmm4
+        movdqa  xmm3,xmm0
+        paddd   xmm4,xmm7               ; xmm4=data1L
+        paddd   xmm0,xmm6               ; xmm0=data1H
+        psubd   xmm2,xmm7               ; xmm2=data2L
+        psubd   xmm3,xmm6               ; xmm3=data2H
+
+        movdqa  xmm7,[GOTOFF(ebx,PD_DESCALE_P1_4)]      ; xmm7=[PD_DESCALE_P1_4]
+
+        paddd   xmm4,xmm7
+        paddd   xmm0,xmm7
+        psrad   xmm4,DESCALE_P1_4
+        psrad   xmm0,DESCALE_P1_4
+        paddd   xmm2,xmm7
+        paddd   xmm3,xmm7
+        psrad   xmm2,DESCALE_P1_4
+        psrad   xmm3,DESCALE_P1_4
+
+        packssdw  xmm4,xmm0             ; xmm4=data1=(10 11 12 13 14 15 16 17)
+        packssdw  xmm2,xmm3             ; xmm2=data2=(20 21 22 23 24 25 26 27)
+
+        movdqa    xmm6,xmm1     ; transpose coefficients(phase 1)
+        punpcklwd xmm1,xmm4     ; xmm1=(00 10 01 11 02 12 03 13)
+        punpckhwd xmm6,xmm4     ; xmm6=(04 14 05 15 06 16 07 17)
+        movdqa    xmm7,xmm2     ; transpose coefficients(phase 1)
+        punpcklwd xmm2,xmm5     ; xmm2=(20 30 21 31 22 32 23 33)
+        punpckhwd xmm7,xmm5     ; xmm7=(24 34 25 35 26 36 27 37)
+
+        movdqa    xmm0,xmm1     ; transpose coefficients(phase 2)
+        punpckldq xmm1,xmm2     ; xmm1=[col0 col1]=(00 10 20 30 01 11 21 31)
+        punpckhdq xmm0,xmm2     ; xmm0=[col2 col3]=(02 12 22 32 03 13 23 33)
+        movdqa    xmm3,xmm6     ; transpose coefficients(phase 2)
+        punpckldq xmm6,xmm7     ; xmm6=[col4 col5]=(04 14 24 34 05 15 25 35)
+        punpckhdq xmm3,xmm7     ; xmm3=[col6 col7]=(06 16 26 36 07 17 27 37)
+.column_end:
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows, store into output array.
+
+        mov     eax, [original_ebp]
+        mov     edi, JSAMPARRAY [output_buf(eax)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(eax)]
+
+        ; -- Even part
+
+        pxor      xmm4,xmm4
+        punpcklwd xmm4,xmm1             ; xmm4=tmp0
+        psrad     xmm4,(16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
+
+        ; -- Odd part
+
+        punpckhwd xmm1,xmm0
+        punpckhwd xmm6,xmm3
+        movdqa    xmm5,xmm1
+        movdqa    xmm2,xmm6
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F256_F089)]       ; xmm1=(tmp2)
+        pmaddwd   xmm6,[GOTOFF(ebx,PW_MF060_MF050)]     ; xmm6=(tmp2)
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F106_MF217)]      ; xmm5=(tmp0)
+        pmaddwd   xmm2,[GOTOFF(ebx,PW_F145_MF021)]      ; xmm2=(tmp0)
+
+        paddd     xmm6,xmm1             ; xmm6=tmp2
+        paddd     xmm2,xmm5             ; xmm2=tmp0
+
+        ; -- Even part
+
+        punpcklwd xmm0,xmm3
+        pmaddwd   xmm0,[GOTOFF(ebx,PW_F184_MF076)]      ; xmm0=tmp2
+
+        movdqa    xmm7,xmm4
+        paddd     xmm4,xmm0             ; xmm4=tmp10
+        psubd     xmm7,xmm0             ; xmm7=tmp12
+
+        ; -- Final output stage
+
+        movdqa  xmm1,[GOTOFF(ebx,PD_DESCALE_P2_4)]      ; xmm1=[PD_DESCALE_P2_4]
+
+        movdqa  xmm5,xmm4
+        movdqa  xmm3,xmm7
+        paddd   xmm4,xmm6               ; xmm4=data0=(00 10 20 30)
+        paddd   xmm7,xmm2               ; xmm7=data1=(01 11 21 31)
+        psubd   xmm5,xmm6               ; xmm5=data3=(03 13 23 33)
+        psubd   xmm3,xmm2               ; xmm3=data2=(02 12 22 32)
+
+        paddd   xmm4,xmm1
+        paddd   xmm7,xmm1
+        psrad   xmm4,DESCALE_P2_4
+        psrad   xmm7,DESCALE_P2_4
+        paddd   xmm5,xmm1
+        paddd   xmm3,xmm1
+        psrad   xmm5,DESCALE_P2_4
+        psrad   xmm3,DESCALE_P2_4
+
+        packssdw  xmm4,xmm3             ; xmm4=(00 10 20 30 02 12 22 32)
+        packssdw  xmm7,xmm5             ; xmm7=(01 11 21 31 03 13 23 33)
+
+        movdqa    xmm0,xmm4             ; transpose coefficients(phase 1)
+        punpcklwd xmm4,xmm7             ; xmm4=(00 01 10 11 20 21 30 31)
+        punpckhwd xmm0,xmm7             ; xmm0=(02 03 12 13 22 23 32 33)
+
+        movdqa    xmm6,xmm4             ; transpose coefficients(phase 2)
+        punpckldq xmm4,xmm0             ; xmm4=(00 01 02 03 10 11 12 13)
+        punpckhdq xmm6,xmm0             ; xmm6=(20 21 22 23 30 31 32 33)
+
+        packsswb  xmm4,xmm6             ; xmm4=(00 01 02 03 10 11 12 13 20 ..)
+        paddb     xmm4,[GOTOFF(ebx,PB_CENTERJSAMP)]
+
+        pshufd    xmm2,xmm4,0x39        ; xmm2=(10 11 12 13 20 21 22 23 30 ..)
+        pshufd    xmm1,xmm4,0x4E        ; xmm1=(20 21 22 23 30 31 32 33 00 ..)
+        pshufd    xmm3,xmm4,0x93        ; xmm3=(30 31 32 33 00 01 02 03 10 ..)
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm4
+        movd    XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
+        mov     edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
+        mov     esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
+        movd    XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
+        movd    XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+        poppic  ebx
+        mov     esp,ebp         ; esp <- aligned ebp
+        pop     esp             ; esp <- original ebp
+        pop     ebp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Perform dequantization and inverse DCT on one block of coefficients,
+; producing a reduced-size 2x2 output block.
+;
+; GLOBAL(void)
+; jsimd_idct_2x2_sse2 (void * dct_table, JCOEFPTR coef_block,
+;                      JSAMPARRAY output_buf, JDIMENSION output_col)
+; Only coefficient rows/columns 0,1,3,5,7 contribute (see diagrams below).
+
+%define dct_table(b)    (b)+8           ; void * dct_table
+%define coef_block(b)   (b)+12          ; JCOEFPTR coef_block
+%define output_buf(b)   (b)+16          ; JSAMPARRAY output_buf
+%define output_col(b)   (b)+20          ; JDIMENSION output_col
+
+        align   16
+        global  EXTN(jsimd_idct_2x2_sse2)
+
+EXTN(jsimd_idct_2x2_sse2):
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        get_GOT ebx             ; get GOT address
+
+        ; ---- Pass 1: process columns from input.
+
+        mov     edx, POINTER [dct_table(ebp)]           ; quantptr
+        mov     esi, JCOEFPTR [coef_block(ebp)]         ; inptr
+
+        ; | input:                  | result:        |
+        ; | 00 01 ** 03 ** 05 ** 07 |                |
+        ; | 10 11 ** 13 ** 15 ** 17 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 30 31 ** 33 ** 35 ** 37 | A0 A1 A3 A5 A7 |
+        ; | ** ** ** ** ** ** ** ** | B0 B1 B3 B5 B7 |
+        ; | 50 51 ** 53 ** 55 ** 57 |                |
+        ; | ** ** ** ** ** ** ** ** |                |
+        ; | 70 71 ** 73 ** 75 ** 77 |                |
+
+        ; -- Odd part (load rows 1,3,5,7 and dequantize with pmullw)
+
+        movdqa  xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        movdqa  xmm2, XMMWORD [XMMBLOCK(5,0,esi,SIZEOF_JCOEF)]
+        movdqa  xmm3, XMMWORD [XMMBLOCK(7,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm2, XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+        pmullw  xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; xmm0=(10 11 ** 13 ** 15 ** 17), xmm1=(30 31 ** 33 ** 35 ** 37)
+        ; xmm2=(50 51 ** 53 ** 55 ** 57), xmm3=(70 71 ** 73 ** 75 ** 77)
+
+        pcmpeqd   xmm7,xmm7             ; xmm7=(all 1's)
+        pslld     xmm7,WORD_BIT         ; xmm7={0x0000 0xFFFF 0x0000 0xFFFF ..}
+
+        movdqa    xmm4,xmm0             ; xmm4=(10 11 ** 13 ** 15 ** 17)
+        movdqa    xmm5,xmm2             ; xmm5=(50 51 ** 53 ** 55 ** 57)
+        punpcklwd xmm4,xmm1             ; xmm4=(10 30 11 31 ** ** 13 33)
+        punpcklwd xmm5,xmm3             ; xmm5=(50 70 51 71 ** ** 53 73)
+        pmaddwd   xmm4,[GOTOFF(ebx,PW_F362_MF127)]      ; rows 1,3 term
+        pmaddwd   xmm5,[GOTOFF(ebx,PW_F085_MF072)]      ; rows 5,7 term
+
+        psrld   xmm0,WORD_BIT           ; xmm0=(11 -- 13 -- 15 -- 17 --)
+        pand    xmm1,xmm7               ; xmm1=(-- 31 -- 33 -- 35 -- 37)
+        psrld   xmm2,WORD_BIT           ; xmm2=(51 -- 53 -- 55 -- 57 --)
+        pand    xmm3,xmm7               ; xmm3=(-- 71 -- 73 -- 75 -- 77)
+        por     xmm0,xmm1               ; xmm0=(11 31 13 33 15 35 17 37)
+        por     xmm2,xmm3               ; xmm2=(51 71 53 73 55 75 57 77)
+        pmaddwd xmm0,[GOTOFF(ebx,PW_F362_MF127)]        ; rows 1,3 term
+        pmaddwd xmm2,[GOTOFF(ebx,PW_F085_MF072)]        ; rows 5,7 term
+
+        paddd   xmm4,xmm5               ; xmm4=tmp0[col0 col1 **** col3]
+        paddd   xmm0,xmm2               ; xmm0=tmp0[col1 col3 col5 col7]
+
+        ; -- Even part (row 0 only)
+
+        movdqa  xmm6, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
+        pmullw  xmm6, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
+
+        ; xmm6=(00 01 ** 03 ** 05 ** 07)
+
+        movdqa  xmm1,xmm6               ; xmm1=(00 01 ** 03 ** 05 ** 07)
+        pslld   xmm6,WORD_BIT           ; xmm6=(-- 00 -- ** -- ** -- **)
+        pand    xmm1,xmm7               ; xmm1=(-- 01 -- 03 -- 05 -- 07)
+        psrad   xmm6,(WORD_BIT-CONST_BITS-2) ; xmm6=tmp10[col0 **** **** ****]
+        psrad   xmm1,(WORD_BIT-CONST_BITS-2) ; xmm1=tmp10[col1 col3 col5 col7]
+
+        ; -- Final output stage
+
+        movdqa  xmm3,xmm6
+        movdqa  xmm5,xmm1
+        paddd   xmm6,xmm4       ; xmm6=data0[col0 **** **** ****]=(A0 ** ** **)
+        paddd   xmm1,xmm0       ; xmm1=data0[col1 col3 col5 col7]=(A1 A3 A5 A7)
+        psubd   xmm3,xmm4       ; xmm3=data1[col0 **** **** ****]=(B0 ** ** **)
+        psubd   xmm5,xmm0       ; xmm5=data1[col1 col3 col5 col7]=(B1 B3 B5 B7)
+
+        movdqa  xmm2,[GOTOFF(ebx,PD_DESCALE_P1_2)]      ; xmm2=[PD_DESCALE_P1_2]
+
+        punpckldq  xmm6,xmm3            ; xmm6=(A0 B0 ** **)
+
+        movdqa     xmm7,xmm1
+        punpcklqdq xmm1,xmm5            ; xmm1=(A1 A3 B1 B3)
+        punpckhqdq xmm7,xmm5            ; xmm7=(A5 A7 B5 B7)
+
+        paddd   xmm6,xmm2               ; add rounding constant, then descale
+        psrad   xmm6,DESCALE_P1_2
+
+        paddd   xmm1,xmm2
+        paddd   xmm7,xmm2
+        psrad   xmm1,DESCALE_P1_2
+        psrad   xmm7,DESCALE_P1_2
+
+        ; -- Prefetch the next coefficient block
+
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
+        prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
+
+        ; ---- Pass 2: process rows, store into output array.
+
+        mov     edi, JSAMPARRAY [output_buf(ebp)]       ; (JSAMPROW *)
+        mov     eax, JDIMENSION [output_col(ebp)]
+
+        ; | input:| result:|
+        ; | A0 B0 |        |
+        ; | A1 B1 | C0 C1  |
+        ; | A3 B3 | D0 D1  |
+        ; | A5 B5 |        |
+        ; | A7 B7 |        |
+
+        ; -- Odd part
+
+        packssdw  xmm1,xmm1             ; xmm1=(A1 A3 B1 B3 A1 A3 B1 B3)
+        packssdw  xmm7,xmm7             ; xmm7=(A5 A7 B5 B7 A5 A7 B5 B7)
+        pmaddwd   xmm1,[GOTOFF(ebx,PW_F362_MF127)]      ; cols 1,3 term
+        pmaddwd   xmm7,[GOTOFF(ebx,PW_F085_MF072)]      ; cols 5,7 term
+
+        paddd     xmm1,xmm7             ; xmm1=tmp0[row0 row1 row0 row1]
+
+        ; -- Even part
+
+        pslld     xmm6,(CONST_BITS+2)   ; xmm6=tmp10[row0 row1 **** ****]
+
+        ; -- Final output stage
+
+        movdqa    xmm4,xmm6
+        paddd     xmm6,xmm1     ; xmm6=data0[row0 row1 **** ****]=(C0 C1 ** **)
+        psubd     xmm4,xmm1     ; xmm4=data1[row0 row1 **** ****]=(D0 D1 ** **)
+
+        punpckldq xmm6,xmm4     ; xmm6=(C0 D0 C1 D1)
+
+        paddd     xmm6,[GOTOFF(ebx,PD_DESCALE_P2_2)]
+        psrad     xmm6,DESCALE_P2_2
+
+        packssdw  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1)
+        packsswb  xmm6,xmm6             ; xmm6=(C0 D0 C1 D1 C0 D0 C1 D1 ..)
+        paddb     xmm6,[GOTOFF(ebx,PB_CENTERJSAMP)]
+
+        pextrw  ebx,xmm6,0x00           ; ebx=(C0 D0 -- --), GOT ptr is dead here
+        pextrw  ecx,xmm6,0x01           ; ecx=(C1 D1 -- --)
+
+        mov     edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]   ; edx=output_buf[0]
+        mov     esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]   ; esi=output_buf[1]
+        mov     WORD [edx+eax*SIZEOF_JSAMPLE], bx       ; store (C0 D0)
+        mov     WORD [esi+eax*SIZEOF_JSAMPLE], cx       ; store (C1 D1)
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquant-3dn.asm b/simd/jquant-3dn.asm
new file mode 100644
index 0000000..76e19f7
--- /dev/null
+++ b/simd/jquant-3dn.asm
@@ -0,0 +1,233 @@
+;
+; jquant-3dn.asm - sample data conversion and quantization (3DNow! & MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+; and int->float conversion (pi2fd); two sample rows per loop iteration.
+; GLOBAL(void)
+; jsimd_convsamp_float_3dnow (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                             FAST_FLOAT * workspace);
+;
+
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_float_3dnow)
+
+EXTN(jsimd_convsamp_float_3dnow):
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        pcmpeqw  mm7,mm7                ; mm7=(all 1's)
+        psllw    mm7,7                  ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+        packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
+
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop counter (2 rows/iter)
+        alignx  16,7
+.convloop:
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
+
+        psubb   mm0,mm7                         ; mm0=(01234567)
+        psubb   mm1,mm7                         ; mm1=(89ABCDEF)
+        ; '*' = leftover destination bytes, shifted out by the psrad below
+        punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3)
+        punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+        punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+        punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
+
+        punpcklwd mm4,mm2                       ; mm4=(***0***1)
+        punpckhwd mm2,mm2                       ; mm2=(***2***3)
+        punpcklwd mm5,mm0                       ; mm5=(***4***5)
+        punpckhwd mm0,mm0                       ; mm0=(***6***7)
+
+        psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(01)
+        psrad   mm2,(DWORD_BIT-BYTE_BIT)        ; mm2=(23)
+        pi2fd   mm4,mm4                         ; int32 -> float
+        pi2fd   mm2,mm2
+        psrad   mm5,(DWORD_BIT-BYTE_BIT)        ; mm5=(45)
+        psrad   mm0,(DWORD_BIT-BYTE_BIT)        ; mm0=(67)
+        pi2fd   mm5,mm5
+        pi2fd   mm0,mm0
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm2
+        movq    MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm5
+        movq    MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
+
+        punpcklwd mm6,mm3                       ; mm6=(***8***9)
+        punpckhwd mm3,mm3                       ; mm3=(***A***B)
+        punpcklwd mm4,mm1                       ; mm4=(***C***D)
+        punpckhwd mm1,mm1                       ; mm1=(***E***F)
+
+        psrad   mm6,(DWORD_BIT-BYTE_BIT)        ; mm6=(89)
+        psrad   mm3,(DWORD_BIT-BYTE_BIT)        ; mm3=(AB)
+        pi2fd   mm6,mm6
+        pi2fd   mm3,mm3
+        psrad   mm4,(DWORD_BIT-BYTE_BIT)        ; mm4=(CD)
+        psrad   mm1,(DWORD_BIT-BYTE_BIT)        ; mm1=(EF)
+        pi2fd   mm4,mm4
+        pi2fd   mm1,mm1
+
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm6
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm3
+        movq    MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm4
+        movq    MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
+
+        add     esi, byte 2*SIZEOF_JSAMPROW             ; next two input rows
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; next two output rows
+        dec     ecx
+        jnz     near .convloop
+
+        femms           ; empty MMX/3DNow! state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jsimd_quantize_float_3dnow (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                             FAST_FLOAT * workspace);
+; Processes 16 coefficients per loop iteration (DCTSIZE2/16 iterations).
+
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_float_3dnow)
+
+EXTN(jsimd_quantize_float_3dnow):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov       eax, 0x4B400000       ; (float)0x00C00000 (rndint_magic)
+        movd      mm7,eax
+        punpckldq mm7,mm7               ; mm7={12582912.0F 12582912.0F}
+
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/16        ; loop counter
+        alignx  16,7
+.quantloop:
+        movq    mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm3, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
+        ; add rndint_magic: rounded value lands in the low word of each dword
+        pfadd   mm0,mm7                 ; mm0=(00 ** 01 **)
+        pfadd   mm1,mm7                 ; mm1=(02 ** 03 **)
+        pfadd   mm2,mm7                 ; mm2=(04 ** 05 **)
+        pfadd   mm3,mm7                 ; mm3=(06 ** 07 **)
+
+        movq      mm4,mm0
+        punpcklwd mm0,mm1               ; mm0=(00 02 ** **)
+        punpckhwd mm4,mm1               ; mm4=(01 03 ** **)
+        movq      mm5,mm2
+        punpcklwd mm2,mm3               ; mm2=(04 06 ** **)
+        punpckhwd mm5,mm3               ; mm5=(05 07 ** **)
+
+        punpcklwd mm0,mm4               ; mm0=(00 01 02 03)
+        punpcklwd mm2,mm5               ; mm2=(04 05 06 07)
+
+        movq    mm6, MMWORD [MMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm1, MMWORD [MMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm6, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm1, MMWORD [MMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+        movq    mm3, MMWORD [MMBLOCK(1,2,esi,SIZEOF_FAST_FLOAT)]
+        movq    mm4, MMWORD [MMBLOCK(1,3,esi,SIZEOF_FAST_FLOAT)]
+        pfmul   mm3, MMWORD [MMBLOCK(1,2,edx,SIZEOF_FAST_FLOAT)]
+        pfmul   mm4, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
+
+        pfadd   mm6,mm7                 ; mm6=(10 ** 11 **)
+        pfadd   mm1,mm7                 ; mm1=(12 ** 13 **)
+        pfadd   mm3,mm7                 ; mm3=(14 ** 15 **)
+        pfadd   mm4,mm7                 ; mm4=(16 ** 17 **)
+
+        movq      mm5,mm6
+        punpcklwd mm6,mm1               ; mm6=(10 12 ** **)
+        punpckhwd mm5,mm1               ; mm5=(11 13 ** **)
+        movq      mm1,mm3
+        punpcklwd mm3,mm4               ; mm3=(14 16 ** **)
+        punpckhwd mm1,mm4               ; mm1=(15 17 ** **)
+
+        punpcklwd mm6,mm5               ; mm6=(10 11 12 13)
+        punpcklwd mm3,mm1               ; mm3=(14 15 16 17)
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm6
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+
+        add     esi, byte 16*SIZEOF_FAST_FLOAT
+        add     edx, byte 16*SIZEOF_FAST_FLOAT
+        add     edi, byte 16*SIZEOF_JCOEF
+        dec     eax
+        jnz     near .quantloop
+
+        femms           ; empty MMX/3DNow! state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquant-mmx.asm b/simd/jquant-mmx.asm
new file mode 100644
index 0000000..822c7ee
--- /dev/null
+++ b/simd/jquant-mmx.asm
@@ -0,0 +1,274 @@
+;
+; jquant-mmx.asm - sample data conversion and quantization (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+; (zero-extend bytes to words, add -128); four sample rows per iteration.
+; GLOBAL(void)
+; jsimd_convsamp_mmx (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                     DCTELEM * workspace);
+;
+
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_mmx)
+
+EXTN(jsimd_convsamp_mmx):
+        push    ebp
+        mov     ebp,esp
+        push    ebx
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        pxor    mm6,mm6                 ; mm6=(all 0's)
+        pcmpeqw mm7,mm7                 ; mm7=(all 1's)
+        psllw   mm7,7                   ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
+
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4                  ; loop counter (4 rows/iter)
+        alignx  16,7
+.convloop:
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm0=(01234567)
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm1=(89ABCDEF)
+
+        mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    mm2, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; mm2=(GHIJKLMN)
+        movq    mm3, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; mm3=(OPQRSTUV)
+
+        movq      mm4,mm0
+        punpcklbw mm0,mm6               ; mm0=(0123)
+        punpckhbw mm4,mm6               ; mm4=(4567)
+        movq      mm5,mm1
+        punpcklbw mm1,mm6               ; mm1=(89AB)
+        punpckhbw mm5,mm6               ; mm5=(CDEF)
+
+        paddw   mm0,mm7                 ; add 0xFF80 (-128) to each word
+        paddw   mm4,mm7
+        paddw   mm1,mm7
+        paddw   mm5,mm7
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm4
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_DCTELEM)], mm1
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_DCTELEM)], mm5
+
+        movq      mm0,mm2
+        punpcklbw mm2,mm6               ; mm2=(GHIJ)
+        punpckhbw mm0,mm6               ; mm0=(KLMN)
+        movq      mm4,mm3
+        punpcklbw mm3,mm6               ; mm3=(OPQR)
+        punpckhbw mm4,mm6               ; mm4=(STUV)
+
+        paddw   mm2,mm7                 ; add 0xFF80 (-128) to each word
+        paddw   mm0,mm7
+        paddw   mm3,mm7
+        paddw   mm4,mm7
+
+        movq    MMWORD [MMBLOCK(2,0,edi,SIZEOF_DCTELEM)], mm2
+        movq    MMWORD [MMBLOCK(2,1,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(3,0,edi,SIZEOF_DCTELEM)], mm3
+        movq    MMWORD [MMBLOCK(3,1,edi,SIZEOF_DCTELEM)], mm4
+
+        add     esi, byte 4*SIZEOF_JSAMPROW             ; next four input rows
+        add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; next four output rows
+        dec     ecx
+        jnz     short .convloop
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jsimd_quantize_mmx (JCOEFPTR coef_block, DCTELEM * divisors,
+;                     DCTELEM * workspace);
+; divisors = 4 tables of DCTSIZE rows each, indexed by the macros below.
+
+%define RECIPROCAL(m,n,b) MMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b) MMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)      MMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+%define SHIFT(m,n,b)      MMBLOCK(DCTSIZE*3+(m),(n),(b),SIZEOF_DCTELEM)
+; NOTE: SHIFT() is not referenced by the routine below.
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; DCTELEM * divisors
+%define workspace       ebp+16          ; DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_mmx)
+
+EXTN(jsimd_quantize_mmx):
+        push    ebp
+        mov     ebp,esp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     ah, 2                   ; outer loop count
+        alignx  16,7
+.quantloop1:
+        mov     al, DCTSIZE2/8/2        ; inner loop count (8 coefs/iter)
+        alignx  16,7
+.quantloop2:
+        movq    mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
+        movq    mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
+
+        movq    mm0,mm2
+        movq    mm1,mm3
+
+        psraw   mm2,(WORD_BIT-1)  ; -1 if value < 0, 0 otherwise
+        psraw   mm3,(WORD_BIT-1)
+
+        pxor    mm0,mm2   ; val = abs(val): (val ^ mask) - mask
+        pxor    mm1,mm3
+        psubw   mm0,mm2
+        psubw   mm1,mm3
+
+        ;
+        ; MMX is an annoyingly crappy instruction set. It has two
+        ; misfeatures that are causing problems here:
+        ;
+        ; - All multiplications are signed.
+        ;
+        ; - The second operand for the shifts is not treated as packed.
+        ;
+        ;
+        ; We work around the first problem by implementing this algorithm:
+        ;
+        ; unsigned long unsigned_multiply(unsigned short x, unsigned short y)
+        ; {
+        ;   enum { SHORT_BIT = 16 };
+        ;   signed short sx = (signed short) x;
+        ;   signed short sy = (signed short) y;
+        ;   signed long sz;
+        ;
+        ;   sz = (long) sx * (long) sy;     /* signed multiply */
+        ;
+        ;   if (sx < 0) sz += (long) sy << SHORT_BIT;
+        ;   if (sy < 0) sz += (long) sx << SHORT_BIT;
+        ;
+        ;   return (unsigned long) sz;
+        ; }
+        ;
+        ; (note that a negative sx adds _sy_ and vice versa)
+        ;
+        ; For the second problem, we replace the shift by a multiplication.
+        ; Unfortunately that means we have to deal with the signed issue again.
+        ;
+
+        paddw   mm0, MMWORD [CORRECTION(0,0,edx)]   ; correction + roundfactor
+        paddw   mm1, MMWORD [CORRECTION(0,1,edx)]
+
+        movq    mm4,mm0   ; store current value for later
+        movq    mm5,mm1
+        pmulhw  mm0, MMWORD [RECIPROCAL(0,0,edx)]   ; reciprocal
+        pmulhw  mm1, MMWORD [RECIPROCAL(0,1,edx)]
+        paddw   mm0,mm4         ; reciprocal is always negative (MSB=1),
+        paddw   mm1,mm5   ; so we always need to add the initial value
+                        ; (input value is never negative as we
+                        ; inverted it at the start of this routine)
+
+        ; here it gets a bit tricky as both scale
+        ; and mm0/mm1 can be negative
+        movq    mm6, MMWORD [SCALE(0,0,edx)]    ; scale
+        movq    mm7, MMWORD [SCALE(0,1,edx)]
+        movq    mm4,mm0
+        movq    mm5,mm1
+        pmulhw  mm0,mm6
+        pmulhw  mm1,mm7
+
+        psraw   mm6,(WORD_BIT-1)    ; determine if scale is negative
+        psraw   mm7,(WORD_BIT-1)
+
+        pand    mm6,mm4             ; and add input if it is
+        pand    mm7,mm5
+        paddw   mm0,mm6
+        paddw   mm1,mm7
+
+        psraw   mm4,(WORD_BIT-1)    ; then check if negative input
+        psraw   mm5,(WORD_BIT-1)
+
+        pand    mm4, MMWORD [SCALE(0,0,edx)]    ; and add scale if it is
+        pand    mm5, MMWORD [SCALE(0,1,edx)]
+        paddw   mm0,mm4
+        paddw   mm1,mm5
+
+        pxor    mm0,mm2   ; restore original sign: (val ^ mask) - mask
+        pxor    mm1,mm3
+        psubw   mm0,mm2
+        psubw   mm1,mm3
+        ; NOTE(review): output (JCOEF) indexed by SIZEOF_DCTELEM - same size?
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_DCTELEM)], mm0
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_DCTELEM)], mm1
+
+        add     esi, byte 8*SIZEOF_DCTELEM
+        add     edx, byte 8*SIZEOF_DCTELEM
+        add     edi, byte 8*SIZEOF_JCOEF
+        dec     al
+        jnz     near .quantloop2
+        dec     ah
+        jnz     near .quantloop1        ; to avoid branch misprediction
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquant-sse.asm b/simd/jquant-sse.asm
new file mode 100644
index 0000000..3f7fa5d
--- /dev/null
+++ b/simd/jquant-sse.asm
@@ -0,0 +1,211 @@
+;
+; jquant.asm - sample data conversion and quantization (SSE & MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jsimd_convsamp_float_sse (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                           FAST_FLOAT * workspace);
+;
+
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse)
+
+EXTN(jsimd_convsamp_float_sse):         ; recenter sample bytes and store them as floats in workspace
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+        push    ebx                     ; ebx/esi/edi are saved here and restored before ret
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        pcmpeqw  mm7,mm7                ; mm7 = 0xFFFF in every word
+        psllw    mm7,7                  ; mm7 = 0xFF80 in every word
+        packsswb mm7,mm7                ; mm7 = PB_CENTERJSAMPLE (0x808080..)
+
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop count: two rows per pass
+        alignx  16,7
+.convloop:
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]    ; 8 bytes of the first row at start_col
+        movq    mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]    ; 8 bytes of the second row at start_col
+
+        psubb   mm0,mm7                         ; mm0=(01234567)
+        psubb   mm1,mm7                         ; mm1=(89ABCDEF)
+
+        punpcklbw mm2,mm0                       ; mm2=(*0*1*2*3)  (* = don't care)
+        punpckhbw mm0,mm0                       ; mm0=(*4*5*6*7)
+        punpcklbw mm3,mm1                       ; mm3=(*8*9*A*B)
+        punpckhbw mm1,mm1                       ; mm1=(*C*D*E*F)
+
+        punpcklwd mm4,mm2                       ; mm4=(***0***1)
+        punpckhwd mm2,mm2                       ; mm2=(***2***3)
+        punpcklwd mm5,mm0                       ; mm5=(***4***5)
+        punpckhwd mm0,mm0                       ; mm0=(***6***7)
+
+        psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(01)  (arithmetic shift drops the * bytes)
+        psrad     mm2,(DWORD_BIT-BYTE_BIT)      ; mm2=(23)
+        cvtpi2ps  xmm0,mm4                      ; xmm0=(01**)
+        cvtpi2ps  xmm1,mm2                      ; xmm1=(23**)
+        psrad     mm5,(DWORD_BIT-BYTE_BIT)      ; mm5=(45)
+        psrad     mm0,(DWORD_BIT-BYTE_BIT)      ; mm0=(67)
+        cvtpi2ps  xmm2,mm5                      ; xmm2=(45**)
+        cvtpi2ps  xmm3,mm0                      ; xmm3=(67**)
+
+        punpcklwd mm6,mm3                       ; mm6=(***8***9)
+        punpckhwd mm3,mm3                       ; mm3=(***A***B)
+        punpcklwd mm4,mm1                       ; mm4=(***C***D)
+        punpckhwd mm1,mm1                       ; mm1=(***E***F)
+
+        psrad     mm6,(DWORD_BIT-BYTE_BIT)      ; mm6=(89)
+        psrad     mm3,(DWORD_BIT-BYTE_BIT)      ; mm3=(AB)
+        cvtpi2ps  xmm4,mm6                      ; xmm4=(89**)
+        cvtpi2ps  xmm5,mm3                      ; xmm5=(AB**)
+        psrad     mm4,(DWORD_BIT-BYTE_BIT)      ; mm4=(CD)
+        psrad     mm1,(DWORD_BIT-BYTE_BIT)      ; mm1=(EF)
+        cvtpi2ps  xmm6,mm4                      ; xmm6=(CD**)
+        cvtpi2ps  xmm7,mm1                      ; xmm7=(EF**)
+
+        movlhps   xmm0,xmm1                     ; xmm0=(0123)
+        movlhps   xmm2,xmm3                     ; xmm2=(4567)
+        movlhps   xmm4,xmm5                     ; xmm4=(89AB)
+        movlhps   xmm6,xmm7                     ; xmm6=(CDEF)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm0     ; store (0123)
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm2     ; store (4567)
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm4     ; store (89AB)
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6     ; store (CDEF)
+
+        add     esi, byte 2*SIZEOF_JSAMPROW             ; next pair of row pointers
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; skip the two workspace rows just written
+        dec     ecx
+        jnz     near .convloop                          ; loop until ecx reaches zero
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jsimd_quantize_float_sse (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                           FAST_FLOAT * workspace);
+;
+
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_float_sse)
+
+EXTN(jsimd_quantize_float_sse):         ; multiply workspace by divisors, store as packed signed words
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/16        ; loop count: 16 values per pass
+        alignx  16,7
+.quantloop:
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]     ; load 4 workspace floats
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]     ; multiply by matching divisors entries
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+        movhlps  xmm4,xmm0              ; low half of xmm4 = upper two floats of xmm0
+        movhlps  xmm5,xmm1              ; low half of xmm5 = upper two floats of xmm1
+
+        cvtps2pi mm0,xmm0               ; two low floats -> two 32-bit integers
+        cvtps2pi mm1,xmm1
+        cvtps2pi mm4,xmm4
+        cvtps2pi mm5,xmm5
+
+        movhlps  xmm6,xmm2              ; low half of xmm6 = upper two floats of xmm2
+        movhlps  xmm7,xmm3              ; low half of xmm7 = upper two floats of xmm3
+
+        cvtps2pi mm2,xmm2
+        cvtps2pi mm3,xmm3
+        cvtps2pi mm6,xmm6
+        cvtps2pi mm7,xmm7
+
+        packssdw mm0,mm4                ; 2+2 dwords -> 4 words with signed saturation
+        packssdw mm1,mm5
+        packssdw mm2,mm6
+        packssdw mm3,mm7
+
+        movq    MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0     ; store 4 words per movq
+        movq    MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm1
+        movq    MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm2
+        movq    MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm3
+
+        add     esi, byte 16*SIZEOF_FAST_FLOAT  ; all three pointers advance by 16 elements
+        add     edx, byte 16*SIZEOF_FAST_FLOAT
+        add     edi, byte 16*SIZEOF_JCOEF
+        dec     eax
+        jnz     short .quantloop                ; loop until eax reaches zero
+
+        emms            ; empty MMX state
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquantf-sse2-64.asm b/simd/jquantf-sse2-64.asm
new file mode 100644
index 0000000..20e815f
--- /dev/null
+++ b/simd/jquantf-sse2-64.asm
@@ -0,0 +1,158 @@
+;
+; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                            FAST_FLOAT * workspace);
+;
+
+; r10 = JSAMPARRAY sample_data
+; r11 = JDIMENSION start_col
+; r12 = FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse2)
+
+EXTN(jsimd_convsamp_float_sse2):        ; recenter sample bytes and store them as floats in workspace
+        push    rbp
+        mov     rax,rsp                 ; rax = entry stack pointer, set up before collect_args
+        mov     rbp,rsp
+        collect_args                    ; arguments are expected in r10/r11/r12 afterwards
+        push    rbx
+
+        pcmpeqw  xmm7,xmm7              ; xmm7 = 0xFFFF in every word
+        psllw    xmm7,7                 ; xmm7 = 0xFF80 in every word
+        packsswb xmm7,xmm7              ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+
+        mov     rsi, r10                ; (JSAMPROW *)
+        mov     eax, r11d               ; start_col is 32-bit: zero-extend, upper half of r11 is undefined
+        mov     rdi, r12                ; (FAST_FLOAT *)
+        mov     rcx, DCTSIZE/2          ; loop count: two rows per pass
+.convloop:
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]       ; 8 bytes of the first row
+        movq    xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]       ; 8 bytes of the second row
+
+        psubb   xmm0,xmm7                       ; xmm0=(01234567)
+        psubb   xmm1,xmm7                       ; xmm1=(89ABCDEF)
+
+        punpcklbw xmm0,xmm0                     ; xmm0=(*0*1*2*3*4*5*6*7)
+        punpcklbw xmm1,xmm1                     ; xmm1=(*8*9*A*B*C*D*E*F)
+
+        punpcklwd xmm2,xmm0                     ; xmm2=(***0***1***2***3)
+        punpckhwd xmm0,xmm0                     ; xmm0=(***4***5***6***7)
+        punpcklwd xmm3,xmm1                     ; xmm3=(***8***9***A***B)
+        punpckhwd xmm1,xmm1                     ; xmm1=(***C***D***E***F)
+
+        psrad     xmm2,(DWORD_BIT-BYTE_BIT)     ; xmm2=(0123), arithmetic shift drops the * bytes
+        psrad     xmm0,(DWORD_BIT-BYTE_BIT)     ; xmm0=(4567)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=(0123) as floats
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=(4567) as floats
+        psrad     xmm3,(DWORD_BIT-BYTE_BIT)     ; xmm3=(89AB)
+        psrad     xmm1,(DWORD_BIT-BYTE_BIT)     ; xmm1=(CDEF)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=(89AB) as floats
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=(CDEF) as floats
+
+        movaps  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm2     ; store (0123)
+        movaps  XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm0     ; store (4567)
+        movaps  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3     ; store (89AB)
+        movaps  XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1     ; store (CDEF)
+
+        add     rsi, byte 2*SIZEOF_JSAMPROW             ; next pair of row pointers
+        add     rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; skip the two workspace rows just written
+        dec     rcx
+        jnz     short .convloop                         ; loop until rcx reaches zero
+
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                            FAST_FLOAT * workspace);
+;
+
+; r10 = JCOEFPTR coef_block
+; r11 = FAST_FLOAT * divisors
+; r12 = FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_float_sse2)
+
+EXTN(jsimd_quantize_float_sse2):        ; multiply workspace by divisors, store as packed signed words
+        push    rbp
+        mov     rax,rsp                 ; rax = entry stack pointer, set up before collect_args
+        mov     rbp,rsp
+        collect_args                    ; arguments are expected in r10/r11/r12 afterwards
+
+        mov rsi, r12                    ; workspace
+        mov rdx, r11                    ; divisors
+        mov rdi, r10                    ; coef_block
+        mov     rax, DCTSIZE2/16        ; loop count: 16 values per pass
+.quantloop:
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]     ; load 4 workspace floats
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_FAST_FLOAT)]     ; multiply by matching divisors entries
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,rdx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,rsi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,rdx,SIZEOF_FAST_FLOAT)]
+
+        cvtps2dq xmm0,xmm0              ; four floats -> four 32-bit integers
+        cvtps2dq xmm1,xmm1
+        cvtps2dq xmm2,xmm2
+        cvtps2dq xmm3,xmm3
+
+        packssdw xmm0,xmm1              ; 4+4 dwords -> 8 words with signed saturation
+        packssdw xmm2,xmm3
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_JCOEF)], xmm0  ; store 8 words
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2  ; store 8 words
+
+        add     rsi, byte 16*SIZEOF_FAST_FLOAT  ; all three pointers advance by 16 elements
+        add     rdx, byte 16*SIZEOF_FAST_FLOAT
+        add     rdi, byte 16*SIZEOF_JCOEF
+        dec     rax
+        jnz     short .quantloop                ; loop until rax reaches zero
+
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquantf-sse2.asm b/simd/jquantf-sse2.asm
new file mode 100644
index 0000000..b1d3efc
--- /dev/null
+++ b/simd/jquantf-sse2.asm
@@ -0,0 +1,171 @@
+;
+; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                            FAST_FLOAT * workspace);
+;
+
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_float_sse2)
+
+EXTN(jsimd_convsamp_float_sse2):        ; recenter sample bytes and store them as floats in workspace
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+        push    ebx                     ; ebx/esi/edi are saved here and restored before ret
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        pcmpeqw  xmm7,xmm7              ; xmm7 = 0xFFFF in every word
+        psllw    xmm7,7                 ; xmm7 = 0xFF80 in every word
+        packsswb xmm7,xmm7              ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
+
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (FAST_FLOAT *)
+        mov     ecx, DCTSIZE/2                  ; loop count: two rows per pass
+        alignx  16,7
+.convloop:
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]       ; 8 bytes of the first row
+        movq    xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]       ; 8 bytes of the second row
+
+        psubb   xmm0,xmm7                       ; xmm0=(01234567)
+        psubb   xmm1,xmm7                       ; xmm1=(89ABCDEF)
+
+        punpcklbw xmm0,xmm0                     ; xmm0=(*0*1*2*3*4*5*6*7)
+        punpcklbw xmm1,xmm1                     ; xmm1=(*8*9*A*B*C*D*E*F)
+
+        punpcklwd xmm2,xmm0                     ; xmm2=(***0***1***2***3)
+        punpckhwd xmm0,xmm0                     ; xmm0=(***4***5***6***7)
+        punpcklwd xmm3,xmm1                     ; xmm3=(***8***9***A***B)
+        punpckhwd xmm1,xmm1                     ; xmm1=(***C***D***E***F)
+
+        psrad     xmm2,(DWORD_BIT-BYTE_BIT)     ; xmm2=(0123)  (arithmetic shift drops the * bytes)
+        psrad     xmm0,(DWORD_BIT-BYTE_BIT)     ; xmm0=(4567)
+        cvtdq2ps  xmm2,xmm2                     ; xmm2=(0123)
+        cvtdq2ps  xmm0,xmm0                     ; xmm0=(4567)
+        psrad     xmm3,(DWORD_BIT-BYTE_BIT)     ; xmm3=(89AB)
+        psrad     xmm1,(DWORD_BIT-BYTE_BIT)     ; xmm1=(CDEF)
+        cvtdq2ps  xmm3,xmm3                     ; xmm3=(89AB)
+        cvtdq2ps  xmm1,xmm1                     ; xmm1=(CDEF)
+
+        movaps  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm2     ; store (0123)
+        movaps  XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0     ; store (4567)
+        movaps  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3     ; store (89AB)
+        movaps  XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1     ; store (CDEF)
+
+        add     esi, byte 2*SIZEOF_JSAMPROW             ; next pair of row pointers
+        add     edi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT   ; skip the two workspace rows just written
+        dec     ecx
+        jnz     short .convloop                         ; loop until ecx reaches zero
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; GLOBAL(void)
+; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+;                            FAST_FLOAT * workspace);
+;
+
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; FAST_FLOAT * divisors
+%define workspace       ebp+16          ; FAST_FLOAT * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_float_sse2)
+
+EXTN(jsimd_quantize_float_sse2):        ; multiply workspace by divisors, store as packed signed words
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/16        ; loop count: 16 values per pass
+        alignx  16,7
+.quantloop:
+        movaps  xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]     ; load 4 workspace floats
+        movaps  xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]     ; multiply by matching divisors entries
+        mulps   xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
+        movaps  xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
+        movaps  xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
+        mulps   xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
+        mulps   xmm3, XMMWORD [XMMBLOCK(1,1,edx,SIZEOF_FAST_FLOAT)]
+
+        cvtps2dq xmm0,xmm0              ; four floats -> four 32-bit integers
+        cvtps2dq xmm1,xmm1
+        cvtps2dq xmm2,xmm2
+        cvtps2dq xmm3,xmm3
+
+        packssdw xmm0,xmm1              ; 4+4 dwords -> 8 words with signed saturation
+        packssdw xmm2,xmm3
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_JCOEF)], xmm0  ; store 8 words
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_JCOEF)], xmm2  ; store 8 words
+
+        add     esi, byte 16*SIZEOF_FAST_FLOAT  ; all three pointers advance by 16 elements
+        add     edx, byte 16*SIZEOF_FAST_FLOAT
+        add     edi, byte 16*SIZEOF_JCOEF
+        dec     eax
+        jnz     short .quantloop                ; loop until eax reaches zero
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquanti-sse2-64.asm b/simd/jquanti-sse2-64.asm
new file mode 100644
index 0000000..50b8dce
--- /dev/null
+++ b/simd/jquanti-sse2-64.asm
@@ -0,0 +1,187 @@
+;
+; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2009 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    64
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                      DCTELEM * workspace);
+;
+
+; r10 = JSAMPARRAY sample_data
+; r11 = JDIMENSION start_col
+; r12 = DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_sse2)
+
+EXTN(jsimd_convsamp_sse2):              ; widen sample bytes to words, recenter, store in workspace
+        push    rbp
+        mov     rax,rsp                 ; rax = entry stack pointer, set up before collect_args
+        mov     rbp,rsp
+        collect_args                    ; arguments are expected in r10/r11/r12 afterwards
+        push    rbx
+
+        pxor    xmm6,xmm6               ; xmm6=(all 0's)
+        pcmpeqw xmm7,xmm7               ; xmm7 = 0xFFFF in every word
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+        mov     rsi, r10                ; (JSAMPROW *)
+        mov     eax, r11d               ; start_col is 32-bit: zero-extend, upper half of r11 is undefined
+        mov     rdi, r12                ; (DCTELEM *)
+        mov     rcx, DCTSIZE/4          ; loop count: four rows per pass
+.convloop:
+        mov     rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]       ; xmm0=(01234567)
+        movq    xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]       ; xmm1=(89ABCDEF)
+
+        mov     rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     rdx, JSAMPROW [rsi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm2, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE]       ; xmm2=(GHIJKLMN)
+        movq    xmm3, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE]       ; xmm3=(OPQRSTUV)
+
+        punpcklbw xmm0,xmm6             ; xmm0=(01234567), bytes zero-extended to words
+        punpcklbw xmm1,xmm6             ; xmm1=(89ABCDEF)
+        paddw     xmm0,xmm7             ; add 0xFF80 (-128 as a signed word) to recenter
+        paddw     xmm1,xmm7
+        punpcklbw xmm2,xmm6             ; xmm2=(GHIJKLMN)
+        punpcklbw xmm3,xmm6             ; xmm3=(OPQRSTUV)
+        paddw     xmm2,xmm7
+        paddw     xmm3,xmm7
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0        ; one workspace row per store
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
+
+        add     rsi, byte 4*SIZEOF_JSAMPROW             ; next four row pointers
+        add     rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; skip the four workspace rows just written
+        dec     rcx
+        jnz     short .convloop                         ; loop until rcx reaches zero
+
+        pop     rbx
+        uncollect_args
+        pop     rbp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
+;                      DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)      XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+
+; r10 = JCOEFPTR coef_block
+; r11 = DCTELEM * divisors
+; r12 = DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_sse2)
+
+EXTN(jsimd_quantize_sse2):              ; quantize workspace words via the divisors tables into coef_block
+        push    rbp
+        mov     rax,rsp                 ; rax = entry stack pointer, set up before collect_args
+        mov     rbp,rsp
+        collect_args                    ; arguments are expected in r10/r11/r12 afterwards
+
+        mov rsi, r12                    ; workspace
+        mov rdx, r11                    ; divisors
+        mov rdi, r10                    ; coef_block
+        mov     rax, DCTSIZE2/32        ; loop count: 32 values per pass
+.quantloop:
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)]        ; load four rows of workspace
+        movdqa  xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(2,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(3,0,rsi,SIZEOF_DCTELEM)]
+        movdqa  xmm0,xmm4               ; xmm0-3 keep the values, xmm4-7 become sign masks
+        movdqa  xmm1,xmm5
+        movdqa  xmm2,xmm6
+        movdqa  xmm3,xmm7
+        psraw   xmm4,(WORD_BIT-1)       ; sign mask: all ones where the word is negative, else zero
+        psraw   xmm5,(WORD_BIT-1)
+        psraw   xmm6,(WORD_BIT-1)
+        psraw   xmm7,(WORD_BIT-1)
+        pxor    xmm0,xmm4               ; (x ^ mask) - mask negates exactly the negative words
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4               ; if (xmm0 < 0) xmm0 = -xmm0;
+        psubw   xmm1,xmm5               ; if (xmm1 < 0) xmm1 = -xmm1;
+        psubw   xmm2,xmm6               ; if (xmm2 < 0) xmm2 = -xmm2;
+        psubw   xmm3,xmm7               ; if (xmm3 < 0) xmm3 = -xmm3;
+
+        paddw   xmm0, XMMWORD [CORRECTION(0,0,rdx)]  ; correction + roundfactor
+        paddw   xmm1, XMMWORD [CORRECTION(1,0,rdx)]
+        paddw   xmm2, XMMWORD [CORRECTION(2,0,rdx)]
+        paddw   xmm3, XMMWORD [CORRECTION(3,0,rdx)]
+        pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,rdx)]  ; reciprocal (keeps high word of unsigned product)
+        pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,rdx)]
+        pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,rdx)]
+        pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,rdx)]
+        pmulhuw xmm0, XMMWORD [SCALE(0,0,rdx)]  ; scale
+        pmulhuw xmm1, XMMWORD [SCALE(1,0,rdx)]
+        pmulhuw xmm2, XMMWORD [SCALE(2,0,rdx)]
+        pmulhuw xmm3, XMMWORD [SCALE(3,0,rdx)]
+
+        pxor    xmm0,xmm4               ; reapply the saved sign masks to the quotients
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4
+        psubw   xmm1,xmm5
+        psubw   xmm2,xmm6
+        psubw   xmm3,xmm7
+        movdqa  XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_DCTELEM)], xmm0        ; NOTE(review): indexed with SIZEOF_DCTELEM,
+        movdqa  XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_DCTELEM)], xmm1        ; while rdi advances by SIZEOF_JCOEF below;
+        movdqa  XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_DCTELEM)], xmm2        ; presumably the two sizes are equal - confirm
+        movdqa  XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
+
+        add     rsi, byte 32*SIZEOF_DCTELEM     ; all three pointers advance by 32 elements
+        add     rdx, byte 32*SIZEOF_DCTELEM
+        add     rdi, byte 32*SIZEOF_JCOEF
+        dec     rax
+        jnz     near .quantloop                 ; loop until rax reaches zero
+
+        uncollect_args
+        pop     rbp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jquanti-sse2.asm b/simd/jquanti-sse2.asm
new file mode 100644
index 0000000..79ca3db
--- /dev/null
+++ b/simd/jquanti-sse2.asm
@@ -0,0 +1,200 @@
+;
+; jquanti.asm - sample data conversion and quantization (SSE2)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+%include "jdct.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Load data into workspace, applying unsigned->signed conversion
+;
+; GLOBAL(void)
+; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col,
+;                      DCTELEM * workspace);
+;
+
+%define sample_data     ebp+8           ; JSAMPARRAY sample_data
+%define start_col       ebp+12          ; JDIMENSION start_col
+%define workspace       ebp+16          ; DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_convsamp_sse2)
+
+EXTN(jsimd_convsamp_sse2):              ; widen sample bytes to words, recenter, store in workspace
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+        push    ebx                     ; ebx/esi/edi are saved here and restored before ret
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        pxor    xmm6,xmm6               ; xmm6=(all 0's)
+        pcmpeqw xmm7,xmm7               ; xmm7 = 0xFFFF in every word
+        psllw   xmm7,7                  ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
+
+        mov     esi, JSAMPARRAY [sample_data]   ; (JSAMPROW *)
+        mov     eax, JDIMENSION [start_col]
+        mov     edi, POINTER [workspace]        ; (DCTELEM *)
+        mov     ecx, DCTSIZE/4                  ; loop count: four rows per pass
+        alignx  16,7
+.convloop:
+        mov     ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]       ; xmm0=(01234567)
+        movq    xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]       ; xmm1=(89ABCDEF)
+
+        mov     ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+        mov     edx, JSAMPROW [esi+3*SIZEOF_JSAMPROW]   ; (JSAMPLE *)
+
+        movq    xmm2, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]       ; xmm2=(GHIJKLMN)
+        movq    xmm3, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]       ; xmm3=(OPQRSTUV)
+
+        punpcklbw xmm0,xmm6             ; xmm0=(01234567)
+        punpcklbw xmm1,xmm6             ; xmm1=(89ABCDEF)
+        paddw     xmm0,xmm7             ; add 0xFF80 (-128 as a signed word) to recenter
+        paddw     xmm1,xmm7
+        punpcklbw xmm2,xmm6             ; xmm2=(GHIJKLMN)
+        punpcklbw xmm3,xmm6             ; xmm3=(OPQRSTUV)
+        paddw     xmm2,xmm7
+        paddw     xmm3,xmm7
+
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0        ; one workspace row per store
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1
+        movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2
+        movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+        add     esi, byte 4*SIZEOF_JSAMPROW             ; next four row pointers
+        add     edi, byte 4*DCTSIZE*SIZEOF_DCTELEM      ; skip the four workspace rows just written
+        dec     ecx
+        jnz     short .convloop                         ; loop until ecx reaches zero
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        pop     ebp
+        ret
+
+; --------------------------------------------------------------------------
+;
+; Quantize/descale the coefficients, and store into coef_block
+;
+; This implementation is based on an algorithm described in
+;   "How to optimize for the Pentium family of microprocessors"
+;   (http://www.agner.org/assem/).
+;
+; GLOBAL(void)
+; jsimd_quantize_sse2 (JCOEFPTR coef_block, DCTELEM * divisors,
+;                      DCTELEM * workspace);
+;
+
+%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
+%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
+%define SCALE(m,n,b)      XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
+
+%define coef_block      ebp+8           ; JCOEFPTR coef_block
+%define divisors        ebp+12          ; DCTELEM * divisors
+%define workspace       ebp+16          ; DCTELEM * workspace
+
+        align   16
+        global  EXTN(jsimd_quantize_sse2)
+
+EXTN(jsimd_quantize_sse2):              ; quantize workspace words via the divisors tables into coef_block
+        push    ebp
+        mov     ebp,esp                 ; arguments are read relative to ebp
+;       push    ebx             ; unused
+;       push    ecx             ; unused
+;       push    edx             ; need not be preserved
+        push    esi
+        push    edi
+
+        mov     esi, POINTER [workspace]
+        mov     edx, POINTER [divisors]
+        mov     edi, JCOEFPTR [coef_block]
+        mov     eax, DCTSIZE2/32        ; loop count: 32 values per pass
+        alignx  16,7
+.quantloop:
+        movdqa  xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]        ; load four rows of workspace
+        movdqa  xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
+        movdqa  xmm0,xmm4               ; xmm0-3 keep the values, xmm4-7 become sign masks
+        movdqa  xmm1,xmm5
+        movdqa  xmm2,xmm6
+        movdqa  xmm3,xmm7
+        psraw   xmm4,(WORD_BIT-1)       ; sign mask: all ones where the word is negative, else zero
+        psraw   xmm5,(WORD_BIT-1)
+        psraw   xmm6,(WORD_BIT-1)
+        psraw   xmm7,(WORD_BIT-1)
+        pxor    xmm0,xmm4               ; (x ^ mask) - mask negates exactly the negative words
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4               ; if (xmm0 < 0) xmm0 = -xmm0;
+        psubw   xmm1,xmm5               ; if (xmm1 < 0) xmm1 = -xmm1;
+        psubw   xmm2,xmm6               ; if (xmm2 < 0) xmm2 = -xmm2;
+        psubw   xmm3,xmm7               ; if (xmm3 < 0) xmm3 = -xmm3;
+
+        paddw   xmm0, XMMWORD [CORRECTION(0,0,edx)]  ; correction + roundfactor
+        paddw   xmm1, XMMWORD [CORRECTION(1,0,edx)]
+        paddw   xmm2, XMMWORD [CORRECTION(2,0,edx)]
+        paddw   xmm3, XMMWORD [CORRECTION(3,0,edx)]
+        pmulhuw xmm0, XMMWORD [RECIPROCAL(0,0,edx)]  ; reciprocal (keeps high word of unsigned product)
+        pmulhuw xmm1, XMMWORD [RECIPROCAL(1,0,edx)]
+        pmulhuw xmm2, XMMWORD [RECIPROCAL(2,0,edx)]
+        pmulhuw xmm3, XMMWORD [RECIPROCAL(3,0,edx)]
+        pmulhuw xmm0, XMMWORD [SCALE(0,0,edx)]  ; scale
+        pmulhuw xmm1, XMMWORD [SCALE(1,0,edx)]
+        pmulhuw xmm2, XMMWORD [SCALE(2,0,edx)]
+        pmulhuw xmm3, XMMWORD [SCALE(3,0,edx)]
+
+        pxor    xmm0,xmm4               ; reapply the saved sign masks to the quotients
+        pxor    xmm1,xmm5
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        psubw   xmm0,xmm4
+        psubw   xmm1,xmm5
+        psubw   xmm2,xmm6
+        psubw   xmm3,xmm7
+        movdqa  XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], xmm0        ; NOTE(review): indexed with SIZEOF_DCTELEM,
+        movdqa  XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_DCTELEM)], xmm1        ; while edi advances by SIZEOF_JCOEF below;
+        movdqa  XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], xmm2        ; presumably the two sizes are equal - confirm
+        movdqa  XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_DCTELEM)], xmm3
+
+        add     esi, byte 32*SIZEOF_DCTELEM     ; all three pointers advance by 32 elements
+        add     edx, byte 32*SIZEOF_DCTELEM
+        add     edi, byte 32*SIZEOF_JCOEF
+        dec     eax
+        jnz     near .quantloop                 ; loop until eax reaches zero
+
+        pop     edi
+        pop     esi
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; unused
+;       pop     ebx             ; unused
+        pop     ebp
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jsimd.h b/simd/jsimd.h
new file mode 100644
index 0000000..609b91f
--- /dev/null
+++ b/simd/jsimd.h
@@ -0,0 +1,674 @@
+/*
+ * simd/jsimd.h
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
+ * Copyright (C) 2013-2014, MIPS Technologies, Inc., California
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ */
+
+/* Bitmask for supported acceleration methods.  Each flag below is a distinct
+ * bit, so several can be OR'ed into one word; JSIMD_NONE has no bits set. */
+#define JSIMD_NONE       0x00
+#define JSIMD_MMX        0x01
+#define JSIMD_3DNOW      0x02
+#define JSIMD_SSE        0x04
+#define JSIMD_SSE2       0x08
+#define JSIMD_ARM_NEON   0x10
+#define JSIMD_MIPS_DSPR2 0x20
+
+/* SIMD Ext: retrieve SIMD/CPU information */
+EXTERN(unsigned int) jpeg_simd_cpu_support (void);
+
+/* RGB & extended RGB --> YCC Colorspace Conversion */
+EXTERN(void) jsimd_rgb_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_ycc_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+extern const int jconst_rgb_ycc_convert_sse2[];
+EXTERN(void) jsimd_rgb_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_ycc_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+EXTERN(void) jsimd_rgb_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_ycc_convert_neon
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+/* RGB & extended RGB --> Grayscale Colorspace Conversion */
+EXTERN(void) jsimd_rgb_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_gray_convert_mmx
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+extern const int jconst_rgb_gray_convert_sse2[];
+EXTERN(void) jsimd_rgb_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_gray_convert_sse2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows);
+
+/* YCC --> RGB & extended RGB Colorspace Conversion */
+EXTERN(void) jsimd_ycc_rgb_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgb_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgbx_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgr_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgrx_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxbgr_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxrgb_convert_mmx
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+
+extern const int jconst_ycc_rgb_convert_sse2[];
+EXTERN(void) jsimd_ycc_rgb_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgb_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgbx_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgr_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgrx_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxbgr_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxrgb_convert_sse2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+
+EXTERN(void) jsimd_ycc_rgb_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgb_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgbx_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgr_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgrx_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxbgr_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxrgb_convert_neon
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+
+EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
+        (JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
+         JSAMPARRAY output_buf, int num_rows);
+
+/* NULL Colorspace Conversion */
+EXTERN(void) jsimd_c_null_convert_mips_dspr2
+        (JDIMENSION img_width, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+         JDIMENSION output_row, int num_rows, int num_components);
+
+/* h2v1 Downsampling */
+EXTERN(void) jsimd_h2v1_downsample_mmx
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v1_downsample_sse2
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v1_downsample_mips_dspr2
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+/* h2v2 Downsampling */
+EXTERN(void) jsimd_h2v2_downsample_mmx
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v2_downsample_sse2
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v2_downsample_mips_dspr2
+        (JDIMENSION image_width, int max_v_samp_factor,
+         JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+         JSAMPARRAY input_data, JSAMPARRAY output_data);
+
+/* h2v2 Smooth Downsampling */
+EXTERN(void) jsimd_h2v2_smooth_downsample_mips_dspr2
+        (JSAMPARRAY input_data, JSAMPARRAY output_data,
+         JDIMENSION v_samp_factor, int max_v_samp_factor,
+         int smoothing_factor, JDIMENSION width_blocks,
+         JDIMENSION image_width);
+
+
+/* Upsampling */
+EXTERN(void) jsimd_h2v1_upsample_mmx
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_upsample_mmx
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+
+EXTERN(void) jsimd_h2v1_upsample_sse2
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_upsample_sse2
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+
+EXTERN(void) jsimd_h2v1_upsample_mips_dspr2
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
+        (int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr);
+
+EXTERN(void) jsimd_int_upsample_mips_dspr2
+        (UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data,
+         JSAMPARRAY * output_data_ptr, JDIMENSION output_width,
+         int max_v_samp_factor);
+
+
+/* Fancy Upsampling */
+EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_fancy_upsample_mmx
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+extern const int jconst_fancy_upsample_sse2[];
+EXTERN(void) jsimd_h2v1_fancy_upsample_sse2
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_fancy_upsample_sse2
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+EXTERN(void) jsimd_h2v1_fancy_upsample_neon
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+EXTERN(void) jsimd_h2v1_fancy_upsample_mips_dspr2
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2
+        (int max_v_samp_factor, JDIMENSION downsampled_width,
+         JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
+
+/* Merged Upsampling */
+EXTERN(void) jsimd_h2v1_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+EXTERN(void) jsimd_h2v2_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mmx
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+extern const int jconst_merged_upsample_sse2[];
+EXTERN(void) jsimd_h2v1_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+EXTERN(void) jsimd_h2v2_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_sse2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
+
+EXTERN(void) jsimd_h2v1_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+
+EXTERN(void) jsimd_h2v2_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2
+        (JDIMENSION output_width, JSAMPIMAGE input_buf,
+         JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf, JSAMPLE* range);
+
+/* Sample Conversion */
+EXTERN(void) jsimd_convsamp_mmx
+        (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
+
+EXTERN(void) jsimd_convsamp_sse2
+        (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
+
+EXTERN(void) jsimd_convsamp_neon
+        (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
+
+EXTERN(void) jsimd_convsamp_mips_dspr2
+        (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace);
+
+/* Floating Point Sample Conversion */
+EXTERN(void) jsimd_convsamp_float_3dnow
+        (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_convsamp_float_sse
+        (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_convsamp_float_sse2
+        (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_convsamp_float_mips_dspr2
+        (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace);
+
+/* Slow Integer Forward DCT */
+EXTERN(void) jsimd_fdct_islow_mmx (DCTELEM * data);
+
+extern const int jconst_fdct_islow_sse2[];
+EXTERN(void) jsimd_fdct_islow_sse2 (DCTELEM * data);
+
+EXTERN(void) jsimd_fdct_islow_mips_dspr2 (DCTELEM * data);
+
+/* Fast Integer Forward DCT */
+EXTERN(void) jsimd_fdct_ifast_mmx (DCTELEM * data);
+
+extern const int jconst_fdct_ifast_sse2[];
+EXTERN(void) jsimd_fdct_ifast_sse2 (DCTELEM * data);
+
+EXTERN(void) jsimd_fdct_ifast_neon (DCTELEM * data);
+
+EXTERN(void) jsimd_fdct_ifast_mips_dspr2 (DCTELEM * data);
+
+/* Floating Point Forward DCT */
+EXTERN(void) jsimd_fdct_float_3dnow (FAST_FLOAT * data);
+
+extern const int jconst_fdct_float_sse[];
+EXTERN(void) jsimd_fdct_float_sse (FAST_FLOAT * data);
+
+/* Quantization */
+EXTERN(void) jsimd_quantize_mmx
+        (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
+
+EXTERN(void) jsimd_quantize_sse2
+        (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
+
+EXTERN(void) jsimd_quantize_neon
+        (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
+
+EXTERN(void) jsimd_quantize_mips_dspr2
+        (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace);
+
+/* Floating Point Quantization */
+EXTERN(void) jsimd_quantize_float_3dnow
+        (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_quantize_float_sse
+        (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_quantize_float_sse2
+        (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
+
+EXTERN(void) jsimd_quantize_float_mips_dspr2
+        (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace);
+
+/* Scaled Inverse DCT */
+EXTERN(void) jsimd_idct_2x2_mmx
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4_mmx
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+extern const int jconst_idct_red_sse2[];
+EXTERN(void) jsimd_idct_2x2_sse2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4_sse2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_2x2_neon
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4_neon
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_2x2_mips_dspr2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4_mips_dspr2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col, int * workspace);
+EXTERN(void) jsimd_idct_6x6_mips_dspr2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+EXTERN(void) jsimd_idct_12x12_pass1_mips_dspr2
+        (JCOEFPTR coef_block, void * dct_table, int * workspace);
+EXTERN(void) jsimd_idct_12x12_pass2_mips_dspr2
+        (int * workspace, int * output);
+
+/* Slow Integer Inverse DCT */
+EXTERN(void) jsimd_idct_islow_mmx
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+extern const int jconst_idct_islow_sse2[];
+EXTERN(void) jsimd_idct_islow_sse2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_islow_neon
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_islow_mips_dspr2
+        (void * dct_table, JCOEFPTR coef_block, int * output_buf,
+         JSAMPLE * output_col);
+
+/* Fast Integer Inverse DCT */
+EXTERN(void) jsimd_idct_ifast_mmx
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+extern const int jconst_idct_ifast_sse2[];
+EXTERN(void) jsimd_idct_ifast_sse2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_ifast_neon
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+EXTERN(void) jsimd_idct_ifast_cols_mips_dspr2
+        (JCOEF * inptr, IFAST_MULT_TYPE * quantptr, DCTELEM * wsptr,
+         const int * idct_coefs);
+EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2
+        (DCTELEM * wsptr, JSAMPARRAY output_buf, JDIMENSION output_col,
+         const int * idct_coefs);
+
+/* Floating Point Inverse DCT */
+EXTERN(void) jsimd_idct_float_3dnow
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+extern const int jconst_idct_float_sse[];
+EXTERN(void) jsimd_idct_float_sse
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
+
+extern const int jconst_idct_float_sse2[];
+EXTERN(void) jsimd_idct_float_sse2
+        (void * dct_table, JCOEFPTR coef_block, JSAMPARRAY output_buf,
+         JDIMENSION output_col);
diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm.c
new file mode 100644
index 0000000..aefb1e6
--- /dev/null
+++ b/simd/jsimd_arm.c
@@ -0,0 +1,681 @@
+/*
+ * jsimd_arm.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011, 2013 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on a
+ * 32-bit ARM architecture.
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+
+#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
+
+/* Return 1 if 'buffer' is a /proc/cpuinfo "Features" line that lists 'feature'
+ * as a whitespace-delimited word, 0 otherwise. */
+LOCAL(int)
+check_feature (char *buffer, char *feature)
+{
+  char *p, *start;
+  size_t len = strlen(feature);
+
+  if (len == 0 || strncmp(buffer, "Features", 8) != 0)
+    return 0;
+  buffer += 8;
+  /* <ctype.h> functions require a value representable as unsigned char */
+  while (isspace((unsigned char)*buffer))
+    buffer++;
+  start = buffer;
+
+  /* Check every occurrence; 'start' stays fixed so that advancing the search
+   * position cannot make a mid-word hit look like the start of the text. */
+  for (; (p = strstr(buffer, feature)) != NULL; buffer = p + 1) {
+    if (p > start && !isspace((unsigned char)p[-1]))
+      continue;
+    if (p[len] != 0 && !isspace((unsigned char)p[len]))
+      continue;
+    return 1;
+  }
+  return 0;
+}
+
+/* Scan /proc/cpuinfo line by line using a 'bufsize'-byte buffer, resetting
+ * simd_support and setting JSIMD_ARM_NEON if a "neon" feature is listed.
+ * Returns 0 if allocation fails or a line overflows the buffer, else 1. */
+LOCAL(int)
+parse_proc_cpuinfo (int bufsize)
+{
+  int ok = 1;
+  FILE *cpuinfo;
+  char *line = (char *)malloc(bufsize);
+
+  simd_support = 0;
+  if (line == NULL)
+    return 0;
+
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+  while (ok && cpuinfo != NULL && fgets(line, bufsize, cpuinfo) != NULL) {
+    if (strchr(line, '\n') == NULL && !feof(cpuinfo))
+      ok = 0; /* "impossible" happened - insufficient size of the buffer! */
+    else if (check_feature(line, "neon"))
+      simd_support |= JSIMD_ARM_NEON;
+  }
+  if (cpuinfo != NULL)
+    fclose(cpuinfo);
+  free(line);
+
+  return ok;
+}
+
+#endif
+
+/*
+ * Check what SIMD accelerations are supported.  The result is cached in
+ * simd_support, where ~0 means "not probed yet".
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+  char *env = NULL;
+#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
+  int bufsize = 1024; /* an initial guess for the line buffer size limit */
+#endif
+
+  if (simd_support != ~0U)
+    return;
+
+  simd_support = 0;
+
+#if defined(__ARM_NEON__)
+  simd_support |= JSIMD_ARM_NEON;
+#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+  /* We still have a chance to use NEON regardless of globally used
+   * -mcpu/-mfpu options passed to gcc by performing runtime detection via
+   * /proc/cpuinfo parsing on linux/android */
+  while (!parse_proc_cpuinfo(bufsize)) {
+    bufsize *= 2;
+    if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
+      break;
+  }
+#endif
+
+  /* Restrict the detected set via environment variables (value must be "1") */
+  env = getenv("JSIMD_FORCENEON");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support &= JSIMD_ARM_NEON;
+  env = getenv("JSIMD_FORCENONE");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support = 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+/* Dispatch to the NEON converter selected by cinfo->in_color_space (plain
+ * extrgb for anything unlisted).  No-op if JSIMD_ARM_NEON is not set. */
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  /* JCS_EXT_RGB needs no case of its own: the initial value covers it */
+  convert = jsimd_extrgb_ycc_convert_neon;
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_extrgbx_ycc_convert_neon;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_extbgr_ycc_convert_neon;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_extbgrx_ycc_convert_neon;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_extxbgr_ycc_convert_neon;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_extxrgb_ycc_convert_neon;
+      break;
+    default:
+      break;
+  }
+
+  if (simd_support & JSIMD_ARM_NEON)
+    convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+}
+
+/* Dispatch to the NEON converter selected by cinfo->out_color_space (plain
+ * extrgb for anything unlisted).  No-op if JSIMD_ARM_NEON is not set. */
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+
+  /* JCS_EXT_RGB needs no case of its own: the initial value covers it */
+  convert = jsimd_ycc_extrgb_convert_neon;
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_ycc_extrgbx_convert_neon;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_ycc_extbgr_convert_neon;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_ycc_extbgrx_convert_neon;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_ycc_extxbgr_convert_neon;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_ycc_extxrgb_convert_neon;
+      break;
+    default:
+      break;
+  }
+
+  if (simd_support & JSIMD_ARM_NEON)
+    convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_convsamp_neon(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_fdct_ifast_neon(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_quantize_neon(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+}
diff --git a/simd/jsimd_arm64.c b/simd/jsimd_arm64.c
new file mode 100644
index 0000000..44225aa
--- /dev/null
+++ b/simd/jsimd_arm64.c
@@ -0,0 +1,517 @@
+/*
+ * jsimd_arm64.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011, 2013-2014 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on a
+ * 64-bit ARM architecture.
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+/*
+ * Check what SIMD accelerations are supported.
+ *
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+
+/*
+ * ARMv8 architectures support NEON extensions by default.
+ * It is no longer optional as it was with ARMv7.
+ */
+
+
+LOCAL(void)
+init_simd (void)
+{
+  const char *force_neon, *force_none;
+
+  /* ~0 marks "not probed yet"; any other value is a cached result */
+  if (simd_support != ~0U)
+    return;
+
+  /* No runtime detection here: NEON support is assumed unconditionally */
+  simd_support = JSIMD_ARM_NEON;
+
+  /* Force different settings through environment variables */
+  force_neon = getenv("JSIMD_FORCENEON");
+  if (force_neon != NULL && !strcmp(force_neon, "1"))
+    simd_support &= JSIMD_ARM_NEON;
+  force_none = getenv("JSIMD_FORCENONE");
+  if (force_none != NULL && !strcmp(force_none, "1"))
+    simd_support = 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+}
+
+/* Dispatch to the NEON converter selected by cinfo->out_color_space (plain
+ * extrgb for anything unlisted).  No-op if JSIMD_ARM_NEON is not set. */
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+
+  /* JCS_EXT_RGB needs no case of its own: the initial value covers it */
+  convert = jsimd_ycc_extrgb_convert_neon;
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_ycc_extrgbx_convert_neon;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_ycc_extbgr_convert_neon;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_ycc_extbgrx_convert_neon;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_ycc_extxbgr_convert_neon;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_ycc_extxrgb_convert_neon;
+      break;
+    default:
+      break;
+  }
+
+  if (simd_support & JSIMD_ARM_NEON)
+    convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+}
diff --git a/simd/jsimd_arm_neon.S b/simd/jsimd_arm_neon.S
new file mode 100644
index 0000000..ac6c860
--- /dev/null
+++ b/simd/jsimd_arm_neon.S
@@ -0,0 +1,2408 @@
+/*
+ * ARMv7 NEON optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
+ * All rights reserved.
+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
+#endif
+
+.text /* code and the constant tables reached via 'adr' are emitted here */
+.fpu neon /* enable assembly of NEON (Advanced SIMD) instructions */
+.arch armv7a /* accept the ARMv7-A instruction set */
+.object_arch armv4 /* architecture recorded in the object file attributes */
+.arm /* ARM (not Thumb) instruction encoding */
+
+
+#define RESPECT_STRICT_ALIGNMENT 1 /* NOTE(review): not referenced in this part of the file; presumably used further down */
+
+
+/*****************************************************************************/
+
+/* asm_function fname: open a function (.func, global symbol, entry label) */
+.macro asm_function fname
+#ifdef __APPLE__ /* Apple targets: the symbol gets a leading underscore */
+    .func _\fname
+    .globl _\fname
+_\fname:
+#else
+    .func \fname /* closed by the .endfunc at the end of each function */
+    .global \fname
+#ifdef __ELF__
+    .hidden \fname /* ELF only: give the symbol hidden visibility */
+    .type \fname, %function /* ELF only: mark the symbol as a function */
+#endif
+\fname:
+#endif
+.endm
+
+/* Transpose, in place, a 4x4 block of 16-bit values held in four 64-bit registers */
+.macro transpose_4x4 x0, x1, x2, x3
+    vtrn.16 \x0, \x1 /* exchange 16-bit elements between x0 and x1 */
+    vtrn.16 \x2, \x3 /* same for x2 and x3 */
+    vtrn.32 \x0, \x2 /* exchange 32-bit elements between x0 and x2 */
+    vtrn.32 \x1, \x3 /* same for x1 and x3 */
+.endm
+
+
+#define CENTERJSAMPLE 128 /* added to the narrowed 8-bit results (vmov.u8 / vadd.u8) before they are stored */
+
+/*****************************************************************************/
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * GLOBAL(void)
+ * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block,
+ *                        JSAMPARRAY output_buf, JDIMENSION output_col)
+ */
+
+#define FIX_0_298631336  (2446) /* FIX_x = x * 2^13 rounded, e.g. 0.298631336 * 8192 = 2446 */
+#define FIX_0_390180644  (3196)
+#define FIX_0_541196100  (4433)
+#define FIX_0_765366865  (6270)
+#define FIX_0_899976223  (7373)
+#define FIX_1_175875602  (9633)
+#define FIX_1_501321110  (12299)
+#define FIX_1_847759065  (15137)
+#define FIX_1_961570560  (16069)
+#define FIX_2_053119869  (16819)
+#define FIX_2_562915447  (20995)
+#define FIX_3_072711026  (25172)
+/* Pre-combined sums and differences, each used as a single multiplier */
+#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560)
+#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644)
+#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065)
+#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447)
+#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223)
+#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223)
+#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447)
+#define FIX_0_541196100_PLUS_0_765366865  (FIX_0_541196100 + FIX_0_765366865)
+
+/*
+ * Reference SIMD-friendly 1-D ISLOW iDCT C implementation, using some ideas
+ * from the comments in 'simd/jiss2int-64.asm'.  Stores into tmp0-tmp3 and
+ * tmp10-tmp13, which it does not declare; MULTIPLY comes from outside too. */
+#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7)   \
+{                                                                             \
+    DCTELEM row0, row1, row2, row3, row4, row5, row6, row7;                   \
+    INT32   q1, q2, q3, q4, q5, q6, q7;                                       \
+    INT32   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
+                                                                              \
+    /* 1-D iDCT input data */                                                 \
+    row0 = xrow0;                                                             \
+    row1 = xrow1;                                                             \
+    row2 = xrow2;                                                             \
+    row3 = xrow3;                                                             \
+    row4 = xrow4;                                                             \
+    row5 = xrow5;                                                             \
+    row6 = xrow6;                                                             \
+    row7 = xrow7;                                                             \
+                                                                              \
+    q5 = row7 + row3;                                                         \
+    q4 = row5 + row1;                                                         \
+    q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) +                    \
+         MULTIPLY(q4, FIX_1_175875602);                                       \
+    q7 = MULTIPLY(q5, FIX_1_175875602) +                                      \
+         MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644);                     \
+    q2 = MULTIPLY(row2, FIX_0_541196100) +                                    \
+         MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065);                   \
+    q4 = q6;                                                                  \
+    q3 = ((INT32) row0 - (INT32) row4) << 13;                                 \
+    q6 += MULTIPLY(row5, -FIX_2_562915447) +                                  \
+          MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447);                  \
+    /* q1 is free now: the reloadable constants (NEON d2) were used up */     \
+    q1 = q3 + q2;                                                             \
+    q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) +                 \
+          MULTIPLY(row1, -FIX_0_899976223);                                   \
+    q5 = q7;                                                                  \
+    q1 = q1 + q6;                                                             \
+    q7 += MULTIPLY(row7, -FIX_0_899976223) +                                  \
+          MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223);                  \
+                                                                              \
+    /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */        \
+    tmp11_plus_tmp2 = q1;                                                     \
+    row1 = 0;                                                                 \
+                                                                              \
+    q1 = q1 - q6;                                                             \
+    q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) +                 \
+          MULTIPLY(row3, -FIX_2_562915447);                                   \
+    q1 = q1 - q6;                                                             \
+    q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) +                   \
+         MULTIPLY(row6, FIX_0_541196100);                                     \
+    q3 = q3 - q2;                                                             \
+                                                                              \
+    /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */        \
+    tmp11_minus_tmp2 = q1;                                                    \
+                                                                              \
+    q1 = ((INT32) row0 + (INT32) row4) << 13;                                 \
+    q2 = q1 + q6;                                                             \
+    q1 = q1 - q6;                                                             \
+                                                                              \
+    /* pick up the results */                                                 \
+    tmp0  = q4;                                                               \
+    tmp1  = q5;                                                               \
+    tmp2  = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2;                         \
+    tmp3  = q7;                                                               \
+    tmp10 = q2;                                                               \
+    tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2;                         \
+    tmp12 = q3;                                                               \
+    tmp13 = q1;                                                               \
+}
+
+#define XFIX_0_899976223                    d0[0] /* XFIX_x: NEON scalar lane that holds FIX_x */
+#define XFIX_0_541196100                    d0[1]
+#define XFIX_2_562915447                    d0[2]
+#define XFIX_0_298631336_MINUS_0_899976223  d0[3]
+#define XFIX_1_501321110_MINUS_0_899976223  d1[0]
+#define XFIX_2_053119869_MINUS_2_562915447  d1[1]
+#define XFIX_0_541196100_PLUS_0_765366865   d1[2]
+#define XFIX_1_175875602                    d1[3]
+#define XFIX_1_175875602_MINUS_0_390180644  d2[0]
+#define XFIX_0_541196100_MINUS_1_847759065  d2[1]
+#define XFIX_3_072711026_MINUS_2_562915447  d2[2]
+#define XFIX_1_175875602_MINUS_1_961570560  d2[3]
+/* The table below must list the constants in the d0[0]..d2[3] order above */
+.balign 16 /* the table is loaded with a :128 alignment hint */
+jsimd_idct_islow_neon_consts:
+    .short FIX_0_899976223                    /* d0[0] */
+    .short FIX_0_541196100                    /* d0[1] */
+    .short FIX_2_562915447                    /* d0[2] */
+    .short FIX_0_298631336_MINUS_0_899976223  /* d0[3] */
+    .short FIX_1_501321110_MINUS_0_899976223  /* d1[0] */
+    .short FIX_2_053119869_MINUS_2_562915447  /* d1[1] */
+    .short FIX_0_541196100_PLUS_0_765366865   /* d1[2] */
+    .short FIX_1_175875602                    /* d1[3] */
+    /* reloadable constants: at table + 16, re-read into d2 via [ip, :64] */
+    .short FIX_1_175875602_MINUS_0_390180644  /* d2[0] */
+    .short FIX_0_541196100_MINUS_1_847759065  /* d2[1] */
+    .short FIX_3_072711026_MINUS_2_562915447  /* d2[2] */
+    .short FIX_1_175875602_MINUS_1_961570560  /* d2[3] */
+
+asm_function jsimd_idct_islow_neon
+    /* In: r0 = dct_table, r1 = coef_block, r2 = output_buf, r3 = output_col */
+    DCT_TABLE       .req r0
+    COEF_BLOCK      .req r1
+    OUTPUT_BUF      .req r2
+    OUTPUT_COL      .req r3
+    TMP1            .req r0 /* the TMPn names are used only by the store code at label 2 */
+    TMP2            .req r1
+    TMP3            .req r2
+    TMP4            .req ip
+
+    ROW0L           .req d16
+    ROW0R           .req d17
+    ROW1L           .req d18
+    ROW1R           .req d19
+    ROW2L           .req d20
+    ROW2R           .req d21
+    ROW3L           .req d22
+    ROW3R           .req d23
+    ROW4L           .req d24
+    ROW4R           .req d25
+    ROW5L           .req d26
+    ROW5R           .req d27
+    ROW6L           .req d28
+    ROW6R           .req d29
+    ROW7L           .req d30
+    ROW7R           .req d31
+
+    /* Load and dequantize coefficients into NEON registers
+     * with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | d16     | d17     ( q8  )
+     *   1 | d18     | d19     ( q9  )
+     *   2 | d20     | d21     ( q10 )
+     *   3 | d22     | d23     ( q11 )
+     *   4 | d24     | d25     ( q12 )
+     *   5 | d26     | d27     ( q13 )
+     *   6 | d28     | d29     ( q14 )
+     *   7 | d30     | d31     ( q15 )
+     */
+    adr             ip, jsimd_idct_islow_neon_consts /* ip -> constant table */
+    vld1.16         {d16, d17, d18, d19}, [COEF_BLOCK, :128]!
+    vld1.16         {d0, d1, d2, d3}, [DCT_TABLE, :128]!
+    vld1.16         {d20, d21, d22, d23}, [COEF_BLOCK, :128]!
+    vmul.s16        q8, q8, q0
+    vld1.16         {d4, d5, d6, d7}, [DCT_TABLE, :128]!
+    vmul.s16        q9, q9, q1
+    vld1.16         {d24, d25, d26, d27}, [COEF_BLOCK, :128]!
+    vmul.s16        q10, q10, q2
+    vld1.16         {d0, d1, d2, d3}, [DCT_TABLE, :128]!
+    vmul.s16        q11, q11, q3
+    vld1.16         {d28, d29, d30, d31}, [COEF_BLOCK, :128] /* no writeback: COEF_BLOCK stays at block start + 96 bytes */
+    vmul.s16        q12, q12, q0
+    vld1.16         {d4, d5, d6, d7}, [DCT_TABLE, :128]!
+    vmul.s16        q14, q14, q2
+    vmul.s16        q13, q13, q1
+    vld1.16         {d0, d1, d2, d3}, [ip, :128] /* load constants (32 bytes read, the table is 24; d3 holds no constant) */
+    add             ip, ip, #16 /* ip -> reloadable constants (the d2 part of the table) */
+    vmul.s16        q15, q15, q3
+    vpush           {d8-d15} /* save NEON registers (q4-q7 serve as temporaries below) */
+    /* 1-D IDCT, pass 1, left 4x8 half */
+    vadd.s16        d4,    ROW7L, ROW3L
+    vadd.s16        d5,    ROW5L, ROW1L
+    vmull.s16       q6,    d4,    XFIX_1_175875602_MINUS_1_961570560
+    vmlal.s16       q6,    d5,    XFIX_1_175875602
+    vmull.s16       q7,    d4,    XFIX_1_175875602
+      /* Check for the zero coefficients in the right 4x8 half */
+      push            {r4, r5} /* r4/r5 are scratch for the zero check */
+    vmlal.s16       q7,    d5,    XFIX_1_175875602_MINUS_0_390180644
+    vsubl.s16       q3,    ROW0L, ROW4L
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))] /* row 1, columns 4-7, as stored in memory */
+    vmull.s16       q2,    ROW2L, XFIX_0_541196100
+    vmlal.s16       q2,    ROW6L, XFIX_0_541196100_MINUS_1_847759065
+      orr             r0,    r4,    r5 /* r0 (DCT_TABLE is fully consumed) starts OR-ing rows 1-7 */
+    vmov            q4,    q6
+    vmlsl.s16       q6,    ROW5L, XFIX_2_562915447
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))]
+    vmlal.s16       q6,    ROW3L, XFIX_3_072711026_MINUS_2_562915447
+    vshl.s32        q3,    q3,    #13
+      orr             r0,    r0,    r4
+    vmlsl.s16       q4,    ROW1L, XFIX_0_899976223
+      orr             r0,    r0,    r5
+    vadd.s32        q1,    q3,    q2
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))]
+    vmov            q5,    q7
+    vadd.s32        q1,    q1,    q6
+      orr             r0,    r0,    r4
+    vmlsl.s16       q7,    ROW7L, XFIX_0_899976223
+      orr             r0,    r0,    r5
+    vmlal.s16       q7,    ROW1L, XFIX_1_501321110_MINUS_0_899976223
+    vrshrn.s32      ROW1L, q1,    #11
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))]
+    vsub.s32        q1,    q1,    q6
+    vmlal.s16       q5,    ROW5L, XFIX_2_053119869_MINUS_2_562915447
+      orr             r0,    r0,    r4
+    vmlsl.s16       q5,    ROW3L, XFIX_2_562915447
+      orr             r0,    r0,    r5
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW2L, XFIX_0_541196100_PLUS_0_765366865
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))]
+    vmlal.s16       q6,    ROW6L, XFIX_0_541196100
+    vsub.s32        q3,    q3,    q2
+      orr             r0,    r0,    r4
+    vrshrn.s32      ROW6L, q1,    #11
+      orr             r0,    r0,    r5
+    vadd.s32        q1,    q3,    q5
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))]
+    vsub.s32        q3,    q3,    q5
+    vaddl.s16       q5,    ROW0L, ROW4L
+      orr             r0,    r0,    r4
+    vrshrn.s32      ROW2L, q1,    #11
+      orr             r0,    r0,    r5
+    vrshrn.s32      ROW5L, q3,    #11
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))]
+    vshl.s32        q5,    q5,    #13
+    vmlal.s16       q4,    ROW7L, XFIX_0_298631336_MINUS_0_899976223
+      orr             r0,    r0,    r4
+    vadd.s32        q2,    q5,    q6
+      orrs            r0,    r0,    r5 /* sets Z when rows 1-7 of the right half are all zero */
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+      ldrd            r4,    [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))]
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+      orr             r0,    r4,    r5 /* r0 = OR of row 0, columns 4-7; flags from the orrs are kept */
+    vsub.s32        q3,    q1,    q4
+      pop             {r4, r5}
+    vrshrn.s32      ROW7L, q2,    #11
+    vrshrn.s32      ROW3L, q5,    #11
+    vrshrn.s32      ROW0L, q6,    #11
+    vrshrn.s32      ROW4L, q3,    #11
+
+      beq             3f /* Go to do some special handling for the sparse right 4x8 half */
+
+    /* 1-D IDCT, pass 1, right 4x8 half */
+    vld1.s16        {d2},  [ip, :64]    /* reload constants */
+    vadd.s16        d10,   ROW7R, ROW3R
+    vadd.s16        d8,    ROW5R, ROW1R
+      /* Transpose left 4x8 half */
+      vtrn.16         ROW6L, ROW7L
+    vmull.s16       q6,    d10,   XFIX_1_175875602_MINUS_1_961570560
+    vmlal.s16       q6,    d8,    XFIX_1_175875602
+      vtrn.16         ROW2L, ROW3L
+    vmull.s16       q7,    d10,   XFIX_1_175875602
+    vmlal.s16       q7,    d8,    XFIX_1_175875602_MINUS_0_390180644
+      vtrn.16         ROW0L, ROW1L
+    vsubl.s16       q3,    ROW0R, ROW4R
+    vmull.s16       q2,    ROW2R, XFIX_0_541196100
+    vmlal.s16       q2,    ROW6R, XFIX_0_541196100_MINUS_1_847759065
+      vtrn.16         ROW4L, ROW5L
+    vmov            q4,    q6
+    vmlsl.s16       q6,    ROW5R, XFIX_2_562915447
+    vmlal.s16       q6,    ROW3R, XFIX_3_072711026_MINUS_2_562915447
+      vtrn.32         ROW1L, ROW3L
+    vshl.s32        q3,    q3,    #13
+    vmlsl.s16       q4,    ROW1R, XFIX_0_899976223
+      vtrn.32         ROW4L, ROW6L
+    vadd.s32        q1,    q3,    q2
+    vmov            q5,    q7
+    vadd.s32        q1,    q1,    q6
+      vtrn.32         ROW0L, ROW2L
+    vmlsl.s16       q7,    ROW7R, XFIX_0_899976223
+    vmlal.s16       q7,    ROW1R, XFIX_1_501321110_MINUS_0_899976223
+    vrshrn.s32      ROW1R, q1,    #11
+      vtrn.32         ROW5L, ROW7L
+    vsub.s32        q1,    q1,    q6
+    vmlal.s16       q5,    ROW5R, XFIX_2_053119869_MINUS_2_562915447
+    vmlsl.s16       q5,    ROW3R, XFIX_2_562915447
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW2R, XFIX_0_541196100_PLUS_0_765366865
+    vmlal.s16       q6,    ROW6R, XFIX_0_541196100
+    vsub.s32        q3,    q3,    q2
+    vrshrn.s32      ROW6R, q1,    #11
+    vadd.s32        q1,    q3,    q5
+    vsub.s32        q3,    q3,    q5
+    vaddl.s16       q5,    ROW0R, ROW4R
+    vrshrn.s32      ROW2R, q1,    #11
+    vrshrn.s32      ROW5R, q3,    #11
+    vshl.s32        q5,    q5,    #13
+    vmlal.s16       q4,    ROW7R, XFIX_0_298631336_MINUS_0_899976223
+    vadd.s32        q2,    q5,    q6
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+    vsub.s32        q3,    q1,    q4
+    vrshrn.s32      ROW7R, q2,    #11
+    vrshrn.s32      ROW3R, q5,    #11
+    vrshrn.s32      ROW0R, q6,    #11
+    vrshrn.s32      ROW4R, q3,    #11
+    /* Transpose right 4x8 half */
+    vtrn.16         ROW6R, ROW7R
+    vtrn.16         ROW2R, ROW3R
+    vtrn.16         ROW0R, ROW1R
+    vtrn.16         ROW4R, ROW5R
+    vtrn.32         ROW1R, ROW3R
+    vtrn.32         ROW4R, ROW6R
+    vtrn.32         ROW0R, ROW2R
+    vtrn.32         ROW5R, ROW7R
+    /* "A <-> B" notes below: only 4x4 quadrants were transposed, so register B holds the data that plays the role of A */
+1:  /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */
+    vld1.s16        {d2},  [ip, :64]    /* reload constants */
+    vmull.s16       q6,    ROW1R, XFIX_1_175875602 /* ROW5L <-> ROW1R */
+    vmlal.s16       q6,    ROW1L, XFIX_1_175875602
+    vmlal.s16       q6,    ROW3R, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */
+    vmlal.s16       q6,    ROW3L, XFIX_1_175875602_MINUS_1_961570560
+    vmull.s16       q7,    ROW3R, XFIX_1_175875602 /* ROW7L <-> ROW3R */
+    vmlal.s16       q7,    ROW3L, XFIX_1_175875602
+    vmlal.s16       q7,    ROW1R, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */
+    vmlal.s16       q7,    ROW1L, XFIX_1_175875602_MINUS_0_390180644
+    vsubl.s16       q3,    ROW0L, ROW0R /* ROW4L <-> ROW0R */
+    vmull.s16       q2,    ROW2L, XFIX_0_541196100
+    vmlal.s16       q2,    ROW2R, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L <-> ROW2R */
+    vmov            q4,    q6
+    vmlsl.s16       q6,    ROW1R, XFIX_2_562915447 /* ROW5L <-> ROW1R */
+    vmlal.s16       q6,    ROW3L, XFIX_3_072711026_MINUS_2_562915447
+    vshl.s32        q3,    q3,    #13
+    vmlsl.s16       q4,    ROW1L, XFIX_0_899976223
+    vadd.s32        q1,    q3,    q2
+    vmov            q5,    q7
+    vadd.s32        q1,    q1,    q6
+    vmlsl.s16       q7,    ROW3R, XFIX_0_899976223 /* ROW7L <-> ROW3R */
+    vmlal.s16       q7,    ROW1L, XFIX_1_501321110_MINUS_0_899976223
+    vshrn.s32       ROW1L, q1,    #16
+    vsub.s32        q1,    q1,    q6
+    vmlal.s16       q5,    ROW1R, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L <-> ROW1R */
+    vmlsl.s16       q5,    ROW3L, XFIX_2_562915447
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW2L, XFIX_0_541196100_PLUS_0_765366865
+    vmlal.s16       q6,    ROW2R, XFIX_0_541196100 /* ROW6L <-> ROW2R */
+    vsub.s32        q3,    q3,    q2
+    vshrn.s32       ROW2R, q1,    #16 /* ROW6L <-> ROW2R */
+    vadd.s32        q1,    q3,    q5
+    vsub.s32        q3,    q3,    q5
+    vaddl.s16       q5,    ROW0L, ROW0R /* ROW4L <-> ROW0R */
+    vshrn.s32       ROW2L, q1,    #16
+    vshrn.s32       ROW1R, q3,    #16 /* ROW5L <-> ROW1R */
+    vshl.s32        q5,    q5,    #13
+    vmlal.s16       q4,    ROW3R, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L <-> ROW3R */
+    vadd.s32        q2,    q5,    q6
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+    vsub.s32        q3,    q1,    q4
+    vshrn.s32       ROW3R, q2,    #16 /* ROW7L <-> ROW3R */
+    vshrn.s32       ROW3L, q5,    #16
+    vshrn.s32       ROW0L, q6,    #16
+    vshrn.s32       ROW0R, q3,    #16 /* ROW4L <-> ROW0R */
+    /* 1-D IDCT, pass 2, right 4x8 half */
+    vld1.s16        {d2},  [ip, :64]    /* reload constants */
+    vmull.s16       q6,    ROW5R, XFIX_1_175875602
+    vmlal.s16       q6,    ROW5L, XFIX_1_175875602 /* ROW5L <-> ROW1R */
+    vmlal.s16       q6,    ROW7R, XFIX_1_175875602_MINUS_1_961570560
+    vmlal.s16       q6,    ROW7L, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L <-> ROW3R */
+    vmull.s16       q7,    ROW7R, XFIX_1_175875602
+    vmlal.s16       q7,    ROW7L, XFIX_1_175875602 /* ROW7L <-> ROW3R */
+    vmlal.s16       q7,    ROW5R, XFIX_1_175875602_MINUS_0_390180644
+    vmlal.s16       q7,    ROW5L, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L <-> ROW1R */
+    vsubl.s16       q3,    ROW4L, ROW4R /* ROW4L <-> ROW0R */
+    vmull.s16       q2,    ROW6L, XFIX_0_541196100 /* ROW6L <-> ROW2R */
+    vmlal.s16       q2,    ROW6R, XFIX_0_541196100_MINUS_1_847759065
+    vmov            q4,    q6
+    vmlsl.s16       q6,    ROW5R, XFIX_2_562915447
+    vmlal.s16       q6,    ROW7L, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L <-> ROW3R */
+    vshl.s32        q3,    q3,    #13
+    vmlsl.s16       q4,    ROW5L, XFIX_0_899976223 /* ROW5L <-> ROW1R */
+    vadd.s32        q1,    q3,    q2
+    vmov            q5,    q7
+    vadd.s32        q1,    q1,    q6
+    vmlsl.s16       q7,    ROW7R, XFIX_0_899976223
+    vmlal.s16       q7,    ROW5L, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L <-> ROW1R */
+    vshrn.s32       ROW5L, q1,    #16 /* ROW5L <-> ROW1R */
+    vsub.s32        q1,    q1,    q6
+    vmlal.s16       q5,    ROW5R, XFIX_2_053119869_MINUS_2_562915447
+    vmlsl.s16       q5,    ROW7L, XFIX_2_562915447 /* ROW7L <-> ROW3R */
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW6L, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L <-> ROW2R */
+    vmlal.s16       q6,    ROW6R, XFIX_0_541196100
+    vsub.s32        q3,    q3,    q2
+    vshrn.s32       ROW6R, q1,    #16
+    vadd.s32        q1,    q3,    q5
+    vsub.s32        q3,    q3,    q5
+    vaddl.s16       q5,    ROW4L, ROW4R /* ROW4L <-> ROW0R */
+    vshrn.s32       ROW6L, q1,    #16 /* ROW6L <-> ROW2R */
+    vshrn.s32       ROW5R, q3,    #16
+    vshl.s32        q5,    q5,    #13
+    vmlal.s16       q4,    ROW7R, XFIX_0_298631336_MINUS_0_899976223
+    vadd.s32        q2,    q5,    q6
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+    vsub.s32        q3,    q1,    q4
+    vshrn.s32       ROW7R, q2,    #16
+    vshrn.s32       ROW7L, q5,    #16 /* ROW7L <-> ROW3R */
+    vshrn.s32       ROW4L, q6,    #16 /* ROW4L <-> ROW0R */
+    vshrn.s32       ROW4R, q3,    #16
+
+2:  /* Descale to 8-bit and range limit */
+    vqrshrn.s16     d16,   q8,    #2
+    vqrshrn.s16     d17,   q9,    #2
+    vqrshrn.s16     d18,   q10,   #2
+    vqrshrn.s16     d19,   q11,   #2
+    vpop            {d8-d15} /* restore NEON registers */
+    vqrshrn.s16     d20,   q12,   #2
+      /* Transpose the final 8-bit samples and do signed->unsigned conversion */
+      vtrn.16         q8,    q9
+    vqrshrn.s16     d21,   q13,   #2
+    vqrshrn.s16     d22,   q14,   #2
+      vmov.u8         q0,    #(CENTERJSAMPLE) /* q0 = CENTERJSAMPLE in every byte */
+    vqrshrn.s16     d23,   q15,   #2
+      vtrn.8          d16,   d17
+      vtrn.8          d18,   d19
+      vadd.u8         q8,    q8,    q0
+      vadd.u8         q9,    q9,    q0
+      vtrn.16         q10,   q11
+        /* Store results to the output buffer */
+        ldmia           OUTPUT_BUF!, {TMP1, TMP2} /* fetch two row pointers from output_buf */
+        add             TMP1, TMP1, OUTPUT_COL /* row pointer + output_col = store address */
+        add             TMP2, TMP2, OUTPUT_COL
+        vst1.8          {d16}, [TMP1] /* 8 bytes per output row */
+      vtrn.8          d20, d21
+        vst1.8          {d17}, [TMP2]
+        ldmia           OUTPUT_BUF!, {TMP1, TMP2}
+        add             TMP1, TMP1, OUTPUT_COL
+        add             TMP2, TMP2, OUTPUT_COL
+        vst1.8          {d18}, [TMP1]
+      vadd.u8         q10,   q10,   q0
+        vst1.8          {d19}, [TMP2]
+        ldmia           OUTPUT_BUF, {TMP1, TMP2, TMP3, TMP4} /* last four row pointers; TMP3 overwrites OUTPUT_BUF (same register) */
+        add             TMP1, TMP1, OUTPUT_COL
+        add             TMP2, TMP2, OUTPUT_COL
+        add             TMP3, TMP3, OUTPUT_COL
+        add             TMP4, TMP4, OUTPUT_COL
+      vtrn.8          d22, d23
+        vst1.8          {d20}, [TMP1]
+      vadd.u8         q11,   q11,   q0
+        vst1.8          {d21}, [TMP2]
+        vst1.8          {d22}, [TMP3]
+        vst1.8          {d23}, [TMP4]
+    bx              lr /* return to the caller */
+
+3:  /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
+
+    /* Transpose left 4x8 half */
+    vtrn.16         ROW6L, ROW7L
+    vtrn.16         ROW2L, ROW3L
+    vtrn.16         ROW0L, ROW1L
+    vtrn.16         ROW4L, ROW5L
+    vshl.s16        ROW0R, ROW0R, #2 /* PASS1_BITS */
+    vtrn.32         ROW1L, ROW3L
+    vtrn.32         ROW4L, ROW6L
+    vtrn.32         ROW0L, ROW2L
+    vtrn.32         ROW5L, ROW7L
+
+    cmp             r0, #0 /* r0 = OR of row 0, columns 4-7, computed during pass 1 */
+    beq             4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
+
+    /* Only row 0 is non-zero for the right 4x8 half  */
+    vdup.s16        ROW1R, ROW0R[1]
+    vdup.s16        ROW2R, ROW0R[2]
+    vdup.s16        ROW3R, ROW0R[3]
+    vdup.s16        ROW4R, ROW0R[0]
+    vdup.s16        ROW5R, ROW0R[1]
+    vdup.s16        ROW6R, ROW0R[2]
+    vdup.s16        ROW7R, ROW0R[3]
+    vdup.s16        ROW0R, ROW0R[0] /* done last: ROW0R is the source of all the vdups above */
+    b               1b /* Go to 'normal' second pass */
+
+4:  /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
+    vld1.s16        {d2},  [ip, :64]    /* reload constants */
+    vmull.s16       q6,    ROW1L, XFIX_1_175875602
+    vmlal.s16       q6,    ROW3L, XFIX_1_175875602_MINUS_1_961570560
+    vmull.s16       q7,    ROW3L, XFIX_1_175875602
+    vmlal.s16       q7,    ROW1L, XFIX_1_175875602_MINUS_0_390180644
+    vmull.s16       q2,    ROW2L, XFIX_0_541196100
+    vshll.s16       q3,    ROW0L, #13
+    vmov            q4,    q6
+    vmlal.s16       q6,    ROW3L, XFIX_3_072711026_MINUS_2_562915447
+    vmlsl.s16       q4,    ROW1L, XFIX_0_899976223
+    vadd.s32        q1,    q3,    q2
+    vmov            q5,    q7
+    vmlal.s16       q7,    ROW1L, XFIX_1_501321110_MINUS_0_899976223
+    vadd.s32        q1,    q1,    q6
+    vadd.s32        q6,    q6,    q6 /* q6 doubled so that one vsub replaces the two of the normal variant */
+    vmlsl.s16       q5,    ROW3L, XFIX_2_562915447
+    vshrn.s32       ROW1L, q1,    #16
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW2L, XFIX_0_541196100_PLUS_0_765366865
+    vsub.s32        q3,    q3,    q2
+    vshrn.s32       ROW2R, q1,    #16 /* ROW6L <-> ROW2R */
+    vadd.s32        q1,    q3,    q5
+    vsub.s32        q3,    q3,    q5
+    vshll.s16       q5,    ROW0L, #13
+    vshrn.s32       ROW2L, q1,    #16
+    vshrn.s32       ROW1R, q3,    #16 /* ROW5L <-> ROW1R */
+    vadd.s32        q2,    q5,    q6
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+    vsub.s32        q3,    q1,    q4
+    vshrn.s32       ROW3R, q2,    #16 /* ROW7L <-> ROW3R */
+    vshrn.s32       ROW3L, q5,    #16
+    vshrn.s32       ROW0L, q6,    #16
+    vshrn.s32       ROW0R, q3,    #16 /* ROW4L <-> ROW0R */
+    /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */
+    vld1.s16        {d2},  [ip, :64]    /* reload constants */
+    vmull.s16       q6,    ROW5L, XFIX_1_175875602
+    vmlal.s16       q6,    ROW7L, XFIX_1_175875602_MINUS_1_961570560
+    vmull.s16       q7,    ROW7L, XFIX_1_175875602
+    vmlal.s16       q7,    ROW5L, XFIX_1_175875602_MINUS_0_390180644
+    vmull.s16       q2,    ROW6L, XFIX_0_541196100
+    vshll.s16       q3,    ROW4L, #13
+    vmov            q4,    q6
+    vmlal.s16       q6,    ROW7L, XFIX_3_072711026_MINUS_2_562915447
+    vmlsl.s16       q4,    ROW5L, XFIX_0_899976223
+    vadd.s32        q1,    q3,    q2
+    vmov            q5,    q7
+    vmlal.s16       q7,    ROW5L, XFIX_1_501321110_MINUS_0_899976223
+    vadd.s32        q1,    q1,    q6
+    vadd.s32        q6,    q6,    q6
+    vmlsl.s16       q5,    ROW7L, XFIX_2_562915447
+    vshrn.s32       ROW5L, q1,    #16 /* ROW5L <-> ROW1R */
+    vsub.s32        q1,    q1,    q6
+    vmull.s16       q6,    ROW6L, XFIX_0_541196100_PLUS_0_765366865
+    vsub.s32        q3,    q3,    q2
+    vshrn.s32       ROW6R, q1,    #16
+    vadd.s32        q1,    q3,    q5
+    vsub.s32        q3,    q3,    q5
+    vshll.s16       q5,    ROW4L, #13
+    vshrn.s32       ROW6L, q1,    #16 /* ROW6L <-> ROW2R */
+    vshrn.s32       ROW5R, q3,    #16
+    vadd.s32        q2,    q5,    q6
+    vsub.s32        q1,    q5,    q6
+    vadd.s32        q6,    q2,    q7
+    vsub.s32        q2,    q2,    q7
+    vadd.s32        q5,    q1,    q4
+    vsub.s32        q3,    q1,    q4
+    vshrn.s32       ROW7R, q2,    #16
+    vshrn.s32       ROW7L, q5,    #16 /* ROW7L <-> ROW3R */
+    vshrn.s32       ROW4L, q6,    #16 /* ROW4L <-> ROW0R */
+    vshrn.s32       ROW4R, q3,    #16
+    b               2b /* Go to epilogue */
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+
+    .unreq          ROW0L
+    .unreq          ROW0R
+    .unreq          ROW1L
+    .unreq          ROW1R
+    .unreq          ROW2L
+    .unreq          ROW2R
+    .unreq          ROW3L
+    .unreq          ROW3R
+    .unreq          ROW4L
+    .unreq          ROW4R
+    .unreq          ROW5L
+    .unreq          ROW5R
+    .unreq          ROW6L
+    .unreq          ROW6R
+    .unreq          ROW7L
+    .unreq          ROW7R
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_ifast_neon
+ *
+ * This function contains a fast, not so accurate integer implementation of
+ * the inverse DCT (Discrete Cosine Transform). It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_ifast'
+ * function from jidctfst.c
+ *
+ * Normally a 1-D AAN DCT needs 5 multiplications and 29 additions.
+ * In the ARM NEON case, some extra additions are required because the
+ * VQDMULH instruction can't handle constants larger than 1. So expressions
+ * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x",
+ * which introduces an extra addition. Overall, there are 6 extra additions
+ * per 1-D IDCT pass, for a total of 5 VQDMULH and 35 VADD/VSUB instructions.
+ */
+
+#define XFIX_1_082392200 d0[0] /* lane of d0: fraction of 1.082392200 */
+#define XFIX_1_414213562 d0[1] /* lane of d0: fraction of 1.414213562 */
+#define XFIX_1_847759065 d0[2] /* lane of d0: fraction of 1.847759065 */
+#define XFIX_2_613125930 d0[3] /* lane of d0: 2.613125930 minus 2 */
+/* Table loaded into d0: fractions only, integer parts are re-added by VADD */
+.balign 16
+jsimd_idct_ifast_neon_consts:
+    .short (277 * 128 - 256 * 128) /* XFIX_1_082392200: (277/256 - 1) * 2^15 */
+    .short (362 * 128 - 256 * 128) /* XFIX_1_414213562: (362/256 - 1) * 2^15 */
+    .short (473 * 128 - 256 * 128) /* XFIX_1_847759065: (473/256 - 1) * 2^15 */
+    .short (669 * 128 - 512 * 128) /* XFIX_2_613125930: (669/256 - 2) * 2^15 */
+
+asm_function jsimd_idct_ifast_neon
+    /* Dequantize an 8x8 block, run two 1-D IDCT passes, store 8 rows of 8 bytes */
+    DCT_TABLE       .req r0
+    COEF_BLOCK      .req r1
+    OUTPUT_BUF      .req r2
+    OUTPUT_COL      .req r3
+    TMP1            .req r0 /* same register as DCT_TABLE */
+    TMP2            .req r1 /* same register as COEF_BLOCK */
+    TMP3            .req r2 /* same register as OUTPUT_BUF */
+    TMP4            .req ip
+
+    /* Load and dequantize coefficients into NEON registers
+     * with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | d16     | d17     ( q8  )
+     *   1 | d18     | d19     ( q9  )
+     *   2 | d20     | d21     ( q10 )
+     *   3 | d22     | d23     ( q11 )
+     *   4 | d24     | d25     ( q12 )
+     *   5 | d26     | d27     ( q13 )
+     *   6 | d28     | d29     ( q14 )
+     *   7 | d30     | d31     ( q15 )
+     */
+    adr             ip, jsimd_idct_ifast_neon_consts
+    vld1.16         {d16, d17, d18, d19}, [COEF_BLOCK, :128]!
+    vld1.16         {d0, d1, d2, d3}, [DCT_TABLE, :128]!
+    vld1.16         {d20, d21, d22, d23}, [COEF_BLOCK, :128]!
+    vmul.s16        q8,  q8,  q0 /* dequantize row 0 */
+    vld1.16         {d4, d5, d6, d7}, [DCT_TABLE, :128]!
+    vmul.s16        q9,  q9,  q1 /* dequantize row 1 */
+    vld1.16         {d24, d25, d26, d27}, [COEF_BLOCK, :128]!
+    vmul.s16        q10, q10, q2 /* dequantize row 2 */
+    vld1.16         {d0, d1, d2, d3}, [DCT_TABLE, :128]!
+    vmul.s16        q11, q11, q3 /* dequantize row 3 */
+    vld1.16         {d28, d29, d30, d31}, [COEF_BLOCK, :128]
+    vmul.s16        q12, q12, q0 /* dequantize row 4 */
+    vld1.16         {d4, d5, d6, d7}, [DCT_TABLE, :128]!
+    vmul.s16        q14, q14, q2 /* dequantize row 6 */
+    vmul.s16        q13, q13, q1 /* dequantize row 5 */
+    vld1.16         {d0}, [ip, :64] /* load constants; q0 was last read for row 4 */
+    vmul.s16        q15, q15, q3 /* dequantize row 7 */
+    vpush           {d8-d13}        /* save q4-q6, which are used as scratch below */
+    /* 1-D IDCT, pass 1 */
+    vsub.s16        q2,  q10, q14
+    vadd.s16        q14, q10, q14
+    vsub.s16        q1,  q11, q13
+    vadd.s16        q13, q11, q13
+    vsub.s16        q5,  q9,  q15
+    vadd.s16        q15, q9,  q15
+    vqdmulh.s16     q4,  q2,  XFIX_1_414213562
+    vqdmulh.s16     q6,  q1,  XFIX_2_613125930
+    vadd.s16        q3,  q1,  q1 /* 2 * q1: integer part of 2.613125930 */
+    vsub.s16        q1,  q5,  q1
+    vadd.s16        q10, q2,  q4 /* q2 * XFIX_1_414213562 + q2 */
+    vqdmulh.s16     q4,  q1,  XFIX_1_847759065
+    vsub.s16        q2,  q15, q13
+    vadd.s16        q3,  q3,  q6 /* + fractional product of the old q1 */
+    vqdmulh.s16     q6,  q2,  XFIX_1_414213562
+    vadd.s16        q1,  q1,  q4 /* q1 * XFIX_1_847759065 + q1 */
+    vqdmulh.s16     q4,  q5,  XFIX_1_082392200
+    vsub.s16        q10, q10, q14
+    vadd.s16        q2,  q2,  q6 /* q2 * XFIX_1_414213562 + q2 */
+    vsub.s16        q6,  q8,  q12
+    vadd.s16        q12, q8,  q12
+    vadd.s16        q9,  q5,  q4 /* q5 * XFIX_1_082392200 + q5 */
+    vadd.s16        q5,  q6,  q10
+    vsub.s16        q10, q6,  q10
+    vadd.s16        q6,  q15, q13
+    vadd.s16        q8,  q12, q14
+    vsub.s16        q3,  q6,  q3
+    vsub.s16        q12, q12, q14
+    vsub.s16        q3,  q3,  q1
+    vsub.s16        q1,  q9,  q1
+    vadd.s16        q2,  q3,  q2
+    vsub.s16        q15, q8,  q6
+    vadd.s16        q1,  q1,  q2
+    vadd.s16        q8,  q8,  q6
+    vadd.s16        q14, q5,  q3
+    vsub.s16        q9,  q5,  q3
+    vsub.s16        q13, q10, q2
+    vadd.s16        q10, q10, q2
+      /* Transpose (extra-indented lines are interleaved with pass arithmetic) */
+      vtrn.16         q8,  q9
+    vsub.s16        q11, q12, q1
+      vtrn.16         q14, q15
+    vadd.s16        q12, q12, q1
+      vtrn.16         q10, q11
+      vtrn.16         q12, q13
+      vtrn.32         q9,  q11
+      vtrn.32         q12, q14
+      vtrn.32         q8,  q10
+      vtrn.32         q13, q15
+      vswp            d28, d21
+      vswp            d26, d19
+    /* 1-D IDCT, pass 2 (same instruction sequence as pass 1) */
+    vsub.s16        q2,  q10, q14
+      vswp            d30, d23
+    vadd.s16        q14, q10, q14
+      vswp            d24, d17
+    vsub.s16        q1,  q11, q13
+    vadd.s16        q13, q11, q13
+    vsub.s16        q5,  q9,  q15
+    vadd.s16        q15, q9,  q15
+    vqdmulh.s16     q4,  q2,  XFIX_1_414213562
+    vqdmulh.s16     q6,  q1,  XFIX_2_613125930
+    vadd.s16        q3,  q1,  q1 /* 2 * q1: integer part of 2.613125930 */
+    vsub.s16        q1,  q5,  q1
+    vadd.s16        q10, q2,  q4 /* q2 * XFIX_1_414213562 + q2 */
+    vqdmulh.s16     q4,  q1,  XFIX_1_847759065
+    vsub.s16        q2,  q15, q13
+    vadd.s16        q3,  q3,  q6 /* + fractional product of the old q1 */
+    vqdmulh.s16     q6,  q2,  XFIX_1_414213562
+    vadd.s16        q1,  q1,  q4 /* q1 * XFIX_1_847759065 + q1 */
+    vqdmulh.s16     q4,  q5,  XFIX_1_082392200
+    vsub.s16        q10, q10, q14
+    vadd.s16        q2,  q2,  q6 /* q2 * XFIX_1_414213562 + q2 */
+    vsub.s16        q6,  q8,  q12
+    vadd.s16        q12, q8,  q12
+    vadd.s16        q9,  q5,  q4 /* q5 * XFIX_1_082392200 + q5 */
+    vadd.s16        q5,  q6,  q10
+    vsub.s16        q10, q6,  q10
+    vadd.s16        q6,  q15, q13
+    vadd.s16        q8,  q12, q14
+    vsub.s16        q3,  q6,  q3
+    vsub.s16        q12, q12, q14
+    vsub.s16        q3,  q3,  q1
+    vsub.s16        q1,  q9,  q1
+    vadd.s16        q2,  q3,  q2
+    vsub.s16        q15, q8,  q6
+    vadd.s16        q1,  q1,  q2
+    vadd.s16        q8,  q8,  q6
+    vadd.s16        q14, q5,  q3
+    vsub.s16        q9,  q5,  q3
+    vsub.s16        q13, q10, q2
+    vpop            {d8-d13}        /* restore q4-q6; they are not read again */
+    vadd.s16        q10, q10, q2
+    vsub.s16        q11, q12, q1
+    vadd.s16        q12, q12, q1
+    /* Descale to 8-bit and range limit */
+    vmov.u8         q0,  #0x80 /* 0x80 in every byte lane */
+    vqshrn.s16      d16, q8,  #5 /* >> 5 with signed saturation to 8 bits */
+    vqshrn.s16      d17, q9,  #5
+    vqshrn.s16      d18, q10, #5
+    vqshrn.s16      d19, q11, #5
+    vqshrn.s16      d20, q12, #5
+    vqshrn.s16      d21, q13, #5
+    vqshrn.s16      d22, q14, #5
+    vqshrn.s16      d23, q15, #5
+    vadd.u8         q8,  q8,  q0 /* + 0x80 (mod 256): signed byte -> 0..255 */
+    vadd.u8         q9,  q9,  q0
+    vadd.u8         q10, q10, q0
+    vadd.u8         q11, q11, q0
+    /* Transpose the final 8-bit samples */
+    vtrn.16         q8,  q9
+    vtrn.16         q10, q11
+    vtrn.32         q8,  q10
+    vtrn.32         q9,  q11
+    vtrn.8          d16, d17
+    vtrn.8          d18, d19
+      /* Store results to the output buffer */
+      ldmia           OUTPUT_BUF!, {TMP1, TMP2} /* first two output pointers */
+      add             TMP1, TMP1, OUTPUT_COL
+      add             TMP2, TMP2, OUTPUT_COL
+      vst1.8          {d16}, [TMP1]
+      vst1.8          {d17}, [TMP2]
+      ldmia           OUTPUT_BUF!, {TMP1, TMP2} /* next two output pointers */
+      add             TMP1, TMP1, OUTPUT_COL
+      add             TMP2, TMP2, OUTPUT_COL
+      vst1.8          {d18}, [TMP1]
+    vtrn.8          d20, d21
+      vst1.8          {d19}, [TMP2]
+      ldmia           OUTPUT_BUF, {TMP1, TMP2, TMP3, TMP4} /* last four; TMP3 overwrites r2 */
+      add             TMP1, TMP1, OUTPUT_COL
+      add             TMP2, TMP2, OUTPUT_COL
+      add             TMP3, TMP3, OUTPUT_COL
+      add             TMP4, TMP4, OUTPUT_COL
+      vst1.8          {d20}, [TMP1]
+    vtrn.8          d22, d23
+      vst1.8          {d21}, [TMP2]
+      vst1.8          {d22}, [TMP3]
+      vst1.8          {d23}, [TMP4]
+    bx              lr
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_4x4_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 4x4 pixel output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_4x4'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of the 4x4 inverse DCT, which
+ *       requires far fewer arithmetic operations and hence should be faster.
+ *       The primary purpose of this particular NEON-optimized function is
+ *       bit-exact compatibility with jpeg-6b.
+ *
+ * TODO: slightly better instruction scheduling can be achieved by expanding
+ *       the idct_helper/transpose_4x4 macros and reordering instructions,
+ *       but readability will suffer somewhat.
+ */
+
+#define CONST_BITS  13 /* fraction bits: FIX(x) below is x * 2^13, rounded */
+
+#define FIX_0_211164243  (1730)  /* FIX(0.211164243) */
+#define FIX_0_509795579  (4176)  /* FIX(0.509795579) */
+#define FIX_0_601344887  (4926)  /* FIX(0.601344887) */
+#define FIX_0_720959822  (5906)  /* FIX(0.720959822) */
+#define FIX_0_765366865  (6270)  /* FIX(0.765366865) */
+#define FIX_0_850430095  (6967)  /* FIX(0.850430095) */
+#define FIX_0_899976223  (7373)  /* FIX(0.899976223) */
+#define FIX_1_061594337  (8697)  /* FIX(1.061594337) */
+#define FIX_1_272758580  (10426) /* FIX(1.272758580) */
+#define FIX_1_451774981  (11893) /* FIX(1.451774981) */
+#define FIX_1_847759065  (15137) /* FIX(1.847759065) */
+#define FIX_2_172734803  (17799) /* FIX(2.172734803) */
+#define FIX_2_562915447  (20995) /* FIX(2.562915447) */
+#define FIX_3_624509785  (29692) /* FIX(3.624509785) */
+/* Multiplier table for the 4x4 IDCT; lane names assume it is loaded at d0 */
+.balign 16
+jsimd_idct_4x4_neon_consts:
+    .short     FIX_1_847759065     /* d0[0] */
+    .short     -FIX_0_765366865    /* d0[1] */
+    .short     -FIX_0_211164243    /* d0[2] */
+    .short     FIX_1_451774981     /* d0[3] */
+    .short     -FIX_2_172734803    /* d1[0] */
+    .short     FIX_1_061594337     /* d1[1] */
+    .short     -FIX_0_509795579    /* d1[2] */
+    .short     -FIX_0_601344887    /* d1[3] */
+    .short     FIX_0_899976223     /* d2[0] */
+    .short     FIX_2_562915447     /* d2[1] */
+    .short     1 << (CONST_BITS+1) /* d2[2]: scale applied to the x4 term */
+    .short     0                   /* d2[3]: not read by idct_helper */
+
+.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
+    vmull.s16       q14, \x4,  d2[2] /* q14 = x4 << (CONST_BITS+1) */
+    vmlal.s16       q14, \x8,  d0[0] /*     + x8  * FIX_1_847759065 */
+    vmlal.s16       q14, \x14, d0[1] /*     - x14 * FIX_0_765366865 */
+    /* q13: sum of x16, x12, x10, x6 products, paired with q14 */
+    vmull.s16       q13, \x16, d1[2] /* q13 = - x16 * FIX_0_509795579 */
+    vmlal.s16       q13, \x12, d1[3] /*     - x12 * FIX_0_601344887 */
+    vmlal.s16       q13, \x10, d2[0] /*     + x10 * FIX_0_899976223 */
+    vmlal.s16       q13, \x6,  d2[1] /*     + x6  * FIX_2_562915447 */
+
+    vmull.s16       q15, \x4,  d2[2] /* q15 = x4 << (CONST_BITS+1) */
+    vmlsl.s16       q15, \x8,  d0[0] /*     - x8  * FIX_1_847759065 */
+    vmlsl.s16       q15, \x14, d0[1] /*     + x14 * FIX_0_765366865 */
+    /* q12: sum of x16, x12, x10, x6 products, paired with q15 */
+    vmull.s16       q12, \x16, d0[2] /* q12 = - x16 * FIX_0_211164243 */
+    vmlal.s16       q12, \x12, d0[3] /*     + x12 * FIX_1_451774981 */
+    vmlal.s16       q12, \x10, d1[0] /*     - x10 * FIX_2_172734803 */
+    vmlal.s16       q12, \x6,  d1[1] /*     + x6  * FIX_1_061594337 */
+
+    vadd.s32        q10, q14, q13 /* becomes y26 */
+    vsub.s32        q14, q14, q13 /* becomes y29 */
+
+.if \shift > 16 /* VRSHRN.S32 cannot encode shifts above 16: shift, then narrow */
+    vrshr.s32       q10,  q10, #\shift
+    vrshr.s32       q14,  q14, #\shift
+    vmovn.s32       \y26, q10
+    vmovn.s32       \y29, q14
+.else
+    vrshrn.s32      \y26, q10, #\shift /* rounding shift and narrow to 16 bits */
+    vrshrn.s32      \y29, q14, #\shift
+.endif
+
+    vadd.s32        q10, q15, q12 /* becomes y27 */
+    vsub.s32        q15, q15, q12 /* becomes y28 */
+
+.if \shift > 16 /* same two-step form as above */
+    vrshr.s32       q10,  q10, #\shift
+    vrshr.s32       q15,  q15, #\shift
+    vmovn.s32       \y27, q10
+    vmovn.s32       \y28, q15
+.else
+    vrshrn.s32      \y27, q10, #\shift
+    vrshrn.s32      \y28, q15, #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_4x4_neon
+    /* Dequantize, run two idct_helper passes, store 4 rows of 4 bytes */
+    DCT_TABLE       .req r0
+    COEF_BLOCK      .req r1
+    OUTPUT_BUF      .req r2
+    OUTPUT_COL      .req r3
+    TMP1            .req r0 /* same register as DCT_TABLE */
+    TMP2            .req r1 /* same register as COEF_BLOCK */
+    TMP3            .req r2 /* same register as OUTPUT_BUF */
+    TMP4            .req ip
+
+    vpush           {d8-d15} /* q4-q7 hold coefficient rows below */
+
+    /* Load constants (d3 is just used for padding) */
+    adr             TMP4, jsimd_idct_4x4_neon_consts
+    vld1.16         {d0, d1, d2, d3}, [TMP4, :128]
+
+    /* Load all COEF_BLOCK into NEON registers with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | d4      | d5
+     *   1 | d6      | d7
+     *   2 | d8      | d9
+     *   3 | d10     | d11
+     *   4 | -       | -
+     *   5 | d12     | d13
+     *   6 | d14     | d15
+     *   7 | d16     | d17
+     */
+    vld1.16         {d4, d5, d6, d7}, [COEF_BLOCK, :128]!
+    vld1.16         {d8, d9, d10, d11}, [COEF_BLOCK, :128]!
+    add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 4, which is not loaded */
+    vld1.16         {d12, d13, d14, d15}, [COEF_BLOCK, :128]!
+    vld1.16         {d16, d17}, [COEF_BLOCK, :128]!
+    /* dequantize */
+    vld1.16         {d18, d19, d20, d21}, [DCT_TABLE, :128]!
+    vmul.s16        q2, q2, q9 /* row 0 */
+    vld1.16         {d22, d23, d24, d25}, [DCT_TABLE, :128]!
+    vmul.s16        q3, q3, q10 /* row 1 */
+    vmul.s16        q4, q4, q11 /* row 2 */
+    add             DCT_TABLE, DCT_TABLE, #16 /* skip the table entries for row 4 */
+    vld1.16         {d26, d27, d28, d29}, [DCT_TABLE, :128]!
+    vmul.s16        q5, q5, q12 /* row 3 */
+    vmul.s16        q6, q6, q13 /* row 5 */
+    vld1.16         {d30, d31}, [DCT_TABLE, :128]!
+    vmul.s16        q7, q7, q14 /* row 6 */
+    vmul.s16        q8, q8, q15 /* row 7 */
+
+    /* Pass 1: columns 0-3 (d4..d16), then columns 4-7 (d5..d17), shift 12 */
+    idct_helper     d4, d6, d8, d10, d12, d14, d16, 12, d4, d6, d8, d10
+    transpose_4x4   d4, d6, d8, d10
+    idct_helper     d5, d7, d9, d11, d13, d15, d17, 12, d5, d7, d9, d11
+    transpose_4x4   d5, d7, d9, d11
+    /* NOTE(review): transpose_4x4 is defined outside this section of the file */
+    /* Pass 2: shift 19; d5 is not an input here */
+    idct_helper     d4, d6, d8, d10, d7, d9, d11, 19, d26, d27, d28, d29
+    transpose_4x4   d26, d27, d28, d29
+
+    /* Range limit */
+    vmov.u16        q15, #0x80
+    vadd.s16        q13, q13, q15 /* + 128 on d26, d27 */
+    vadd.s16        q14, q14, q15 /* + 128 on d28, d29 */
+    vqmovun.s16     d26, q13 /* saturate to 0..255: d26 = bytes from old d26, d27 */
+    vqmovun.s16     d27, q14 /* saturate to 0..255: d27 = bytes from d28, d29 */
+
+    /* Store results to the output buffer */
+    ldmia           OUTPUT_BUF, {TMP1, TMP2, TMP3, TMP4} /* four output pointers */
+    add             TMP1, TMP1, OUTPUT_COL
+    add             TMP2, TMP2, OUTPUT_COL
+    add             TMP3, TMP3, OUTPUT_COL
+    add             TMP4, TMP4, OUTPUT_COL
+
+#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT
+    /* We can use far fewer instructions on little endian systems if the
+     * OS kernel is not configured to trap unaligned memory accesses
+     */
+    vst1.32         {d26[0]}, [TMP1]! /* 4 bytes per output pointer */
+    vst1.32         {d27[0]}, [TMP3]!
+    vst1.32         {d26[1]}, [TMP2]!
+    vst1.32         {d27[1]}, [TMP4]!
+#else
+    vst1.8          {d26[0]}, [TMP1]! /* same bytes, stored one at a time */
+    vst1.8          {d27[0]}, [TMP3]!
+    vst1.8          {d26[1]}, [TMP1]!
+    vst1.8          {d27[1]}, [TMP3]!
+    vst1.8          {d26[2]}, [TMP1]!
+    vst1.8          {d27[2]}, [TMP3]!
+    vst1.8          {d26[3]}, [TMP1]!
+    vst1.8          {d27[3]}, [TMP3]!
+
+    vst1.8          {d26[4]}, [TMP2]!
+    vst1.8          {d27[4]}, [TMP4]!
+    vst1.8          {d26[5]}, [TMP2]!
+    vst1.8          {d27[5]}, [TMP4]!
+    vst1.8          {d26[6]}, [TMP2]!
+    vst1.8          {d27[6]}, [TMP4]!
+    vst1.8          {d26[7]}, [TMP2]!
+    vst1.8          {d27[7]}, [TMP4]!
+#endif
+
+    vpop            {d8-d15}
+    bx              lr
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+.endfunc
+/* Remove the 4x4 idct_helper; a macro of the same name is defined again below */
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_2x2_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 2x2 pixel output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_2x2'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of the 2x2 inverse DCT, which
+ *       requires far fewer arithmetic operations and hence should be faster.
+ *       The primary purpose of this particular NEON-optimized function is
+ *       bit-exact compatibility with jpeg-6b.
+ */
+
+.balign 8
+jsimd_idct_2x2_neon_consts: /* one d register of multipliers, loaded into d0 */
+    .short     -FIX_0_720959822    /* d0[0] */
+    .short     FIX_0_850430095     /* d0[1] */
+    .short     -FIX_1_272758580    /* d0[2] */
+    .short     FIX_3_624509785     /* d0[3] */
+
+.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
+    vshll.s16  q14,  \x4,  #15 /* q14 = x4 << 15 */
+    vmull.s16  q13,  \x6,  d0[3] /* q13 = x6 * FIX_3_624509785 */
+    vmlal.s16  q13,  \x10, d0[2] /*     - x10 * FIX_1_272758580 */
+    vmlal.s16  q13,  \x12, d0[1] /*     + x12 * FIX_0_850430095 */
+    vmlal.s16  q13,  \x16, d0[0] /*     - x16 * FIX_0_720959822 */
+
+    vadd.s32   q10,  q14,  q13 /* becomes y26 */
+    vsub.s32   q14,  q14,  q13 /* becomes y27 */
+
+.if \shift > 16 /* VRSHRN.S32 cannot encode shifts above 16: shift, then narrow */
+    vrshr.s32  q10,  q10,  #\shift
+    vrshr.s32  q14,  q14,  #\shift
+    vmovn.s32  \y26, q10
+    vmovn.s32  \y27, q14
+.else
+    vrshrn.s32 \y26, q10,  #\shift /* rounding shift and narrow to 16 bits */
+    vrshrn.s32 \y27, q14,  #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_2x2_neon
+    /* Dequantize rows 0, 1, 3, 5, 7, run two passes, store 2 rows of 2 bytes */
+    DCT_TABLE       .req r0
+    COEF_BLOCK      .req r1
+    OUTPUT_BUF      .req r2
+    OUTPUT_COL      .req r3
+    TMP1            .req r0 /* same register as DCT_TABLE */
+    TMP2            .req ip
+
+    vpush           {d8-d15}
+
+    /* Load constants */
+    adr             TMP2, jsimd_idct_2x2_neon_consts
+    vld1.16         {d0}, [TMP2, :64]
+
+    /* Load all COEF_BLOCK into NEON registers with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | d4      | d5
+     *   1 | d6      | d7
+     *   2 | -       | -
+     *   3 | d10     | d11
+     *   4 | -       | -
+     *   5 | d12     | d13
+     *   6 | -       | -
+     *   7 | d16     | d17
+     */
+    vld1.16         {d4, d5, d6, d7}, [COEF_BLOCK, :128]!
+    add             COEF_BLOCK, COEF_BLOCK, #16 /* skip row 2 */
+    vld1.16         {d10, d11}, [COEF_BLOCK, :128]!
+    add             COEF_BLOCK, COEF_BLOCK, #16 /* skip row 4 */
+    vld1.16         {d12, d13}, [COEF_BLOCK, :128]!
+    add             COEF_BLOCK, COEF_BLOCK, #16 /* skip row 6 */
+    vld1.16         {d16, d17}, [COEF_BLOCK, :128]!
+    /* Dequantize */
+    vld1.16         {d18, d19, d20, d21}, [DCT_TABLE, :128]!
+    vmul.s16        q2, q2, q9 /* row 0 */
+    vmul.s16        q3, q3, q10 /* row 1 */
+    add             DCT_TABLE, DCT_TABLE, #16 /* skip the table entries for row 2 */
+    vld1.16         {d24, d25}, [DCT_TABLE, :128]!
+    vmul.s16        q5, q5, q12 /* row 3 */
+    add             DCT_TABLE, DCT_TABLE, #16 /* skip the table entries for row 4 */
+    vld1.16         {d26, d27}, [DCT_TABLE, :128]!
+    vmul.s16        q6, q6, q13 /* row 5 */
+    add             DCT_TABLE, DCT_TABLE, #16 /* skip the table entries for row 6 */
+    vld1.16         {d30, d31}, [DCT_TABLE, :128]!
+    vmul.s16        q8, q8, q15 /* row 7 */
+
+    /* Pass 1 */
+#if 0 /* disabled macro-based variant */
+    idct_helper     d4, d6, d10, d12, d16, 13, d4, d6
+    transpose_4x4   d4, d6, d8,  d10
+    idct_helper     d5, d7, d11, d13, d17, 13, d5, d7
+    transpose_4x4   d5, d7, d9,  d11
+#else /* active variant: both column halves written out by hand */
+    vmull.s16       q13, d6,  d0[3] /* q13: products for columns 0-3 */
+    vmlal.s16       q13, d10, d0[2]
+    vmlal.s16       q13, d12, d0[1]
+    vmlal.s16       q13, d16, d0[0]
+    vmull.s16       q12, d7,  d0[3] /* q12: products for columns 4-7 */
+    vmlal.s16       q12, d11, d0[2]
+    vmlal.s16       q12, d13, d0[1]
+    vmlal.s16       q12, d17, d0[0]
+    vshll.s16       q14, d4,  #15 /* row 0, columns 0-3, << 15 */
+    vshll.s16       q15, d5,  #15 /* row 0, columns 4-7, << 15 */
+    vadd.s32        q10, q14, q13
+    vsub.s32        q14, q14, q13
+    vrshrn.s32      d4,  q10, #13 /* sum, rounded >> 13 */
+    vrshrn.s32      d6,  q14, #13 /* difference, rounded >> 13 */
+    vadd.s32        q10, q15, q12
+    vsub.s32        q14, q15, q12
+    vrshrn.s32      d5,  q10, #13
+    vrshrn.s32      d7,  q14, #13
+    vtrn.16         q2,  q3 /* rearrange lanes so pass 2 can use d4, d6, d10, d7, d11 */
+    vtrn.32         q3,  q5
+#endif
+
+    /* Pass 2 */
+    idct_helper     d4, d6, d10, d7, d11, 20, d26, d27
+
+    /* Range limit */
+    vmov.u16        q15, #0x80
+    vadd.s16        q13, q13, q15 /* + 128 on d26, d27 */
+    vqmovun.s16     d26, q13 /* d26 bytes 0-3 come from the old d26 */
+    vqmovun.s16     d27, q13 /* d27 bytes 4-7 come from the old d27 (q13 high half) */
+
+    /* Store results to the output buffer */
+    ldmia           OUTPUT_BUF, {TMP1, TMP2} /* two output pointers */
+    add             TMP1, TMP1, OUTPUT_COL
+    add             TMP2, TMP2, OUTPUT_COL
+
+    vst1.8          {d26[0]}, [TMP1]! /* first pointer: two bytes */
+    vst1.8          {d27[4]}, [TMP1]!
+    vst1.8          {d26[1]}, [TMP2]! /* second pointer: two bytes */
+    vst1.8          {d27[5]}, [TMP2]!
+
+    vpop            {d8-d15}
+    bx              lr
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+.endfunc
+/* Remove the 2x2 idct_helper so the macro name is free again */
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_ycc_extrgb_convert_neon
+ * jsimd_ycc_extbgr_convert_neon
+ * jsimd_ycc_extrgbx_convert_neon
+ * jsimd_ycc_extbgrx_convert_neon
+ * jsimd_ycc_extxbgr_convert_neon
+ * jsimd_ycc_extxrgb_convert_neon
+ *
+ * Colorspace conversion YCbCr -> RGB
+ */
+
+
+.macro do_load size /* load <size> samples each of U, V, Y into d4, d5, d0 */
+    .if \size == 8
+        vld1.8  {d4}, [U, :64]! /* :64 = address is specified as 8-byte aligned */
+        vld1.8  {d5}, [V, :64]!
+        vld1.8  {d0}, [Y, :64]!
+        pld     [U, #64] /* prefetch 64 bytes ahead of each input pointer */
+        pld     [V, #64]
+        pld     [Y, #64]
+    .elseif \size == 4 /* byte lanes 0-3 */
+        vld1.8  {d4[0]}, [U]!
+        vld1.8  {d4[1]}, [U]!
+        vld1.8  {d4[2]}, [U]!
+        vld1.8  {d4[3]}, [U]!
+        vld1.8  {d5[0]}, [V]!
+        vld1.8  {d5[1]}, [V]!
+        vld1.8  {d5[2]}, [V]!
+        vld1.8  {d5[3]}, [V]!
+        vld1.8  {d0[0]}, [Y]!
+        vld1.8  {d0[1]}, [Y]!
+        vld1.8  {d0[2]}, [Y]!
+        vld1.8  {d0[3]}, [Y]!
+    .elseif \size == 2 /* byte lanes 4-5, so a 4-group loaded earlier is kept */
+        vld1.8  {d4[4]}, [U]!
+        vld1.8  {d4[5]}, [U]!
+        vld1.8  {d5[4]}, [V]!
+        vld1.8  {d5[5]}, [V]!
+        vld1.8  {d0[4]}, [Y]!
+        vld1.8  {d0[5]}, [Y]!
+    .elseif \size == 1 /* byte lane 6 */
+        vld1.8  {d4[6]}, [U]!
+        vld1.8  {d5[6]}, [V]!
+        vld1.8  {d0[6]}, [Y]!
+    .else
+        .error unsupported macroblock size
+    .endif
+.endm
+
+.macro do_store bpp, size /* store <size> pixels from d10..d12 (24 bpp) or d10..d13 (32 bpp) */
+    .if \bpp == 24
+        .if \size == 8
+            vst3.8  {d10, d11, d12}, [RGB]! /* interleaved: one byte from each register per pixel */
+        .elseif \size == 4 /* pixels in byte lanes 0-3 */
+            vst3.8  {d10[0], d11[0], d12[0]}, [RGB]!
+            vst3.8  {d10[1], d11[1], d12[1]}, [RGB]!
+            vst3.8  {d10[2], d11[2], d12[2]}, [RGB]!
+            vst3.8  {d10[3], d11[3], d12[3]}, [RGB]!
+        .elseif \size == 2 /* pixels in byte lanes 4-5 */
+            vst3.8  {d10[4], d11[4], d12[4]}, [RGB]!
+            vst3.8  {d10[5], d11[5], d12[5]}, [RGB]!
+        .elseif \size == 1 /* pixel in byte lane 6 */
+            vst3.8  {d10[6], d11[6], d12[6]}, [RGB]!
+        .else
+            .error unsupported macroblock size
+        .endif
+    .elseif \bpp == 32
+        .if \size == 8
+            vst4.8  {d10, d11, d12, d13}, [RGB]! /* interleaved: four bytes per pixel */
+        .elseif \size == 4 /* pixels in byte lanes 0-3 */
+            vst4.8  {d10[0], d11[0], d12[0], d13[0]}, [RGB]!
+            vst4.8  {d10[1], d11[1], d12[1], d13[1]}, [RGB]!
+            vst4.8  {d10[2], d11[2], d12[2], d13[2]}, [RGB]!
+            vst4.8  {d10[3], d11[3], d12[3], d13[3]}, [RGB]!
+        .elseif \size == 2 /* pixels in byte lanes 4-5 */
+            vst4.8  {d10[4], d11[4], d12[4], d13[4]}, [RGB]!
+            vst4.8  {d10[5], d11[5], d12[5], d13[5]}, [RGB]!
+        .elseif \size == 1 /* pixel in byte lane 6 */
+            vst4.8  {d10[6], d11[6], d12[6], d13[6]}, [RGB]!
+        .else
+            .error unsupported macroblock size
+        .endif
+    .else
+        .error unsupported bpp
+    .endif
+.endm
+
+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, g_offs, b_offs
+/* Emits jsimd_ycc_<colorid>_convert_neon; r/g/b_offs = n selects register d1n */
+/*
+ * 2 stage pipelined YCbCr->RGB conversion
+ */
+/* Stage 1: widen and centre chroma, then form the 32-bit chroma products */
+.macro do_yuv_to_rgb_stage1
+    vaddw.u8        q3, q1, d4     /* q3 = u - 128 */
+    vaddw.u8        q4, q1, d5     /* q4 = v - 128 */
+    vmull.s16       q10, d6, d1[1] /* G: multiply by -11277 */
+    vmlal.s16       q10, d8, d1[2] /* G: multiply by -23401 */
+    vmull.s16       q11, d7, d1[1] /* G: multiply by -11277 */
+    vmlal.s16       q11, d9, d1[2] /* G: multiply by -23401 */
+    vmull.s16       q12, d8, d1[0] /* R: multiply by 22971 */
+    vmull.s16       q13, d9, d1[0] /* R: multiply by 22971 */
+    vmull.s16       q14, d6, d1[3] /* B: multiply by 29033 */
+    vmull.s16       q15, d7, d1[3] /* B: multiply by 29033 */
+.endm
+/* Stage 2: round and narrow the products, add Y, saturate into d10..d13 */
+.macro do_yuv_to_rgb_stage2
+    vrshrn.s32      d20, q10, #15 /* G products: rounded >> 15 */
+    vrshrn.s32      d21, q11, #15
+    vrshrn.s32      d24, q12, #14 /* R products: rounded >> 14 */
+    vrshrn.s32      d25, q13, #14
+    vrshrn.s32      d28, q14, #14 /* B products: rounded >> 14 */
+    vrshrn.s32      d29, q15, #14
+    vaddw.u8        q10, q10, d0 /* + Y */
+    vaddw.u8        q12, q12, d0
+    vaddw.u8        q14, q14, d0
+    vqmovun.s16     d1\g_offs, q10 /* G, saturated to 0..255 */
+    vqmovun.s16     d1\r_offs, q12 /* R, saturated to 0..255 */
+    vqmovun.s16     d1\b_offs, q14 /* B, saturated to 0..255 */
+.endm
+/* Extra-indented lines: stage 2 + store of the previous 8 pixels; others: load + stage 1 of the next 8 */
+.macro do_yuv_to_rgb_stage2_store_load_stage1
+    vld1.8          {d4}, [U, :64]!
+      vrshrn.s32      d20, q10, #15
+      vrshrn.s32      d21, q11, #15
+      vrshrn.s32      d24, q12, #14
+      vrshrn.s32      d25, q13, #14
+      vrshrn.s32      d28, q14, #14
+    vld1.8          {d5}, [V, :64]!
+      vrshrn.s32      d29, q15, #14
+      vaddw.u8        q10, q10, d0
+      vaddw.u8        q12, q12, d0
+      vaddw.u8        q14, q14, d0
+      vqmovun.s16     d1\g_offs, q10
+    vld1.8          {d0}, [Y, :64]! /* the old Y in d0 has been consumed above */
+      vqmovun.s16     d1\r_offs, q12
+    pld             [U, #64]
+    pld             [V, #64]
+    pld             [Y, #64]
+      vqmovun.s16     d1\b_offs, q14
+    vaddw.u8        q3, q1, d4     /* q3 = u - 128 */
+    vaddw.u8        q4, q1, d5     /* q4 = v - 128 */
+      do_store        \bpp, 8
+    vmull.s16       q10, d6, d1[1] /* multiply by -11277 */
+    vmlal.s16       q10, d8, d1[2] /* multiply by -23401 */
+    vmull.s16       q11, d7, d1[1] /* multiply by -11277 */
+    vmlal.s16       q11, d9, d1[2] /* multiply by -23401 */
+    vmull.s16       q12, d8, d1[0] /* multiply by 22971 */
+    vmull.s16       q13, d9, d1[0] /* multiply by 22971 */
+    vmull.s16       q14, d6, d1[3] /* multiply by 29033 */
+    vmull.s16       q15, d7, d1[3] /* multiply by 29033 */
+.endm
+/* Unpipelined form: both stages back to back */
+.macro do_yuv_to_rgb
+    do_yuv_to_rgb_stage1
+    do_yuv_to_rgb_stage2
+.endm
+
+/* Apple gas crashes on adrl, work around that by using adr.
+ * But this requires a copy of these constants for each function.
+ */
+
+.balign 16
+jsimd_ycc_\colorid\()_neon_consts:
+    .short          0,      0,     0,      0 /* d0: padding */
+    .short          22971, -11277, -23401, 29033 /* d1[0..3]: multipliers */
+    .short          -128,  -128,   -128,   -128 /* d2: low half of q1 */
+    .short          -128,  -128,   -128,   -128 /* d3: high half of q1 */
+
+asm_function jsimd_ycc_\colorid\()_convert_neon
+    OUTPUT_WIDTH    .req r0
+    INPUT_BUF       .req r1
+    INPUT_ROW       .req r2
+    OUTPUT_BUF      .req r3
+    NUM_ROWS        .req r4
+
+    INPUT_BUF0      .req r5
+    INPUT_BUF1      .req r6
+    INPUT_BUF2      .req INPUT_BUF
+
+    RGB             .req r7
+    Y               .req r8
+    U               .req r9
+    V               .req r10
+    N               .req ip
+
+    /* Load constants to d1, d2, d3 (d0 is just used for padding) */
+    adr             ip, jsimd_ycc_\colorid\()_neon_consts
+    vld1.16         {d0, d1, d2, d3}, [ip, :128]
+
+    /* Save ARM registers and handle input arguments */
+    push            {r4, r5, r6, r7, r8, r9, r10, lr}
+    ldr             NUM_ROWS, [sp, #(4 * 8)] /* stack word just above the 8 pushed registers */
+    ldr             INPUT_BUF0, [INPUT_BUF]
+    ldr             INPUT_BUF1, [INPUT_BUF, #4]
+    ldr             INPUT_BUF2, [INPUT_BUF, #8] /* loaded last: it replaces r1 (INPUT_BUF) */
+    .unreq          INPUT_BUF
+
+    /* Save NEON registers */
+    vpush           {d8-d15}
+
+    /* Initially set d10, d11, d12, d13 to 0xFF */
+    vmov.u8         q5, #255 /* a register that stage 2 never writes keeps 0xFF */
+    vmov.u8         q6, #255
+
+    /* Outer loop over scanlines */
+    cmp             NUM_ROWS, #1
+    blt             9f /* NUM_ROWS < 1: nothing to convert */
+0:
+    ldr             Y, [INPUT_BUF0, INPUT_ROW, lsl #2]
+    ldr             U, [INPUT_BUF1, INPUT_ROW, lsl #2]
+    mov             N, OUTPUT_WIDTH
+    ldr             V, [INPUT_BUF2, INPUT_ROW, lsl #2]
+    add             INPUT_ROW, INPUT_ROW, #1
+    ldr             RGB, [OUTPUT_BUF], #4 /* next output pointer, post-incremented */
+
+    /* Inner loop over pixels */
+    subs            N, N, #8
+    blt             3f /* fewer than 8 pixels in the row: tail only */
+    do_load         8
+    do_yuv_to_rgb_stage1
+    subs            N, N, #8
+    blt             2f /* no second full group: just drain the pipeline */
+1:
+    do_yuv_to_rgb_stage2_store_load_stage1
+    subs            N, N, #8
+    bge             1b
+2:
+    do_yuv_to_rgb_stage2
+    do_store        \bpp, 8
+    tst             N, #7 /* only multiples of 8 were subtracted, so the low 3 bits of N are those of the width */
+    beq             8f
+3:
+    tst             N, #4
+    beq             3f /* forward reference: the second 3: label below */
+    do_load         4
+3:
+    tst             N, #2
+    beq             4f
+    do_load         2
+4:
+    tst             N, #1
+    beq             5f
+    do_load         1
+5:
+    do_yuv_to_rgb /* convert the 1-7 remaining pixels gathered above */
+    tst             N, #4
+    beq             6f
+    do_store        \bpp, 4
+6:
+    tst             N, #2
+    beq             7f
+    do_store        \bpp, 2
+7:
+    tst             N, #1
+    beq             8f
+    do_store        \bpp, 1
+8:
+    subs            NUM_ROWS, NUM_ROWS, #1
+    bgt             0b
+9:
+    /* Restore all registers and return */
+    vpop            {d8-d15}
+    pop             {r4, r5, r6, r7, r8, r9, r10, pc}
+
+    .unreq          OUTPUT_WIDTH
+    .unreq          INPUT_ROW
+    .unreq          OUTPUT_BUF
+    .unreq          NUM_ROWS
+    .unreq          INPUT_BUF0
+    .unreq          INPUT_BUF1
+    .unreq          INPUT_BUF2
+    .unreq          RGB
+    .unreq          Y
+    .unreq          U
+    .unreq          V
+    .unreq          N
+.endfunc
+/* Purge the nested macros so the next instantiation can define them again */
+.purgem do_yuv_to_rgb
+.purgem do_yuv_to_rgb_stage1
+.purgem do_yuv_to_rgb_stage2
+.purgem do_yuv_to_rgb_stage2_store_load_stage1
+
+.endm
+
+/*--------------------------------- id ----- bpp R  G  B   (n selects d1n) */
+generate_jsimd_ycc_rgb_convert_neon extrgb,  24, 0, 1, 2 /* bytes: R, G, B */
+generate_jsimd_ycc_rgb_convert_neon extbgr,  24, 2, 1, 0 /* bytes: B, G, R */
+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, 1, 2 /* bytes: R, G, B, 0xFF */
+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0 /* bytes: B, G, R, 0xFF */
+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1 /* bytes: 0xFF, B, G, R */
+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3 /* bytes: 0xFF, R, G, B */
+/* Drop the YCbCr->RGB helpers; do_store and do_load are defined again below */
+.purgem do_load
+.purgem do_store
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_extrgb_ycc_convert_neon
+ * jsimd_extbgr_ycc_convert_neon
+ * jsimd_extrgbx_ycc_convert_neon
+ * jsimd_extbgrx_ycc_convert_neon
+ * jsimd_extxbgr_ycc_convert_neon
+ * jsimd_extxrgb_ycc_convert_neon
+ *
+ * Colorspace conversion RGB -> YCbCr
+ */
+
+.macro do_store size /* store <size> bytes each from d20, d21, d22 to Y, U, V */
+    .if \size == 8
+        vst1.8  {d20}, [Y]!
+        vst1.8  {d21}, [U]!
+        vst1.8  {d22}, [V]!
+    .elseif \size == 4 /* byte lanes 0-3 */
+        vst1.8  {d20[0]}, [Y]!
+        vst1.8  {d20[1]}, [Y]!
+        vst1.8  {d20[2]}, [Y]!
+        vst1.8  {d20[3]}, [Y]!
+        vst1.8  {d21[0]}, [U]!
+        vst1.8  {d21[1]}, [U]!
+        vst1.8  {d21[2]}, [U]!
+        vst1.8  {d21[3]}, [U]!
+        vst1.8  {d22[0]}, [V]!
+        vst1.8  {d22[1]}, [V]!
+        vst1.8  {d22[2]}, [V]!
+        vst1.8  {d22[3]}, [V]!
+    .elseif \size == 2 /* byte lanes 4-5 */
+        vst1.8  {d20[4]}, [Y]!
+        vst1.8  {d20[5]}, [Y]!
+        vst1.8  {d21[4]}, [U]!
+        vst1.8  {d21[5]}, [U]!
+        vst1.8  {d22[4]}, [V]!
+        vst1.8  {d22[5]}, [V]!
+    .elseif \size == 1 /* byte lane 6 */
+        vst1.8  {d20[6]}, [Y]!
+        vst1.8  {d21[6]}, [U]!
+        vst1.8  {d22[6]}, [V]!
+    .else
+        .error unsupported macroblock size
+    .endif
+.endm
+
+.macro do_load bpp, size /* load <size> pixels from RGB into d10..d12 (24 bpp) or d10..d13 (32 bpp) */
+    .if \bpp == 24
+        .if \size == 8
+            vld3.8  {d10, d11, d12}, [RGB]! /* de-interleave: one byte of each pixel per register */
+            pld     [RGB, #128] /* prefetch 128 bytes ahead */
+        .elseif \size == 4 /* pixels go to byte lanes 0-3 */
+            vld3.8  {d10[0], d11[0], d12[0]}, [RGB]!
+            vld3.8  {d10[1], d11[1], d12[1]}, [RGB]!
+            vld3.8  {d10[2], d11[2], d12[2]}, [RGB]!
+            vld3.8  {d10[3], d11[3], d12[3]}, [RGB]!
+        .elseif \size == 2 /* pixels go to byte lanes 4-5 */
+            vld3.8  {d10[4], d11[4], d12[4]}, [RGB]!
+            vld3.8  {d10[5], d11[5], d12[5]}, [RGB]!
+        .elseif \size == 1 /* pixel goes to byte lane 6 */
+            vld3.8  {d10[6], d11[6], d12[6]}, [RGB]!
+        .else
+            .error unsupported macroblock size
+        .endif
+    .elseif \bpp == 32
+        .if \size == 8
+            vld4.8  {d10, d11, d12, d13}, [RGB]! /* de-interleave: four bytes per pixel */
+            pld     [RGB, #128] /* prefetch 128 bytes ahead */
+        .elseif \size == 4 /* pixels go to byte lanes 0-3 */
+            vld4.8  {d10[0], d11[0], d12[0], d13[0]}, [RGB]!
+            vld4.8  {d10[1], d11[1], d12[1], d13[1]}, [RGB]!
+            vld4.8  {d10[2], d11[2], d12[2], d13[2]}, [RGB]!
+            vld4.8  {d10[3], d11[3], d12[3], d13[3]}, [RGB]!
+        .elseif \size == 2 /* pixels go to byte lanes 4-5 */
+            vld4.8  {d10[4], d11[4], d12[4], d13[4]}, [RGB]!
+            vld4.8  {d10[5], d11[5], d12[5], d13[5]}, [RGB]!
+        .elseif \size == 1 /* pixel goes to byte lane 6 */
+            vld4.8  {d10[6], d11[6], d12[6], d13[6]}, [RGB]!
+        .else
+            .error unsupported macroblock size
+        .endif
+    .else
+        .error unsupported bpp
+    .endif
+.endm
+
+.macro generate_jsimd_rgb_ycc_convert_neon colorid, bpp, r_offs, g_offs, b_offs
+/* Emits jsimd_<colorid>_ycc_convert_neon; each *_offs digit selects d10 + offs */
+/*
+ * 2 stage pipelined RGB->YCbCr conversion
+ */
+
+.macro do_rgb_to_yuv_stage1
+    vmovl.u8    q2, d1\r_offs /* r = { d4, d5 } */
+    vmovl.u8    q3, d1\g_offs /* g = { d6, d7 } */
+    vmovl.u8    q4, d1\b_offs /* b = { d8, d9 } */
+    vmull.u16   q7, d4, d0[0] /* y sum, lanes 0-3: r * 19595 */
+    vmlal.u16   q7, d6, d0[1] /*   + g * 38470 */
+    vmlal.u16   q7, d8, d0[2] /*   + b * 7471 */
+    vmull.u16   q8, d5, d0[0] /* y sum, lanes 4-7 */
+    vmlal.u16   q8, d7, d0[1]
+    vmlal.u16   q8, d9, d0[2]
+    vrev64.32   q9,  q1 /* seed u sums from q1 (constant rows 3 and 4) */
+    vrev64.32   q13, q1
+    vmlsl.u16   q9,  d4, d0[3] /* u sum, lanes 0-3: - r * 11059 */
+    vmlsl.u16   q9,  d6, d1[0] /*   - g * 21709 */
+    vmlal.u16   q9,  d8, d1[1] /*   + b * 32768 */
+    vmlsl.u16   q13, d5, d0[3] /* u sum, lanes 4-7 */
+    vmlsl.u16   q13, d7, d1[0]
+    vmlal.u16   q13, d9, d1[1]
+    vrev64.32   q14, q1 /* seed v sums from q1 as well */
+    vrev64.32   q15, q1
+    vmlal.u16   q14, d4, d1[1] /* v sum, lanes 0-3: + r * 32768 */
+    vmlsl.u16   q14, d6, d1[2] /*   - g * 27439 */
+    vmlsl.u16   q14, d8, d1[3] /*   - b * 5329 */
+    vmlal.u16   q15, d5, d1[1] /* v sum, lanes 4-7 */
+    vmlsl.u16   q15, d7, d1[2]
+    vmlsl.u16   q15, d9, d1[3]
+.endm
+
+.macro do_rgb_to_yuv_stage2
+    vrshrn.u32  d20, q7,  #16 /* y: rounding narrow of the 32-bit sums */
+    vrshrn.u32  d21, q8,  #16
+    vshrn.u32   d22, q9,  #16 /* u and v: truncating narrow */
+    vshrn.u32   d23, q13, #16
+    vshrn.u32   d24, q14, #16
+    vshrn.u32   d25, q15, #16
+    vmovn.u16   d20, q10      /* d20 = y */
+    vmovn.u16   d21, q11      /* d21 = u */
+    vmovn.u16   d22, q12      /* d22 = v */
+.endm
+
+.macro do_rgb_to_yuv
+    do_rgb_to_yuv_stage1
+    do_rgb_to_yuv_stage2
+.endm
+
+.macro do_rgb_to_yuv_stage2_store_load_stage1
+      vrshrn.u32  d20, q7,  #16 /* 6-space indent: stage 2 + store, current 8 */
+      vrshrn.u32  d21, q8,  #16
+      vshrn.u32   d22, q9,  #16
+    vrev64.32   q9,  q1 /* 4-space indent: load + stage 1, next 8 pixels */
+      vshrn.u32   d23, q13, #16
+    vrev64.32   q13, q1
+      vshrn.u32   d24, q14, #16
+      vshrn.u32   d25, q15, #16
+    do_load     \bpp, 8
+      vmovn.u16   d20, q10      /* d20 = y */
+    vmovl.u8    q2, d1\r_offs   /* r = { d4, d5 } */
+      vmovn.u16   d21, q11      /* d21 = u */
+    vmovl.u8    q3, d1\g_offs   /* g = { d6, d7 } */
+      vmovn.u16   d22, q12      /* d22 = v */
+    vmovl.u8    q4, d1\b_offs   /* b = { d8, d9 } */
+    vmull.u16   q7, d4, d0[0]
+    vmlal.u16   q7, d6, d0[1]
+    vmlal.u16   q7, d8, d0[2]
+      vst1.8      {d20}, [Y]!
+    vmull.u16   q8, d5, d0[0]
+    vmlal.u16   q8, d7, d0[1]
+    vmlal.u16   q8, d9, d0[2]
+    vmlsl.u16   q9,  d4, d0[3]
+    vmlsl.u16   q9,  d6, d1[0]
+    vmlal.u16   q9,  d8, d1[1]
+      vst1.8      {d21}, [U]!
+    vmlsl.u16   q13, d5, d0[3]
+    vmlsl.u16   q13, d7, d1[0]
+    vmlal.u16   q13, d9, d1[1]
+    vrev64.32   q14, q1
+    vrev64.32   q15, q1
+    vmlal.u16   q14, d4, d1[1]
+    vmlsl.u16   q14, d6, d1[2]
+    vmlsl.u16   q14, d8, d1[3]
+      vst1.8      {d22}, [V]!
+    vmlal.u16   q15, d5, d1[1]
+    vmlsl.u16   q15, d7, d1[2]
+    vmlsl.u16   q15, d9, d1[3]
+.endm
+
+.balign 16
+jsimd_\colorid\()_ycc_neon_consts:
+    .short          19595, 38470, 7471,  11059 /* d0: y(r, g, b), u(r) */
+    .short          21709, 32768, 27439, 5329 /* d1: u(g), u(b) and v(r), v(g), v(b) */
+    .short          32767, 128,   32767, 128 /* d2: seed for the u/v sums */
+    .short          32767, 128,   32767, 128 /* d3: same as d2 */
+
+asm_function jsimd_\colorid\()_ycc_convert_neon
+    OUTPUT_WIDTH    .req r0
+    INPUT_BUF       .req r1
+    OUTPUT_BUF      .req r2
+    OUTPUT_ROW      .req r3
+    NUM_ROWS        .req r4 /* fifth argument, fetched from the stack below */
+
+    OUTPUT_BUF0     .req r5
+    OUTPUT_BUF1     .req r6
+    OUTPUT_BUF2     .req OUTPUT_BUF /* reuses r2 after the pointers are read */
+
+    RGB             .req r7
+    Y               .req r8
+    U               .req r9
+    V               .req r10
+    N               .req ip /* pixel counter for the current row */
+
+    /* Load constants to d0, d1, d2, d3 */
+    adr             ip, jsimd_\colorid\()_ycc_neon_consts
+    vld1.16         {d0, d1, d2, d3}, [ip, :128]
+
+    /* Save ARM registers and handle input arguments */
+    push            {r4, r5, r6, r7, r8, r9, r10, lr}
+    ldr             NUM_ROWS, [sp, #(4 * 8)] /* 8 words pushed above */
+    ldr             OUTPUT_BUF0, [OUTPUT_BUF] /* three pointers from OUTPUT_BUF */
+    ldr             OUTPUT_BUF1, [OUTPUT_BUF, #4]
+    ldr             OUTPUT_BUF2, [OUTPUT_BUF, #8]
+    .unreq          OUTPUT_BUF
+
+    /* Save NEON registers */
+    vpush           {d8-d15}
+
+    /* Outer loop over scanlines */
+    cmp             NUM_ROWS, #1
+    blt             9f /* nothing to convert when NUM_ROWS < 1 */
+0:
+    ldr             Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #2] /* OUTPUT_BUF0[OUTPUT_ROW] */
+    ldr             U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #2]
+    mov             N, OUTPUT_WIDTH
+    ldr             V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #2]
+    add             OUTPUT_ROW, OUTPUT_ROW, #1
+    ldr             RGB, [INPUT_BUF], #4 /* next input row; INPUT_BUF advances */
+
+    /* Inner loop over pixels */
+    subs            N, N, #8
+    blt             3f /* fewer than 8 pixels in the row: tail code only */
+    do_load         \bpp, 8
+    do_rgb_to_yuv_stage1
+    subs            N, N, #8
+    blt             2f /* no second full group: just drain the pipeline */
+1:
+    do_rgb_to_yuv_stage2_store_load_stage1
+    subs            N, N, #8
+    bge             1b
+2:
+    do_rgb_to_yuv_stage2
+    do_store        8
+    tst             N, #7 /* subtracting 8 kept the low 3 bits of the width */
+    beq             8f
+3:
+    tst             N, #4
+    beq             3f /* targets the second 3: label, just below */
+    do_load         \bpp, 4
+3:
+    tst             N, #2
+    beq             4f
+    do_load         \bpp, 2
+4:
+    tst             N, #1
+    beq             5f
+    do_load         \bpp, 1
+5:
+    do_rgb_to_yuv
+    tst             N, #4
+    beq             6f
+    do_store        4
+6:
+    tst             N, #2
+    beq             7f
+    do_store        2
+7:
+    tst             N, #1
+    beq             8f
+    do_store        1
+8:
+    subs            NUM_ROWS, NUM_ROWS, #1
+    bgt             0b
+9:
+    /* Restore all registers and return */
+    vpop            {d8-d15}
+    pop             {r4, r5, r6, r7, r8, r9, r10, pc}
+
+    .unreq          OUTPUT_WIDTH
+    .unreq          OUTPUT_ROW
+    .unreq          INPUT_BUF
+    .unreq          NUM_ROWS
+    .unreq          OUTPUT_BUF0
+    .unreq          OUTPUT_BUF1
+    .unreq          OUTPUT_BUF2
+    .unreq          RGB
+    .unreq          Y
+    .unreq          U
+    .unreq          V
+    .unreq          N
+.endfunc
+
+.purgem do_rgb_to_yuv
+.purgem do_rgb_to_yuv_stage1
+.purgem do_rgb_to_yuv_stage2
+.purgem do_rgb_to_yuv_stage2_store_load_stage1
+
+.endm
+
+/*--------------------------------- id ----- bpp R  G  B */
+generate_jsimd_rgb_ycc_convert_neon extrgb,  24, 0, 1, 2 /* R d10, G d11, B d12 */
+generate_jsimd_rgb_ycc_convert_neon extbgr,  24, 2, 1, 0 /* R d12, G d11, B d10 */
+generate_jsimd_rgb_ycc_convert_neon extrgbx, 32, 0, 1, 2 /* R d10, G d11, B d12 */
+generate_jsimd_rgb_ycc_convert_neon extbgrx, 32, 2, 1, 0 /* R d12, G d11, B d10 */
+generate_jsimd_rgb_ycc_convert_neon extxbgr, 32, 3, 2, 1 /* R d13, G d12, B d11 */
+generate_jsimd_rgb_ycc_convert_neon extxrgb, 32, 1, 2, 3 /* R d11, G d12, B d13 */
+
+.purgem do_load /* not referenced again in the visible remainder of this file */
+.purgem do_store
+
+
+/*****************************************************************************/
+
+/*
+ * Load data into workspace, applying unsigned->signed conversion
+ *
+ * TODO: can be combined with 'jsimd_fdct_ifast_neon' to get
+ *       rid of VST1.16 instructions
+ */
+
+asm_function jsimd_convsamp_neon
+    SAMPLE_DATA     .req r0
+    START_COL       .req r1
+    WORKSPACE       .req r2
+    TMP1            .req r3
+    TMP2            .req r4
+    TMP3            .req r5
+    TMP4            .req ip
+
+    push            {r4, r5}
+    vmov.u8         d0, #128 /* value subtracted from every sample */
+
+    ldmia           SAMPLE_DATA!, {TMP1, TMP2, TMP3, TMP4} /* 4 row pointers */
+    add             TMP1, TMP1, START_COL /* step each row to column START_COL */
+    add             TMP2, TMP2, START_COL
+    add             TMP3, TMP3, START_COL
+    add             TMP4, TMP4, START_COL
+    vld1.8          {d16}, [TMP1] /* 8 samples from the row */
+    vsubl.u8        q8, d16, d0 /* widen to 16 bits while subtracting 128 */
+    vld1.8          {d18}, [TMP2]
+    vsubl.u8        q9, d18, d0
+    vld1.8          {d20}, [TMP3]
+    vsubl.u8        q10, d20, d0
+    vld1.8          {d22}, [TMP4]
+    ldmia           SAMPLE_DATA!, {TMP1, TMP2, TMP3, TMP4} /* next 4 row pointers */
+    vsubl.u8        q11, d22, d0
+    vst1.16         {d16, d17, d18, d19}, [WORKSPACE, :128]! /* two 8-value rows */
+    add             TMP1, TMP1, START_COL
+    add             TMP2, TMP2, START_COL
+    vst1.16         {d20, d21, d22, d23}, [WORKSPACE, :128]!
+    add             TMP3, TMP3, START_COL
+    add             TMP4, TMP4, START_COL
+    vld1.8          {d24}, [TMP1]
+    vsubl.u8        q12, d24, d0
+    vld1.8          {d26}, [TMP2]
+    vsubl.u8        q13, d26, d0
+    vld1.8          {d28}, [TMP3]
+    vsubl.u8        q14, d28, d0
+    vld1.8          {d30}, [TMP4]
+    vsubl.u8        q15, d30, d0
+    vst1.16         {d24, d25, d26, d27}, [WORKSPACE, :128]!
+    vst1.16         {d28, d29, d30, d31}, [WORKSPACE, :128]!
+    pop             {r4, r5}
+    bx              lr
+
+    .unreq          SAMPLE_DATA
+    .unreq          START_COL
+    .unreq          WORKSPACE
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_fdct_ifast_neon
+ *
+ * This function contains a fast, not so accurate integer implementation of
+ * the forward DCT (Discrete Cosine Transform). It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_fdct_ifast'
+ * function from jfdctfst.c
+ *
+ * TODO: can be combined with 'jsimd_convsamp_neon' to get
+ *       rid of a bunch of VLD1.16 instructions
+ */
+
+#define XFIX_0_382683433 d0[0] /* lanes of d0, loaded from the table below */
+#define XFIX_0_541196100 d0[1]
+#define XFIX_0_707106781 d0[2]
+#define XFIX_1_306562965 d0[3]
+
+.balign 16
+jsimd_fdct_ifast_neon_consts:
+    .short (98 * 128)              /* XFIX_0_382683433 */
+    .short (139 * 128)             /* XFIX_0_541196100 */
+    .short (181 * 128)             /* XFIX_0_707106781 */
+    .short (334 * 128 - 256 * 128) /* XFIX_1_306562965, stored minus 256 * 128 */
+
+asm_function jsimd_fdct_ifast_neon
+
+    DATA            .req r0
+    TMP             .req ip
+
+    vpush           {d8-d15} /* q4-q7 are overwritten as temporaries below */
+
+    /* Load constants */
+    adr             TMP, jsimd_fdct_ifast_neon_consts
+    vld1.16         {d0}, [TMP, :64]
+
+    /* Load all DATA into NEON registers with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | d16     | d17    | q8
+     *   1 | d18     | d19    | q9
+     *   2 | d20     | d21    | q10
+     *   3 | d22     | d23    | q11
+     *   4 | d24     | d25    | q12
+     *   5 | d26     | d27    | q13
+     *   6 | d28     | d29    | q14
+     *   7 | d30     | d31    | q15
+     */
+
+    vld1.16         {d16, d17, d18, d19}, [DATA, :128]!
+    vld1.16         {d20, d21, d22, d23}, [DATA, :128]!
+    vld1.16         {d24, d25, d26, d27}, [DATA, :128]!
+    vld1.16         {d28, d29, d30, d31}, [DATA, :128]
+    sub             DATA, DATA, #(128 - 32) /* undo the three 32-byte increments */
+
+    mov             TMP, #2 /* two passes, each a transpose plus a 1-D FDCT */
+1:
+    /* Transpose */
+    vtrn.16         q12, q13
+    vtrn.16         q10, q11
+    vtrn.16         q8,  q9
+    vtrn.16         q14, q15
+    vtrn.32         q9,  q11
+    vtrn.32         q13, q15
+    vtrn.32         q8,  q10
+    vtrn.32         q12, q14
+    vswp            d30, d23
+    vswp            d24, d17
+    vswp            d26, d19
+      /* 1-D FDCT */
+      vadd.s16        q2,  q11, q12
+    vswp            d28, d21 /* last transpose step, interleaved with the FDCT */
+      vsub.s16        q12, q11, q12
+      vsub.s16        q6,  q10, q13
+      vadd.s16        q10, q10, q13
+      vsub.s16        q7,  q9,  q14
+      vadd.s16        q9,  q9,  q14
+      vsub.s16        q1,  q8,  q15
+      vadd.s16        q8,  q8,  q15
+      vsub.s16        q4,  q9,  q10
+      vsub.s16        q5,  q8,  q2
+      vadd.s16        q3,  q9,  q10
+      vadd.s16        q4,  q4,  q5
+      vadd.s16        q2,  q8,  q2
+      vqdmulh.s16     q4,  q4,  XFIX_0_707106781
+      vadd.s16        q11, q12, q6
+      vadd.s16        q8,  q2,  q3
+      vsub.s16        q12, q2,  q3
+      vadd.s16        q3,  q6,  q7
+      vadd.s16        q7,  q7,  q1
+      vqdmulh.s16     q3,  q3,  XFIX_0_707106781
+      vsub.s16        q6,  q11, q7
+      vadd.s16        q10, q5,  q4
+      vqdmulh.s16     q6,  q6,  XFIX_0_382683433
+      vsub.s16        q14, q5,  q4
+      vqdmulh.s16     q11, q11, XFIX_0_541196100
+      vqdmulh.s16     q5,  q7,  XFIX_1_306562965 /* reduced constant: q7 is ... */
+      vadd.s16        q4,  q1,  q3
+      vsub.s16        q3,  q1,  q3
+      vadd.s16        q7,  q7,  q6
+      vadd.s16        q11, q11, q6
+      vadd.s16        q7,  q7,  q5 /* ... added to the product here */
+      vadd.s16        q13, q3,  q11
+      vsub.s16        q11, q3,  q11
+      vadd.s16        q9,  q4,  q7
+      vsub.s16        q15, q4,  q7
+    subs            TMP, TMP, #1
+    bne             1b
+
+    /* store results */
+    vst1.16         {d16, d17, d18, d19}, [DATA, :128]!
+    vst1.16         {d20, d21, d22, d23}, [DATA, :128]!
+    vst1.16         {d24, d25, d26, d27}, [DATA, :128]!
+    vst1.16         {d28, d29, d30, d31}, [DATA, :128]
+
+    vpop            {d8-d15}
+    bx              lr
+
+    .unreq          DATA
+    .unreq          TMP
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * GLOBAL(void)
+ * jsimd_quantize_neon (JCOEFPTR coef_block, DCTELEM * divisors,
+ *                      DCTELEM * workspace);
+ *
+ * Note: the code uses 2 stage pipelining in order to improve instruction
+ *       scheduling and eliminate stalls (this provides ~15% better
+ *       performance for this function on both ARM Cortex-A8 and
+ *       ARM Cortex-A9 when compared to the non-pipelined variant).
+ *       The instructions which belong to the second stage use different
+ *       indentation for better readability.
+ */
+asm_function jsimd_quantize_neon
+
+    COEF_BLOCK      .req r0
+    DIVISORS        .req r1
+    WORKSPACE       .req r2
+
+    RECIPROCAL      .req DIVISORS /* reciprocals are read from offset 0 */
+    CORRECTION      .req r3
+    SHIFT           .req ip
+    LOOP_COUNT      .req r4
+
+    vld1.16         {d0, d1, d2, d3}, [WORKSPACE, :128]! /* 16 input values */
+    vabs.s16        q12, q0
+    add             CORRECTION, DIVISORS, #(64 * 2) /* corrections at byte 128 */
+    add             SHIFT, DIVISORS, #(64 * 6) /* shift counts at byte 384 */
+    vld1.16         {d20, d21, d22, d23}, [CORRECTION, :128]!
+    vabs.s16        q13, q1
+    vld1.16         {d16, d17, d18, d19}, [RECIPROCAL, :128]!
+    vadd.u16        q12, q12, q10 /* add correction */
+    vadd.u16        q13, q13, q11
+    vmull.u16       q10, d24, d16 /* multiply by reciprocal */
+    vmull.u16       q11, d25, d17
+    vmull.u16       q8,  d26, d18
+    vmull.u16       q9,  d27, d19
+    vld1.16         {d24, d25, d26, d27}, [SHIFT, :128]!
+    vshrn.u32       d20, q10, #16 /* keep the high half of each product */
+    vshrn.u32       d21, q11, #16
+    vshrn.u32       d22, q8,  #16
+    vshrn.u32       d23, q9,  #16
+    vneg.s16        q12, q12 /* negated counts turn vshl into a right shift */
+    vneg.s16        q13, q13
+    vshr.s16        q2,  q0,  #15 /* extract sign */
+    vshr.s16        q3,  q1,  #15
+    vshl.u16        q14, q10, q12 /* shift */
+    vshl.u16        q15, q11, q13
+
+    push            {r4, r5} /* r4 is LOOP_COUNT; r5 is not used in this function */
+    mov             LOOP_COUNT, #3 /* 3 more groups of 16 follow the one above */
+1:
+    vld1.16         {d0, d1, d2, d3}, [WORKSPACE, :128]!
+      veor.u16        q14, q14, q2  /* restore sign */
+    vabs.s16        q12, q0
+    vld1.16         {d20, d21, d22, d23}, [CORRECTION, :128]!
+    vabs.s16        q13, q1
+      veor.u16        q15, q15, q3
+    vld1.16         {d16, d17, d18, d19}, [RECIPROCAL, :128]!
+    vadd.u16        q12, q12, q10 /* add correction */
+    vadd.u16        q13, q13, q11
+    vmull.u16       q10, d24, d16 /* multiply by reciprocal */
+    vmull.u16       q11, d25, d17
+    vmull.u16       q8,  d26, d18
+    vmull.u16       q9,  d27, d19
+      vsub.u16        q14, q14, q2 /* (x ^ sign) - sign completes the negation */
+    vld1.16         {d24, d25, d26, d27}, [SHIFT, :128]!
+      vsub.u16        q15, q15, q3
+    vshrn.u32       d20, q10, #16
+    vshrn.u32       d21, q11, #16
+      vst1.16         {d28, d29, d30, d31}, [COEF_BLOCK, :128]! /* previous group */
+    vshrn.u32       d22, q8,  #16
+    vshrn.u32       d23, q9,  #16
+    vneg.s16        q12, q12
+    vneg.s16        q13, q13
+    vshr.s16        q2,  q0,  #15 /* extract sign */
+    vshr.s16        q3,  q1,  #15
+    vshl.u16        q14, q10, q12 /* shift */
+    vshl.u16        q15, q11, q13
+    subs            LOOP_COUNT, LOOP_COUNT, #1
+    bne             1b
+    pop             {r4, r5}
+
+      veor.u16        q14, q14, q2  /* restore sign */
+      veor.u16        q15, q15, q3
+      vsub.u16        q14, q14, q2
+      vsub.u16        q15, q15, q3
+      vst1.16         {d28, d29, d30, d31}, [COEF_BLOCK, :128]! /* final group */
+
+    bx              lr /* return */
+
+    .unreq          COEF_BLOCK
+    .unreq          DIVISORS
+    .unreq          WORKSPACE
+    .unreq          RECIPROCAL
+    .unreq          CORRECTION
+    .unreq          SHIFT
+    .unreq          LOOP_COUNT
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * GLOBAL(void)
+ * jsimd_h2v1_fancy_upsample_neon (int          max_v_samp_factor,
+ *                                 JDIMENSION   downsampled_width,
+ *                                 JSAMPARRAY   input_data,
+ *                                 JSAMPARRAY * output_data_ptr);
+ *
+ * Note: the use of unaligned writes is the main remaining bottleneck in
+ *       this code; solving it could potentially yield a performance
+ *       improvement of tens of percent on Cortex-A8/Cortex-A9.
+ */
+
+/*
+ * Upsample 16 source pixels to 32 destination pixels. The new 16 source
+ * pixels are loaded to q0. The previous 16 source pixels are in q1. The
+ * shifted-by-one source pixels are constructed in q2 by using q0 and q1.
+ * Register d28 is used for multiplication by 3. Register q15 is used
+ * for adding +1 bias.
+ */
+.macro upsample16   OUTPTR, INPTR
+    vld1.8          {q0}, [\INPTR]! /* next 16 source pixels */
+    vmovl.u8        q8,  d0 /* q8/q9 start as the widened new pixels */
+    vext.8          q2,  q1,  q0, #15 /* q2 = top byte of q1, then q0 bytes 0-14 */
+    vmovl.u8        q9,  d1
+    vaddw.u8        q10, q15, d4 /* q10/q11 start as bias + shifted pixels */
+    vaddw.u8        q11, q15, d5
+    vmlal.u8        q8,  d4,  d28 /* q8/q9 += 3 * shifted pixels */
+    vmlal.u8        q9,  d5,  d28
+    vmlal.u8        q10, d0,  d28 /* q10/q11 += 3 * new pixels */
+    vmlal.u8        q11, d1,  d28
+    vmov            q1,  q0       /* backup source pixels to q1 */
+    vrshrn.u16      d6,  q8,  #2 /* rounding narrow */
+    vrshrn.u16      d7,  q9,  #2
+    vshrn.u16       d8,  q10, #2 /* truncating narrow; bias was added above */
+    vshrn.u16       d9,  q11, #2
+    vst2.8          {d6, d7, d8, d9}, [\OUTPTR]! /* interleave both result sets */
+.endm
+
+/*
+ * Upsample 32 source pixels to 64 destination pixels. Compared to 'upsample16'
+ * macro, the roles of q0 and q1 registers are reversed for even and odd
+ * groups of 16 pixels, that's why "vmov q1, q0" instructions are not needed.
+ * Also this unrolling allows reordering loads and stores to compensate for
+ * multiplication latency and reduce stalls.
+ */
+.macro upsample32   OUTPTR, INPTR
+    /* even 16 pixels group */
+    vld1.8          {q0}, [\INPTR]! /* new pixels in q0, previous group in q1 */
+    vmovl.u8        q8,  d0
+    vext.8          q2,  q1,  q0, #15 /* q2 = top byte of q1, then q0 bytes 0-14 */
+    vmovl.u8        q9,  d1
+    vaddw.u8        q10, q15, d4
+    vaddw.u8        q11, q15, d5
+    vmlal.u8        q8,  d4,  d28
+    vmlal.u8        q9,  d5,  d28
+    vmlal.u8        q10, d0,  d28
+    vmlal.u8        q11, d1,  d28
+        /* odd 16 pixels group */
+        vld1.8          {q1}, [\INPTR]! /* new pixels in q1, previous group in q0 */
+    vrshrn.u16      d6,  q8,  #2
+    vrshrn.u16      d7,  q9,  #2
+    vshrn.u16       d8,  q10, #2
+    vshrn.u16       d9,  q11, #2
+        vmovl.u8        q8,  d2
+        vext.8          q2,  q0,  q1, #15 /* q2 = top byte of q0, then q1 bytes 0-14 */
+        vmovl.u8        q9,  d3
+        vaddw.u8        q10, q15, d4
+        vaddw.u8        q11, q15, d5
+        vmlal.u8        q8,  d4,  d28
+        vmlal.u8        q9,  d5,  d28
+        vmlal.u8        q10, d2,  d28
+        vmlal.u8        q11, d3,  d28
+    vst2.8          {d6, d7, d8, d9}, [\OUTPTR]! /* even group results */
+        vrshrn.u16      d6,  q8,  #2
+        vrshrn.u16      d7,  q9,  #2
+        vshrn.u16       d8,  q10, #2
+        vshrn.u16       d9,  q11, #2
+        vst2.8          {d6, d7, d8, d9}, [\OUTPTR]! /* odd group results */
+.endm
+
+/*
+ * Upsample a row of WIDTH pixels from INPTR to OUTPTR.
+ */
+.macro upsample_row OUTPTR, INPTR, WIDTH, TMP1
+    /* special case for the first and last pixels */
+    sub             \WIDTH, \WIDTH, #1
+    add             \OUTPTR, \OUTPTR, #1
+    ldrb            \TMP1, [\INPTR, \WIDTH] /* last input pixel */
+    strb            \TMP1, [\OUTPTR, \WIDTH, asl #1] /* copied to last output */
+    ldrb            \TMP1, [\INPTR], #1 /* first input pixel; INPTR advances */
+    strb            \TMP1, [\OUTPTR, #-1] /* copied to first output */
+    vmov.8          d3[7], \TMP1 /* top byte of q1 feeds the first vext */
+
+    subs            \WIDTH, \WIDTH, #32
+    blt             5f
+0:  /* process 32 pixels per iteration */
+    upsample32      \OUTPTR, \INPTR
+    subs            \WIDTH, \WIDTH, #32
+    bge             0b
+5:
+    adds            \WIDTH, \WIDTH, #16 /* is a group of 16 still left? */
+    blt             1f
+0:  /* process 16 pixels if needed */
+    upsample16      \OUTPTR, \INPTR
+    subs            \WIDTH, \WIDTH, #16
+1:
+    adds            \WIDTH, \WIDTH, #16 /* WIDTH = leftover count, 0-15 */
+    beq             9f
+
+    /* load the remaining 1-15 pixels */
+    add             \INPTR, \INPTR, \WIDTH /* end of input; gather back to front */
+    tst             \WIDTH, #1
+    beq             2f
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[0]}, [\INPTR]
+2:
+    tst             \WIDTH, #2
+    beq             2f
+    vext.8          d0, d0, d0, #6 /* rotate d0 so bytes 0-1 are free */
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[1]}, [\INPTR]
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[0]}, [\INPTR]
+2:
+    tst             \WIDTH, #4
+    beq             2f
+    vrev64.32       d0, d0 /* swap d0 words so bytes 0-3 are free */
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[3]}, [\INPTR]
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[2]}, [\INPTR]
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[1]}, [\INPTR]
+    sub             \INPTR, \INPTR, #1
+    vld1.8          {d0[0]}, [\INPTR]
+2:
+    tst             \WIDTH, #8
+    beq             2f
+    vmov            d1,  d0 /* pixels gathered so far move to the upper half */
+    sub             \INPTR, \INPTR, #8
+    vld1.8          {d0}, [\INPTR]
+2:  /* upsample the remaining pixels */
+    vmovl.u8        q8,  d0
+    vext.8          q2,  q1,  q0, #15
+    vmovl.u8        q9,  d1
+    vaddw.u8        q10, q15, d4
+    vaddw.u8        q11, q15, d5
+    vmlal.u8        q8,  d4,  d28
+    vmlal.u8        q9,  d5,  d28
+    vmlal.u8        q10, d0,  d28
+    vmlal.u8        q11, d1,  d28
+    vrshrn.u16      d10, q8,  #2
+    vrshrn.u16      d12, q9,  #2
+    vshrn.u16       d11, q10, #2
+    vshrn.u16       d13, q11, #2
+    vzip.8          d10, d11 /* interleave into output order: q5, then q6 */
+    vzip.8          d12, d13
+    /* store the remaining pixels */
+    tst             \WIDTH, #8
+    beq             2f
+    vst1.8          {d10, d11}, [\OUTPTR]! /* 16 output bytes */
+    vmov            q5,  q6 /* bring the next outputs down into q5 */
+2:
+    tst             \WIDTH, #4
+    beq             2f
+    vst1.8          {d10}, [\OUTPTR]! /* 8 output bytes */
+    vmov            d10,  d11
+2:
+    tst             \WIDTH, #2
+    beq             2f
+    vst1.8          {d10[0]}, [\OUTPTR]! /* 4 output bytes, one lane at a time */
+    vst1.8          {d10[1]}, [\OUTPTR]!
+    vst1.8          {d10[2]}, [\OUTPTR]!
+    vst1.8          {d10[3]}, [\OUTPTR]!
+    vext.8          d10, d10, d10, #4
+2:
+    tst             \WIDTH, #1
+    beq             2f
+    vst1.8          {d10[0]}, [\OUTPTR]! /* 2 output bytes */
+    vst1.8          {d10[1]}, [\OUTPTR]!
+2:
+9:
+.endm
+
+asm_function jsimd_h2v1_fancy_upsample_neon
+
+    MAX_V_SAMP_FACTOR .req r0
+    DOWNSAMPLED_WIDTH .req r1
+    INPUT_DATA        .req r2
+    OUTPUT_DATA_PTR   .req r3
+    OUTPUT_DATA       .req OUTPUT_DATA_PTR /* r3 is reused after the dereference */
+
+    OUTPTR            .req r4
+    INPTR             .req r5
+    WIDTH             .req ip
+    TMP               .req lr
+
+    push            {r4, r5, r6, lr} /* r6 is not used in this function */
+    vpush           {d8-d15}
+
+    ldr             OUTPUT_DATA, [OUTPUT_DATA_PTR]
+    cmp             MAX_V_SAMP_FACTOR, #0
+    ble             99f /* no rows to process */
+
+    /* initialize constants */
+    vmov.u8         d28, #3 /* multiplier used by the upsample macros */
+    vmov.u16        q15, #1 /* +1 bias used by the upsample macros */
+11:
+    ldr             INPTR, [INPUT_DATA], #4 /* next input row pointer */
+    ldr             OUTPTR, [OUTPUT_DATA], #4 /* next output row pointer */
+    mov             WIDTH, DOWNSAMPLED_WIDTH
+    upsample_row    OUTPTR, INPTR, WIDTH, TMP
+    subs            MAX_V_SAMP_FACTOR, MAX_V_SAMP_FACTOR, #1 /* one row per pass */
+    bgt             11b
+
+99:
+    vpop            {d8-d15}
+    pop             {r4, r5, r6, pc}
+
+    .unreq          MAX_V_SAMP_FACTOR
+    .unreq          DOWNSAMPLED_WIDTH
+    .unreq          INPUT_DATA
+    .unreq          OUTPUT_DATA_PTR
+    .unreq          OUTPUT_DATA
+
+    .unreq          OUTPTR
+    .unreq          INPTR
+    .unreq          WIDTH
+    .unreq          TMP
+
+.endfunc
+
+.purgem upsample16
+.purgem upsample32
+.purgem upsample_row
diff --git a/simd/jsimd_arm_neon_64.S b/simd/jsimd_arm_neon_64.S
new file mode 100644
index 0000000..2c3989c
--- /dev/null
+++ b/simd/jsimd_arm_neon_64.S
@@ -0,0 +1,1824 @@
+/*
+ * ARMv8 NEON optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
+ * All rights reserved.
+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+ * Copyright (C) 2013, Linaro Limited
+ * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
+#endif
+
+.text
+.arch armv8-a+fp+simd
+
+
+#define RESPECT_STRICT_ALIGNMENT 1
+
+
+
+/*****************************************************************************/
+
+/* Supplementary macro for setting function attributes */
+.macro asm_function fname /* open a global function named fname */
+#ifdef __APPLE__ /* Apple branch: the symbol gets a leading underscore */
+    .func _\fname
+    .globl _\fname
+_\fname:
+#else
+    .func \fname
+    .global \fname
+#ifdef __ELF__
+    .hidden \fname /* ELF only: hidden visibility and function type */
+    .type \fname, %function
+#endif
+\fname:
+#endif
+.endm
+
+/* Transpose elements of single 128 bit registers */
+.macro transpose_single x0,x1,xi,xilen,literal
+    ins  \xi\xilen[0],  \x0\xilen[0] /* xi[0] = x0[0], a copy for trn2 */
+    ins  \x1\xilen[0],  \x0\xilen[1] /* x1[0] = x0[1] */
+    trn1 \x0\literal,   \x0\literal, \x1\literal /* x0 = even elements of x0, x1 */
+    trn2 \x1\literal,   \xi\literal, \x1\literal /* x1 = odd elements of xi, x1 */
+.endm
+
+/* Transpose elements of 2 different registers */
+.macro transpose x0,x1,xi,xilen,literal
+    mov  \xi\xilen,     \x0\xilen /* keep x0: trn1 overwrites it */
+    trn1 \x0\literal,   \x0\literal, \x1\literal /* x0 = even elements of x0, x1 */
+    trn2 \x1\literal,   \xi\literal, \x1\literal /* x1 = odd elements of old x0, x1 */
+.endm
+
+/* Transpose a block of 4x4 coefficients in four 64-bit registers */
+.macro transpose_4x4_32 x0,x0len x1,x1len x2,x2len x3,x3len,xi,xilen
+    mov  \xi\xilen, \x0\xilen /* keep x0: trn1 overwrites it */
+    trn1 \x0\x0len, \x0\x0len, \x2\x2len /* pair x0 with x2 */
+    trn2 \x2\x2len, \xi\x0len, \x2\x2len
+    mov  \xi\xilen, \x1\xilen /* keep x1: trn1 overwrites it */
+    trn1 \x1\x1len, \x1\x1len, \x3\x3len /* pair x1 with x3 */
+    trn2 \x3\x3len, \xi\x1len, \x3\x3len
+.endm
+
+.macro transpose_4x4_16 x0,x0len x1,x1len, x2,x2len, x3,x3len,xi,xilen
+    mov  \xi\xilen, \x0\xilen /* keep x0: trn1 overwrites it */
+    trn1 \x0\x0len, \x0\x0len, \x1\x1len /* pair x0 with x1 */
+    trn2 \x1\x2len, \xi\x0len, \x1\x2len /* NOTE(review): x1 is suffixed with x2len */
+    mov  \xi\xilen, \x2\xilen /* keep x2: trn1 overwrites it */
+    trn1 \x2\x2len, \x2\x2len, \x3\x3len /* pair x2 with x3 */
+    trn2 \x3\x2len, \xi\x1len, \x3\x3len /* NOTE(review): x3 uses x2len, xi uses x1len */
+.endm
+
+.macro transpose_4x4 x0, x1, x2, x3,x5 /* x5 is the scratch register */
+    transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b /* 16-bit step */
+    transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b /* 32-bit step */
+.endm
+
+
+#define CENTERJSAMPLE 128 /* NOTE(review): presumably the 8-bit sample midpoint; users are not visible here, confirm */
+
+/*****************************************************************************/
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * GLOBAL(void)
+ * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block,
+ *                        JSAMPARRAY output_buf, JDIMENSION output_col)
+ */
+
+#define FIX_0_298631336  (2446) /* each value matches round(name * 8192) */
+#define FIX_0_390180644  (3196)
+#define FIX_0_541196100  (4433)
+#define FIX_0_765366865  (6270)
+#define FIX_0_899976223  (7373)
+#define FIX_1_175875602  (9633)
+#define FIX_1_501321110  (12299)
+#define FIX_1_847759065  (15137)
+#define FIX_1_961570560  (16069)
+#define FIX_2_053119869  (16819)
+#define FIX_2_562915447  (20995)
+#define FIX_3_072711026  (25172)
+
+#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560) /* derived sums and differences */
+#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644)
+#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065)
+#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447)
+#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223)
+#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223)
+#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447)
+#define FIX_0_541196100_PLUS_0_765366865  (FIX_0_541196100 + FIX_0_765366865)
+
+/*
+ * Reference SIMD-friendly 1-D ISLOW iDCT C implementation.
+ * Uses some ideas from the comments in 'simd/jiss2int-64.asm'
+ */
+#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7)   \
+{                                                                             \
+    DCTELEM row0, row1, row2, row3, row4, row5, row6, row7;                   \
+    INT32   q1, q2, q3, q4, q5, q6, q7;                                       \
+    INT32   tmp11_plus_tmp2, tmp11_minus_tmp2;                                \
+                                                                              \
+    /* 1-D iDCT input data */                                                 \
+    row0 = xrow0;                                                             \
+    row1 = xrow1;                                                             \
+    row2 = xrow2;                                                             \
+    row3 = xrow3;                                                             \
+    row4 = xrow4;                                                             \
+    row5 = xrow5;                                                             \
+    row6 = xrow6;                                                             \
+    row7 = xrow7;                                                             \
+                                                                              \
+    q5 = row7 + row3;                                                         \
+    q4 = row5 + row1;                                                         \
+    q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) +                    \
+         MULTIPLY(q4, FIX_1_175875602);                                       \
+    q7 = MULTIPLY(q5, FIX_1_175875602) +                                      \
+         MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644);                     \
+    q2 = MULTIPLY(row2, FIX_0_541196100) +                                    \
+         MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065);                   \
+    q4 = q6;                                                                  \
+    q3 = ((INT32) row0 - (INT32) row4) << 13;                                 \
+    q6 += MULTIPLY(row5, -FIX_2_562915447) +                                  \
+          MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447);                  \
+    /* now we can use q1 (reloadable constants have been used up) */          \
+    q1 = q3 + q2;                                                             \
+    q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) +                 \
+          MULTIPLY(row1, -FIX_0_899976223);                                   \
+    q5 = q7;                                                                  \
+    q1 = q1 + q6;                                                             \
+    q7 += MULTIPLY(row7, -FIX_0_899976223) +                                  \
+          MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223);                  \
+                                                                              \
+    /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */        \
+    tmp11_plus_tmp2 = q1;                                                     \
+    row1 = 0;                                                                 \
+                                                                              \
+    q1 = q1 - q6;                                                             \
+    q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) +                 \
+          MULTIPLY(row3, -FIX_2_562915447);                                   \
+    q1 = q1 - q6;                                                             \
+    q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) +                   \
+         MULTIPLY(row6, FIX_0_541196100);                                     \
+    q3 = q3 - q2;                                                             \
+                                                                              \
+    /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */        \
+    tmp11_minus_tmp2 = q1;                                                    \
+                                                                              \
+    q1 = ((INT32) row0 + (INT32) row4) << 13;                                 \
+    q2 = q1 + q6;                                                             \
+    q1 = q1 - q6;                                                             \
+                                                                              \
+    /* pick up the results */                                                 \
+    tmp0  = q4;                                                               \
+    tmp1  = q5;                                                               \
+    tmp2  = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2;                         \
+    tmp3  = q7;                                                               \
+    tmp10 = q2;                                                               \
+    tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2;                         \
+    tmp12 = q3;                                                               \
+    tmp13 = q1;                                                               \
+}
+
+/*
+ * Lane aliases for the islow IDCT multipliers.  Each XFIX_* name selects one
+ * 16-bit lane of v0, v1 or v2; those registers are loaded from the
+ * jsimd_idct_islow_neon_consts table that follows, so the order of the table
+ * entries has to match the lane assignments made here.
+ */
+#define XFIX_0_899976223                    v0.4h[0]
+#define XFIX_0_541196100                    v0.4h[1]
+#define XFIX_2_562915447                    v0.4h[2]
+#define XFIX_0_298631336_MINUS_0_899976223  v0.4h[3]
+#define XFIX_1_501321110_MINUS_0_899976223  v1.4h[0]
+#define XFIX_2_053119869_MINUS_2_562915447  v1.4h[1]
+#define XFIX_0_541196100_PLUS_0_765366865   v1.4h[2]
+#define XFIX_1_175875602                    v1.4h[3]
+#define XFIX_1_175875602_MINUS_0_390180644  v2.4h[0]
+#define XFIX_0_541196100_MINUS_1_847759065  v2.4h[1]
+#define XFIX_3_072711026_MINUS_2_562915447  v2.4h[2]
+#define XFIX_1_175875602_MINUS_1_961570560  v2.4h[3]
+
+/* Constant table: twelve 16-bit multipliers, four per 64-bit vector lane set */
+.balign 16
+jsimd_idct_islow_neon_consts:
+    .short FIX_0_899976223                    /* v0.4h[0] */
+    .short FIX_0_541196100                    /* v0.4h[1] */
+    .short FIX_2_562915447                    /* v0.4h[2] */
+    .short FIX_0_298631336_MINUS_0_899976223  /* v0.4h[3] */
+    .short FIX_1_501321110_MINUS_0_899976223  /* v1.4h[0] */
+    .short FIX_2_053119869_MINUS_2_562915447  /* v1.4h[1] */
+    .short FIX_0_541196100_PLUS_0_765366865   /* v1.4h[2] */
+    .short FIX_1_175875602                    /* v1.4h[3] */
+    /* reloadable constants: v2 doubles as a scratch accumulator in the IDCT
+     * code, so these four entries are loaded again before each 1-D pass */
+    .short FIX_1_175875602_MINUS_0_390180644  /* v2.4h[0] */
+    .short FIX_0_541196100_MINUS_1_847759065  /* v2.4h[1] */
+    .short FIX_3_072711026_MINUS_2_562915447  /* v2.4h[2] */
+    .short FIX_1_175875602_MINUS_1_961570560  /* v2.4h[3] */
+
+/*
+ * jsimd_idct_islow_neon
+ *
+ * Dequantizes an 8x8 block of 16-bit coefficients (element-wise multiply by
+ * the 64 16-bit entries at DCT_TABLE), runs two 1-D integer IDCT passes,
+ * descales and saturates the result to 8 bits, adds CENTERJSAMPLE and stores
+ * eight rows of 8 bytes each.
+ *
+ * Arguments:
+ *   x0 (DCT_TABLE)  - 64 16-bit dequantization multipliers
+ *   x1 (COEF_BLOCK) - 64 16-bit coefficients
+ *   x2 (OUTPUT_BUF) - array of eight 64-bit row pointers
+ *   x3 (OUTPUT_COL) - offset added to every row pointer before the store
+ *
+ * The right 4x8 half of the coefficient block is tested for zeros while the
+ * left half is being processed:
+ *   - rows 1-7 non-zero            -> full pass 1 for the right half, then 1:
+ *   - rows 1-7 zero, row 0 non-zero -> 3: replicate row 0, then 1:
+ *   - rows 0-7 zero                -> 3:, then 4: (sparse second pass)
+ *
+ * Stack usage: the prologue writes x15 and the low 64 bits of v0-v31 into the
+ * 272 bytes below the incoming sp using post-increment stores, which leaves
+ * sp at its entry value.  x4/x5 and the low halves of v8-v15 are then stored
+ * a further 80 bytes lower and sp is left at (entry - 272) until the
+ * epilogue walks it back up.
+ * NOTE(review): at several points live data sits below sp; confirm this is
+ * acceptable for the target ABI (e.g. asynchronous signal delivery).
+ * NOTE(review): OUTPUT_COL is used as a full 64-bit register; if the C-side
+ * argument is a 32-bit type its upper half may need explicit zero-extension.
+ */
+asm_function jsimd_idct_islow_neon
+
+    DCT_TABLE       .req x0
+    COEF_BLOCK      .req x1
+    OUTPUT_BUF      .req x2
+    OUTPUT_COL      .req x3
+    TMP1            .req x0
+    TMP2            .req x1
+    TMP3            .req x2
+    TMP4            .req x15
+
+    ROW0L           .req v16
+    ROW0R           .req v17
+    ROW1L           .req v18
+    ROW1R           .req v19
+    ROW2L           .req v20
+    ROW2R           .req v21
+    ROW3L           .req v22
+    ROW3R           .req v23
+    ROW4L           .req v24
+    ROW4R           .req v25
+    ROW5L           .req v26
+    ROW5R           .req v27
+    ROW6L           .req v28
+    ROW6R           .req v29
+    ROW7L           .req v30
+    ROW7R           .req v31
+    /* Save all NEON registers and x15 (32 NEON registers * 8 bytes + 16) */
+    sub             sp, sp, 272
+    str             x15, [sp], 16
+    adr             x15, jsimd_idct_islow_neon_consts
+    st1             {v0.8b - v3.8b}, [sp], 32
+    st1             {v4.8b - v7.8b}, [sp], 32
+    st1             {v8.8b - v11.8b}, [sp], 32
+    st1             {v12.8b - v15.8b}, [sp], 32
+    st1             {v16.8b - v19.8b}, [sp], 32
+    st1             {v20.8b - v23.8b}, [sp], 32
+    st1             {v24.8b - v27.8b}, [sp], 32
+    st1             {v28.8b - v31.8b}, [sp], 32
+    /* Load the coefficients and dequantize them; loads of the next rows are
+     * interleaved with the multiplies of the previous ones */
+    ld1             {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32
+    ld1             {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
+    ld1             {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32
+    mul             v16.4h, v16.4h, v0.4h
+    mul             v17.4h, v17.4h, v1.4h
+    ins             v16.2d[1], v17.2d[0]  /* 128 bit q8 */
+    ld1             {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
+    mul             v18.4h, v18.4h, v2.4h
+    mul             v19.4h, v19.4h, v3.4h
+    ins             v18.2d[1], v19.2d[0]  /* 128 bit q9 */
+    ld1             {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32
+    mul             v20.4h, v20.4h, v4.4h
+    mul             v21.4h, v21.4h, v5.4h
+    ins             v20.2d[1], v21.2d[0]  /* 128 bit q10 */
+    ld1             {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
+    mul             v22.4h, v22.4h, v6.4h
+    mul             v23.4h, v23.4h, v7.4h
+    ins             v22.2d[1], v23.2d[0]  /* 128 bit q11 */
+    /* No post-increment here: COEF_BLOCK stays at (block start + 96), which
+     * is what the "-96 + ..." offsets used below are relative to */
+    ld1             {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK]
+    mul             v24.4h, v24.4h, v0.4h
+    mul             v25.4h, v25.4h, v1.4h
+    ins             v24.2d[1], v25.2d[0]  /* 128 bit q12 */
+    ld1             {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
+    mul             v28.4h, v28.4h, v4.4h
+    mul             v29.4h, v29.4h, v5.4h
+    ins             v28.2d[1], v29.2d[0]  /* 128 bit q14 */
+    mul             v26.4h, v26.4h, v2.4h
+    mul             v27.4h, v27.4h, v3.4h
+    ins             v26.2d[1], v27.2d[0]  /* 128 bit q13 */
+    ld1             {v0.4h, v1.4h, v2.4h, v3.4h}, [x15]  /* load constants */
+    add             x15, x15, #16  /* x15 -> reloadable constants (v2 lanes) */
+    mul             v30.4h, v30.4h, v6.4h
+    mul             v31.4h, v31.4h, v7.4h
+    ins             v30.2d[1], v31.2d[0]  /* 128 bit q15 */
+    /* Go to the bottom of the stack */
+    sub             sp, sp, 352
+    stp             x4, x5, [sp], 16
+    st1             {v8.4h - v11.4h}, [sp], 32  /* save NEON registers */
+    st1             {v12.4h - v15.4h}, [sp], 32
+    /* 1-D IDCT, pass 1, left 4x8 half */
+    add             v4.4h,    ROW7L.4h, ROW3L.4h
+    add             v5.4h,    ROW5L.4h, ROW1L.4h
+    smull           v12.4s,   v4.4h,    XFIX_1_175875602_MINUS_1_961570560
+    smlal           v12.4s,   v5.4h,    XFIX_1_175875602
+    smull           v14.4s,   v4.4h,    XFIX_1_175875602
+    /* Check for the zero coefficients in the right 4x8 half */
+    smlal           v14.4s,   v5.4h,    XFIX_1_175875602_MINUS_0_390180644
+    ssubl           v6.4s,    ROW0L.4h, ROW4L.4h
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))]
+    smull           v4.4s,    ROW2L.4h, XFIX_0_541196100
+    smlal           v4.4s,    ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065
+      orr           x0,       x4,       x5
+    mov             v8.16b,   v12.16b
+    smlsl           v12.4s,   ROW5L.4h, XFIX_2_562915447
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))]
+    smlal           v12.4s,   ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+    shl             v6.4s,    v6.4s,    #13
+      orr           x0,       x0,       x4
+    smlsl           v8.4s,    ROW1L.4h, XFIX_0_899976223
+      orr           x0,       x0 ,      x5
+    add             v2.4s,    v6.4s,    v4.4s
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))]
+    mov             v10.16b,  v14.16b
+    add             v2.4s,    v2.4s,    v12.4s
+      orr           x0,       x0,       x4
+    smlsl           v14.4s,   ROW7L.4h, XFIX_0_899976223
+      orr           x0,       x0,       x5
+    smlal           v14.4s,   ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+    rshrn           ROW1L.4h, v2.4s,    #11
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))]
+    sub             v2.4s,    v2.4s,    v12.4s
+    smlal           v10.4s,   ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447
+      orr           x0,       x0,       x4
+    smlsl           v10.4s,   ROW3L.4h, XFIX_2_562915447
+      orr           x0,       x0,       x5
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))]
+    smlal           v12.4s,   ROW6L.4h, XFIX_0_541196100
+    sub             v6.4s,    v6.4s,    v4.4s
+      orr           x0,       x0,       x4
+    rshrn           ROW6L.4h, v2.4s,    #11
+      orr           x0,       x0,       x5
+    add             v2.4s,    v6.4s,    v10.4s
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))]
+    sub             v6.4s,    v6.4s,    v10.4s
+    saddl           v10.4s,   ROW0L.4h, ROW4L.4h
+      orr           x0,       x0,       x4
+    rshrn           ROW2L.4h, v2.4s,    #11
+      orr           x0,       x0,       x5
+    rshrn           ROW5L.4h, v6.4s,    #11
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))]
+    shl             v10.4s,   v10.4s,   #13
+    smlal           v8.4s,    ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223
+      orr           x0,       x0,       x4
+    add             v4.4s,    v10.4s,   v12.4s
+      orr           x0,       x0,       x5
+      /* x0 now holds the OR of rows 1-7 of the right half.  There is no ORRS
+       * on AArch64, so set the flags here, before x0 is reused for row 0.
+       * The result is consumed by the 'beq 3f' below. */
+      cmp           x0,       #0
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+      ldp           w4,       w5,       [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))]
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+      orr           x0,       x4,       x5  /* row 0 test, checked at 3: */
+    sub             v6.4s,    v2.4s,    v8.4s
+      /* pop             {x4, x5} */
+      sub           sp, sp, 80
+      ldp           x4, x5, [sp], 16
+    rshrn           ROW7L.4h, v4.4s,    #11
+    rshrn           ROW3L.4h, v10.4s,   #11
+    rshrn           ROW0L.4h, v12.4s,   #11
+    rshrn           ROW4L.4h, v6.4s,    #11
+    /* The flags still hold the rows 1-7 test made above: none of the
+     * instructions issued since then modify NZCV. */
+
+      beq             3f /* Go to do some special handling for the sparse right 4x8 half */
+
+    /* 1-D IDCT, pass 1, right 4x8 half */
+    ld1             {v2.4h},  [x15]    /* reload constants */
+    add             v10.4h,   ROW7R.4h, ROW3R.4h
+    add             v8.4h,    ROW5R.4h, ROW1R.4h
+    /* Transpose ROW6L <-> ROW7L   (v3 available free register) */
+    transpose       ROW6L, ROW7L, v3, .16b, .4h
+    smull           v12.4s,   v10.4h,   XFIX_1_175875602_MINUS_1_961570560
+    smlal           v12.4s,   v8.4h,    XFIX_1_175875602
+    /* Transpose ROW2L <-> ROW3L   (v3 available free register) */
+    transpose       ROW2L, ROW3L, v3, .16b, .4h
+    smull           v14.4s,   v10.4h,   XFIX_1_175875602
+    smlal           v14.4s,   v8.4h,    XFIX_1_175875602_MINUS_0_390180644
+    /* Transpose ROW0L <-> ROW1L   (v3 available free register) */
+    transpose       ROW0L, ROW1L, v3, .16b, .4h
+    ssubl           v6.4s,    ROW0R.4h, ROW4R.4h
+    smull           v4.4s,    ROW2R.4h, XFIX_0_541196100
+    smlal           v4.4s,    ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
+    /* Transpose ROW4L <-> ROW5L   (v3 available free register) */
+    transpose       ROW4L, ROW5L, v3, .16b, .4h
+    mov             v8.16b,   v12.16b
+    smlsl           v12.4s,   ROW5R.4h, XFIX_2_562915447
+    smlal           v12.4s,   ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447
+    /* Transpose ROW1L <-> ROW3L   (v3 available free register) */
+    transpose       ROW1L, ROW3L, v3, .16b, .2s
+    shl             v6.4s,    v6.4s,    #13
+    smlsl           v8.4s,    ROW1R.4h, XFIX_0_899976223
+    /* Transpose ROW4L <-> ROW6L   (v3 available free register) */
+    transpose       ROW4L, ROW6L, v3, .16b, .2s
+    add             v2.4s,    v6.4s,    v4.4s
+    mov             v10.16b,  v14.16b
+    add             v2.4s,    v2.4s,    v12.4s
+    /* Transpose ROW0L <-> ROW2L   (v3 available free register) */
+    transpose       ROW0L, ROW2L, v3, .16b, .2s
+    smlsl           v14.4s,   ROW7R.4h, XFIX_0_899976223
+    smlal           v14.4s,   ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223
+    rshrn           ROW1R.4h, v2.4s,    #11
+    /* Transpose ROW5L <-> ROW7L   (v3 available free register) */
+    transpose       ROW5L, ROW7L, v3, .16b, .2s
+    sub             v2.4s,    v2.4s,    v12.4s
+    smlal           v10.4s,   ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
+    smlsl           v10.4s,   ROW3R.4h, XFIX_2_562915447
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865
+    smlal           v12.4s,   ROW6R.4h, XFIX_0_541196100
+    sub             v6.4s,    v6.4s,    v4.4s
+    rshrn           ROW6R.4h, v2.4s,    #11
+    add             v2.4s,    v6.4s,    v10.4s
+    sub             v6.4s,    v6.4s,    v10.4s
+    saddl           v10.4s,   ROW0R.4h, ROW4R.4h
+    rshrn           ROW2R.4h, v2.4s,    #11
+    rshrn           ROW5R.4h, v6.4s,    #11
+    shl             v10.4s,   v10.4s,   #13
+    smlal           v8.4s,    ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
+    add             v4.4s,    v10.4s,   v12.4s
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+    /* Row 4 goes to v6 (as in the left half); v12 must keep the row 0 value
+     * until it is narrowed into ROW0R below */
+    sub             v6.4s,    v2.4s,    v8.4s
+    rshrn           ROW7R.4h, v4.4s,    #11
+    rshrn           ROW3R.4h, v10.4s,   #11
+    rshrn           ROW0R.4h, v12.4s,   #11
+    rshrn           ROW4R.4h, v6.4s,    #11
+    /* Transpose right 4x8 half */
+    transpose       ROW6R, ROW7R, v3, .16b, .4h
+    transpose       ROW2R, ROW3R, v3, .16b, .4h
+    transpose       ROW0R, ROW1R, v3, .16b, .4h
+    transpose       ROW4R, ROW5R, v3, .16b, .4h
+    transpose       ROW1R, ROW3R, v3, .16b, .2s
+    transpose       ROW4R, ROW6R, v3, .16b, .2s
+    transpose       ROW0R, ROW2R, v3, .16b, .2s
+    transpose       ROW5R, ROW7R, v3, .16b, .2s
+
+1:  /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */
+    ld1             {v2.4h},  [x15]    /* reload constants */
+    smull           v12.4S,   ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
+    smlal           v12.4s,   ROW1L.4h, XFIX_1_175875602
+    smlal           v12.4s,   ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
+    smlal           v12.4s,   ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
+    smull           v14.4s,   ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
+    smlal           v14.4s,   ROW3L.4h, XFIX_1_175875602
+    smlal           v14.4s,   ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
+    smlal           v14.4s,   ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
+    ssubl           v6.4s,    ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
+    smull           v4.4s,    ROW2L.4h, XFIX_0_541196100
+    smlal           v4.4s,    ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */
+    mov             v8.16b,   v12.16b
+    smlsl           v12.4s,   ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
+    smlal           v12.4s,   ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+    shl             v6.4s,    v6.4s,    #13
+    smlsl           v8.4s,    ROW1L.4h, XFIX_0_899976223
+    add             v2.4s,    v6.4s,    v4.4s
+    mov             v10.16b,  v14.16b
+    add             v2.4s,    v2.4s,    v12.4s
+    smlsl           v14.4s,   ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
+    smlal           v14.4s,   ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+    shrn            ROW1L.4h, v2.4s,    #16
+    sub             v2.4s,    v2.4s,    v12.4s
+    smlal           v10.4s,   ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
+    smlsl           v10.4s,   ROW3L.4h, XFIX_2_562915447
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+    smlal           v12.4s,   ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
+    sub             v6.4s,    v6.4s,    v4.4s
+    shrn            ROW2R.4h, v2.4s,    #16 /* ROW6L.4h <-> ROW2R.4h */
+    add             v2.4s,    v6.4s,    v10.4s
+    sub             v6.4s,    v6.4s,    v10.4s
+    saddl           v10.4s,   ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
+    shrn            ROW2L.4h, v2.4s,    #16
+    shrn            ROW1R.4h, v6.4s,    #16 /* ROW5L.4h <-> ROW1R.4h */
+    shl             v10.4s,   v10.4s,   #13
+    smlal           v8.4s,    ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
+    add             v4.4s,    v10.4s,   v12.4s
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+    sub             v6.4s,    v2.4s,    v8.4s
+    shrn            ROW3R.4h, v4.4s,    #16 /* ROW7L.4h <-> ROW3R.4h */
+    shrn            ROW3L.4h, v10.4s,   #16
+    shrn            ROW0L.4h, v12.4s,   #16
+    shrn            ROW0R.4h, v6.4s,    #16 /* ROW4L.4h <-> ROW0R.4h */
+    /* 1-D IDCT, pass 2, right 4x8 half */
+    ld1             {v2.4h},  [x15]    /* reload constants */
+    smull           v12.4s,   ROW5R.4h, XFIX_1_175875602
+    smlal           v12.4s,   ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
+    smlal           v12.4s,   ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560
+    smlal           v12.4s,   ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
+    smull           v14.4s,   ROW7R.4h, XFIX_1_175875602
+    smlal           v14.4s,   ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
+    smlal           v14.4s,   ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644
+    smlal           v14.4s,   ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
+    ssubl           v6.4s,    ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
+    smull           v4.4s,    ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
+    smlal           v4.4s,    ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
+    mov             v8.16b,   v12.16b
+    smlsl           v12.4s,   ROW5R.4h, XFIX_2_562915447
+    smlal           v12.4s,   ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
+    shl             v6.4s,    v6.4s,    #13
+    smlsl           v8.4s,    ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
+    add             v2.4s,    v6.4s,    v4.4s
+    mov             v10.16b,  v14.16b
+    add             v2.4s,    v2.4s,    v12.4s
+    smlsl           v14.4s,   ROW7R.4h, XFIX_0_899976223
+    smlal           v14.4s,   ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
+    shrn            ROW5L.4h, v2.4s,    #16 /* ROW5L.4h <-> ROW1R.4h */
+    sub             v2.4s,    v2.4s,    v12.4s
+    smlal           v10.4s,   ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
+    smlsl           v10.4s,   ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */
+    smlal           v12.4s,   ROW6R.4h, XFIX_0_541196100
+    sub             v6.4s,    v6.4s,    v4.4s
+    shrn            ROW6R.4h, v2.4s,    #16
+    add             v2.4s,    v6.4s,    v10.4s
+    sub             v6.4s,    v6.4s,    v10.4s
+    saddl           v10.4s,   ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
+    shrn            ROW6L.4h, v2.4s,    #16 /* ROW6L.4h <-> ROW2R.4h */
+    shrn            ROW5R.4h, v6.4s,    #16
+    shl             v10.4s,   v10.4s,   #13
+    smlal           v8.4s,    ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
+    add             v4.4s,    v10.4s,   v12.4s
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+    sub             v6.4s,    v2.4s,    v8.4s
+    shrn            ROW7R.4h, v4.4s,    #16
+    shrn            ROW7L.4h, v10.4s,   #16 /* ROW7L.4h <-> ROW3R.4h */
+    shrn            ROW4L.4h, v12.4s,   #16 /* ROW4L.4h <-> ROW0R.4h */
+    shrn            ROW4R.4h, v6.4s,    #16
+
+2:  /* Descale to 8-bit and range limit */
+    ins             v16.2d[1], v17.2d[0]
+    ins             v18.2d[1], v19.2d[0]
+    ins             v20.2d[1], v21.2d[0]
+    ins             v22.2d[1], v23.2d[0]
+    sqrshrn         v16.8b,   v16.8h,   #2
+    sqrshrn2        v16.16b,  v18.8h,   #2
+    sqrshrn         v18.8b,   v20.8h,   #2
+    sqrshrn2        v18.16b,  v22.8h,   #2
+
+    /* vpop            {v8.4h - v15.4h} */ /* restore NEON registers */
+    ld1             {v8.4h - v11.4h}, [sp], 32
+    ld1             {v12.4h - v15.4h}, [sp], 32
+    ins             v24.2d[1], v25.2d[0]
+
+    sqrshrn         v20.8b,   v24.8h,   #2
+      /* Transpose the final 8-bit samples and do signed->unsigned conversion */
+    /* trn1            v16.8h,    v16.8h,  v18.8h */
+    transpose       v16, v18, v3, .16b, .8h
+    ins             v26.2d[1], v27.2d[0]
+    ins             v28.2d[1], v29.2d[0]
+    ins             v30.2d[1], v31.2d[0]
+    sqrshrn2        v20.16b,  v26.8h,   #2
+    sqrshrn         v22.8b,   v28.8h,   #2
+    movi            v0.16b,   #(CENTERJSAMPLE)
+    sqrshrn2        v22.16b,  v30.8h,   #2
+    transpose_single v16, v17, v3, .2d, .8b
+    transpose_single v18, v19, v3, .2d, .8b
+    add             v16.8b,   v16.8b,   v0.8b
+    add             v17.8b,   v17.8b,   v0.8b
+    add             v18.8b,   v18.8b,   v0.8b
+    add             v19.8b,   v19.8b,   v0.8b
+    transpose       v20, v22, v3, .16b, .8h
+    /* Store results to the output buffer */
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    st1             {v16.8b}, [TMP1]
+    transpose_single v20, v21, v3, .2d, .8b
+    st1             {v17.8b}, [TMP2]
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    st1             {v18.8b}, [TMP1]
+    add             v20.8b,   v20.8b,   v0.8b
+    add             v21.8b,   v21.8b,   v0.8b
+    st1             {v19.8b}, [TMP2]
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    ldp             TMP3,     TMP4,     [OUTPUT_BUF]
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    add             TMP3,     TMP3,     OUTPUT_COL
+    add             TMP4,     TMP4,     OUTPUT_COL
+    transpose_single v22, v23, v3, .2d, .8b
+    st1             {v20.8b}, [TMP1]
+    add             v22.8b,   v22.8b,   v0.8b
+    add             v23.8b,   v23.8b,   v0.8b
+    st1             {v21.8b}, [TMP2]
+    st1             {v22.8b}, [TMP3]
+    st1             {v23.8b}, [TMP4]
+    /* Restore x15 and the NEON registers saved by the prologue; the
+     * post-increment loads bring sp back to its value at entry */
+    ldr             x15, [sp], 16
+    ld1             {v0.8b - v3.8b}, [sp], 32
+    ld1             {v4.8b - v7.8b}, [sp], 32
+    ld1             {v8.8b - v11.8b}, [sp], 32
+    ld1             {v12.8b - v15.8b}, [sp], 32
+    ld1             {v16.8b - v19.8b}, [sp], 32
+    ld1             {v20.8b - v23.8b}, [sp], 32
+    ld1             {v24.8b - v27.8b}, [sp], 32
+    ld1             {v28.8b - v31.8b}, [sp], 32
+    /* Plain return through x30; a branch-with-link here would overwrite the
+     * link register */
+    ret
+
+3:  /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
+
+    /* Transpose left 4x8 half */
+    transpose       ROW6L, ROW7L, v3, .16b, .4h
+    transpose       ROW2L, ROW3L, v3, .16b, .4h
+    transpose       ROW0L, ROW1L, v3, .16b, .4h
+    transpose       ROW4L, ROW5L, v3, .16b, .4h
+    shl             ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */
+    transpose       ROW1L, ROW3L, v3, .16b, .2s
+    transpose       ROW4L, ROW6L, v3, .16b, .2s
+    transpose       ROW0L, ROW2L, v3, .16b, .2s
+    transpose       ROW5L, ROW7L, v3, .16b, .2s
+    cmp             x0, #0  /* x0 = OR of row 0, right half */
+    beq             4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
+
+    /* Only row 0 is non-zero for the right 4x8 half  */
+    dup             ROW1R.4h, ROW0R.4h[1]
+    dup             ROW2R.4h, ROW0R.4h[2]
+    dup             ROW3R.4h, ROW0R.4h[3]
+    dup             ROW4R.4h, ROW0R.4h[0]
+    dup             ROW5R.4h, ROW0R.4h[1]
+    dup             ROW6R.4h, ROW0R.4h[2]
+    dup             ROW7R.4h, ROW0R.4h[3]
+    dup             ROW0R.4h, ROW0R.4h[0]
+    b               1b /* Go to 'normal' second pass */
+
+4:  /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
+    ld1             {v2.4h},  [x15]    /* reload constants */
+    smull           v12.4s,   ROW1L.4h, XFIX_1_175875602
+    smlal           v12.4s,   ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
+    smull           v14.4s,   ROW3L.4h, XFIX_1_175875602
+    smlal           v14.4s,   ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
+    smull           v4.4s,    ROW2L.4h, XFIX_0_541196100
+    sshll           v6.4s,    ROW0L.4h, #13
+    mov             v8.16b,   v12.16b
+    smlal           v12.4s,   ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+    smlsl           v8.4s,    ROW1L.4h, XFIX_0_899976223
+    add             v2.4s,    v6.4s,    v4.4s
+    mov             v10.16b,  v14.16b
+    smlal           v14.4s,   ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+    add             v2.4s,    v2.4s,    v12.4s
+    add             v12.4s,   v12.4s,   v12.4s
+    smlsl           v10.4s,   ROW3L.4h, XFIX_2_562915447
+    shrn            ROW1L.4h, v2.4s,    #16
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+    sub             v6.4s,    v6.4s,    v4.4s
+    shrn            ROW2R.4h, v2.4s,    #16 /* ROW6L.4h <-> ROW2R.4h */
+    add             v2.4s,    v6.4s,    v10.4s
+    sub             v6.4s,    v6.4s,    v10.4s
+    sshll           v10.4s,   ROW0L.4h, #13
+    shrn            ROW2L.4h, v2.4s,    #16
+    shrn            ROW1R.4h, v6.4s,    #16 /* ROW5L.4h <-> ROW1R.4h */
+    add             v4.4s,    v10.4s,   v12.4s
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+    sub             v6.4s,    v2.4s,    v8.4s
+    shrn            ROW3R.4h, v4.4s,    #16 /* ROW7L.4h <-> ROW3R.4h */
+    shrn            ROW3L.4h, v10.4s,   #16
+    shrn            ROW0L.4h, v12.4s,   #16
+    shrn            ROW0R.4h, v6.4s,    #16 /* ROW4L.4h <-> ROW0R.4h */
+    /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */
+    ld1             {v2.4h},  [x15]    /* reload constants */
+    smull           v12.4s,   ROW5L.4h, XFIX_1_175875602
+    smlal           v12.4s,   ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560
+    smull           v14.4s,   ROW7L.4h, XFIX_1_175875602
+    smlal           v14.4s,   ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644
+    smull           v4.4s,    ROW6L.4h, XFIX_0_541196100
+    sshll           v6.4s,    ROW4L.4h, #13
+    mov             v8.16b,   v12.16b
+    smlal           v12.4s,   ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447
+    smlsl           v8.4s,    ROW5L.4h, XFIX_0_899976223
+    add             v2.4s,    v6.4s,    v4.4s
+    mov             v10.16b,  v14.16b
+    smlal           v14.4s,   ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223
+    add             v2.4s,    v2.4s,    v12.4s
+    add             v12.4s,   v12.4s,   v12.4s
+    smlsl           v10.4s,   ROW7L.4h, XFIX_2_562915447
+    shrn            ROW5L.4h, v2.4s,    #16 /* ROW5L.4h <-> ROW1R.4h */
+    sub             v2.4s,    v2.4s,    v12.4s
+    smull           v12.4s,   ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865
+    sub             v6.4s,    v6.4s,    v4.4s
+    shrn            ROW6R.4h, v2.4s,    #16
+    add             v2.4s,    v6.4s,    v10.4s
+    sub             v6.4s,    v6.4s,    v10.4s
+    sshll           v10.4s,   ROW4L.4h, #13
+    shrn            ROW6L.4h, v2.4s,    #16 /* ROW6L.4h <-> ROW2R.4h */
+    shrn            ROW5R.4h, v6.4s,    #16
+    add             v4.4s,    v10.4s,   v12.4s
+    sub             v2.4s,    v10.4s,   v12.4s
+    add             v12.4s,   v4.4s,    v14.4s
+    sub             v4.4s,    v4.4s,    v14.4s
+    add             v10.4s,   v2.4s,    v8.4s
+    sub             v6.4s,    v2.4s,    v8.4s
+    shrn            ROW7R.4h, v4.4s,    #16
+    shrn            ROW7L.4h, v10.4s,   #16 /* ROW7L.4h <-> ROW3R.4h */
+    shrn            ROW4L.4h, v12.4s,   #16 /* ROW4L.4h <-> ROW0R.4h */
+    shrn            ROW4R.4h, v6.4s,    #16
+    b               2b /* Go to epilogue */
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+
+    .unreq          ROW0L
+    .unreq          ROW0R
+    .unreq          ROW1L
+    .unreq          ROW1R
+    .unreq          ROW2L
+    .unreq          ROW2R
+    .unreq          ROW3L
+    .unreq          ROW3R
+    .unreq          ROW4L
+    .unreq          ROW4R
+    .unreq          ROW5L
+    .unreq          ROW5R
+    .unreq          ROW6L
+    .unreq          ROW6R
+    .unreq          ROW7L
+    .unreq          ROW7R
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_ifast_neon
+ *
+ * This function contains a fast, not so accurate integer implementation of
+ * the inverse DCT (Discrete Cosine Transform). It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_ifast'
+ * function from jidctfst.c
+ *
+ * Normally 1-D AAN DCT needs 5 multiplications and 29 additions.
+ * But in ARM NEON case some extra additions are required because VQDMULH
+ * instruction can't handle the constants larger than 1. So the expressions
+ * like "x * 1.082392200" have to be converted to "x * 0.082392200 + x",
+ * which introduces an extra addition. Overall, there are 6 extra additions
+ * per 1-D IDCT pass, totalling to 5 VQDMULH and 35 VADD/VSUB instructions.
+ */
+
+/* Lane aliases for the ifast IDCT multipliers: each name selects one 16-bit
+ * lane of v0, in the same order as the entries of the table below. */
+#define XFIX_1_082392200 v0.4h[0]
+#define XFIX_1_414213562 v0.4h[1]
+#define XFIX_1_847759065 v0.4h[2]
+#define XFIX_2_613125930 v0.4h[3]
+
+/*
+ * Only the part of each multiplier above its integer part is stored: with
+ * 256 standing for 1.0, 277/256, 362/256, 473/256 and 669/256 approximate
+ * the four constants, and 256 (or 512 for the constant above 2.0) is
+ * subtracted before the common scaling by 128.
+ * NOTE(review): the factor of 128 presumably matches the fixed-point scale of
+ * the doubling high-half multiply described in the comment above -- confirm
+ * against the instruction actually used in the function body.
+ */
+.balign 16
+jsimd_idct_ifast_neon_consts:
+    .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
+    .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
+    .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
+    .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
+
+asm_function jsimd_idct_ifast_neon
+
+    /* Register arguments, as consumed below:
+     *   x0  DCT_TABLE   - 64 16-bit multipliers applied to the coefficients
+     *   x1  COEF_BLOCK  - 64 16-bit coefficients (8 rows of 8)
+     *   x2  OUTPUT_BUF  - array of 8 output row pointers
+     *   x3  OUTPUT_COL  - byte offset added to each row pointer
+     * Eight 8-bit samples are stored to each of the 8 output rows.
+     * x0/x1 are recycled as TMP1/TMP2 once the tables have been read;
+     * TMP4/TMP5 live in x22/x23, which are saved and restored here.
+     */
+    DCT_TABLE       .req x0
+    COEF_BLOCK      .req x1
+    OUTPUT_BUF      .req x2
+    OUTPUT_COL      .req x3
+    TMP1            .req x0
+    TMP2            .req x1
+    TMP3            .req x2
+    TMP4            .req x22
+    TMP5            .req x23
+
+    /* Load and dequantize coefficients into NEON registers
+     * with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 |      v8.8h
+     *   1 |      v9.8h
+     *   2 |      v10.8h
+     *   3 |      v11.8h
+     *   4 |      v12.8h
+     *   5 |      v13.8h
+     *   6 |      v14.8h
+     *   7 |      v15.8h
+     */
+    /* Save the registers used in fast IDCT.  SP is lowered once and stays
+     * lowered until the epilogue; x9 walks the save area, so no live data
+     * is ever left below SP where a signal frame could overwrite it.
+     */
+    sub             sp, sp, #176
+    mov             x9, sp
+    stp             x22, x23, [x9], 16
+    adr             x23, jsimd_idct_ifast_neon_consts
+    st1             {v0.8b - v3.8b}, [x9], 32
+    st1             {v4.8b - v7.8b}, [x9], 32
+    st1             {v8.8b - v11.8b}, [x9], 32
+    st1             {v12.8b - v15.8b}, [x9], 32
+    st1             {v16.8b - v19.8b}, [x9], 32
+    /* Coefficient loads are interleaved with the multiplier loads and the
+     * dequantizing multiplies; v0-v3 hold two table rows at a time.
+     */
+    ld1             {v8.8h, v9.8h}, [COEF_BLOCK], 32
+    ld1             {v0.8h, v1.8h}, [DCT_TABLE], 32
+    ld1             {v10.8h, v11.8h}, [COEF_BLOCK], 32
+    mul             v8.8h,  v8.8h,  v0.8h
+    ld1             {v2.8h, v3.8h}, [DCT_TABLE], 32
+    mul             v9.8h,  v9.8h,  v1.8h
+    ld1             {v12.8h, v13.8h}, [COEF_BLOCK], 32
+    mul             v10.8h, v10.8h, v2.8h
+    ld1             {v0.8h, v1.8h}, [DCT_TABLE], 32
+    mul             v11.8h, v11.8h, v3.8h
+    ld1             {v14.8h, v15.8h}, [COEF_BLOCK], 32
+    mul             v12.8h, v12.8h, v0.8h
+    ld1             {v2.8h, v3.8h}, [DCT_TABLE], 32
+    mul             v14.8h, v14.8h, v2.8h
+    mul             v13.8h, v13.8h, v1.8h
+    ld1             {v0.4h}, [x23]      /* load constants (v0 is free from here on) */
+    mul             v15.8h, v15.8h, v3.8h
+
+    /* 1-D IDCT, pass 1 */
+    sub             v2.8h,    v10.8h,   v14.8h
+    add             v14.8h,   v10.8h,   v14.8h
+    sub             v1.8h,    v11.8h,   v13.8h
+    add             v13.8h,   v11.8h,   v13.8h
+    sub             v5.8h,    v9.8h,    v15.8h
+    add             v15.8h,   v9.8h,    v15.8h
+    sqdmulh         v4.8h,    v2.8h,    XFIX_1_414213562
+    sqdmulh         v6.8h,    v1.8h,    XFIX_2_613125930
+    add             v3.8h,    v1.8h,    v1.8h
+    sub             v1.8h,    v5.8h,    v1.8h
+    add             v10.8h,   v2.8h,    v4.8h
+    sqdmulh         v4.8h,    v1.8h,    XFIX_1_847759065
+    sub             v2.8h,    v15.8h,   v13.8h
+    add             v3.8h,    v3.8h,    v6.8h
+    sqdmulh         v6.8h,    v2.8h,    XFIX_1_414213562
+    add             v1.8h,    v1.8h,    v4.8h
+    sqdmulh         v4.8h,    v5.8h,    XFIX_1_082392200
+    sub             v10.8h,   v10.8h,   v14.8h
+    add             v2.8h,    v2.8h,    v6.8h
+    sub             v6.8h,    v8.8h,    v12.8h
+    add             v12.8h,   v8.8h,    v12.8h
+    add             v9.8h,    v5.8h,    v4.8h
+    add             v5.8h,    v6.8h,    v10.8h
+    sub             v10.8h,   v6.8h,    v10.8h
+    add             v6.8h,    v15.8h,   v13.8h
+    add             v8.8h,    v12.8h,   v14.8h
+    sub             v3.8h,    v6.8h,    v3.8h
+    sub             v12.8h,   v12.8h,   v14.8h
+    sub             v3.8h,    v3.8h,    v1.8h
+    sub             v1.8h,    v9.8h,    v1.8h
+    add             v2.8h,    v3.8h,    v2.8h
+    sub             v15.8h,   v8.8h,    v6.8h
+    add             v1.8h,    v1.8h,    v2.8h
+    add             v8.8h,    v8.8h,    v6.8h
+    add             v14.8h,   v5.8h,    v3.8h
+    sub             v9.8h,    v5.8h,    v3.8h
+    sub             v13.8h,   v10.8h,   v2.8h
+    add             v10.8h,   v10.8h,   v2.8h
+    /* 8x8 transpose of the 16-bit intermediate results.  v18 is the scratch
+     * copy for every trn1/trn2 pair; the last pass 1 add/sub instructions
+     * are interleaved with the first transposes.
+     */
+    /* Transpose  v8-v9 (16-bit lanes) */
+    mov             v18.16b,  v8.16b
+    trn1            v8.8h,    v8.8h,    v9.8h
+    trn2            v9.8h,    v18.8h,   v9.8h
+    sub             v11.8h,   v12.8h,   v1.8h
+    /* Transpose  v14-v15 (16-bit lanes) */
+    mov             v18.16b,  v14.16b
+    trn1            v14.8h,   v14.8h,   v15.8h
+    trn2            v15.8h,   v18.8h,   v15.8h
+    add             v12.8h,   v12.8h,   v1.8h
+    /* Transpose  v10-v11 (16-bit lanes) */
+    mov             v18.16b,  v10.16b
+    trn1            v10.8h,   v10.8h,   v11.8h
+    trn2            v11.8h,   v18.8h,   v11.8h
+    /* Transpose  v12-v13 (16-bit lanes) */
+    mov             v18.16b,  v12.16b
+    trn1            v12.8h,   v12.8h,   v13.8h
+    trn2            v13.8h,   v18.8h,   v13.8h
+    /* Transpose  v9-v11 (32-bit lanes) */
+    mov             v18.16b,  v9.16b
+    trn1            v9.4s,    v9.4s,    v11.4s
+    trn2            v11.4s,   v18.4s,   v11.4s
+    /* Transpose  v12-v14 (32-bit lanes) */
+    mov             v18.16b,  v12.16b
+    trn1            v12.4s,   v12.4s,   v14.4s
+    trn2            v14.4s,   v18.4s,   v14.4s
+    /* Transpose  v8-v10 (32-bit lanes) */
+    mov             v18.16b,  v8.16b
+    trn1            v8.4s,    v8.4s,    v10.4s
+    trn2            v10.4s,   v18.4s,   v10.4s
+    /* Transpose  v13-v15 (32-bit lanes) */
+    mov             v18.16b,  v13.16b
+    trn1            v13.4s,   v13.4s,   v15.4s
+    trn2            v15.4s,   v18.4s,   v15.4s
+    /* Swap the low half of v14 with the high half of v10 (x22 is scratch) */
+    umov            x22, v14.d[0]
+    ins             v14.2d[0], v10.2d[1]
+    ins             v10.2d[1], x22
+    /* Swap the low half of v13 with the high half of v9 */
+
+    umov            x22, v13.d[0]
+    ins             v13.2d[0], v9.2d[1]
+    ins             v9.2d[1], x22
+    /* 1-D IDCT, pass 2 (interleaved with the two remaining half swaps) */
+    sub             v2.8h,    v10.8h,   v14.8h
+    /* Swap the low half of v15 with the high half of v11 */
+    umov            x22, v15.d[0]
+    ins             v15.2d[0], v11.2d[1]
+    ins             v11.2d[1], x22
+    add             v14.8h,   v10.8h,   v14.8h
+    /* Swap the low half of v12 with the high half of v8 */
+    umov            x22, v12.d[0]
+    ins             v12.2d[0], v8.2d[1]
+    ins             v8.2d[1], x22
+    sub             v1.8h,    v11.8h,   v13.8h
+    add             v13.8h,   v11.8h,   v13.8h
+    sub             v5.8h,    v9.8h,    v15.8h
+    add             v15.8h,   v9.8h,    v15.8h
+    sqdmulh         v4.8h,    v2.8h,    XFIX_1_414213562
+    sqdmulh         v6.8h,    v1.8h,    XFIX_2_613125930
+    add             v3.8h,    v1.8h,    v1.8h
+    sub             v1.8h,    v5.8h,    v1.8h
+    add             v10.8h,   v2.8h,    v4.8h
+    sqdmulh         v4.8h,    v1.8h,    XFIX_1_847759065
+    sub             v2.8h,    v15.8h,   v13.8h
+    add             v3.8h,    v3.8h,    v6.8h
+    sqdmulh         v6.8h,    v2.8h,    XFIX_1_414213562
+    add             v1.8h,    v1.8h,    v4.8h
+    sqdmulh         v4.8h,    v5.8h,    XFIX_1_082392200
+    sub             v10.8h,   v10.8h,   v14.8h
+    add             v2.8h,    v2.8h,    v6.8h
+    sub             v6.8h,    v8.8h,    v12.8h
+    add             v12.8h,   v8.8h,    v12.8h
+    add             v9.8h,    v5.8h,    v4.8h
+    add             v5.8h,    v6.8h,    v10.8h
+    sub             v10.8h,   v6.8h,    v10.8h
+    add             v6.8h,    v15.8h,   v13.8h
+    add             v8.8h,    v12.8h,   v14.8h
+    sub             v3.8h,    v6.8h,    v3.8h
+    sub             v12.8h,   v12.8h,   v14.8h
+    sub             v3.8h,    v3.8h,    v1.8h
+    sub             v1.8h,    v9.8h,    v1.8h
+    add             v2.8h,    v3.8h,    v2.8h
+    sub             v15.8h,   v8.8h,    v6.8h
+    add             v1.8h,    v1.8h,    v2.8h
+    add             v8.8h,    v8.8h,    v6.8h
+    add             v14.8h,   v5.8h,    v3.8h
+    sub             v9.8h,    v5.8h,    v3.8h
+    sub             v13.8h,   v10.8h,   v2.8h
+    add             v10.8h,   v10.8h,   v2.8h
+    sub             v11.8h,   v12.8h,   v1.8h
+    add             v12.8h,   v12.8h,   v1.8h
+    /* Descale to 8-bit and range limit: a saturating signed narrowing shift
+     * by 5, then a bias of 0x80 added to every byte (v0 no longer holds the
+     * constants after the movi).
+     */
+    movi            v0.16b,   #0x80
+    sqshrn          v8.8b,    v8.8h,    #5
+    sqshrn2         v8.16b,   v9.8h,    #5
+    sqshrn          v9.8b,    v10.8h,   #5
+    sqshrn2         v9.16b,   v11.8h,   #5
+    sqshrn          v10.8b,   v12.8h,   #5
+    sqshrn2         v10.16b,  v13.8h,   #5
+    sqshrn          v11.8b,   v14.8h,   #5
+    sqshrn2         v11.16b,  v15.8h,   #5
+    add             v8.16b,   v8.16b,   v0.16b
+    add             v9.16b,   v9.16b,   v0.16b
+    add             v10.16b,  v10.16b,  v0.16b
+    add             v11.16b,  v11.16b,  v0.16b
+    /* Transpose the final 8-bit samples */
+    /* Transpose  v8-v9 (16-bit lanes) */
+    mov             v18.16b,  v8.16b
+    trn1            v8.8h,    v8.8h,    v9.8h
+    trn2            v9.8h,    v18.8h,   v9.8h
+    /* Transpose  v10-v11 (16-bit lanes) */
+    mov             v18.16b,  v10.16b
+    trn1            v10.8h,   v10.8h,   v11.8h
+    trn2            v11.8h,   v18.8h,   v11.8h
+    /* Transpose  v8-v10 (32-bit lanes) */
+    mov             v18.16b,  v8.16b
+    trn1            v8.4s,    v8.4s,    v10.4s
+    trn2            v10.4s,   v18.4s,   v10.4s
+    /* Transpose  v9-v11 (32-bit lanes) */
+    mov             v18.16b,  v9.16b
+    trn1            v9.4s,    v9.4s,    v11.4s
+    trn2            v11.4s,   v18.4s,   v11.4s
+    /* Copy the high half of v8 into v17, then byte-transpose the two halves */
+    ins             v17.2d[0], v8.2d[1]
+    mov             v18.16b,  v8.16b
+    trn1            v8.8b,    v8.8b,    v17.8b
+    trn2            v17.8b,   v18.8b,   v17.8b
+    /* Same for v9, using v19 for its high half */
+    ins             v19.2d[0], v9.2d[1]
+    mov             v18.16b,  v9.16b
+    trn1            v9.8b,    v9.8b,    v19.8b
+    trn2            v19.8b,   v18.8b,   v19.8b
+    /* Store results to the output buffer: row pointers are fetched two at
+     * a time and offset by OUTPUT_COL; 8 bytes go to each row.
+     */
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    st1             {v8.8b},  [TMP1]
+    st1             {v17.8b}, [TMP2]
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    st1             {v9.8b},  [TMP1]
+    /* Same split-and-transpose for v10, using v7 for its high half */
+    ins             v7.2d[0], v10.2d[1]
+    mov             v18.16b,  v10.16b
+    trn1            v10.8b,   v10.8b,   v7.8b
+    trn2            v7.8b,    v18.8b,   v7.8b
+    st1             {v19.8b}, [TMP2]
+    ldp             TMP1,     TMP2,     [OUTPUT_BUF], 16
+    ldp             TMP4,     TMP5,     [OUTPUT_BUF], 16
+    add             TMP1,     TMP1,     OUTPUT_COL
+    add             TMP2,     TMP2,     OUTPUT_COL
+    add             TMP4,     TMP4,     OUTPUT_COL
+    add             TMP5,     TMP5,     OUTPUT_COL
+    st1             {v10.8b}, [TMP1]
+    /* Same split-and-transpose for v11, using v16 for its high half */
+    ins             v16.2d[0], v11.2d[1]
+    mov             v18.16b,  v11.16b
+    trn1            v11.8b,   v11.8b,   v16.8b
+    trn2            v16.8b,   v18.8b,   v16.8b
+    st1             {v7.8b},  [TMP2]
+    st1             {v11.8b}, [TMP4]
+    st1             {v16.8b}, [TMP5]
+    /* Restore the saved registers.  SP still points at the bottom of the
+     * save area; the post-incremented loads pop it back to its entry value.
+     */
+    ldp             x22, x23, [sp], 16
+    ld1             {v0.8b - v3.8b}, [sp], 32
+    ld1             {v4.8b - v7.8b}, [sp], 32
+    ld1             {v8.8b - v11.8b}, [sp], 32
+    ld1             {v12.8b - v15.8b}, [sp], 32
+    ld1             {v16.8b - v19.8b}, [sp], 32
+    ret
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+    .unreq          TMP5
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_4x4_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_4x4'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which
+ *       requires far fewer arithmetic operations and hence should be faster.
+ *       The primary purpose of this particular NEON optimized function is
+ *       bit-exact compatibility with jpeg-6b.
+ *
+ * TODO: slightly better instruction scheduling can be achieved by expanding
+ *       the idct_helper/transpose_4x4 macros and reordering instructions,
+ *       but readability will suffer somewhat.
+ */
+
+#define CONST_BITS  13  /* fraction bits of the FIX_* constants below: FIX(x) = x * 2^13, rounded */
+
+#define FIX_0_211164243  (1730)  /* FIX(0.211164243) */
+#define FIX_0_509795579  (4176)  /* FIX(0.509795579) */
+#define FIX_0_601344887  (4926)  /* FIX(0.601344887) */
+#define FIX_0_720959822  (5906)  /* FIX(0.720959822), used by the 2x2 table */
+#define FIX_0_765366865  (6270)  /* FIX(0.765366865) */
+#define FIX_0_850430095  (6967)  /* FIX(0.850430095), used by the 2x2 table */
+#define FIX_0_899976223  (7373)  /* FIX(0.899976223) */
+#define FIX_1_061594337  (8697)  /* FIX(1.061594337) */
+#define FIX_1_272758580  (10426) /* FIX(1.272758580), used by the 2x2 table */
+#define FIX_1_451774981  (11893) /* FIX(1.451774981) */
+#define FIX_1_847759065  (15137) /* FIX(1.847759065) */
+#define FIX_2_172734803  (17799) /* FIX(2.172734803) */
+#define FIX_2_562915447  (20995) /* FIX(2.562915447) */
+#define FIX_3_624509785  (29692) /* FIX(3.624509785), used by the 2x2 table */
+
+.balign 16
+jsimd_idct_4x4_neon_consts:  /* 12 halfwords; jsimd_idct_4x4_neon loads them into v0.4h, v1.4h and v2.4h */
+    .short     FIX_1_847759065     /* v0.4h[0] */
+    .short     -FIX_0_765366865    /* v0.4h[1] */
+    .short     -FIX_0_211164243    /* v0.4h[2] */
+    .short     FIX_1_451774981     /* v0.4h[3] */
+    .short     -FIX_2_172734803    /* v1.4h[0] */
+    .short     FIX_1_061594337     /* v1.4h[1] */
+    .short     -FIX_0_509795579    /* v1.4h[2] */
+    .short     -FIX_0_601344887    /* v1.4h[3] */
+    .short     FIX_0_899976223     /* v2.4h[0] */
+    .short     FIX_2_562915447     /* v2.4h[1] */
+    .short     1 << (CONST_BITS+1) /* v2.4h[2]: multiplier for the first idct_helper input */
+    .short     0                   /* v2.4h[3]: not referenced by idct_helper */
+
+.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29  /* 7 input .4h vectors -> 4 output .4h vectors, descaled by \shift; constants in v0-v2; clobbers v20, v24, v26, v28, v30 */
+    smull           v28.4s, \x4,    v2.4h[2]  /* v28 = x4 * 2^(CONST_BITS+1) */
+    smlal           v28.4s, \x8,    v0.4h[0]  /*     + x8  *  FIX_1_847759065 */
+    smlal           v28.4s, \x14,   v0.4h[1]  /*     + x14 * -FIX_0_765366865 */
+
+    smull           v26.4s, \x16,   v1.4h[2]  /* v26 = x16 * -FIX_0_509795579 */
+    smlal           v26.4s, \x12,   v1.4h[3]  /*     + x12 * -FIX_0_601344887 */
+    smlal           v26.4s, \x10,   v2.4h[0]  /*     + x10 *  FIX_0_899976223 */
+    smlal           v26.4s, \x6,    v2.4h[1]  /*     + x6  *  FIX_2_562915447 */
+
+    smull           v30.4s, \x4,    v2.4h[2]  /* v30 = x4 * 2^(CONST_BITS+1) */
+    smlsl           v30.4s, \x8,    v0.4h[0]  /*     - x8  *  FIX_1_847759065 */
+    smlsl           v30.4s, \x14,   v0.4h[1]  /*     - x14 * -FIX_0_765366865 */
+
+    smull           v24.4s, \x16,   v0.4h[2]  /* v24 = x16 * -FIX_0_211164243 */
+    smlal           v24.4s, \x12,   v0.4h[3]  /*     + x12 *  FIX_1_451774981 */
+    smlal           v24.4s, \x10,   v1.4h[0]  /*     + x10 * -FIX_2_172734803 */
+    smlal           v24.4s, \x6,    v1.4h[1]  /*     + x6  *  FIX_1_061594337 */
+
+    add             v20.4s, v28.4s, v26.4s  /* y26 candidate: v28 + v26 */
+    sub             v28.4s, v28.4s, v26.4s  /* y29 candidate: v28 - v26 */
+
+.if \shift > 16  /* RSHRN to halfwords cannot encode a shift above 16: shift at 32 bits, then narrow */
+    srshr           v20.4s, v20.4s, #\shift
+    srshr           v28.4s, v28.4s, #\shift
+    xtn             \y26,   v20.4s
+    xtn             \y29,   v28.4s
+.else
+    rshrn           \y26,   v20.4s, #\shift  /* rounding shift and narrow in one step */
+    rshrn           \y29,   v28.4s, #\shift
+.endif
+
+    add             v20.4s, v30.4s, v24.4s  /* y27 candidate: v30 + v24 */
+    sub             v30.4s, v30.4s, v24.4s  /* y28 candidate: v30 - v24 */
+
+.if \shift > 16  /* same two descale paths as above */
+    srshr           v20.4s, v20.4s, #\shift
+    srshr           v30.4s, v30.4s, #\shift
+    xtn             \y27,   v20.4s
+    xtn             \y28,   v30.4s
+.else
+    rshrn           \y27,   v20.4s, #\shift
+    rshrn           \y28,   v30.4s, #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_4x4_neon
+
+    /* Register arguments, as consumed below:
+     *   x0  DCT_TABLE   - 16-bit multipliers, 8 per row; row 4 is skipped
+     *   x1  COEF_BLOCK  - 16-bit coefficients, 8 per row; row 4 is skipped
+     *   x2  OUTPUT_BUF  - array of (at least) 4 output row pointers
+     *   x3  OUTPUT_COL  - byte offset added to each row pointer
+     * Four 8-bit samples are stored to each of the 4 output rows.
+     * x0-x2 are recycled as TMP1-TMP3; TMP4 lives in x15.
+     */
+    DCT_TABLE       .req x0
+    COEF_BLOCK      .req x1
+    OUTPUT_BUF      .req x2
+    OUTPUT_COL      .req x3
+    TMP1            .req x0
+    TMP2            .req x1
+    TMP3            .req x2
+    TMP4            .req x15
+
+    /* Save all used NEON registers.  SP is lowered once and stays lowered
+     * until the epilogue; x9 walks the save area, so no live data is ever
+     * left below SP where a signal frame could overwrite it.
+     */
+    sub             sp, sp, 272
+    mov             x9, sp
+    str             x15, [x9], 16
+    /* Load constants (v3.4h is just used for padding: the table holds 12
+     * halfwords, so the v3.4h load reads the 8 bytes that follow it and the
+     * value is never used as a constant)
+     */
+    adr             TMP4, jsimd_idct_4x4_neon_consts
+    st1             {v0.8b - v3.8b}, [x9], 32
+    st1             {v4.8b - v7.8b}, [x9], 32
+    st1             {v8.8b - v11.8b}, [x9], 32
+    st1             {v12.8b - v15.8b}, [x9], 32
+    st1             {v16.8b - v19.8b}, [x9], 32
+    st1             {v20.8b - v23.8b}, [x9], 32
+    st1             {v24.8b - v27.8b}, [x9], 32
+    st1             {v28.8b - v31.8b}, [x9], 32
+    ld1             {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
+
+    /* Load all COEF_BLOCK into NEON registers with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | v4.4h   | v5.4h
+     *   1 | v6.4h   | v7.4h
+     *   2 | v8.4h   | v9.4h
+     *   3 | v10.4h  | v11.4h
+     *   4 | -       | -
+     *   5 | v12.4h  | v13.4h
+     *   6 | v14.4h  | v15.4h
+     *   7 | v16.4h  | v17.4h
+     */
+    ld1             {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
+    ld1             {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32
+    add             COEF_BLOCK, COEF_BLOCK, #16   /* skip row 4 */
+    ld1             {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32
+    ld1             {v16.4h, v17.4h}, [COEF_BLOCK], 16
+    /* Dequantize: each coefficient half-row is multiplied by the matching
+     * half-row of DCT_TABLE (same layout as above, shifted to v18-v31).
+     */
+    ld1             {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
+    mul             v4.4h, v4.4h, v18.4h
+    mul             v5.4h, v5.4h, v19.4h
+    ins             v4.2d[1], v5.2d[0]    /* 128 bit q4 */
+    ld1             {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
+    mul             v6.4h, v6.4h, v20.4h
+    mul             v7.4h, v7.4h, v21.4h
+    ins             v6.2d[1], v7.2d[0]    /* 128 bit q6 */
+    mul             v8.4h, v8.4h, v22.4h
+    mul             v9.4h, v9.4h, v23.4h
+    ins             v8.2d[1], v9.2d[0]    /* 128 bit q8 */
+    add             DCT_TABLE, DCT_TABLE, #16     /* skip row 4 */
+    ld1             {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
+    mul             v10.4h, v10.4h, v24.4h
+    mul             v11.4h, v11.4h, v25.4h
+    ins             v10.2d[1], v11.2d[0]  /* 128 bit q10 */
+    mul             v12.4h, v12.4h, v26.4h
+    mul             v13.4h, v13.4h, v27.4h
+    ins             v12.2d[1], v13.2d[0]  /* 128 bit q12 */
+    /* Row 7 multipliers: v30 = columns 0-3, v31 = columns 4-7.  Both halves
+     * must be loaded because v17 is multiplied by v31 below.
+     */
+    ld1             {v30.4h, v31.4h}, [DCT_TABLE], 16
+    mul             v14.4h, v14.4h, v28.4h
+    mul             v15.4h, v15.4h, v29.4h
+    ins             v14.2d[1], v15.2d[0]  /* 128 bit q14 */
+    mul             v16.4h, v16.4h, v30.4h
+    mul             v17.4h, v17.4h, v31.4h
+    ins             v16.2d[1], v17.2d[0]  /* 128 bit q16 */
+
+    /* Pass 1: columns 0-3, then columns 4-7, each descaled by 12 bits */
+    idct_helper     v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
+    transpose_4x4   v4, v6, v8, v10, v3
+    ins             v10.2d[1], v11.2d[0]
+    idct_helper     v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
+    transpose_4x4   v5, v7, v9, v11, v3
+    ins             v10.2d[1], v11.2d[0]
+    /* Pass 2: descaled by 19 bits into v26-v29 */
+    idct_helper     v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
+    transpose_4x4   v26, v27, v28, v29, v3
+
+    /* Range limit: add 0x80 to every 16-bit result, then narrow to unsigned
+     * 8-bit with saturation.  v26 ends up holding 8 bytes for the first two
+     * rows, v27 the 8 bytes for the last two.
+     */
+    movi            v30.8h, #0x80
+    ins             v26.2d[1], v27.2d[0]
+    ins             v28.2d[1], v29.2d[0]
+    add             v26.8h, v26.8h, v30.8h
+    add             v28.8h, v28.8h, v30.8h
+    sqxtun          v26.8b, v26.8h
+    sqxtun          v27.8b, v28.8h
+
+    /* Store results to the output buffer */
+    ldp             TMP1, TMP2, [OUTPUT_BUF], 16
+    ldp             TMP3, TMP4, [OUTPUT_BUF]
+    add             TMP1, TMP1, OUTPUT_COL
+    add             TMP2, TMP2, OUTPUT_COL
+    add             TMP3, TMP3, OUTPUT_COL
+    add             TMP4, TMP4, OUTPUT_COL
+
+#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT
+    /* We can use much less instructions on little endian systems if the
+     * OS kernel is not configured to trap unaligned memory accesses
+     */
+    st1             {v26.s}[0], [TMP1], 4
+    st1             {v27.s}[0], [TMP3], 4
+    st1             {v26.s}[1], [TMP2], 4
+    st1             {v27.s}[1], [TMP4], 4
+#else
+    /* Byte-by-byte stores: lanes 0-3 go to TMP1/TMP3, lanes 4-7 to TMP2/TMP4 */
+    st1             {v26.b}[0], [TMP1], 1
+    st1             {v27.b}[0], [TMP3], 1
+    st1             {v26.b}[1], [TMP1], 1
+    st1             {v27.b}[1], [TMP3], 1
+    st1             {v26.b}[2], [TMP1], 1
+    st1             {v27.b}[2], [TMP3], 1
+    st1             {v26.b}[3], [TMP1], 1
+    st1             {v27.b}[3], [TMP3], 1
+
+    st1             {v26.b}[4], [TMP2], 1
+    st1             {v27.b}[4], [TMP4], 1
+    st1             {v26.b}[5], [TMP2], 1
+    st1             {v27.b}[5], [TMP4], 1
+    st1             {v26.b}[6], [TMP2], 1
+    st1             {v27.b}[6], [TMP4], 1
+    st1             {v26.b}[7], [TMP2], 1
+    st1             {v27.b}[7], [TMP4], 1
+#endif
+
+    /* Restore the saved registers.  SP still points at the bottom of the
+     * save area; the post-incremented loads pop it back to its entry value.
+     */
+    ldr             x15, [sp], 16
+    ld1             {v0.8b - v3.8b}, [sp], 32
+    ld1             {v4.8b - v7.8b}, [sp], 32
+    ld1             {v8.8b - v11.8b}, [sp], 32
+    ld1             {v12.8b - v15.8b}, [sp], 32
+    ld1             {v16.8b - v19.8b}, [sp], 32
+    ld1             {v20.8b - v23.8b}, [sp], 32
+    ld1             {v24.8b - v27.8b}, [sp], 32
+    ld1             {v28.8b - v31.8b}, [sp], 32
+    ret
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+    .unreq          TMP3
+    .unreq          TMP4
+.endfunc
+
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_2x2_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_2x2'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which
+ *       requires far fewer arithmetic operations and hence should be faster.
+ *       The primary purpose of this particular NEON optimized function is
+ *       bit-exact compatibility with jpeg-6b.
+ */
+
+.balign 8
+jsimd_idct_2x2_neon_consts:  /* 4 halfwords; jsimd_idct_2x2_neon loads them into v14.4h */
+    .short     -FIX_0_720959822    /* v14.4h[0] */
+    .short     FIX_0_850430095     /* v14.4h[1] */
+    .short     -FIX_1_272758580    /* v14.4h[2] */
+    .short     FIX_3_624509785     /* v14.4h[3] */
+
+/*
+ * idct_helper (2x2 variant)
+ *
+ * Inputs \x4, \x6, \x10, \x12, \x16 are .4h vectors; outputs \y26 and \y27
+ * are .4h vectors holding
+ *     y26 = descale((x4 << 15) + t, shift)
+ *     y27 = descale((x4 << 15) - t, shift)
+ * where t = x6 * c[3] + x10 * c[2] + x12 * c[1] + x16 * c[0] and c[] is the
+ * jsimd_idct_2x2_neon_consts table.  The table is read from v14.4h, which is
+ * the register jsimd_idct_2x2_neon loads it into.
+ *
+ * Clobbers v15, v20 and v26.
+ */
+.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
+    sshll      v15.4s, \x4,    #15
+    smull      v26.4s, \x6,    v14.4h[3]
+    smlal      v26.4s, \x10,   v14.4h[2]
+    smlal      v26.4s, \x12,   v14.4h[1]
+    smlal      v26.4s, \x16,   v14.4h[0]
+
+    add        v20.4s, v15.4s, v26.4s
+    sub        v15.4s, v15.4s, v26.4s
+
+/* RSHRN to halfwords cannot encode a shift above 16, so larger shifts are
+ * done at 32 bits and narrowed afterwards.
+ */
+.if \shift > 16
+    srshr      v20.4s, v20.4s, #\shift
+    srshr      v15.4s, v15.4s, #\shift
+    xtn        \y26,   v20.4s
+    xtn        \y27,   v15.4s
+.else
+    rshrn      \y26,   v20.4s, #\shift
+    rshrn      \y27,   v15.4s, #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_2x2_neon
+
+    /* Register arguments, as consumed below:
+     *   x0  DCT_TABLE   - 16-bit multipliers, 8 per row; rows 0, 1, 3, 5, 7 are read
+     *   x1  COEF_BLOCK  - 16-bit coefficients, 8 per row; rows 0, 1, 3, 5, 7 are read
+     *   x2  OUTPUT_BUF  - array of (at least) 2 output row pointers
+     *   x3  OUTPUT_COL  - byte offset added to each row pointer
+     * Two 8-bit samples are stored to each of the 2 output rows.
+     * x0 is recycled as TMP1; TMP2 lives in x15.
+     */
+    DCT_TABLE       .req x0
+    COEF_BLOCK      .req x1
+    OUTPUT_BUF      .req x2
+    OUTPUT_COL      .req x3
+    TMP1            .req x0
+    TMP2            .req x15
+
+    /* Save the registers used below.  SP is lowered once and stays lowered
+     * until the epilogue; x9 walks the save area, so no live data is ever
+     * left below SP where a signal frame could overwrite it.
+     */
+    sub             sp, sp, 208
+    mov             x9, sp
+    str             x15, [x9], 16
+
+    /* Load constants (into v14.4h) */
+    adr             TMP2, jsimd_idct_2x2_neon_consts
+    st1             {v4.8b - v7.8b}, [x9], 32
+    st1             {v8.8b - v11.8b}, [x9], 32
+    st1             {v12.8b - v15.8b}, [x9], 32
+    st1             {v16.8b - v19.8b}, [x9], 32
+    st1             {v21.8b - v22.8b}, [x9], 16
+    st1             {v24.8b - v27.8b}, [x9], 32
+    st1             {v30.8b - v31.8b}, [x9], 16
+    ld1             {v14.4h}, [TMP2]
+
+    /* Load all COEF_BLOCK into NEON registers with the following allocation:
+     *       0 1 2 3 | 4 5 6 7
+     *      ---------+--------
+     *   0 | v4.4h   | v5.4h
+     *   1 | v6.4h   | v7.4h
+     *   2 | -       | -
+     *   3 | v10.4h  | v11.4h
+     *   4 | -       | -
+     *   5 | v12.4h  | v13.4h
+     *   6 | -       | -
+     *   7 | v16.4h  | v17.4h
+     */
+    ld1             {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
+    add             COEF_BLOCK, COEF_BLOCK, #16   /* skip row 2 */
+    ld1             {v10.4h, v11.4h}, [COEF_BLOCK], 16
+    add             COEF_BLOCK, COEF_BLOCK, #16   /* skip row 4 */
+    ld1             {v12.4h, v13.4h}, [COEF_BLOCK], 16
+    add             COEF_BLOCK, COEF_BLOCK, #16   /* skip row 6 */
+    ld1             {v16.4h, v17.4h}, [COEF_BLOCK], 16
+    /* Dequantize: each coefficient half-row is multiplied by the matching
+     * half-row of DCT_TABLE (v18/v19 = row 0, v20/v21 = row 1, ...).
+     */
+    ld1             {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
+    mul             v4.8h, v4.8h, v18.8h
+    mul             v5.8h, v5.8h, v19.8h          /* row 0, columns 4-7 */
+    ins             v4.2d[1], v5.2d[0]
+    mul             v6.8h, v6.8h, v20.8h
+    mul             v7.8h, v7.8h, v21.8h
+    ins             v6.2d[1], v7.2d[0]
+    add             DCT_TABLE, DCT_TABLE, #16     /* skip row 2 */
+    ld1             {v24.4h, v25.4h}, [DCT_TABLE], 16
+    mul             v10.8h, v10.8h, v24.8h
+    mul             v11.8h, v11.8h, v25.8h
+    ins             v10.2d[1], v11.2d[0]
+    add             DCT_TABLE, DCT_TABLE, #16     /* skip row 4 */
+    ld1             {v26.4h, v27.4h}, [DCT_TABLE], 16
+    mul             v12.8h, v12.8h, v26.8h
+    mul             v13.8h, v13.8h, v27.8h
+    ins             v12.2d[1], v13.2d[0]
+    add             DCT_TABLE, DCT_TABLE, #16     /* skip row 6 */
+    ld1             {v30.4h, v31.4h}, [DCT_TABLE], 16
+    mul             v16.8h, v16.8h, v30.8h
+    mul             v17.8h, v17.8h, v31.8h
+    ins             v16.2d[1], v17.2d[0]
+
+    /* Pass 1: the idct_helper arithmetic written out inline for columns 0-3
+     * (accumulator v26) and columns 4-7 (accumulator v24), descaled by 13
+     * bits, followed by two invocations of the transpose macro.
+     */
+    smull           v26.4s, v6.4h,  v14.4h[3]
+    smlal           v26.4s, v10.4h, v14.4h[2]
+    smlal           v26.4s, v12.4h, v14.4h[1]
+    smlal           v26.4s, v16.4h, v14.4h[0]
+    smull           v24.4s, v7.4h,  v14.4h[3]
+    smlal           v24.4s, v11.4h, v14.4h[2]
+    smlal           v24.4s, v13.4h, v14.4h[1]
+    smlal           v24.4s, v17.4h, v14.4h[0]
+    sshll           v15.4s, v4.4h,  #15
+    sshll           v30.4s, v5.4h,  #15
+    add             v20.4s, v15.4s, v26.4s
+    sub             v15.4s, v15.4s, v26.4s
+    rshrn           v4.4h,  v20.4s, #13
+    rshrn           v6.4h,  v15.4s, #13
+    add             v20.4s, v30.4s, v24.4s
+    sub             v15.4s, v30.4s, v24.4s
+    rshrn           v5.4h,  v20.4s, #13
+    rshrn           v7.4h,  v15.4s, #13
+    transpose       v4, v6, v3, .16b, .8h
+    transpose       v6, v10, v3, .16b, .4s
+
+    /* Pass 2: descaled by 20 bits into v26/v27 */
+    idct_helper     v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h
+
+    /* Range limit: add 0x80 to every 16-bit result, then narrow to unsigned
+     * 8-bit with saturation.
+     */
+    movi            v30.8h, #0x80
+    ins             v26.2d[1], v27.2d[0]
+    add             v26.8h, v26.8h, v30.8h
+    sqxtun          v30.8b, v26.8h
+    ins             v26.2d[0], v30.2d[0]
+    sqxtun          v27.8b, v26.8h
+
+    /* Store results to the output buffer: two bytes per row */
+    ldp             TMP1, TMP2, [OUTPUT_BUF]
+    add             TMP1, TMP1, OUTPUT_COL
+    add             TMP2, TMP2, OUTPUT_COL
+
+    st1             {v26.b}[0], [TMP1], 1
+    st1             {v27.b}[4], [TMP1], 1
+    st1             {v26.b}[1], [TMP2], 1
+    st1             {v27.b}[5], [TMP2], 1
+
+    /* Restore the saved registers.  SP still points at the bottom of the
+     * save area; the post-incremented loads pop it back to its entry value.
+     */
+    ldr             x15, [sp], 16
+    ld1             {v4.8b - v7.8b}, [sp], 32
+    ld1             {v8.8b - v11.8b}, [sp], 32
+    ld1             {v12.8b - v15.8b}, [sp], 32
+    ld1             {v16.8b - v19.8b}, [sp], 32
+    ld1             {v21.8b - v22.8b}, [sp], 16
+    ld1             {v24.8b - v27.8b}, [sp], 32
+    ld1             {v30.8b - v31.8b}, [sp], 16
+    ret
+
+    .unreq          DCT_TABLE
+    .unreq          COEF_BLOCK
+    .unreq          OUTPUT_BUF
+    .unreq          OUTPUT_COL
+    .unreq          TMP1
+    .unreq          TMP2
+.endfunc
+
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_ycc_extrgb_convert_neon
+ * jsimd_ycc_extbgr_convert_neon
+ * jsimd_ycc_extrgbx_convert_neon
+ * jsimd_ycc_extbgrx_convert_neon
+ * jsimd_ycc_extxbgr_convert_neon
+ * jsimd_ycc_extxrgb_convert_neon
+ *
+ * Colorspace conversion YCbCr -> RGB
+ */
+
+
+/*
+ * do_load size
+ *
+ * Load \size samples from each of the U, V and Y pointers into v4, v5 and v0
+ * respectively, advancing each pointer by \size bytes.
+ *   size 8: the whole .8b vector (plus a prefetch 64 bytes ahead)
+ *   size 4: byte lanes 0-3
+ *   size 2: byte lanes 4-5
+ *   size 1: byte lane 6
+ * The partial sizes use disjoint lanes, presumably so that a remainder of
+ * 1-7 samples can be assembled from 4 + 2 + 1 pieces.
+ */
+.macro do_load size
+    .if \size == 8
+        ld1  {v4.8b}, [U], 8
+        ld1  {v5.8b}, [V], 8
+        ld1  {v0.8b}, [Y], 8
+        prfm PLDL1KEEP, [U, #64]
+        prfm PLDL1KEEP, [V, #64]
+        prfm PLDL1KEEP, [Y, #64]
+    .elseif \size == 4
+        /* Every lane load post-increments, so consecutive lanes receive
+         * consecutive bytes and the pointer ends up 4 bytes further on.
+         */
+        ld1  {v4.b}[0], [U], 1
+        ld1  {v4.b}[1], [U], 1
+        ld1  {v4.b}[2], [U], 1
+        ld1  {v4.b}[3], [U], 1
+        ld1  {v5.b}[0], [V], 1
+        ld1  {v5.b}[1], [V], 1
+        ld1  {v5.b}[2], [V], 1
+        ld1  {v5.b}[3], [V], 1
+        ld1  {v0.b}[0], [Y], 1
+        ld1  {v0.b}[1], [Y], 1
+        ld1  {v0.b}[2], [Y], 1
+        ld1  {v0.b}[3], [Y], 1
+    .elseif \size == 2
+        ld1  {v4.b}[4], [U], 1
+        ld1  {v4.b}[5], [U], 1
+        ld1  {v5.b}[4], [V], 1
+        ld1  {v5.b}[5], [V], 1
+        ld1  {v0.b}[4], [Y], 1
+        ld1  {v0.b}[5], [Y], 1
+    .elseif \size == 1
+        ld1  {v4.b}[6], [U], 1
+        ld1  {v5.b}[6], [V], 1
+        ld1  {v0.b}[6], [Y], 1
+    .else
+        .error unsupported macroblock size
+    .endif
+.endm
+
+/*
+ * do_store bpp, size
+ *
+ * Store \size interleaved pixels to the RGB pointer, advancing it by
+ * \size * (\bpp / 8) bytes.  Pixel components come from v10, v11, v12 (and
+ * v13 when \bpp is 32).
+ *   size 8: the whole .8b vectors
+ *   size 4: byte lanes 0-3
+ *   size 2: byte lanes 4-5
+ *   size 1: byte lane 6
+ * The lane assignment mirrors the one used by do_load.
+ */
+.macro do_store bpp, size
+    .if \bpp == 24
+        .if \size == 8
+            st3  {v10.8b, v11.8b, v12.8b}, [RGB], 24
+        .elseif \size == 4
+            st3  {v10.b, v11.b, v12.b}[0], [RGB], 3
+            st3  {v10.b, v11.b, v12.b}[1], [RGB], 3
+            st3  {v10.b, v11.b, v12.b}[2], [RGB], 3
+            st3  {v10.b, v11.b, v12.b}[3], [RGB], 3
+        .elseif \size == 2
+            /* Two distinct pixels: lanes 4 and 5, as in the 32-bpp case */
+            st3  {v10.b, v11.b, v12.b}[4], [RGB], 3
+            st3  {v10.b, v11.b, v12.b}[5], [RGB], 3
+        .elseif \size == 1
+            st3  {v10.b, v11.b, v12.b}[6], [RGB], 3
+        .else
+            .error unsupported macroblock size
+        .endif
+    .elseif \bpp == 32
+        .if \size == 8
+            st4  {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32
+        .elseif \size == 4
+            st4  {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4
+            st4  {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4
+            st4  {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4
+            st4  {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4
+        .elseif \size == 2
+            st4  {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4
+            st4  {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4
+        .elseif \size == 1
+            st4  {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4
+        .else
+            .error unsupported macroblock size
+        .endif
+    .else
+        .error unsupported bpp
+    .endif
+.endm
+
+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize
+
+/* 2-stage pipelined YCbCr->RGB conversion.  Of the macro arguments only
+ * colorid, bpp, r_offs/g_offs/b_offs and defsize are expanded below;
+ * rsize/gsize/bsize are accepted but unused. */
+
+.macro do_yuv_to_rgb_stage1
+    uaddw        v6.8h, v2.8h, v4.8b     /* v6.8h = u - 128 */
+    uaddw        v8.8h, v2.8h, v5.8b     /* v8.8h = v - 128 */
+    smull        v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
+    smlal        v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
+    smull2       v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
+    smlal2       v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
+    smull        v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
+    smull2       v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
+    smull        v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
+    smull2       v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
+.endm
+
+.macro do_yuv_to_rgb_stage2
+    rshrn        v20.4h, v20.4s, #15
+    rshrn2       v20.8h, v22.4s, #15
+    rshrn        v24.4h, v24.4s, #14
+    rshrn2       v24.8h, v26.4s, #14
+    rshrn        v28.4h, v28.4s, #14
+    rshrn2       v28.8h, v30.4s, #14
+    uaddw        v20.8h, v20.8h, v0.8b
+    uaddw        v24.8h, v24.8h, v0.8b
+    uaddw        v28.8h, v28.8h, v0.8b
+    sqxtun       v1\g_offs\defsize, v20.8h
+    sqxtun       v1\r_offs\defsize, v24.8h
+    sqxtun       v1\b_offs\defsize, v28.8h
+
+.endm
+
+.macro do_yuv_to_rgb_stage2_store_load_stage1
+    ld1          {v4.8b}, [U], 8
+    rshrn        v20.4h, v20.4s, #15
+    rshrn2       v20.8h, v22.4s, #15
+    rshrn        v24.4h, v24.4s, #14
+    rshrn2       v24.8h, v26.4s, #14
+    rshrn        v28.4h, v28.4s, #14
+    ld1          {v5.8b}, [V], 8
+    rshrn2       v28.8h, v30.4s, #14
+    uaddw        v20.8h, v20.8h, v0.8b
+    uaddw        v24.8h, v24.8h, v0.8b
+    uaddw        v28.8h, v28.8h, v0.8b
+    sqxtun       v1\g_offs\defsize, v20.8h
+    ld1          {v0.8b}, [Y], 8
+    sqxtun       v1\r_offs\defsize, v24.8h
+    prfm         PLDL1KEEP, [U, #64]
+    prfm         PLDL1KEEP, [V, #64]
+    prfm         PLDL1KEEP, [Y, #64]
+    sqxtun       v1\b_offs\defsize, v28.8h
+    uaddw        v6.8h, v2.8h, v4.8b     /* v6.8h = u - 128 */
+    uaddw        v8.8h, v2.8h, v5.8b     /* v8.8h = v - 128 */
+    do_store     \bpp, 8
+    smull        v20.4s, v6.4h, v1.4h[1] /* multiply by -11277 */
+    smlal        v20.4s, v8.4h, v1.4h[2] /* multiply by -23401 */
+    smull2       v22.4s, v6.8h, v1.4h[1] /* multiply by -11277 */
+    smlal2       v22.4s, v8.8h, v1.4h[2] /* multiply by -23401 */
+    smull        v24.4s, v8.4h, v1.4h[0] /* multiply by 22971 */
+    smull2       v26.4s, v8.8h, v1.4h[0] /* multiply by 22971 */
+    smull        v28.4s, v6.4h, v1.4h[3] /* multiply by 29033 */
+    smull2       v30.4s, v6.8h, v1.4h[3] /* multiply by 29033 */
+.endm
+
+.macro do_yuv_to_rgb
+    do_yuv_to_rgb_stage1
+    do_yuv_to_rgb_stage2
+.endm
+
+/* Apple gas crashes on adrl, work around that by using adr.
+ * But this requires a copy of these constants for each function.
+ */
+
+.balign 16
+jsimd_ycc_\colorid\()_neon_consts:
+    .short          0,      0,     0,      0
+    .short          22971, -11277, -23401, 29033
+    .short          -128,  -128,   -128,   -128
+    .short          -128,  -128,   -128,   -128
+
+asm_function jsimd_ycc_\colorid\()_convert_neon
+    OUTPUT_WIDTH    .req x0
+    INPUT_BUF       .req x1
+    INPUT_ROW       .req x2
+    OUTPUT_BUF      .req x3
+    NUM_ROWS        .req x4
+
+    INPUT_BUF0      .req x5
+    INPUT_BUF1      .req x6
+    INPUT_BUF2      .req INPUT_BUF
+
+    RGB             .req x7
+    Y               .req x8
+    U               .req x9
+    V               .req x10
+    N               .req x15
+
+    sub             sp, sp, 336
+    str             x15, [sp], 16
+    /* Load constants into v1.4h and v2.8h (v0.4h only receives the zero padding) */
+    adr             x15, jsimd_ycc_\colorid\()_neon_consts
+    /* Save the low 64 bits of every NEON register (post-indexed through sp) */
+    st1             {v0.8b - v3.8b}, [sp], 32
+    st1             {v4.8b - v7.8b}, [sp], 32
+    st1             {v8.8b - v11.8b}, [sp], 32
+    st1             {v12.8b - v15.8b}, [sp], 32
+    st1             {v16.8b - v19.8b}, [sp], 32
+    st1             {v20.8b - v23.8b}, [sp], 32
+    st1             {v24.8b - v27.8b}, [sp], 32
+    st1             {v28.8b - v31.8b}, [sp], 32
+    ld1             {v0.4h, v1.4h}, [x15], 16
+    ld1             {v2.8h}, [x15]
+
+    /* Save ARM registers and handle input arguments */
+    /* push            {x4, x5, x6, x7, x8, x9, x10, x30} */
+    stp             x4, x5, [sp], 16
+    stp             x6, x7, [sp], 16
+    stp             x8, x9, [sp], 16
+    stp             x10, x30, [sp], 16
+    ldr             INPUT_BUF0, [INPUT_BUF]
+    ldr             INPUT_BUF1, [INPUT_BUF, 8]
+    ldr             INPUT_BUF2, [INPUT_BUF, 16]
+    .unreq          INPUT_BUF
+
+    /* Preset v10/v13 to 0xFF; at 32 bpp one of them is the X/alpha channel */
+    movi            v10.16b, #255
+    movi            v13.16b, #255
+
+    /* Outer loop over scanlines */
+    cmp             NUM_ROWS, #1
+    blt             9f
+0:
+    lsl             x16, INPUT_ROW, #3
+    ldr             Y, [INPUT_BUF0, x16]
+    ldr             U, [INPUT_BUF1, x16]
+    mov             N, OUTPUT_WIDTH
+    ldr             V, [INPUT_BUF2, x16]
+    add             INPUT_ROW, INPUT_ROW, #1
+    ldr             RGB, [OUTPUT_BUF], #8
+
+    /* Inner loop over pixels */
+    subs            N, N, #8
+    blt             3f
+    do_load         8
+    do_yuv_to_rgb_stage1
+    subs            N, N, #8
+    blt             2f
+1:
+    do_yuv_to_rgb_stage2_store_load_stage1
+    subs            N, N, #8
+    bge             1b
+2:
+    do_yuv_to_rgb_stage2
+    do_store        \bpp, 8
+    tst             N, #7
+    beq             8f
+3:
+    tst             N, #4
+    beq             3f
+    do_load         4
+3:
+    tst             N, #2
+    beq             4f
+    do_load         2
+4:
+    tst             N, #1
+    beq             5f
+    do_load         1
+5:
+    do_yuv_to_rgb
+    tst             N, #4
+    beq             6f
+    do_store        \bpp, 4
+6:
+    tst             N, #2
+    beq             7f
+    do_store        \bpp, 2
+7:
+    tst             N, #1
+    beq             8f
+    do_store        \bpp, 1
+8:
+    subs            NUM_ROWS, NUM_ROWS, #1
+    bgt             0b
+9:
+    /* Restore all registers and return (the save area sits 336 bytes below sp) */
+    sub             sp, sp, #336
+    ldr             x15, [sp], 16
+    ld1             {v0.8b - v3.8b}, [sp], 32
+    ld1             {v4.8b - v7.8b}, [sp], 32
+    ld1             {v8.8b - v11.8b}, [sp], 32
+    ld1             {v12.8b - v15.8b}, [sp], 32
+    ld1             {v16.8b - v19.8b}, [sp], 32
+    ld1             {v20.8b - v23.8b}, [sp], 32
+    ld1             {v24.8b - v27.8b}, [sp], 32
+    ld1             {v28.8b - v31.8b}, [sp], 32
+    /* pop             {x4, x5, x6, x7, x8, x9, x10, x30} */
+    ldp             x4, x5, [sp], 16
+    ldp             x6, x7, [sp], 16
+    ldp             x8, x9, [sp], 16
+    ldp             x10, x30, [sp], 16
+    br              x30
+    .unreq          OUTPUT_WIDTH
+    .unreq          INPUT_ROW
+    .unreq          OUTPUT_BUF
+    .unreq          NUM_ROWS
+    .unreq          INPUT_BUF0
+    .unreq          INPUT_BUF1
+    .unreq          INPUT_BUF2
+    .unreq          RGB
+    .unreq          Y
+    .unreq          U
+    .unreq          V
+    .unreq          N
+.endfunc
+
+.purgem do_yuv_to_rgb
+.purgem do_yuv_to_rgb_stage1
+.purgem do_yuv_to_rgb_stage2
+.purgem do_yuv_to_rgb_stage2_store_load_stage1
+.endm
+
+/*--------------------------------- id ----- bpp R  rsize  G  gsize  B  bsize  defsize   */
+generate_jsimd_ycc_rgb_convert_neon extrgb,  24, 0, .4h,   1, .4h,   2, .4h,   .8b
+generate_jsimd_ycc_rgb_convert_neon extbgr,  24, 2, .4h,   1, .4h,   0, .4h,   .8b
+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h,   1, .4h,   2, .4h,   .8b
+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h,   1, .4h,   0, .4h,   .8b
+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h,   2, .4h,   1, .4h,   .8b
+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h,   2, .4h,   3, .4h,   .8b
+
+.purgem do_load   /* invoked by every instantiation above, so purged only here */
+.purgem do_store  /* likewise */
diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c
new file mode 100644
index 0000000..c173e74
--- /dev/null
+++ b/simd/jsimd_i386.c
@@ -0,0 +1,1048 @@
+/*
+ * jsimd_i386.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011, 2013 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on a
+ * 32-bit x86 architecture.
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+/*
+ * In PIC builds constants are not guaranteed to keep their alignment, so
+ * IS_ALIGNED() verifies at runtime that ptr is a multiple of 2^order bytes.
+ */
+#define IS_ALIGNED(ptr, order) (((unsigned)(ptr) & ((1U << (order)) - 1)) == 0)
+
+#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
+
+static unsigned int simd_support = ~0U;
+
+/*
+ * Probe the CPU's SIMD capabilities once and cache them in simd_support
+ * (all bits set means "not probed yet"); JSIMD_FORCE* variables may mask it.
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+  const char *setting;
+
+  /* Skip everything once simd_support holds a probed value */
+  if (simd_support == ~0U) {
+    simd_support = jpeg_simd_cpu_support();
+
+    /* An override applies only when its variable is exactly "1" */
+    setting = getenv("JSIMD_FORCEMMX");
+    if (setting != NULL && !strcmp(setting, "1"))
+      simd_support &= JSIMD_MMX;
+    setting = getenv("JSIMD_FORCE3DNOW");
+    if (setting != NULL && !strcmp(setting, "1"))
+      simd_support &= JSIMD_3DNOW|JSIMD_MMX;
+    setting = getenv("JSIMD_FORCESSE");
+    if (setting != NULL && !strcmp(setting, "1"))
+      simd_support &= JSIMD_SSE|JSIMD_MMX;
+    setting = getenv("JSIMD_FORCESSE2");
+    if (setting != NULL && !strcmp(setting, "1"))
+      simd_support &= JSIMD_SSE2;
+    setting = getenv("JSIMD_FORCENONE");
+    if (setting != NULL && !strcmp(setting, "1"))
+      simd_support = 0;
+  }
+}
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  /* Nonzero when the SIMD rgb_ycc converters can be used */
+  init_simd();
+
+  /* The SIMD code was written for exactly this sample/pixel layout */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (!(RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4))
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  /* Nonzero when the SIMD rgb_gray converters can be used */
+  init_simd();
+
+  /* The SIMD code was written for exactly this sample/pixel layout */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (!(RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4))
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  /* Nonzero when the SIMD ycc_rgb converters can be used */
+  init_simd();
+
+  /* The SIMD code was written for exactly this sample/pixel layout */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (!(RGB_PIXELSIZE == 3 || RGB_PIXELSIZE == 4))
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  void (*sse2_fn)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_ycc_convert_sse2;
+  void (*mmx_fn)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_ycc_convert_mmx;
+
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      sse2_fn = jsimd_extrgb_ycc_convert_sse2;
+      mmx_fn = jsimd_extrgb_ycc_convert_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2_fn = jsimd_extrgbx_ycc_convert_sse2;
+      mmx_fn = jsimd_extrgbx_ycc_convert_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2_fn = jsimd_extbgr_ycc_convert_sse2;
+      mmx_fn = jsimd_extbgr_ycc_convert_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2_fn = jsimd_extbgrx_ycc_convert_sse2;
+      mmx_fn = jsimd_extbgrx_ycc_convert_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2_fn = jsimd_extxbgr_ycc_convert_sse2;
+      mmx_fn = jsimd_extxbgr_ycc_convert_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2_fn = jsimd_extxrgb_ycc_convert_sse2;
+      mmx_fn = jsimd_extxrgb_ycc_convert_mmx;
+      break;
+    default:  /* keep the generic RGB routines selected above */
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
+    sse2_fn(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+  else if (simd_support & JSIMD_MMX)
+    mmx_fn(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  void (*sse2_fn)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_gray_convert_sse2;
+  void (*mmx_fn)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_gray_convert_mmx;
+
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      sse2_fn = jsimd_extrgb_gray_convert_sse2;
+      mmx_fn = jsimd_extrgb_gray_convert_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2_fn = jsimd_extrgbx_gray_convert_sse2;
+      mmx_fn = jsimd_extrgbx_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2_fn = jsimd_extbgr_gray_convert_sse2;
+      mmx_fn = jsimd_extbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2_fn = jsimd_extbgrx_gray_convert_sse2;
+      mmx_fn = jsimd_extbgrx_gray_convert_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2_fn = jsimd_extxbgr_gray_convert_sse2;
+      mmx_fn = jsimd_extxbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2_fn = jsimd_extxrgb_gray_convert_sse2;
+      mmx_fn = jsimd_extxrgb_gray_convert_mmx;
+      break;
+    default:  /* keep the generic RGB routines selected above */
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    sse2_fn(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+  else if (simd_support & JSIMD_MMX)
+    mmx_fn(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*sse2_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int) =
+    jsimd_ycc_rgb_convert_sse2;
+  void (*mmx_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int) =
+    jsimd_ycc_rgb_convert_mmx;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      sse2_fn = jsimd_ycc_extrgb_convert_sse2;
+      mmx_fn = jsimd_ycc_extrgb_convert_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2_fn = jsimd_ycc_extrgbx_convert_sse2;
+      mmx_fn = jsimd_ycc_extrgbx_convert_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2_fn = jsimd_ycc_extbgr_convert_sse2;
+      mmx_fn = jsimd_ycc_extbgr_convert_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2_fn = jsimd_ycc_extbgrx_convert_sse2;
+      mmx_fn = jsimd_ycc_extbgrx_convert_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2_fn = jsimd_ycc_extxbgr_convert_sse2;
+      mmx_fn = jsimd_ycc_extxbgr_convert_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2_fn = jsimd_ycc_extxrgb_convert_sse2;
+      mmx_fn = jsimd_ycc_extxrgb_convert_mmx;
+      break;
+    default:  /* keep the generic RGB routines selected above */
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
+    sse2_fn(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+  else if (simd_support & JSIMD_MMX)
+    mmx_fn(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  if ((simd_support & JSIMD_SSE2) != 0)        /* preferred path */
+    jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+                               compptr->v_samp_factor,
+                               compptr->width_in_blocks,
+                               input_data, output_data);
+  else if ((simd_support & JSIMD_MMX) != 0)    /* fallback; otherwise no-op */
+    jsimd_h2v2_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
+                              compptr->v_samp_factor, compptr->width_in_blocks,
+                              input_data, output_data);
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  if ((simd_support & JSIMD_SSE2) != 0)        /* preferred path */
+    jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+                               compptr->v_samp_factor,
+                               compptr->width_in_blocks,
+                               input_data, output_data);
+  else if ((simd_support & JSIMD_MMX) != 0)    /* fallback; otherwise no-op */
+    jsimd_h2v1_downsample_mmx(cinfo->image_width, cinfo->max_v_samp_factor,
+                              compptr->v_samp_factor, compptr->width_in_blocks,
+                              input_data, output_data);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  /* Prefer SSE2, fall back to MMX; compptr is not read by this wrapper */
+  if (simd_support & JSIMD_SSE2) {
+    jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                             input_data, output_data_ptr);
+  } else if (simd_support & JSIMD_MMX) {
+    jsimd_h2v2_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
+                            input_data, output_data_ptr);
+  }
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  /* Prefer SSE2, fall back to MMX; compptr is not read by this wrapper */
+  if (simd_support & JSIMD_SSE2) {
+    jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                             input_data, output_data_ptr);
+  } else if (simd_support & JSIMD_MMX) {
+    jsimd_h2v1_upsample_mmx(cinfo->max_v_samp_factor, cinfo->output_width,
+                            input_data, output_data_ptr);
+  }
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  /* Nonzero when the SIMD h2v2 fancy upsampler can be used */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  /* Nonzero when the SIMD h2v1 fancy upsampler can be used */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  const int sse2_ok =  /* the SSE2 code needs its constants 16-byte aligned */
+    (simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fancy_upsample_sse2);
+  if (sse2_ok)
+    jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
+  else if (simd_support & JSIMD_MMX)
+    jsimd_h2v2_fancy_upsample_mmx(cinfo->max_v_samp_factor,
+                                  compptr->downsampled_width, input_data,
+                                  output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  const int sse2_ok =  /* the SSE2 code needs its constants 16-byte aligned */
+    (simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fancy_upsample_sse2);
+  if (sse2_ok)
+    jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
+                                   compptr->downsampled_width, input_data,
+                                   output_data_ptr);
+  else if (simd_support & JSIMD_MMX)
+    jsimd_h2v1_fancy_upsample_mmx(cinfo->max_v_samp_factor,
+                                  compptr->downsampled_width, input_data,
+                                  output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  /* Nonzero when the SIMD h2v2 merged upsamplers can be used */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  /* Nonzero when the SIMD h2v1 merged upsamplers can be used */
+  init_simd();
+
+  /* The SIMD code assumes 8-bit samples and a 4-byte JDIMENSION */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  void (*sse2_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v2_merged_upsample_sse2;
+  void (*mmx_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v2_merged_upsample_mmx;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      sse2_fn = jsimd_h2v2_extrgb_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extrgb_merged_upsample_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2_fn = jsimd_h2v2_extrgbx_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extrgbx_merged_upsample_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2_fn = jsimd_h2v2_extbgr_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extbgr_merged_upsample_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2_fn = jsimd_h2v2_extbgrx_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extbgrx_merged_upsample_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2_fn = jsimd_h2v2_extxbgr_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extxbgr_merged_upsample_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2_fn = jsimd_h2v2_extxrgb_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v2_extxrgb_merged_upsample_mmx;
+      break;
+    default:  /* keep the generic routines selected above */
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    sse2_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+  else if (simd_support & JSIMD_MMX)
+    mmx_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  void (*sse2_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v1_merged_upsample_sse2;
+  void (*mmx_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v1_merged_upsample_mmx;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      sse2_fn = jsimd_h2v1_extrgb_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extrgb_merged_upsample_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2_fn = jsimd_h2v1_extrgbx_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extrgbx_merged_upsample_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2_fn = jsimd_h2v1_extbgr_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extbgr_merged_upsample_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2_fn = jsimd_h2v1_extbgrx_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extbgrx_merged_upsample_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2_fn = jsimd_h2v1_extxbgr_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extxbgr_merged_upsample_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2_fn = jsimd_h2v1_extxrgb_merged_upsample_sse2;
+      mmx_fn = jsimd_h2v1_extxrgb_merged_upsample_mmx;
+      break;
+    default:  /* keep the generic routines selected above */
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    sse2_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+  else if (simd_support & JSIMD_MMX)
+    mmx_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for */
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for; any mismatch
+   * rules the SIMD path out */
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(FAST_FLOAT) != 4)
+    return 0;
+
+  /* Any one of SSE2, SSE or 3DNow! is enough */
+  usable_isas = simd_support &
+    (JSIMD_SSE2 | JSIMD_SSE | JSIMD_3DNOW);
+  if (usable_isas != 0)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{
+  if ((simd_support & JSIMD_SSE2) != 0)        /* preferred path */
+    jsimd_convsamp_sse2(sample_data, start_col, workspace);
+  else if ((simd_support & JSIMD_MMX) != 0)    /* fallback; otherwise no-op */
+    jsimd_convsamp_mmx(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{
+  if ((simd_support & JSIMD_SSE2) != 0)        /* first choice */
+    jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
+  else if ((simd_support & JSIMD_SSE) != 0)    /* second choice */
+    jsimd_convsamp_float_sse(sample_data, start_col, workspace);
+  else if ((simd_support & JSIMD_3DNOW) != 0)  /* last resort; else no-op */
+    jsimd_convsamp_float_3dnow(sample_data, start_col, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  init_simd();
+
+  /* The SIMD code assumes DCTSIZE of 8 and a 2-byte DCTELEM */
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  init_simd();
+
+  /* The SIMD code assumes DCTSIZE of 8 and a 2-byte DCTELEM */
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  init_simd();
+
+  /* The SIMD code assumes DCTSIZE of 8 and a 4-byte FAST_FLOAT */
+  if (DCTSIZE != 8 || sizeof(FAST_FLOAT) != 4)
+    return 0;
+
+  /* 3DNow! is accepted as is; SSE also needs its constant table aligned */
+  if (simd_support & JSIMD_3DNOW)
+    return 1;
+  if (!(simd_support & JSIMD_SSE))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_fdct_float_sse))
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+  if (IS_ALIGNED_SSE(jconst_fdct_islow_sse2) && (simd_support & JSIMD_SSE2))
+    jsimd_fdct_islow_sse2(data);  /* aligned constants and SSE2 available */
+  else if ((simd_support & JSIMD_MMX) != 0)
+    jsimd_fdct_islow_mmx(data);   /* fallback; otherwise data is untouched */
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
+    jsimd_fdct_ifast_sse2(data);  /* gated on the ifast constant table */
+  else if (simd_support & JSIMD_MMX)
+    jsimd_fdct_ifast_mmx(data);   /* fallback; otherwise data is untouched */
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+  if (IS_ALIGNED_SSE(jconst_fdct_float_sse) && (simd_support & JSIMD_SSE))
+    jsimd_fdct_float_sse(data);    /* aligned constants and SSE available */
+  else if ((simd_support & JSIMD_3DNOW) != 0)
+    jsimd_fdct_float_3dnow(data);  /* fallback; otherwise data is untouched */
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for */
+  if (DCTSIZE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* Either SSE2 or MMX is enough */
+  usable_isas = simd_support & (JSIMD_SSE2 | JSIMD_MMX);
+  if (usable_isas != 0)
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  unsigned int usable_isas;
+
+  /* Probe the CPU on first use */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for */
+  if (DCTSIZE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(FAST_FLOAT) != 4)
+    return 0;
+
+  /* Any one of SSE2, SSE or 3DNow! is enough */
+  usable_isas = simd_support &
+    (JSIMD_SSE2 | JSIMD_SSE | JSIMD_3DNOW);
+  if (usable_isas != 0)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
+{
+  /* SSE2 is preferred over MMX; with neither bit set nothing is done */
+  if ((simd_support & JSIMD_SSE2) != 0)
+    jsimd_quantize_sse2(coef_block, divisors, workspace);
+  else if ((simd_support & JSIMD_MMX) != 0)
+    jsimd_quantize_mmx(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{
+  if ((simd_support & JSIMD_SSE2) != 0)        /* first choice */
+    jsimd_quantize_float_sse2(coef_block, divisors, workspace);
+  else if ((simd_support & JSIMD_SSE) != 0)    /* second choice */
+    jsimd_quantize_float_sse(coef_block, divisors, workspace);
+  else if ((simd_support & JSIMD_3DNOW) != 0)  /* last resort; else no-op */
+    jsimd_quantize_float_3dnow(coef_block, divisors, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  /* Nonzero when the SIMD idct_2x2 routines can be used */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for */
+  if (DCTSIZE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_idct_red_sse2))
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  /* Nonzero when the SIMD idct_4x4 routines can be used */
+  init_simd();
+
+  /* Build-time parameters the SIMD code was written for */
+  if (DCTSIZE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* MMX is accepted as is; SSE2 also needs its constant table aligned */
+  if (simd_support & JSIMD_MMX)
+    return 1;
+  if (!(simd_support & JSIMD_SSE2))
+    return 0;
+  if (IS_ALIGNED_SSE(jconst_idct_red_sse2))
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (IS_ALIGNED_SSE(jconst_idct_red_sse2) && (simd_support & JSIMD_SSE2))
+    jsimd_idct_2x2_sse2(compptr->dct_table, coef_block,
+                        output_buf, output_col);
+  else if ((simd_support & JSIMD_MMX) != 0)  /* fallback; otherwise no-op */
+    jsimd_idct_2x2_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{  /* cinfo is not used; selection mirrors jsimd_can_idct_4x4() */
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
+    jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+  else if (simd_support & JSIMD_MMX)  /* MMX fallback; else a no-op */
+    jsimd_idct_4x4_mmx(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+/*
+ * Report whether a SIMD implementation of the islow IDCT may be used.
+ *
+ * Returns 1 when the build-time checks below pass and either SSE2 is
+ * present with jconst_idct_islow_sse2 passing IS_ALIGNED_SSE(), or MMX is
+ * present; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
+    return 1;
+
+  return (simd_support & JSIMD_MMX) ? 1 : 0;
+}
+
+/*
+ * Report whether a SIMD implementation of the ifast IDCT may be used.
+ *
+ * Returns 1 when the build-time checks below pass and either SSE2 is
+ * present with jconst_idct_ifast_sse2 passing IS_ALIGNED_SSE(), or MMX is
+ * present; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
+    return 1;
+
+  return (simd_support & JSIMD_MMX) ? 1 : 0;
+}
+
+/*
+ * Report whether a SIMD implementation of the float IDCT may be used.
+ *
+ * Returns 1 when the build-time checks below pass and SSE2 or SSE is
+ * present with its constant table passing IS_ALIGNED_SSE(), or 3DNow! is
+ * present; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(FAST_FLOAT) != 4 || sizeof(FLOAT_MULT_TYPE) != 4)
+    return 0;
+
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
+    return 1;
+  if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
+    return 1;
+
+  return (simd_support & JSIMD_3DNOW) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{  /* cinfo is not used; selection mirrors jsimd_can_idct_islow() */
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
+    jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+  else if (simd_support & JSIMD_MMX)  /* MMX fallback; else a no-op */
+    jsimd_idct_islow_mmx(compptr->dct_table, coef_block, output_buf,
+                         output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{  /* cinfo is not used; selection mirrors jsimd_can_idct_ifast() */
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
+    jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+  else if (simd_support & JSIMD_MMX)  /* MMX fallback; else a no-op */
+    jsimd_idct_ifast_mmx(compptr->dct_table, coef_block, output_buf,
+                         output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{  /* cinfo is not used; selection mirrors jsimd_can_idct_float() */
+  if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
+    jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
+                          output_col);
+  else if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_idct_float_sse))
+    jsimd_idct_float_sse(compptr->dct_table, coef_block, output_buf,
+                         output_col);
+  else if (simd_support & JSIMD_3DNOW)  /* last choice; else a no-op */
+    jsimd_idct_float_3dnow(compptr->dct_table, coef_block, output_buf,
+                           output_col);
+}
+
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
new file mode 100644
index 0000000..e95023a
--- /dev/null
+++ b/simd/jsimd_mips.c
@@ -0,0 +1,1098 @@
+/*
+ * jsimd_mips.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011 D. R. Commander
+ * Copyright (C) 2013-2014, MIPS Technologies, Inc., California
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on a
+ * MIPS architecture.
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+#if defined(__linux__)
+
+/*
+ * Scan /proc/cpuinfo for a line containing search_string.  simd_support is
+ * reset to 0 first; on a match its DSPr2 bit is set.  Returns 1 or 0.
+ */
+LOCAL(int)
+parse_proc_cpuinfo(const char* search_string)
+{
+  char line[256];
+  int found = 0;
+  FILE* fp = fopen("/proc/cpuinfo", "r");
+
+  simd_support = 0;
+  if (fp == NULL)
+    return 0;  /* file could not be opened: report "not found" */
+  while (!found && fgets(line, sizeof(line), fp) != NULL)
+    found = (strstr(line, search_string) != NULL);
+  fclose(fp);
+  if (found)
+    simd_support |= JSIMD_MIPS_DSPR2;
+  return found;
+}
+
+#endif
+
+/*
+ * Check what SIMD accelerations are supported.  Detection runs only while
+ * simd_support still holds its initial ~0 value; later calls return at once.
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+  if (simd_support != ~0U)
+    return;
+
+  simd_support = 0;
+
+#if defined(__MIPSEL__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+  simd_support |= JSIMD_MIPS_DSPR2;
+#elif defined(__linux__)
+  /* Not built with DSPr2 enabled for a little-endian target: on Linux,
+   * detect it at run time by searching /proc/cpuinfo for "MIPS 74K".  The
+   * helper sets the DSPr2 bit itself, so nothing more is needed here. */
+  if (!parse_proc_cpuinfo("MIPS 74K"))
+    return;
+#endif
+}
+
+static const int mips_idct_ifast_coefs[4] = {  /* 2 x 16 bits per entry */
+  0x45404540,  /* FIX( 1.082392200 / 2) =  17734 = 0x4546; table: 0x4540 */
+  0x5A805A80,  /* FIX( 1.414213562 / 2) =  23170 = 0x5A82; table: 0x5A80 */
+  0x76407640,  /* FIX( 1.847759065 / 2) =  30274 = 0x7642; table: 0x7640 */
+  0xAC60AC60   /* FIX(-2.613125930 / 4) = -21407 = 0xAC61; table: 0xAC60 */
+};
+
+/* The following declarations are borrowed from jdsample.c */
+typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
+                               jpeg_component_info * compptr,
+                               JSAMPARRAY input_data,
+                               JSAMPARRAY * output_data_ptr);
+
+typedef struct {
+  struct jpeg_upsampler pub;
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+  upsample1_ptr methods[MAX_COMPONENTS];
+  int next_row_out;
+  JDIMENSION rows_to_go;
+  int rowgroup_height[MAX_COMPONENTS];
+  UINT8 h_expand[MAX_COMPONENTS];  /* read by jsimd_int_upsample() */
+  UINT8 v_expand[MAX_COMPONENTS];  /* read by jsimd_int_upsample() */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;  /* cinfo->upsample is cast to this */
+
+/*
+ * Report whether the DSPr2 RGB-to-YCC converters may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION, 3- or 4-byte RGB pixels and DSPr2.
+ */
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 RGB-to-gray converters may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION, 3- or 4-byte RGB pixels and DSPr2.
+ */
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 YCC-to-RGB converters may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION, 3- or 4-byte RGB pixels and DSPr2.
+ */
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 null color converter may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support.
+ */
+GLOBAL(int)
+jsimd_c_can_null_convert (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  /* Nothing is done unless the DSPr2 bit is set in simd_support. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  /* Choose the routine for cinfo->in_color_space. */
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_extrgbx_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_extbgr_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_extbgrx_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_extxbgr_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_extxrgb_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_RGB:
+    default:  /* every other colorspace uses the extrgb routine */
+      convert = jsimd_extrgb_ycc_convert_mips_dspr2;
+      break;
+  }
+
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  /* Nothing is done unless the DSPr2 bit is set in simd_support. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_extrgbx_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_extbgr_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_extbgrx_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_extxbgr_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_extxrgb_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_RGB:
+    default:  /* every other colorspace uses the extrgb routine */
+      convert = jsimd_extrgb_gray_convert_mips_dspr2;
+      break;
+  }
+
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+
+  /* Nothing is done unless the DSPr2 bit is set in simd_support. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_ycc_extrgbx_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_ycc_extbgr_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_ycc_extbgrx_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_ycc_extxbgr_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_ycc_extxrgb_convert_mips_dspr2;
+      break;
+    case JCS_EXT_RGB:
+    default:  /* every other colorspace uses the extrgb routine */
+      convert = jsimd_ycc_extrgb_convert_mips_dspr2;
+      break;
+  }
+
+  convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(void)
+jsimd_c_null_convert (j_compress_ptr cinfo,
+                      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                      JDIMENSION output_row, int num_rows)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_c_null_convert_mips_dspr2(cinfo->image_width, input_buf,
+                                    output_buf, output_row, num_rows,
+                                    cinfo->num_components);
+}
+
+/*
+ * Report whether the DSPr2 h2v2 downsampler may be used: 1 only for 8-bit
+ * samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support, else 0.
+ */
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 h2v2 smoothing downsampler may be used.
+ *
+ * Returns 1 only for 8-bit samples, a 4-byte JDIMENSION, DCTSIZE == 8 and
+ * the DSPr2 bit in simd_support; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_h2v2_smooth_downsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 || DCTSIZE != 8)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 h2v1 downsampler may be used: 1 only for 8-bit
+ * samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support, else 0.
+ */
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width,
+                                     cinfo->max_v_samp_factor,
+                                     compptr->v_samp_factor,
+                                     compptr->width_in_blocks, input_data,
+                                     output_data);
+}
+
+GLOBAL(void)
+jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo,
+                              jpeg_component_info * compptr,
+                              JSAMPARRAY input_data, JSAMPARRAY output_data)
+{  /* NOTE(review): called without a simd_support check */
+  jsimd_h2v2_smooth_downsample_mips_dspr2(input_data, output_data,
+                                          compptr->v_samp_factor,
+                                          cinfo->max_v_samp_factor,
+                                          cinfo->smoothing_factor,
+                                          compptr->width_in_blocks,
+                                          cinfo->image_width);
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width,
+                                     cinfo->max_v_samp_factor,
+                                     compptr->v_samp_factor,
+                                     compptr->width_in_blocks,
+                                     input_data, output_data);
+}
+
+/*
+ * Report whether the DSPr2 h2v2 upsampler may be used: 1 only for 8-bit
+ * samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support, else 0.
+ */
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 h2v1 upsampler may be used: 1 only for 8-bit
+ * samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support, else 0.
+ */
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 integer upsampler may be used: 1 only for 8-bit
+ * samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support, else 0.
+ */
+GLOBAL(int)
+jsimd_can_int_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{  /* compptr is not used by this dispatcher */
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+                                   cinfo->output_width, input_data,
+                                   output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{  /* compptr is not used by this dispatcher */
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+                                   cinfo->output_width, input_data,
+                                   output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{  /* NOTE(review): called without a simd_support check */
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  /* Expansion factors are looked up per component in the upsampler state. */
+  jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index],
+                                upsample->v_expand[compptr->component_index],
+                                input_data, output_data_ptr,
+                                cinfo->output_width,
+                                cinfo->max_v_samp_factor);
+}
+
+/*
+ * Report whether the DSPr2 h2v2 fancy upsampler may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support.
+ */
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 h2v1 fancy upsampler may be used: 1 only for
+ * 8-bit samples, a 4-byte JDIMENSION and the DSPr2 bit in simd_support.
+ */
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+                                         compptr->downsampled_width,
+                                         input_data, output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+                                         compptr->downsampled_width,
+                                         input_data, output_data_ptr);
+}
+
+/* Report whether the DSPr2 h2v2 merged upsampler may be used (1) or not (0). */
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  /* Without this call simd_support may still hold its initial ~0 value,
+   * which would make the DSPr2 test below succeed on any CPU. */
+  init_simd();
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/* Report whether the DSPr2 h2v1 merged upsampler may be used (1) or not (0). */
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  /* Without this call simd_support may still hold its initial ~0 value,
+   * which would make the DSPr2 test below succeed on any CPU. */
+  init_simd();
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  void (*merged_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY,
+                    JSAMPLE *);
+
+  /* Choose the routine for cinfo->out_color_space.  The call below is made
+   * without testing simd_support. */
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      merged_fn = jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      merged_fn = jsimd_h2v2_extbgr_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      merged_fn = jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      merged_fn = jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      merged_fn = jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_RGB:
+    default:  /* every other colorspace uses the extrgb routine */
+      merged_fn = jsimd_h2v2_extrgb_merged_upsample_mips_dspr2;
+      break;
+  }
+
+  merged_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf,
+            cinfo->sample_range_limit);
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  void (*merged_fn)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY,
+                    JSAMPLE *);
+
+  /* Choose the routine for cinfo->out_color_space.  The call below is made
+   * without testing simd_support. */
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      merged_fn = jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      merged_fn = jsimd_h2v1_extbgr_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      merged_fn = jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      merged_fn = jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      merged_fn = jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2;
+      break;
+    case JCS_EXT_RGB:
+    default:  /* every other colorspace uses the extrgb routine */
+      merged_fn = jsimd_h2v1_extrgb_merged_upsample_mips_dspr2;
+      break;
+  }
+
+  merged_fn(cinfo->output_width, input_buf, in_row_group_ctr, output_buf,
+            cinfo->sample_range_limit);
+}
+
+/*
+ * Report whether the DSPr2 sample conversion may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, JDIMENSION is
+ * 4 bytes, DCTELEM is 2 bytes and simd_support has the DSPr2 bit set.
+ */
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 float sample conversion may be used: 1 only when
+ * DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and ISLOW_MULT_TYPE are 2 bytes,
+ * JDIMENSION is 4 bytes and simd_support has the DSPr2 bit set; else 0.
+ * NOTE(review): the JCOEF/ISLOW_MULT_TYPE tests look copied from the IDCT
+ * checks; confirm they are intended for the float path.
+ */
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{
+  if ((simd_support & JSIMD_MIPS_DSPR2))  /* no-op without DSPr2 */
+    jsimd_convsamp_float_mips_dspr2(sample_data, start_col, workspace);
+}
+
+/*
+ * Report whether the DSPr2 islow forward DCT may be used: 1 only when
+ * DCTSIZE is 8, DCTELEM is 2 bytes and simd_support has the DSPr2 bit set.
+ */
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 ifast forward DCT may be used: 1 only when
+ * DCTSIZE is 8, DCTELEM is 2 bytes and simd_support has the DSPr2 bit set.
+ */
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  init_simd();
+
+  return 0;  /* always 0; jsimd_fdct_float() in this file has an empty body */
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_fdct_islow_mips_dspr2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_fdct_ifast_mips_dspr2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{  /* empty stub; jsimd_can_fdct_float() always returns 0 */
+}
+
+/*
+ * Report whether the DSPr2 integer quantizer may be used.
+ *
+ * Returns 1 only when DCTSIZE is 8, JCOEF and DCTELEM are 2 bytes and
+ * simd_support has the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || sizeof(JCOEF) != 2 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 float quantizer may be used: 1 only when
+ * DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and ISLOW_MULT_TYPE are 2 bytes,
+ * JDIMENSION is 4 bytes and simd_support has the DSPr2 bit set; else 0.
+ * NOTE(review): the ISLOW_MULT_TYPE test looks copied from the IDCT
+ * checks; confirm it is intended for the float path.
+ */
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_quantize_mips_dspr2(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_quantize_float_mips_dspr2(coef_block, divisors, workspace);
+}
+
+/*
+ * Report whether the DSPr2 2x2 IDCT may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and
+ * ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes and simd_support has
+ * the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 4x4 IDCT may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and
+ * ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes and simd_support has
+ * the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 6x6 IDCT may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and
+ * ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes and simd_support has
+ * the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_6x6 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 12x12 IDCT may be used: 1 only when DCTSIZE and
+ * BITS_IN_JSAMPLE are 8, JCOEF and ISLOW_MULT_TYPE are 2 bytes, JDIMENSION
+ * is 4 bytes and simd_support has the DSPr2 bit set; 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_12x12 (void)
+{
+  init_simd();
+
+  /* Build-time values the DSPr2 routines are written for */
+  if (BITS_IN_JSAMPLE != 8 || DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{  /* cinfo is not used by this dispatcher */
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block, output_buf,
+                              output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{  /* cinfo is not used by this dispatcher */
+  if (simd_support & JSIMD_MIPS_DSPR2) {  /* no-op without DSPr2 */
+    int workspace[DCTSIZE*4];  /* buffers data between passes */
+    jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block, output_buf,
+                              output_col, workspace);
+  }
+}
+
+GLOBAL(void)
+jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2)  /* no-op without DSPr2 */
+    jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block, output_buf,
+                              output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block,
+                  JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2) {  /* no-op without DSPr2 */
+    int workspace[96];      /* handed from pass 1 to pass 2 */
+    int output[12] = {      /* row pointers cast to int: 32-bit only? */
+      (int)(output_buf[0] + output_col),
+      (int)(output_buf[1] + output_col),
+      (int)(output_buf[2] + output_col),
+      (int)(output_buf[3] + output_col),
+      (int)(output_buf[4] + output_col),
+      (int)(output_buf[5] + output_col),
+      (int)(output_buf[6] + output_col),
+      (int)(output_buf[7] + output_col),
+      (int)(output_buf[8] + output_col),
+      (int)(output_buf[9] + output_col),
+      (int)(output_buf[10] + output_col),
+      (int)(output_buf[11] + output_col),
+    };
+    jsimd_idct_12x12_pass1_mips_dspr2(coef_block, compptr->dct_table,
+                                      workspace);
+    jsimd_idct_12x12_pass2_mips_dspr2(workspace, output);
+  }
+}
+
+/*
+ * Report whether the DSPr2 islow IDCT may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, JCOEF and
+ * ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes and simd_support has
+ * the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+/*
+ * Report whether the DSPr2 ifast IDCT may be used.
+ *
+ * Returns 1 only when DCTSIZE and BITS_IN_JSAMPLE are 8, IFAST_SCALE_BITS
+ * is 2, JCOEF and IFAST_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes and
+ * simd_support has the DSPr2 bit set; returns 0 otherwise.
+ */
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+  if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+      sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+
+  /* DSPr2 availability as recorded by init_simd() */
+  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  init_simd();
+
+  return 0;  /* always 0; jsimd_idct_float() in this file has an empty body */
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  if (simd_support & JSIMD_MIPS_DSPR2) {  /* no-op without DSPr2 */
+    int output[8] = {       /* row pointers cast to int: 32-bit only? */
+      (int)(output_buf[0] + output_col),
+      (int)(output_buf[1] + output_col),
+      (int)(output_buf[2] + output_col),
+      (int)(output_buf[3] + output_col),
+      (int)(output_buf[4] + output_col),
+      (int)(output_buf[5] + output_col),
+      (int)(output_buf[6] + output_col),
+      (int)(output_buf[7] + output_col),
+    };
+
+    jsimd_idct_islow_mips_dspr2(coef_block, compptr->dct_table,
+                                output, IDCT_range_limit(cinfo));
+  }
+}
+
+/* Two-pass ifast IDCT: a column pass into a work array, then a row pass. */
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  IFAST_MULT_TYPE * dequant_table;
+  DCTELEM col_results[DCTSIZE2];  /* buffers data between passes */
+
+  /* Nothing is done unless the DSPr2 bit is set in simd_support. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  dequant_table = (IFAST_MULT_TYPE *) compptr->dct_table;
+
+  /* Pass 1: process columns from input, store into work array. */
+  jsimd_idct_ifast_cols_mips_dspr2(coef_block, dequant_table, col_results,
+                                   mips_idct_ifast_coefs);
+
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
+  jsimd_idct_ifast_rows_mips_dspr2(col_results, output_buf, output_col,
+                                   mips_idct_ifast_coefs);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{  /* empty stub; jsimd_can_idct_float() always returns 0 */
+}
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
new file mode 100644
index 0000000..4572a51
--- /dev/null
+++ b/simd/jsimd_mips_dspr2.S
@@ -0,0 +1,4485 @@
+/*
+ * MIPS DSPr2 optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
+ * All rights reserved.
+ * Authors:  Teodora Novkovic (teodora.novkovic@imgtec.com)
+ *           Darko Laus       (darko.laus@imgtec.com)
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "jsimd_mips_dspr2_asm.h"
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - output_buf
+ * a3     - output_row
+ * 16(sp) - num_rows
+ * 20(sp) - cinfo->num_components
+ * Null conversion for compression: per row and per component ci, copy every
+ * num_components-th input byte (starting at ci) into output_buf[ci][row].
+ */
+    SAVE_REGS_ON_STACK 8, s0, s1
+    lw        t9, 24(sp)   // t9 = num_rows
+    lw        s0, 28(sp)   // s0 = cinfo->num_components
+    andi      t0, a0, 3    // t0 = cinfo->image_width & 3
+    beqz      t0, 4f       // no residual
+     nop
+0:                         // row loop when width is not a multiple of 4
+    addiu     t9, t9, -1
+    bltz      t9, 7f       // no rows left
+     li       t1, 0        // t1 = ci
+1:
+    sll       t3, t1, 2
+    lwx       t5, t3(a2)   // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)    // t2 = inptr = *input_buf
+    sll       t4, a3, 2
+    lwx       t5, t4(t5)   // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1   // inptr += ci
+    addu      s1, t5, a0   // s1 = outptr + image_width (row end)
+    addu      t6, t5, t0   // t6 = outptr + residual count
+2:                         // residual bytes, one per pass
+    lbu       t3, 0(t2)
+    addiu     t5, t5, 1
+    sb        t3, -1(t5)
+    bne       t6, t5, 2b
+     addu     t2, t2, s0   // inptr += num_components
+    beq       s1, t5, 8f   // image_width < 4: row is already complete, so
+     nop                   // loop 3 must not run (it could never reach s1)
+3:                         // remaining bytes, four per pass
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 3b
+     sb       t8, -1(t5)
+8:
+    addiu     t1, t1, 1    // next component
+    bne       t1, s0, 1b
+     nop
+    addiu     a1, a1, 4    // next input row pointer
+    bgez      t9, 0b
+     addiu    a3, a3, 1    // next output row
+    b         7f
+     nop
+4:                         // row loop when width is a multiple of 4
+    addiu     t9, t9, -1
+    bltz      t9, 7f
+     li       t1, 0
+5:
+    sll       t3, t1, 2
+    lwx       t5, t3(a2)   // t5 = outptr = output_buf[ci]
+    lw        t2, 0(a1)    // t2 = inptr = *input_buf
+    sll       t4, a3, 2
+    lwx       t5, t4(t5)   // t5 = outptr = output_buf[ci][output_row]
+    addu      t2, t2, t1
+    addu      s1, t5, a0
+    addu      t6, t5, t0
+6:
+    lbu       t3, 0(t2)
+    addu      t4, t2, s0
+    addu      t7, t4, s0
+    addu      t8, t7, s0
+    addu      t2, t8, s0
+    lbu       t4, 0(t4)
+    lbu       t7, 0(t7)
+    lbu       t8, 0(t8)
+    addiu     t5, t5, 4
+    sb        t3, -4(t5)
+    sb        t4, -3(t5)
+    sb        t7, -2(t5)
+    bne       s1, t5, 6b
+     sb       t8, -1(t5)
+    addiu     t1, t1, 1
+    bne       t1, s0, 5b
+     nop
+    addiu     a1, a1, 4
+    bgez      t9, 4b
+     addiu    a3, a3, 1
+7:
+    RESTORE_REGS_FROM_STACK 8, s0, s1
+    j         ra
+     nop
+
+END(jsimd_c_null_convert_mips_dspr2)
+
+/*****************************************************************************/
+/*
+ * jsimd_extrgb_ycc_convert_mips_dspr2
+ * jsimd_extbgr_ycc_convert_mips_dspr2
+ * jsimd_extrgbx_ycc_convert_mips_dspr2
+ * jsimd_extbgrx_ycc_convert_mips_dspr2
+ * jsimd_extxbgr_ycc_convert_mips_dspr2
+ * jsimd_extxrgb_ycc_convert_mips_dspr2
+ *
+ * Colorspace conversion RGB -> YCbCr
+ */
+
+.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
+/* Helper: load one pixel as r, g, b bytes, then advance inptr by pixel_size. */
+.macro DO_RGB_TO_YCC r,    \
+                     g,    \
+                     b,    \
+                     inptr
+    lbu     \r, \r_offs(\inptr)
+    lbu     \g, \g_offs(\inptr)
+    lbu     \b, \b_offs(\inptr)
+    addiu   \inptr, \pixel_size
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - output_buf
+ * a3     - output_row
+ * 16(sp) - num_rows
+ */
+/* Writes one Y, Cb and Cr byte per input pixel into output_buf[0..2][row]. */
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw      t7, 48(sp)        // t7 = num_rows
+    li      s0, 0x4c8b        // FIX(0.29900)
+    li      s1, 0x9646        // FIX(0.58700)
+    li      s2, 0x1d2f        // FIX(0.11400)
+    li      s3, 0xffffd4cd    // -FIX(0.16874)
+    li      s4, 0xffffab33    // -FIX(0.33126)
+    li      s5, 0x8000        // FIX(0.50000)
+    li      s6, 0xffff94d1    // -FIX(0.41869)
+    li      s7, 0xffffeb2f    // -FIX(0.08131)
+    li      t8, 0x807fff      // CBCR_OFFSET + ONE_HALF-1
+
+0:
+    addiu   t7, -1            // --num_rows
+    lw      t6, 0(a1)         // t6 = input_buf[0]
+    lw      t0, 0(a2)         // t0 = output_buf[0]
+    lw      t1, 4(a2)         // t1 = output_buf[1]
+    lw      t2, 8(a2)         // t2 = output_buf[2]
+    sll     t3, a3, 2         // t3 = output_row * 4 (offset of the row pointer)
+    lwx     t0, t3(t0)        // t0 = output_buf[0][output_row]
+    lwx     t1, t3(t1)        // t1 = output_buf[1][output_row]
+    lwx     t2, t3(t2)        // t2 = output_buf[2][output_row]
+
+    addu    t9, t2, a0        // t9 = end address
+    addiu   a3, 1             // ++output_row
+
+1:
+    DO_RGB_TO_YCC t3, t4, t5, t6
+
+    mtlo    s5, $ac0          // ac0 (Y)  starts at 0x8000
+    mtlo    t8, $ac1          // ac1 (Cb) starts at t8
+    mtlo    t8, $ac2          // ac2 (Cr) starts at t8
+    maddu   $ac0, s2, t5      // Y  += FIX(0.11400) * b
+    maddu   $ac1, s5, t5      // Cb += FIX(0.50000) * b
+    maddu   $ac2, s5, t3      // Cr += FIX(0.50000) * r
+    maddu   $ac0, s0, t3      // Y  += FIX(0.29900) * r
+    maddu   $ac1, s3, t3      // Cb += -FIX(0.16874) * r
+    maddu   $ac2, s6, t4      // Cr += -FIX(0.41869) * g
+    maddu   $ac0, s1, t4      // Y  += FIX(0.58700) * g
+    maddu   $ac1, s4, t4      // Cb += -FIX(0.33126) * g
+    maddu   $ac2, s7, t5      // Cr += -FIX(0.08131) * b
+    extr.w  t3, $ac0, 16      // t3 = ac0 >> 16
+    extr.w  t4, $ac1, 16      // t4 = ac1 >> 16
+    extr.w  t5, $ac2, 16      // t5 = ac2 >> 16
+    sb      t3, 0(t0)         // only the low byte of each result is stored
+    sb      t4, 0(t1)
+    sb      t5, 0(t2)
+    addiu   t0, 1
+    addiu   t2, 1
+    bne     t2, t9, 1b        // until the third output row reaches t9
+     addiu  t1, 1
+    bgtz    t7, 0b            // NOTE(review): tested only here, so one row is converted even if num_rows is 0
+     addiu  a1, 4             // ++input_buf
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j ra
+     nop
+END(jsimd_\colorid\()_ycc_convert_mips_dspr2)
+
+.purgem DO_RGB_TO_YCC
+
+.endm
+
+/*------------------------------------------id -- pix R  G  B */
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb,  3, 0, 1, 2
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr,  3, 2, 1, 0
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
+
+/*****************************************************************************/
+/*
+ * jsimd_ycc_extrgb_convert_mips_dspr2
+ * jsimd_ycc_extbgr_convert_mips_dspr2
+ * jsimd_ycc_extrgbx_convert_mips_dspr2
+ * jsimd_ycc_extbgrx_convert_mips_dspr2
+ * jsimd_ycc_extxbgr_convert_mips_dspr2
+ * jsimd_ycc_extxrgb_convert_mips_dspr2
+ *
+ * Colorspace conversion YCbCr -> RGB
+ */
+
+.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs
+/* Helper: store r, g, b (plus 0xFF at a_offs for 4-byte pixels), advance outptr. */
+.macro STORE_YCC_TO_RGB  scratch0 \
+                         scratch1 \
+                         scratch2 \
+                         outptr
+    sb       \scratch0, \r_offs(\outptr)
+    sb       \scratch1, \g_offs(\outptr)
+    sb       \scratch2, \b_offs(\outptr)
+.if (\pixel_size == 4)
+    li       t0, 0xFF          // clobbers t0; the caller passes t0 as scratch2, already stored above
+    sb       t0, \a_offs(\outptr)
+.endif
+    addiu    \outptr, \pixel_size
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - input_row
+ * a3     - output_buf
+ * 16(sp) - num_rows
+ */
+/* Reads y, cb, cr bytes from input_buf[0..2][row] and stores packed pixels. */
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw         s1, 48(sp)      // s1 = num_rows
+    li         t3, 0x8000      // rounding term added to the green sum before >> 16
+    li         t4, 0x166e9     // FIX(1.40200)
+    li         t5, 0x1c5a2     // FIX(1.77200)
+    li         t6, 0xffff492e  // -FIX(0.71414)
+    li         t7, 0xffffa7e6  // -FIX(0.34414)
+    repl.ph    t8, 128         // t8 = 128 in each halfword
+
+0:
+    lw         s0, 0(a3)       // s0 = outptr = *output_buf
+    lw         t0, 0(a1)       // t0 = input_buf[0]
+    lw         t1, 4(a1)       // t1 = input_buf[1]
+    lw         t2, 8(a1)       // t2 = input_buf[2]
+    sll        s5, a2, 2       // s5 = input_row * 4 (offset of the row pointer)
+    addiu      s1, -1          // --num_rows
+    lwx        s2, s5(t0)      // s2 = input_buf[0][input_row] (y)
+    lwx        s3, s5(t1)      // s3 = input_buf[1][input_row] (cb)
+    lwx        s4, s5(t2)      // s4 = input_buf[2][input_row] (cr)
+    addu       t9, s2, a0      // t9 = end address of the y row
+    addiu      a2, 1           // ++input_row
+
+1:
+    lbu        s7, 0(s4)       // cr
+    lbu        s6, 0(s3)       // cb
+    lbu        s5, 0(s2)       // y
+    addiu      s2, 1
+    addiu      s4, 1
+    addiu      s7, -128        // cr - 128
+    addiu      s6, -128        // cb - 128
+    mul        t2, t7, s6      // -FIX(0.34414) * cb
+    mul        t0, t6, s7      // Crgtab[cr]
+    sll        s7, 15
+    mulq_rs.w  t1, t4, s7      // Crrtab[cr]
+    sll        s6, 15
+    addu       t2, t3          // Cbgtab[cb]
+    addu       t2, t0
+
+    mulq_rs.w  t0, t5, s6      // Cbbtab[cb]
+    sra        t2, 16          // green offset
+    addu       t1, s5          // red = y + Crrtab[cr]
+    addu       t2, s5          // add y
+    ins        t2, t1, 16, 16  // t2 = red (upper half) | green (lower half)
+    subu.ph    t2, t2, t8      // subtract 128 from both halves
+    addu       t0, s5          // blue = y + Cbbtab[cb]
+    shll_s.ph  t2, t2, 8       // saturating shift left of both halves
+    subu       t0, 128
+    shra.ph    t2, t2, 8
+    shll_s.w   t0, t0, 24      // same sequence for blue, on the full word
+    addu.ph    t2, t2, t8      // clip & store
+    sra        t0, t0, 24
+    sra        t1, t2, 16      // t1 = red
+    addiu      t0, 128
+
+    STORE_YCC_TO_RGB t1, t2, t0, s0
+
+    bne        s2, t9, 1b
+     addiu     s3, 1
+    bgtz       s1, 0b          // next row while num_rows is still positive
+     addiu     a3, 4           // ++output_buf
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j ra
+     nop
+END(jsimd_ycc_\colorid\()_convert_mips_dspr2)
+
+.purgem STORE_YCC_TO_RGB
+
+.endm
+
+/*------------------------------------------id -- pix R  G  B  A */
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb,  3, 0, 1, 2, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr,  3, 2, 1, 0, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
+
+/*****************************************************************************/
+/*
+ * jsimd_extrgb_gray_convert_mips_dspr2
+ * jsimd_extbgr_gray_convert_mips_dspr2
+ * jsimd_extrgbx_gray_convert_mips_dspr2
+ * jsimd_extbgrx_gray_convert_mips_dspr2
+ * jsimd_extxbgr_gray_convert_mips_dspr2
+ * jsimd_extxrgb_gray_convert_mips_dspr2
+ *
+ * Colorspace conversion RGB -> GRAY
+ */
+
+.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
+/* Helper: load one pixel as r, g, b bytes, then advance inptr by pixel_size. */
+.macro DO_RGB_TO_GRAY r,    \
+                      g,    \
+                      b,    \
+                      inptr
+    lbu     \r, \r_offs(\inptr)
+    lbu     \g, \g_offs(\inptr)
+    lbu     \b, \b_offs(\inptr)
+    addiu   \inptr, \pixel_size
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2)
+/*
+ * a0     - cinfo->image_width
+ * a1     - input_buf
+ * a2     - output_buf
+ * a3     - output_row
+ * 16(sp) - num_rows
+ */
+/* Writes one gray byte per input pixel into output_buf[0][row]. */
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    li      s0, 0x4c8b             // s0 = FIX(0.29900)
+    li      s1, 0x9646             // s1 = FIX(0.58700)
+    li      s2, 0x1d2f             // s2 = FIX(0.11400)
+    li      s7, 0x8000             // s7 = FIX(0.50000)
+    lw      s6, 48(sp)             // s6 = num_rows
+    andi    t7, a0, 3              // t7 = image_width & 3 (pixels left for loop 3)
+
+0:
+    addiu   s6, -1                 // --num_rows
+    lw      t0, 0(a1)              // t0 = inptr = *input_buf
+    lw      t1, 0(a2)              // t1 = output_buf[0]
+    sll     t3, a3, 2
+    lwx     t1, t3(t1)             // t1 = outptr = output_buf[0][output_row]
+    addiu   a3, 1                  // ++output_row
+    addu    t9, t1, a0             // t9 = end of the output row
+    subu    t8, t9, t7             // t8 = end of the 4-pixel groups
+    beq     t1, t8, 2f             // fewer than 4 pixels: skip loop 1
+     nop
+
+1:
+    DO_RGB_TO_GRAY t3, t4, t5, t0
+    DO_RGB_TO_GRAY s3, s4, s5, t0
+
+    mtlo    s7, $ac0               // pixel 0: ac0 starts at 0x8000
+    maddu   $ac0, s2, t5
+    maddu   $ac0, s1, t4
+    maddu   $ac0, s0, t3
+    mtlo    s7, $ac1               // pixel 1 accumulates in ac1
+    maddu   $ac1, s2, s5
+    maddu   $ac1, s1, s4
+    maddu   $ac1, s0, s3
+    extr.w  t6, $ac0, 16           // t6 = gray of pixel 0
+
+    DO_RGB_TO_GRAY t3, t4, t5, t0
+    DO_RGB_TO_GRAY s3, s4, s5, t0
+
+    mtlo    s7, $ac0               // pixel 2 reuses ac0
+    maddu   $ac0, s2, t5
+    maddu   $ac0, s1, t4
+    extr.w  t2, $ac1, 16           // t2 = gray of pixel 1
+    maddu   $ac0, s0, t3
+    mtlo    s7, $ac1               // pixel 3 reuses ac1
+    maddu   $ac1, s2, s5
+    maddu   $ac1, s1, s4
+    maddu   $ac1, s0, s3
+    extr.w  t5, $ac0, 16           // t5 = gray of pixel 2
+    sb      t6, 0(t1)
+    sb      t2, 1(t1)
+    extr.w  t3, $ac1, 16           // t3 = gray of pixel 3
+    addiu   t1, 4
+    sb      t5, -2(t1)
+    sb      t3, -1(t1)
+    bne     t1, t8, 1b
+     nop
+
+2:
+    beqz    t7, 4f                 // no leftover pixels in this row
+     nop
+
+3:
+    DO_RGB_TO_GRAY t3, t4, t5, t0
+
+    mtlo    s7, $ac0
+    maddu   $ac0, s2, t5
+    maddu   $ac0, s1, t4
+    maddu   $ac0, s0, t3
+    extr.w  t6, $ac0, 16
+    sb      t6, 0(t1)
+    addiu   t1, 1
+    bne     t1, t9, 3b             // one pixel per pass until the row end
+     nop
+
+4:
+    bgtz    s6, 0b                 // next row while num_rows is still positive
+     addiu  a1, 4                  // ++input_buf
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j ra
+     nop
+END(jsimd_\colorid\()_gray_convert_mips_dspr2)
+
+.purgem DO_RGB_TO_GRAY
+
+.endm
+
+/*------------------------------------------id --  pix R  G  B */
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb,  3, 0, 1, 2
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr,  3, 2, 1, 0
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
+/*****************************************************************************/
+/*
+ * jsimd_h2v2_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extrgb_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extbgr_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2
+ * jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2
+ *
+ * Merged h2v2 upsample routines
+ */
+.macro GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 colorid,    \
+                                                pixel_size, \
+                                                r1_offs,    \
+                                                g1_offs,    \
+                                                b1_offs,    \
+                                                a1_offs,    \
+                                                r2_offs,    \
+                                                g2_offs,    \
+                                                b2_offs,    \
+                                                a2_offs
+/* Helper: store two adjacent pixels (0xFF alpha bytes when pixel_size is 8). */
+.macro STORE_H2V2_2_PIXELS  scratch0 \
+                            scratch1 \
+                            scratch2 \
+                            scratch3 \
+                            scratch4 \
+                            scratch5 \
+                            outptr
+    sb       \scratch0, \r1_offs(\outptr)
+    sb       \scratch1, \g1_offs(\outptr)
+    sb       \scratch2, \b1_offs(\outptr)
+    sb       \scratch3, \r2_offs(\outptr)
+    sb       \scratch4, \g2_offs(\outptr)
+    sb       \scratch5, \b2_offs(\outptr)
+.if (\pixel_size == 8)
+    li       \scratch0, 0xFF
+    sb       \scratch0, \a1_offs(\outptr)
+    sb       \scratch0, \a2_offs(\outptr)
+.endif
+    addiu    \outptr, \pixel_size
+.endm
+/* Helper: store a single trailing pixel; outptr is not advanced. */
+.macro STORE_H2V2_1_PIXEL  scratch0 \
+                           scratch1 \
+                           scratch2 \
+                           outptr
+    sb    \scratch0, \r1_offs(\outptr)
+    sb    \scratch1, \g1_offs(\outptr)
+    sb    \scratch2, \b1_offs(\outptr)
+
+.if (\pixel_size == 8)
+    li    t0, 0xFF
+    sb    t0, \a1_offs(\outptr)
+.endif
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2)
+/*
+ * a0     - cinfo->output_width
+ * a1     - input_buf
+ * a2     - in_row_group_ctr
+ * a3     - output_buf
+ * 16(sp) - cinfo->sample_range_limit
+ */
+/* Each cb/cr pair is combined with a 2x2 group of y samples (two rows). */
+    SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
+
+    lw           t9, 56(sp)        // cinfo->sample_range_limit
+    lw           v0, 0(a1)         // v0 = input_buf[0]
+    lw           v1, 4(a1)         // v1 = input_buf[1]
+    lw           t0, 8(a1)         // t0 = input_buf[2]
+    sll          t1, a2, 3         // t1 = offset of row pointer in_row_group_ctr*2
+    addiu        t2, t1, 4         // t2 = offset of the row pointer after it
+    sll          t3, a2, 2         // t3 = offset of row pointer in_row_group_ctr
+    lw           t4, 0(a3)         // t4 = output_buf[0]
+    lwx          t1, t1(v0)        // t1 = input_buf[0][in_row_group_ctr*2]
+    lwx          t2, t2(v0)        // t2 = input_buf[0][in_row_group_ctr*2 + 1]
+    lwx          t5, t3(v1)        // t5 = input_buf[1][in_row_group_ctr]
+    lwx          t6, t3(t0)        // t6 = input_buf[2][in_row_group_ctr]
+    lw           t7, 4(a3)         // t7 = output_buf[1]
+    li           s1, 0xe6ea
+    addiu        t8, s1, 0x7fff    // t8 = 0x166e9 [FIX(1.40200)]
+    addiu        s0, t8, 0x5eb9    // s0 = 0x1c5a2 [FIX(1.77200)]
+    addiu        s1, zero, 0xa7e6  // s1 = 0xffffa7e6 [-FIX(0.34414)]
+    xori         s2, s1, 0xeec8    // s2 = 0xffff492e [-FIX(0.71414)]
+    srl          t3, a0, 1         // t3 = output_width / 2 (column pairs)
+    blez         t3, 2f
+     addu        t0, t5, t3        // t0 = end address
+ 1:
+    lbu          t3, 0(t5)         // cb
+    lbu          s3, 0(t6)         // cr
+    addiu        t5, t5, 1
+    addiu        t3, t3, -128      // (cb - 128)
+    addiu        s3, s3, -128      // (cr - 128)
+    mult         $ac1, s1, t3      // ac1 = -FIX(0.34414) * cb
+    madd         $ac1, s2, s3      // ac1 += -FIX(0.71414) * cr
+    sll          s3, s3, 15
+    sll          t3, t3, 15
+    mulq_rs.w    s4, t8, s3        // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS
+    extr_r.w     s5, $ac1, 16      // s5 = cgreen (rounded >> 16)
+    mulq_rs.w    s6, s0, t3        // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS
+    lbu          v0, 0(t1)         // y: first row, left pixel
+    addiu        t6, t6, 1
+    addiu        t1, t1, 2
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // &range_limit[y+cred]
+    addu         s3, t9, s3        // &range_limit[y+cgreen]
+    addu         v1, t9, v1        // &range_limit[y+cblue]
+    lbu          AT, 0(t3)         // red   (AT is used as a plain scratch register)
+    lbu          s7, 0(s3)         // green
+    lbu          ra, 0(v1)         // blue  (ra is scratch here; it is in the save list)
+    lbu          v0, -1(t1)        // y: first row, right pixel
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // y+cred
+    addu         s3, t9, s3        // y+cgreen
+    addu         v1, t9, v1        // y+cblue
+    lbu          t3, 0(t3)
+    lbu          s3, 0(s3)
+    lbu          v1, 0(v1)
+    lbu          v0, 0(t2)         // y: second row, left pixel
+
+    STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4
+
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // y+cred
+    addu         s3, t9, s3        // y+cgreen
+    addu         v1, t9, v1        // y+cblue
+    lbu          AT, 0(t3)
+    lbu          s7, 0(s3)
+    lbu          ra, 0(v1)
+    lbu          v0, 1(t2)         // y: second row, right pixel
+    addiu        t2, t2, 2
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // y+cred
+    addu         s3, t9, s3        // y+cgreen
+    addu         v1, t9, v1        // y+cblue
+    lbu          t3, 0(t3)
+    lbu          s3, 0(s3)
+    lbu          v1, 0(v1)
+
+    STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7
+
+    bne          t0, t5, 1b
+     nop
+2:
+    andi         t0, a0, 1         // odd output_width: one more column in each row
+    beqz         t0, 4f
+     lbu          t3, 0(t5)        // cb (this load also runs when the branch is taken)
+    lbu          s3, 0(t6)
+    addiu        t3, t3, -128      // (cb - 128)
+    addiu        s3, s3, -128      // (cr - 128)
+    mult         $ac1, s1, t3
+    madd         $ac1, s2, s3
+    sll          s3, s3, 15
+    sll          t3, t3, 15
+    lbu          v0, 0(t1)
+    extr_r.w     s5, $ac1, 16
+    mulq_rs.w    s4, t8, s3        // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS
+    mulq_rs.w    s6, s0, t3        // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // y+cred
+    addu         s3, t9, s3        // y+cgreen
+    addu         v1, t9, v1        // y+cblue
+    lbu          t3, 0(t3)
+    lbu          s3, 0(s3)
+    lbu          v1, 0(v1)
+    lbu          v0, 0(t2)
+
+    STORE_H2V2_1_PIXEL t3, s3, v1, t4
+
+    addu         t3, v0, s4        // y+cred
+    addu         s3, v0, s5        // y+cgreen
+    addu         v1, v0, s6        // y+cblue
+    addu         t3, t9, t3        // y+cred
+    addu         s3, t9, s3        // y+cgreen
+    addu         v1, t9, v1        // y+cblue
+    lbu          t3, 0(t3)
+    lbu          s3, 0(s3)
+    lbu          v1, 0(v1)
+
+    STORE_H2V2_1_PIXEL t3, s3, v1, t7
+4:
+    RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
+
+    j           ra
+     nop
+
+END(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2)
+
+.purgem STORE_H2V2_1_PIXEL
+.purgem STORE_H2V2_2_PIXELS
+.endm
+
+/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb,  6, 0, 1, 2, 6, 3, 4, 5, 6
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr,  6, 2, 1, 0, 3, 5, 4, 3, 6
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
+GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
+/*****************************************************************************/
+/*
+ * jsimd_h2v1_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extrgb_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extbgr_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2
+ * jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2
+ *
+ * Merged h2v1 upsample routines
+ */
+
+.macro GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 colorid,    \
+                                                pixel_size, \
+                                                r1_offs,    \
+                                                g1_offs,    \
+                                                b1_offs,    \
+                                                a1_offs,    \
+                                                r2_offs,    \
+                                                g2_offs,    \
+                                                b2_offs,    \
+                                                a2_offs
+/* Helper: store two adjacent pixels (0xFF alpha bytes when pixel_size is 8). */
+.macro STORE_H2V1_2_PIXELS  scratch0 \
+                            scratch1 \
+                            scratch2 \
+                            scratch3 \
+                            scratch4 \
+                            scratch5 \
+                            outptr
+    sb       \scratch0, \r1_offs(\outptr)
+    sb       \scratch1, \g1_offs(\outptr)
+    sb       \scratch2, \b1_offs(\outptr)
+    sb       \scratch3, \r2_offs(\outptr)
+    sb       \scratch4, \g2_offs(\outptr)
+    sb       \scratch5, \b2_offs(\outptr)
+.if (\pixel_size == 8)
+    li       t0, 0xFF              // clobbers t0; loop 1 reloads t0 on its next pass
+    sb       t0, \a1_offs(\outptr)
+    sb       t0, \a2_offs(\outptr)
+.endif
+    addiu    \outptr, \pixel_size
+.endm
+/* Helper: store a single trailing pixel; outptr is not advanced. */
+.macro STORE_H2V1_1_PIXEL  scratch0 \
+                           scratch1 \
+                           scratch2 \
+                           outptr
+    sb    \scratch0, \r1_offs(\outptr)
+    sb    \scratch1, \g1_offs(\outptr)
+    sb    \scratch2, \b1_offs(\outptr)
+.if (\pixel_size == 8)
+    li    t0, 0xFF
+    sb    t0, \a1_offs(\outptr)
+.endif
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2)
+/*
+ * a0     - cinfo->output_width
+ * a1     - input_buf
+ * a2     - in_row_group_ctr
+ * a3     - output_buf
+ * 16(sp) - range_limit
+ */
+/* Each cb/cr pair is combined with two horizontally adjacent y samples. */
+    SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
+
+    li           t0, 0xe6ea
+    lw           t1, 0(a1)         // t1 = input_buf[0]
+    lw           t2, 4(a1)         // t2 = input_buf[1]
+    lw           t3, 8(a1)         // t3 = input_buf[2]
+    lw           t8, 56(sp)        // t8 = range_limit
+    addiu        s1, t0, 0x7fff    // s1 = 0x166e9 [FIX(1.40200)]
+    addiu        s2, s1, 0x5eb9    // s2 = 0x1c5a2 [FIX(1.77200)]
+    addiu        s0, t0, 0x9916    // s0 = 0x8000
+    addiu        s4, zero, 0xa7e6  // s4 = 0xffffa7e6 [-FIX(0.34414)]
+    xori         s3, s4, 0xeec8    // s3 = 0xffff492e [-FIX(0.71414)]
+    srl          t0, a0, 1         // t0 = output_width / 2 (pixel pairs)
+    sll          t4, a2, 2         // t4 = offset of row pointer in_row_group_ctr
+    lwx          s5, t4(t1)        // s5 = inptr0
+    lwx          s6, t4(t2)        // s6 = inptr1
+    lwx          s7, t4(t3)        // s7 = inptr2
+    lw           t7, 0(a3)         // t7 = outptr
+    blez         t0, 2f
+     addu        t9, s6, t0        // t9 = end address
+1:
+    lbu          t2, 0(s6)         // t2 = cb
+    lbu          t0, 0(s7)         // t0 = cr
+    lbu          t1, 0(s5)         // t1 = y
+    addiu        t2, t2, -128      // t2 = cb - 128
+    addiu        t0, t0, -128      // t0 = cr - 128
+    mult         $ac1, s4, t2      // ac1 = -FIX(0.34414) * cb
+    madd         $ac1, s3, t0      // ac1 += -FIX(0.71414) * cr
+    sll          t0, t0, 15
+    sll          t2, t2, 15
+    mulq_rs.w    t0, s1, t0        // t0 = (C1*cr + ONE_HALF)>> SCALEBITS
+    extr_r.w     t5, $ac1, 16      // t5 = cgreen (rounded >> 16)
+    mulq_rs.w    t6, s2, t2        // t6 = (C2*cb + ONE_HALF)>> SCALEBITS
+    addiu        s7, s7, 1
+    addiu        s6, s6, 1
+    addu         t2, t1, t0        // t2 = y + cred
+    addu         t3, t1, t5        // t3 = y + cgreen
+    addu         t4, t1, t6        // t4 = y + cblue
+    addu         t2, t8, t2        // &range_limit[y + cred]
+    addu         t3, t8, t3        // &range_limit[y + cgreen]
+    addu         t4, t8, t4        // &range_limit[y + cblue]
+    lbu          t1, 1(s5)         // t1 = y of the second pixel
+    lbu          v0, 0(t2)         // first pixel: red
+    lbu          v1, 0(t3)         // first pixel: green
+    lbu          ra, 0(t4)         // first pixel: blue (ra is scratch; it is in the save list)
+    addu         t2, t1, t0
+    addu         t3, t1, t5
+    addu         t4, t1, t6
+    addu         t2, t8, t2
+    addu         t3, t8, t3
+    addu         t4, t8, t4
+    lbu          t2, 0(t2)
+    lbu          t3, 0(t3)
+    lbu          t4, 0(t4)
+
+    STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7
+
+    bne          t9, s6, 1b
+     addiu       s5, s5, 2
+2:
+    andi         t0, a0, 1         // odd output_width: one more pixel
+    beqz         t0, 4f
+     nop
+3:
+    lbu          t2, 0(s6)
+    lbu          t0, 0(s7)
+    lbu          t1, 0(s5)
+    addiu        t2, t2, -128      //(cb - 128)
+    addiu        t0, t0, -128      //(cr - 128)
+    mul          t3, s4, t2
+    mul          t4, s3, t0
+    sll          t0, t0, 15
+    sll          t2, t2, 15
+    mulq_rs.w    t0, s1, t0       // (C1*cr + ONE_HALF)>> SCALEBITS
+    mulq_rs.w    t6, s2, t2       // (C2*cb + ONE_HALF)>> SCALEBITS
+    addu         t3, t3, s0       // add the 0x8000 rounding term held in s0
+    addu         t3, t4, t3
+    sra          t5, t3, 16       // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS
+    addu         t2, t1, t0       // y + cred
+    addu         t3, t1, t5       // y + cgreen
+    addu         t4, t1, t6       // y + cblue
+    addu         t2, t8, t2
+    addu         t3, t8, t3
+    addu         t4, t8, t4
+    lbu          t2, 0(t2)
+    lbu          t3, 0(t3)
+    lbu          t4, 0(t4)
+
+    STORE_H2V1_1_PIXEL t2, t3, t4, t7
+4:
+    RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
+
+    j            ra
+     nop
+
+END(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2)
+
+.purgem STORE_H2V1_1_PIXEL
+.purgem STORE_H2V1_2_PIXELS
+.endm
+
+/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb,  6, 0, 1, 2, 6, 3, 4, 5, 6
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr,  6, 2, 1, 0, 3, 5, 4, 3, 6
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
+GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
+/*****************************************************************************/
+/*
+ * jsimd_h2v2_fancy_upsample_mips_dspr2
+ *
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ */
+LEAF_MIPS_DSPR2(jsimd_h2v2_fancy_upsample_mips_dspr2)
+/*
+ * a0     - cinfo->max_v_samp_factor
+ * a1     - downsampled_width
+ * a2     - input_data
+ * a3     - output_data_ptr
+ * Emits two output rows per input row.  Each column sum is 3 * inptr0 sample
+ * + inptr1 sample; outputs blend neighbouring sums 3:1 with +8 / +7, >> 4.
+ */
+    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
+    li             s4, 0           // s4 = byte offset of the current output row pointer
+    lw             s2, 0(a3)       // s2 = *output_data_ptr
+0:
+    li             t9, 2           // two passes (output rows) per input row
+    lw             s1, -4(a2)      // s1 = inptr1
+1:
+    lw             s0, 0(a2)       // s0 = inptr0
+    lwx            s3, s4(s2)      // s3 = outptr
+    addiu          s5, a1, -2      // s5 = downsampled_width - 2
+    srl            t4, s5, 1
+    sll            t4, t4, 1       // t4 = s5 rounded down to an even count
+    lbu            t0, 0(s0)
+    lbu            t1, 1(s0)
+    lbu            t2, 0(s1)
+    lbu            t3, 1(s1)
+    addiu          s0, 2
+    addiu          s1, 2
+    addu           t8, s0, t4      // t8 = end address
+    andi           s5, s5, 1       // s5 = residual
+    sll            t4, t0, 1
+    sll            t6, t1, 1
+    addu           t0, t0, t4      // t0 = (*inptr0++) * 3
+    addu           t1, t1, t6      // t1 = (*inptr0++) * 3
+    addu           t7, t0, t2      // t7 = thiscolsum
+    addu           t6, t1, t3      // t6 = nextcolsum
+    sll            t0, t7, 2       // t0 = thiscolsum * 4
+    subu           t1, t0, t7      // t1 = thiscolsum * 3
+    shra_r.w       t0, t0, 4
+    addiu          t1, 7
+    addu           t1, t1, t6
+    srl            t1, t1, 4
+    sb             t0, 0(s3)
+    sb             t1, 1(s3)
+    beq            t8, s0, 5f      // width 2 or 3: no column pair left, so skip loop 2
+     addiu         s3, 2
+2:
+    lh             t0, 0(s0)       // t0 = A3|A2
+    lh             t2, 0(s1)       // t2 = B3|B2
+    addiu          s0, 2
+    addiu          s1, 2
+    preceu.ph.qbr  t0, t0          // t0 = 0|A3|0|A2
+    preceu.ph.qbr  t2, t2          // t2 = 0|B3|0|B2
+    shll.ph        t1, t0, 1
+    sll            t3, t6, 1
+    addu.ph        t0, t1, t0      // t0 = A3*3|A2*3
+    addu           t3, t3, t6      // t3 = this * 3
+    addu.ph        t0, t0, t2      // t0 = next2|next1
+    addu           t1, t3, t7
+    andi           t7, t0, 0xFFFF  // t7 = next1
+    sll            t2, t7, 1
+    addu           t2, t7, t2      // t2 = next1*3
+    addu           t4, t2, t6
+    srl            t6, t0, 16      // t6 = next2
+    shra_r.w       t1, t1, 4       // t1 = (this*3 + last + 8) >> 4
+    addu           t0, t3, t7
+    addiu          t0, 7
+    srl            t0, t0, 4       // t0 = (this*3 + next1 + 7) >> 4
+    shra_r.w       t4, t4, 4       // t4 = (next1*3 + this + 8) >> 4
+    addu           t2, t2, t6
+    addiu          t2, 7
+    srl            t2, t2, 4       // t2 = (next1*3 + next2 + 7) >> 4
+    sb             t1, 0(s3)
+    sb             t0, 1(s3)
+    sb             t4, 2(s3)
+    sb             t2, 3(s3)
+    bne            t8, s0, 2b
+     addiu         s3, 4
+5:
+    beqz           s5, 4f
+     addu          t8, s0, s5
+3:
+    lbu            t0, 0(s0)
+    lbu            t2, 0(s1)
+    addiu          s0, 1
+    addiu          s1, 1
+    sll            t3, t6, 1
+    sll            t1, t0, 1
+    addu           t1, t0, t1      // t1 = inptr0 * 3
+    addu           t3, t3, t6      // t3 = thiscolsum * 3
+    addu           t5, t1, t2      // t5 = nextcolsum
+    addu           t1, t3, t7
+    shra_r.w       t1, t1, 4
+    addu           t0, t3, t5
+    addiu          t0, 7
+    srl            t0, t0, 4
+    sb             t1, 0(s3)
+    sb             t0, 1(s3)
+    addiu          s3, 2
+    move           t7, t6
+    bne            t8, s0, 3b
+     move          t6, t5
+4:
+    sll            t0, t6, 2       // t0 = thiscolsum * 4
+    subu           t1, t0, t6      // t1 = thiscolsum * 3
+    addu           t1, t1, t7
+    addiu          s4, 4           // step to the next output row pointer
+    shra_r.w       t1, t1, 4
+    addiu          t0, 7
+    srl            t0, t0, 4
+    sb             t1, 0(s3)
+    sb             t0, 1(s3)
+    addiu          t9, -1
+    addiu          s3, 2
+    bnez           t9, 1b
+     lw            s1, 4(a2)       // second pass pairs inptr0 with the following row
+    srl            t0, s4, 2       // t0 = output rows written so far
+    subu           t0, a0, t0
+    bgtz           t0, 0b
+     addiu         a2, 4           // next input row
+    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
+
+    j ra
+     nop
+END(jsimd_h2v2_fancy_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_fancy_upsample_mips_dspr2)
+/* 2:1 horizontal upsample: each output = (3*nearer + further + rounding) >> 2.
+ * a0     - cinfo->max_v_samp_factor
+ * a1     - downsampled_width
+ * a2     - input_data
+ * a3     - output_data_ptr
+ */
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+    .set at
+
+    beqz           a0, 3f          // nothing to do when the row count is zero
+     sll           t0, a0, 2       // (delay slot) t0 = row count * 4
+    lw             s1, 0(a3)       // s1 = output_data (cursor over row pointers)
+    li             s3, 0x10001     // s3 = +1 in each halfword, used as rounding term
+    addu           s0, s1, t0      // s0 = end of the output row-pointer array
+0:
+    addiu          t8, a1, -2      // t8 = downsampled_width - 2
+    srl            t9, t8, 2       // t9 = number of 4-column iterations of loop 1
+    lw             t7, 0(a2)       // t7 = inptr
+    lw             s2, 0(s1)       // s2 = outptr
+    lbu            t0, 0(t7)       // t0 = inptr[0] (first column)
+    lbu            t1, 1(t7)   // t1 = inptr[1]
+    sll            t2, t0, 1
+    addu           t2, t2, t0  // t2 = invalue*3
+    addu           t2, t2, t1      // t2 = inptr[0]*3 + inptr[1]
+    shra_r.w       t2, t2, 2       // rounding shift: (t2 + 2) >> 2
+    sb             t0, 0(s2)       // first output is the first input unchanged
+    sb             t2, 1(s2)
+    beqz           t9, 11f         // fewer than 4 such columns: skip loop 1
+     addiu         s2, 2           // (delay slot) outptr += 2
+1:
+    ulw            t0, 0(t7)   // t0 = |P3|P2|P1|P0|
+    ulw            t1, 1(t7)       // t1 = |P4|P3|P2|P1|
+    ulh            t2, 4(t7)   // t2 = |0|0|P5|P4|
+    preceu.ph.qbl  t3, t0      // t3 = |0|P3|0|P2|
+    preceu.ph.qbr  t0, t0      // t0 = |0|P1|0|P0|
+    preceu.ph.qbr  t2, t2      // t2 = |0|P5|0|P4|
+    preceu.ph.qbl  t4, t1      // t4 = |0|P4|0|P3|
+    preceu.ph.qbr  t1, t1      // t1 = |0|P2|0|P1|
+    shll.ph        t5, t4, 1
+    shll.ph        t6, t1, 1
+    addu.ph        t5, t5, t4  // t5 = |P4*3|P3*3|
+    addu.ph        t6, t6, t1  // t6 = |P2*3|P1*3|
+    addu.ph        t4, t3, s3      // t4 = |P3+1|P2+1|
+    addu.ph        t0, t0, s3      // t0 = |P1+1|P0+1|
+    addu.ph        t4, t4, t5      // t4 = |P4*3+P3+1|P3*3+P2+1|
+    addu.ph        t0, t0, t6      // t0 = |P2*3+P1+1|P1*3+P0+1|
+    shrl.ph        t4, t4, 2   // t4 = left-hand outputs for P4|P3
+    shrl.ph        t0, t0, 2   // t0 = left-hand outputs for P2|P1
+    addu.ph        t2, t2, t5      // t2 = |P4*3+P5|P3*3+P4|
+    addu.ph        t3, t3, t6      // t3 = |P2*3+P3|P1*3+P2|
+    shra_r.ph      t2, t2, 2   // t2 = right-hand outputs for P4|P3 (rounded shift)
+    shra_r.ph      t3, t3, 2   // t3 = right-hand outputs for P2|P1 (rounded shift)
+    shll.ph        t2, t2, 8       // move right-hand outputs to the high byte of each halfword
+    shll.ph        t3, t3, 8
+    or             t2, t4, t2      // t2 = four interleaved output bytes for P3, P4
+    or             t3, t3, t0      // t3 = four interleaved output bytes for P1, P2
+    addiu          t9, -1
+    usw            t3, 0(s2)
+    usw            t2, 4(s2)
+    addiu          s2, 8
+    bgtz           t9, 1b
+     addiu         t7, 4           // (delay slot) inptr += 4
+11:
+    andi           t8, 3           // t8 = columns left over after loop 1 (0..3)
+    beqz           t8, 22f
+     addiu         t7, 1           // (delay slot) inptr += 1, executed on both paths
+
+2:
+    lbu            t0, 0(t7)       // t0 = invalue
+    addiu          t7, 1
+    sll            t1, t0, 1
+    addu           t2, t0, t1  // t2 = invalue * 3
+    lbu            t3, -2(t7)      // t3 = sample before invalue
+    lbu            t4, 0(t7)       // t4 = sample after invalue
+    addiu          t3, 1
+    addiu          t4, 2
+    addu           t3, t3, t2      // t3 = invalue*3 + previous + 1
+    addu           t4, t4, t2      // t4 = invalue*3 + next + 2
+    srl            t3, 2
+    srl            t4, 2
+    sb             t3, 0(s2)
+    sb             t4, 1(s2)
+    addiu          t8, -1
+    bgtz           t8, 2b
+     addiu         s2, 2           // (delay slot) outptr += 2
+
+22:
+    lbu            t0, 0(t7)       // t0 = invalue for the closing column
+    lbu            t2, -1(t7)      // t2 = sample before it
+    sll            t1, t0, 1
+    addu           t1, t1, t0 // t1 = invalue * 3
+    addu           t1, t1, t2
+    addiu          t1, 1
+    srl            t1, t1, 2       // t1 = (invalue*3 + previous + 1) >> 2
+    sb             t1, 0(s2)
+    sb             t0, 1(s2)       // closing output is invalue unchanged
+    addiu          s1, 4           // next output row pointer
+    bne            s1, s0, 0b
+     addiu         a2, 4           // (delay slot) next input row pointer
+3:
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+    j              ra
+     nop
+END(jsimd_h2v1_fancy_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_downsample_mips_dspr2)
+/* 2:1 horizontal downsample; pads every output row to width_in_blocks * 8.
+ * a0     - cinfo->image_width
+ * a1     - cinfo->max_v_samp_factor
+ * a2     - compptr->v_samp_factor
+ * a3     - compptr->width_in_blocks
+ * 16(sp) - input_data
+ * 20(sp) - output_data
+ */
+    .set at
+    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4
+
+    beqz        a2, 7f
+     lw         s1, 44(sp)  // s1 = output_data (stack args are read at +24 here)
+    lw          s0, 40(sp)  // s0 = input_data
+    srl         s2, a0, 2
+    andi        t9, a0, 2   // t9 != 0: one extra padding byte per row
+    srl         t7, t9, 1
+    addu        s2, t7, s2
+    sll         t0, a3, 3   // t0 = width_in_blocks*DCT
+    srl         t7, t0, 1
+    subu        s2, t7, s2  // s2 = padding halfwords per row (kept for all rows)
+0:
+    andi        t6, a0, 1   // t6 = temp_index
+    addiu       t6, -1      // t6 = 0 (odd width) or -1 (even width)
+    lw          t4, 0(s1)   // t4 = outptr
+    lw          t5, 0(s0)   // t5 = inptr0
+    li          s3, 0       // s3 = bias
+    srl         t7, a0, 1   // t7 = image_width1
+    srl         s4, t7, 2   // s4 = groups of 4 output samples
+    beqz        s4, 11f     // fewer than 4 outputs: skip the bottom-tested loop
+     andi       t8, t7, 3   // (delay slot) t8 = leftover output samples
+1:
+    ulhu        t0, 0(t5)   // four input pairs per iteration
+    ulhu        t1, 2(t5)
+    ulhu        t2, 4(t5)
+    ulhu        t3, 6(t5)
+    raddu.w.qb  t0, t0      // sum of the two bytes of each pair
+    raddu.w.qb  t1, t1
+    raddu.w.qb  t2, t2
+    raddu.w.qb  t3, t3
+    shra.ph     t0, t0, 1   // bias 0: sum >> 1
+    shra_r.ph   t1, t1, 1   // bias 1: (sum + 1) >> 1
+    shra.ph     t2, t2, 1
+    shra_r.ph   t3, t3, 1
+    sb          t0, 0(t4)
+    sb          t1, 1(t4)
+    sb          t2, 2(t4)
+    sb          t3, 3(t4)
+    addiu       s4, -1
+    addiu       t4, 4
+    bgtz        s4, 1b
+     addiu      t5, 8
+11:
+    beqz        t8, 3f
+     addu       s4, t4, t8  // (delay slot) s4 = outptr end address for loop 2
+2:
+    ulhu        t0, 0(t5)
+    raddu.w.qb  t0, t0
+    addqh.w     t0, t0, s3  // (sum + bias) >> 1
+    xori        s3, s3, 1   // bias alternates 0, 1, 0, 1, ...
+    sb          t0, 0(t4)
+    addiu       t4, 1
+    bne         t4, s4, 2b
+     addiu      t5, 2
+3:
+    lbux        t1, t6(t5)  // t1 = last real sample of the input row
+    sll         t1, 1       // doubled: the sample paired with a copy of itself
+    addqh.w     t2, t1, s3  // t2 = pixval1
+    xori        s3, s3, 1
+    addqh.w     t3, t1, s3  // t3 = pixval2
+    blez        s2, 5f
+     append     t3, t2,  8  // (delay slot) t3 = pixval2:pixval1 as one halfword
+    move        t5, s2      // t5 = per-row padding counter; s2 must survive for later rows
+4:
+    ush         t3, 0(t4)
+    addiu       t5, -1
+    bgtz        t5, 4b
+     addiu      t4,  2
+5:
+    beqz        t9, 6f
+     nop
+    sb          t2, 0(t4)   // odd number of padding bytes: store the last one
+6:
+    addiu       s1, 4       // next output row pointer
+    addiu       a2, -1
+    bnez        a2, 0b
+     addiu      s0, 4       // (delay slot) next input row pointer
+7:
+    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4
+    j           ra
+     nop
+END(jsimd_h2v1_downsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2)
+/* 2x2 -> 1 downsample; pads every output row to width_in_blocks * 8.
+ * a0     - cinfo->image_width
+ * a1     - cinfo->max_v_samp_factor
+ * a2     - compptr->v_samp_factor
+ * a3     - compptr->width_in_blocks
+ * 16(sp) - input_data
+ * 20(sp) - output_data
+ */
+    .set at
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    beqz         a2, 8f
+     lw          s1, 52(sp)      // s1 = output_data (stack args are read at +32 here)
+    lw           s0, 48(sp)      // s0 = input_data
+    andi         t6, a0, 1       // t6 = temp_index
+    addiu        t6, -1          // t6 = 0 (odd width) or -1 (even width)
+    andi         t9, a0, 2       // t9 != 0: one extra padding byte per row
+0:
+    srl          s2, a0, 2       // per-row state is rebuilt here: the loops below consume it
+    srl          t7, t9, 1
+    addu         s2, t7, s2
+    sll          t0, a3, 3       // t0 = width_in_blocks*DCT
+    srl          t7, t0, 1
+    subu         s2, t7, s2      // s2 = padding halfwords for this row
+    srl          t7, a0, 1       // t7 = image_width1
+    andi         t8, t7, 3       // t8 = leftover output samples
+    lw           t4, 0(s1)       // t4 = outptr
+    lw           t5, 0(s0)       // t5 = inptr0
+    lw           s7, 4(s0)       // s7 = inptr1
+    srl          s4, t7, 2       // s4 = groups of 4 output samples
+    beqz         s4, 22f         // fewer than 4 outputs: skip the bottom-tested loop
+     li          s6, 1           // (delay slot) s6 = bias
+2:
+    ulw          t0, 0(t5)       // t0 = |P3|P2|P1|P0|
+    ulw          t1, 0(s7)       // t1 = |Q3|Q2|Q1|Q0|
+    ulw          t2, 4(t5)
+    ulw          t3, 4(s7)
+    precrq.ph.w  t7, t0, t1      // t7 = |P3|P2|Q3|Q2|
+    ins          t0, t1, 16, 16  // t0 = |Q1|Q0|P1|P0|
+    raddu.w.qb   t1, t7          // sum of the four bytes of each 2x2 block
+    raddu.w.qb   t0, t0
+    shra_r.w     t1, t1, 2       // bias 2: (sum + 2) >> 2
+    addiu        t0, 1
+    srl          t0, 2           // bias 1: (sum + 1) >> 2
+    precrq.ph.w  t7, t2, t3
+    ins          t2, t3, 16, 16
+    raddu.w.qb   t7, t7
+    raddu.w.qb   t2, t2
+    shra_r.w     t7, t7, 2
+    addiu        t2, 1
+    srl          t2, 2
+    sb           t0, 0(t4)
+    sb           t1, 1(t4)
+    sb           t2, 2(t4)
+    sb           t7, 3(t4)
+    addiu        t4, 4
+    addiu        t5, 8
+    addiu        s4, s4, -1
+    bgtz         s4, 2b
+     addiu       s7, 8
+22:
+    beqz         t8, 4f
+     addu        t8, t4, t8      // (delay slot) t8 = outptr end address for loop 3
+3:
+    ulhu         t0, 0(t5)
+    ulhu         t1, 0(s7)
+    ins          t0, t1, 16, 16
+    raddu.w.qb   t0, t0
+    addu         t0, t0, s6
+    srl          t0, 2           // (sum + bias) >> 2
+    xori         s6, s6, 3       // bias alternates 1, 2, 1, 2, ...
+    sb           t0, 0(t4)
+    addiu        t5, 2
+    addiu        t4, 1
+    bne          t8, t4, 3b
+     addiu       s7, 2
+4:
+    lbux         t1, t6(t5)      // last real sample of inptr0
+    sll          t1, 1
+    lbux         t0, t6(s7)      // last real sample of inptr1
+    sll          t0, 1
+    addu         t1, t1, t0      // t1 = 2 * (last0 + last1)
+    addu         t3, t1, s6
+    srl          t0, t3, 2       // t0 = pixval1
+    xori         s6, s6, 3
+    addu         t2, t1, s6
+    srl          t1, t2, 2       // t1 = pixval2
+    blez         s2, 6f
+     append      t1, t0, 8       // (delay slot) t1 = pixval2:pixval1 as one halfword
+5:
+    ush          t1, 0(t4)
+    addiu        s2, -1
+    bgtz         s2, 5b
+     addiu       t4, 2
+6:
+    beqz         t9, 7f
+     nop
+    sb           t0, 0(t4)       // odd number of padding bytes: store the last one
+7:
+    addiu        s1, 4           // next output row pointer
+    addiu        a2, -1
+    bnez         a2, 0b
+     addiu       s0, 8           // (delay slot) skip two input row pointers
+8:
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j            ra
+     nop
+END(jsimd_h2v2_downsample_mips_dspr2)
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
+/* 2x2 -> 1 downsample where each output blends its 2x2 block with the 12 surrounding samples.
+ * a0     - input_data
+ * a1     - output_data
+ * a2     - compptr->v_samp_factor
+ * a3     - cinfo->max_v_samp_factor
+ * 16(sp) - cinfo->smoothing_factor
+ * 20(sp) - compptr->width_in_blocks
+ * 24(sp) - cinfo->image_width
+ */
+
+    .set at
+
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw          s7, 52(sp)      // compptr->width_in_blocks
+    lw          s0, 56(sp)      // cinfo->image_width
+    lw          s6, 48(sp)      // cinfo->smoothing_factor
+    sll         s7, 3           // output_cols = width_in_blocks * DCTSIZE
+    sll         v0, s7, 1       // v0 = output_cols * 2
+    subu        v0, v0, s0      // v0 = bytes of right-edge padding to add per row
+    blez        v0, 2f          // no padding needed
+    move        v1, zero        // (delay slot) v1 = row counter for the padding pass
+    addiu       t0, a3, 2       // t0 = cinfo->max_v_samp_factor + 2
+0:
+    addiu       t1, a0, -4      // base = &input_data[-1]
+    sll         t2, v1, 2
+    lwx         t1, t2(t1)      // t1 = input_data[v1 - 1]
+    move        t3, v0          // t3 = padding bytes left for this row
+    addu        t1, t1, s0      // t1 = row + image_width
+    lbu         t2, -1(t1)      // t2 = last sample inside image_width
+1:
+    addiu       t3, t3, -1
+    sb          t2, 0(t1)       // replicate that sample to the right
+    bgtz        t3, 1b
+    addiu       t1, t1, 1       // (delay slot)
+    addiu       v1, v1, 1
+    bne         v1, t0, 0b
+    nop
+2:
+    li          v0, 80
+    mul         v0, s6, v0      // v0 = smoothing_factor * 80
+    li          v1, 16384
+    move        t4, zero        // t4 = outrow
+    move        t5, zero        // t5 = inrow
+    subu        t6, v1, v0      // t6 = 16384 - smoothing_factor * 80
+    sll         t7, s6, 4       // t7 = smoothing_factor * 16
+3:
+/* Special case for first column: pretend column -1 is same as column 0 */
+    sll         v0, t4, 2
+    lwx         t8, v0(a1)      //  outptr = output_data[outrow]
+    sll         v1, t5, 2
+    addiu       t9, v1, 4
+    addiu       s0, v1, -4
+    addiu       s1, v1, 8
+    lwx         s2, v1(a0)      // inptr0 = input_data[inrow]
+    lwx         t9, t9(a0)      // inptr1 = input_data[inrow+1]
+    lwx         s0, s0(a0)      // above_ptr = input_data[inrow-1]
+    lwx         s1, s1(a0)      // below_ptr = input_data[inrow+2]
+    lh          v0, 0(s2)       // two adjacent samples from each of the four rows
+    lh          v1, 0(t9)
+    lh          t0, 0(s0)
+    lh          t1, 0(s1)
+    ins         v0, v1, 16, 16  // v0 = inptr1 pair : inptr0 pair
+    ins         t0, t1, 16, 16  // t0 = below pair : above pair
+    raddu.w.qb  t2, v0          // t2 = sum of the 2x2 block
+    raddu.w.qb  s3, t0          // s3 = sum of the pairs directly above and below
+    lbu         v0, 0(s2)       // offset 0 is used where the other columns use -1
+    lbu         v1, 2(s2)
+    lbu         t0, 0(t9)
+    lbu         t1, 2(t9)
+    addu        v0, v0, v1
+    mult        $ac1,t2, t6     // ac1 = block sum * t6
+    addu        t0, t0, t1
+    lbu         t2, 2(s0)
+    addu        t0, t0, v0
+    lbu         t3, 2(s1)
+    addu        s3, t0, s3      // s3 = the eight edge-adjacent neighbors
+    lbu         v0, 0(s0)
+    lbu         t0, 0(s1)
+    sll         s3, s3, 1       // edge-adjacent neighbors are counted twice
+    addu        v0, v0, t2
+    addu        t0, t0, t3
+    addu        t0, t0, v0      // t0 = the four corner neighbors
+    addu        s3, t0, s3      // s3 = weighted neighbor sum
+    madd        $ac1,s3, t7     // ac1 += neighbor sum * t7
+    extr_r.w    v0, $ac1, 16    // v0 = ac1 >> 16 with rounding
+    addiu       t8, t8, 1
+    addiu       s2, s2, 2
+    addiu       t9, t9, 2
+    addiu       s0, s0, 2
+    addiu       s1, s1, 2
+    sb          v0, -1(t8)
+    addiu       s4, s7, -2
+    and         s4, s4, 3       // s4 = (output_cols - 2) & 3 columns for loop 4
+    addu        s5, s4, t8      // end address
+4:
+    lh          v0, 0(s2)       // one output column per iteration
+    lh          v1, 0(t9)
+    lh          t0, 0(s0)
+    lh          t1, 0(s1)
+    ins         v0, v1, 16, 16
+    ins         t0, t1, 16, 16
+    raddu.w.qb  t2, v0          // t2 = sum of the 2x2 block
+    raddu.w.qb  s3, t0
+    lbu         v0, -1(s2)      // left and right neighbors of the block
+    lbu         v1, 2(s2)
+    lbu         t0, -1(t9)
+    lbu         t1, 2(t9)
+    addu        v0, v0, v1
+    mult        $ac1, t2, t6
+    addu        t0, t0, t1
+    lbu         t2, 2(s0)
+    addu        t0, t0, v0
+    lbu         t3, 2(s1)
+    addu        s3, t0, s3
+    lbu         v0, -1(s0)
+    lbu         t0, -1(s1)
+    sll         s3, s3, 1
+    addu        v0, v0, t2
+    addu        t0, t0, t3
+    addu        t0, t0, v0
+    addu        s3, t0, s3
+    madd        $ac1, s3, t7
+    extr_r.w    t2, $ac1, 16
+    addiu       t8, t8, 1
+    addiu       s2, s2, 2
+    addiu       t9, t9, 2
+    addiu       s0, s0, 2
+    sb          t2, -1(t8)
+    bne         s5, t8, 4b
+    addiu       s1, s1, 2       // (delay slot)
+    addiu       s5, s7, -2
+    subu        s5, s5, s4      // columns left for the 4-wide loop
+    addu        s5, s5, t8      // end address
+5:
+    lh          v0, 0(s2)       // four output columns per iteration, loads interleaved with MACs
+    lh          v1, 0(t9)
+    lh          t0, 0(s0)
+    lh          t1, 0(s1)
+    ins         v0, v1, 16, 16
+    ins         t0, t1, 16, 16
+    raddu.w.qb  t2, v0
+    raddu.w.qb  s3, t0
+    lbu         v0, -1(s2)
+    lbu         v1, 2(s2)
+    lbu         t0, -1(t9)
+    lbu         t1, 2(t9)
+    addu        v0, v0, v1
+    mult        $ac1, t2, t6
+    addu        t0, t0, t1
+    lbu         t2, 2(s0)
+    addu        t0, t0, v0
+    lbu         t3, 2(s1)
+    addu        s3, t0, s3
+    lbu         v0, -1(s0)
+    lbu         t0, -1(s1)
+    sll         s3, s3, 1
+    addu        v0, v0, t2
+    addu        t0, t0, t3
+    lh          v1, 2(t9)       // start loading the second column's block
+    addu        t0, t0, v0
+    lh          v0, 2(s2)
+    addu        s3, t0, s3
+    lh          t0, 2(s0)
+    lh          t1, 2(s1)
+    madd        $ac1, s3, t7
+    extr_r.w    t2, $ac1, 16    // t2 = first column's result
+    ins         t0, t1, 16, 16
+    ins         v0, v1, 16, 16
+    raddu.w.qb  s3, t0
+    lbu         v1, 4(s2)
+    lbu         t0, 1(t9)
+    lbu         t1, 4(t9)
+    sb          t2, 0(t8)
+    raddu.w.qb  t3, v0
+    lbu         v0, 1(s2)
+    addu        t0, t0, t1
+    mult        $ac1, t3, t6
+    addu        v0, v0, v1
+    lbu         t2, 4(s0)
+    addu        t0, t0, v0
+    lbu         v0, 1(s0)
+    addu        s3, t0, s3
+    lbu         t0, 1(s1)
+    lbu         t3, 4(s1)
+    addu        v0, v0, t2
+    sll         s3, s3, 1
+    addu        t0, t0, t3
+    lh          v1, 4(t9)       // start loading the third column's block
+    addu        t0, t0, v0
+    lh          v0, 4(s2)
+    addu        s3, t0, s3
+    lh          t0, 4(s0)
+    lh          t1, 4(s1)
+    madd        $ac1, s3, t7
+    extr_r.w    t2, $ac1, 16    // t2 = second column's result
+    ins         t0, t1, 16, 16
+    ins         v0, v1, 16, 16
+    raddu.w.qb  s3, t0
+    lbu         v1, 6(s2)
+    lbu         t0, 3(t9)
+    lbu         t1, 6(t9)
+    sb          t2, 1(t8)
+    raddu.w.qb  t3, v0
+    lbu         v0, 3(s2)
+    addu        t0, t0,t1
+    mult        $ac1, t3, t6
+    addu        v0, v0, v1
+    lbu         t2, 6(s0)
+    addu        t0, t0, v0
+    lbu         v0, 3(s0)
+    addu        s3, t0, s3
+    lbu         t0, 3(s1)
+    lbu         t3, 6(s1)
+    addu        v0, v0, t2
+    sll         s3, s3, 1
+    addu        t0, t0, t3
+    lh          v1, 6(t9)       // start loading the fourth column's block
+    addu        t0, t0, v0
+    lh          v0, 6(s2)
+    addu        s3, t0, s3
+    lh          t0, 6(s0)
+    lh          t1, 6(s1)
+    madd        $ac1, s3, t7
+    extr_r.w    t3, $ac1, 16    // t3 = third column's result
+    ins         t0, t1, 16, 16
+    ins         v0, v1, 16, 16
+    raddu.w.qb  s3, t0
+    lbu         v1, 8(s2)
+    lbu         t0, 5(t9)
+    lbu         t1, 8(t9)
+    sb          t3, 2(t8)
+    raddu.w.qb  t2, v0
+    lbu         v0, 5(s2)
+    addu        t0, t0, t1
+    mult        $ac1, t2, t6
+    addu        v0, v0, v1
+    lbu         t2, 8(s0)
+    addu        t0, t0, v0
+    lbu         v0, 5(s0)
+    addu        s3, t0, s3
+    lbu         t0, 5(s1)
+    lbu         t3, 8(s1)
+    addu        v0, v0, t2
+    sll         s3, s3, 1
+    addu        t0, t0, t3
+    addiu       t8, t8, 4       // outptr += 4
+    addu        t0, t0, v0
+    addiu       s2, s2, 8       // each input row pointer advances by 8
+    addu        s3, t0, s3
+    addiu       t9, t9, 8
+    madd        $ac1, s3, t7
+    extr_r.w    t1, $ac1, 16    // t1 = fourth column's result
+    addiu       s0, s0, 8
+    addiu       s1, s1, 8
+    bne         s5, t8, 5b
+    sb          t1, -1(t8)      // (delay slot) store the fourth result
+/* Special case for last column */
+    lh          v0, 0(s2)
+    lh          v1, 0(t9)
+    lh          t0, 0(s0)
+    lh          t1, 0(s1)
+    ins         v0, v1, 16, 16
+    ins         t0, t1, 16, 16
+    raddu.w.qb  t2, v0
+    raddu.w.qb  s3, t0
+    lbu         v0, -1(s2)
+    lbu         v1, 1(s2)       // offset 1 is used where the other columns use 2
+    lbu         t0, -1(t9)
+    lbu         t1, 1(t9)
+    addu        v0, v0, v1
+    mult        $ac1, t2, t6
+    addu        t0, t0, t1
+    lbu         t2, 1(s0)
+    addu        t0, t0, v0
+    lbu         t3, 1(s1)
+    addu        s3, t0, s3
+    lbu         v0, -1(s0)
+    lbu         t0, -1(s1)
+    sll         s3, s3, 1
+    addu        v0, v0, t2
+    addu        t0, t0, t3
+    addu        t0, t0, v0
+    addu        s3, t0, s3
+    madd        $ac1, s3, t7
+    extr_r.w    t0, $ac1, 16
+    addiu       t5, t5, 2       // inrow += 2 (index scaled by 4 at label 3)
+    sb          t0, 0(t8)
+    addiu       t4, t4, 1       // outrow += 1
+    bne         t4, a2, 3b      // NOTE(review): no guard for a2 == 0 before this loop
+    addiu       t5, t5, 2       // (delay slot) a second +2; NOTE(review): inrow += 4 in total, confirm intended
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j           ra
+     nop
+
+END(jsimd_h2v2_smooth_downsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2)
+/* Integer-factor upsample: repeat each sample h_expand times, each row v_expand times.
+ * a0     - upsample->h_expand[compptr->component_index]
+ * a1     - upsample->v_expand[compptr->component_index]
+ * a2     - input_data
+ * a3     - output_data_ptr
+ * 16(sp) - cinfo->output_width
+ * 20(sp) - cinfo->max_v_samp_factor
+ */
+    .set at
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+    lw      s0, 0(a3)    // s0 = &output_data[outrow] (running row-pointer cursor)
+    lw      s1, 32(sp)   // s1 = cinfo->output_width
+    lw      s2, 36(sp)   // s2 = cinfo->max_v_samp_factor
+    li      t6, 0        // t6 = inrow * 4 (byte offset into input_data)
+    beqz    s2, 10f
+     li     s3, 0        // s3 = outrow
+0:
+    addu    t0, a2, t6   // t0 = &input_data[inrow]
+    lw      t3, 0(t0)    // t3 = inptr
+    lw      t8, 0(s0)    // t8 = outptr = output_data[outrow]
+    beqz    s1, 4f
+     addu   t5, t8, s1   // t5 = outend
+1:
+    lb      t2, 0(t3)    // t2 = invalue = *inptr++
+    addiu   t3, 1
+    beqz    a0, 3f
+     move   t0, a0       // t0 = h_expand
+2:
+    sb      t2, 0(t8)    // replicate invalue h_expand times
+    addiu   t0, -1
+    bgtz    t0, 2b
+     addiu  t8, 1
+3:
+    bgt     t5, t8, 1b   // until outptr reaches outend
+     nop
+4:
+    addiu   t9, a1, -1   // t9 = v_expand - 1
+    blez    t9, 9f       // v_expand <= 1: no row replication
+     nop
+5:
+    lw      t3, 0(s0)    // t3 = source row (already filled)
+    lw      t4, 4(s0)    // t4 = destination row (the one right after it)
+    subu    t0, s1, 0xF
+    blez    t0, 7f       // width <= 15: byte loop only
+     addu   t5, t3, s1   // t5 = end address
+    andi    t7, s1, 0xF  // t7 = residual
+    subu    t8, t5, t7   // t8 = end address of the 16-byte loop
+6:
+    ulw     t0, 0(t3)
+    ulw     t1, 4(t3)
+    ulw     t2, 8(t3)
+    usw     t0, 0(t4)
+    ulw     t0, 12(t3)
+    usw     t1, 4(t4)
+    usw     t2, 8(t4)
+    usw     t0, 12(t4)
+    addiu   t3, 16
+    bne     t3, t8, 6b
+     addiu  t4, 16
+    beqz    t7, 8f
+     nop
+7:
+    lbu     t0, 0(t3)
+    sb      t0, 0(t4)
+    addiu   t3, 1
+    bne     t3, t5, 7b
+     addiu  t4, 1
+8:
+    addiu   t9, -1
+    bgtz    t9, 5b
+     addiu  s0, 4        // (delay slot) the row just written becomes the next source
+9:
+    addiu   s0, 4        // s0 = &output_data[outrow + v_expand]
+    addu    s3, s3, a1   // outrow += v_expand
+    bne     s3, s2, 0b
+     addiu  t6, 4        // (delay slot) inrow++, scaled to the 4-byte row pointers
+10:
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+    j       ra
+     nop
+END(jsimd_int_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
+/* 2:1 horizontal upsample by replication: every input byte is stored twice.
+ * a0     - cinfo->max_v_samp_factor
+ * a1     - cinfo->output_width
+ * a2     - input_data
+ * a3     - output_data_ptr
+ */
+    lw      t7, 0(a3)       // t7 = output_data
+    andi    t8, a1, 0xf     // t8 = residual
+    sll     t0, a0, 2       // t0 = row count * 4
+    blez    a0, 4f
+     addu   t9, t7, t0      // t9 = output_data end address
+0:
+    lw      t5, 0(t7)       // t5 = outptr
+    lw      t6, 0(a2)       // t6 = inptr
+    addu    t3, t5, a1      // t3 = outptr + output_width (end address)
+    subu    t3, t8          // t3 = end address - residual
+    beq     t5, t3, 2f      // output_width < 16: only the byte loop runs
+     move   t4, t8          // (delay slot) t4 = residual output bytes left
+1:
+    ulw     t0, 0(t6)       // t0 = |P3|P2|P1|P0|
+    ulw     t2, 4(t6)       // t2 = |P7|P6|P5|P4|
+    srl     t1, t0, 16      // t1 = |X|X|P3|P2|
+    ins     t0, t0, 16, 16  // t0 = |P1|P0|P1|P0|
+    ins     t1, t1, 16, 16  // t1 = |P3|P2|P3|P2|
+    ins     t0, t0, 8, 16   // t0 = |P1|P1|P0|P0|
+    ins     t1, t1, 8, 16   // t1 = |P3|P3|P2|P2|
+    usw     t0, 0(t5)
+    usw     t1, 4(t5)
+    srl     t0, t2, 16      // t0 = |X|X|P7|P6|
+    ins     t2, t2, 16, 16  // t2 = |P5|P4|P5|P4|
+    ins     t0, t0, 16, 16  // t0 = |P7|P6|P7|P6|
+    ins     t2, t2, 8, 16   // t2 = |P5|P5|P4|P4|
+    ins     t0, t0, 8, 16   // t0 = |P7|P7|P6|P6|
+    usw     t2, 8(t5)
+    usw     t0, 12(t5)
+    addiu   t5, 16          // 16 output bytes written per iteration
+    bne     t5, t3, 1b
+     addiu  t6, 8           // (delay slot) 8 input bytes consumed per iteration
+    beqz    t8, 3f          // no residual: row is complete
+     move   t4, t8
+2:
+    lbu     t1, 0(t6)
+    sb      t1, 0(t5)       // the same byte goes to two adjacent outputs
+    sb      t1, 1(t5)
+    addiu   t4, -2          // two output bytes per iteration
+    addiu   t6, 1
+    bgtz    t4, 2b
+     addiu  t5, 2
+3:
+    addiu   t7, 4           // next output row pointer
+    bne     t9, t7, 0b
+     addiu  a2, 4           // (delay slot) next input row pointer
+4:
+    j       ra
+     nop
+END(jsimd_h2v1_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2)
+/* 2x2 upsample by replication: double each byte, then copy the row to the next row.
+ * a0     - cinfo->max_v_samp_factor
+ * a1     - cinfo->output_width
+ * a2     - input_data
+ * a3     - output_data_ptr
+ */
+    lw      t7, 0(a3)       // t7 = output_data (cursor over row pointers)
+    blez    a0, 7f
+     andi   t9, a1, 0xf     // t9 = residual
+0:
+    lw      t6, 0(a2)       // t6 = inptr
+    lw      t5, 0(t7)       // t5 = outptr
+    addu    t8, t5, a1      // t8 = outptr end address
+    subu    t8, t9          // t8 = end address - residual
+    beq     t5, t8, 2f      // output_width < 16: only the byte loop runs
+     move   t4, t9          // (delay slot) t4 = residual output bytes left
+1:
+    ulw     t0, 0(t6)       // t0 = |P3|P2|P1|P0|
+    srl     t1, t0, 16      // t1 = |X|X|P3|P2|
+    ins     t0, t0, 16, 16  // t0 = |P1|P0|P1|P0|
+    ins     t0, t0, 8, 16   // t0 = |P1|P1|P0|P0|
+    ins     t1, t1, 16, 16  // t1 = |P3|P2|P3|P2|
+    ins     t1, t1, 8, 16   // t1 = |P3|P3|P2|P2|
+    ulw     t2, 4(t6)       // t2 = |P7|P6|P5|P4|
+    usw     t0, 0(t5)
+    usw     t1, 4(t5)
+    srl     t3, t2, 16      // t3 = |X|X|P7|P6|
+    ins     t2, t2, 16, 16  // t2 = |P5|P4|P5|P4|
+    ins     t2, t2, 8, 16   // t2 = |P5|P5|P4|P4|
+    ins     t3, t3, 16, 16  // t3 = |P7|P6|P7|P6|
+    ins     t3, t3, 8, 16   // t3 = |P7|P7|P6|P6|
+    usw     t2, 8(t5)
+    usw     t3, 12(t5)
+    addiu   t5, 16          // 16 output bytes written per iteration
+    bne     t5, t8, 1b
+     addiu  t6, 8           // (delay slot) 8 input bytes consumed per iteration
+    beqz    t9, 3f          // no residual: row is complete
+     move   t4, t9
+2:
+    lbu     t0, 0(t6)
+    sb      t0, 0(t5)       // the same byte goes to two adjacent outputs
+    sb      t0, 1(t5)
+    addiu   t4, -2          // two output bytes per iteration
+    addiu   t6, 1
+    bgtz    t4, 2b
+     addiu  t5, 2
+3:
+    ulw     t6, 0(t7)       // t6 = outptr
+    ulw     t5, 4(t7)       // t5 = outptr[1]
+    addu    t4, t6, a1      // t4 = new end address
+    subu    t8, t4, t9      // t8 = end address of the 16-byte copy loop
+    beq     t6, t8, 5f      // output_width < 16: copy byte by byte only
+     nop
+4:
+    ulw     t0, 0(t6)       // copy the finished row into the row below, 16 bytes at a time
+    ulw     t1, 4(t6)
+    ulw     t2, 8(t6)
+    usw     t0, 0(t5)
+    ulw     t0, 12(t6)
+    usw     t1, 4(t5)
+    usw     t2, 8(t5)
+    usw     t0, 12(t5)
+    addiu   t6, 16
+    bne     t6, t8, 4b
+     addiu  t5, 16
+    beqz    t9, 6f          // no residual: copy is complete
+     nop
+5:
+    lbu     t0, 0(t6)
+    sb      t0, 0(t5)
+    addiu   t6, 1
+    bne     t6, t4, 5b
+     addiu  t5, 1
+6:
+    addiu   t7, 8           // skip two output row pointers
+    addiu   a0, -2          // two output rows produced per iteration
+    bgtz    a0, 0b
+     addiu  a2, 4           // (delay slot) next input row pointer
+7:
+    j       ra
+     nop
+END(jsimd_h2v2_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_islow_mips_dspr2)
+/*
+ * a0     - coef_block
+ * a1     - compptr->dcttable
+ * a2     - output
+ * a3     - range_limit
+ */
+
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    addiu     sp, sp, -256
+    move      v0, sp
+    addiu     v1, zero, 8      // v1 = DCTSIZE = 8
+1:
+    lh        s4, 32(a0)       // s4 = inptr[16]
+    lh        s5, 64(a0)       // s5 = inptr[32]
+    lh        s6, 96(a0)       // s6 = inptr[48]
+    lh        t1, 112(a0)      // t1 = inptr[56]
+    lh        t7, 16(a0)       // t7 = inptr[8]
+    lh        t5, 80(a0)       // t5 = inptr[40]
+    lh        t3, 48(a0)       // t3 = inptr[24]
+    or        s4, s4, t1
+    or        s4, s4, t3
+    or        s4, s4, t5
+    or        s4, s4, t7
+    or        s4, s4, s5
+    or        s4, s4, s6
+    bnez      s4, 2f
+     addiu    v1, v1, -1
+    lh        s5, 0(a1)        // quantptr[DCTSIZE*0]
+    lh        s6, 0(a0)        // inptr[DCTSIZE*0]
+    mul       s5, s5, s6       // DEQUANTIZE(inptr[0], quantptr[0])
+    sll       s5, s5, 2
+    sw        s5, 0(v0)
+    sw        s5, 32(v0)
+    sw        s5, 64(v0)
+    sw        s5, 96(v0)
+    sw        s5, 128(v0)
+    sw        s5, 160(v0)
+    sw        s5, 192(v0)
+    b         3f
+     sw       s5, 224(v0)
+2:
+    lh        t0, 112(a1)
+    lh        t2, 48(a1)
+    lh        t4, 80(a1)
+    lh        t6, 16(a1)
+    mul       t0, t0, t1       // DEQUANTIZE(inptr[DCTSIZE*7],quant[DCTSIZE*7])
+    mul       t1, t2, t3       // DEQUANTIZE(inptr[DCTSIZE*3],quant[DCTSIZE*3])
+    mul       t2, t4, t5       // DEQUANTIZE(inptr[DCTSIZE*5],quant[DCTSIZE*5])
+    mul       t3, t6, t7       // DEQUANTIZE(inptr[DCTSIZE*1],quant[DCTSIZE*1])
+    lh        t4, 32(a1)
+    lh        t5, 32(a0)
+    lh        t6, 96(a1)
+    lh        t7, 96(a0)
+    addu      s0, t0, t1       // z3 = tmp0 + tmp2
+    addu      s1, t1, t2       // z2 = tmp1 + tmp2
+    addu      s2, t2, t3       // z4 = tmp1 + tmp3
+    addu      s3, s0, s2       // z3 + z4
+    addiu     t9, zero, 9633   // FIX_1_175875602
+    mul       s3, s3, t9       // z5 = MULTIPLY(z3 + z4, FIX_1_175875602)
+    addu      t8, t0, t3       // z1 = tmp0 + tmp3
+    addiu     t9, zero, 2446   // FIX_0_298631336
+    mul       t0, t0, t9       // tmp0 = MULTIPLY(tmp0, FIX_0_298631336)
+    addiu     t9, zero, 16819  // FIX_2_053119869
+    mul       t2, t2, t9       // tmp1 = MULTIPLY(tmp1, FIX_2_053119869)
+    addiu     t9, zero, 25172  // FIX_3_072711026
+    mul       t1, t1, t9       // tmp2 = MULTIPLY(tmp2, FIX_3_072711026)
+    addiu     t9, zero, 12299  // FIX_1_501321110
+    mul       t3, t3, t9       // tmp3 = MULTIPLY(tmp3, FIX_1_501321110)
+    addiu     t9, zero, 16069  // FIX_1_961570560
+    mul       s0, s0, t9       // -z3 = MULTIPLY(z3, FIX_1_961570560)
+    addiu     t9, zero, 3196   // FIX_0_390180644
+    mul       s2, s2, t9       // -z4 = MULTIPLY(z4, FIX_0_390180644)
+    addiu     t9, zero, 7373   // FIX_0_899976223
+    mul       t8, t8, t9       // -z1 = MULTIPLY(z1, FIX_0_899976223)
+    addiu     t9, zero, 20995  // FIX_2_562915447
+    mul       s1, s1, t9       // -z2 = MULTIPLY(z2, FIX_2_562915447)
+    subu      s0, s3, s0       // z3 += z5
+    addu      t0, t0, s0       // tmp0 += z3
+    addu      t1, t1, s0       // tmp2 += z3
+    subu      s2, s3, s2       // z4 += z5
+    addu      t2, t2, s2       // tmp1 += z4
+    addu      t3, t3, s2       // tmp3 += z4
+    subu      t0, t0, t8       // tmp0 += z1
+    subu      t1, t1, s1       // tmp2 += z2
+    subu      t2, t2, s1       // tmp1 += z2
+    subu      t3, t3, t8       // tmp3 += z1
+    mul       s0, t4, t5       // DEQUANTIZE(inptr[DCTSIZE*2],quant[DCTSIZE*2])
+    addiu     t9, zero, 6270   // FIX_0_765366865
+    mul       s1, t6, t7       // DEQUANTIZE(inptr[DCTSIZE*6],quant[DCTSIZE*6])
+    lh        t4, 0(a1)
+    lh        t5, 0(a0)
+    lh        t6, 64(a1)
+    lh        t7, 64(a0)
+    mul       s2, t9, s0       // MULTIPLY(z2, FIX_0_765366865)
+    mul       t5, t4, t5       // DEQUANTIZE(inptr[DCTSIZE*0],quant[DCTSIZE*0])
+    mul       t6, t6, t7       // DEQUANTIZE(inptr[DCTSIZE*4],quant[DCTSIZE*4])
+    addiu     t9, zero, 4433   // FIX_0_541196100
+    addu      s3, s0, s1       // z2 + z3
+    mul       s3, s3, t9       // z1 = MULTIPLY(z2 + z3, FIX_0_541196100)
+    addiu     t9, zero, 15137  // FIX_1_847759065
+    mul       t8, s1, t9       // MULTIPLY(z3, FIX_1_847759065)
+    addu      t4, t5, t6
+    subu      t5, t5, t6
+    sll       t4, t4, 13       // tmp0 = (z2 + z3) << CONST_BITS
+    sll       t5, t5, 13       // tmp1 = (z2 - z3) << CONST_BITS
+    addu      t7, s3, s2       // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865)
+    subu      t6, s3, t8       // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065)
+    addu      s0, t4, t7
+    subu      s1, t4, t7
+    addu      s2, t5, t6
+    subu      s3, t5, t6
+    addu      t4, s0, t3
+    subu      s0, s0, t3
+    addu      t3, s2, t1
+    subu      s2, s2, t1
+    addu      t1, s3, t2
+    subu      s3, s3, t2
+    addu      t2, s1, t0
+    subu      s1, s1, t0
+    shra_r.w  t4, t4, 11
+    shra_r.w  t3, t3, 11
+    shra_r.w  t1, t1, 11
+    shra_r.w  t2, t2, 11
+    shra_r.w  s1, s1, 11
+    shra_r.w  s3, s3, 11
+    shra_r.w  s2, s2, 11
+    shra_r.w  s0, s0, 11
+    sw        t4, 0(v0)
+    sw        t3, 32(v0)
+    sw        t1, 64(v0)
+    sw        t2, 96(v0)
+    sw        s1, 128(v0)
+    sw        s3, 160(v0)
+    sw        s2, 192(v0)
+    sw        s0, 224(v0)
+3:
+    addiu     a1, a1, 2
+    addiu     a0, a0, 2
+    bgtz      v1, 1b
+     addiu    v0, v0, 4
+    move      v0, sp
+    addiu     v1, zero, 8
+4:
+    lw        t0, 8(v0)        // z2 = (INT32) wsptr[2]
+    lw        t1, 24(v0)       // z3 = (INT32) wsptr[6]
+    lw        t2, 0(v0)        // (INT32) wsptr[0]
+    lw        t3, 16(v0)       // (INT32) wsptr[4]
+    lw        s4, 4(v0)        // (INT32) wsptr[1]
+    lw        s5, 12(v0)       // (INT32) wsptr[3]
+    lw        s6, 20(v0)       // (INT32) wsptr[5]
+    lw        s7, 28(v0)       // (INT32) wsptr[7]
+    or        s4, s4, t0
+    or        s4, s4, t1
+    or        s4, s4, t3
+    or        s4, s4, s7
+    or        s4, s4, s5
+    or        s4, s4, s6
+    bnez      s4, 5f
+     addiu    v1, v1, -1
+    shra_r.w  s5, t2, 5
+    andi      s5, s5, 0x3ff
+    lbux      s5, s5(a3)
+    lw        s1, 0(a2)
+    replv.qb  s5, s5
+    usw       s5, 0(s1)
+    usw       s5, 4(s1)
+    b         6f
+     nop
+5:
+    addu      t4, t0, t1       // z2 + z3
+    addiu     t8, zero, 4433   // FIX_0_541196100
+    mul       t5, t4, t8       // z1 = MULTIPLY(z2 + z3, FIX_0_541196100)
+    addiu     t8, zero, 15137  // FIX_1_847759065
+    mul       t1, t1, t8       // MULTIPLY(z3, FIX_1_847759065)
+    addiu     t8, zero, 6270   // FIX_0_765366865
+    mul       t0, t0, t8       // MULTIPLY(z2, FIX_0_765366865)
+    addu      t4, t2, t3       // (INT32) wsptr[0] + (INT32) wsptr[4]
+    subu      t2, t2, t3       // (INT32) wsptr[0] - (INT32) wsptr[4]
+    sll       t4, t4, 13       // tmp0 = ((wsptr[0] + wsptr[4]) << CONST_BITS
+    sll       t2, t2, 13       // tmp1 = ((wsptr[0] - wsptr[4]) << CONST_BITS
+    subu      t1, t5, t1       // tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065)
+    subu      t3, t2, t1       // tmp12 = tmp1 - tmp2
+    addu      t2, t2, t1       // tmp11 = tmp1 + tmp2
+    addu      t5, t5, t0       // tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865)
+    subu      t1, t4, t5       // tmp13 = tmp0 - tmp3
+    addu      t0, t4, t5       // tmp10 = tmp0 + tmp3
+    lw        t4, 28(v0)       // tmp0 = (INT32) wsptr[7]
+    lw        t6, 12(v0)       // tmp2 = (INT32) wsptr[3]
+    lw        t5, 20(v0)       // tmp1 = (INT32) wsptr[5]
+    lw        t7, 4(v0)        // tmp3 = (INT32) wsptr[1]
+    addu      s0, t4, t6       // z3 = tmp0 + tmp2
+    addiu     t8, zero, 9633   // FIX_1_175875602
+    addu      s1, t5, t7       // z4 = tmp1 + tmp3
+    addu      s2, s0, s1       // z3 + z4
+    mul       s2, s2, t8       // z5 = MULTIPLY(z3 + z4, FIX_1_175875602)
+    addu      s3, t4, t7       // z1 = tmp0 + tmp3
+    addu      t9, t5, t6       // z2 = tmp1 + tmp2
+    addiu     t8, zero, 16069  // FIX_1_961570560
+    mul       s0, s0, t8       // -z3 = MULTIPLY(z3, FIX_1_961570560)
+    addiu     t8, zero, 3196   // FIX_0_390180644
+    mul       s1, s1, t8       // -z4 = MULTIPLY(z4, FIX_0_390180644)
+    addiu     t8, zero, 2446   // FIX_0_298631336
+    mul       t4, t4, t8       // tmp0 = MULTIPLY(tmp0, FIX_0_298631336)
+    addiu     t8, zero, 7373   // FIX_0_899976223
+    mul       s3, s3, t8       // -z1 = MULTIPLY(z1, FIX_0_899976223)
+    addiu     t8, zero, 16819  // FIX_2_053119869
+    mul       t5, t5, t8       // tmp1 = MULTIPLY(tmp1, FIX_2_053119869)
+    addiu     t8, zero, 20995  // FIX_2_562915447
+    mul       t9, t9, t8       // -z2 = MULTIPLY(z2, FIX_2_562915447)
+    addiu     t8, zero, 25172  // FIX_3_072711026
+    mul       t6, t6, t8       // tmp2 = MULTIPLY(tmp2, FIX_3_072711026)
+    addiu     t8, zero, 12299  // FIX_1_501321110
+    mul       t7, t7, t8       // tmp3 = MULTIPLY(tmp3, FIX_1_501321110)
+    subu      s0, s2, s0       // z3 += z5
+    subu      s1, s2, s1       // z4 += z5
+    addu      t4, t4, s0
+    subu      t4, t4, s3       // tmp0
+    addu      t5, t5, s1
+    subu      t5, t5, t9       // tmp1
+    addu      t6, t6, s0
+    subu      t6, t6, t9       // tmp2
+    addu      t7, t7, s1
+    subu      t7, t7, s3       // tmp3
+    addu      s0, t0, t7
+    subu      t0, t0, t7
+    addu      t7, t2, t6
+    subu      t2, t2, t6
+    addu      t6, t3, t5
+    subu      t3, t3, t5
+    addu      t5, t1, t4
+    subu      t1, t1, t4
+    shra_r.w  s0, s0, 18
+    shra_r.w  t7, t7, 18
+    shra_r.w  t6, t6, 18
+    shra_r.w  t5, t5, 18
+    shra_r.w  t1, t1, 18
+    shra_r.w  t3, t3, 18
+    shra_r.w  t2, t2, 18
+    shra_r.w  t0, t0, 18
+    andi      s0, s0, 0x3ff
+    andi      t7, t7, 0x3ff
+    andi      t6, t6, 0x3ff
+    andi      t5, t5, 0x3ff
+    andi      t1, t1, 0x3ff
+    andi      t3, t3, 0x3ff
+    andi      t2, t2, 0x3ff
+    andi      t0, t0, 0x3ff
+    lw        s1, 0(a2)
+    lbux      s0, s0(a3)
+    lbux      t7, t7(a3)
+    lbux      t6, t6(a3)
+    lbux      t5, t5(a3)
+    lbux      t1, t1(a3)
+    lbux      t3, t3(a3)
+    lbux      t2, t2(a3)
+    lbux      t0, t0(a3)
+    sb        s0, 0(s1)
+    sb        t7, 1(s1)
+    sb        t6, 2(s1)
+    sb        t5, 3(s1)
+    sb        t1, 4(s1)
+    sb        t3, 5(s1)
+    sb        t2, 6(s1)
+    sb        t0, 7(s1)
+6:
+    addiu     v0, v0, 32
+    bgtz      v1, 4b
+     addiu    a2, a2, 4
+    addiu     sp, sp, 256
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j         ra
+     nop
+
+END(jsimd_idct_islow_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_ifast_cols_mips_dspr2)
+/* Column pass of the fast IDCT: each loop iteration handles one 32-bit word, i.e. two packed halfword columns.
+ * a0     - inptr (rows read at a 16-byte stride; advanced 4 bytes per iteration)
+ * a1     - quantptr (read at the same offsets as inptr and advanced in step with it)
+ * a2     - wsptr (output; rows written at a 16-byte stride)
+ * a3     - mips_idct_ifast_coefs (copied to AT; words at offsets 0/4/8/12 are the multipliers)
+ */
+
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    addiu          t9, a0, 16            // end address (4 iterations x 4 bytes)
+    or             AT, a3, zero          // AT = mips_idct_ifast_coefs
+
+0:                                       // loop over column pairs
+    lw             s0, 0(a1)             // quantptr[DCTSIZE*0]
+    lw             t0, 0(a0)             // inptr[DCTSIZE*0]
+    lw             t1, 16(a0)            // inptr[DCTSIZE*1]
+    muleq_s.w.phl  v0, t0, s0            // tmp0 ...
+    lw             t2, 32(a0)            // inptr[DCTSIZE*2]
+    lw             t3, 48(a0)            // inptr[DCTSIZE*3]
+    lw             t4, 64(a0)            // inptr[DCTSIZE*4]
+    lw             t5, 80(a0)            // inptr[DCTSIZE*5]
+    muleq_s.w.phr  t0, t0, s0            // ... tmp0 ...
+    lw             t6, 96(a0)            // inptr[DCTSIZE*6]
+    lw             t7, 112(a0)           // inptr[DCTSIZE*7]
+    or             s4, t1, t2            // rows 1|2
+    or             s5, t3, t4            // rows 3|4
+    bnez           s4, 1f                // any nonzero AC row -> full IDCT
+     ins           t0, v0, 16, 16        // ... tmp0
+    bnez           s5, 1f
+     or            s6, t5, t6            // rows 5|6
+    or             s6, s6, t7            // rows 5|6|7
+    bnez           s6, 1f
+     sw            t0, 0(a2)             // wsptr[DCTSIZE*0] (delay slot; the full path overwrites it)
+    sw             t0, 16(a2)            // wsptr[DCTSIZE*1]
+    sw             t0, 32(a2)            // wsptr[DCTSIZE*2]
+    sw             t0, 48(a2)            // wsptr[DCTSIZE*3]
+    sw             t0, 64(a2)            // wsptr[DCTSIZE*4]
+    sw             t0, 80(a2)            // wsptr[DCTSIZE*5]
+    sw             t0, 96(a2)            // wsptr[DCTSIZE*6]
+    sw             t0, 112(a2)           // wsptr[DCTSIZE*7]
+    addiu          a0, a0, 4             // next column pair
+    b              2f
+     addiu         a1, a1, 4             // (delay slot) next quantptr pair
+
+1:                                       // full IDCT for this column pair
+    lw             s1, 32(a1)            // quantptr[DCTSIZE*2]
+    lw             s2, 64(a1)            // quantptr[DCTSIZE*4]
+    muleq_s.w.phl  v0, t2, s1            // tmp1 ...
+    muleq_s.w.phr  t2, t2, s1            // ... tmp1 ...
+    lw             s0, 16(a1)            // quantptr[DCTSIZE*1]
+    lw             s1, 48(a1)            // quantptr[DCTSIZE*3]
+    lw             s3, 96(a1)            // quantptr[DCTSIZE*6]
+    muleq_s.w.phl  v1, t4, s2            // tmp2 ...
+    muleq_s.w.phr  t4, t4, s2            // ... tmp2 ...
+    lw             s2, 80(a1)            // quantptr[DCTSIZE*5]
+    lw             t8, 4(AT)             // FIX(1.414213562)
+    ins            t2, v0, 16, 16        // ... tmp1
+    muleq_s.w.phl  v0, t6, s3            // tmp3 ...
+    muleq_s.w.phr  t6, t6, s3            // ... tmp3 ...
+    ins            t4, v1, 16, 16        // ... tmp2
+    addq.ph        s4, t0, t4            // tmp10
+    subq.ph        s5, t0, t4            // tmp11
+    ins            t6, v0, 16, 16        // ... tmp3
+    subq.ph        s6, t2, t6            // tmp12 ...
+    addq.ph        s7, t2, t6            // tmp13
+    mulq_s.ph      s6, s6, t8            // ... tmp12 ...
+    addq.ph        t0, s4, s7            // tmp0
+    subq.ph        t6, s4, s7            // tmp3
+    muleq_s.w.phl  v0, t1, s0            // tmp4 ...
+    muleq_s.w.phr  t1, t1, s0            // ... tmp4 ...
+    shll_s.ph      s6, s6, 1             // x2
+    lw             s3, 112(a1)           // quantptr[DCTSIZE*7]
+    subq.ph        s6, s6, s7            // ... tmp12
+    muleq_s.w.phl  v1, t7, s3            // tmp7 ...
+    muleq_s.w.phr  t7, t7, s3            // ... tmp7 ...
+    ins            t1, v0, 16, 16        // ... tmp4
+    addq.ph        t2, s5, s6            // tmp1
+    subq.ph        t4, s5, s6            // tmp2
+    muleq_s.w.phl  v0, t5, s2            // tmp6 ...
+    muleq_s.w.phr  t5, t5, s2            // ... tmp6 ...
+    ins            t7, v1, 16, 16        // ... tmp7
+    addq.ph        s5, t1, t7            // z11
+    subq.ph        s6, t1, t7            // z12
+    muleq_s.w.phl  v1, t3, s1            // tmp5 ...
+    muleq_s.w.phr  t3, t3, s1            // ... tmp5 ...
+    ins            t5, v0, 16, 16        // ... tmp6
+    ins            t3, v1, 16, 16        // ... tmp5
+    addq.ph        s7, t5, t3            // z13
+    subq.ph        v0, t5, t3            // z10
+    addq.ph        t7, s5, s7            // tmp7
+    subq.ph        s5, s5, s7            // tmp11 ...
+    addq.ph        v1, v0, s6            // z5 ...
+    mulq_s.ph      s5, s5, t8            // ... tmp11
+    lw             t8, 8(AT)             // FIX(1.847759065)
+    lw             s4, 0(AT)             // FIX(1.082392200)
+    addq.ph        s0, t0, t7            // tmp0 + tmp7
+    subq.ph        s1, t0, t7            // tmp0 - tmp7
+    mulq_s.ph      v1, v1, t8            // ... z5
+    shll_s.ph      s5, s5, 1             // x2
+    lw             t8, 12(AT)            // FIX(-2.613125930)
+    sw             s0, 0(a2)             // wsptr[DCTSIZE*0]
+    shll_s.ph      v0, v0, 1             // x4
+    mulq_s.ph      v0, v0, t8            // tmp12 ...
+    mulq_s.ph      s4, s6, s4            // tmp10 ...
+    shll_s.ph      v1, v1, 1             // x2
+    addiu          a0, a0, 4             // next column pair
+    addiu          a1, a1, 4             // next quantptr pair
+    sw             s1, 112(a2)           // wsptr[DCTSIZE*7]
+    shll_s.ph      s6, v0, 1             // x4
+    shll_s.ph      s4, s4, 1             // x2
+    addq.ph        s6, s6, v1            // ... tmp12
+    subq.ph        t5, s6, t7            // tmp6
+    subq.ph        s4, s4, v1            // ... tmp10
+    subq.ph        t3, s5, t5            // tmp5
+    addq.ph        s2, t2, t5            // tmp1 + tmp6
+    addq.ph        t1, s4, t3            // tmp4
+    subq.ph        s3, t2, t5            // tmp1 - tmp6
+    sw             s2, 16(a2)            // wsptr[DCTSIZE*1]
+    sw             s3, 96(a2)            // wsptr[DCTSIZE*6]
+    addq.ph        v0, t4, t3            // tmp2 + tmp5
+    subq.ph        v1, t4, t3            // tmp2 - tmp5
+    sw             v0, 32(a2)            // wsptr[DCTSIZE*2]
+    sw             v1, 80(a2)            // wsptr[DCTSIZE*5]
+    addq.ph        v0, t6, t1            // tmp3 + tmp4
+    subq.ph        v1, t6, t1            // tmp3 - tmp4
+    sw             v0, 64(a2)            // wsptr[DCTSIZE*4]
+    sw             v1, 48(a2)            // wsptr[DCTSIZE*3]
+
+2:
+    bne            a0, t9, 0b            // loop until inptr reaches the end address
+     addiu         a2, a2, 4             // (delay slot) next wsptr column pair
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j              ra
+     nop
+
+END(jsimd_idct_ifast_cols_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_ifast_rows_mips_dspr2)
+/* Row pass of the fast IDCT: each loop iteration turns two 16-byte wsptr rows into two 8-byte output rows.
+ * a0     - wsptr (advanced 32 bytes per iteration; loop ends at wsptr + 128)
+ * a1     - output_buf (array of row pointers; two are consumed per iteration)
+ * a2     - output_col (offset added to each row pointer)
+ * a3     - mips_idct_ifast_coefs (spilled to the stack because a3 is reused as a row pointer)
+ */
+
+    SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
+
+    addiu         t9, a0, 128        // end address
+    lui           s8, 0x8080
+    ori           s8, s8, 0x8080     // s8 = 0x80808080, added to every output byte (presumably the sample level shift)
+
+0:
+    lw            AT, 36(sp)         // reload saved $a3 (mips_idct_ifast_coefs); assumes the macro stored it at 36(sp)
+    lw            t0, 0(a0)          // wsptr[DCTSIZE*0+0/1]  b a
+    lw            s0, 16(a0)         // wsptr[DCTSIZE*1+0/1]  B A
+    lw            t2, 4(a0)          // wsptr[DCTSIZE*0+2/3]  d c
+    lw            s2, 20(a0)         // wsptr[DCTSIZE*1+2/3]  D C
+    lw            t4, 8(a0)          // wsptr[DCTSIZE*0+4/5]  f e
+    lw            s4, 24(a0)         // wsptr[DCTSIZE*1+4/5]  F E
+    lw            t6, 12(a0)         // wsptr[DCTSIZE*0+6/7]  h g
+    lw            s6, 28(a0)         // wsptr[DCTSIZE*1+6/7]  H G
+    precrq.ph.w   t1, s0, t0         // B b
+    ins           t0, s0, 16, 16     // A a
+    bnez          t1, 1f             // element 1 of either row nonzero -> full IDCT
+     or           s0, t2, s2         // elements 2/3 of both rows
+    bnez          s0, 1f
+     or           s0, t4, s4         // elements 4/5 of both rows
+    bnez          s0, 1f
+     or           s0, t6, s6         // elements 6/7 of both rows
+    bnez          s0, 1f
+     shll_s.ph    s0, t0, 2          // A a
+    lw            a3, 0(a1)          // shortcut path: first output row pointer
+    lw            AT, 4(a1)          // second output row pointer (AT is reloaded at label 0)
+    precrq.ph.w   t0, s0, s0         // A A
+    ins           s0, s0, 16, 16     // a a
+    addu          a3, a3, a2         // + output_col
+    addu          AT, AT, a2         // + output_col
+    precrq.qb.ph  t0, t0, t0         // A A A A
+    precrq.qb.ph  s0, s0, s0         // a a a a
+    addu.qb       s0, s0, s8
+    addu.qb       t0, t0, s8
+    sw            s0, 0(a3)          // first row, bytes 0-3
+    sw            s0, 4(a3)          // first row, bytes 4-7
+    sw            t0, 0(AT)          // second row, bytes 0-3
+    sw            t0, 4(AT)          // second row, bytes 4-7
+    addiu         a0, a0, 32         // next pair of wsptr rows
+    bne           a0, t9, 0b
+     addiu        a1, a1, 8          // (delay slot) next pair of row pointers
+    b             2f
+     nop
+
+1:                                   // full IDCT for this pair of rows
+    precrq.ph.w   t3, s2, t2         // D d
+    ins           t2, s2, 16, 16     // C c
+    precrq.ph.w   t5, s4, t4         // F f
+    ins           t4, s4, 16, 16     // E e
+    precrq.ph.w   t7, s6, t6         // H h
+    ins           t6, s6, 16, 16     // G g
+    lw            t8, 4(AT)          // FIX(1.414213562)
+    addq.ph       s4, t0, t4         // tmp10
+    subq.ph       s5, t0, t4         // tmp11
+    subq.ph       s6, t2, t6         // tmp12 ...
+    addq.ph       s7, t2, t6         // tmp13
+    mulq_s.ph     s6, s6, t8         // ... tmp12 ...
+    addq.ph       t0, s4, s7         // tmp0
+    subq.ph       t6, s4, s7         // tmp3
+    shll_s.ph     s6, s6, 1          // x2
+    subq.ph       s6, s6, s7         // ... tmp12
+    addq.ph       t2, s5, s6         // tmp1
+    subq.ph       t4, s5, s6         // tmp2
+    addq.ph       s5, t1, t7         // z11
+    subq.ph       s6, t1, t7         // z12
+    addq.ph       s7, t5, t3         // z13
+    subq.ph       v0, t5, t3         // z10
+    addq.ph       t7, s5, s7         // tmp7
+    subq.ph       s5, s5, s7         // tmp11 ...
+    addq.ph       v1, v0, s6         // z5 ...
+    mulq_s.ph     s5, s5, t8         // ... tmp11
+    lw            t8, 8(AT)          // FIX(1.847759065)
+    lw            s4, 0(AT)          // FIX(1.082392200)
+    addq.ph       s0, t0, t7         // tmp0 + tmp7
+    subq.ph       s7, t0, t7         // tmp0 - tmp7
+    mulq_s.ph     v1, v1, t8         // ... z5
+    lw            a3, 0(a1)          // first output row pointer
+    lw            t8, 12(AT)         // FIX(-2.613125930)
+    shll_s.ph     s5, s5, 1          // x2
+    addu          a3, a3, a2         // + output_col
+    shll_s.ph     v0, v0, 1          // x4
+    mulq_s.ph     v0, v0, t8         // tmp12 ...
+    mulq_s.ph     s4, s6, s4         // tmp10 ...
+    shll_s.ph     v1, v1, 1          // x2
+    addiu         a0, a0, 32         // next pair of wsptr rows
+    addiu         a1, a1, 8          // next pair of row pointers (advanced before the second row is stored)
+    shll_s.ph     s6, v0, 1          // x4
+    shll_s.ph     s4, s4, 1          // x2
+    addq.ph       s6, s6, v1         // ... tmp12
+    shll_s.ph     s0, s0, 2
+    subq.ph       t5, s6, t7         // tmp6
+    subq.ph       s4, s4, v1         // ... tmp10
+    subq.ph       t3, s5, t5         // tmp5
+    shll_s.ph     s7, s7, 2
+    addq.ph       t1, s4, t3         // tmp4
+    addq.ph       s1, t2, t5         // tmp1 + tmp6
+    subq.ph       s6, t2, t5         // tmp1 - tmp6
+    addq.ph       s2, t4, t3         // tmp2 + tmp5
+    subq.ph       s5, t4, t3         // tmp2 - tmp5
+    addq.ph       s4, t6, t1         // tmp3 + tmp4
+    subq.ph       s3, t6, t1         // tmp3 - tmp4
+    shll_s.ph     s1, s1, 2
+    shll_s.ph     s2, s2, 2
+    shll_s.ph     s3, s3, 2
+    shll_s.ph     s4, s4, 2
+    shll_s.ph     s5, s5, 2
+    shll_s.ph     s6, s6, 2
+    precrq.ph.w   t0, s1, s0         // B A
+    ins           s0, s1, 16, 16     // b a
+    precrq.ph.w   t2, s3, s2         // D C
+    ins           s2, s3, 16, 16     // d c
+    precrq.ph.w   t4, s5, s4         // F E
+    ins           s4, s5, 16, 16     // f e
+    precrq.ph.w   t6, s7, s6         // H G
+    ins           s6, s7, 16, 16     // h g
+    precrq.qb.ph  t0, t2, t0         // D C B A
+    precrq.qb.ph  s0, s2, s0         // d c b a
+    precrq.qb.ph  t4, t6, t4         // H G F E
+    precrq.qb.ph  s4, s6, s4         // h g f e
+    addu.qb       s0, s0, s8
+    addu.qb       s4, s4, s8
+    sw            s0, 0(a3)          // outptr[0/1/2/3]       d c b a
+    sw            s4, 4(a3)          // outptr[4/5/6/7]       h g f e
+    lw            a3, -4(a1)         // second output row pointer (a1 was already advanced by 8)
+    addu.qb       t0, t0, s8
+    addu          a3, a3, a2         // + output_col
+    addu.qb       t4, t4, s8
+    sw            t0, 0(a3)          // outptr[0/1/2/3]       D C B A
+    bne           a0, t9, 0b
+     sw           t4, 4(a3)          // outptr[4/5/6/7]       H G F E
+
+2:
+
+    RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
+
+    j             ra
+     nop
+
+END(jsimd_idct_ifast_rows_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_fdct_islow_mips_dspr2)
+/* Transforms the block at a0 in place in two passes of 8 iterations: 16-byte rows first, then columns at a 16-byte stride.
+ * a0     - data (read and overwritten as halfwords)
+ */
+
+    SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lui       t0, 6437
+    ori       t0, 2260      // t0 =   6437 |   2260 (packed halfword multipliers for dpa.w.ph)
+    lui       t1, 9633
+    ori       t1, 11363     // t1 =   9633 |  11363
+    lui       t2, 0xd39e
+    ori       t2, 0xe6dc    // t2 = -11362 |  -6436
+    lui       t3, 0xf72d
+    ori       t3, 9633      // t3 =  -2259 |   9633
+    lui       t4, 2261
+    ori       t4, 9633      // t4 =   2261 |   9633
+    lui       t5, 0xd39e
+    ori       t5, 6437      // t5 = -11362 |   6437
+    lui       t6, 9633
+    ori       t6, 0xd39d    // t6 =   9633 | -11363
+    lui       t7, 0xe6dc
+    ori       t7, 2260      // t7 =  -6436 |   2260
+    lui       t8, 4433
+    ori       t8, 10703     // t8 =   4433 |  10703
+    lui       t9, 0xd630
+    ori       t9, 4433      // t9 = -10704 |   4433
+    li        s8, 8         // row counter
+    move      a1, a0        // a1 = current row pointer
+1:                          // pass 1: one 16-byte row per iteration
+    lw        s0, 0(a1)     // tmp0 = 1|0
+    lw        s1, 4(a1)     // tmp1 = 3|2
+    lw        s2, 8(a1)     // tmp2 = 5|4
+    lw        s3, 12(a1)    // tmp3 = 7|6
+    packrl.ph s1, s1, s1    // tmp1 = 2|3
+    packrl.ph s3, s3, s3    // tmp3 = 6|7
+    subq.ph   s7, s1, s2    // tmp7 = 2-5|3-4 = t5|t4
+    subq.ph   s5, s0, s3    // tmp5 = 1-6|0-7 = t6|t7
+    mult      $0, $0        // ac0  = 0
+    dpa.w.ph  $ac0, s7, t0  // ac0 += t5*  6437 + t4*  2260
+    dpa.w.ph  $ac0, s5, t1  // ac0 += t6*  9633 + t7* 11363
+    mult      $ac1, $0, $0  // ac1  = 0
+    dpa.w.ph  $ac1, s7, t2  // ac1 += t5*-11362 + t4* -6436
+    dpa.w.ph  $ac1, s5, t3  // ac1 += t6* -2259 + t7*  9633
+    mult      $ac2, $0, $0  // ac2  = 0
+    dpa.w.ph  $ac2, s7, t4  // ac2 += t5*  2261 + t4*  9633
+    dpa.w.ph  $ac2, s5, t5  // ac2 += t6*-11362 + t7*  6437
+    mult      $ac3, $0, $0  // ac3  = 0
+    dpa.w.ph  $ac3, s7, t6  // ac3 += t5*  9633 + t4*-11363
+    dpa.w.ph  $ac3, s5, t7  // ac3 += t6* -6436 + t7*  2260
+    addq.ph   s6, s1, s2    // tmp6 = 2+5|3+4 = t2|t3
+    addq.ph   s4, s0, s3    // tmp4 = 1+6|0+7 = t1|t0
+    extr_r.w  s0, $ac0, 11  // tmp0 = (ac0 + 1024) >> 11
+    extr_r.w  s1, $ac1, 11  // tmp1 = (ac1 + 1024) >> 11
+    extr_r.w  s2, $ac2, 11  // tmp2 = (ac2 + 1024) >> 11
+    extr_r.w  s3, $ac3, 11  // tmp3 = (ac3 + 1024) >> 11
+    addq.ph   s5, s4, s6    // tmp5 = t1+t2|t0+t3 = t11|t10
+    subq.ph   s7, s4, s6    // tmp7 = t1-t2|t0-t3 = t12|t13
+    sh        s0, 2(a1)     // row element 1
+    sh        s1, 6(a1)     // row element 3
+    sh        s2, 10(a1)    // row element 5
+    sh        s3, 14(a1)    // row element 7
+    mult      $0, $0        // ac0  = 0
+    dpa.w.ph  $ac0, s7, t8  // ac0 += t12*  4433 + t13* 10703
+    mult      $ac1, $0, $0  // ac1  = 0
+    dpa.w.ph  $ac1, s7, t9  // ac1 += t12*-10704 + t13*  4433
+    sra       s4, s5, 16    // tmp4 = t11
+    addiu     a1, a1, 16    // next row (stores below use negative offsets)
+    addiu     s8, s8, -1
+    extr_r.w  s0, $ac0, 11  // tmp0 = (ac0 + 1024) >> 11
+    extr_r.w  s1, $ac1, 11  // tmp1 = (ac1 + 1024) >> 11
+    addu      s2, s5, s4    // tmp2 = t10 + t11 (in the low halfword)
+    subu      s3, s5, s4    // tmp3 = t10 - t11 (in the low halfword)
+    sll       s2, s2, 2     // tmp2 = (t10 + t11) << 2
+    sll       s3, s3, 2     // tmp3 = (t10 - t11) << 2
+    sh        s2, -16(a1)   // row element 0
+    sh        s3, -8(a1)    // row element 4
+    sh        s0, -12(a1)   // row element 2
+    bgtz      s8, 1b
+     sh       s1, -4(a1)    // (delay slot) row element 6
+    li        t0, 2260      // c0
+    li        t1, 11363     // c1
+    li        t2, 9633      // c2
+    li        t3, 6436      // c3
+    li        t4, 6437      // c4
+    li        t5, 2261      // c5
+    li        t6, 11362     // c6
+    li        t7, 2259      // c7
+    li        t8, 4433      // c8
+    li        t9, 10703     // c9
+    li        a1, 10704     // c10
+    li        s8, 8         // column counter
+
+2:                          // pass 2: one column per iteration
+    lh        a2, 0(a0)     // 0
+    lh        a3, 16(a0)    // 8
+    lh        v0, 32(a0)    // 16
+    lh        v1, 48(a0)    // 24
+    lh        s4, 64(a0)    // 32
+    lh        s5, 80(a0)    // 40
+    lh        s6, 96(a0)    // 48
+    lh        s7, 112(a0)   // 56
+    addu      s2, v0, s5    // tmp2 = 16 + 40
+    subu      s5, v0, s5    // tmp5 = 16 - 40
+    addu      s3, v1, s4    // tmp3 = 24 + 32
+    subu      s4, v1, s4    // tmp4 = 24 - 32
+    addu      s0, a2, s7    // tmp0 =  0 + 56
+    subu      s7, a2, s7    // tmp7 =  0 - 56
+    addu      s1, a3, s6    // tmp1 =  8 + 48
+    subu      s6, a3, s6    // tmp6 =  8 - 48
+    addu      a2, s0, s3    // tmp10 = tmp0 + tmp3
+    subu      v1, s0, s3    // tmp13 = tmp0 - tmp3
+    addu      a3, s1, s2    // tmp11 = tmp1 + tmp2
+    subu      v0, s1, s2    // tmp12 = tmp1 - tmp2
+    mult      s7, t1        // ac0  = tmp7 * c1
+    madd      s4, t0        // ac0 += tmp4 * c0
+    madd      s5, t4        // ac0 += tmp5 * c4
+    madd      s6, t2        // ac0 += tmp6 * c2
+    mult      $ac1, s7, t2  // ac1  = tmp7 * c2
+    msub      $ac1, s4, t3  // ac1 -= tmp4 * c3
+    msub      $ac1, s5, t6  // ac1 -= tmp5 * c6
+    msub      $ac1, s6, t7  // ac1 -= tmp6 * c7
+    mult      $ac2, s7, t4  // ac2  = tmp7 * c4
+    madd      $ac2, s4, t2  // ac2 += tmp4 * c2
+    madd      $ac2, s5, t5  // ac2 += tmp5 * c5
+    msub      $ac2, s6, t6  // ac2 -= tmp6 * c6
+    mult      $ac3, s7, t0  // ac3  = tmp7 * c0
+    msub      $ac3, s4, t1  // ac3 -= tmp4 * c1
+    madd      $ac3, s5, t2  // ac3 += tmp5 * c2
+    msub      $ac3, s6, t3  // ac3 -= tmp6 * c3
+    extr_r.w  s0, $ac0, 15  // tmp0 = (ac0 + 16384) >> 15
+    extr_r.w  s1, $ac1, 15  // tmp1 = (ac1 + 16384) >> 15
+    extr_r.w  s2, $ac2, 15  // tmp2 = (ac2 + 16384) >> 15
+    extr_r.w  s3, $ac3, 15  // tmp3 = (ac3 + 16384) >> 15
+    addiu     s8, s8, -1
+    addu      s4, a2, a3    // tmp4 = tmp10 + tmp11
+    subu      s5, a2, a3    // tmp5 = tmp10 - tmp11
+    sh        s0, 16(a0)    // column element 1
+    sh        s1, 48(a0)    // column element 3
+    sh        s2, 80(a0)    // column element 5
+    sh        s3, 112(a0)   // column element 7
+    mult      v0, t8        // ac0  = tmp12 * c8
+    madd      v1, t9        // ac0 += tmp13 * c9
+    mult      $ac1, v1, t8  // ac1  = tmp13 * c8
+    msub      $ac1, v0, a1  // ac1 -= tmp12 * c10
+    addiu     a0, a0, 2     // next column (stores below are offset by -2)
+    extr_r.w  s6, $ac0, 15  // tmp6 = (ac0 + 16384) >> 15
+    extr_r.w  s7, $ac1, 15  // tmp7 = (ac1 + 16384) >> 15
+    shra_r.w  s4, s4, 2     // tmp4 = (tmp4 + 2) >> 2
+    shra_r.w  s5, s5, 2     // tmp5 = (tmp5 + 2) >> 2
+    sh        s4, -2(a0)    // column element 0
+    sh        s5, 62(a0)    // column element 4
+    sh        s6, 30(a0)    // column element 2
+    bgtz      s8, 2b
+     sh       s7, 94(a0)    // (delay slot) column element 6
+
+    RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    jr       ra
+     nop
+
+END(jsimd_fdct_islow_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_fdct_ifast_mips_dspr2)
+/* Transforms the block at a0 in place in two passes: 8 rows of 16 bytes, then 8 columns at a 16-byte stride.
+ * a0     - data (read and overwritten as halfwords)
+ */
+    .set at
+    SAVE_REGS_ON_STACK 8, s0, s1
+    li           a1, 0x014e014e  // FIX_1_306562965 (334 << 16)|(334 & 0xffff)
+    li           a2, 0x008b008b  // FIX_0_541196100 (139 << 16)|(139 & 0xffff)
+    li           a3, 0x00620062  // FIX_0_382683433 (98 << 16) |(98 & 0xffff)
+    li           s1, 0x00b500b5  // FIX_0_707106781 (181 << 16)|(181 & 0xffff)
+
+    move         v0, a0          // v0 = current row pointer
+    addiu        v1, v0, 128     // end address
+
+0:                               // pass 1: one 16-byte row per iteration
+    lw           t0, 0(v0)       // tmp0 = 1|0
+    lw           t1, 4(v0)       // tmp1 = 3|2
+    lw           t2, 8(v0)       // tmp2 = 5|4
+    lw           t3, 12(v0)      // tmp3 = 7|6
+    packrl.ph    t1, t1, t1      // tmp1 = 2|3
+    packrl.ph    t3, t3, t3      // tmp3 = 6|7
+    subq.ph      t7, t1, t2      // tmp7 = 2-5|3-4 = t5|t4
+    subq.ph      t5, t0, t3      // tmp5 = 1-6|0-7 = t6|t7
+    addq.ph      t6, t1, t2      // tmp6 = 2+5|3+4 = t2|t3
+    addq.ph      t4, t0, t3      // tmp4 = 1+6|0+7 = t1|t0
+    addq.ph      t8, t4, t6      // t8 = t1+t2|t0+t3 = t11|t10
+    subq.ph      t9, t4, t6      // t9 = t1-t2|t0-t3 = t12|t13
+    sra          t4, t8, 16      // tmp4 = t11
+    mult         $0, $0          // ac0  = 0
+    dpa.w.ph     $ac0, t9, s1    // ac0 += t12*181 + t13*181
+    mult         $ac1, $0, $0    // ac1  = 0
+    dpa.w.ph     $ac1, t7, a3    // ac1 += t4*98 + t5*98
+    dpsx.w.ph    $ac1, t5, a3    // ac1 -= t6*98 + t7*98
+    mult         $ac2, $0, $0    // ac2  = 0
+    dpa.w.ph     $ac2, t7, a2    // ac2 += t4*139 + t5*139
+    mult         $ac3, $0, $0    // ac3  = 0
+    dpa.w.ph     $ac3, t5, a1    // ac3 += t6*334 + t7*334
+    precrq.ph.w  t0, t5, t7      // t0 = t6|t5
+    addq.ph      t2, t8, t4      // tmp2 = t10 + t11
+    subq.ph      t3, t8, t4      // tmp3 = t10 - t11
+    extr.w       t4, $ac0, 8     // t4 = z1 = ((t12 + t13) * 181) >> 8
+    mult         $0, $0          // ac0  = 0
+    dpa.w.ph     $ac0, t0, s1    // ac0 += t5*181 + t6*181
+    extr.w       t0, $ac1, 8     // t0 = z5
+    extr.w       t1, $ac2, 8     // t1 = MULTIPLY(tmp10, 139)
+    extr.w       t7, $ac3, 8     // t7 = MULTIPLY(tmp12, 334)
+    extr.w       t8, $ac0, 8     // t8 = z3 = MULTIPLY(tmp11, 181)
+    add          t6, t1, t0      // t6 = z2
+    add          t7, t7, t0      // t7 = z4
+    subq.ph      t0, t5, t8      // t0 = z13 = tmp7 - z3
+    addq.ph      t8, t5, t8      // t8 = z11 = tmp7 + z3
+    addq.ph      t1, t0, t6      // t1 = z13 + z2
+    subq.ph      t6, t0, t6      // t6 = z13 - z2
+    addq.ph      t0, t8, t7      // t0 = z11 + z4
+    subq.ph      t7, t8, t7      // t7 = z11 - z4
+    addq.ph      t5, t4, t9      // t5 = tmp13 + z1 (low halfword)
+    subq.ph      t4, t9, t4      // t4 = tmp13 - z1 (low halfword)
+    sh           t2, 0(v0)       // row element 0
+    sh           t5, 4(v0)       // row element 2
+    sh           t3, 8(v0)       // row element 4
+    sh           t4, 12(v0)      // row element 6
+    sh           t1, 10(v0)      // row element 5
+    sh           t6, 6(v0)       // row element 3
+    sh           t0, 2(v0)       // row element 1
+    sh           t7, 14(v0)      // row element 7
+    addiu        v0, 16          // next row
+    bne          v1, v0, 0b
+     nop
+    move         v0, a0          // v0 = current column pointer
+    addiu        v1, v0, 16      // end address (8 columns x 2 bytes)
+
+1:                               // pass 2: one column per iteration
+    lh           t0, 0(v0)       // 0
+    lh           t1, 16(v0)      // 8
+    lh           t2, 32(v0)      // 16
+    lh           t3, 48(v0)      // 24
+    lh           t4, 64(v0)      // 32
+    lh           t5, 80(v0)      // 40
+    lh           t6, 96(v0)      // 48
+    lh           t7, 112(v0)     // 56
+    add          t8, t0, t7      // t8 = tmp0
+    sub          t7, t0, t7      // t7 = tmp7
+    add          t0, t1, t6      // t0 = tmp1
+    sub          t1, t1, t6      // t1 = tmp6
+    add          t6, t2, t5      // t6 = tmp2
+    sub          t5, t2, t5      // t5 = tmp5
+    add          t2, t3, t4      // t2 = tmp3
+    sub          t3, t3, t4      // t3 = tmp4
+    add          t4, t8, t2      // t4 = tmp10 = tmp0 + tmp3
+    sub          t8, t8, t2      // t8 = tmp13 = tmp0 - tmp3
+    sub          s0, t0, t6      // s0 = tmp12 = tmp1 - tmp2
+    ins          t8, s0, 16, 16  // t8 = tmp12|tmp13
+    add          t2, t0, t6      // t2 = tmp11 = tmp1 + tmp2
+    mult         $0, $0          // ac0  = 0
+    dpa.w.ph     $ac0, t8, s1    // ac0 += t12*181 + t13*181
+    add          s0, t4, t2      // s0 = tmp10+tmp11
+    sub          t4, t4, t2      // t4 = tmp10-tmp11
+    sh           s0, 0(v0)       // column element 0
+    sh           t4, 64(v0)      // column element 4
+    extr.w       t2, $ac0, 8     // z1 = MULTIPLY(tmp12+tmp13,FIX_0_707106781)
+    addq.ph      t4, t8, t2      // t4 = tmp13 + z1 (low halfword)
+    subq.ph      t8, t8, t2      // t8 = tmp13 - z1 (low halfword)
+    sh           t4, 32(v0)      // column element 2
+    sh           t8, 96(v0)      // column element 6
+    add          t3, t3, t5      // t3 = tmp10 = tmp4 + tmp5
+    add          t0, t5, t1      // t0 = tmp11 = tmp5 + tmp6
+    add          t1, t1, t7      // t1 = tmp12 = tmp6 + tmp7
+    andi         t4, a1, 0xffff  // t4 = 334 (low half of the packed constant)
+    mul          s0, t1, t4
+    sra          s0, s0, 8       // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965)
+    ins          t1, t3, 16, 16  // t1 = tmp10|tmp12
+    mult         $0, $0          // ac0  = 0
+    mulsa.w.ph   $ac0, t1, a3    // ac0 += t10*98 - t12*98
+    extr.w       t8, $ac0, 8     // z5 = MULTIPLY(tmp10-tmp12,FIX_0_382683433)
+    add          t2, t7, t8      // t2 = tmp7 + z5
+    sub          t7, t7, t8      // t7 = tmp7 - z5
+    andi         t4, a2, 0xffff  // t4 = 139 (low half of the packed constant)
+    mul          t8, t3, t4
+    sra          t8, t8, 8       // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100)
+    andi         t4, s1, 0xffff  // t4 = 181 (low half of the packed constant)
+    mul          t6, t0, t4
+    sra          t6, t6, 8       // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781)
+    add          t0, t6, t8      // t0 = z3 + z2
+    sub          t1, t6, t8      // t1 = z3 - z2
+    add          t3, t6, s0      // t3 = z3 + z4
+    sub          t4, t6, s0      // t4 = z3 - z4
+    sub          t5, t2, t1      // t5 = dataptr[5]
+    sub          t6, t7, t0      // t6 = dataptr[3]
+    add          t3, t2, t3      // t3 = dataptr[1]
+    add          t4, t7, t4      // t4 = dataptr[7]
+    sh           t5, 80(v0)      // column element 5
+    sh           t6, 48(v0)      // column element 3
+    sh           t3, 16(v0)      // column element 1
+    sh           t4, 112(v0)     // column element 7
+    addiu        v0, 2           // next column
+    bne          v0, v1, 1b
+     nop
+
+    RESTORE_REGS_FROM_STACK 8, s0, s1
+
+    j            ra
+     nop
+END(jsimd_fdct_ifast_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_quantize_mips_dspr2)
+/* Quantizes two halfwords per step: out = sign(x) * ((((|x| + addend) & 0xffff) * (mult & 0xffff)) >> (shift + 16)).
+ * a0     - coef_block (output halfwords)
+ * a1     - divisors (mult at +0, addend at +128, shift at +384 bytes from the current entry)
+ * a2     - workspace (input halfwords)
+ */
+
+    .set at
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2
+
+    addiu   v0, a2, 124  // v0 = workspace_end (the last pair is handled after the loop)
+    lh      t0, 0(a2)    // x0 = first workspace value
+    lh      t1, 0(a1)    // mult for x0
+    lh      t2, 128(a1)  // addend for x0
+    sra     t3, t0, 15   // t3 = 0 or -1
+    sll     t3, t3, 1    // t3 = 0 or -2
+    addiu   t3, t3, 1    // t3 = +1 or -1 (sign of x0)
+    mul     t0, t0, t3   // t0 = |x0|
+    lh      t4, 384(a1)  // shift for x0
+    lh      t5, 130(a1)  // addend for x1
+    lh      t6, 2(a2)    // x1 = second workspace value
+    lh      t7, 2(a1)    // mult for x1
+    lh      t8, 386(a1)  // shift for x1
+
+1:                       // loop: finish the pair loaded earlier, then load the next pair
+    andi    t1, 0xffff   // mult as unsigned 16-bit
+    add     t9, t0, t2   // |x0| + addend
+    andi    t9, 0xffff
+    mul     v1, t9, t1
+    sra     s0, t6, 15
+    sll     s0, s0, 1
+    addiu   s0, s0, 1    // s0 = +1 or -1 (sign of x1)
+    addiu   t9, t4, 16   // shift + 16
+    srav    v1, v1, t9
+    mul     v1, v1, t3   // reapply the sign of x0
+    mul     t6, t6, s0   // t6 = |x1|
+    andi    t7, 0xffff
+    addiu   a2, a2, 4    // advance workspace by two halfwords
+    addiu   a1, a1, 4    // advance divisors by two halfwords
+    add     s1, t6, t5   // |x1| + addend
+    andi    s1, 0xffff
+    sh      v1, 0(a0)    // store quantized x0
+
+    mul     s2, s1, t7
+    addiu   s1, t8, 16   // shift + 16
+    srav    s2, s2, s1
+    mul     s2,s2, s0    // reapply the sign of x1
+    lh      t0, 0(a2)    // next x0
+    lh      t1, 0(a1)
+    sra     t3, t0, 15
+    sll     t3, t3, 1
+    addiu   t3, t3, 1
+    mul     t0, t0, t3
+    lh      t2, 128(a1)
+    lh      t4, 384(a1)
+    lh      t5, 130(a1)
+    lh      t8, 386(a1)
+    lh      t6, 2(a2)    // next x1
+    lh      t7, 2(a1)
+    sh      s2, 2(a0)    // store quantized x1
+    lh      t0, 0(a2)    // NOTE(review): this lh and the next four lines appear to repeat the t0/t3 sequence above
+    sra     t3, t0, 15
+    sll     t3, t3, 1
+    addiu   t3, t3, 1
+    mul     t0, t0,t3
+    bne     a2, v0, 1b
+     addiu  a0, a0, 4    // (delay slot) advance coef_block by two halfwords
+
+    andi    t1, 0xffff   // epilogue: quantize the final pair already held in registers
+    add     t9, t0, t2
+    andi    t9, 0xffff
+    mul     v1, t9, t1
+    sra     s0, t6, 15
+    sll     s0, s0, 1
+    addiu   s0, s0, 1
+    addiu   t9, t4, 16
+    srav    v1, v1, t9
+    mul     v1, v1, t3
+    mul     t6, t6, s0
+    andi    t7, 0xffff
+    sh      v1, 0(a0)
+    add     s1, t6, t5
+    andi    s1, 0xffff
+    mul     s2, s1, t7
+    addiu   s1, t8, 16
+    addiu   a2, a2, 4
+    addiu   a1, a1, 4
+    srav    s2, s2, s1
+    mul     s2, s2, s0
+    sh      s2, 2(a0)
+
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2
+
+    j       ra
+     nop
+
+END(jsimd_quantize_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_quantize_float_mips_dspr2)
+/* For 64 values: out = trunc(workspace * divisors + 16384.5) - 16384, stored as halfwords; 8 values per loop iteration.
+ * a0     - coef_block (output halfwords)
+ * a1     - divisors (single-precision floats)
+ * a2     - workspace (single-precision floats)
+ */
+
+    .set at
+
+    li         t1, 0x46800100     // IEEE-754 single-precision bit pattern of 16384.5
+    mtc1       t1, f0             // f0 = 16384.5
+    li         t0, 63             // counter: decremented by 8 per iteration, loop runs while >= 0 (8 iterations)
+0:
+    lwc1       f1, 0(a2)
+    lwc1       f5, 0(a1)
+    lwc1       f2, 4(a2)
+    lwc1       f6, 4(a1)
+    lwc1       f3, 8(a2)
+    lwc1       f7, 8(a1)
+    lwc1       f4, 12(a2)
+    lwc1       f8, 12(a1)
+    madd.s     f1, f0, f1, f5     // f1 = workspace[0] * divisors[0] + 16384.5
+    madd.s     f2, f0, f2, f6
+    madd.s     f3, f0, f3, f7
+    madd.s     f4, f0, f4, f8
+    lwc1       f5, 16(a1)         // preload divisors[4..5] for the second half
+    lwc1       f6, 20(a1)
+    trunc.w.s  f1, f1             // truncate to a 32-bit integer
+    trunc.w.s  f2, f2
+    trunc.w.s  f3, f3
+    trunc.w.s  f4, f4
+    lwc1       f7, 24(a1)
+    lwc1       f8, 28(a1)
+    mfc1       t1, f1
+    mfc1       t2, f2
+    mfc1       t3, f3
+    mfc1       t4, f4
+    lwc1       f1, 16(a2)         // workspace[4..7]
+    lwc1       f2, 20(a2)
+    lwc1       f3, 24(a2)
+    lwc1       f4, 28(a2)
+    madd.s     f1, f0, f1, f5
+    madd.s     f2, f0, f2, f6
+    madd.s     f3, f0, f3, f7
+    madd.s     f4, f0, f4, f8
+    addiu      t1, t1, -16384     // remove the 16384 bias added through f0
+    addiu      t2, t2, -16384
+    addiu      t3, t3, -16384
+    addiu      t4, t4, -16384
+    trunc.w.s  f1, f1
+    trunc.w.s  f2, f2
+    trunc.w.s  f3, f3
+    trunc.w.s  f4, f4
+    sh         t1, 0(a0)          // coef_block[0..3]
+    sh         t2, 2(a0)
+    sh         t3, 4(a0)
+    sh         t4, 6(a0)
+    mfc1       t1, f1
+    mfc1       t2, f2
+    mfc1       t3, f3
+    mfc1       t4, f4
+    addiu      t0, t0, -8
+    addiu      a2, a2, 32         // next 8 workspace floats
+    addiu      a1, a1, 32         // next 8 divisor floats
+    addiu      t1, t1, -16384
+    addiu      t2, t2, -16384
+    addiu      t3, t3, -16384
+    addiu      t4, t4, -16384
+    sh         t1, 8(a0)          // coef_block[4..7]
+    sh         t2, 10(a0)
+    sh         t3, 12(a0)
+    sh         t4, 14(a0)
+    bgez       t0, 0b
+     addiu     a0, a0, 16         // (delay slot) next 8 output halfwords
+
+    j          ra
+     nop
+
+END(jsimd_quantize_float_mips_dspr2)
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_2x2_mips_dspr2)
+/* Reduced-size inverse DCT: reads rows 0,1,3,5,7 of columns 0,1,3,5,7 and writes a 2x2 block of bytes.
+ * a0     - compptr->dct_table (16-bit multipliers, read with lh)
+ * a1     - coef_block (16-bit coefficients, read with lh)
+ * a2     - output_buf (two row pointers are read, at 0(a2) and 4(a2))
+ * a3     - output_col (byte offset added to each row pointer)
+ */
+    .set at
+
+    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
+
+    addiu     sp, sp, -40       // 40-byte scratch: words 0..4 = first result per column, 5..9 = second
+    move      v0, sp            // v0 = scratch base
+    addiu     s2, zero, 29692   // s2..s5: the four multipliers as scalars, used by mult/madd in pass 2
+    addiu     s3, zero, -10426
+    addiu     s4, zero, 6967
+    addiu     s5, zero, -5906
+    lh        t0, 0(a1)         // t0 = inptr[DCTSIZE*0]
+    lh        t5, 0(a0)         // t5 = quantptr[DCTSIZE*0]
+    lh        t1, 48(a1)        // t1 = inptr[DCTSIZE*3]
+    lh        t6, 48(a0)        // t6 = quantptr[DCTSIZE*3]
+    mul       t4, t5, t0        // t4 = dequantized row 0 (DC term of column 0)
+    lh        t0, 16(a1)        // t0 = inptr[DCTSIZE*1]
+    lh        t5, 16(a0)        // t5 = quantptr[DCTSIZE*1]
+    mul       t6, t6, t1        // t6 = dequantized row 3
+    mul       t5, t5, t0        // t5 = dequantized row 1
+    lh        t2, 80(a1)        // t2 = inptr[DCTSIZE*5]
+    lh        t7, 80(a0)        // t7 = quantptr[DCTSIZE*5]
+    lh        t3, 112(a1)       // t3 = inptr[DCTSIZE*7]
+    lh        t8, 112(a0)       // t8 = quantptr[DCTSIZE*7]
+    mul       t7, t7, t2        // t7 = dequantized row 5
+    mult      zero, zero        // clear accumulator $ac0
+    mul       t8, t8, t3        // t8 = dequantized row 7
+    li        s0, 0x73FCD746    // s0 = (29692 << 16) | (-10426 & 0xffff)
+    li        s1, 0x1B37E8EE    // s1 = (6967 << 16) | (-5906 & 0xffff)
+    ins       t6, t5, 16, 16    // t6 = (row1 << 16) | (row3 & 0xffff)
+    sll       t4, t4, 15        // scale the DC term
+    dpa.w.ph  $ac0, t6, s0      // ac0 += row1 * 29692 + row3 * -10426
+    lh        t1, 2(a1)         // column 1 starts here, interleaved with the end of column 0
+    lh        t6, 2(a0)
+    ins       t8, t7, 16, 16    // t8 = (row5 << 16) | (row7 & 0xffff)
+    dpa.w.ph  $ac0, t8, s1      // ac0 += row5 * 6967 + row7 * -5906
+    mflo      t0, $ac0          // t0 = odd-row sum for column 0
+    mul       t5, t6, t1        // t5 = dequantized row 0 of column 1
+    lh        t1, 18(a1)
+    lh        t6, 18(a0)
+    lh        t2, 50(a1)
+    lh        t7, 50(a0)
+    mul       t6, t6, t1        // row 1 of column 1
+    subu      t8, t4, t0        // column 0: DC - odd sum
+    mul       t7, t7, t2        // row 3 of column 1
+    addu      t0, t4, t0        // column 0: DC + odd sum
+    shra_r.w  t0, t0, 13        // rounding right shift by 13
+    lh        t1, 82(a1)
+    lh        t2, 82(a0)
+    lh        t3, 114(a1)
+    lh        t4, 114(a0)
+    shra_r.w  t8, t8, 13
+    mul       t1, t1, t2        // row 5 of column 1
+    mul       t3, t3, t4        // row 7 of column 1
+    sw        t0, 0(v0)         // scratch[0] = column 0, sum
+    sw        t8, 20(v0)        // scratch[5] = column 0, difference
+    sll       t4, t5, 15        // scaled DC term of column 1
+    ins       t7, t6, 16, 16    // (row1 << 16) | row3
+    mult      zero, zero        // clear $ac0
+    dpa.w.ph  $ac0, t7, s0
+    ins       t3, t1, 16, 16    // (row5 << 16) | row7
+    lh        t1, 6(a1)         // column 3 starts here
+    lh        t6, 6(a0)
+    dpa.w.ph  $ac0, t3, s1
+    mflo      t0, $ac0          // odd-row sum for column 1
+    mul       t5, t6, t1
+    lh        t1, 22(a1)
+    lh        t6, 22(a0)
+    lh        t2, 54(a1)
+    lh        t7, 54(a0)
+    mul       t6, t6, t1
+    subu      t8, t4, t0
+    mul       t7, t7, t2
+    addu      t0, t4, t0
+    shra_r.w  t0, t0, 13
+    lh        t1, 86(a1)
+    lh        t2, 86(a0)
+    lh        t3, 118(a1)
+    lh        t4, 118(a0)
+    shra_r.w  t8, t8, 13
+    mul       t1, t1, t2
+    mul       t3, t3, t4
+    sw        t0, 4(v0)         // scratch[1] = column 1, sum
+    sw        t8, 24(v0)        // scratch[6] = column 1, difference
+    sll       t4, t5, 15        // scaled DC term of column 3
+    ins       t7, t6, 16, 16
+    mult      zero, zero
+    dpa.w.ph  $ac0, t7, s0
+    ins       t3, t1, 16, 16
+    lh        t1, 10(a1)        // column 5 starts here
+    lh        t6, 10(a0)
+    dpa.w.ph  $ac0, t3, s1
+    mflo      t0, $ac0          // odd-row sum for column 3
+    mul       t5, t6, t1
+    lh        t1, 26(a1)
+    lh        t6, 26(a0)
+    lh        t2, 58(a1)
+    lh        t7, 58(a0)
+    mul       t6, t6, t1
+    subu      t8, t4, t0
+    mul       t7, t7, t2
+    addu      t0, t4, t0
+    shra_r.w  t0, t0, 13
+    lh        t1, 90(a1)
+    lh        t2, 90(a0)
+    lh        t3, 122(a1)
+    lh        t4, 122(a0)
+    shra_r.w  t8, t8, 13
+    mul       t1, t1, t2
+    mul       t3, t3, t4
+    sw        t0, 8(v0)         // scratch[2] = column 3, sum
+    sw        t8, 28(v0)        // scratch[7] = column 3, difference
+    sll       t4, t5, 15        // scaled DC term of column 5
+    ins       t7, t6, 16, 16
+    mult      zero, zero
+    dpa.w.ph  $ac0, t7, s0
+    ins       t3, t1, 16, 16
+    lh        t1, 14(a1)        // column 7 starts here
+    lh        t6, 14(a0)
+    dpa.w.ph  $ac0, t3, s1
+    mflo      t0, $ac0          // odd-row sum for column 5
+    mul       t5, t6, t1
+    lh        t1, 30(a1)
+    lh        t6, 30(a0)
+    lh        t2, 62(a1)
+    lh        t7, 62(a0)
+    mul       t6, t6, t1
+    subu      t8, t4, t0
+    mul       t7, t7, t2
+    addu      t0, t4, t0
+    shra_r.w  t0, t0, 13
+    lh        t1, 94(a1)
+    lh        t2, 94(a0)
+    lh        t3, 126(a1)
+    lh        t4, 126(a0)
+    shra_r.w  t8, t8, 13
+    mul       t1, t1, t2
+    mul       t3, t3, t4
+    sw        t0, 12(v0)        // scratch[3] = column 5, sum
+    sw        t8, 32(v0)        // scratch[8] = column 5, difference
+    sll       t4, t5, 15        // scaled DC term of column 7
+    ins       t7, t6, 16, 16
+    mult      zero, zero
+    dpa.w.ph  $ac0, t7, s0
+    ins       t3, t1, 16, 16
+    dpa.w.ph  $ac0, t3, s1
+    mflo      t0, $ac0          // odd-row sum for column 7
+    lw        t9, 0(a2)         // pass 2 begins: t9 = output_buf[0]
+    lw        t3, 0(v0)         // scratch[0]
+    lw        t7, 4(v0)         // scratch[1]
+    lw        t1, 8(v0)         // scratch[2]
+    addu      t9, t9, a3        // t9 = output_buf[0] + output_col
+    sll       t3, t3, 15        // scaled leading term of output row 0
+    subu      t8, t4, t0
+    addu      t0, t4, t0
+    shra_r.w  t0, t0, 13
+    shra_r.w  t8, t8, 13
+    sw        t0, 16(v0)        // scratch[4] = column 7, sum
+    sw        t8, 36(v0)        // scratch[9] = column 7, difference
+    lw        t5, 12(v0)        // scratch[3]
+    lw        t6, 16(v0)        // scratch[4]
+    mult      t7, s2            // ac0  = scratch[1] * 29692
+    madd      t1, s3            // ac0 += scratch[2] * -10426
+    madd      t5, s4            // ac0 += scratch[3] * 6967
+    madd      t6, s5            // ac0 += scratch[4] * -5906
+    lw        t5, 24(v0)        // scratch[6]
+    lw        t7, 28(v0)        // scratch[7]
+    mflo      t0, $ac0          // t0 = weighted sum for output row 0
+    lw        t8, 32(v0)        // scratch[8]
+    lw        t2, 36(v0)        // scratch[9]
+    mult      $ac1, t5, s2      // same weighted sum for output row 1, accumulated in $ac1
+    madd      $ac1, t7, s3
+    madd      $ac1, t8, s4
+    madd      $ac1, t2, s5
+    addu      t1, t3, t0        // row 0, pixel 0 (before descale)
+    subu      t6, t3, t0        // row 0, pixel 1 (before descale)
+    shra_r.w  t1, t1, 20        // rounding right shift by 20
+    shra_r.w  t6, t6, 20
+    mflo      t4, $ac1          // t4 = weighted sum for output row 1
+    shll_s.w  t1, t1, 24        // saturating shift left, then arithmetic shift right:
+    shll_s.w  t6, t6, 24        // clamps the value to the signed 8-bit range
+    sra       t1, t1, 24
+    sra       t6, t6, 24
+    addiu     t1, t1, 128       // shift the clamped value into 0..255
+    addiu     t6, t6, 128
+    lw        t0, 20(v0)        // scratch[5], leading term of output row 1
+    sb        t1, 0(t9)         // output row 0, pixel 0
+    sb        t6, 1(t9)         // output row 0, pixel 1
+    sll       t0, t0, 15
+    lw        t9, 4(a2)         // t9 = output_buf[1]
+    addu      t1, t0, t4
+    subu      t6, t0, t4
+    addu      t9, t9, a3        // t9 = output_buf[1] + output_col
+    shra_r.w  t1, t1, 20
+    shra_r.w  t6, t6, 20
+    shll_s.w  t1, t1, 24
+    shll_s.w  t6, t6, 24
+    sra       t1, t1, 24
+    sra       t6, t6, 24
+    addiu     t1, t1, 128
+    addiu     t6, t6, 128
+    sb        t1, 0(t9)         // output row 1, pixel 0
+    sb        t6, 1(t9)         // output row 1, pixel 1
+    addiu     sp, sp, 40        // release the scratch area
+
+    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
+
+    j         ra
+     nop
+
+END(jsimd_idct_2x2_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_4x4_mips_dspr2)
+/* Reduced-size inverse DCT: pass 1 fills 4 workspace rows from columns 0-3 and 5-7, pass 2 writes 4x4 bytes.
+ * a0     - compptr->dct_table
+ * a1     - coef_block
+ * a2     - output_buf
+ * a3     - output_col
+ * 16(sp) - workspace[DCTSIZE*4];  // buffers data between passes
+ */
+
+    .set at
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw        v1, 48(sp)        // workspace pointer; presumably the caller's 16(sp) after a 32-byte save area
+    move      t0, a1            // t0 = inptr
+    move      t1, v1            // t1 = wsptr
+    li        t9, 4             // first loop: 4 columns (0..3)
+    li        s0, 0x2e75f93e    // s0 = (11893 << 16) | (-1730 & 0xffff)
+    li        s1, 0x21f9ba79    // s1 = (8697 << 16) | (-17799 & 0xffff)
+    li        s2, 0xecc2efb0    // s2 = (-4926 << 16) | (-4176 & 0xffff)
+    li        s3, 0x52031ccd    // s3 = (20995 << 16) | 7373
+
+0:
+    lh        s6, 32(t0)        // inptr[DCTSIZE*2]
+    lh        t6, 32(a0)        // quantptr[DCTSIZE*2]
+    lh        s7, 96(t0)        // inptr[DCTSIZE*6]
+    lh        t7, 96(a0)        // quantptr[DCTSIZE*6]
+    mul       t6, s6, t6        // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2])
+    lh        s4, 0(t0)         // inptr[DCTSIZE*0]
+    mul       t7, s7, t7        // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6])
+    lh        s5, 0(a0)         // quantptr[0]
+    li        s6, 15137         // FIX_1_847759065
+    li        s7, 6270          // FIX_0_765366865
+    mul       t2, s4, s5        // tmp0 = (inptr[0] * quantptr[0])
+    mul       t6, s6, t6        // MULTIPLY(z2, FIX_1_847759065)
+    lh        t5, 112(t0)       // inptr[DCTSIZE*7]
+    mul       t7, s7, t7        // MULTIPLY(z3, FIX_0_765366865)
+    lh        s4, 112(a0)       // quantptr[DCTSIZE*7]
+    lh        v0, 80(t0)        // inptr[DCTSIZE*5]
+    lh        s5, 80(a0)        // quantptr[DCTSIZE*5]
+    lh        s6, 48(a0)        // quantptr[DCTSIZE*3]
+    sll       t2, t2, 14        // tmp0 <<= (CONST_BITS+1)
+    lh        s7, 16(a0)        // quantptr[DCTSIZE*1]
+    lh        t8, 16(t0)        // inptr[DCTSIZE*1]
+    subu      t6, t6, t7        // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865)
+    lh        t7, 48(t0)        // inptr[DCTSIZE*3]
+    mul       t5, s4, t5        // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7])
+    mul       v0, s5, v0        // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5])
+    mul       t7, s6, t7        // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3])
+    mul       t8, s7, t8        // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1])
+    addu      t3, t2, t6        // tmp10 = tmp0 + tmp2
+    subu      t4, t2, t6        // tmp12 = tmp0 - tmp2
+    mult      $ac0, zero, zero  // clear $ac0
+    mult      $ac1, zero, zero  // clear $ac1
+    ins       t5, v0, 16, 16    // t5 = (z2 << 16) | (z1 & 0xffff)
+    ins       t7, t8, 16, 16    // t7 = (z4 << 16) | (z3 & 0xffff)
+    addiu     t9, t9, -1
+    dpa.w.ph  $ac0, t5, s0      // ac0 = z2 * 11893 + z1 * -1730
+    dpa.w.ph  $ac0, t7, s1      // ac0 += z4 * 8697 + z3 * -17799   (temp1)
+    dpa.w.ph  $ac1, t5, s2      // ac1 = z2 * -4926 + z1 * -4176
+    dpa.w.ph  $ac1, t7, s3      // ac1 += z4 * 20995 + z3 * 7373    (temp2)
+    mflo      s4, $ac0          // s4 = temp1
+    mflo      s5, $ac1          // s5 = temp2
+    addiu     a0, a0, 2         // next quantptr column
+    addiu     t1, t1, 4         // next wsptr column (store offsets below are relative to this)
+    addiu     t0, t0, 2         // next inptr column
+    addu      t6, t4, s4
+    subu      t5, t4, s4
+    addu      s6, t3, s5
+    subu      s7, t3, s5
+    shra_r.w  t6, t6, 12        // DESCALE(tmp12 + temp1, 12)
+    shra_r.w  t5, t5, 12        // DESCALE(tmp12 - temp1, 12)
+    shra_r.w  s6, s6, 12        // DESCALE(tmp10 + temp2, 12)
+    shra_r.w  s7, s7, 12        // DESCALE(tmp10 - temp2, 12)
+    sw        t6, 28(t1)        // wsptr[DCTSIZE*1] of the column just processed
+    sw        t5, 60(t1)        // wsptr[DCTSIZE*2]
+    sw        s6, -4(t1)        // wsptr[DCTSIZE*0]
+    bgtz      t9, 0b
+     sw       s7, 92(t1)        // (delay slot) wsptr[DCTSIZE*3]
+    // second loop: three passes for columns 5, 6, 7 (column 4 is skipped via the +2 load offsets)
+    li        t9, 3
+1:
+    lh        s6, 34(t0)        // inptr[DCTSIZE*2]
+    lh        t6, 34(a0)        // quantptr[DCTSIZE*2]
+    lh        s7, 98(t0)        // inptr[DCTSIZE*6]
+    lh        t7, 98(a0)        // quantptr[DCTSIZE*6]
+    mul       t6, s6, t6        // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2])
+    lh        s4, 2(t0)         // inptr[DCTSIZE*0]
+    mul       t7, s7, t7        // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6])
+    lh        s5, 2(a0)         // quantptr[DCTSIZE*0]
+    li        s6, 15137         // FIX_1_847759065
+    li        s7, 6270          // FIX_0_765366865
+    mul       t2, s4, s5        // tmp0 = (inptr[0] * quantptr[0])
+    mul       v0, s6, t6        // MULTIPLY(z2, FIX_1_847759065)
+    lh        t5, 114(t0)       // inptr[DCTSIZE*7]
+    mul       t7, s7, t7        // MULTIPLY(z3, FIX_0_765366865)
+    lh        s4, 114(a0)       // quantptr[DCTSIZE*7]
+    lh        s5, 82(a0)        // quantptr[DCTSIZE*5]
+    lh        t6, 82(t0)        // inptr[DCTSIZE*5]
+    sll       t2, t2, 14        // tmp0 <<= (CONST_BITS+1)
+    lh        s6, 50(a0)        // quantptr[DCTSIZE*3]
+    lh        t8, 18(t0)        // inptr[DCTSIZE*1]
+    subu      v0, v0, t7        // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865)
+    lh        t7, 50(t0)        // inptr[DCTSIZE*3]
+    lh        s7, 18(a0)        // quantptr[DCTSIZE*1]
+    mul       t5, s4, t5        // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7])
+    mul       t6, s5, t6        // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5])
+    mul       t7, s6, t7        // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3])
+    mul       t8, s7, t8        // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1])
+    addu      t3, t2, v0        // tmp10 = tmp0 + tmp2
+    subu      t4, t2, v0        // tmp12 = tmp0 - tmp2
+    mult      $ac0, zero, zero  // clear $ac0
+    mult      $ac1, zero, zero  // clear $ac1
+    ins       t5, t6, 16, 16    // t5 = (z2 << 16) | (z1 & 0xffff)
+    ins       t7, t8, 16, 16    // t7 = (z4 << 16) | (z3 & 0xffff)
+    dpa.w.ph  $ac0, t5, s0
+    dpa.w.ph  $ac0, t7, s1
+    dpa.w.ph  $ac1, t5, s2
+    dpa.w.ph  $ac1, t7, s3
+    mflo      t5, $ac0          // t5 = temp1
+    mflo      t6, $ac1          // t6 = temp2
+    addiu     t9, t9, -1
+    addiu     t0, t0, 2
+    addiu     a0, a0, 2
+    addiu     t1, t1, 4
+    addu      s5, t4, t5
+    subu      s4, t4, t5
+    addu      s6, t3, t6
+    subu      s7, t3, t6
+    shra_r.w  s5, s5, 12        // DESCALE(tmp12 + temp1, 12)
+    shra_r.w  s4, s4, 12        // DESCALE(tmp12 - temp1, 12)
+    shra_r.w  s6, s6, 12        // DESCALE(tmp10 + temp2, 12)
+    shra_r.w  s7, s7, 12        // DESCALE(tmp10 - temp2, 12)
+    sw        s5, 32(t1)        // wsptr[DCTSIZE*1]
+    sw        s4, 64(t1)        // wsptr[DCTSIZE*2]
+    sw        s6, 0(t1)         // wsptr[DCTSIZE*0]
+    bgtz      t9, 1b
+     sw       s7, 96(t1)        // (delay slot) wsptr[DCTSIZE*3]
+    move      t1, v1            // pass 2, output row 0: rewind wsptr to the workspace base
+    li        s4, 15137
+    lw        s6, 8(t1)         // wsptr[2]
+    li        s5, 6270
+    lw        s7, 24(t1)        // wsptr[6]
+    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    lw        t2, 0(t1)         // wsptr[0]
+    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], FIX_0_765366865), subtracted below
+    lh        t5, 28(t1)        // wsptr[7]; NOTE(review): lh of a word stored with sw yields the low half only on little-endian - confirm target
+    lh        t6, 20(t1)        // wsptr[5]
+    lh        t7, 12(t1)        // wsptr[3]
+    lh        t8, 4(t1)         // wsptr[1]
+    ins       t5, t6, 16, 16    // t5 = (wsptr[5] << 16) | (wsptr[7] & 0xffff)
+    ins       t7, t8, 16, 16    // t7 = (wsptr[1] << 16) | (wsptr[3] & 0xffff)
+    mult      $ac0, zero, zero
+    dpa.w.ph  $ac0, t5, s0
+    dpa.w.ph  $ac0, t7, s1
+    mult      $ac1, zero, zero
+    dpa.w.ph  $ac1, t5, s2
+    dpa.w.ph  $ac1, t7, s3
+    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    mflo      s6, $ac0          // s6 = temp1
+    // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+    subu      s4, s4, s5
+    addu      t3, t2, s4        // tmp10 = tmp0 + tmp2
+    mflo      s7, $ac1          // s7 = temp2
+    subu      t4, t2, s4        // tmp12 = tmp0 - tmp2
+    addu      t7, t4, s6
+    subu      t8, t4, s6
+    addu      t5, t3, s7
+    subu      t6, t3, s7
+    shra_r.w  t5, t5, 19        // DESCALE(tmp10 + temp2, 19)
+    shra_r.w  t6, t6, 19        // DESCALE(tmp10 - temp2, 19)
+    shra_r.w  t7, t7, 19        // DESCALE(tmp12 + temp1, 19)
+    shra_r.w  t8, t8, 19        // DESCALE(tmp12 - temp1, 19)
+    sll       s4, t9, 2         // NOTE(review): s4 is reloaded before it is read again; result appears unused
+    lw        v0, 0(a2)         // output_buf[ctr]
+    shll_s.w  t5, t5, 24        // saturating << 24 followed by sra 24 clamps to signed 8-bit
+    shll_s.w  t6, t6, 24
+    shll_s.w  t7, t7, 24
+    shll_s.w  t8, t8, 24
+    sra       t5, t5, 24
+    sra       t6, t6, 24
+    sra       t7, t7, 24
+    sra       t8, t8, 24
+    addu      v0, v0, a3        // outptr = output_buf[ctr] + output_col
+    addiu     t5, t5, 128       // shift the clamped value into 0..255
+    addiu     t6, t6, 128
+    addiu     t7, t7, 128
+    addiu     t8, t8, 128
+    sb        t5, 0(v0)
+    sb        t7, 1(v0)
+    sb        t8, 2(v0)
+    sb        t6, 3(v0)
+    // output row 1 (workspace row at byte offset 32)
+    li        s4, 15137
+    lw        s6, 40(t1)        // wsptr[2]
+    li        s5, 6270
+    lw        s7, 56(t1)        // wsptr[6]
+    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    lw        t2, 32(t1)        // wsptr[0]
+    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], FIX_0_765366865), subtracted below
+    lh        t5, 60(t1)        // wsptr[7]
+    lh        t6, 52(t1)        // wsptr[5]
+    lh        t7, 44(t1)        // wsptr[3]
+    lh        t8, 36(t1)        // wsptr[1]
+    ins       t5, t6, 16, 16
+    ins       t7, t8, 16, 16
+    mult      $ac0, zero, zero
+    dpa.w.ph  $ac0, t5, s0
+    dpa.w.ph  $ac0, t7, s1
+    mult      $ac1, zero, zero
+    dpa.w.ph  $ac1, t5, s2
+    dpa.w.ph  $ac1, t7, s3
+    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    mflo      s6, $ac0
+    // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+    subu      s4, s4, s5
+    addu      t3, t2, s4        // tmp10 = tmp0 + tmp2
+    mflo      s7, $ac1
+    subu      t4, t2, s4        // tmp12 = tmp0 - tmp2
+    addu      t7, t4, s6
+    subu      t8, t4, s6
+    addu      t5, t3, s7
+    subu      t6, t3, s7
+    shra_r.w  t5, t5, 19        // DESCALE(tmp10 + temp2, 19)
+    shra_r.w  t6, t6, 19        // DESCALE(tmp10 - temp2, 19)
+    shra_r.w  t7, t7, 19        // DESCALE(tmp12 + temp1, 19)
+    shra_r.w  t8, t8, 19        // DESCALE(tmp12 - temp1, 19)
+    sll       s4, t9, 2         // NOTE(review): result appears unused (s4 reloaded below)
+    lw        v0, 4(a2)         // output_buf[ctr]
+    shll_s.w  t5, t5, 24
+    shll_s.w  t6, t6, 24
+    shll_s.w  t7, t7, 24
+    shll_s.w  t8, t8, 24
+    sra       t5, t5, 24
+    sra       t6, t6, 24
+    sra       t7, t7, 24
+    sra       t8, t8, 24
+    addu      v0, v0, a3        // outptr = output_buf[ctr] + output_col
+    addiu     t5, t5, 128
+    addiu     t6, t6, 128
+    addiu     t7, t7, 128
+    addiu     t8, t8, 128
+    sb        t5, 0(v0)
+    sb        t7, 1(v0)
+    sb        t8, 2(v0)
+    sb        t6, 3(v0)
+    // output row 2 (workspace row at byte offset 64)
+    li        s4, 15137
+    lw        s6, 72(t1)        // wsptr[2]
+    li        s5, 6270
+    lw        s7, 88(t1)        // wsptr[6]
+    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    lw        t2, 64(t1)        // wsptr[0]
+    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], FIX_0_765366865), subtracted below
+    lh        t5, 92(t1)        // wsptr[7]
+    lh        t6, 84(t1)        // wsptr[5]
+    lh        t7, 76(t1)        // wsptr[3]
+    lh        t8, 68(t1)        // wsptr[1]
+    ins       t5, t6, 16, 16
+    ins       t7, t8, 16, 16
+    mult      $ac0, zero, zero
+    dpa.w.ph  $ac0, t5, s0
+    dpa.w.ph  $ac0, t7, s1
+    mult      $ac1, zero, zero
+    dpa.w.ph  $ac1, t5, s2
+    dpa.w.ph  $ac1, t7, s3
+    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    mflo      s6, $ac0
+    // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+    subu      s4, s4, s5
+    addu      t3, t2, s4        // tmp10 = tmp0 + tmp2
+    mflo      s7, $ac1
+    subu      t4, t2, s4        // tmp12 = tmp0 - tmp2
+    addu      t7, t4, s6
+    subu      t8, t4, s6
+    addu      t5, t3, s7
+    subu      t6, t3, s7
+    shra_r.w  t5, t5, 19        // DESCALE(tmp10 + temp2, 19)
+    shra_r.w  t6, t6, 19        // DESCALE(tmp10 - temp2, 19)
+    shra_r.w  t7, t7, 19        // DESCALE(tmp12 + temp1, 19)
+    shra_r.w  t8, t8, 19        // DESCALE(tmp12 - temp1, 19)
+    sll       s4, t9, 2         // NOTE(review): result appears unused (s4 reloaded below)
+    lw        v0, 8(a2)         // output_buf[ctr]
+    shll_s.w  t5, t5, 24
+    shll_s.w  t6, t6, 24
+    shll_s.w  t7, t7, 24
+    shll_s.w  t8, t8, 24
+    sra       t5, t5, 24
+    sra       t6, t6, 24
+    sra       t7, t7, 24
+    sra       t8, t8, 24
+    addu      v0, v0, a3        // outptr = output_buf[ctr] + output_col
+    addiu     t5, t5, 128
+    addiu     t6, t6, 128
+    addiu     t7, t7, 128
+    addiu     t8, t8, 128
+    sb        t5, 0(v0)
+    sb        t7, 1(v0)
+    sb        t8, 2(v0)
+    sb        t6, 3(v0)
+    li        s4, 15137         // output row 3 (workspace row at byte offset 96)
+    lw        s6, 104(t1)       // wsptr[2]
+    li        s5, 6270
+    lw        s7, 120(t1)       // wsptr[6]
+    mul       s4, s4, s6        // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+    lw        t2, 96(t1)        // wsptr[0]
+    mul       s5, s5, s7        // MULTIPLY((INT32) wsptr[6], FIX_0_765366865), subtracted below
+    lh        t5, 124(t1)       // wsptr[7]
+    lh        t6, 116(t1)       // wsptr[5]
+    lh        t7, 108(t1)       // wsptr[3]
+    lh        t8, 100(t1)       // wsptr[1]
+    ins       t5, t6, 16, 16
+    ins       t7, t8, 16, 16
+    mult      $ac0, zero, zero
+    dpa.w.ph  $ac0, t5, s0
+    dpa.w.ph  $ac0, t7, s1
+    mult      $ac1, zero, zero
+    dpa.w.ph  $ac1, t5, s2
+    dpa.w.ph  $ac1, t7, s3
+    sll       t2, t2, 14        // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+    mflo      s6, $ac0
+    // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+    subu      s4, s4, s5
+    addu      t3, t2, s4        // tmp10 = tmp0 + tmp2;
+    mflo      s7, $ac1
+    subu      t4, t2, s4        // tmp12 = tmp0 - tmp2;
+    addu      t7, t4, s6
+    subu      t8, t4, s6
+    addu      t5, t3, s7
+    subu      t6, t3, s7
+    shra_r.w  t5, t5, 19        // DESCALE(tmp10 + temp2, 19)
+    shra_r.w  t6, t6, 19        // DESCALE(tmp10 - temp2, 19)
+    shra_r.w  t7, t7, 19        // DESCALE(tmp12 + temp1, 19)
+    shra_r.w  t8, t8, 19        // DESCALE(tmp12 - temp1, 19)
+    sll       s4, t9, 2         // NOTE(review): result appears unused (s4 is restored from the stack below)
+    lw        v0, 12(a2)        // output_buf[ctr]
+    shll_s.w  t5, t5, 24
+    shll_s.w  t6, t6, 24
+    shll_s.w  t7, t7, 24
+    shll_s.w  t8, t8, 24
+    sra       t5, t5, 24
+    sra       t6, t6, 24
+    sra       t7, t7, 24
+    sra       t8, t8, 24
+    addu      v0, v0, a3        // outptr = output_buf[ctr] + output_col
+    addiu     t5, t5, 128
+    addiu     t6, t6, 128
+    addiu     t7, t7, 128
+    addiu     t8, t8, 128
+    sb        t5, 0(v0)
+    sb        t7, 1(v0)
+    sb        t8, 2(v0)
+    sb        t6, 3(v0)
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j         ra
+     nop
+END(jsimd_idct_4x4_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_6x6_mips_dspr2)
+/* 6x6 inverse DCT: pass 1 transforms 6 columns into a 144-byte stack work array, pass 2 writes 6 rows of 6 bytes.
+ * a0     - compptr->dct_table
+ * a1     - coef_block
+ * a2     - output_buf
+ * a3     - output_col
+ */
+    .set at
+
+    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    addiu     sp, sp, -144      // work array: 6 rows x 6 words
+    move      v0, sp            // v0 = current work-array column
+    addiu     v1, v0, 24        // v1 = end marker for pass 1 (6 columns x 4 bytes)
+    addiu     t9, zero, 5793    // 5793/8192 ~= 0.7071
+    addiu     s0, zero, 10033   // 10033/8192 ~= 1.2247
+    addiu     s1, zero, 2998    // 2998/8192 ~= 0.3660
+
+1:
+    lh        s2, 0(a0)   // q0 = quantptr[ 0]
+    lh        s3, 32(a0)  // q1 = quantptr[16]
+    lh        s4, 64(a0)  // q2 = quantptr[32]
+    lh        t2, 64(a1)  // tmp2 = inptr[32]
+    lh        t1, 32(a1)  // tmp1 = inptr[16]
+    lh        t0, 0(a1)   // tmp0 = inptr[ 0]
+    mul       t2, t2, s4  // tmp2 = tmp2 * q2
+    mul       t1, t1, s3  // tmp1 = tmp1 * q1
+    mul       t0, t0, s2  // tmp0 = tmp0 * q0
+    lh        t6, 16(a1)  // z1 = inptr[ 8]
+    lh        t8, 80(a1)  // z3 = inptr[40]
+    lh        t7, 48(a1)  // z2 = inptr[24]
+    lh        s2, 16(a0)  // q0 = quantptr[ 8]
+    lh        s4, 80(a0)  // q2 = quantptr[40]
+    lh        s3, 48(a0)  // q1 = quantptr[24]
+    mul       t2, t2, t9  // tmp2 = tmp2 * 5793
+    mul       t1, t1, s0  // tmp1 = tmp1 * 10033
+    sll       t0, t0, 13  // tmp0 = tmp0 << 13
+    mul       t6, t6, s2  // z1 = z1 * q0
+    mul       t8, t8, s4  // z3 = z3 * q2
+    mul       t7, t7, s3  // z2 = z2 * q1
+    addu      t3, t0, t2  // tmp10 = tmp0 + tmp2
+    sll       t2, t2, 1   // tmp2 = tmp2 << 1 (doubled for the next line)
+    subu      t4, t0, t2  // tmp11 = tmp0 - tmp2;
+    subu      t5, t3, t1  // tmp12 = tmp10 - tmp1
+    addu      t3, t3, t1  // tmp10 = tmp10 + tmp1
+    addu      t1, t6, t8  // tmp1 = z1 + z3
+    mul       t1, t1, s1  // tmp1 = tmp1 * 2998
+    shra_r.w  t4, t4, 11  // tmp11 = (tmp11 + 1024) >> 11
+    subu      t2, t6, t8  // tmp2 = z1 - z3
+    subu      t2, t2, t7  // tmp2 = tmp2 - z2
+    sll       t2, t2, 2   // tmp2 = tmp2 << 2
+    addu      t0, t6, t7  // tmp0 = z1 + z2
+    sll       t0, t0, 13  // tmp0 = tmp0 << 13
+    subu      s2, t8, t7  // q0 = z3 - z2
+    sll       s2, s2, 13  // q0 = q0 << 13
+    addu      t0, t0, t1  // tmp0 = tmp0 + tmp1
+    addu      t1, s2, t1  // tmp1 = q0 + tmp1
+    addu      s2, t4, t2  // q0 = tmp11 + tmp2
+    subu      s3, t4, t2  // q1 = tmp11 - tmp2
+    addu      t6, t3, t0  // z1 = tmp10 + tmp0
+    subu      t7, t3, t0  // z2 = tmp10 - tmp0
+    addu      t4, t5, t1  // tmp11 = tmp12 + tmp1
+    subu      t5, t5, t1  // tmp12 = tmp12 - tmp1
+    shra_r.w  t6, t6, 11  // z1 = (z1 + 1024) >> 11
+    shra_r.w  t7, t7, 11  // z2 = (z2 + 1024) >> 11
+    shra_r.w  t4, t4, 11  // tmp11 = (tmp11 + 1024) >> 11
+    shra_r.w  t5, t5, 11  // tmp12 = (tmp12 + 1024) >> 11
+    sw        s2, 24(v0)  // work row 1
+    sw        s3, 96(v0)  // work row 4
+    sw        t6, 0(v0)   // work row 0
+    sw        t7, 120(v0) // work row 5
+    sw        t4, 48(v0)  // work row 2
+    sw        t5, 72(v0)  // work row 3
+    addiu     v0, v0, 4   // next work-array column
+    addiu     a1, a1, 2   // next input column
+    bne       v0, v1, 1b
+     addiu    a0, a0, 2   // (delay slot) next quantization-table column
+
+    /* Pass 2: process 6 rows from work array, store into output array. */
+    move      v0, sp            // v0 = current work-array row
+    addiu     v1, v0, 144       // v1 = end marker (6 rows x 24 bytes)
+
+2:
+    lw        t0, 0(v0)         // wsptr[0]
+    lw        t2, 16(v0)        // wsptr[4]
+    lw        s5, 0(a2)         // s5 = current output row pointer
+    addiu     t0, t0, 16        // rounding term added before the << 13
+    sll       t0, t0, 13        // tmp0 = (wsptr[0] + 16) << 13
+    mul       t3, t2, t9        // tmp10 = wsptr[4] * 5793
+    lw        t6, 4(v0)         // z1 = wsptr[1]
+    lw        t8, 20(v0)        // z3 = wsptr[5]
+    lw        t7, 12(v0)        // z2 = wsptr[3]
+    addu      s5, s5, a3        // outptr = output row pointer + output_col
+    addu      s6, t6, t8        // z1 + z3
+    mul       s6, s6, s1        // tmp1 = (z1 + z3) * 2998
+    addu      t1, t0, t3        // tmp1' = tmp0 + tmp10
+    subu      t4, t0, t3
+    subu      t4, t4, t3        // tmp11 = tmp0 - tmp10 - tmp10
+    lw        t3, 8(v0)         // wsptr[2]
+    mul       t0, t3, s0        // tmp0 = wsptr[2] * 10033
+    addu      s7, t6, t7        // z1 + z2
+    sll       s7, s7, 13
+    addu      s7, s6, s7        // odd term A = tmp1 + ((z1 + z2) << 13)
+    subu      t2, t8, t7        // z3 - z2
+    sll       t2, t2, 13
+    addu      t2, s6, t2        // odd term C = tmp1 + ((z3 - z2) << 13)
+    subu      s6, t6, t7
+    subu      s6, s6, t8
+    sll       s6, s6, 13        // odd term B = (z1 - z2 - z3) << 13
+    addu      t3, t1, t0        // tmp10 = tmp1' + tmp0
+    subu      t5, t1, t0        // tmp12 = tmp1' - tmp0
+    addu      t6, t3, s7        // pixel 0 = tmp10 + A
+    subu      t3, t3, s7        // pixel 5 = tmp10 - A
+    addu      t7, t4, s6        // pixel 1 = tmp11 + B
+    subu      t4, t4, s6        // pixel 4 = tmp11 - B
+    addu      t8, t5, t2        // pixel 2 = tmp12 + C
+    subu      t5, t5, t2        // pixel 3 = tmp12 - C
+    shll_s.w  t6, t6, 6         // saturating << 6 then sra 24: net >> 18, clamped to signed 8-bit
+    shll_s.w  t3, t3, 6
+    shll_s.w  t7, t7, 6
+    shll_s.w  t4, t4, 6
+    shll_s.w  t8, t8, 6
+    shll_s.w  t5, t5, 6
+    sra       t6, t6, 24
+    addiu     t6, t6, 128       // shift the clamped value into 0..255
+    sra       t3, t3, 24
+    addiu     t3, t3, 128
+    sb        t6, 0(s5)
+    sra       t7, t7, 24
+    addiu     t7, t7, 128
+    sb        t3, 5(s5)
+    sra       t4, t4, 24
+    addiu     t4, t4, 128
+    sb        t7, 1(s5)
+    sra       t8, t8, 24
+    addiu     t8, t8, 128
+    sb        t4, 4(s5)
+    addiu     v0, v0, 24        // next work-array row
+    sra       t5, t5, 24
+    addiu     t5, t5, 128
+    sb        t8, 2(s5)
+    addiu     a2, a2,  4        // next output row pointer (4-byte pointers)
+    bne       v0, v1, 2b
+     sb       t5, 3(s5)         // (delay slot) last pixel of this row
+
+    addiu     sp, sp, 144       // release the work array
+
+    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+    j         ra
+     nop
+
+END(jsimd_idct_6x6_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass1_mips_dspr2)
+/* Column pass of the 12x12 inverse DCT: each of 8 input columns yields 12 words, one per workspace row.
+ * a0     - compptr->dct_table
+ * a1     - coef_block
+ * a2     - workspace (32-bit words, 32 bytes per row)
+ */
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+    li         a3, 8         // column counter
+
+1:
+    // odd part
+    lh         t0, 48(a1)    // inptr[DCTSIZE*3]
+    lh         t1, 48(a0)    // quantptr[DCTSIZE*3]
+    lh         t2, 16(a1)    // inptr[DCTSIZE*1]
+    lh         t3, 16(a0)    // quantptr[DCTSIZE*1]
+    lh         t4, 80(a1)    // inptr[DCTSIZE*5]
+    lh         t5, 80(a0)    // quantptr[DCTSIZE*5]
+    lh         t6, 112(a1)   // inptr[DCTSIZE*7]
+    lh         t7, 112(a0)   // quantptr[DCTSIZE*7]
+    mul        t0, t0, t1    // z2
+    mul        t1, t2, t3    // z1
+    mul        t2, t4, t5    // z3
+    mul        t3, t6, t7    // z4
+    li         t4, 10703     // FIX(1.306562965)
+    li         t5, 4433      // FIX_0_541196100
+    li         t6, 7053      // FIX(0.860918669)
+    mul        t4, t0,t4     // tmp11
+    mul        t5, t0,t5     // -tmp14
+    addu       t7, t1,t2     // tmp10
+    addu       t8, t7,t3     // tmp10 + z4
+    mul        t6, t6, t8    // tmp15
+    li         t8, 2139      // FIX(0.261052384)
+    mul        t8, t7, t8    // MULTIPLY(tmp10, FIX(0.261052384))
+    li         t7, 2295      // FIX(0.280143716)
+    mul        t7, t1, t7    // MULTIPLY(z1, FIX(0.280143716))
+    addu       t9, t2, t3    // z3 + z4
+    li         s0, 8565      // FIX(1.045510580)
+    mul        t9, t9, s0    // -tmp13
+    li         s0, 12112     // FIX(1.478575242)
+    mul        s0, t2, s0    // MULTIPLY(z3, FIX(1.478575242))
+    li         s1, 12998     // FIX(1.586706681)
+    mul        s1, t3, s1    // MULTIPLY(z4, FIX(1.586706681))
+    li         s2, 5540      // FIX(0.676326758)
+    mul        s2, t1, s2    // MULTIPLY(z1, FIX(0.676326758))
+    li         s3, 16244     // FIX(1.982889723)
+    mul        s3, t3, s3    // MULTIPLY(z4, FIX(1.982889723))
+    subu       t1, t1, t3    // z1-=z4
+    subu       t0, t0, t2    // z2-=z3
+    addu       t2, t0, t1    // z1+z2
+    li         t3, 4433      // FIX_0_541196100
+    mul        t2, t2, t3    // z3
+    li         t3, 6270      // FIX_0_765366865
+    mul        t1, t1, t3    // MULTIPLY(z1, FIX_0_765366865)
+    li         t3, 15137     // FIX_1_847759065
+    mul        t0, t0, t3    // MULTIPLY(z2, FIX_1_847759065)
+    addu       t8, t6, t8    // tmp12
+    addu       t3, t8, t4    // tmp12 + tmp11
+    addu       t3, t3, t7    // tmp10
+    subu       t8, t8, t9    // tmp12 + tmp13
+    addu       s0, t5, s0    // -tmp14 + MULTIPLY(z3, FIX(1.478575242))
+    subu       t8, t8, s0    // tmp12
+    subu       t9, t6, t9    // tmp15 + tmp13
+    subu       s1, s1, t4    // MULTIPLY(z4, FIX(1.586706681)) - tmp11
+    addu       t9, t9, s1    // tmp13
+    subu       t6, t6, t5    // tmp15 + tmp14
+    subu       t6, t6, s2
+    subu       t6, t6, s3    // tmp15
+    // even part start
+    lh         t4, 64(a1)
+    lh         t5, 64(a0)
+    lh         t7, 32(a1)
+    lh         s0, 32(a0)
+    lh         s1, 0(a1)
+    lh         s2, 0(a0)
+    lh         s3, 96(a1)
+    lh         v0, 96(a0)
+    mul        t4, t4, t5    // DEQUANTIZE(inptr[DCTSIZE*4],quantptr[DCTSIZE*4])
+    mul        t5, t7, s0    // DEQUANTIZE(inptr[DCTSIZE*2],quantptr[DCTSIZE*2])
+    mul        t7, s1, s2    // DEQUANTIZE(inptr[DCTSIZE*0],quantptr[DCTSIZE*0])
+    mul        s0, s3, v0    // DEQUANTIZE(inptr[DCTSIZE*6],quantptr[DCTSIZE*6])
+    // odd part end
+    addu       t1, t2, t1    // tmp11
+    subu       t0, t2, t0    // tmp14
+    // update counter and pointers
+    addiu      a3, a3, -1
+    addiu      a0, a0, 2
+    addiu      a1, a1, 2
+    // even part rest
+    li         s1, 10033     // FIX(1.224744871)
+    li         s2, 11190     // FIX(1.366025404)
+    mul        t4, t4, s1    // z4 = MULTIPLY(z4, FIX(1.224744871))
+    mul        s1, t5, s2    // z4 = MULTIPLY(z1, FIX(1.366025404))
+    sll        t5, t5, 13    // z1
+    sll        t7, t7, 13
+    addiu      t7, t7, 1024  // z3 (1024 is the rounding term for the final >> 11)
+    sll        s0, s0, 13    // z2
+    addu       s2, t7, t4    // tmp10
+    subu       t4, t7, t4    // tmp11
+    subu       s3, t5, s0    // tmp12
+    addu       t2, t7, s3    // tmp21
+    subu       s3, t7, s3    // tmp24
+    addu       t7, s1, s0    // tmp12
+    addu       v0, s2, t7    // tmp20
+    subu       s2, s2, t7    // tmp25
+    subu       s1, s1, t5    // z4 - z1
+    subu       s1, s1, s0    // tmp12
+    addu       s0, t4, s1    // tmp22
+    subu       t4, t4, s1    // tmp23
+    // final output stage
+    addu       t5, v0, t3    // tmp20 + tmp10
+    subu       v0, v0, t3    // tmp20 - tmp10
+    addu       t3, t2, t1    // tmp21 + tmp11
+    subu       t2, t2, t1    // tmp21 - tmp11
+    addu       t1, s0, t8    // tmp22 + tmp12
+    subu       s0, s0, t8    // tmp22 - tmp12
+    addu       t8, t4, t9    // tmp23 + tmp13
+    subu       t4, t4, t9    // tmp23 - tmp13
+    addu       t9, s3, t0    // tmp24 + tmp14
+    subu       s3, s3, t0    // tmp24 - tmp14
+    addu       t0, s2, t6    // tmp25 + tmp15
+    subu       s2, s2, t6    // tmp25 - tmp15
+    sra        t5, t5, 11
+    sra        t3, t3, 11
+    sra        t1, t1, 11
+    sra        t8, t8, 11
+    sra        t9, t9, 11
+    sra        t0, t0, 11
+    sra        s2, s2, 11
+    sra        s3, s3, 11
+    sra        t4, t4, 11
+    sra        s0, s0, 11
+    sra        t2, t2, 11
+    sra        v0, v0, 11
+    sw         t5, 0(a2)     // workspace row 0
+    sw         t3, 32(a2)    // workspace row 1
+    sw         t1, 64(a2)    // workspace row 2
+    sw         t8, 96(a2)    // workspace row 3
+    sw         t9, 128(a2)   // workspace row 4
+    sw         t0, 160(a2)   // workspace row 5
+    sw         s2, 192(a2)   // workspace row 6
+    sw         s3, 224(a2)   // workspace row 7
+    sw         t4, 256(a2)   // workspace row 8
+    sw         s0, 288(a2)   // workspace row 9
+    sw         t2, 320(a2)   // workspace row 10
+    sw         v0, 352(a2)   // workspace row 11
+    bgtz       a3, 1b
+     addiu     a2, a2, 4     // (delay slot) next workspace column
+
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+    j          ra
+     nop
+
+END(jsimd_idct_12x12_pass1_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2)
+/* Runs 12 iterations; each turns 8 workspace words into 12 output bytes.
+ * a0     - workspace (8 words read, then advanced by 32 bytes, per iteration)
+ * a1     - output (one row pointer read, then advanced by 4 bytes, per iteration)
+ */
+
+    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+    li        a3, 12        // a3 = remaining iterations
+
+1:
+    // Odd part
+    lw        t0, 12(a0)    // z2
+    lw        t1, 4(a0)     // z1
+    lw        t2, 20(a0)    // z3
+    lw        t3, 28(a0)    // z4
+    li        t4, 10703     // FIX(1.306562965)
+    li        t5, 4433      // FIX_0_541196100
+    mul       t4, t0, t4    // tmp11
+    mul       t5, t0, t5    // -tmp14
+    addu      t6, t1, t2    // tmp10
+    li        t7, 2139      // FIX(0.261052384)
+    mul       t7, t6, t7    // MULTIPLY(tmp10, FIX(0.261052384))
+    addu      t6, t6, t3    // tmp10 + z4
+    li        t8, 7053      // FIX(0.860918669)
+    mul       t6, t6, t8    // tmp15
+    li        t8, 2295      // FIX(0.280143716)
+    mul       t8, t1, t8    // MULTIPLY(z1, FIX(0.280143716))
+    addu      t9, t2, t3    // z3 + z4
+    li        s0, 8565      // FIX(1.045510580)
+    mul       t9, t9, s0    // -tmp13
+    li        s0, 12112     // FIX(1.478575242)
+    mul       s0, t2, s0    // MULTIPLY(z3, FIX(1.478575242))
+    li        s1, 12998     // FIX(1.586706681)
+    mul       s1, t3, s1    // MULTIPLY(z4, FIX(1.586706681))
+    li        s2, 5540      // FIX(0.676326758)
+    mul       s2, t1, s2    // MULTIPLY(z1, FIX(0.676326758))
+    li        s3, 16244     // FIX(1.982889723)
+    mul       s3, t3, s3    // MULTIPLY(z4, FIX(1.982889723))
+    subu      t1, t1, t3    // z1 -= z4
+    subu      t0, t0, t2    // z2 -= z3
+    addu      t2, t1, t0    // z1 + z2
+    li        t3, 4433      // FIX_0_541196100
+    mul       t2, t2, t3    // z3
+    li        t3, 6270      // FIX_0_765366865
+    mul       t1, t1, t3    // MULTIPLY(z1, FIX_0_765366865)
+    li        t3, 15137     // FIX_1_847759065
+    mul       t0, t0, t3    // MULTIPLY(z2, FIX_1_847759065)
+    addu      t3, t6, t7    // tmp12
+    addu      t7, t3, t4
+    addu      t7, t7, t8    // tmp10
+    subu      t3, t3, t9
+    subu      t3, t3, t5
+    subu      t3, t3, s0    // tmp12
+    subu      t9, t6, t9
+    subu      t9, t9, t4
+    addu      t9, t9, s1    // tmp13
+    subu      t6, t6, t5
+    subu      t6, t6, s2
+    subu      t6, t6, s3    // tmp15
+    addu      t1, t2, t1    // tmp11
+    subu      t0, t2, t0    // tmp14
+    // even part
+    lw        t2, 16(a0)    // z4
+    lw        t4, 8(a0)     // z1
+    lw        t5, 0(a0)     // z3
+    lw        t8, 24(a0)    // z2
+    li        s0, 10033     // FIX(1.224744871)
+    li        s1, 11190     // FIX(1.366025404)
+    mul       t2, t2, s0    // z4
+    mul       s0, t4, s1    // z4
+    addiu     t5, t5, 0x10  // bias added before the << 13 below
+    sll       t5, t5, 13    // z3
+    sll       t4, t4, 13    // z1
+    sll       t8, t8, 13    // z2
+    subu      s1, t4, t8    // tmp12
+    addu      s2, t5, t2    // tmp10
+    subu      t2, t5, t2    // tmp11
+    addu      s3, t5, s1    // tmp21
+    subu      s1, t5, s1    // tmp24
+    addu      t5, s0, t8    // tmp12
+    addu      v0, s2, t5    // tmp20
+    subu      t5, s2, t5    // tmp25
+    subu      t4, s0, t4
+    subu      t4, t4, t8    // tmp12
+    addu      t8, t2, t4    // tmp22
+    subu      t2, t2, t4    // tmp23
+    // increment counter and pointers
+    addiu     a3, a3, -1
+    addiu     a0, a0, 32
+    // Final stage
+    addu      t4, v0, t7    // tmp20 + tmp10 -> byte 0
+    subu      v0, v0, t7    // tmp20 - tmp10 -> byte 11
+    addu      t7, s3, t1    // tmp21 + tmp11 -> byte 1
+    subu      s3, s3, t1    // tmp21 - tmp11 -> byte 10
+    addu      t1, t8, t3    // tmp22 + tmp12 -> byte 2
+    subu      t8, t8, t3    // tmp22 - tmp12 -> byte 9
+    addu      t3, t2, t9    // tmp23 + tmp13 -> byte 3
+    subu      t2, t2, t9    // tmp23 - tmp13 -> byte 8
+    addu      t9, s1, t0    // tmp24 + tmp14 -> byte 4
+    subu      s1, s1, t0    // tmp24 - tmp14 -> byte 7
+    addu      t0, t5, t6    // tmp25 + tmp15 -> byte 5
+    subu      t5, t5, t6    // tmp25 - tmp15 -> byte 6
+    sll       t4, t4, 4     // plain left shift by 4 (all 12 results)
+    sll       t7, t7, 4
+    sll       t1, t1, 4
+    sll       t3, t3, 4
+    sll       t9, t9, 4
+    sll       t0, t0, 4
+    sll       t5, t5, 4
+    sll       s1, s1, 4
+    sll       t2, t2, 4
+    sll       t8, t8, 4
+    sll       s3, s3, 4
+    sll       v0, v0, 4
+    shll_s.w  t4, t4, 2     // saturating left shift by 2 more (DSP ASE)
+    shll_s.w  t7, t7, 2
+    shll_s.w  t1, t1, 2
+    shll_s.w  t3, t3, 2
+    shll_s.w  t9, t9, 2
+    shll_s.w  t0, t0, 2
+    shll_s.w  t5, t5, 2
+    shll_s.w  s1, s1, 2
+    shll_s.w  t2, t2, 2
+    shll_s.w  t8, t8, 2
+    shll_s.w  s3, s3, 2
+    shll_s.w  v0, v0, 2
+    srl       t4, t4, 24    // keep the top 8 bits (logical shift)
+    srl       t7, t7, 24
+    srl       t1, t1, 24
+    srl       t3, t3, 24
+    srl       t9, t9, 24
+    srl       t0, t0, 24
+    srl       t5, t5, 24
+    srl       s1, s1, 24
+    srl       t2, t2, 24
+    srl       t8, t8, 24
+    srl       s3, s3, 24
+    srl       v0, v0, 24
+    lw        t6, 0(a1)     // t6 = current output row pointer
+    addiu     t4, t4, 0x80  // add 0x80; sb below stores only the low byte
+    addiu     t7, t7, 0x80
+    addiu     t1, t1, 0x80
+    addiu     t3, t3, 0x80
+    addiu     t9, t9, 0x80
+    addiu     t0, t0, 0x80
+    addiu     t5, t5, 0x80
+    addiu     s1, s1, 0x80
+    addiu     t2, t2, 0x80
+    addiu     t8, t8, 0x80
+    addiu     s3, s3, 0x80
+    addiu     v0, v0, 0x80
+    sb        t4, 0(t6)
+    sb        t7, 1(t6)
+    sb        t1, 2(t6)
+    sb        t3, 3(t6)
+    sb        t9, 4(t6)
+    sb        t0, 5(t6)
+    sb        t5, 6(t6)
+    sb        s1, 7(t6)
+    sb        t2, 8(t6)
+    sb        t8, 9(t6)
+    sb        s3, 10(t6)
+    sb        v0, 11(t6)
+    bgtz      a3, 1b        // loop while iterations remain
+     addiu    a1, a1, 4     // (delay slot) step to the next row pointer
+
+    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+    jr        ra
+     nop
+
+END(jsimd_idct_12x12_pass2_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2)
+/* Converts an 8x8 block of unsigned bytes to 64 halfwords, subtracting 128.
+ * a0     - sample_data (8 row pointers, read at 0(a0) .. 28(a0))
+ * a1     - start_col (byte offset added to every row pointer)
+ * a2     - workspace (64 halfwords written as 32 words at 0 .. 124)
+ */
+
+    lw             t0, 0(a0)            // row 0 pointer
+    li             t7, 0xff80ff80       // 0xff80 (-128) in each halfword
+    addu           t0, t0, a1           // + start_col
+    ulw            t1, 0(t0)            // row 0, bytes 0-3 (unaligned load)
+    ulw            t2, 4(t0)            // row 0, bytes 4-7
+    preceu.ph.qbr  t3, t1               // two low-order bytes -> halfwords
+    preceu.ph.qbl  t4, t1               // two high-order bytes -> halfwords
+    lw             t0, 4(a0)            // row 1 pointer, fetched early
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7           // per-halfword add of 0xff80
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)            // row 1 bytes loaded before row 0 is stored
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 0(a2)            // row 0 results -> workspace 0 .. 12
+    usw            t4, 4(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 8(a2)
+    usw            t6, 12(a2)
+
+    lw             t0, 8(a0)            // row 2 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 16(a2)           // row 1 results -> workspace 16 .. 28
+    usw            t4, 20(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 24(a2)
+    usw            t6, 28(a2)
+
+    lw             t0, 12(a0)           // row 3 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 32(a2)           // row 2 results -> workspace 32 .. 44
+    usw            t4, 36(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 40(a2)
+    usw            t6, 44(a2)
+
+    lw             t0, 16(a0)           // row 4 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 48(a2)           // row 3 results -> workspace 48 .. 60
+    usw            t4, 52(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 56(a2)
+    usw            t6, 60(a2)
+
+    lw             t0, 20(a0)           // row 5 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 64(a2)           // row 4 results -> workspace 64 .. 76
+    usw            t4, 68(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 72(a2)
+    usw            t6, 76(a2)
+
+    lw             t0, 24(a0)           // row 6 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 80(a2)           // row 5 results -> workspace 80 .. 92
+    usw            t4, 84(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 88(a2)
+    usw            t6, 92(a2)
+
+    lw             t0, 28(a0)           // row 7 pointer
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu           t0, t0, a1
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    ulw            t1, 0(t0)
+    ulw            t2, 4(t0)
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 96(a2)           // row 6 results -> workspace 96 .. 108
+    usw            t4, 100(a2)
+    preceu.ph.qbr  t3, t1
+    preceu.ph.qbl  t4, t1
+    usw            t5, 104(a2)
+    usw            t6, 108(a2)
+    preceu.ph.qbr  t5, t2
+    preceu.ph.qbl  t6, t2
+    addu.ph        t3, t3, t7
+    addu.ph        t4, t4, t7
+    addu.ph        t5, t5, t7
+    addu.ph        t6, t6, t7
+    usw            t3, 112(a2)          // row 7 results -> workspace 112 .. 124
+    usw            t4, 116(a2)
+    usw            t5, 120(a2)
+    usw            t6, 124(a2)
+
+    j              ra
+     nop
+
+END(jsimd_convsamp_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2)
+/* Converts an 8x8 block of unsigned bytes to 64 floats, subtracting 128.
+ * a0     - sample_data (8 row pointers, read at 0(a0) .. 28(a0))
+ * a1     - start_col (byte offset added to every row pointer)
+ * a2     - workspace (64 single-precision values written at 0 .. 252)
+ */
+
+    .set at
+
+    lw       t0, 0(a0)          // row 0 pointer
+    addu     t0, t0, a1         // + start_col
+    lbu      t1, 0(t0)          // eight unsigned bytes of the row
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128       // subtract 128 from each sample
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1             // move the integers into FP registers
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1             // integer word -> single precision
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 4(a0)          // row 1 pointer, fetched while row 0 is stored
+    swc1     f1, 0(a2)          // row 0 results -> workspace 0 .. 28
+    swc1     f2, 4(a2)
+    swc1     f3, 8(a2)
+    addu     t0, t0, a1
+    swc1     f4, 12(a2)
+    swc1     f5, 16(a2)
+    swc1     f6, 20(a2)
+    swc1     f7, 24(a2)
+    swc1     f8, 28(a2)
+    // row 1 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 8(a0)          // row 2 pointer
+    swc1     f1, 32(a2)         // row 1 results -> workspace 32 .. 60
+    swc1     f2, 36(a2)
+    swc1     f3, 40(a2)
+    addu     t0, t0, a1
+    swc1     f4, 44(a2)
+    swc1     f5, 48(a2)
+    swc1     f6, 52(a2)
+    swc1     f7, 56(a2)
+    swc1     f8, 60(a2)
+    // row 2 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 12(a0)         // row 3 pointer
+    swc1     f1, 64(a2)         // row 2 results -> workspace 64 .. 92
+    swc1     f2, 68(a2)
+    swc1     f3, 72(a2)
+    addu     t0, t0, a1
+    swc1     f4, 76(a2)
+    swc1     f5, 80(a2)
+    swc1     f6, 84(a2)
+    swc1     f7, 88(a2)
+    swc1     f8, 92(a2)
+    // row 3 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 16(a0)         // row 4 pointer
+    swc1     f1, 96(a2)         // row 3 results -> workspace 96 .. 124
+    swc1     f2, 100(a2)
+    swc1     f3, 104(a2)
+    addu     t0, t0, a1
+    swc1     f4, 108(a2)
+    swc1     f5, 112(a2)
+    swc1     f6, 116(a2)
+    swc1     f7, 120(a2)
+    swc1     f8, 124(a2)
+    // row 4 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 20(a0)         // row 5 pointer
+    swc1     f1, 128(a2)        // row 4 results -> workspace 128 .. 156
+    swc1     f2, 132(a2)
+    swc1     f3, 136(a2)
+    addu     t0, t0, a1
+    swc1     f4, 140(a2)
+    swc1     f5, 144(a2)
+    swc1     f6, 148(a2)
+    swc1     f7, 152(a2)
+    swc1     f8, 156(a2)
+    // row 5 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 24(a0)         // row 6 pointer
+    swc1     f1, 160(a2)        // row 5 results -> workspace 160 .. 188
+    swc1     f2, 164(a2)
+    swc1     f3, 168(a2)
+    addu     t0, t0, a1
+    swc1     f4, 172(a2)
+    swc1     f5, 176(a2)
+    swc1     f6, 180(a2)
+    swc1     f7, 184(a2)
+    swc1     f8, 188(a2)
+    // row 6 (same sequence as row 0)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    lw       t0, 28(a0)         // row 7 pointer
+    swc1     f1, 192(a2)        // row 6 results -> workspace 192 .. 220
+    swc1     f2, 196(a2)
+    swc1     f3, 200(a2)
+    addu     t0, t0, a1
+    swc1     f4, 204(a2)
+    swc1     f5, 208(a2)
+    swc1     f6, 212(a2)
+    swc1     f7, 216(a2)
+    swc1     f8, 220(a2)
+    // row 7 (same sequence as row 0, no further row pointer to fetch)
+    lbu      t1, 0(t0)
+    lbu      t2, 1(t0)
+    lbu      t3, 2(t0)
+    lbu      t4, 3(t0)
+    lbu      t5, 4(t0)
+    lbu      t6, 5(t0)
+    lbu      t7, 6(t0)
+    lbu      t8, 7(t0)
+    addiu    t1, t1, -128
+    addiu    t2, t2, -128
+    addiu    t3, t3, -128
+    addiu    t4, t4, -128
+    addiu    t5, t5, -128
+    addiu    t6, t6, -128
+    addiu    t7, t7, -128
+    addiu    t8, t8, -128
+    mtc1     t1, f1
+    mtc1     t2, f2
+    mtc1     t3, f3
+    mtc1     t4, f4
+    mtc1     t5, f5
+    mtc1     t6, f6
+    mtc1     t7, f7
+    mtc1     t8, f8
+    cvt.s.w  f1, f1
+    cvt.s.w  f2, f2
+    cvt.s.w  f3, f3
+    cvt.s.w  f4, f4
+    cvt.s.w  f5, f5
+    cvt.s.w  f6, f6
+    cvt.s.w  f7, f7
+    cvt.s.w  f8, f8
+    swc1     f1, 224(a2)        // row 7 results -> workspace 224 .. 252
+    swc1     f2, 228(a2)
+    swc1     f3, 232(a2)
+    swc1     f4, 236(a2)
+    swc1     f5, 240(a2)
+    swc1     f6, 244(a2)
+    swc1     f7, 248(a2)
+    swc1     f8, 252(a2)
+
+    j        ra
+     nop
+
+END(jsimd_convsamp_float_mips_dspr2)
+
+/*****************************************************************************/
+
diff --git a/simd/jsimd_mips_dspr2_asm.h b/simd/jsimd_mips_dspr2_asm.h
new file mode 100644
index 0000000..50ec31b
--- /dev/null
+++ b/simd/jsimd_mips_dspr2_asm.h
@@ -0,0 +1,285 @@
+/*
+ * MIPS DSPr2 optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2013, MIPS Technologies, Inc., California.
+ * All rights reserved.
+ * Authors:  Teodora Novkovic (teodora.novkovic@imgtec.com)
+ *           Darko Laus       (darko.laus@imgtec.com)
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#define zero $0   /* symbolic names for the integer registers $0-$31 follow */
+#define AT   $1   /* upper-case, unlike the other aliases in this list */
+#define v0   $2
+#define v1   $3
+#define a0   $4
+#define a1   $5
+#define a2   $6
+#define a3   $7
+#define t0   $8
+#define t1   $9
+#define t2   $10
+#define t3   $11
+#define t4   $12
+#define t5   $13
+#define t6   $14
+#define t7   $15
+#define s0   $16
+#define s1   $17
+#define s2   $18
+#define s3   $19
+#define s4   $20
+#define s5   $21
+#define s6   $22
+#define s7   $23
+#define t8   $24
+#define t9   $25
+#define k0   $26
+#define k1   $27
+#define gp   $28
+#define sp   $29
+#define fp   $30  /* same register as s8 */
+#define s8   $30  /* same register as fp */
+#define ra   $31
+
+#define f0   $f0  /* symbolic names for the FP registers $f0-$f31 follow */
+#define f1   $f1
+#define f2   $f2
+#define f3   $f3
+#define f4   $f4
+#define f5   $f5
+#define f6   $f6
+#define f7   $f7
+#define f8   $f8
+#define f9   $f9
+#define f10  $f10
+#define f11  $f11
+#define f12  $f12
+#define f13  $f13
+#define f14  $f14
+#define f15  $f15
+#define f16  $f16
+#define f17  $f17
+#define f18  $f18
+#define f19  $f19
+#define f20  $f20
+#define f21  $f21
+#define f22  $f22
+#define f23  $f23
+#define f24  $f24
+#define f25  $f25
+#define f26  $f26
+#define f27  $f27
+#define f28  $f28
+#define f29  $f29
+#define f30  $f30
+#define f31  $f31
+
+/* LEAF_MIPS32R2 - declare the global leaf routine `symbol` for MIPS32r2.
+ * Pushes the assembler option state, then selects arch=mips32r2, noreorder
+ * and noat; the matching END(symbol) pops that state again. */
+#define LEAF_MIPS32R2(symbol)                           \
+                .globl  symbol;                         \
+                .align  2;                              \
+                .type   symbol, @function;              \
+                .ent    symbol, 0;                      \
+symbol:         .frame  sp, 0, ra;                      \
+                .set    push;                           \
+                .set    arch=mips32r2;                  \
+                .set    noreorder;                      \
+                .set    noat;
+
+/* LEAF_MIPS_DSPR2 - declare the global leaf routine `symbol` for MIPS DSPr2:
+ * everything LEAF_MIPS32R2(symbol) emits, followed by ".set dspr2" so that
+ * DSPr2 instructions are accepted in the routine body. */
+#define LEAF_MIPS_DSPR2(symbol)                         \
+LEAF_MIPS32R2(symbol)                                   \
+                .set    dspr2;
+
+/* END - mark end of function: pops the assembler option state pushed by
+ * LEAF_MIPS32R2/LEAF_MIPS_DSPR2, closes the .ent block with .end, and
+ * records the symbol size (.size) as the distance from `function` to here. */
+#define END(function)                                   \
+                .set    pop;                            \
+                .end    function;                       \
+                .size   function,.-function
+
+/*
+ * Assemble-time check that stack_offset leaves room for storing/restoring
+ * regs_num registers (4 bytes each) on the stack: an error is raised when
+ * stack_offset < regs_num * 4 - 16.  The 16-byte allowance is there because
+ * the MIPS ABI allows usage of the first 16 bytes of the stack frame (they
+ * are reserved for the input arguments, which are already held in a0-a3),
+ * so that space can be used for the first four saved registers.
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves a set of registers on the stack. At most 14 registers can be
+ * saved (a0-a3, v0-v1 and s0-s7); r1 is mandatory, r2-r14 are optional.
+ * stack_offset is the number of bytes by which the stack pointer (sp) is
+ * lowered before the registers are stored at 0(sp), 4(sp), ... It must be
+ * non-negative, a multiple of 4, and big enough as described by the
+ * CHECK_STACK_OFFSET macro. This macro is intended to be used in
+ * combination with the RESTORE_REGS_FROM_STACK macro. Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+                          r2  = 0, r3  = 0, r4  = 0, \
+                          r5  = 0, r6  = 0, r7  = 0, \
+                          r8  = 0, r9  = 0, r10 = 0, \
+                          r11 = 0, r12 = 0, r13 = 0, \
+                          r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+    .error "Stack offset must be non-negative and a multiple of 4."
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, -\stack_offset
+    .endif
+    sw              \r1, 0(sp)
+    .if \r2 != 0
+    sw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    sw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    sw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    sw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    sw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    sw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    sw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    sw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    sw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    sw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    sw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    sw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    sw              \r14, 52(sp)
+    .endif
+.endm
+
+/*
+ * Restores a set of registers from the stack. At most 14 registers can be
+ * restored (a0-a3, v0-v1 and s0-s7); r1 is mandatory, r2-r14 are optional.
+ * Registers are loaded from 0(sp), 4(sp), ... and stack_offset is the
+ * number of bytes added to the stack pointer (sp) afterwards. It must be
+ * non-negative, a multiple of 4, and big enough as described by the
+ * CHECK_STACK_OFFSET macro. This macro is intended to be used in
+ * combination with the SAVE_REGS_ON_STACK macro, with the same arguments.
+ * Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+                               r2  = 0, r3  = 0, r4  = 0, \
+                               r5  = 0, r6  = 0, r7  = 0, \
+                               r8  = 0, r9  = 0, r10 = 0, \
+                               r11 = 0, r12 = 0, r13 = 0, \
+                               r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+    .error "Stack offset must be non-negative and a multiple of 4."
+    .endif
+    lw              \r1, 0(sp)
+    .if \r2 != 0
+    lw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    lw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    lw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    lw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    lw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    lw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    lw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    lw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    lw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    lw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    lw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    lw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    lw              \r14, 52(sp)
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, \stack_offset
+    .endif
+.endm
+
+
diff --git a/simd/jsimd_x86_64.c b/simd/jsimd_x86_64.c
new file mode 100644
index 0000000..87c9d56
--- /dev/null
+++ b/simd/jsimd_x86_64.c
@@ -0,0 +1,744 @@
+/*
+ * jsimd_x86_64.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on a
+ * 64-bit x86 architecture.
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+/*
+ * In the PIC cases, we have no guarantee that constants will keep their
+ * alignment. IS_ALIGNED(ptr, order) verifies at runtime that the address
+ * ptr is a multiple of 2^order; both arguments are fully parenthesized.
+ */
+#define IS_ALIGNED(ptr, order) ((((size_t)(ptr)) & (((size_t)1 << (order)) - 1)) == 0)
+
+#define IS_ALIGNED_SSE(ptr) (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  /* Usable only with 8-bit samples, a 4-byte JDIMENSION and 3- or 4-byte
+   * RGB pixels, and only if the SSE2 constant block is 16-byte aligned. */
+  int build_ok;
+
+  build_ok = (BITS_IN_JSAMPLE == 8) &&
+             (sizeof(JDIMENSION) == 4) &&
+             ((RGB_PIXELSIZE == 3) || (RGB_PIXELSIZE == 4));
+  if (!build_ok)
+    return 0;
+
+  /* Constants may lose their alignment in PIC builds; verify at runtime. */
+  return IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2) ? 1 : 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  /* Usable only with 8-bit samples, a 4-byte JDIMENSION and 3- or 4-byte
+   * RGB pixels, and only if the SSE2 constant block is 16-byte aligned. */
+  int build_ok;
+
+  build_ok = (BITS_IN_JSAMPLE == 8) &&
+             (sizeof(JDIMENSION) == 4) &&
+             ((RGB_PIXELSIZE == 3) || (RGB_PIXELSIZE == 4));
+  if (!build_ok)
+    return 0;
+
+  /* Constants may lose their alignment in PIC builds; verify at runtime. */
+  return IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2) ? 1 : 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  /* Usable only with 8-bit samples, a 4-byte JDIMENSION and 3- or 4-byte
+   * RGB pixels, and only if the SSE2 constant block is 16-byte aligned. */
+  int build_ok;
+
+  build_ok = (BITS_IN_JSAMPLE == 8) &&
+             (sizeof(JDIMENSION) == 4) &&
+             ((RGB_PIXELSIZE == 3) || (RGB_PIXELSIZE == 4));
+  if (!build_ok)
+    return 0;
+
+  /* Constants may lose their alignment in PIC builds; verify at runtime. */
+  return IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  /* Start from the generic routine; extended layouts override it below. */
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_ycc_convert_sse2;
+
+  switch (cinfo->in_color_space) {
+  case JCS_EXT_RGB:
+    convert = jsimd_extrgb_ycc_convert_sse2;
+    break;
+  case JCS_EXT_RGBX:    /* RGBX and RGBA share one routine */
+  case JCS_EXT_RGBA:
+    convert = jsimd_extrgbx_ycc_convert_sse2;
+    break;
+  case JCS_EXT_BGR:
+    convert = jsimd_extbgr_ycc_convert_sse2;
+    break;
+  case JCS_EXT_BGRX:    /* BGRX and BGRA share one routine */
+  case JCS_EXT_BGRA:
+    convert = jsimd_extbgrx_ycc_convert_sse2;
+    break;
+  case JCS_EXT_XBGR:    /* XBGR and ABGR share one routine */
+  case JCS_EXT_ABGR:
+    convert = jsimd_extxbgr_ycc_convert_sse2;
+    break;
+  case JCS_EXT_XRGB:    /* XRGB and ARGB share one routine */
+  case JCS_EXT_ARGB:
+    convert = jsimd_extxrgb_ycc_convert_sse2;
+    break;
+  default:              /* keep the generic routine chosen above */
+    break;
+  }
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  /* Start from the generic routine; extended layouts override it below. */
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_rgb_gray_convert_sse2;
+
+  switch (cinfo->in_color_space) {
+  case JCS_EXT_RGB:
+    convert = jsimd_extrgb_gray_convert_sse2;
+    break;
+  case JCS_EXT_RGBX:    /* RGBX and RGBA share one routine */
+  case JCS_EXT_RGBA:
+    convert = jsimd_extrgbx_gray_convert_sse2;
+    break;
+  case JCS_EXT_BGR:
+    convert = jsimd_extbgr_gray_convert_sse2;
+    break;
+  case JCS_EXT_BGRX:    /* BGRX and BGRA share one routine */
+  case JCS_EXT_BGRA:
+    convert = jsimd_extbgrx_gray_convert_sse2;
+    break;
+  case JCS_EXT_XBGR:    /* XBGR and ABGR share one routine */
+  case JCS_EXT_ABGR:
+    convert = jsimd_extxbgr_gray_convert_sse2;
+    break;
+  case JCS_EXT_XRGB:    /* XRGB and ARGB share one routine */
+  case JCS_EXT_ARGB:
+    convert = jsimd_extxrgb_gray_convert_sse2;
+    break;
+  default:              /* keep the generic routine chosen above */
+    break;
+  }
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  /* Start from the generic routine; extended layouts override it below. */
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int) =
+    jsimd_ycc_rgb_convert_sse2;
+
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    convert = jsimd_ycc_extrgb_convert_sse2;
+    break;
+  case JCS_EXT_RGBX:    /* RGBX and RGBA share one routine */
+  case JCS_EXT_RGBA:
+    convert = jsimd_ycc_extrgbx_convert_sse2;
+    break;
+  case JCS_EXT_BGR:
+    convert = jsimd_ycc_extbgr_convert_sse2;
+    break;
+  case JCS_EXT_BGRX:    /* BGRX and BGRA share one routine */
+  case JCS_EXT_BGRA:
+    convert = jsimd_ycc_extbgrx_convert_sse2;
+    break;
+  case JCS_EXT_XBGR:    /* XBGR and ABGR share one routine */
+  case JCS_EXT_ABGR:
+    convert = jsimd_ycc_extxbgr_convert_sse2;
+    break;
+  case JCS_EXT_XRGB:    /* XRGB and ARGB share one routine */
+  case JCS_EXT_ARGB:
+    convert = jsimd_ycc_extxrgb_convert_sse2;
+    break;
+  default:              /* keep the generic routine chosen above */
+    break;
+  }
+  convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  /*
+   * The SIMD routine is usable only with 8-bit samples and a 4-byte
+   * JDIMENSION; report 1 when both hold and 0 otherwise.
+   */
+  if ((BITS_IN_JSAMPLE == 8) && (sizeof(JDIMENSION) == 4))
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  /*
+   * The SIMD routine is usable only with 8-bit samples and a 4-byte
+   * JDIMENSION; report 1 when both hold and 0 otherwise.
+   */
+  if ((BITS_IN_JSAMPLE == 8) && (sizeof(JDIMENSION) == 4))
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Delegate to the SSE2 routine with the image and component geometry. */
+  jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+    compptr->v_samp_factor, compptr->width_in_blocks, input_data, output_data);
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{ /* Thin wrapper: unpacks the cinfo/compptr fields the SSE2 routine takes. */
+  jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
+                             compptr->v_samp_factor, compptr->width_in_blocks,
+                             input_data, output_data);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  /* Returns 1 only with 8-bit samples and a 4-byte JDIMENSION, else 0. */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  /* Returns 1 only with 8-bit samples and a 4-byte JDIMENSION, else 0. */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{ /* Thin wrapper around the SSE2 routine; compptr is not used here. */
+  jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                           input_data, output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr,
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{ /* Thin wrapper around the SSE2 routine; compptr is not used here. */
+  jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
+                           input_data, output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  /* Returns 1 only when samples are 8 bits wide, JDIMENSION is 4 bytes and
+   * jconst_fancy_upsample_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      !IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  /* Returns 1 only when samples are 8 bits wide, JDIMENSION is 4 bytes and
+   * jconst_fancy_upsample_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      !IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{ /* Thin wrapper; the width passed on is compptr->downsampled_width. */
+  jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
+                                 compptr->downsampled_width, input_data,
+                                 output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr,
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{ /* Thin wrapper; the width passed on is compptr->downsampled_width. */
+  jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
+                                 compptr->downsampled_width, input_data,
+                                 output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  /* Returns 1 only when samples are 8 bits wide, JDIMENSION is 4 bytes and
+   * jconst_merged_upsample_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      !IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  /* Returns 1 only when samples are 8 bits wide, JDIMENSION is 4 bytes and
+   * jconst_merged_upsample_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4 ||
+      !IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  /* Default routine; the JCS_EXT_* output colorspaces override it below. */
+  void (*upsample)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v2_merged_upsample_sse2;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      upsample = jsimd_h2v2_extrgb_merged_upsample_sse2;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      upsample = jsimd_h2v2_extrgbx_merged_upsample_sse2;
+      break;
+    case JCS_EXT_BGR:
+      upsample = jsimd_h2v2_extbgr_merged_upsample_sse2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      upsample = jsimd_h2v2_extbgrx_merged_upsample_sse2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      upsample = jsimd_h2v2_extxbgr_merged_upsample_sse2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      upsample = jsimd_h2v2_extxrgb_merged_upsample_sse2;
+      break;
+    default:
+      break;
+  }
+  upsample(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+  /* Default routine; the JCS_EXT_* output colorspaces override it below. */
+  void (*upsample)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY) =
+    jsimd_h2v1_merged_upsample_sse2;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      upsample = jsimd_h2v1_extrgb_merged_upsample_sse2;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      upsample = jsimd_h2v1_extrgbx_merged_upsample_sse2;
+      break;
+    case JCS_EXT_BGR:
+      upsample = jsimd_h2v1_extbgr_merged_upsample_sse2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      upsample = jsimd_h2v1_extbgrx_merged_upsample_sse2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      upsample = jsimd_h2v1_extxbgr_merged_upsample_sse2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      upsample = jsimd_h2v1_extxrgb_merged_upsample_sse2;
+      break;
+    default:
+      break;
+  }
+  upsample(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, samples are 8 bits wide, JDIMENSION
+   * is 4 bytes and DCTELEM is 2 bytes; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    supported = 0;
+  if (sizeof(JDIMENSION) != 4 || sizeof(DCTELEM) != 2)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, samples are 8 bits wide, and both
+   * JDIMENSION and FAST_FLOAT are 4 bytes; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    supported = 0;
+  if (sizeof(JDIMENSION) != 4 || sizeof(FAST_FLOAT) != 4)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{ /* Forwards all arguments unchanged to jsimd_convsamp_sse2(). */
+  jsimd_convsamp_sse2(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{ /* Forwards all arguments unchanged to jsimd_convsamp_float_sse2(). */
+  jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, DCTELEM is 2 bytes wide and
+   * jconst_fdct_islow_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2 ||
+      !IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, DCTELEM is 2 bytes wide and
+   * jconst_fdct_ifast_sse2 passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || sizeof(DCTELEM) != 2 ||
+      !IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, FAST_FLOAT is 4 bytes wide and
+   * jconst_fdct_float_sse passes IS_ALIGNED_SSE; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || sizeof(FAST_FLOAT) != 4 ||
+      !IS_ALIGNED_SSE(jconst_fdct_float_sse))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{ /* Forwards data unchanged to jsimd_fdct_islow_sse2(). */
+  jsimd_fdct_islow_sse2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{ /* Forwards data unchanged to jsimd_fdct_ifast_sse2(). */
+  jsimd_fdct_ifast_sse2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{ /* Forwards data unchanged to jsimd_fdct_float_sse() (SSE, not SSE2). */
+  jsimd_fdct_float_sse(data);
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  /* Returns 1 only when DCTSIZE is 8 and both JCOEF and DCTELEM are
+   * 2 bytes wide; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || sizeof(JCOEF) != 2 || sizeof(DCTELEM) != 2)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  /* Returns 1 only when DCTSIZE is 8, JCOEF is 2 bytes wide and
+   * FAST_FLOAT is 4 bytes wide; returns 0 otherwise.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || sizeof(JCOEF) != 2 || sizeof(FAST_FLOAT) != 4)
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{ /* Forwards all arguments unchanged to jsimd_quantize_sse2(). */
+  jsimd_quantize_sse2(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{ /* Forwards all arguments unchanged to jsimd_quantize_float_sse2(). */
+  jsimd_quantize_float_sse2(coef_block, divisors, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  /* Returns 1 only when all of the following hold, 0 otherwise:
+   *   - DCTSIZE is 8 and samples are 8 bits wide;
+   *   - JCOEF and ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes;
+   *   - jconst_idct_red_sse2 passes IS_ALIGNED_SSE.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    supported = 0;
+  else if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+           sizeof(ISLOW_MULT_TYPE) != 2)
+    supported = 0;
+  else if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  /* Returns 1 only when all of the following hold, 0 otherwise:
+   *   - DCTSIZE is 8 and samples are 8 bits wide;
+   *   - JCOEF and ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes;
+   *   - jconst_idct_red_sse2 passes IS_ALIGNED_SSE.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    supported = 0;
+  else if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+           sizeof(ISLOW_MULT_TYPE) != 2)
+    supported = 0;
+  else if (!IS_ALIGNED_SSE(jconst_idct_red_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{ /* cinfo is unused; only compptr->dct_table is read from compptr. */
+  jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{ /* cinfo is unused; only compptr->dct_table is read from compptr. */
+  jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  /* Returns 1 only when all of the following hold, 0 otherwise:
+   *   - DCTSIZE is 8 and samples are 8 bits wide;
+   *   - JCOEF and ISLOW_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes;
+   *   - jconst_idct_islow_sse2 passes IS_ALIGNED_SSE.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    supported = 0;
+  else if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+           sizeof(ISLOW_MULT_TYPE) != 2)
+    supported = 0;
+  else if (!IS_ALIGNED_SSE(jconst_idct_islow_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  /* Returns 1 only when all of the following hold, 0 otherwise:
+   *   - DCTSIZE is 8 and samples are 8 bits wide;
+   *   - JCOEF and IFAST_MULT_TYPE are 2 bytes, JDIMENSION is 4 bytes;
+   *   - IFAST_SCALE_BITS is 2;
+   *   - jconst_idct_ifast_sse2 passes IS_ALIGNED_SSE.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8 ||
+      IFAST_SCALE_BITS != 2)
+    supported = 0;
+  else if (sizeof(JCOEF) != 2 || sizeof(JDIMENSION) != 4 ||
+           sizeof(IFAST_MULT_TYPE) != 2)
+    supported = 0;
+  else if (!IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  /* Returns 1 only when all of the following hold, 0 otherwise:
+   *   - DCTSIZE is 8 and samples are 8 bits wide;
+   *   - JCOEF is 2 bytes; JDIMENSION, FAST_FLOAT and FLOAT_MULT_TYPE are
+   *     4 bytes each;
+   *   - jconst_idct_float_sse2 passes IS_ALIGNED_SSE.
+   */
+  int supported = 1;
+
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8 || sizeof(JCOEF) != 2)
+    supported = 0;
+  else if (sizeof(JDIMENSION) != 4 || sizeof(FAST_FLOAT) != 4 ||
+           sizeof(FLOAT_MULT_TYPE) != 4)
+    supported = 0;
+  else if (!IS_ALIGNED_SSE(jconst_idct_float_sse2))
+    supported = 0;
+
+  return supported;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{ /* cinfo is unused; only compptr->dct_table is read from compptr. */
+  jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{ /* cinfo is unused; only compptr->dct_table is read from compptr. */
+  jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{ /* cinfo is unused; only compptr->dct_table is read from compptr. */
+  jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
+                        output_col);
+}
+
diff --git a/simd/jsimdcfg.inc.h b/simd/jsimdcfg.inc.h
new file mode 100644
index 0000000..d2b499f
--- /dev/null
+++ b/simd/jsimdcfg.inc.h
@@ -0,0 +1,130 @@
+// This file is run through the C preprocessor to generate the include
+// file (jsimdcfg.inc) used by the assembly implementations.
+//
+// Note: Some things are defined manually because they need to be mapped
+// to NASM types rather than taken from the C headers.
+
+;
+; Automatically generated include file from jsimdcfg.inc.h
+;
+
+#define JPEG_INTERNALS
+
+#include "../jpeglib.h"
+#include "../jconfig.h"
+#include "../jmorecfg.h"
+#include "jsimd.h"
+
+;
+; -- jpeglib.h
+;
+
+%define _cpp_protection_DCTSIZE DCTSIZE
+%define _cpp_protection_DCTSIZE2 DCTSIZE2
+
+;
+; -- jmorecfg.h
+;
+
+%define _cpp_protection_RGB_RED RGB_RED
+%define _cpp_protection_RGB_GREEN RGB_GREEN
+%define _cpp_protection_RGB_BLUE RGB_BLUE
+%define _cpp_protection_RGB_PIXELSIZE RGB_PIXELSIZE
+
+%define _cpp_protection_EXT_RGB_RED EXT_RGB_RED
+%define _cpp_protection_EXT_RGB_GREEN EXT_RGB_GREEN
+%define _cpp_protection_EXT_RGB_BLUE EXT_RGB_BLUE
+%define _cpp_protection_EXT_RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+
+%define _cpp_protection_EXT_RGBX_RED EXT_RGBX_RED
+%define _cpp_protection_EXT_RGBX_GREEN EXT_RGBX_GREEN
+%define _cpp_protection_EXT_RGBX_BLUE EXT_RGBX_BLUE
+%define _cpp_protection_EXT_RGBX_PIXELSIZE EXT_RGBX_PIXELSIZE
+
+%define _cpp_protection_EXT_BGR_RED EXT_BGR_RED
+%define _cpp_protection_EXT_BGR_GREEN EXT_BGR_GREEN
+%define _cpp_protection_EXT_BGR_BLUE EXT_BGR_BLUE
+%define _cpp_protection_EXT_BGR_PIXELSIZE EXT_BGR_PIXELSIZE
+
+%define _cpp_protection_EXT_BGRX_RED EXT_BGRX_RED
+%define _cpp_protection_EXT_BGRX_GREEN EXT_BGRX_GREEN
+%define _cpp_protection_EXT_BGRX_BLUE EXT_BGRX_BLUE
+%define _cpp_protection_EXT_BGRX_PIXELSIZE EXT_BGRX_PIXELSIZE
+
+%define _cpp_protection_EXT_XBGR_RED EXT_XBGR_RED
+%define _cpp_protection_EXT_XBGR_GREEN EXT_XBGR_GREEN
+%define _cpp_protection_EXT_XBGR_BLUE EXT_XBGR_BLUE
+%define _cpp_protection_EXT_XBGR_PIXELSIZE EXT_XBGR_PIXELSIZE
+
+%define _cpp_protection_EXT_XRGB_RED EXT_XRGB_RED
+%define _cpp_protection_EXT_XRGB_GREEN EXT_XRGB_GREEN
+%define _cpp_protection_EXT_XRGB_BLUE EXT_XRGB_BLUE
+%define _cpp_protection_EXT_XRGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+
+%define RGBX_FILLER_0XFF        1
+
+; Representation of a single sample (pixel element value).
+; On this SIMD implementation, this must be 'unsigned char'.
+;
+
+%define JSAMPLE                 byte          ; unsigned char
+%define SIZEOF_JSAMPLE          SIZEOF_BYTE   ; sizeof(JSAMPLE)
+
+%define _cpp_protection_CENTERJSAMPLE CENTERJSAMPLE
+
+; Representation of a DCT frequency coefficient.
+; On this SIMD implementation, this must be 'short'.
+;
+%define JCOEF                   word          ; short
+%define SIZEOF_JCOEF            SIZEOF_WORD   ; sizeof(JCOEF)
+
+; Datatype used for image dimensions.
+; On this SIMD implementation, this must be 'unsigned int'.
+;
+%define JDIMENSION              dword         ; unsigned int
+%define SIZEOF_JDIMENSION       SIZEOF_DWORD  ; sizeof(JDIMENSION)
+
+%define JSAMPROW                POINTER       ; JSAMPLE *     (jpeglib.h)
+%define JSAMPARRAY              POINTER       ; JSAMPROW *    (jpeglib.h)
+%define JSAMPIMAGE              POINTER       ; JSAMPARRAY *  (jpeglib.h)
+%define JCOEFPTR                POINTER       ; JCOEF *       (jpeglib.h)
+%define SIZEOF_JSAMPROW         SIZEOF_POINTER  ; sizeof(JSAMPROW)
+%define SIZEOF_JSAMPARRAY       SIZEOF_POINTER  ; sizeof(JSAMPARRAY)
+%define SIZEOF_JSAMPIMAGE       SIZEOF_POINTER  ; sizeof(JSAMPIMAGE)
+%define SIZEOF_JCOEFPTR         SIZEOF_POINTER  ; sizeof(JCOEFPTR)
+
+;
+; -- jdct.h
+;
+
+; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+; the DCT is to be performed in-place in that buffer.
+; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
+;
+%define DCTELEM                 word          ; short
+%define SIZEOF_DCTELEM          SIZEOF_WORD   ; sizeof(DCTELEM)
+
+%define FAST_FLOAT              FP32            ; float
+%define SIZEOF_FAST_FLOAT       SIZEOF_FP32     ; sizeof(FAST_FLOAT)
+
+; To maximize parallelism, Type MULTIPLIER is changed to short.
+;
+%define ISLOW_MULT_TYPE         word          ; must be short
+%define SIZEOF_ISLOW_MULT_TYPE  SIZEOF_WORD   ; sizeof(ISLOW_MULT_TYPE)
+
+%define IFAST_MULT_TYPE         word          ; must be short
+%define SIZEOF_IFAST_MULT_TYPE  SIZEOF_WORD   ; sizeof(IFAST_MULT_TYPE)
+%define IFAST_SCALE_BITS        2             ; fractional bits in scale factors
+
+%define FLOAT_MULT_TYPE         FP32          ; must be float
+%define SIZEOF_FLOAT_MULT_TYPE  SIZEOF_FP32   ; sizeof(FLOAT_MULT_TYPE)
+
+;
+; -- jsimd.h
+;
+
+%define _cpp_protection_JSIMD_NONE JSIMD_NONE
+%define _cpp_protection_JSIMD_MMX JSIMD_MMX
+%define _cpp_protection_JSIMD_3DNOW JSIMD_3DNOW
+%define _cpp_protection_JSIMD_SSE JSIMD_SSE
+%define _cpp_protection_JSIMD_SSE2 JSIMD_SSE2
diff --git a/simd/jsimdcpu.asm b/simd/jsimdcpu.asm
new file mode 100644
index 0000000..c42c4ad
--- /dev/null
+++ b/simd/jsimdcpu.asm
@@ -0,0 +1,105 @@
+;
+; jsimdcpu.asm - SIMD instruction support check
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+        SECTION SEG_TEXT
+        BITS    32
+;
+; Check if the CPU supports SIMD instructions
+;
+; GLOBAL(unsigned int)
+; jpeg_simd_cpu_support (void)
+;
+
+        align   16
+        global  EXTN(jpeg_simd_cpu_support)
+
+EXTN(jpeg_simd_cpu_support):
+        push    ebx                     ; ebx is overwritten by cpuid below
+;       push    ecx             ; need not be preserved
+;       push    edx             ; need not be preserved
+;       push    esi             ; unused
+        push    edi                     ; edi accumulates the result flags
+
+        xor     edi,edi                 ; simd support flag
+
+        pushfd
+        pop     eax                     ; eax = current EFLAGS
+        mov     edx,eax                 ; edx = copy of the original EFLAGS
+        xor     eax, 1<<21              ; flip ID bit in EFLAGS
+        push    eax
+        popfd                           ; try to store the flipped ID bit
+        pushfd
+        pop     eax                     ; eax = EFLAGS as actually stored
+        xor     eax,edx                 ; zero if the ID bit did not change
+        jz      short .return           ; CPUID is not supported
+
+        ; Check for MMX instruction support
+        xor     eax,eax                 ; cpuid function 0
+        cpuid
+        test    eax,eax                 ; eax = highest standard function
+        jz      short .return           ; function 1 is not available
+
+        xor     eax,eax
+        inc     eax                     ; cpuid function 1
+        cpuid
+        mov     eax,edx                 ; eax = Standard feature flags
+
+        test    eax, 1<<23              ; bit23:MMX
+        jz      short .no_mmx
+        or      edi, byte JSIMD_MMX
+.no_mmx:
+        test    eax, 1<<25              ; bit25:SSE
+        jz      short .no_sse
+        or      edi, byte JSIMD_SSE
+.no_sse:
+        test    eax, 1<<26              ; bit26:SSE2
+        jz      short .no_sse2
+        or      edi, byte JSIMD_SSE2
+.no_sse2:
+
+        ; Check for 3DNow! instruction support
+        mov     eax, 0x80000000         ; query highest extended function
+        cpuid
+        cmp     eax, 0x80000000
+        jbe     short .return           ; function 0x80000001 not available
+
+        mov     eax, 0x80000001
+        cpuid
+        mov     eax,edx                 ; eax = Extended feature flags
+
+        test    eax, 1<<31              ; bit31:3DNow!(vendor independent)
+        jz      short .no_3dnow
+        or      edi, byte JSIMD_3DNOW
+.no_3dnow:
+
+.return:
+        mov     eax,edi                 ; return the accumulated JSIMD_* flags
+
+        pop     edi
+;       pop     esi             ; unused
+;       pop     edx             ; need not be preserved
+;       pop     ecx             ; need not be preserved
+        pop     ebx
+        ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+        align   16
diff --git a/simd/jsimdext.inc b/simd/jsimdext.inc
new file mode 100644
index 0000000..e1442de
--- /dev/null
+++ b/simd/jsimdext.inc
@@ -0,0 +1,376 @@
+;
+; jsimdext.inc - common declarations
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2010 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library - version 1.02
+;
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+;
+; This software is provided 'as-is', without any express or implied
+; warranty.  In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+;    claim that you wrote the original software. If you use this software
+;    in a product, an acknowledgment in the product documentation would be
+;    appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+;    misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+;
+; [TAB8]
+
+; ==========================================================================
+;  System-dependent configurations (selected by -DWIN32, -DWIN64, -DELF, ...)
+
+%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
+; * Microsoft Visual C++
+; * MinGW (Minimalist GNU for Windows)
+; * CygWin
+; * LCC-Win32
+
+; -- segment definition --
+;
+%ifdef __YASM_VER__
+%define SEG_TEXT    .text  align=16
+%define SEG_CONST   .rdata align=16
+%else
+%define SEG_TEXT    .text  align=16 public use32 class=CODE
+%define SEG_CONST   .rdata align=16 public use32 class=CONST
+%endif
+
+%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
+; * Microsoft Visual C++
+
+; -- segment definition --
+;
+%ifdef __YASM_VER__
+%define SEG_TEXT    .text  align=16
+%define SEG_CONST   .rdata align=16
+%else
+%define SEG_TEXT    .text  align=16 public use64 class=CODE
+%define SEG_CONST   .rdata align=16 public use64 class=CONST
+%endif
+%define EXTN(name)  name                        ; foo() -> foo
+
+%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
+; * Borland C++ (Win32)
+
+; -- segment definition --
+;
+%define SEG_TEXT    _text  align=16 public use32 class=CODE
+%define SEG_CONST   _data  align=16 public use32 class=DATA
+
+%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
+; * Linux
+; * *BSD family Unix using elf format
+; * Unix System V, including Solaris x86, UnixWare and SCO Unix
+
+; mark stack as non-executable
+section .note.GNU-stack noalloc noexec nowrite progbits
+
+; -- segment definition --
+;
+%ifdef __x86_64__
+%define SEG_TEXT    .text   progbits align=16
+%define SEG_CONST   .rodata progbits align=16
+%else
+%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
+%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
+%endif
+
+; To make the code position-independent, append -DPIC to the commandline
+;
+%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_       ; ELF supports PIC
+%define EXTN(name)  name                        ; foo() -> foo
+
+%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
+; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
+; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text
+%define SEG_CONST   .data
+
+; To make the code position-independent, append -DPIC to the commandline
+;
+%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_      ; BSD-style a.out supports PIC
+
+%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
+; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
+%define SEG_CONST   .rodata align=16
+
+; The generation of position-independent code (PIC) is the default on Darwin.
+;
+%define PIC
+%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing
+
+%else           ; ----(Other case)----------------------
+
+; -- segment definition --
+;
+%define SEG_TEXT    .text
+%define SEG_CONST   .data
+
+%endif  ; ----------------------------------------------
+
+; ==========================================================================
+
+; --------------------------------------------------------------------------
+;  Common types: NASM size keywords with their SIZEOF_* and *_BIT values
+;
+%ifdef __x86_64__
+%define POINTER                 qword           ; general pointer type
+%define SIZEOF_POINTER          SIZEOF_QWORD    ; sizeof(POINTER)
+%define POINTER_BIT             QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
+%else
+%define POINTER                 dword           ; general pointer type
+%define SIZEOF_POINTER          SIZEOF_DWORD    ; sizeof(POINTER)
+%define POINTER_BIT             DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
+%endif
+
+%define INT                     dword           ; signed integer type
+%define SIZEOF_INT              SIZEOF_DWORD    ; sizeof(INT)
+%define INT_BIT                 DWORD_BIT       ; sizeof(INT)*BYTE_BIT
+
+%define FP32                    dword           ; IEEE754 single
+%define SIZEOF_FP32             SIZEOF_DWORD    ; sizeof(FP32)
+%define FP32_BIT                DWORD_BIT       ; sizeof(FP32)*BYTE_BIT
+
+%define MMWORD                  qword           ; int64  (MMX register)
+%define SIZEOF_MMWORD           SIZEOF_QWORD    ; sizeof(MMWORD)
+%define MMWORD_BIT              QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT
+
+; NASM is buggy and doesn't properly handle operand sizes for SSE
+; instructions, so for now we have to define XMMWORD as blank.
+%define XMMWORD                                 ; int128 (SSE register)
+%define SIZEOF_XMMWORD          SIZEOF_OWORD    ; sizeof(XMMWORD)
+%define XMMWORD_BIT             OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT
+
+; Similar hacks for when we load a dword or MMWORD into an xmm# register
+%define XMM_DWORD
+%define XMM_MMWORD
+
+%define SIZEOF_BYTE             1               ; sizeof(BYTE)
+%define SIZEOF_WORD             2               ; sizeof(WORD)
+%define SIZEOF_DWORD            4               ; sizeof(DWORD)
+%define SIZEOF_QWORD            8               ; sizeof(QWORD)
+%define SIZEOF_OWORD            16              ; sizeof(OWORD)
+
+%define BYTE_BIT                8               ; CHAR_BIT in C
+%define WORD_BIT                16              ; sizeof(WORD)*BYTE_BIT
+%define DWORD_BIT               32              ; sizeof(DWORD)*BYTE_BIT
+%define QWORD_BIT               64              ; sizeof(QWORD)*BYTE_BIT
+%define OWORD_BIT               128             ; sizeof(OWORD)*BYTE_BIT
+
+; --------------------------------------------------------------------------
+;  External Symbol Name
+;
+%ifndef EXTN                            ; not set by the platform section
+%define EXTN(name)   _ %+ name          ; foo() -> _foo
+%endif
+
+; --------------------------------------------------------------------------
+;  Macros for position-independent code (PIC) support
+;
+%ifndef GOT_SYMBOL
+%undef PIC
+%endif
+
+%ifdef PIC ; -------------------------------------------
+
+%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------
+
+; At present, nasm doesn't seem to support PIC generation for Mach-O.
+; The PIC support code below is a little tricky.
+
+        SECTION SEG_CONST
+const_base:
+
+%define GOTOFF(got,sym) (got) + (sym) - const_base
+
+%imacro get_GOT 1
+        ; NOTE: this macro destroys ecx register.
+        call    %%geteip                ; ecx = address of the next instruction
+        add     ecx, byte (%%ref - $)   ; ecx = address of %%ref
+        jmp     short %%adjust
+%%geteip:
+        mov     ecx, POINTER [esp]      ; load the return address
+        ret
+%%adjust:
+        push    ebp
+        xor     ebp,ebp         ; ebp = 0
+%ifidni %1,ebx  ; (%1 == ebx)
+        ; db 0x8D,0x9C + jmp near const_base =
+        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
+        db      0x8D,0x9C               ; 8D,9C
+        jmp     near const_base         ; E9,(const_base-%%ref)
+%%ref:
+%else  ; (%1 != ebx)
+        ; db 0x8D,0x8C + jmp near const_base =
+        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
+        db      0x8D,0x8C               ; 8D,8C
+        jmp     near const_base         ; E9,(const_base-%%ref)
+%%ref:  mov     %1, ecx                 ; copy the computed base into %1
+%endif ; (%1 == ebx)
+        pop     ebp                     ; restore the ebp saved at %%adjust
+%endmacro
+
+%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------
+
+%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff
+
+%imacro get_GOT 1
+        extern  GOT_SYMBOL
+        call    %%geteip                ; %1 = address of the next instruction
+        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc ; presumably -> GOT; confirm
+        jmp     short %%done
+%%geteip:
+        mov     %1, POINTER [esp]       ; load the return address into %1
+        ret
+%%done:
+%endmacro
+
+%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------
+
+%imacro pushpic 1.nolist
+        push    %1                      ; PIC build: a real push
+%endmacro
+%imacro poppic  1.nolist
+        pop     %1                      ; PIC build: a real pop
+%endmacro
+%imacro movpic  2.nolist
+        mov     %1,%2                   ; PIC build: a real register move
+%endmacro
+
+%else   ; !PIC: GOTOFF is the bare symbol; the helper macros are empty ----
+
+%define GOTOFF(got,sym) (sym)
+
+%imacro get_GOT 1.nolist
+%endmacro
+%imacro pushpic 1.nolist
+%endmacro
+%imacro poppic  1.nolist
+%endmacro
+%imacro movpic  2.nolist
+%endmacro
+
+%endif  ;  PIC -----------------------------------------
+
+; --------------------------------------------------------------------------
+;  alignx n[,m]: align the next instruction on a {2,4,8,16,..}-byte boundary,
+;  like ".balign n,,m" in GNU as (m defaults to 0xFFFF).  FILLB(b,n) is the
+;  byte count from b up to the next n-byte boundary, measured from $$.
+%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
+%define FILLB(b,n)  (($$-(b)) & ((n)-1))
+
+%imacro alignx 1-2.nolist 0xFFFF
+%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
+               db 0x90                               ; nop
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
+               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
+               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
+               db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
+               db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
+               db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
+               db 0x8B,0xED                          ; mov ebp,ebp
+        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
+               db 0x90                               ; nop
+%endmacro
+
+; Align the next data on a {2,4,8,16,..}-byte boundary, padding with zeros.
+;
+%imacro alignz 1.nolist
+        align %1, db 0          ; filling zeros
+%endmacro
+
+%ifdef __x86_64__
+
+%ifdef WIN64
+
+%imacro collect_args 0
+        push r12
+        push r13
+        push r14
+        push r15
+        mov r10, rcx            ; rcx, rdx, r8, r9 are copied to r10-r13
+        mov r11, rdx
+        mov r12, r8
+        mov r13, r9
+        mov r14, [rax+48]       ; NOTE(review): assumes the caller set rax so
+        mov r15, [rax+56]       ; these are stack arguments 5 and 6 -- confirm
+        push rsi
+        push rdi
+        sub     rsp, SIZEOF_XMMWORD
+        movaps  XMMWORD [rsp], xmm6     ; xmm6 and xmm7 are spilled to the stack
+        sub     rsp, SIZEOF_XMMWORD
+        movaps  XMMWORD [rsp], xmm7
+%endmacro
+
+%imacro uncollect_args 0
+        movaps  xmm7, XMMWORD [rsp]     ; reload in reverse order of the saves
+        add     rsp, SIZEOF_XMMWORD
+        movaps  xmm6, XMMWORD [rsp]
+        add     rsp, SIZEOF_XMMWORD
+        pop rdi                 ; pops mirror the pushes in collect_args
+        pop rsi
+        pop r15
+        pop r14
+        pop r13
+        pop r12
+%endmacro
+
+%else
+
+%imacro collect_args 0
+        push r10                ; r10-r15 are saved, then overwritten below
+        push r11
+        push r12
+        push r13
+        push r14
+        push r15
+        mov r10, rdi            ; rdi, rsi, rdx, rcx, r8, r9 -> r10-r15
+        mov r11, rsi
+        mov r12, rdx
+        mov r13, rcx
+        mov r14, r8
+        mov r15, r9
+%endmacro
+
+%imacro uncollect_args 0
+        pop r15                 ; pops mirror the pushes in collect_args
+        pop r14
+        pop r13
+        pop r12
+        pop r11
+        pop r10
+%endmacro
+
+%endif
+
+%endif
+
+; --------------------------------------------------------------------------
+;  Defines picked up from the C headers
+;
+%include "jsimdcfg.inc"
+
+; --------------------------------------------------------------------------
diff --git a/simd/nasm_lt.sh b/simd/nasm_lt.sh
new file mode 100755
index 0000000..6cd7329
--- /dev/null
+++ b/simd/nasm_lt.sh
@@ -0,0 +1,57 @@
+#! /bin/sh
+# Rewrites compiler-style options into a NASM command line, then execs it.
+nasm_cmd=""
+asm_src=""
+have_out=no
+have_pic=no
+until [ $# -eq 0 ]; do
+    case "$1" in
+        -DPIC|-fPIC|-fpic|-Kpic|-KPIC)
+            # Every PIC spelling collapses into a single -DPIC define.
+            [ "$have_pic" = yes ] || nasm_cmd="$nasm_cmd -DPIC"
+            have_pic=yes
+            ;;
+        -f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86|-fobj| \
+        -fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64)
+            # Output-format selector meant for nasm: keep it.
+            nasm_cmd="$nasm_cmd $1"
+            ;;
+        -f*)
+            # Any other -f word may be a gcc code-generation flag: drop it.
+            ;;
+        -[Ii]*)
+            # The directory is either glued to the flag or is the next word.
+            inc_path=$(echo "$1" | sed 's/^-[Ii]//')
+            if [ "x$inc_path" = x ] && [ "x$2" != x ]; then
+                case "$2" in
+                    -*) ;;
+                    *)  inc_path="$2"
+                        shift
+                        ;;
+                esac
+            fi
+            if [ "x$inc_path" != x ]; then
+                # NASM needs the include path to end in a slash.
+                inc_path=$(echo "$inc_path" | sed 's%/*$%/%')
+                nasm_cmd="$nasm_cmd -I$inc_path"
+            fi
+            ;;
+        -o*)
+            have_out=yes
+            nasm_cmd="$nasm_cmd $1"
+            ;;
+        *)
+            # Note the assembly source if this is one; the word is passed on.
+            case "$1" in *.asm) asm_src=$1 ;; esac
+            nasm_cmd="$nasm_cmd $1"
+            ;;
+    esac
+    shift
+done
+if [ "$have_out" != yes ]; then
+    # NASM would write next to the input file; use <basename>.o in cwd instead.
+    obj_name=$(echo $asm_src | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%').o
+    nasm_cmd="$nasm_cmd -o $obj_name"
+fi
+echo $nasm_cmd
+exec $nasm_cmd
diff --git a/structure.doc b/structure.txt
similarity index 89%
rename from structure.doc
rename to structure.txt
index 51c9def..4c9fe39 100644
--- a/structure.doc
+++ b/structure.txt
@@ -1,7 +1,9 @@
 IJG JPEG LIBRARY:  SYSTEM ARCHITECTURE
 
-Copyright (C) 1991-1995, Thomas G. Lane.
-This file is part of the Independent JPEG Group's software.
+This file was part of the Independent JPEG Group's software:
+Copyright (C) 1991-2012, Thomas G. Lane, Guido Vollbeding.
+It was modified by The libjpeg-turbo Project to include only information
+relevant to libjpeg-turbo.
 For conditions of distribution and use, see the accompanying README file.
 
 
@@ -12,9 +14,9 @@
 
 We assume that the reader is already somewhat familiar with the JPEG standard.
 The README file includes references for learning about JPEG.  The file
-libjpeg.doc describes the library from the viewpoint of an application
+libjpeg.txt describes the library from the viewpoint of an application
 programmer using the library; it's best to read that file before this one.
-Also, the file coderules.doc describes the coding style conventions we use.
+Also, the file coderules.txt describes the coding style conventions we use.
 
 In this document, JPEG-specific terminology follows the JPEG standard:
   A "component" means a color channel, e.g., Red or Luminance.
@@ -22,8 +24,8 @@
   A "coefficient" is a frequency coefficient (a DCT transform output number).
   A "block" is an 8x8 group of samples or coefficients.
   An "MCU" (minimum coded unit) is an interleaved set of blocks of size
-	determined by the sampling factors, or a single block in a
-	noninterleaved scan.
+        determined by the sampling factors, or a single block in a
+        noninterleaved scan.
 We do not use the terms "pixel" and "sample" interchangeably.  When we say
 pixel, we mean an element of the full-size image, while a sample is an element
 of the downsampled image.  Thus the number of samples may vary across
@@ -60,9 +62,6 @@
 run-time option, because most machines can store 8-bit pixels much more
 compactly than 12-bit.
 
-For legal reasons, JPEG arithmetic coding is not currently supported, but
-extending the library to include it would be straightforward.
-
 By itself, the library handles only interchange JPEG datastreams --- in
 particular the widely used JFIF file format.  The library can be used by
 surrounding code to process interchange or abbreviated JPEG datastreams that
@@ -85,36 +84,6 @@
 nonetheless, they are useful for viewers.
 
 
-*** Portability issues ***
-
-Portability is an essential requirement for the library.  The key portability
-issues that show up at the level of system architecture are:
-
-1.  Memory usage.  We want the code to be able to run on PC-class machines
-with limited memory.  Images should therefore be processed sequentially (in
-strips), to avoid holding the whole image in memory at once.  Where a
-full-image buffer is necessary, we should be able to use either virtual memory
-or temporary files.
-
-2.  Near/far pointer distinction.  To run efficiently on 80x86 machines, the
-code should distinguish "small" objects (kept in near data space) from
-"large" ones (kept in far data space).  This is an annoying restriction, but
-fortunately it does not impact code quality for less brain-damaged machines,
-and the source code clutter turns out to be minimal with sufficient use of
-pointer typedefs.
-
-3. Data precision.  We assume that "char" is at least 8 bits, "short" and
-"int" at least 16, "long" at least 32.  The code will work fine with larger
-data sizes, although memory may be used inefficiently in some cases.  However,
-the JPEG compressed datastream must ultimately appear on external storage as a
-sequence of 8-bit bytes if it is to conform to the standard.  This may pose a
-problem on machines where char is wider than 8 bits.  The library represents
-compressed data as an array of values of typedef JOCTET.  If no data type
-exactly 8 bits wide is available, custom data source and data destination
-modules must be written to unpack and pack the chosen JOCTET datatype into
-8-bit external representation.
-
-
 *** System overview ***
 
 The compressor and decompressor are each divided into two main sections:
@@ -265,14 +234,14 @@
    1B.  Per-pass control.  This determines how many passes will be performed
         and calls each active processing module to configure itself
         appropriately at the beginning of each pass.  End-of-pass processing,
-	where necessary, is also invoked from the master control module.
+        where necessary, is also invoked from the master control module.
 
    Method selection is partially distributed, in that a particular processing
    module may contain several possible implementations of a particular method,
    which it will select among when given its initialization call.  The master
    control code need only be concerned with decisions that affect more than
    one module.
- 
+
 2. Data buffering control.  A separate control module exists for each
    inter-processing-step data buffer.  This module is responsible for
    invoking the processing steps that write or read that data buffer.
@@ -388,8 +357,9 @@
 
 * Data destination manager: writes the output JPEG datastream to its final
   destination (e.g., a file).  The destination manager supplied with the
-  library knows how to write to a stdio stream; for other behaviors, the
-  surrounding application may provide its own destination manager.
+  library knows how to write to a stdio stream or to a memory buffer;
+  for other behaviors, the surrounding application may provide its own
+  destination manager.
 
 * Memory manager: allocates and releases memory, controls virtual arrays
   (with backing store management, where required).
@@ -457,8 +427,8 @@
   buffered by the coefficient controller have NOT been dequantized; we
   merge dequantization and inverse DCT into a single step for speed reasons.
   When scaled-down output is asked for, simplified DCT algorithms may be used
-  that emit only 1x1, 2x2, or 4x4 samples per DCT block, not the full 8x8.
-  Works on one DCT block at a time.
+  that emit fewer samples per DCT block, not the full 8x8.  Works on one DCT
+  block at a time.
 
 * Postprocessing controller: buffer controller for the color quantization
   input buffer, when quantization is in use.  (Without quantization, this
@@ -507,9 +477,9 @@
 * Marker reading: decodes JPEG markers (except for RSTn).
 
 * Data source manager: supplies the input JPEG datastream.  The source
-  manager supplied with the library knows how to read from a stdio stream;
-  for other behaviors, the surrounding application may provide its own source
-  manager.
+  manager supplied with the library knows how to read from a stdio stream
+  or from a memory buffer;  for other behaviors, the surrounding application
+  may provide its own source manager.
 
 * Memory manager: same as for compression library.
 
@@ -541,7 +511,7 @@
 extra display passes when data is arriving faster than the display pass
 can run.  Furthermore, it is possible to abort an output pass without
 losing anything, since the coefficient buffer is read-only as far as the
-output section is concerned.  See libjpeg.doc for more detail.
+output section is concerned.  See libjpeg.txt for more detail.
 
 A full-image coefficient array is only created if the JPEG file has multiple
 scans (or if the application specifies buffered-image mode anyway).  When
@@ -573,10 +543,10 @@
 
 Arrays of pixel sample values use the following data structure:
 
-    typedef something JSAMPLE;		a pixel component value, 0..MAXJSAMPLE
-    typedef JSAMPLE *JSAMPROW;		ptr to a row of samples
-    typedef JSAMPROW *JSAMPARRAY;	ptr to a list of rows
-    typedef JSAMPARRAY *JSAMPIMAGE;	ptr to a list of color-component arrays
+    typedef something JSAMPLE;          a pixel component value, 0..MAXJSAMPLE
+    typedef JSAMPLE *JSAMPROW;          ptr to a row of samples
+    typedef JSAMPROW *JSAMPARRAY;       ptr to a list of rows
+    typedef JSAMPARRAY *JSAMPIMAGE;     ptr to a list of color-component arrays
 
 The basic element type JSAMPLE will typically be one of unsigned char,
 (signed) char, or short.  Short will be used if samples wider than 8 bits are
@@ -589,8 +559,7 @@
 With these conventions, JSAMPLE values can be assumed to be >= 0.  This helps
 simplify correct rounding during downsampling, etc.  The JPEG standard's
 specification that sample values run from -128..127 is accommodated by
-subtracting 128 just as the sample value is copied into the source array for
-the DCT step (this will be an array of signed ints).  Similarly, during
+subtracting 128 from the sample value in the DCT step.  Similarly, during
 decompression the output of the IDCT step will be immediately shifted back to
 0..255.  (NB: different values are required when 12-bit samples are in use.
 The code is written in terms of MAXJSAMPLE and CENTERJSAMPLE, which will be
@@ -618,7 +587,7 @@
 is helpful when dealing with noninterleaved JPEG files.
 
 In general, a specific sample value is accessed by code such as
-	GETJSAMPLE(image[colorcomponent][row][col])
+        GETJSAMPLE(image[colorcomponent][row][col])
 where col is measured from the image left edge, but row is measured from the
 first sample row currently in memory.  Either of the first two indexings can
 be precomputed by copying the relevant pointer.
@@ -637,11 +606,11 @@
 
 Arrays of DCT-coefficient values use the following data structure:
 
-    typedef short JCOEF;		a 16-bit signed integer
-    typedef JCOEF JBLOCK[DCTSIZE2];	an 8x8 block of coefficients
-    typedef JBLOCK *JBLOCKROW;		ptr to one horizontal row of 8x8 blocks
-    typedef JBLOCKROW *JBLOCKARRAY;	ptr to a list of such rows
-    typedef JBLOCKARRAY *JBLOCKIMAGE;	ptr to a list of color component arrays
+    typedef short JCOEF;                a 16-bit signed integer
+    typedef JCOEF JBLOCK[DCTSIZE2];     an 8x8 block of coefficients
+    typedef JBLOCK *JBLOCKROW;          ptr to one horizontal row of 8x8 blocks
+    typedef JBLOCKROW *JBLOCKARRAY;     ptr to a list of such rows
+    typedef JBLOCKARRAY *JBLOCKIMAGE;   ptr to a list of color component arrays
 
 The underlying type is at least a 16-bit signed integer; while "short" is big
 enough on all machines of interest, on some machines it is preferable to use
@@ -662,17 +631,6 @@
 eight rows of samples.  Otherwise the structure is much the same as for
 samples, and for the same reasons.
 
-On machines where malloc() can't handle a request bigger than 64Kb, this data
-structure limits us to rows of less than 512 JBLOCKs, or a picture width of
-4000+ pixels.  This seems an acceptable restriction.
-
-
-On 80x86 machines, the bottom-level pointer types (JSAMPROW and JBLOCKROW)
-must be declared as "far" pointers, but the upper levels can be "near"
-(implying that the pointer lists are allocated in the DS segment).
-We use a #define symbol FAR, which expands to the "far" keyword when
-compiling on 80x86 machines and to nothing elsewhere.
-
 
 *** Suspendable processing ***
 
@@ -685,7 +643,7 @@
 
 This scenario is supported for simple cases.  (For anything more complex, we
 recommend that the application "bite the bullet" and develop real multitasking
-capability.)  The libjpeg.doc file goes into more detail about the usage and
+capability.)  The libjpeg.txt file goes into more detail about the usage and
 limitations of this capability; here we address the implications for library
 structure.
 
@@ -733,13 +691,14 @@
 without causing problems; otherwise a 64K buffer would be needed in the worst
 case.)
 
-The JPEG marker writer currently does *not* cope with suspension.  I feel that
-this is not necessary; it is much easier simply to require the application to
-ensure there is enough buffer space before starting.  (An empty 2K buffer is
-more than sufficient for the header markers; and ensuring there are a dozen or
-two bytes available before calling jpeg_finish_compress() will suffice for the
-trailer.)  This would not work for writing multi-scan JPEG files, but
-we simply do not intend to support that capability with suspension.
+The JPEG marker writer currently does *not* cope with suspension.
+We feel that this is not necessary; it is much easier simply to require
+the application to ensure there is enough buffer space before starting.  (An
+empty 2K buffer is more than sufficient for the header markers; and ensuring
+there are a dozen or two bytes available before calling jpeg_finish_compress()
+will suffice for the trailer.)  This would not work for writing multi-scan
+JPEG files, but we simply do not intend to support that capability with
+suspension.
 
 
 *** Memory manager services ***
@@ -759,8 +718,8 @@
 it speeds up operations whenever malloc/free are slow (as they often are).
 The pools can be regarded as lifetime identifiers for objects.  Two
 pools/lifetimes are defined:
-  * JPOOL_PERMANENT	lasts until master record is destroyed
-  * JPOOL_IMAGE		lasts until done with image (JPEG datastream)
+  * JPOOL_PERMANENT     lasts until master record is destroyed
+  * JPOOL_IMAGE         lasts until done with image (JPEG datastream)
 Permanent lifetime is used for parameters and tables that should be carried
 across from one datastream to another; this includes all application-visible
 parameters.  Image lifetime is used for everything else.  (A third lifetime,
@@ -773,12 +732,11 @@
 1. "Small" objects.  Typically these require no more than 10K-20K total.
 2. "Large" objects.  These may require tens to hundreds of K depending on
    image size.  Semantically they behave the same as small objects, but we
-   distinguish them for two reasons:
-     * On MS-DOS machines, large objects are referenced by FAR pointers,
-       small objects by NEAR pointers.
-     * Pool allocation heuristics may differ for large and small objects.
-   Note that individual "large" objects cannot exceed the size allowed by
-   type size_t, which may be 64K or less on some machines.
+   distinguish them because pool allocation heuristics may differ for large and
+   small objects.  (Historically, large objects were also referenced by far
+   pointers on MS-DOS machines.)  Note that individual "large" objects cannot
+   exceed the size allowed by type size_t, which may be 64K or less on some
+   machines.
 3. "Virtual" objects.  These are large 2-D arrays of JSAMPLEs or JBLOCKs
    (typically large enough for the entire image being processed).  The
    memory manager provides stripwise access to these arrays.  On machines
@@ -857,20 +815,20 @@
 the following routines for use by the front end (none of these routines
 are known to the rest of the JPEG code):
 
-jpeg_mem_init, jpeg_mem_term	system-dependent initialization/shutdown
+jpeg_mem_init, jpeg_mem_term    system-dependent initialization/shutdown
 
-jpeg_get_small, jpeg_free_small	interface to malloc and free library routines
-				(or their equivalents)
+jpeg_get_small, jpeg_free_small interface to malloc and free library routines
+                                (or their equivalents)
 
-jpeg_get_large, jpeg_free_large	interface to FAR malloc/free in MSDOS machines;
-				else usually the same as
-				jpeg_get_small/jpeg_free_small
+jpeg_get_large, jpeg_free_large historically was used to interface with
+                                FAR malloc/free on MS-DOS machines;  now the
+                                same as jpeg_get_small/jpeg_free_small
 
-jpeg_mem_available		estimate available memory
+jpeg_mem_available              estimate available memory
 
-jpeg_open_backing_store		create a backing-store object
+jpeg_open_backing_store         create a backing-store object
 
-read_backing_store,		manipulate a backing-store object
+read_backing_store,             manipulate a backing-store object
 write_backing_store,
 close_backing_store
 
diff --git a/testimages/nightshot_iso_100.bmp b/testimages/nightshot_iso_100.bmp
new file mode 100644
index 0000000..5a27151
--- /dev/null
+++ b/testimages/nightshot_iso_100.bmp
Binary files differ
diff --git a/testimages/nightshot_iso_100.txt b/testimages/nightshot_iso_100.txt
new file mode 100644
index 0000000..9320886
--- /dev/null
+++ b/testimages/nightshot_iso_100.txt
@@ -0,0 +1,25 @@
+libjpeg-turbo note:  This image was extracted from the 8-bit nightshot_iso_100
+image.  The original can be downloaded at the link below.
+
+The New Image Compression Test Set - Jan 2008
+http://www.imagecompression.info/test_images
+
+The images historically used for compression research (lena, barbra, pepper etc...) have outlived their useful life and its about time they become a part of history only. They are too small, come from data sources too old and are available in only 8-bit precision.
+
+These images have been carefully selected to aid in image compression algorithm research and evaluation. These are photographic images chosen to come from a wide variety of sources and each one picked to stress different aspects of algorithms. Images are available in 8-bit, 16-bit and 16-bit linear variations, RGB and gray.
+
+Images are available without any prohibitive copyright restrictions.
+
+These images are (c) there respective owners. You are granted full redistribution and publication rights on these images provided:
+
+1. The origin of the pictures must not be misrepresented; you must not claim that you took the original pictures. If you use, publish or redistribute them, an acknowledgment would be appreciated but is not required.
+2. Altered versions must be plainly marked as such, and must not be misinterpreted as being the originals.
+3. No payment is required for distribution this material, it must be available freely under the conditions stated here. That is, it is prohibited to sell the material.
+4. This notice may not be removed or altered from any distribution.
+
+Acknowledgments: A lot of people contributed a lot of time and effort in making this test set possible. Thanks to everyone who voiced their opinion in any of the discussions online. Thanks to Axel Becker, Thomas Richter and Niels Fröhling for their extensive help in picking images, running all the various tests etc... Thanks to Pete Fraser, Tony Story, Wayne J. Cosshall, David Coffin, Bruce Lindbloom and raw.fotosite.pl for the images which make up this set.
+
+Sachin Garg [India]
+sachingarg@c10n.info
+
+www.sachingarg.com | www.c10n.info | www.imagecompression.info
diff --git a/testimages/testimgari.jpg b/testimages/testimgari.jpg
new file mode 100644
index 0000000..8966487
--- /dev/null
+++ b/testimages/testimgari.jpg
Binary files differ
diff --git a/testimages/testimgint.jpg b/testimages/testimgint.jpg
new file mode 100644
index 0000000..2501c61
--- /dev/null
+++ b/testimages/testimgint.jpg
Binary files differ
diff --git a/testorig.jpg b/testimages/testorig.jpg
similarity index 100%
rename from testorig.jpg
rename to testimages/testorig.jpg
Binary files differ
diff --git a/testimages/testorig.ppm b/testimages/testorig.ppm
new file mode 100644
index 0000000..2a5d1e9
--- /dev/null
+++ b/testimages/testorig.ppm
@@ -0,0 +1,4 @@
+P6
+227 149
+255
+0/-0/-10.21/51.51.62/62/83/83/:2/:2/:3-:3-:3-:3-:2/:2/91.80-80-91.:2/:2/80-80-80-80-80-80-80-80-6.+6.+6.+5-*5-*5-*4,)4,)4,)4,)4,)4,)4,)4,)4,)2-)/*$/,%0-&0-&1.'2/(30)30)63,63,74-85.96/96/:70:7.A:0B<0D>2F@4IA4JB5KC6KC6NE6MD5OC3NB2OC3OC3PD4RE5R?1Y?2b@4nB5}E6‹H8™G9£F7°H;¸F;¿F;ÅF=ÇG>ËH@ËH@ÐEBçFLíCLìEMëEIîCIïBDò?Cô=Aø;A÷:@ô:?ð<?é?Bâ?@×?<ËA7»=/µ@.µ@.´?-´?-³@-²?-¯@-­?.ªA.¦A-¢B, A-›@+™A+–A,”>-’?/’?/‘>.‘>,‘>,’<+’<+”>-”>-”=*”=*•>+•>+–?,–@/–?6•>5—=4Ÿ?3©B3³D3¼D4¿D4¹?0¶B3¬F:žH;‡G;oA2U9+C3&=52:659548437116005//5//72/72/72/61.61-61-50,41,//-.0-//-//-0/-0/-2.-2.-5,-4+,4*+3)*7(+=.1E69P:<jAE|HJŽNO•OQŸW[ªdnªoƒŸt”{£‡®€†º~ˆ½tz®`a‘TKvQEiSJgPH_MH^TQbfdo|}‚‘™ž˜£©Ÿ¢¨šž “‘ƒ{|lfgWYXFQNEUR[UQbUQb0/-0/-10.10.40-51.62/62/83/83/:2/:2/:3-:3-:3-:3-:2/91.80-80-80-80-91.:2/80-80-80-80-80-80-80-80-6.+6.+6.+5-*5-*4,)4,)4,)5-*5-*5-*5-*5-*5-*5-*3.*1,(0-&0-&1.'2/(30)41*41*63,63,74-85.85.96/:70:7.@9/B<0C=1E?3H@3IA4JB5JB5LC4LC4MA3MA1MA1NB2OC3QD4P>0U?1^A3jC4xD6…F5’E5œC3§C4¯A4µ@6¼B7ÀD:ÄE<ÅF=ÍC@áEIçBIèCIêDHíDGðCEó@Cö?Cø;A÷:@ô:?ð<?é?@à@@Õ@<Ê@6¹>/µ@.´?-´?-´?-²?,°?-¯@-­?.ªA.¦A-¢B,Ÿ@,›@+˜@*–A,”>-’?/’?/‘>.‘>,=+’<+’<+”>-“=,”=*”=*”=*•>+–?,–@1•A6–?6˜>5¡?4«A3µD4½C4¿D5»A2·C6¬F:œH=…G:l@3S9*B4)>63:65:6584382271160060072/72/72/61.61-61-50,41,//-.0-//-//-0/-0/-2.-2.-4..5,-5+,3)*5)+<-0C47N8:d=>vDC†JIMNšTV¤aj¥l}rŽ‘{¢†€®…¹{„»ou©[[RIvOCiOFePH`PH_RN_[Yfnot…†ˆ”™•™ž—š 
”™‘ƒ~qjk[][LVSJXSZVRaXQa/.,0/-0/-10.40/40/51.51.72.72.72.72.92,92,92,92,91.80.80.7/-7/-80.80.91/80.80.80.80.80.80.80.80.6.,6.,5-+5-+5-+4,*4,*4,*6.,6.,6.,6.,6.,6.,6.,4/+2-)1.)2/*30+30+41,52-52-63.63.74/74/850961961:70?8.@:.B<0D>2G?4H@5H@3H@3J@4I@1K?1K?1K?1L@2MA1NB2MA3QA2YB4dC4qC4|C2‡B2’A0˜<-¡;,§;.¯=2µ@6ºD:¿F=ÅD>ÙCEá@FãBGèCGêDFðCEôADø?Dú;@ù:?õ;@ð=@è@@ÝB>Ñ@;Æ@5·=.³@-³@-³@-²?-°?-¯>,­@,ª?-§@-¥@.¡@-A,›@+˜@*•@+”>-’?/‘>.‘>.‘>.=-=+=+‘>,‘>,’<+’<+“=,”>-•?.•?0•A6–?5š>3¤?3¯A4¹C5¿D5ÁC5ÀD8¹G<®I=™J=G;h@4Q:,B5,?74=77<66;5594183072/72/62/62/62/51.52-52-41,21,/1.-2./1./1.00.00.10.3/.5//4..5,-4*+4*+9-/>24I56[97l?:}FA†IDOM˜[`›fv•n‰Œwžƒ}­}‚¹u~·gl¤UU‰MEvLAkMAeOFcQHcNI_NK\[[esty‡‰ˆ‡Œ†Šˆ…†Š|xzlfhZZ[MVSLZU[ZT`[S`.-+/.,/.,0/-3/.40/40-51.61-61-61-61-81+81+81+80-80.7/-6.,6.,6.,6.,7/-80.80.80.80.80.80.80.80.80.5-+5-+5-+5-+4,*4,*3+)3+)6.,6.,6.,6.,6.,6.,6.,4/,30+30+30+41,41,52-52-52-52-63.63.74/85096196196/>7-?9-A;/B<0E=2E=2F>3F>1G=1G=1H<.I=/I=/J>0L@0JA0LE5NE4VE5^D3iD2sB1~A/†?-Œ9)”9'9*¤=.¬@3³E8¸H<ÁF>ÒDCÚACÞBEâDEèDEìBCó@C÷?Aú;@ù:?ö<Aî>@åA@ÚB=Í@9Â@3¶>.°@,°@,°@,¯>,®?,®?,¬?+©@-¦?,£@- ?,œ@+˜@*–@)”?*‘>,‘>.‘>.‘>.=-=-<*<*=+=+=+=+’<+‘>,”>-’?/•A6—@6œ>2¦@4²B6¼C8ÁC7ÂB7ÂF<ºJ?¬KB—J@|F:b@4L:.A7-@85>88=77<66:5294183083062/62/62/32.52-32-21,12--2.-2./1./1.00.00.10.10.5106005//5,-4+,6,-:01D22T71c;3rB8{E;ƒIE‰RU_l‹i‚ƒs˜}y«x}µowµae¢SRŒMDyL@pL@hPEgQFfLC^HCWNLZ^^fjnquyxy~xz€vwzokoa`bUWYLTTL]WY]V]]V^------/.,/.,0/-10.3/,40-40-40-50,50,50,50,7/,7/,4/,4/,3.+3.+3.+3.+4/,4/,50-50-50-50-50-50-50-50-3.+3.+3.+2-*2-*1,)1,)1,)4/,4/,4/,4/,4/,4/,4/,4/,41,41,41,41,52-52-52-52-52-52-63.74/74/85096196/<5-=6,?8.@9/B90C;0C;0C;0E;/D:.F:.G;/H<.I=/J>0I@1JG6MH5RG5YF5bE3jD1uB/|?,‚;)‹:)“:*š=,£B2¬F8²J=¼J@ÌGBÔDCØDDÝEDãCCéAAð=@ô<>ù:?ù;=ô<>í?>áB>ÓC:ÅA5¹?0²?-¯@-®?,®?,®?.¬>-¬>-ª?-¨>.¤?- 
?,ž?+š?,—?+•>*”?+‘>,?.?.>->-Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,<,>-‘>.‘@/”B4—A4ž@4¨@3¶A7¿C9ÅB:ÄA9¾C;·H?¦LC‘KCtE;Z>2E9-=6,A96@86?75>64=53<4294183062/43/43/23.32.23.12-02--2.,2.-2.-2./1./1.00.10.3205105104..2,,5,-7./>0/N5.Y9.e=1oA4tC<yKK€Ze„hp—zxªu{·ltµ_d¦TT”OGƒLBwNAmNBhMAeJA`GBYHEXKKWMPU^bc`fbbia`f\Z`VWZOUXMXXP^YV`WX`WZ,,,,,,.-+/.,/.,0/-3/.3/,2.+2.+3.*3.*3.*3.*5-*5-*3.+3.+2-*2-*2-*2-*3.+3.+3.+3.+3.+3.+3.+3.+3.+3.+3.+2-*2-*2-*1,)1,)1,)0+(4/,4/,4/,4/,4/,4/,4/,4/,40-41,41,41,41,41,41,41,41,52-52-63.74/85085096/;4,<5+=6,?8.@7.A8/A8/A9.C9/C9-E9-F:.G;/I=/J>0HA1JG6JI7NG5VF6\E3dC2lA0t?-|=,ƒ<*Œ;*”=,œ@1£F5ªJ:´J=ÄH@ÌEAÑFAÖE@ÞCAåA?ì>?ò;=÷;<ô:;ð<=é@=ÜC=ÍC8¾@2²?-®?,«@,«@,ª?+ª?-©>,©>,¨?,¥>-£@- ?,œ?-—?+•>*“>)‘?*?,>->->-Ž=,Ž=,Ž=,<+Ž=,‹<+<+‹<+‹<-Œ=,>/Ž?0”B4˜B3¡A3¬B5¹C9ÂC:ÅB:ÃB<»B:±HB£NGNEpH>T@5A;/96-@85A75?75>63=5394194173043/43/34/23.23.13.02-.3--3/-3/.3/.3/02/02/11/11/21/32032040/2.-1-,4..8.,G4-O4)X8+`<0e?6mGFyYd‚k…€uŸ||²w|¼nu»dh¯[[¡SLLB~OArL@hI=cH>`HB^ECX@BO<AGCHKDMJJQJJQIIPHKQGOUKVWO^YS`YS`XU+++,,,,,,---/-.0/-0/-0/-1-*1-*1-*1-*2-)2-)2-)2-)2-*2,,1++1++1++1++2,,2,,1++1++1++1++1++1++1++1++2,,2,,2,,1++1++1++0**0**3--3--3--3--3--3--3--3.+40-40-40-40-3/,3/,3/,3/,40-40-51.62/730730841850:3-;4,<5-=6.?6/?6-?6-?6-C90C9/E8/F90G:1I=1J>2H@3HE6GE6KE5QD4YD3_B2g@/n=,v=,|:*…9+Œ:,“=.›B2¢F7¬F8¼G=ÂF>ÉF>ÐE>ÙD@âC?ê@@ð>>ò::ñ;:ì<<äA<ÖC;ÆD6µ@/ª=)ª?-©@-©@-©@-¨>.¨>.§=-¥>-£=. 
?.ž?-š?-–?,”?+‘?*>)>+>->-Œ=,Œ=.Œ=.‹<-‹<-‹<-Š=-Š;,‰<,Š</‹>.Œ>1Œ?/’C4˜B3¡A3®B6¼C:ÃD=ÄC=ÀC=ºGB²QK¦YSXQsRIWI>CC7<?6>93@72>63=60:5194083/63.43.43.34/23.13.13.02-.3--3/-3/.3/.3/.3/.3/02/02/00.11/22021/10./.,2.-4/,?0+D0)K3)T8-Z<4eGGu]jƒs‰€«…„¾~ƒÇtzÆmp½ee¯VSšLC‚K?qJ=hG;cE>_FB]DBW?AN;?H:BE>HGDMHGQIGQHJRGNVKUXM^ZOaYNaXO++++++,,,---.,-/-.0/-0/-1-,1-*1-*1-*2-)2-)2-)2-*2,,1++1++0**0**1++1++2,,0**0**0**0**0**0**0**0**2,,2,,2,,1++1++0**0**0**2,,2,,2,,2,,2,,2,,2,,2,,3/,3/,3/,3/,3/,3/,3/,3/,40-40-51.62/62/73084185092,:3+;4,<5->5.>5.>5.>5,B8/B8/E80F90G:1I<3J=4I?5FB6FB6JB5OA4UB3\@2c?1j<-q<.w9*}8)…7*Œ:,–>0›B4¤B5²F:ºE;ÁF>ÊG?ÔG@ÞFAçCAîB@í;;ë;;ç>;ßB;ÑD:¿D4°A.¤>(¦A-¦A-¦A-¥@.¥@.¤?-¤?-¤>/¢>.Ÿ@.œ?.˜?-•>+‘?*>)>+>->->-Œ=,Œ=.‹<-‹<-Š=-Š=-‰=/ˆ<.ˆ<.ˆ</‡=0ˆ>1‹?1‘D4–C3¢B4­C6ºC;ÁD>ÁD@»EA¹PL²[T¦f]‘f]u_T[UIHNBCI?<92?82>71;6094.74-63.43.43.43.34/23.13.13..3-.3-.3/-3/.3/.3/.3/.3/02/02///-11/22022010.0/-0/-3/,8,,<-*C0*K70S<6^HJtbn‡z”Š¶ŒÆ„ˆÏz€ÌtwÆjl·YW ID„E=nG<dD<aC>^CAY@CV@DP>EKGQRKWUQ^WU`XT`VS]TT^SY_S^[LaZJaZJ,-/,-/--/--/------.,-.-+/.,/.,1-*0,)0,)0,)/+(/+(/+*/+*/+*/+*/+*/+*0,+0,+/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+1-,1-,2.-2.-2.-1-,1-,1-,1-,1-,1-,0,)1-*2.+3/,3/,3/,3/,3/,3/,3/,3/,3/,40-51.62/73080-92,:3-;4.=4/>5.>5.>5.@5/@6-B5-C6.D7/F91H;3G=4G>5H@5J@6P?5T>3X<1^90c7,m9.t8-|8-ƒ9.;/“=0˜?1ž>0§A3­A4µC8¾E:ÊG=ÔG>ÞE?åC@è@?êBAæDAÚE>ÈD8·B1ªA.¢B,¢A.¡@-¢?,¢>.¡=-¡=-¢>. 
?/œ<,š=,˜<-•>-“=,=+Ž=*Œ=,‹<+‹<+Š=-‰<,‰<,‰<,ˆ:-‡;-‰=/‡=0‡=0‡=0ˆ>3ˆ@4‰A5‹A4‘E5—D4£E9±I>ºG@»D>»EA¸MGµ[S¯f_£qf‘sh~rdjj^V^SJRGLLBJF=B>5=90:6-74+63,33+54/34.34/23.02-/1,.0-,1--2.-2.-2.-2./1./1./1./1.02/02/11/11/11/11/11/40/4+0;/3A32C41J;8]NQym{‹…Ÿ“»”–ϐ•Ùƒ‰ÓtzÆjn·`c¨Z[”LItHBdA>]>>X?BUAIVLU\U`bbqno~yv†|s€vlyohth_k_W_P^]Ib\Fc]G,-/,-/--/--/------.,-.,-/.,/.,0,+0,)0,)/+(/+(/+(.*).*).*)/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+1-,1-,1-,1-,1-,1-,1-,1-,1-,1-,1-,0,+1-*2.+2.+3/,3/,3/,2.+2.+2.+2.+2.+3/,40-62/62/80.91.:2/;4.=4/>50>50>5.?4.?4.B5/B5-D7/E80G:2H;3H>5H>5L=6O>6R>5V;2Z90_7/i81p7.x8.8/Š:/<1–<1›=1¢@3§A3­C6´D8¾E:ÉF<ÔE=ÛC>ßD@àEAßGBÔG>ÄF:³D3¥B/žB-ŸC.žA/Ÿ@.ž?-ž?-ž?-ž>.Ÿ?/š=,™>,–=-”=,=+>+Œ=,Œ=,‹<+Š=+‰<,‰<,‰<,‡;+‡;-…<-†</…=/…=1ƒ=1…?3†@6ˆB8ˆB6“G9˜F8£G<¯JB¸HD¸GC·KH³TN±d\ªqfŸ~oo|mmseZfZNXMLNCKI=EC7A>5=:188.44,11)23-23-12-01,/1,/1,.0-.0-/1.-2./1./1./1./1./1./1.02/02/11/11/11/11/11/2015+49-7<15?54I?=^UVys}Šˆž““¹”—ʐ–Ô„‹ÏyÂqy¸kt­hnž]`XZqSUjRWjT^hZgmfvvr‚tˆ~’‡ƒ•‰~Žw‡zr€qftgZeT[ZE`ZBb\D-.0-.0-.0-.0-.0-.0.......,-.-+.-+-,*/+*.*).*'.*',+),*+,*+,*+,*+,*++)*+)*-+,-+,-+,-+,-+,-+,-+,-+,,*+-+,-+,-+,-+,.,-.,-.,-.,-/-./-./-./-./-./-./.,0,+0,+1-,2.-2.-2.-2.-1-,1-,1-,1-,1-,2.-3/.40/51.80.91/:20<41=31>42=31=4/?40?4.A4.A4.C60D71F93G:4H;5J;6K<7N=6P;6S:5W83[6.c60k6.t5,}7/‡9/;0”<2—=2ž@6 @4¢@3¨@3±C6ºD8ÅE<ÍD<ÕF@×HBÔIBÌI?¾E:®C3¡B0œA.B/œA/œ?.›>-›>-›>-›>/›?0˜<-–=-”<.“=.>-Œ=,Œ=.Š=-‰<,‰<,‰<,ˆ<,‡;-…<-„:-ƒ;-„<0‚<0‚<2‚>3ƒ?4…A8‡C:ˆD9”J=—H; 
H>¬KD²KF³LG²SM®`V­sg¦qŒz‘Ž{‚‰ws€ocqbXcUNRDMN@HI;DD8@@4:</46+/1&12*01+/0+/0+./*/0+//-//-//-.0-//-//-//-//-//-//-00.00.00.00.00.00.00.1/26+98,:8/4;63HE@_^Yzy~‹š”±’—¿•Ç„ŽÃ}‰»{‰¶|‹²}Œ«}ˆšyƒq~‡o~ƒn~~pƒyŽ…ƒ™‹¡”‘¨˜¥”ˆŒƒ•…|{j{k\hTXX@]Y>_[@-.0-.0-.0-.0-.0-.0.......,-.,--,*-,*/+*.*).*'.*),*+,*++)*+)*+)*+)**()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+-+,-+,-+,-+,.,-.,-.,-.,-.,-/-./-./-./+*0,+1-,1-,1-,1-,0,+0,+0,+0,+0,+0,+1-,2.-3/.40/91/:20;31<42=31=31=31=31>3/>3/@2/@3-A4.C60D71E82F93H94I:5J;6L:6N94Q83T50^72e60o6/x8/‚90‹;2<2”=3š@7›?4›?2Ÿ?1¥A2®B5¸C9¿E:ÈH?ËH@ÊJAÃH@¶F:ªB5žA0™@.šA/™@.˜?-—>,—>,™>,™=.˜?/–=-”=,“=.=-Ž=,Š=+Š=-‰<,‰<,ˆ;+‡;+‡;+„;,„;,ƒ;-€;,;/€<1=2>5ƒ@7ƒC:‡D<ˆE<”KB–H>žG@§JE®LI®QL­]Vªj^§€o¡yšš‚›ƒ†•€z‹xm{lbm_SZJQUFKO@EI:@D6;=057,13(01)/0(./*.-).-)/.*0/-0/-0/-0/-0/-0/-0/-//-0/-//-10.00.10.00.00.00.00.3.27,:6*83-1961HJ?bfX{€z‹““£–°Œ•¶ƒ²¯…™²¤¶’¨³”§«‘££Œ ž‡ž˜‚™}˜‰œ‹ˆ£”°š˜±›”­—‹£…›†~‘}k|iXfOSU=ZV;^Z?+/2+/2-.2-.2-.0-.0..0..0------.-+-,*-,*-,*,+),+),*+,*+,*++)*+)**()*()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*++)*+)*-+,-+,-+,.,-.,-/-./-./-./+*0,+0,+1-,0,+0,+/+*/+*0,+/+*/+*0,+0,+2.-3/.40/:12:12;31<42=32=32<20<20>31=2.?1.?1.@2/A30B41C52D63C84D95E:6G96H94K84N50X72_60i70s80}:1†<1Œ>2>2—@6—?5—?5—?3œ@3£C5«C6³E8ºE;½G=¾H>¹G=°D8¦A5œ@1—@/—@-—@-–?.•>-”=,–=-–=/–=/•<.“;-’</Ž<.‹<-Š=-‰;.ˆ<.‡;+‡;+†:*†:*„;,ƒ:+‚:,€:.€:0€<1€=4?6‚B9ƒD=‡F@ˆH?‘KC’H?šGA£KG¨PL©YR©f]¦vh¢Œw›™€•¤‡Œ£‡†ƒ~‘}t‚qjue\eTV_LNUCEL:?F6<@27:/58-12*/0(.-)/+(/+*0,+1-,2.-1-,1-,1-,1-,1-,/.,1-,/.,2.-0/-2.-0/-0/-0/-0/-2-18,:5)51++66,GL8aiRzƒpˆ’‡—–š ™¥ˆ–£‡›¢ª©›¹±¡¿³š¶¨š¶§›·¨˜·¥‘±œ‰«’‰«±”’³–•´•­ˆ¢…„›{’xgzdTbIQT9VU9YX<,03,03./3./3./1./1//1//1....../.,.-+.-+.-+-,*-,*.,--+,-+,,*++)*+)**()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*++)*+)*+)**()*()*(),*+,*+,*+-+,.,-/-./-./-./+*/+*0,+0,+0,+/+*.*)-)(0,+0,+0,+0,+1-,2.-3/.40/:12;23;31<42=32=32<21<20=20=20>0->0-?1.@2/A30?40@51@72@93A:4B94C84F74H5/Q51X5/a6/l8-v:/€</†>0‹=1“?5•>5“?5“?4•B4šB4 
C4¥D4¬B5°D8´E:±E;ªB7¢@5š>1–>0•?.•@,”>-“=,“<+“<+”<.”<.“;-’<-<.;-‹<-‰<,‡;-‡;-†:*†:*†:*ƒ:)ƒ:+9+9-9-€<1<3?6€A8‚C<…F?ˆIBŠICŽIBG@—HD OK§VS§`Z©pe¤ƒrœ”}– …¨Šˆ§ˆƒ „~–~z‡uq|kdp\]iUR^JJS@BK:>E5<@29<134,22*1-*/+(/))0**1++2,,1++1++1++1++1++0,+0,+0,+1-,1-,1-,1-,/.,/.,/.,2,.8*75(13+(56&EK1\gGu‚d†“yŽ›Š‘ž”’žšž˜¥——´ž¢Å¥©Î­¥É­§É°§Ë±¤È¬—¾Ÿ‹³‘ˆ°²Ž²Ž²¬Š„¡‚€™{wŽrdx]R`FNR7QQ5SR6,03,03,03,03./1./1./1./1000//////////.,/.,/.,.-+/-./-..,--+,,*++)*+)**()+)*+)*+)*+)*+)*+)*+)*+)*,*++)*+)*+)**()*())'()'(+)*+)*,*+-+,.,-/-./-.0.//+*/+*/+*/+*/+*.*)-)(,('0,+0,+0,+0,+1-,3/.40/510:12;23<34<34=34<21<21;10<1/<1/>0/=/.>0-?1.@2/>3/=52;62;83<94=:5>93@72C60G4.O4-Y4,d5+n8,x:-;.…;.<4‘<5>3@3A2“B1—A2šA1 >1¦@4ªB7ªB9¦A7Ÿ>5˜>3•?2’?-’?-‘>,=+‘;,’<-’<-’<-‘;.‘;.<.;-Š;.ˆ:-‡;-„;,…9)…9)ƒ:)ƒ:)‚9*9+~8,~8,=2€=4€@7B9„E>…HCˆKFŒMFŒICŽGA˜JH¡SO¨]Z©hb©{n¤Žy™œ’§ˆ‰¬‹‚ªˆ€¡„€˜€~‹wxnjwcdr[ZgSQ]IKTCEL<@D6<?467/44,3/,1,)1()1()3*+4+,2)*2)*2)*2)*2)*0**0**0**1++1++0,+0,+0,+0,+0,+1+-6)25)-4-%46!BI'Wc;q€W‚’mœ} †’¡Œ Š¨‰•¸Ž È”¤Ð©Ó«¨Ò®§Ñ­¡Ë¥“½—…¯‡‚¬„‡°†ˆ®…Œ±ˆ‹«†Ÿ{{”tqˆk_sWM]BIO3JM0KK/./1,01./1./1.0/.0/.0////00.00.00.10.0/-0/+/.,1-,2.-1-,1-,0,+/+,,*++)*+)*+)*+)*+)*+)*+)*+)*+),+)*+),+)*-(,*(),())'()'()'(+)*+)*,*+-,*.,-.-+/.,0/--,*-,*/+*/+*/+*.*),(',('1-,2-*2-*2-*3.+4/,50-61.;31;31<42<42=32<21;10;1/<1/<1/=/.=/.=/,>0-?1.=2.=4/=60;81::29:49:4;81?61C2+J1,T1+^3,g7-o9-u=.{=0‡=4‹=3‹?2Œ@2Ž@3@1’?1•=/˜</œ>2¢B6¡C7žB7™?4–>2•?2”>/“=.=-<,Ž<.Œ=.Œ=.>/Ž<.Ž<.‹</Š;.‰;.ˆ:-†:,„;,‚:+‚:+€;+~;+~;+|:,}9,|:.€>2>5€@7‚C:ƒG?†KCˆOH‹OGŒHEŽHF“OL˜[Vžg`uiž‡už˜€”¡…ª‹‰¬‹ƒ¨‡€¡„~›|ytˆoj~ccz^]qXWfOO\HIRAAH8>@399/85.7/,3+)2()2()3)*4*+0*,/+*0*,0**0*,0*,2),2),3*-1+-1+-1+-0,-0,-0,-1+/5*05+,4-%46!?F%T`8n}Rg‰™tžz¡~ˆ¡zŠ§{‘¶‚›ÄˆŸËŸÉ™žÇÆš˜Á•Œ·Š‚­€…®‚Œµ‰„«‰®…‰©‚~œxvolƒfZnSJZ@GM3FJ1DF./0+.0+/0+01,01,12-12-21-43/43/43/62/51.41,3/,4/,50-50-4/,3.+3--1-,0,+0,+.,-.,-..0--/,,.++-*).))+.)/.)-/(/.)-/)-.)-.*+.*+/+*/+*-,*.-)--+./*./*./*------.,-/-./.,0/-2.+2.+2-*4,)5-*6.+90+:1,;2+;2+=4-=4->50>50>50=4/<3.<3.=2.<1-<1/;0.=/.>0/>0/@1.A0)@1*;4*77-39/39/560:3-?-+F*'L
)'S*&Z/(`5,d<0k@0yA2€A0†A2‹B3@3“=0“;/’8-“;/”>/”A1•B2”C2’A0‘>.‘;.—:2—:2’;1>1ˆ?0„?/‚?.ƒ>.ˆ@1ˆ?0Š>1Š<0‰;/ˆ:.†:-ƒ;-{9+~@3w<.q7)w=/w=/v;-}?2{;/‚@4ˆE<ŠJAˆLB†MB„PE‡NE‘KI—SR”b[‘ocŒ}j‰‰q‰–|Šž‚¤‡¦‰Ž§Š§Œ‡¤ˆ~Ÿ‚uš{o—uiib„_[zXZsU[mSWeNPWEJK=C?6@93;0.6**4(*3'+3'+1&*,*-)+***,*(),'+.(,1(-2'-3(.3(.3(02(00)00)0/*1/*0/)-1++0-(//#<?,V\BpzX‹gœu‹sŒ¡vŒ¦wŠ¨v‰ªuŽ´{™¿†ŸÁ¾–ºŠ³ƒˆ®}…«|ƒª{…ª~ƒ¨}ƒ¦~€ž|y”ur‰mh|cYkUN[GIP>CH4?B/01+01+01+01+12,12,21,32-43.43.74/74/63.52-50,50,7/,7/,6.,6.,3.+2-*1-,0,+/-./-./-0..0-,1+*/)*.)(-.)//(//(//(//)-/)-.*+.*+/+*0,+.-).-)./*./*./)/0+.....0....../.,/.,2.+3.+5-*5-*7.)8/(:/)<1+<2)=3*>5,>5,>5.?6/>5.=4-=4/<3.=2.<1-<1/;0.=/.>0/>00@1.C0*C0)A2+>3-:5/94.:2/<1/?-+D*)I*(N+'T/)Z5-`;2e=1pA1x@/€A0ˆ@2?4’>4”;3“;19.‹</‹=0Œ@0A1A1B2’@2•;2•;2<2‹=0‡?1ƒ@/?/‚?.…@1†>/‰=0ˆ<.‡;.„;,ƒ;-€;,x8,x<1t:.n9+t?/s>.r8*u:,}=1ƒA5‰E<‹H?ŠKB†MB„PCˆOF‘JH—SR’f]wi„†p€“wž„¥†ˆ§ˆ§Š§§Š¤‰Ÿƒwœ}o˜ve’iaŒaZƒ[Y{X\wXZnSSaJNUCFH;C@7<737/-3*+2)*1(+.(*,*-**,+),+),-(,/)-2(02(04)13)13)21*20+2.+2.+4.+22-13/.0,)--%8:-SXDox]€‹i‡–o†™l†žnˆ£p†¤p„¥pŠ­w“¶€™¹‡–¶„“³¯|†«x‚¨w‚¨yƒ¨|ƒ¦|„¤{x’uqˆnh{eYjWN[IEL<@D5;=/12,12,12,23-23-23-43.43.54/65085085085074/72.72.80-80-7/,7/,4/,3.+1-,1-,1-.0./0.1/-0..0,,.+*/+)./(/1'//)-/)-/)-/)-.*+.*+0,+0,+/.*/.*0/*0/*/0*/0+//-///0./0./0/-/.,1-*2-*6.+7/,90+:1*<1+=3*>4+?5,?6-@7.@7.@7.?6/>5.=4/=4/=2.<1-<1/;0.;0.<1/<1/?1.C2+E0+H/+K--L,/K+.I*/E+.A-,@.*A.(F/)N/*X1*b3-g5.j:,o;-x<1=4‡<6Œ<5:4:4Š;4‡=2†>2…?3†@4ŠB6ŒB5C6Ž?2=/Œ>1Š>.‡>/†?-†=.ˆ<,‰;.‰<,ˆ:-‡;+ƒ;,<)~=+{<+}=1z<1v:/v<0x@1x@1v<.v;-?5ƒC9‰F=‹H?‰JAˆLB‡NEŠNF’KI˜TQ–f\‘wh‡…n‚‘t{¤ƒ…¨‡ˆª‰ªŒ©ŒŽ¥‰ˆŸƒš}y˜xi–ma’e\‹a\†`^ƒa[|]UrVQgPHYGBPA:D93:2.3,+0**,)**(1(+1&*1&*1&*0'*1(+2).1+//*.-+.,+0+,0)-0(-0(-1)-01/23/./+(**"57*QVBmu]|Ši€k~“j™lƒŸo€ o~ 
m‚¤q‰«x¯|‹­zˆªx„©v‚¦v€¦u€¦w€¥y€£y‚¢}}›yw‘tp‡mh{eYkUN[IDH9>@399-23-23-34.34.45/45/54/54/761761:72:72:72961:51940:2/:2/91.80-50-50-3/.3/.3/03/01/01/20.1..0--/-+.0)02).0*.0*.0*.0*,/+,/+*1-,2.-0/+0/+10+10+01+12-11/1111/010.10.2.+3.+3.*91.92,;2+<3,?5,@6-A7.A7.B8/A8/B90A8/A81@70>50>50=2.=2.<1/<1/<1/<1/=20=2.B3.E2.M//R+0W(0U&.P'/I).C/.<1+;2+?2)H1)S0*_.*d/)i9/k;/u<3}<6†<9‰;9Š;7ˆ:6‰>9†?9†B9…B9…B9†B9‡A7ŠB6ˆ@1ˆA/ˆA/‡@.‡>-ˆ<,‰<,‰:+‹9+ˆ9*‡:*ƒ:)<){=(x>(x>*‚>3{7.z7.z<1v;-w=/|A3{@2€B7ƒE:†H=ˆI@‡KA‡MBˆOFŠQHŽSK\QŽh[‹tb…g€‹m~–vž|ƒ§ƒ„©‡ˆª‰‹ªŠ‹¦‡‡ ‚ƒš~}˜yq•oj“ifgfŒeg‹gd†e_~_\v[PhRK^KBPA8D62:/.4*,/(+*%2&(5%(4%(2&(1&*/)+/+,.,-++-*+-*+-(,-(,/',/',/*+-/+,1++0+(/,%99-PTCiqYvƒe{Œhyh|•kœn}žoyžk{ m€¥r‚§t€¥q}£p}£p~¤s~¥v~¥x}¤x|¡x}Ÿzz™wuqn‡je|bXlSN[GDF9?=1:8,45/45/45/560560671761761872983;83<94<94<94<73<73<41<41;30;3083072/61.61.5106216213123121/00./1-.2,04+.4+.4+.2,.2,,2,,2,,3/.3/,3/,40-21,21,32-32.22022032032051051.61.61-;30<5/>5.?6-A7.B8/E8/C9/E;2E;2E;2D:1C90B8/A60@5/>3/>3/=2.=2.=20=20>31>31@51F42M02T,4X)3W(2R)1K,1B30:6-77-:6*B4)M2)X/)^/)f:1j;3s<7z=:‚<:†<;‡;;†::‚;7>8@:‚C<ƒC:ƒC:…@9„@7‚C2‚C1ƒB0„?/‡>/ˆ<.‰:-‹9-Š8,‡8+…9+‚:+~=+x>*v?*x=+9-|/'‚8/„>4w4+t6+}A6}C7E:€G<‚I>„KB„KB†MDˆRH…WJ}`N{iQ€pYu]‚|bƒ†i†“uˆ|„¢~‚¦‚‚ª…‚¬†©„¥€{ž}z™wz’p{lyŒlwŒkumsŠmm„gkd`rZ[hTR[JIM>@@4;7,70&5*$6('5''3''1'&.((,+)++)+-*(,+(,+*,+*,+++-,*-,*-.*+,#$3+(50*85,BC5UZFfpWn}^tˆeqŠbuex˜ivšjs™htšiwŸkz¢nx lwŸmx n{¢s{¥u{¥w|£wyžuzœww–tsŽom†hd{_WkPN[GCC7>:195,560671671671782782872983983:94=:5>;6>;6>;6>95>95?74?74>63>63;63:5294194184195495484384343151240/6-06-.6-06-.4..4..4..4/,40-40-40-51.32-32-43.43.43/442542540841850:51:5/>71>7/@7.A8/C90D:0G:1H;2G=4G=4F<3F<3E;2C90B71A60@51@51?40?40?42?42@53?53@72D63I35P16T/6S.5P05J22C52=90<:.=9-C7)I6(Q3)W2)]2+d3,l50v64}77‚87ƒ77ƒ77~75}:4}<6~?8€?9ƒ@8†?;…A8C4€C1B1ƒ@0…=/‡;.ˆ:.‡9-…9,ƒ9,‚:,<,|=,y>,x?,|=,‡4,‹2,¡LE¨XQ‹A8|90‚F;€K=yH:zJ<{M@|NA}OBQE‡UJ‚_LrkOptS|uX‡w]yb˜iŸ‹s 
”z—–z‘œ|‹£ˆ¨ƒ…§‚€¤~{Ÿy~™vƒ‘p…Žo„pŽpr{ŽpwŒmtˆlj~cfv\_hSV[GOM>GA3A7+=0';,'9+(6+)3+(/,',-'+-().().(+-(-,*/+*3)*4(*7'*7'(3($<3,E>4IG:QR@^cMgqVjyZoƒ^k„\l‰]p‘bq•eo•do—eršfuŸmrœjq›itžnx¢rz¤vy¢vyŸvvštw™vu”rokk„fc|^UlON\ECC7@91;4,782782782782893893:94:94:94;:5>;6?<7@=8@=8@;7@;7B:7B:7A96@85=84=84<73<73<73<74<74<74;639529338308/09/.8/080.80.80.61.61-61-61-52-63.63.63.74/54/540651841952;74<94=84@93@70A8/C90D:0G:1H<0I=1J>2J=4J=4J=4I<3F<3D:1C90B8/A81A81@72?61?61@72A83A83?74@85B86E:8G96I:7H96H94E80E8/E9-E9+G9,I9*K9+Q7*Z/&d/'n3-z63ƒ98‰;9‹;:‹=;‹A>‡@:ƒ>7<4:3‚<4ˆ=8‰@9ƒA5B3‚@2ƒ?2ƒ=1„<0;/€</=/}>/|>/|>/|>/}>/=/†9/1+£;8ÎkfÛ~y­ZTD=ˆLAN@tJ<rN>pQ?qR@tSB{WG‚[J~eOmsOpzU„y[˜u_©oc¶mf¾qk½wo»‚w±Œz§—~žž‚–¡¡~…y…™vˆ‘r‹rˆ‘r…“r”t}•sx“pt’pm‹ii„edx]]kRV^GMP;ED0B;+@3+?2,;0,70*30)00(./)./)01+30+7-,;+,?),D',F%,D'+F5-LC4VP@[XE`bLgmSjxWj{Wl‚[g‚Wg†Zl^o“cn”an–bršfskpšjo™irœnvŸsy¢xxžwvšvs—su—vs’rn‹lk„db{[UmMN\CGH:E<5@707827828938938939:4:94:94;:5<;6?<7@=8@=8@=8A<8A<8D<9C;8C;8B:7?:6>95>95=84>95>95>95>95=85<73:52<41:0/:0.91/91.91/91.91.72.61-61-63.63.63.74/74/74/540651952;83<94?:4B;5C<4A8/B:/D:0E;/H<0I=1J>2J>2K>5K>5K>5J=4F<3E;2C90B8/B92B92A83A83A83A83B94A:4@85A96B;5D=7F=8G<6K<5N;4M6.O7-Q6+Q6+R8+P9+P9)W7(f6*r6,~;3‰@9•D@›HDŸJGŸLF 
QJ™LDŽD;†<1„7-†8.Œ91=6ˆ>5‡>7†=6…<5…<5=4}=3z>3x@3vA3x@3z>3<3ƒ:3‰84’42˜(&³=;í{zþ•’Åhc–G@‰K@xH:nM<jQ=fT>hV@lWBt[E`L€hP{rQ…vUžt\´l^É__ÓV\ÙQ[×T\äouÙzx̆~À€µ•€«•}£’xŸvœŠrœŠt™u•u‘‘u‹‘u†‘s‚‘r|‹ny†ju{anpZgbN_TBUE5R</O4-N2.I0,D/*>/(9/&7.'6/'92*;0*>/,B-,G*,I),L'.I)*TB6YQ<d^HgeLilOnuVm{Xl~Xn…[hƒVg‡Xm]p•bq–br™dvœit›lq˜ko–jr™mwvzžxxšyt–uu”uu”usroŠkjƒeb{[UmMM]BMN@KB;F=69:49:49:49:4:;5;<6=<7=<7=<7=<7@=8@=8A>9B?:D?;D?;E@<E@<E@<D?;D?9C>8E>8E>8B;5B;5B;5B;5B:7B;5B:7A:4A75A83A75@72@64>71>71>71<71<71;60:5/96185074/74/961961961:72<73>93A:2B;1C:1D<1F<2J>2L@4L@2N@3N@5N@7N@7L?6K>5J=4H;2F<3E;2B90B90@91?80?82?82@93@93<5/MD?M@:K:3T?:S73O0+a<6\3-`5.`4+^/'^/%f6,m=1q=0ŠJ>G;—I?ŸKAŸF>›>7›>7žD;¦ND¥QF©WK«YK¥OB™A5—;0›>6“98’:9Œ65†52†84„?8|@6r>1rB4oA2q=0w;1‚72Œ43’-1š',ÈHIèbaÅEDÍWUáyv«VO†F<„VFnS>i[AibFjeHj`En^D|dLjT’bL´l]ÑnhÛX]à;Lç-Eò+Hõ0Lö@YìI\ä[eånrâ{|ۀ}ۃ؉‚Ã~w½wº„x¶†x²ˆx­‰y¨‹y¥Œxš€o™~m˜xi“ocbY„UM{IBxA>u:<i.2d04b14P&'G%#L0-M51G4.E2,D1-F1.F.,G+*K-/Q51YH6`V;f_CgdEkoNu|Zu„]oYlƒWl‡Zp_r’at”cu—dw™fxšhq•is˜ot–qr”ou”ty˜x{˜|y–zx•ysrm‡lk„gi‚dazZTmMO_DNOASJAQH?9:49:49:49:4:;5;<6>=8?>9>=8>=8A>9A>9B?:C@;D?;D?;FA=FA=E@<E@<D?9D?9E>8E>8E>8E>8E>8D=5D=7C<4D;4C:3B92B92B92B92B94A83@93@93>71<71<71;6096185085074/961961961:70<71>:1A:0B<0C;0E;/I=1J>0L@2L@2N@3N@3N@5N@7M?6K>5I<3H;2E;2E;2C:1B90A:2@91@93@93A:4B94E:4J71L.,V..j68u9;{;;…EC„E@…IA…IA‡H?ŽIB™NH¡PL¢OG“D7”B4šB6 D9 >3œ7-œ7-Ÿ=2£C5œ@1™@0 
D5¤G6§F6®H:¶NE´LM¯HL¥@D™9;‘98>:ƒ@8x>3o>0zI;…LA„?8…-,’).³9DÒLWÜKNßMMÊ>=À@?Ð`\³YQ„@5ƒUEyaIe[@_^@ihIslOvgJ~dK–cN¼dXÖ_[æUZëANò,Eÿ&Eÿ&Hÿ'Iÿ.Nù3Nò<RðI[ëP^åQ]ãS]àXbÛbkÕflÔioÒmqÏqrÍutÉxwÇywÊ||ÉyzÈvxÇqtÆkrÅfnÅakÃ_kÅ`n³Sa¦LXšGQŽEL†EIm69HT1-M0*I2,J6/L92K:2O>7UE8[O7cZ;hbBifEnrO{ƒ^z‰bt†^r‰_p‰_p‹`r`u’dw”dw”dw”fs‘kx–t|—x{•x—}„œ„…‡ƒ›…™ƒz’|rˆqm„jhd`y[TmMP`ERSCVPDUOC8938938939:4;<6=>8?>9@?:@?:@?:C@;C@;C@;C@;E@<E@<FA;FA;FA;FA;E@:E@:G@8G@8JA:I@9I@9H?6F=6E<3F<3E;2D:1D:1D:1D:1D;4D;4D;4C<4?80?80<71;60;60:5/94.94.96/96/:5/;7.>7/@9/C:1C;0E;1F<0I=1K?1M?2NA1P@1P@3P?5M?4L>3K=2I<3H;2E;1E;1D;2C:1B;1A:0?;2@<3A=4D;4VE>U4/j25ŽAG­LWÃUbË]fÆ_c›B>—I?–LAœND¬QL¾URÌOSÄLM¥@6B0 @0¥?1¥;.£7+¤8+§=/«E6¡=-˜7$:'¥>-¬A1²C2·E;§02¯:B¹DL½LRºMR­KL–A>‚71{8/v6,x2*„1-œ37¹=GÕGWèM[âFIÏ53Ð:9½31ÈNKÆc]•K@Œ[J}cLk`DgdEtoOpSgLhN°hYäbbõQZôDQö7Iþ2Kÿ2Nÿ/Mÿ+Iÿ2Oü0K÷1Jõ5Nó5Nð2Kó3Lô8Qò@ZïC[îF_íIaëNcêQfèTjèVkåSjäRiäPjäNiçNlëPoòSsóVuþg†ñ`}å[uÕUlÎZmÉdr¨R]r-2^&'Y0,W:4Q?5C;.:8)>B3LN9VO3aV6e_?heDquT†e€‹ixˆdxŒiu‹etŠdr‹cvŒeyhzizj€•v„›Šž… Š’£—¨–˜©™–¦™‘¢’‡˜†zypƒmhc_vYTmMPcGTWDYVGYVG7827828939:4;<6=>8@?:A@;BA<BA<DA<DA<DA<DA<E@<E@<GB<GB<GB<GB<FA;FA;HA9HA9LC<KB9JA8I@7H>5G=3F<2E;1D:0D:0E;1E;2D;2E<3E<5E<5@91?80=82<71;60;60:5/:5/:70:70;7.;7.?8.@9/C;0D</F<0G>/J>0K?/M@0NA1P@1O?0O?2O>4L>3K=2J<3H;2E;1D:0D;2D;2B;1@<1@<3A=4B>5G=4S81[*&Š;@¾T`ÙTeæTgãUcËJO 
41—;0’?1™A3®F=ÅIGÕBHÎ?C±B7§F5©B3¬@3¯?3°>3³B4µE7²G7©B1£<)¤;(©;*«:*®6&­3(¶97º9=½7>¼7<¿<BÆJLÄTS¾XS™?7Œ4*‰,%61ÃHKÜOWãBRÛ3@Þ:9Í.*Ó84È74ÆE@È]U©WK‘XGwW@veI~pS€qR„fLcL©o[ÕuiîSWüDP÷;Jù:Jþ=Nÿ@Qÿ;Où4Hÿ?Sÿ9Pû4Ký3Ký1Jÿ/Iÿ0Nÿ6Sÿ3Tý4Vý6Wú7Wù:[ø=\÷>`õ@aøCf÷Agõ?eõ?gø?hüCmÿErÿIsÿOwÿQuþUvòSqçTpåaxÖfv·Wb~17j0/Y2+P8,G>/>A.;D/?G/QL/_T8g`CokN||`ˆŽr‡‘v~‹q|Œq|Žt|szqzŠm}Œoƒ’uˆ–|•£Œœ©•£®¦±¡¬´©±¸°°·°«µ­¡«¢•¡•ƒ‘‚s„qh|c]tXSlNQdHRZCWYDYZH671671782893:;5<=7?>9@?:BA<BA<DA<DA<EB=EB=FA=FA=FC<GD=HC=HC=HD;HD;JC;JC;LC:KB9KA8J@6K>5I=1H<0H<0E;/E;/E;/E;1E;1E;1D;2D;2A:2@91@91?80<71;60;60:5/;7.;7.;7.<8/?8.A:0C;0D</G=1G>/K?1M@0NA1NA0P@0O?0O?2O>4N=3K=2I;0G;/D:0D:0D<1D<1B;1@<1A=2A?3B?6K=4\50w32­LSÛ]iæM_å?S×6E·&+¨1+7(“;'—<)¬@3ÃD=Ô>?Í;<±?5¨B4ª@3­>3²@5¸C9¼G=»I>¬>/ª?/¨=+¦;)ª9)°:,·;/¼<1ÊFAÏEBÐ>?Ð79Ö7<ÞBEâLNÞSPÄC>ÇLDÍTLÔSMÙKJÜ@Cà5>á27Û4.Ú7.Ð1+Ï:4¿84³@9µ[P–TD{N9‚bI†kP…aG’[F®gUËrdçmhêEKò:Dê9Cç<DçBHçFKèGMêFMõKTóAOó8Iö5Hù2Gþ0Gÿ3Mÿ8Rÿ9Vÿ9Xÿ:Xý:Zþ;]ÿ=_ÿ?dÿAeÿCiþDkþDmüCmüCoüCoýCrþCpÿGpÿ@hÿGmÿMpòIjéOkå[rÙcs½YeŽ?D`&$Q+"V@3VO=IL7@D-RJ3eYCvkW€{gŽy™œ‹•œŒˆ“ƒ‚Ž€Š–ˆ›Œ˜Š‡’‚Š“‚—žŽ¤¨™²¶¨¸¼®¿Á¶ÀÁ¹ÂþÅÅÃÃÃþ¾¾°µ±¢©¢—Œy‡vi{e\sYTmORfJQ[BUZCW\F560671671782893:;5=<7>=8@?:@?:C@;DA<EB=EB=GB>GB>GD=GD=HC=ID>IE<IE<LE=LE;LC:LD9LB8KA5L@4K?3J>2J>2G=1G=1F<0F<0E;1E;1D;2C:1A:2A:2@91@91=82<71<71;60;7.;7.<8/<8-@9/A;/D</E=0H?0J>0L?/M@0NA0NA0O?/O?/O?2N=3M<2L;1I;0G;/F90C9/C;0B<0B;1@<1@>2A?3B?6N;4n95—FEÄY_ÛWbßCQÞ8FÔ3;½++­1'¡8%˜:!—9 
¤;&¶>.Ã:2¾71§7,¢:-¥9/§7,¯:1¸A9ºE<·E:®>2­?0§<,¤6'ª6)¶>0ÃD;ÊG=¾8-Î@6âE@ìBBô=Aó9>ë27Þ.0Û75âGBèSLåNGÜ=9Ù10ã/2ê67Ù1(Þ</Í,"Ð71¾3,°7/Ég\¸l^ŽWC‡ZCƒV?‰R=©\LÎlaágbåUUðEKñ=Fä;@Ù=>ÒA>ÒGDÝOMëWWíNSïDMò9Gö5Fø1Fø.Dû1Gþ7Nþ8Sý8Tþ9Wÿ:Xÿ;\ÿ>aÿAeÿCkÿ<fý<hý>jý?mú>nø>oô=mö:jÿHrÿ;aÿ?dÿJpÿKn÷NmïUoãYpèr‚Ä`j˜INt:8\6-P9+PC2UM:^QAreUˆ|n•‚Ÿ‘¦§Ÿ¡¦Ÿ–œ˜•ž›ž§¤¦¯¬¦¬ª¢§£¦¨£µ´°ÂÁ½ÌÉÂÑÎÇÕÐÌÕÐÍÔÎÐÕÏÓÑËÏËÆÊ¿½À°²¯˜Ÿ˜€Œ~m~kau\XoSVjNS`FV`EX`H560560560671782893:94;:5>=8>=8A>9C@;DA<FC>HC?HC?HE>HE>ID>JE?JF=JF=MF<MF<MD;ME:MC9MC7NB6NB4N@3MA3JA2JA2I@1H?0G=1F<0D<1D<1C:1C:1A:2@91?82?82<71<71<8/<8-<8-=9.@9/A;/D</E=0H?0K?1M@0NA1P@0P@0O@-O?/O=1O=1M<2L;1I;0F:.F90C9/B:/A;/A:0?;0?=1@>2@@4Q:2r1/©KLÁRX»>D½06Â03Á//º1)ª2"£9#>"™;›< ¡=#¥;%¤6%ž6) 8-£6/¥4,®92·@:¸C<²@6±B7­A4¨</¦8+¯:0¼C8ÄE>Å@7Ã>-Ï>-Ù5+á*&í"%ú%+ÿ+2ÿ.4í)*ç.+á3,ß4,à3,ä3-é0-ç2+Ú2%Ö6&Ð2&Ï7,Å6.½>7È`UÒq¦eSVBŠM:£WIÈf[ßd_çRTèCIõFMîAEß??Ô@<É@8Æ?9ÑFAãMLïJPô@Kù9Hý7Hü5Hø3E÷6Gú;Mû9Qü7Rû6Rý6Uÿ6Xÿ9]ÿ;aÿ<gÿ>kÿ?mÿ@qÿBtÿBuþBuüBtùBpÿFmúAaþEeÿJlþKkÿVtÿ_zõ]vòj~æp~Ómx¯]aƒCAg6/hB7sUJye\Œ}v£–­£¡²®«¶¶¶²¶¹ª¯³°·½¶½Å¾ÁÊÂÂÊÄÃËÉÆÍÕÎÖÝ×ÛâÚØçÝÛéÝÝæÚÜåØßä×ààÔÞÙÏØÊÅ˺ºº¢§£ˆ’‡t‚qexb\sY[oS[jM\iK]gL561561561560561671983983<;6=<7@=8B?:DA<FC<GD?HE>HE>HE>HE<JF=KG<KG;KG;NH:MG;MG;NF;MD;MD;MD=NC?NC=MC7MD5LC4JA2H>2G=1E=2D;2C:1C:3A:2A:4@93?74<73<71>7/>7/<8/=90>:1B;3C<2D=3H@5I?3L@2MA1NA1P@0O@-O@-L?/L>1M<2L;1J91I81E80C90@9/>:/;;/<<0=>0>?1A?0S8-‡<7Ä\[ÊZY®86«.*²1+¯/&­2#¬9&¬B*ªF,¦D)£A&£A&£=$Ÿ9#¡:+£;0¦:0¦7.­<4¶C<·E;±A6«=0©=0©;.­=1»E9ÇMBÅE<¼8,¿6$Î;)Ý;.æ3,ñ+*ù(+ý&+û%'ø**õ.+ï2,ë4,è3*å3)ä2(à3%à:*Ñ2Ü?0Ï7*È8-Â?5³A6Ñl`¿gY¥RB¨NCËcZçhbæRRêAFôCKë>Dá;=Ö<<Í@9Á>4¶8,¹6.Ä94âHHé@Eð:F÷;Iú:Iø8G÷;Iû?NþAUÿ?Vþ<Tþ9Tÿ8Uÿ9Zÿ:^ÿ;cÿ>iÿ?mÿ>oÿ>pþ?rú@rø@t÷Aq÷CjÿMmÿSs÷JhìAaýTsÿg…ÿe‚÷]wï_xèg{ÜpÆqx§gg^X{ZQ—~wª™’¾°­Ä¼ºÆÂÃÈÇÌÇÇÏÃÃÏËÊØÍÉØÑÊÚ×ÎßßÔäæÛéêàëíãëïåæòèæôèèðäæîáèíàéêÝçäØâÑÊÑÁ¿Â««©‘–z†xlzibu_`rZcqWcpTcmT21/320542651875984984984;:5<;6==5??7AA9CC9EE=EE=HH@HH>HH<JH;KJ8LK7ML8OL9NK:JF:HE<JFCNIMNIPICOE>ENE>LC4J@4KA5MC9LB8F=4B90E<5C:5@93@85@85@86<74<42C97B94A96>95?:7>:7?;8@<9B?:D@7G@6J@4L@0O@-O@+L?,G@.G@0I;0K81M53L76J88F;9C@;6904</9D4:A/69$?=(b
C1¼j^¹JA¦7,¥5)§7+¥5'¥7&¨=+£8$¥<&ª@*­@+¬=)­:'®6%«6%¤6'¡7)£9+¦<.ª@2¬B4¬B2¬A1©>.«=.­<,³=/¾D7ÅG;Á?2º2&Ç9+Ì8*Ð8+Ö8-Ü8.â5.é3/î1-ò/-ô.-ó0,ñ1,ê5,ä8,Ý:)Û:(Ü9&Ô3Ð3"Ï8'Æ7'¾6(ÂB5ÏSIÈMEÐTLá[Xî\\ðPRí>Cò9?÷?GêAFßCDÖBBÈ?9»<3±;/°</²<0ÊKBÔKCÜFEá>?ë<A÷?Gý?Iù;GþDQüCQþDRÿDUý@Vü<Uÿ<ZÿBdÿFkÿAjú;gõ:g÷=nùCtýH{ÿK{ÿMyÿMtøPsðOpçMiêMjüYxÿgˆÿ_ƒõGl÷Vxè_{Ã^n½z‡a^xp§‘„»§œÑÄ»ÝÖÐÝÜÚÜÛàÞÜçâÛëæ×ìëØîòØñöÝó÷âó÷çôõëôôîòùôñùôðùñïöíîóèìîãééÞäæÝâÖÐÔÍÉÊ»º¸£¥ ‹‰yƒxr}op{k}†sxlv}k0./1/0320542653873873984;:5;:5==5??7AA7CC9DD:EE;HH@HH>HH<JH9KJ6LK6ML7NM8LK9KI=KJEPPRWU`YXjVTjSO`QHKMD?H?:F=8G>9H?:F=8B;5D:8A96?74?74@85@85=85<73=31=31=52=85=96>:9>=;>=9B?:D?9G@6J@4M@0O@-P?+L@*B?,B?.F<0H:1J65I56F35@65A=::=6;>5@@4E<-J9)^B4€L>®QB«;- 2#¡6&£:'ž7$ž9%£>*¡;%§<(®?,³@.¶=,·9*¹7)¶8*ª9)¥:*¥:*¦<,©?/«A1«B/«@.®?.«:*²=,ÀF7ÆH:Á?2¼8+À6)Ë7+Ï7,Ð9.Ñ;-Ô</Ø:.à8/ç4-î1-ô.-õ/,ò1,ê3+á7*×:'Õ:&Þ;(Ù6#Õ8%Ô=,Ì;*Á5&Ä:/ÏF<ÑF?ÚIFéOOóPQôGKñ<Aõ:AøCHåBEÛEDÒBAÅ>8·;1®:-ª<-«=.¶E5ÆL?ÔNEÜGCä@Aï?Bû@Iþ@JùDM÷CLúDPÿFTþCTú?Tü>XÿBaÿGiÿCiü?iù@jùBpüFvþJzÿK|ÿJzÿHtõKpñQsõ\zù_{úWvüNqýEkúDlïEjÚKiÈ^r±nukLI~t°–‰Ì²¥äÓÉìãÜëçæêéîêçòíâóöãùûáúÿâýÿçÿÿìÿÿñýÿöþÿùûþú÷ýúõü÷ôúõòøïòôëîñèíïæëæÝàÜÖØÊÆÅ´³±ž 
›Ž“Œ‰…‡‚•‡‰€†Œ~/.,0/-10.21/43/651762761:94:94<<4>>6@@6BB8CC9DD:IF=JH<JH;LI:MJ7NK8OL9OL9KI:NKBTRS_^fihxmm…lkŠkhƒ`YiYQ\OGRH@KH>GG=EE<AA;=D;<?:7=85<73=82>93>:1>:194.:5/;60<92>;4?<5?<5@<3E>4G?4I?3L@2O?0P?/P?-L?,<;&:=(?<+A;-B71A62>42;30=84B;5H94N2.Y,)l/.ˆ<>¢GF£;0¢2$Ÿ1"¤9)¥<)Ÿ8%ž7$¢;(¤;&«>*±A-¶A/¹>.¼=.¾:-¾</²<.¬<.ª:,©9+«<+­>-­>-­<,²?-°8(¹>/ËK>ËG:¼6*º2&É9.Ð6,Ô6+Ô8,Ö:.Ø:/Ý9/ã6/é4-ð0-ô.+ô.+ð0+ç2'Ý6&Ô7$Ò7#Ý8%Ü5#Ú9'Ú>/Ó=.È6)Å9,ËA7É<5Î?9ÙEAåKIêJJéCCí@BëCCÞCAÕD?ÍB=Â=4¶:.®8*«:*¬;+¬8)ÀD8ÕNHÞJHâ?Bê=A÷BKÿHRô@Iô@IùCOýGTþEUú@Uû?XþA_ÿEgÿCiÿCkÿFpÿIwÿK{ÿL|ÿJzÿFxÿHwÿKtûOuÿZ|ÿa€ýUvñCdûIkÿTvÜ?\ÍI`Ø{†¸||aC;{p®ŽÕ³§óÛÑûìåúòðù÷ú÷ôýôìûúëÿÿêÿÿìÿÿîÿÿóÿÿ÷ÿÿûÿÿýüÿþûÿýùÿúøÿøöÿôøýòöûðôùðóðçêåßáÓÏξ½»­®©¢¥ž£™£—£«žž¦—›£–/.,/.,0/-10,21-43/54/650983983;;3==5??5AA7BB8CC9IF=JF;LI:MJ9PK8OL9QL9OL=LJ>QPL^]bmlzzz’‚¡€‚¨€€¦{u—rkŠe_{YSmTLdMEZG@PB<HD?F@;?<87;63<71=90=90;8/:70991;;3>;2?=1@<1?;/A;-F?/H?0K?1M@0O?0O?/P>0L?/@=*?>,@=.?;/?;2>93=:5:94<94E:6M51V-+j)-†18ž8C¬=C§8/¥7(¥:*ª?/¨?, 9&¡8%¤;&¥:&«<)´?-¹@/¾?0Á=0Å=1Ä>2½?3¶=2²9.®8*¯9+°:,²:,³9*¶;,º<.ÄB5ËE9È>3¿2(À2(Í9/Ö5+Ù5+Ú6-Û7-Þ7.â7-ç4-ì1*ó0,õ/,ô.+ï0(ä2&Û4$Ò5"Ð5!×2Ú0Ú4&Ü<.Ö=/Ñ:/Í=2ÒD:Ä5-Å60Ê;5ÙEAãNJåKIÞC?Ö=8Ó@9ÎA8È?5¿=0µ9-±9+¯9+±;-¬4&½?3ÑHBÙHEßABæ@DôGMýNUð?Gð?GöBMþHUþHWüBWú@XüA^ÿAcÿCfÿFmÿIrÿMyÿO|ÿL{ÿIyÿDvÿN~ÿR}þKrøMoÿVuÿUu÷MjñGdðLgÒ>VádtþŸ§ÓŽd92lJ@¥ypÐ¥œõÔËÿéãÿôòÿýÿÿüÿùóÿüïÿÿïÿÿðÿÿòÿÿôÿÿ÷ÿÿûýÿüûÿýúÿüøÿúøÿù÷ÿö÷ÿôøÿôøþó÷öëïìãäÚÔÔÉź¹´³´¬±´©°¶ª¶¾³°¸­¬´©10,10,0/+0/+10,21,43.54/77/880::2;;3>>4??5AA7DA8IE:LF:NH:PJ:RK9RM:SL<RL@OJDVTUfdqwx‡‡©¹“ϐƎ‹À‡ƒ¶{w©pmšid_[~UPnNJaKGXFCN?=B<8995296/85,85.671783891;;1></@=,B=*C<)G?,J?-K@.L@0M@0M?2L@4K>5M@7K>5F<3B92=82:946;47<59<5>:1H4-Y2-x89”?D¦>E©8<ª;0«=,«@.¬A/©@-¥<)¦;'«>*§8%®;(¶=,¼=.Á<-Å;.Ê<0Ê=3Å?6¾>3º:/·7,¸8-¸8+º8+º8+»7+ÇA5ÍC8Æ9/Á1(Æ3)Ì8.Ï5+Û4+ß3)á4-â6,ä6-ç4-í2-ð/*õ/,ö/*ò/)í1(ã3&Ù4!Ð5Ï4×2Ù/Ø2$Ø6)×9-Õ;1Ö?6ØE=ÖF>Í@7É<5Ë@9ÖKDÙNGÒE>Å;1Å=1Ã=1À</º;,·9+´:+¶<-¹?0µ9-¿<2É@:ÒC?ÛBDãDHíJOöOVí@Fí>EòBLûIUÿJYýFXúCYûC]û@_ûBdÿElÿJsÿNzÿO|ÿLyþIvÿJwÿTÿT}ýJqùImÿStÿVtøNiëD^äF]êXkÿ–¢ÿµ¼óžŽHFk.)¦ke͖óÈÁÿãÞÿñðÿüýÿýÿü÷þþöÿÿöÿÿøÿÿøÿÿúÿÿüÿÿýÿÿýüÿüùÿûøÿù
øÿø÷ÿöøÿõ÷ÿôøÿôøýñóóéêäÜÚÖÑÍÍÊÃÉÉ¿ÊÊÀÊÍÂÉÐȾȿ¸Â¹65154/32.21,10+21,32-43.66.77/880::2<<2>>4@@6CA5JD8ME8OH8RK;TK:TM=SK>RKAQLIZW^li|~¸–™Ì™žØšžÞ™˜Ú”‘ÔŒŠÉ†…¿~µxv§nk–fe‡^[xWTiMKYDAJ><?;:8;76875557457664872<:.?<+C>*F@*G?*H@+IA.IB0IA4HB6HA9G@:K=<I;;C9:=77875384/83/917<574+C1']80~C=–GCž>?ž52¨<0ª<+ª?-ª?-ª?-©>*¬?+°?-«8%²9(¹:+¿9-Ã9,È:.Í:0Ï<4Ë>5Æ;4Â91Á8.Á8.Â9/Â8.Ã6,Æ8.ÑC9ÑA8Ã0&Ã,#Ð7/Ö<4Ó2*à3,ä1*ç2+è3,ê3-í2+ð/*ó-*ö/*ö/*ò/)ì1(á4$Ú5"Ñ6 Ð5Ø7#Ù3#Ö3$Õ3&Õ5)Ô8,Ô<1Ö?6ìYQáRJÔIBÌC;ËF=ÍJ@ÈE;À>1¼:*½;+¼:*¹:)·9*¹;,¼>0¿A3ÁA6Â=4Æ=7ÏA=×EEÝGHäIMìMQèBFê@CðBKùKTþN[üJZüF\üF_øA_ùBaýEiÿJpÿNwÿOzÿNxÿLvÿQ{ÿR{þOvýOtÿVyÿ]{ÿSoîD^úTlîQd÷dtÿ’žÿ‹•ôƒ‰Ç`d§MMµjgΏŠï¼¸ÿÞÙÿïîÿúúÿüþýûÿýúÿþûÿÿûÿÿûÿÿüÿÿûýÿûûÿûøÿûøÿüùÿûúÿûúÿøúÿöøÿó÷ýñóùíïòææçÝÛßØÒÛ×ÎÝÛÏàÞÒßáÖÜãÛÏØÓÅÎÉ=:5;8185063,41*41*52+63,74-85.:70;81>;2?<3A>5C?4KC8MC7PG8SJ;WK;UL=UK?SJCSJK]Wcomƒ‚‚¦’”Å› ÚŸ¦ê¢©ñ¡¥ïž ë™›å•˜Ý”•Ö‘ʉ‰½‚ƒ±zy¡rq‘fc~ZWlPN\IGRFCNCAL??K==G;:@;9:=:5?;/C=-E@,F@*FA+EB/EC4CC7BC;AC>@ACB<FD>LD@NEEOCHNAIL?IJ?HECHAA=2L:.gF7ƒOAŽJ=?4”8+¥>/©>,«=,¬>-­@,¯@-°?-²=+±8'¶8)½9*Ã9,È8-Ì8.Ò91Ô;5Ï:4Í:3Ë81Ë81Ì92Í:2Ì70Ë6/Ô=6Õ>5Ò91Ì2(Î4*Ù<3Ú<3Ø4+ä1*é1)ì1*î1+ð0+ò/)ô-(ö,(ø.*ö/*ñ0)ê2(â5'Ù6%Ó6#Ï7"Ô9%Õ8%Ó6%Ò5&Ò4(Ó7+Ò8.Ð7/ãNGèXPçZSÚQIÌG>ÄD9¿@7¹>/¹<*¹<(¹<(¹:'¸9(º;*À>0ÃA4ÉD;Æ@7É@:ÑFCÕIHÖHGØGJßIKæFHèBDíDKõLSüP\ûO]ûL]ûK`öD^÷DaùFfýIlÿNuÿOxÿOxÿPy÷UzóUxôTvüYxÿ]{ÿZwûQkòI`ýWköUgêM^ö^mâKZæTaåTc×XcÁefȃ~嫧ÿÔÑÿëéÿóôÿøùÿþÿûüÿûüÿûüÿüûÿüúýýùúý÷÷ü÷ôÿú÷ÿûøÿüûÿûúÿøúÿóõýîñúëîðáäéÝÝäÙ×âÙÔæßÕëçÛñíáññåíòìÜåâÏØÕ@=6>;4:7074-52+52+52+63,74-85.96/;81=:1?<3@=4B>3JB7MC7SG9VJ<WK;XL>WK?TICSJM^Xfpmˆ„ƒ«“–Ëœ¤ã¤¬õ§°ý«°ÿ¦¬ú¡§ó 
¤îŸ£ê¡á™›Ö•–Ì’’ĉˆ´zxŸji‹`]|XUpRPhOMeOKdKG^FBS@=H?:>?:6@<1A>/C@/CB0BC3BD7?D=>D@<ED;BJ>CVDHaMRhV[n\br`dobbje`dfXWaJDlG>„TF•XF“J7‘@-™>+¨A0®@/®@/¯@-°A.´A/²=+±8'µ7(¼8+Â8+È8-Í6-Ñ7/Õ81Ø;4Ò72Ñ83Ð72Ò93Ô94Ô94Ó83Ó6/ÞA:Õ8/Ñ3*Ö8/Ü>5Þ=5Ú91Ý6.ç2+ë0)ï/*ñ0+ô/)÷-)÷,(÷,(÷-)ô/)ð1)ê4)á5'Ú7&Ô7$Ð8#Î7"Ï8%Ï8'Ï6&Ð7)Ò9+Ñ7+Ï5+Ì5,ãNGód\ë`YÕOFÄA7º;2µ9-¸>)¸>'º='¹;%º9&½9*À</Ä>2ÊD;ÊA9ÎE?ÕKHÔMJÎGDÍEEÖHGãIIæCDéFIòMSøSZùR\÷O^úOaôH^õG`øGdûIiÿNrÿQxÿRyÿTzðUuïZxù^}ý^|ùTrõMgùOiÿYnÿ[mÿctðM^øUfôO`üUgõL_äR_À^_»vqל˜úÉÅÿåãÿííÿôôÿÿýüÿÿûÿÿûÿÿûÿÿýýýýûüþøøýøõÿøõÿùöÿúùÿúùÿöøüðòøéìôææëÝÝèÚÙäÙÕèÝ×ðèÝùóåÿúìþþòõúöãìëÔÝÜB>5A=4@<3>:1<8/:6-84+84+62)62)72,83-;60>93A<6E>6I@7MC7RF8UI;WJ:XK;ZLA[NHTIMXR`gd|~§’Ê— ã ©ö¥°ÿª³ÿ«²ÿ¬´ÿ¬²üª±ùª¯ó¨¬ì©«èŸ¡Ú™šÒÁ±ur¡he’]Z‡XR~XQ{YQxVOpNI`C?M?:@?;:B?8>>4AC6BE:?D=<B>:CB>FH?LUEWoSf„bqŽisŽtvx„o€_mŠ[c“XZžWU¦VM¦M?Ÿ@.:% :$¨=)¬=*®?,±@.³@.´?-¶=,¹;,º6'¿7)Ä6*Ë7-Ñ7-Ö8/Ú91Ú83×84Ø95Ù:6Ø93Ø61Ø61Ú83Þ;4çD=à=6Ú70ã@7þ[Rÿlcÿ]Tç?6å3)í2)ñ2*ô/)ô*&õ(%ø((ø**÷-+ó0,í2+ã1'Ú0#Ô1"Ô4$Ó:(Ì;&Ê<(Í<)Ï<*Ò:,Ò9+Ò8,Ñ7-Ú@8Ï81ÜGAôc^åXQËB<ÆA<·4*·:&¶<$¹<&º<&½:(¿:+Ä<0Æ>2Ç>6ÌC;ÙPJßXTÔNKÃ?;À?:ÏHDßIHâGEæJKïPTñRWïNVîLYñL\ùRføOf÷Jf÷JhüMnÿQuÿRvýRvôWvîXuõXwû[wÿ\xÿZsÿWnÿVkÿbuýVgÿ\mÿ[lñFXôDXÿQeîVeÆfgªieʏ‹ç¶²øÒÏÿêéýïîüø÷ùýüùÿÿùÿÿúÿþüþýÿýþÿúúþöôþùõÿû÷ÿùöÿöôÿööÿõõöèèæØØãÕÔÞÐÍáÖÐñçÞüôçþøèÿüéÿÿóúÿùèñðØáàC?6B>3A=4?;2=90;7.:6-95,73*73*72,83-:5/>93A<6D=5H?6LB8OE9TH8VI8WJ:YK@ZMGUJNXP_fay{¤ŠÉ”â›¦õ£®þ«´ÿ¬µÿ®¶ÿ¯·ÿ°·ÿ¯¶ü¯´ø¯²õ©ªë¢£ã˜˜ØŒŠÉ€»uq®jf¡d_™`WŽ`VŠ]TVPtMIbFCTBAIAAC<<:AB=FEACD?A?@DBEOMRWVdch†quš}¢ˆ}Ÿ”|œ 
|˜¥pŠ¤`u¬Yi¶VaºQU¸HF°@4¬;)¬<&®>(¨8$¨7%¬9'®9(±9(²9(µ7(¸6(»3%Â4(Ê7-Ó:2Ù<3Þ=5ß<5ß<5á>9ß<7ß<7á>9åB=èC=èC=æA;å@:æB9ä@7Ü8/Ô1(Ø5,ëH?ÿZQñ@6î6,ë,$î)#ø-)ÿ//þ,-ö((ú0.õ4/ï61ç6.Þ3)Ö2&Ô2%Ï6&Ë:'È;'Ê<(Í:(Ï9*Ð7)Ñ5)Ñ5)Ô:0Ï5-Ô=6åPJåTQÛLHÍB?·1(¼;(»=&½<'¾;'¾9(Á9+Â:.Ä:/È>4ËB:ÕNHÝXQÓRLÃD=¿C;ÌHCÛLHßJFåMLëSRíTWëPVéNVìOZ÷VføUhùSkúRlþQoÿRsÿStÿRuòMmøUtÿ\|ÿ_}ÿ\vÿUoûRiûReüUg÷RbÿZjÿZjóN^øScþYißS^¶`_—^W¶}Ù©¥îÈÅýáÞùééýøõúüùùÿÿøÿÿ÷ÿýúþýÿÿÿÿûûÿö÷ýøõþùõþùõþöóÿ÷öÿõõöêêêÜÛäÖÓÝÐÊßÕÌïåÛüõåÿùçÿýéÿÿïúÿøèñîÛáßFB7EA5C?4A=2?;0=9.<8-;7,84+84+83-83-:5/=82?:4A=4F?5JB7ND8RF6TG6WG7WI<XKCWKKXP]b^ysuœ…ŠÂ™Ü˜¤ð «úª³þ­µþ¯·ÿ±¹ÿ³ºÿ´»ÿµ¹ÿµ¹ÿ¯²ù¬¬ô££ë™—àŽÓƒ€Çxu¼sm³k`¤i]›cZ‘]W…VRwNKhEDV@@L85<A<@KABL@BQ>B[FKpW]‚cu—s—¤{§®«²{£¸vœÀp“ÂcƒÀSpÂF^ÐK\ÑHRÄ<<º5.¸9*µ<'°<%°;)°;*²:)´;*·<,»<-À<-Ä<.Ä6*Ë8.Ó<1Ù?5ßA8á>7á=4ß:4Ý84Ù40Ù40Þ93çB<ìE?éB<ä=5ã<4Ü5-Ø4+Û7-Ô2'Ì, Ö6*éG:õK>ò@6ï4-ð-)ö,*û-/ü+.ø*,ð*)ì/+é3/ã5.Ü4+Ù2)Õ1'Ñ3'Í7(Ë:)Í:(Ð:)Ò;*Ó:*Ô8+Ô8,Ñ4+Ö<4Õ:5Õ<7åONíYWÙEEÂ3/Á<-¿<(À;*À;*Á9+Á9+À8,À8,Æ=3ÇA6ÏJAØUKÓSJÆG>ÀD:ÇG>ÖKDÙJDÝLIäSPåSTâPSâMSåNWòXdöZhý[pþZrýUoûQnýPnÿQrýGmÿPvÿZ}ÿ[{ÿVrýSmþVmÿ\mûXiøZhû`nö`lí]gïfnìfmÉ\_ ]W{OF˜mfÁ˜”ݺ¸ðÔÓôàßÿõôùù÷ùÿýùÿÿ÷ÿÿúþýÿÿÿÿýÿÿøùüøõüùôüùôýøôÿùöÿøöùîìïäâåÚÖÛÐÊÜÒÈîäØýöäÿûèÿýçÿÿíúýôêðìÞãßIE9HD8FB6EA5B>3A=2?;0>:/:6-:6-94.94.:5/<71?:4@;5C?6HA7LD7OF5SF5UE5UH8VJ>YNLXNW^Zqpp–€†ºŒ–Ô•¡é©õ¨°ù©±ù­³ý°·ÿ±·ÿ²¹ÿ´¸ÿ´¸ÿ¯±ü«­ø¥§òŸžê—–⌋׃€Í}yÅum¶pgªf`ž_ZYX„QQuGGcB?R=5DH8BT=C`@EnCJ€OU—^gªgx½kÉnšËo˜ÉiÈ`ƒÉWxÆIgÄ;UÆ2HÙAPàDOÑ;=Ç82Ä?0½@,±:$­5$­5%¯6%³5&¶7(½9,Ã;-È</Ó@6×@7Ú@6Ý@7Þ=5Þ93Ý6.Û4,Ó.(Ò-'Ó.(Ø3-á:4ã<4á81Ü5-Ü5-Ð,"Î* Ù7,âB6Ü>2Ñ5&Ë.éF7óI<ýH?ü<9ô,,ñ#%õ')û/2ñ-.ì0/ä2.Þ3,Ú1*×0*Ö/)Ó0'Ó7+Ò9+Ö:-Ø<-Ú>/Û?0Ü@3Ü@3Ò6*æLBåJEÎ50ßGFóZ\ßIKÕA?È</Ä;+Ã:*Ã:*Â:,Â:,Á9-¾8,Â<1Â>2ÈF9ÏOBÎPDÆH<¿C7ÁC7ÎH?ÑF?ÕJEÛPKÝROÙMLÚKMÝLQëX`ó[güaqÿauûWoõNhõMgÿMmÿOuÿRyÿUwÿTqüTmý[pÿcuÿizÿguúboõamídnãflÙhjÎghµfa^TeG<^U©ˆÍ«©æÈÈïÚÙÿóóùøöùýüøÿÿ÷ÿÿûÿÿÿþÿÿüÿÿùüþú÷ûúõûúõþûöÿýùÿûøþôòøíéêßÙÞÔËÝÓÉíæÖÿøæÿþèÿÿæÿÿëøúïêïèãæßNH:MG9LF8JD6HB6F@4D=3D=3?80>7/=60=60=60?82@85?:4C>8EA8LD9NE6RE4RE2VF6VH;ZMGVMR[Whlk‹~‚±Š“Δžãœ¦î§­÷©¯ù­°ý¯³ý°³ÿ°´þ°³ÿ¯³ý«®û¨«ø¤§ô 
¢ïšœé“•âŒ‹Ù†…уÉ|v¼mk¬dež_a’Y[„RQsNHbWFY_CQmBL‚FP˜LV¬U^¼[dÊZhÙRpâRußTuÙPlÒJbÍBWÇ8JÅ.?Ô8EãCMæGLØ>>Í>8ÌF:ÃH6·@,·>-¸=-º<-½>/Ã?0ÊB4ÐD7ÕE:ÙE9ÚB7Û>5Ù;2Ù6-Ù5,Ú3+Ù2*Õ0*×4-Û60Ý90ß80Þ7/Ý6.Ý6-Ö/&Ú6,Ø8,Ï2#Î2#Ò9)Õ>-Ô;+Ø<-èD8øJAýDAù56õ*-ö*-÷/2÷67í55ã41Ú1,×0*×/,Ù0-Ú1.ã81ä91ã;2â;2á=1ß=0Ü?0Ú>1Ñ7+ïWLøaZÓ;6Ñ;:æPQÜFHæPQÏ<4Ê:/Æ8,Å9,Ä:-Ä<.Ä<0À:.¾:-¾<.ÃA3ÇH9ÇI;ÂD6¾@2¼>0ÉF<ÊE<ÎIBÕPI×RMÕNKÔLL×LOéZ`ð^hüetÿhzü]qóOgóMeýNkÿVzÿTxÿRrøTmö]qûhxûjw÷erüetó_mì`kêktßruÃjfªd\žla}fVYI:kXJ”{t½ œÞÂÁîÕØþîïûõõûûûûÿÿùÿÿýþÿÿþÿÿûÿÿúýÿþúûþõûüöÿþùÿÿúÿþúÿûõÿ÷òñèáäÜÑáÙÌðé×ÿùæÿþèÿþåÿÿê÷ùëíðçèéáQK=PJ<OI;MG9KE9IC7G@6G@6B;3A:2@93?82?82@93A96@;7B?8EB9KE9MF6PE3RE2VG4UH7YLDUKLZScii…|«‰“È“žÞš¤ë¦¬ø©¬û¬­ý­°ÿ®±ÿ®±þ­°ý­°ý©¬ù§ª÷¤¨ò¢¦ðŸ£í›Ÿé–˜ã’”Þ’’Ú‰‰Ï|€Àty³ou©mpigŽj_}y_x‚WjŽP_£O\»S^ÍT]ÖPWÛHRå@Qè?RäCRÜDPÖCKÏ@DÊ9<É46äJLçIJáAAÖ87Î95ÎA8ÈD8¾>1ÃE6ÃE6ÅF7ÉE8ÎF8ÑG:×G<ÚG=Ö>3Õ;1Ö8/Ö5-Ö3*Ø4+Û4,Û7.Û81Þ;4à=4à<3Ý90Ü5-Ü5,Û7-Ò.$Ý;0Ú=.Í1"Ç.Î7&Ò<+Í:(Î8'Ô8)â:1ó=9ÿ@@ÿ<>þ37ó,/ê,.á+*Ø*)Ô+(×/,Þ44ä88é9;ð88ð75î73è71ã7-Ü6*×5(Ñ5&Í5(çPEÿmcÛHAÈ42×CCÔ?CêVVÙA<Ò>4Ë80Ç7,Æ:-Æ</Ã=1Á=0¼:,»<-¾?0ÀA2¿A2½@.¼=.»<-ÄD7ÄD9ÈH?ÐPGÓRLÑPJÒNLÕONç_cîaiûitÿm|ÿdwõVjõQiÿUoÿWvÿVtúXpñ^pônyöw€íksÞXañ`mñ`mìboïs}숊́|ªth™yjskXON<_VG„rh¯–’ÜÀ¿ïÕØüéëþó÷ýûüýþÿúþÿþýÿÿýÿÿûÿÿùþÿÿûùÿõúýöÿÿúÿÿúÿÿúÿý÷ÿþöùðçíåÚéáÔôïÜÿûçÿþåÿÿãÿÿèùúêòôçîðåSM=SM=QK=OI;MG;KE9JC9IB8E>6D=5B;5A:4@93A:4B:7@;7A@;CC;IE:MG7OG4RF0VG2UH5WJASIHXR^jg‚}§‹“Ä’žÚ˜¢ç¤§ö¦§ú«©ü«¬þ¬­ÿ­®þ¬®û¬®û¨¬ö¦ªô¤¨ñ£§ð£§î 
§íŸ¤è¢æ™žâ”™Û‹“Ò†ÊƒÂ„ˆ·ƒ¨ˆyšmˆšcz©Xi»Q_ÐNZàIRèAIê=Aé=;ä?9ÝC9ÖH:ÎI8ÆG4ÂC0Å@1äTIàG?Ú;7Ø64Ø88Ù;<Ñ98È74À:/¾<.¿;.À:.Â8+Ã7*Æ3)Æ2(Ï5+Ð3*Ñ3(Ô3)Ö3*Ù5,Ý6.Ü8/ß>4Ý<2Ü;1Û8/Û7.Ú6-Ø4*×3)Õ3&Ó3%Ð4%Ð7'Ò=)Ñ>*Ê9$À2È:&Ê4%Õ1(ç51ú<<ÿ=@ÿ8>÷37ê-1á-.Ù--Ø0/Ü43ã9:é;=ð9=ö26ø03ô01í1/å2+Þ2(Õ1%Ï3$É1$Ñ=1új_äTLÈ95Ð@?Ë:=äRSäJHÜC=Ñ94É6.Ç9/Å;0Â<0¿=/º;*¼?-¾A/¼?-º;*º;(½;+¾<,¼>0¼>2ÀD:ÈLBÌPHÊNFÌLIÐNLç`dìahøhrÿo{ÿhx÷Zk÷UjþZrÿYsû]tîaræjtë}€ñ‡‰èwyÚ_dñgtþm|óeuëlwö‘•ì¢ŸÄ”Š¢|ikVHQ<WUFtg^¤ŒˆÛ¾ÀòÕÙùãæÿóøÿúüþþÿüýÿþûÿÿüÿÿúÿÿøýÿÿûùÿõ÷üõýÿùÿÿúÿýøÿý÷ÿÿöÿùïõïáòìÜúõáÿýçÿýäÿþâÿÿçýþìùúì÷÷ëVO?TN>SK>PJ<OG<MG;LD9JC9H?6F?5E<5B;3B92A:2C:5A<6C@9DD:JF;MG7OG2QF0TH2UH5UH?QGFWQ]jh€~‚¨Œ”Å“Ø™¢çŸ ò£ ÷¦£ú§§ý¨ªÿ©«þ©¬ý§­û¥«õ£ªò£§î¢§ë¤©í¦©î¥¨í£§î˜ è– å”Ÿß”Ÿ×•›Íš•¾ Œ¯ªƒ ¥e´]pÃRdÐIWÜBNå>Fë<Aì<<ë@9ãA4ÜE4ÔI4ÊI3ÂF.¿B,Â?-âSEÞD<Ý97æ>>êDFå@DÙ8=Î65ÍB;ÉE9ÉC8ÇA5Æ>2Ã9.Â5+Â2'Æ2(Ë3(Î4*Ó5,Ø5.Ù4.Û4.Û4,äA8Þ>2Ù7,Ú6,ß8/à:.Û5)Õ1%Ø8(Í2 Ê1!Ï9(Ï9(Ç4"Ê7%ÔC0È7&Î8*Ø7-Þ5.ç40î53ö:9û?>ÿLKúHFòBBì>?ê<=è8;å26æ+2ô+1ø)/ô,.ï/.è1+à4*Ù5)Ó7(Æ0!À.ôdYë]SÏ@:ÔDCË;;ßMMêRQàHEÔ<9É6/Æ8.Æ9/Ã;/¾:+¾<,Á@-ÀA.¼=*·8%¸9&½<)¾?.µ7)´8,¸>3ÁG<ÅK@ÅIAÆJBËJEåa_æ^`ñdjþnwþkuó]iðXeø]mú^sõbtèdqãlrì~ù‹ö†…éqsøryÿy†õaqàXföˆ‘ÿ´´Ð®¢ ›‡]jPDR9QTAjbUœ‰‚ÚÀ¿òØÛöàãÿô÷ÿúýÿþÿýüÿþüÿÿýÿÿûÿþùýûÿúôÿôòþòùÿöüÿöûüôÿýöÿÿöÿÿóûõç÷ñáþùåÿýçþüãþýáÿÿêÿÿñýýóüüòXNBWOBWMAUM@TJ>RJ=RH<PH=MC9KC8KA7H@5H>4G?4H>4F?5GC:GE9JF:NI6RJ5UJ4VK5VK9XNEULMZTbkhƒ{~§‰Ã‘™×˜Ÿç£¤ö¥¢ù¤¡ü¡¢ü¡¥ÿ¢¨ÿ£«ÿ¤­ü¡ªõ¡©ñ¡¨ì¥¨íª©ï­©ó±©ö¬©ø ©ø—§ò’¡â–ŸÖ¦£Î¶ž¾¹…œ¸gz¿L]ÑERß>Mç;Gç<Dæ=Bà@@à@@â>?ß<=ÝEBÉ:4ÊD;ÊE<º1)ÜKFèJIéCEë>Bé<@æ<?á=>Ú<=Ô<;ÏB;È?7ÊD;ÑMAÍK>¿=0¸6)½9,¿7+Ã6,È5-Ë2*Ñ/*Õ0,Û2/Þ50ç@7ß9-Ú2%Ý3&å9+ç;-ß8(Õ2Ï2Ì4Ê5!Ç4"Ç4"Ê4%Í5(Ï5)Ê0&Í3)Ñ5)Ò6*Ó5)Õ3&×3'Ü4)à2)æ3.ë52ë33é/2ç-2è-4í.5ö-3ö+/ï+,í0.ê5.ã9,Ù7(Ð4%Ã-Ä3"çXHéYNË;3ßNIÅ41ÚHHáOPÜJJÔBBÍ<7È80Æ8.È:.È<-Æ=+Â;(¾9&¼;(»<)»>,º=+·<,´8,µ;0¹=1»?3»?3¾@4À@5ÄA7ïjaõpiêc_ômjûqqìadôgmÿpyÿlzûhxðdoébiëdjîlnîqoîqoûy{ÿ}…ùZlücwßbpû¬¯¶ªš}‘u]rQJY:LR8snZ¥•ˆË·°íÕÓÿîðÿöùÿ÷ûýøüüüþÿÿÿÿþÿÿþÿûÿþðÿôåþëæûêïÿðøÿöüÿôþÿóÿþñÿýñÿûîÿúêÿùæÿùãÿùáÿùáüúåÿÿõÿÿûÿÿûYOCYOCXNBVL@UK?TJ>SI=RH<ND8ND8LB6KA5J@4J@4J@4IA6GC8HD9LF8OH6RJ5SK4VK7TK:XNEULMYUckhƒ{}¦†ŒÀ•Ó–šáŸžì 
žò ôžŸù¢ýŸ¦ÿ ªÿ¡¬û ¬ô «íŸ§è¢¥è¥¤ê©£í« î¤Ÿï §÷š¦ðœ¡á£œÐ±˜À¿ªÂq„ÄTbÑCOâ<Fì8Aï7?ê:=å<?Ü@AÛACà<CÛ:@ÚADÌ<<ÍCAÊ@>Á31åOPçDGé@Eê;Bç8?â8;Ü9:Õ:8Ï;7ÔE?Æ=5Æ=5ÌG>ÌLAÂD6¸:,·9+¹7*¿7+Æ6-Ì5.Ò3/Ø3/á55ä84å>5á;-Þ6)ß5&â6(á5'Û6#Ó4Ñ9$Ë9"Ç9%Ç:&É<+Ë<,Ì8.Î4,Ó4.Ö5-Ó6-Ï7)Ë5&È5#Ë4!Ï4"Ø2$Þ2(ã0+æ.,æ,-æ,/æ-2é/4ì+.í,-ì0/è2.á5+Û5'Õ8'Ò9'Õ?.»*ÖD5Ñ?2ÙF>ëVPÚB?ØBAçUVâRRÜJJÔC@Î>6Ê:/È8-Ç8*É<+Ä;(¿:'½<)½>-º?-¹@/¶>.²:,´;0·=0¸>1º>2½?1ÁA4ÅC5ëi\òmdçb[ðkfõolèbañklütxÿq~ÿo|öhtí]fêY`ñbf÷qpþzxûvwÿx~ùRdþ[pä]nö¦©©¦“jŒk[sOO_;SZ;us\©ÖĺöáÜÿôóÿúûÿúûÿûüüüüüþýûÿÿúþÿõÿúíÿóáÿéáýæéÿëóÿñøÿòýÿòÿÿñÿýïÿýíÿúéÿöåûóàúòÝüôßü÷äýúóüüúÿÿý\PB[OA[OAZN@XL>WK=WK=VJ<SG9RF8QE7PD6OC5OC5OC5MC7KD:JF;MG9PI7SK6VK5VK7TK:WMCVMNZWbkiz|¢…‰¹‹‘Ë‘–Ö˜˜Þ™™ãš™éšî› ô£÷Ÿ§ø ©ô ªïž§èœ¤ãœ¢àžŸàžáŸ™ãœ˜â–›ßššÚ¤•Ð±ŒÀ¿‚¬Çr“ÉYqÈCT×ALß>Dã=?å==å<?ã=?á>Cá=Dà<C×8=Ö@BÏA@ÊC?Ã<8Ä96éWWàBCä>Bè;Aã8>Ü68Ö66Ñ96Í<7ÕHAÄ;1¾5+ÅA5ÍK>ÆH:»=/·8)º6)¿7)Ç7,Ï6.×50Þ71æ87é=9×3*×5*Ú6,Ü6*Ý5*Ý7)Ú:*×>,Å4!¿4¹4!º7#¼8)¾9*¾4*À-%ã?=å>8ß>6Õ<.Î8)È7$È7"Í8$Ò7%Õ3&×/&Ù.'Ü/+Ý/.Û//Ü./Û,)ß0+à5.Ü5,Õ3&Ð4%Ð9&Ñ>*ÔA/Â1 çSEÒ;0ÚA;ÞC?éKJðTUíTVèTTãOMÝIEÖC<Ï<2Ê7-Å6(Å8'À7%¼7&¹:'º=+·>-µ=-²=,°:,±;/µ<1µ=/·=0¹?0¾C4ÃE7Ü\Qêg]äaYðlgöpoçabìfgójqàR^ï^kõgsòemñ`gõfjùqqþxwútuÿnuóJ]øRhä[mñœ¡£œŠb`VoHRd>W`Axx^­¥’áÑÄÿîæÿøñÿüøÿþúÿþúýÿüûÿýûÿÿùÿýóÿ÷èÿîÜþãÚøÞáùáéüæïüèöýëýþîýúëÿúêÿøçúñàôèØòçÕöëÙøñáþúñýüøÿþû\PB\PB[OA[OAZN@YM?XL>XL>UI;TH:SG9RF8RF8RF8RF8QG;LE;KG<OI;RK9TL7WL6WL8WK;VLBUMK\V`jh~yy›‚…²ŠÂ”Γ•Ò•”Ö••Û–˜ã˜›è˜žìš¢í›£ëœ¥è™¡ß–ŸÚ•›Õ”˜Õ•–Ö’“Ö‘Ò…„½‘ƒ¶¥}±·s¢ÈfÓW{ÕGaÔ<KÞAJàBCÞCAàB?áAAä?Cç=Fç=Fã?FÖ:>Ñ?@ÑEDÆA<º61Å>:êZYÝABã@Cæ=Bã:?Û89Ó97Ð<8ÎA8ÏE;Â:.»2(À</ÇE8ÄE6½>/¸9(½8)Â9)Ì9/Ò:/Ú91à91è;7ë>8Ù2,Ù2*Ü3,Ý5,à5-ß7.Ü:/Ö=/É7(Æ:)Á<+¾<,¿;/Á;0Å<6Î95ëCBîA=ä@7Ú<1Ï9*É8%È:&É;%Ð=)Ð7'Ï1%Ñ1%Ô1(Ö3,Ô1,Ñ/*Ó2*Ó5,Ó7+Ð6*Í5'Ê7%Ì;(Ì>*Ì;(Ï<,ô^PàF<Ò3/Ó/-æ@@ûWXÿtuÿrqÿjgö^YçPIÕA7Ç5(½. 
Ä8'À8(»8&¸;)·<,µ=-²=,°<-¯;.®=/°</°</±;-¶>0¼B3ÀF9ËMAß`Wâa[ôpløstæ_cä]aæ]dÖHTçYeógrôgoöelükpþsvþvvûpsûenñDXôKbåXk뒖 •ƒ_zYRkCSh?\gEy|_°ª”çÜÊÿõèÿùðÿûñÿýôýÿ÷ûÿúùÿúöÿøòüôêúíÚøÞÎóÒÊëÌÏìÎ×îÔàïØéòÝò÷ãöõãûöãüõãõìÛíáÑéÝÍíáÑñèÙüõíü÷ñþùó\PB\PB[OA[OAZN@YM?YM?YM?WK=VJ<VJ<UI;UI;UI;UI;TJ>OG<OI=QI<SL:UM:XM9YL9XL<UKAVLJ\U]hexut“|~¥…‡·ŒÂŽÃÈ’Ï’“Ô’•Ü”–à”˜ß•šÞ—œÜ”šÖŽ”ΉÇ…‹Å‡Ã}ƒÁ~¸}tŸŽp”¨gÁ\„ÖNvâAcè:Uê9KæAGàECÜGAÛGCßEEæAGî<Jí=JæBI×>AÐ@?ÐIEÀ?9´3-ÈC<àUPÜDCáACä@Aá>?Ú?=Ô@<ÑD;ÏF<Ç=2À8*½5)½9*À>0¿=-½;+½<)Â:*Ç;*Ï;/Õ;/Û8/á90ç83é:5ç:6å63ä20æ21è43æ40Ý2+Ô0'Ì2(Ë7-É;1Å8/Á3/Ã40Í;<Ý?@ë8;í76ä71Ø5,Ì4&Æ5"Ä7#Å:%Ë=)Ë8&Ì4&Î5'Ô8,Õ;1Ó9/Ð8-Ð>1Ê;-Å8'Å8'Ç:(Ç<)Ç<)È;)Í>-Í:*Ø>2áC:Ô/-ä::à24à67×53Ö;7ÞC?æKFêQIëTIèTHäUGË?0Ä?.¾<,¸=-·=.´>0°?/®>0¬<.¬<0­=/¬<.­<,±=.·A3ºD6ÇMBÝbZàc]ïolôrrå`cå`eç`g÷kvûoz÷kví`hð_fýlqÿy|ÿy|úmsø^jóAWõD^èUh懍¤“ƒh€^[tJ_vJhwP‚ˆf³²–èâÌÿúéÿýíÿþïþÿòûÿôøÿôôÿñíûêåñãÚíÙÆèǻ伺޺¿Þ¼ÇàÀÍâÃØæÌãëÓìîØóðÝ÷òßóêÛìàÒèÚÍêÜÏïáÖòèßñèáòéâ[OA[OA[OAZN@ZN@YM?YM?YM?XL>XL>WK=VJ<VJ<WK=WK=XL>QI>RJ?SK>UL;XM;YN:ZM:YL;XL@ULGYSWe^nnjƒut–~}¥„…±†‰´‡‰¹Š‹Á‹ÈŽÏŽŽÔÕÓ”Ó‹Ê„‰Á|¸u|²ov­io«ljœ€kŠ”dz®YvËPoãBdð7Vö4Lø9Iï@EåFBÝJCÛJEáGGèCJò=Nð>NäCI×BDÌA>ÎID»<5°4,ÊJAÕLFØBAÚ>?Û;;Û=<ÙA>ÔE?ÍG<ËG:À8*Á7*À8*¾9(¼8)¼9'½:(À;(Ä;)Ê=,Ò<.×;.Ü8.á6.ä6/è50é32è./ê-1î02õ47õ79î45æ21Ú.,×4/Õ63Ñ32Ï/1Ò/4Ü7>ê:Dí06î02æ21Ü3,Ñ5)Ê7'È9(É<(Æ8$É6$Ï7)Ô;-Ú>2ÝA5Ý@7ÙB7ÓG8ÉA1Ã<)Á:'Ã<)Æ=*Å<*Å8&Ì:+Î8*Í0'æC<Û2/ï?Aã.3Ú*,Õ1/Ó6/Õ83Ö92Ò8.Ì5*Æ2&À1#ÎE5ÆA2¼=.µ:+±9+¯9+«:,ª:,¬<0«=0¬>/¬<.«<+®=-³?0µA4ÉSIàg_ßd_èkiînoæchðjqõoxúp}üs}ömwîbködnÿrzÿx~ÿquúipøWfùAYô@[êQfâ}ƒ­–†zŽkn‡]o‰ZyŠ`—r¶·˜ßÞÂùôÞüúåÿÿïûÿïõÿïïÿëçúäÜïÙÑãÍÅÞÁ±Õ¯¨Ó¦ªÐ§°Ó«¶Ô®¼Ö±ÉÚºÒßÁàæÌèêÔòíÚóêÛîâÖìÛÑêÙÑìÛÓéÚÓæÙÑä×Ï[N>[N>[N>[N>ZM=ZM=ZM=ZM=ZM=YL<YL<XK;XK;YL<YL<YM?SI?TJ>UL=XL<YN<ZM:ZM:ZM<ZN@VKEXOR_Wbd_sjfsozzž|~¤~€©‚²ƒ„¼‡…ƈ‡Ë‰ˆÌ‰ˆÊˆˆÈƒƒ¿|}µvw­qr¨kl¤fg 
l`Ž‚_s•V_³N^ÒJ^ì@Xø7Lþ3Fþ9Có?BéD@áHBÞICâFIéBJò=Nï>NÝAEÔFDÈA=ÉHB´:/±7,ÏPGÊC=Ï>9Ð64Ð21Ð53Ó>8ÒE<ÉG:ÃD5À8*Ã:*Â:*¿:'½8'¾9&¿:'Á:&Å:'Ë:)Ó:,Ù9+Ý7+á5+å4,ê3-ë*+ñ*-ø-3ü/4ÿ17ÿ39ÿ49ý58ù8;õ8<ò9?ò9Aô9Dø8Gû6Hþ5Gô+5ô-2í12ã4/Ù6-Ð8+Î:,Î=,Ë5&Ñ8*Ù;/à>3â>4ä=4å>6ßA6ÙJ<ÐH8ÉA1Ä<,Ã:*Â9'Æ8*È9)É2'Ó9/á@8ëD>Þ0/à..ì59è58Ø3/Õ7.×82×:1Õ;1Ò;0Í:0É;/ÖL?ÌF:¿@1´9*°6)¬6*«7*©8*¬;-­=/®>0­=/¬=,«<+­<.±=0ÁLBÚc[Þc^èkiðosêinõrzüvîfr÷oyùq{öjsþlvÿwÿu|ùhoùcn÷RbþB[÷<YêKaÝt{µš‰žz}˜k{—f‚•h‘u¬²ËÌ­ààÄéìÑîöÞåóÙÙíÑÏæÉÅÞÀ¼Õ·´Ë®¬Ç¤šÁ”–‘™Ã“¡È™§Ë¬Ë¡·Í§ÁÒ°ÎÙ»ÙÝÄåâÏìåÕïáÖëÚÒçÒÍãÐÊàÏÈÚÌÃÕǾ[N>[N>ZM=ZM=ZM=ZM=YL<YL<ZM=YL<YL<XK;XK;YL<ZM=ZL?VJ>VJ>XL>YN<ZM<[N;ZM:ZM<[OAWMDWML\RZ^Xfb]qjfspwv–xxœ|{§~µ„¿‡‚ň‚ȉ„Ç‚}¿y·zu­xq§vo¥tk¢pg za‹„P\™HG³EHÓHOîBNø:Fþ6Aÿ:Aõ==îA=æDAãEBäCHèAKî=Më?MØ?BÑGDÅA<ÄH@²9.³:/ÑUKÃ>7É83Ç/,Å*(È/*Î;4ÍC9ÃC6¼>/Á<-Ä;+Ã<)Á:'Â;(Ã<)Â;'Ã8%Ä5$Ê7%Ô8)Ù7*Þ6+á3*å3)ë3+õ33ÿ58ÿ8<ÿ4:ÿ-2ÿ(-ÿ(/þ+1ü-3ö)0ò&1÷)6ÿ,?ÿ*Aý!:ó/ö"1ó'0í,1æ0/Û2-Ô3+Ñ4+Ð6*Ô6+Ý90æ=6ë>7ë:4è71è51â92ßH=×K<ÓE7Ë=/Ä6(Â3%È5+Ï8-Î4,Ô6-æC<à93à21Ò  ì89å55Õ0*Ò4+Ò4+Ï5+Î6+Ê6*È5+Å7+ãYNØRFÊH;¾@4·;/µ;0µ<1µ<1¯9-±;/²>1±=.¯;,­9*­9,®:-±<2ÐYQÜb]ìppôvyîmrõq|ör}ÿz…ÿ‰ÿx‚ôhq÷epÿq|ÿuÿkvø^jöOaÿA^÷8WéG^Ûntº‹™©…‚žn}™f€–gœq¡©„´º˜ÈÍ­ÕܽÌÙ»ÁÔ´²Ì©§ÂŸž¼˜š¶–²Œ°‡‰³†µ¹„•À‹œÂ¡Ã‘¨Å™²È¡¾ÎªËѵÚÙÄæßÍêÞÒéÖÏáÌÉÛÆÃÔÁ»Î½µÈ·°ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;YL<YL<YL<ZM=ZM=ZL?ZLAZLA[M@[N=]M=^O<\O<\O>YM=YLCXMIXNO[QY]Ub`Yia\rjfokŽsp›vq§{u³„}ÁŠƒÉŒ‚LjÀŠ~¼ˆy²ƒr¨‡s¨t«‡l£†\„GN¤B9¶@<ÍC@Þ@?è:;õ=?ÿDE÷><ô>=í?@ê@CëBIêCMëANãALÑ??Å@9¾?6·>3°</²<0ÀD8ÒMDÈ72Ô:8Í2.Ì3.ÖC<É?4µ6'»>,»6%Á:'Ã<)Â;(À9%Â9&Æ;(Ç:(Ë:'Ð9(Ö6(Ø2$Ú."á0&ì7.õ=5õ82÷40÷0-ø**û&(û%'û%'û$)û&,÷!)ø".ÿ'7ÿ(>ÿ!;ÿ7ý1ÿ'8ï#.æ#+ã+-Ý--Ô+(Ó.*Ü41â64è64ï77ò65ó23ï./î,,å/,Ü92Ñ:1Ò91ìSK¼#Ä+#ßF@È.&Í.*Þ<7èE@â:7Ø/*Ú.*â51å<7ß>4Õ;/Ï5)Ë3&Ì5*Í;.Ë;0Å8.ìbXàZOÈC:¹6,º:/¸8-±3'´6*±1&°2&®2&°5&°6'²:*´<.³=1°:0ºC;Ö\Wìppñsvöx|ûyƒõq|ÿ|„ýw€út}üs{ÿr~ÿr~ÿmyÿgtü`nôI\ÿ>\ÿ>]ä@XÔelµ”’ |ƒm}™f~”c‡—j”Ÿwž¦ §…œ§…’£Š£|€Ÿvzšqyšo|r 
vƒ¤u‚­wƒ³y‹¸‘¼„•¾†—¾‡œ¾Œ¢½§¼“±¼š¾Â§Ï̹ßÓÇãÒÊÜÇÄÔ¿¼Ìº¶Á²«¹ª¥ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;YL<YL<YL<ZM=ZM=ZM=ZL?ZL?]M>]M=]N;^O<\O<\O>]P@[OC[NFYNJZPQ\SX_U^_Wfd\tibƒng‘qjxp¬€y¼‰ċ€Ä¿~ºt«Œn¢—q¤¡u¨›kŸ™W}<Cª5+²6,Â<3Ó@8Ý>8è?:óA=õ=;õ;<ô;@ñ>Dî@IèAIâ@KÛBGÇ<9½=4¶<1±;/«:,®:-½A5ÐJAÚFBÜA=Ô63Ð51ÖA:ÍC8¼:,²5#½8'Â;(Ä=*Ä=*Ä;(Æ;(È;)Ë:)Ï9(Ï6&Ô2%Ú2%ã5*ê8.ð91ð91ç1&è0&ê.%ï,&ö+'û+)þ,+ÿ-.ü)/ú$,ú$0ÿ'7ÿ$;ÿ8ÿ3ü0õ0ö.9õ3<æ+2Û(,Ü.0Þ02Û+.ñ:>ô7=÷4:ö26÷05ø03ù25ô87Û2-Ú;5×82äE?äEAÎ/+Ñ2.Á"Ô51á?<èE@à=8Ø3-Ù2,Ý60Ý:1Ô:.Ï9*É5'Ç5(È9+Ê<0Ê<2Æ9/ícYáXNÉ@8º4+Â91Ã:0¿6,Â9/¿5+¿5+½4*»5)¸6)µ5(²7(°6)´;0¼C:ÓYTèljïqtöx|þ|„øv€þ|„üyùv~þuÿsÿp}ÿkwüdqý_nöI]ÿ>\ÿ>\êDZÓdj®Žy‹—qz”du’\xŽ]cŠ•k˜p‹•p…”mr‡`mˆ]iŠ]g^lbq•ey›izŸk¬t„±vŠ·|»}¼“¼‚—¼†›»‰›µˆ¢²­¶—¾½¨ÏÆ·ØÇ¿ÖÁ¼Ï¼¸Æ·´»­ª²¤£\L<\L<\L<\L<\L<\L<\L<\L<ZJ:ZJ:[K;[K;[K;\L<\L<\L=\L=\L=^L>]M=]N;^O<^O<\O<_RA]QA\PD[NFZOK[PN]QS]RZ_SgcXvh]…kb‘sk¤vµ‡|¾‰}»—…¿™€·›v©žpž®u¢ºv¥µi˜°Rv¸<D¾5/º4+À<0ËC7ÓE9ÛC8ä?9ñ=<÷:>ù:Aö=Eï@GæAHÙ@EÏ@BÀ;6¸90±8-­9,ª9)­9*»?3ÏF>ÙD>Ø:7Ù75Ú;7ÜC=ÞPFÎH<µ3#¾9(Á:'Ä;)Æ=+É=,È<+É:)Ì9)Ï7)Ð3$Ó/#à5+î=5õ@9ò:2é4+à4&ß5&â2%å/$ì-%î+%ñ+(ó,)ó+-ð(+ò&/ø(6þ%:ý7ü3ø3ô%7ò0;ó5Aò8Cñ<Eð<Eê6?ä,6ð2<ó/;ö.9ö-7õ.3õ.1ó/1í42Ü0,â=9Ú64ß;9ÿmkåCAÒ0.Î/,Ú;8ß@<àA;Û<6Õ7.Ö5-Ö5+Ò6*È6'Ä7&Â6'Ä8)Å;.Æ<1Æ<1Å;1ë`YáTMÉ<3¿0(Ë;3Ñ>6Î;3Ò=6Ø=8×<7Õ>7Ð=3Ê<2Â:.»7*¶6)¹;/¼@6ÏTMãgeíorøz~ÿˆû|ƒû|ƒûyûx€ÿwÿsÿn}ÿgvû`pü[mõG^ÿ<Zÿ>\ìFZÎ`cž~gy…]i„Qh…OmƒRt…X|‰^}ˆ`x„\q‚XazP_Pa…UfŽZn”auœg} j~£mƒ¬r„²tˆ¶x‹¹y‹¹y¹z‘º~”º“±•¬€›©†©­”ºµ¢Ç¹®Ë¸²È¶´¸¬¬ª¡¢ 
—š[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:[K;[K;[K;\L<\L<\L<]K=]K=^M=^M=^M;^O<^O<^O<^Q@^Q@]OB[OC\NE\OG]PJ]OO^P_bTmdY{i^‰pgœ}u®…{·ˆ{³™…¸¥…´¯€ª¸z¡ÆxŸÑuœÊdŒÆMlÜFRßA@ÕA=ÏE;ÌH;ÍG;ÓE9ÞA:î@Aõ<Aú=Dø?GñBIãBGÒ@AÆ?<¿?6¶<1°:.¬;-ª9)¯9+¼>2ÎD:Õ<7Õ31à;9åA?áD?èSLßUJ¿:+Â:*¿8%À7%Ä;)É=,Ê=,Ë9*Î8)Î5'Ò2&Ù2)å9/ñ>7õ>8ð50ä/&à5+Þ6)à4*ã1'ç.)ê-)í+)í++ê),è',ë'1ò)9÷'=ø"<ü>û%Aû6Hä(6ç.<ÿP\ÿ`lÿP\ò<Ió9Gè)8í*8ò.:ô0:ñ27ê01á+*Ø)&Ø0-Ú72Ô2/åC@ÿspõVSÝ==åGFÛ=<Ú<9Õ:5Ò80Ñ7-Ð6*Í4&Æ3#¾5#¹6$»8&¿;,À</¿9.À:/Å<4åXQÜMGÊ70Ç0)×<7ÞA:Û<6ß<5ç>9é=9æ?9á@8Ù?5Ï<2È:.Á9-»9,»<3ÌOIàc_ìnoú|ÿ„ˆý~…øy€øy€üyÿx‚ÿt‚ÿn}ýetù^n÷VhóE\ý8Vÿ<ZîH\ÇYZ‹mScrG]uC\yCdzIm~QtVvƒXs‚YoWm‡ZlŽ\o•buh{£n¨q„ªq…«p…­q…±tˆ´u‰·v‰·v‰·v‹·x¸|‘´~«{’¤~›¤‡­«–¾²¤È·¯Ç¸µ´«°¡ž¥—’™[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:[K;[K;[K;\L<\L<\L<]K=^K=_L=^M=^M;_N<^O<^O<^O<^N>^N>\O?_OB]OB^PE^OJbNYcRecTqdZ}le‘xr¢w¨ƒv¤”}§¬†«Â‰©Î‚žÚw”ál‰ÛXwÚD_ôCUûEQïJPáIHÑF?ËE<ÐE>ÚEAéCCð@Cõ@GõBHíDIßCDÏA?Ã@8¿@7µ=/®:+«:*ª9)®8*»;.Ì?6áFBâ=;îBBëAAÞ;6äKCæXLÕK>ÉA3À8(¼3#Á8(È<-É:*Ë7)Ð8+Î0%×3)à8/é;4í:5í62ë/-å,)ã0,ã1-ç10ì31ó25ö37÷48÷6;ì-5ê,6í.=ñ0Có-Fö)Hý+Nÿ3Rô9Lå3?ð@Mÿ_lÿkyÿWfõCSõ>Pî3Dí3Aí3@ê7=ã99Û83Î4*Ç/$Ë2*Ç.(Ð72òYTö\ZòXVÛAAâHFÜB@Ô<7Î70Í6-Ð9.Ï9+Ê7'Â5#¹8%³8&µ:(¹>.¹;-¶6+¼7.Ç>6ÛNGÙEAÎ50Ð1-á>9ç@:ã81ç51ë20î21ë52ç83á:2Ø:/Ð9.È:.¾5+º7-ÈIBÜ_Yënlû}€ÿ…‰ýƒõv}öw~üyÿx‚ÿsƒÿk}ýdvù^pôRgôF_ý8Vþ=ZïL]ÀTR{`CWh<Yq?[vCfyKn~QtVx…Zyˆ_wŒay•exšhz 
m|§qªr„«r…«p†©o‡­rˆ°t‰³tˆµt‡µt…·r‰·v‹¸w¶{Ž¬xŽ£x™¤‚­­•À¹§ÎÀµÒÄÁ¼¹À¨ª¶œœ¨[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:[K;[K;[K;\L<\L<]L<^K=^K=_L=_L=^M;_N<^O<^O<]N;]N;^O<_O?`P@`P@aQAbPFbNPcN]_Pe`Usga…to—{t~r˜v–®ƒŸÌ‹¡Û‚–ær…ëduéPdé;Rù3Lÿ<QûERéDKÕ?@Î@>ÑCAÖEBãEFèBDîAEíCFçDGÚDCËB<À@5»?3±;-©8(©:)ª9)°8*¼:-Ê;3èIEê@@ë=>å78Ú2/ßB;îZPôh[ÛQDÉA3¾4'Â6'È:,È9+Ë7+Ò8,Ñ0&Ú6-æ;4é;4ê40è/,ê-+ë--ó49õ3;ø3=ü3=ÿ1>ÿ1>ý1=ù1>õ0Aò1Dô4Kõ5Nõ/Pö-Qÿ2Yÿ>_æ3HùO\ÿanÿ_nÿWfûP`ôDXé8JóAQê:GÝ2;Ò/2Ê2-Â8-½;+½;+Ä:/À2(ÙJBúkcÝLGáPKÏ;9Ï;9àLJÔ@<Ë81Ê7-Î</Î=,Ë:)Ä;(´;&®;&¯<)³>-±9+¯3'º:1ÊC=ÖGAÙD>Ô72×2.ç;7ì:6è2.í2-ò,-ô,,ó/0ï31è71â:1Ú<1Ò>2Ã6,º4)ÅB:ØYSçjhú|}ÿ†‰þ€„õv}÷xýz‚ÿx‚ÿqÿi{ûbtø]oòPe÷Ibÿ:Xþ@\ðO_ºQNqX:Sf8]uEa|InSv†Y{ˆ]~‹`g‚—l€œl}Ÿm|¤o}¨p€©o‚ªnƒ©l…©lˆ«qˆ®qˆ°r†³r„³oƒµp…·r‰¸t‹µv‰¬tŽ¥wš¨„°´™ÈïØÌÀÛÑÏÂÂÌ«±Á›¡±ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8[L9[L9[L9\M:\M:]L:^K<`J<_L=_L=^M=_N>^O<^O<^O<_P=aP>aP>aP>aP>aP>bOAeMMdMW`O_aUmgb€uq’|v˜€s‘uŽ­€”Ά”ß~‡îqyùclúP[ú;Mÿ*Fÿ2Qÿ<Sñ<Mà<E×@EÕCDÔBCÜCEàBCäADãCEÞDBÔC>É@8¾>3¸>1¯9+¨7'©:)­<*´<,Á=1Ï?7à>;å99ã03â/2à42Û94çNFülaòh]ÙQCÆ</Æ:-È:.É7*Í6+Õ8/Õ4,Þ7/æ93é73é1/è/-î1/õ15ÿ2Cÿ0Eÿ-Fÿ*Bÿ&Aÿ#=ü :ö 
:÷&Cõ*Gõ.Mõ/Pó+Rô)Tÿ2`ÿAhóCXÿ`mÿhuøVeíKZñL\ëBUÚ2Cë@RÝ7CÉ,3º((±.$«8&¥?&¨@'¸?.¼:,ÝYMá]QÍG>ÍD>Á63É;7äUQÕF@É91È8-Ê;-É:)È9(Ä=)±>)¦<&¨;'¬=*«7(¬3(¼=4ÓLFÖGAÝE@Û96Ü30é73í62ë0+ô1-ý,/ÿ+-ú./ô1/í4/æ81ß;1Ø>2É9.½3(À;2ÏNHáa^÷yzÿˆŠÿƒ‡ùzû|ƒÿ{†ÿx„ÿn~þew÷^põZlëL`õIaü9Wû=YîM]³KHhQ1Qd6\tDf~Nv‡[Žc„h†’jˆ—n‰žsˆ¤t„¦t§r}¨p¨n©mƒ©l…©l…¨n‡«n…­o…¯o°l€²k‚¶n…·r‡³t‡­t§wœ¬…²¹šÈƯ×οÙÒÌ»¿Ê¤¬¿’š­[J8ZK8[J8ZK8[J8ZK8[J8ZK8[J8ZK8\K9[L9\K9\M:]L:]L:^K<^K<_L=^M=^M=^N>^N>\O<aR?aR?aR?bQ?bQ?aP>aN=bL?hOKgNRbP\cWkjfwv•}|œx—‘z–«•Æ„’Ú}…ðuzýklÿY[ýDLÿ0Jÿ5Tÿ<Uô=Qé?LãCMÝBHÖ=@ÛACÜ@CÞ@AÛA?ØC?ÐA9Ç>4½=0µ?1¬;+©8(­<,´?.»@1ÊA7ØC=Ù74á85ß//â30ç;7Ó0'Ñ5)ëUGÿvlë[PÑC7Ê<0Ë;0Ê8+Í6+Ô:0Ú70ß82å63ç32ê01í12ô36ý4<ÿ&7ÿ#9ÿ"9ÿ"9ÿ!8ÿ8þ6ø6ï4î9ð%Bð)Hî&Jñ'Mÿ3^ÿEhÿbuÿ`l÷WcñQ]òP]ïMZâ@MÔ2=Ú=FÌ7;»/.®/(¦5'¢;(œ@'œ>%ª9'²8+ÒVJ»;2Ä?8¾41½2/ÎC@åWSÕHAÈ91Ä6,Æ7)Ä5%Æ5$Ã:(±<(§:&¨7%­9*¬4&¯1%Â?5ÛRJÚIDáHCÞ;6Þ2.ê41í1/ï-+ü22þ,/ý+.ù+-ó++í-*æ1*ß4,Ö8,Ñ=1¿2(¼7.ÈIBÙ\Xówwÿ‰‹ÿ‡Šû€…þˆÿ}ˆÿv„ÿj|þ_s÷XlöTiëE]õD^õ6Uñ:VàLZ¥HC^K-Mc5Wo?b}Ju‹\‚•g‡˜lˆ›nŠ r‹¥vŒ¬z†«wªr~§m|¥i}¥g¥h‚¦i‚¥k‚¨mƒ«m‚¬l®h~°i³j„¶o‰¶s‰¯rŽ¬x™°„­»˜¿Å©ËʵËͶ½Åžªº‹—§]K7\K7]K7\K7]K7\K7]K7\K7^L8]L8^L8]L8^L8]L8^L8]L:]J<]K=^L>]M>\L=[N=]P?^SA`SB_RA`P@bQAcRBdQBdNAdLBjMIhKMdPYf[lnk†x{ž„­‰ˆ²ƒ©¥‡©¾ˆ Ð‚’ây~ðsqújbÿa^ùJWñ>Që:Lî?PïCQê@Mç=Hå>Fâ=Cß<AÚ<=Ö<:Ï>9Ê=4Á;0¹;-¨7'¯A0­<,¬6(¿A5ÌF=Í>6Ð72Ú85Ü71Þ5.Þ6-Þ8*Ý<*Ù:'Ó6%Ù;2ðSLòYQÙE9Ç8*Í>.ÔB3Ò:-Ô3+ðGBá//î5:ó6=î,7ÿBMñ'3ú&2ÿ)2ÿ(2ù'ý!+ÿ'0ÿ(0ñ'ð)ù+7ê 
.ì&7ñ-Cç#;ÿXsè.Eÿ]lÿ_iþZcùU\õPWëHMÝ?@Î95Ë>7ÄA7·?1¦7&š3"™6#š;'Ÿ<)§8-¯80ÂE?¿;7½31Å75Å54ÔDCÛLHÚKEÑD;È:0Ä4)Ç5(Ì5*Å5*¼</³9,­/#«(³-$Ã9/ÒC;ØH?äOHßE=Ú70Ý0*ç/-ò21ú24ý14÷/2ö/2ö01ó0.ò0.î3.æ5-Ý9/Õ=0¾2%À>1¹=3Ö_Yésqÿû„†û†ŒÿƒŽÿxˆÿh}ÿ_wÿ[uÿUoøLføEcñ:Yô;[æ?YÛ]i‹@;VF-L^6ZuBh‡Nw–]}œcg€¡jƒ¦p‚§q€§n¦m~¥l}¥i}£h£f£f£f~¤k¦mªn¬k~­g®f±gƒ³i‡µm‹´r°v®|š³‰¨¼™±Á¤²Á®¦²°˜žz…‹]K7]K7]K7]K7]K7]K7]K7]K7^L8^L8^L8^L8^L8^L8^L8^K:]K=^L>`N@^N?[N>[N>\P@^RB_SC]QC]OB_NDaOEbPFcOHeNHiMJgNQeS_h_tony~¨€‡»‡ŠÁž•Ê£‹»«£¹xÓyƒê|{ôvjöi`ø]aóOZìFRòHUùJYöFSï?Jë;Eä9Aà:>Ú::Õ=8Î?7È?5À</·<,«<+ª?-ª9)²:,ÁA6Æ=5È72Ô<7Ò50Ö5-×5*Ù5)Ú7&Û:&Û;%Ú9'Ý6.æA;ëMDâJ=Ñ?0Ç8(Ë9*Ô</Þ;4öJFå12ð6;ö6Añ.<ÿ@Nï'4ó$,÷%(û),ý+.ý+.ø((ö(*÷+,ï$'í%(ë&-þ<EÙ)ð4CüATØ&6ÿjuÿbjþZaüW]ôOSåBCØ=9Ñ>6¾8,¿D5¹H6©>,™/)(’(ž-%±;7ÑTRÎHGÃ54Ç56Ñ==ëWUØGBÖGAÐA9Ç9/Æ3)Ê6,Ð7/Ï81Ç=3Â91Â5.Ä3.É4.Ñ83Ø?9ßF@äJBÞA8Ú6-Ý0*ç/-ò12û03û03ô04ó12ô02ó/0ó0.ï2.ç4-ß9-×=1¿3$¾</·=2Ó^Wévsÿù…ˆø…ŠÿŒÿt…ÿf|ÿ\vÿXtÿQoþIhýAbó:Zñ?_âJaÁS\|@8SG1M_9[vCg†Ks•Yxœ_wb{¡f}¤k}¦l|¥i|¥i|¤h{£g{¡d}¡d~ c~¢e|£j}¦n}©l}ªi|«e}­c¯c‚°e…°h‡²kˆ¯pˆ«s‹«|¬„ª‡Œ¢‹x‡€dppR^^]L:]L:]L:]L:]L:]L:]L:]L:^M;^M;^M;^M;^M;^M;^M;^M;^K<_N>aP@`P@]P?\O?\P@[RC\RF[QG\OG\OI_PMcQOfTTgUUhVVhW]h]nmh†sv£|ƒº‚ŠËŠÔ˜‘× ŠÊª‚´¹}ŸÏ€“䄆îvósjüfh÷S\íGQðFQöGT÷EQò@Lî=Gå8>à7:Ù99Ô<7Í@7ÅA4¾?0¶>-­>-ª<+«:,¸B6ÇH?À;2¼1*Ê:2Í6/Ð6.Õ7,Ø8,Ù7(Ù8&Ü9&à:*Ý4-Û4.âA9éOCÞH:È5%É3$Ù@2â>5õHDè13ò5;ù7@ø4@ÿESø2?ð)0ë#&ì$'ù13õ-/ï''ì&'õ12ë)*ö8:è+1â)1ì4@ÿR^ï=Mÿ\iþ^fñV\îOTñNSêEIÝ:;Õ74Ô?9À6,Á?2¼B5³=1¬7-¥3)¡,%œ' š%ª0+ÇECÂ::½--È35×ABô^]Ñ@;ÒC=ÐA9Ë=3Ì9/Ô=4ÛA9Û@;Û@<Ø=;Ý==ã@Aä>>à::ã?=ìJGàA;Ý<4Ù5,Ý2+ç1-ð31õ12ø02ô04ô02ô02ó/0ó0.ï2.ç4-ß9-Ø>2À4%º8+µ;0Ð[Tízwÿ“’û‡Šúƒ‰ý~‰ÿr‚ÿdyÿ[tÿTqÿNmÿFgû=aô<^ïFcßTg¢AHp;3SH2Qa<^yFh‡Nr“Zu˜^tš_xžc{£g|¤hy¢fy¢fy¡cx byŸb{Ÿb}Ÿb| 
cz¡h{¤j{§h{¨e{©a{©`}«`­b‡²jŠµn‹²s‰¬t†¦w‚žuy“np‡mTdYCOM4@>^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^L8`N:aP<`Q>]P?ZN>YOCXPEXOH[QO_UTbXYeX_iYcl\fk^glaimerok‚ttšy~µ†Ê†ŒÚŽáŽ‡×Ÿ‹Ó­ŠÄ³€©½yŽÌ{Þzò‚wÿsrÿcgùU\ôMTõGQôCMò>Iï;Då6;à88Û97Ô<7Í@6ÆB5¼A1·?.«:*­>-±=0¼F:ÍSHÇH?º5.º1)È91Í81Ô<1Ù=1Û;-Ù6'Ü6&à8+à5-Û2+á>5ìNBäL>Ò:,Ñ8*àB6ß7.ð>:é/0ð16÷4<ü8BÿNZÿLUý>Eò38â$&í/1ê,,ð22è**ë/.ò::Òæ37Ý,2è8BðENÿXcòNWØ=CÛBEáBGá>Cß:>Ý8<Ø88Ó97É83Â91½:2ÀA:ÍNHÖVSÐNNÃCB²72¶95ÏKIÙMLäRSïYZëSRñYVÉ51Î>6ÒB9Ð@7Ó?5ÙB9àC<ãA>é?@ç8=ì9?ò?Cð;@è58ì>?öMJÞ93Û60Ú3+Þ3,å4.í41ò21ô01ô02ö/2ö/2õ/0õ/.ñ1.é4-à8-Ù?3Â6'·5(´:/ËVOï|yÿ“’üˆ‹ý‰þz‡ýoücwÿXqÿOlÿFhÿ@cù9^ô?`éKdØZh…35g;0WO8XhDa{Kj‰Ps’Yt•\u˜^wby¡ez¢fw dv awŸavž`xžaxža{Ÿa| cz¢fz£g|¦fz§dz¨`z¨]|«]~­_†²g‹´nŒ³tŠ­s†¦u€qtŽii€dRbUFSL:G@_N<_N<_N<_N<_N<_N<_N<_N<^M;^M;^M;^M;^M;^M;^M;^M9]K5_M5`P9_P;\O?ZNBXOFXQKYPQ`W\g`hnfsshysg{sg}ri~pm~pq†sv—x|«|Á€†Ò…‰àŒŒæ•ãŒÚ „¿žuŸ l‚¬nq¼vlÒviòsmÿllÿgiþ^`ûRYõFMï;Dê7=ç6<â89Ý98Ö=7ÎA7ÆB5½B0¸@/­7)µA4°;1°;1ÈRHÕ\SÉME»;2¾90Á7-Ì9/×?4Û=1Ú6*Ü4'â6*à3,â70ä=5åE9âF9ÞB3ÞA2â>2Û0&ê72ì0/ï.1ñ.4ú7?ÿPXÿ]dÿZ_ÿOSá.1ã03à,/÷DGè66Ü,,Û-.ë>@Ý36Ù37ÿbhà<CË*2Ê-4Á&*Õ;=á@EÛ6:Ù37ß9=Ü7;Í/0Ì43Í;;ÕGFÞRSì`cõhnôekê]cÄ@>ÆE@å^[øhgÿopÿjjåKK×=;É40Ñ>7ØE=ÖC;Õ>5Õ<4Ø93Ü43ï6<ñ-9ð,6ò.8ð,6ë,3ó9>þJKß3/Ü3.Ú3+Ü3,ã5.ê40ï4/ò21ô02ö/2÷/2ö.0õ/.ñ1.ê3-â7-Ù?3Æ8*´2%²8-ÃNGï|yÿ’‘ÿˆŒÿ‰ÿw…úl|øatûTnýHgÿ>bÿ:_ú6\ôBbàNeÂWaq0,_?0[W>^lIf~Nk‰St’\u–_v™_xžcy¡ex dvž`vž`u_vž_wž_xŸ`{Ÿa| bz¢d{¥e|¦d|§by§^y§\{ª\}¬^€¬a…¯g‡®m‡ªp…¥sƒ 
r{•nrŠjfwe[k^RbU_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N:^L4_N4`N8^O<[N>YOEZSMZTT_ZahbprmzuŒ}t‘{q“zp“vp’qt“rxšu|¨y€¶{ƒÄ…φ‰Ú‹Þ–Ü™ŠÍ›…¶¢ƒ£ª„‘«~yŸl[›R?¼MBÔQIâWRëWUñSTñKOí>Cæ5;é9<æ9;à;9Ù>9Ó@8ÊB6ÁB1º?/´:-ºA6«5+£.$¾I?Ùg\ÙdZËUK¹=3¸5+À3)Î:0Ù;0Ú6,Þ3)ä6-ß1*å82ã;2Ý9/Þ>0ãF7äB5ß9-ß1(ê5.ð31ð./í*.ô17ÿEJÿV[ÿ]bÿ\aâ9<ã9<Õ+.ôJMâ89Ø00Û57Ì()Ì'+øW\Ý>CÖ:>Ò7=º#Å),Ü<>çBFÝ49Ù,2Þ17Ú16Í(.Ò37äKNöaeûhnùcl÷_kú`løbkÖLLÕNJê\ZêVTçMMãHFÏ42Î3/ÙA<àKDäOHßJCÙ@8×:3×50Ü.-õ1;û,<ø*7õ'4õ)4ø0:ÿ>CÿJLà1.Þ3,Ü3,Ý5,á4-ç4/í4/ñ42ô02÷/2ù.2ø-0ö..ò0.ë2-â7-Ù=1É;-´2%±7,»D>ìyvÿÿˆÿ|ˆÿqöhxö_rùRlùDcÿ9^ÿ6]ú8]ñGdÑN`¢IMe5+VD0[Y@^lIe|NkˆRt‘[w•_x™`{ždz evž`s›]s›\s›\tœ]v^wž_{Ÿaz¡bz¡b{¤b|§b{¦_x¦[x§Yz©Y{ªZ~ª_ƒ­e…¬k‡©m‰§s‰¥u„u|”rvˆpnlfwe_O?_O?_O?_O?_O?_O?_O?_O?^N>^N>^N>^N>^N>^N>^N>_N:aO7aP6aO9^O<[OC[RK\VV_Zakgvso†}z—‚~¡|¥}w£{u¥xv§sy©s}°z¸}ƒ¿€†Ä…‰ÉŽÎ”“Íš“Ç •½«›µ¼¦²É­©Å¦”ªˆmœfJœE2¯A4·A5ÃC:×IEéOOîJKì?Cë=?é;=ä<;ß=:Ù@:Ð@7È>1Á=0½:2¼@8¯4,§1'¼J@ÑbWÖg\ÕeYÃOBº>2º4)Ç7.Ö90Ù5,ß2+ç60ã1-ä50â7/ß8/à>1æD7ä>0Þ4'æ4*é0+ð3/ð..ï-.ò14ó78öBEöMRüY^éFKöQWÙ48ëFJÝ8<æAEÚ7:Ï/1Ñ05äHKÓ8<Ð7:¹#%Õ:>Û8;å;>ë<Aè7=â-6Ý)2á1;ç<DðLSý]eÿksÿjsú`löZhùXh÷[hôddèZVèWTØ@=Ï10Ô44Ó33áC@ãJEéRKêSLáJAÙ?7Ø93Û81ã33ó.8ÿ0>ÿ0>ú*8ü.:ÿ;EÿCHûADã1/ß4-Ý4-Ý5,â4-æ3,í4/ò53ô02÷/2ù.2ø-0ö..ò0.ë2-ã7-Ø:/Í>0µ1%±7,²;5ésqÿŠŠÿ‡ÿv†þi}óató[p÷PjúCbÿ8`ÿ8aù;aìLh¿O[~86]=0NF1VX@ZeEcxMj…RrŽ[v”^yša{ždxžatš]p˜Zp˜Yq™ZqšXt›\uœ]y_xŸ`y _y¢^z¥^y¤\x¤Yx¤Wy¥V{§X©_‚¬d„«h†¨l‡¥o†¢q€™ox‘jr…il~ddv^_O?_O?_O?_O?_O?_O?_O?_O?^N>^N>^N>^N>^N>^N>^N>_N<cQ9cQ9aP<]P@ZPG\TR_Zaa_mtrŠzy™€¨ƒ°}¯{x¯zv±wwµw|¾x€Á}…Ä‚ˆÄ‡ŠÁ¿—–¾ 
¼¬§»¶¬µ¾³±È¸«Ð¾¦ÖÁ¢Ñ»–Ьˆ¯oT¯]H¥H6¨>0ÂG?àWQïWVñNOé@Cê=?æ<=ã><ß=:Ø=8Ñ:1É90Á80À<7¹:3·>5ÂPFÇXMÇ]PÑeXÓcUÃM?¼9/Å8.Ò91×4-ß2,ç60æ40â3.à5.ã<3çC7æB6â:-ß3'ê5,æ.&ð0-ð0-ð31ñ54ç-.ã35çBF÷X]öW\ÿouåDJëHMá=Dþ[`ÿouß>Cÿ}‚ÿmqêQTÔ>@Æ02Ö:=óILî9@ñ8@ø=Fò7Bè-8ô>Jÿ]hÿmwÿjrÿcn÷]gú^kÿapÿ]nùYeò\]ãOKâJGÕ74Ó01Ù54Ò0.Ø95ßD?ãJDãJBÙ@8Ó6/Ö4/Ü71é77ê'/ü0<þ2>ø,7ù0:ÿ>Eý>Cì44â3.à5.Þ6-ß4,á3*è3,ï4/ô43ö/2÷/2ù.2ø-0ø..ó0.ë2-ã7-×9.Ñ?2¶0%²6,°61çonÿ‡ˆÿ†Žÿp‚ücxñ\pôXn÷OiûBbÿ9aÿ<döAdäRi°QWd/'WD3GG/RT<U^?cuMiQq‹[v’_z˜b|dxœ_r™Zn–Wn—Un—Uo˜Vr™Zt›\w›]wž]wž[x¡[y£[w£Xv¢Wv¢Uw£Ty¥Xz¤Z~¦_¦c¡d~œf{—frŒ_kZ`vR[nPSfH\L<^N>`P@`P@_O?_O?`P@aQAbRB_O?]M=^N>aQAcSCbRB`Q>dS?aP>]P@^QI[QPZSZeapsq‰yxšz|¥~€±€¹~~ºyy·sr´nn¶twÈv{̀ƒÊ‹Æ–—䤾²°»¼¹´ËƳÓÌ°ÙϬÛϧÜÏ£ÝУ٘٘Ѩˆ¸€g¡_IËzgÖtg½H?×QN÷ccéKLå?Aâ89ä:;â:9Ü75Ù53Ö;6Ë73À50ÇB;¹=3¿I?ÆXK¹OAÎdVÑcTÌXIÅG;Ä:/Í6-Ø7/á6/ã4/ß0-ß2,à70ä=4á=1Ý7+ß5(å7,å0'ê/(ì/+î1-î20ë40è51ã75×79æJNõY]îOT×8=×6<ùX`ÿ~„ÿ~†ÿouÿgoÿipÿflõ\añX[ûZ_ÿU\ÿDLü8Bû7C÷3Aï,:ì0>ñ=IÿS_ÿ`iÿgrÿepþbmÿbpÿ`oý]gíSSßFAÖ85Ö41×11×/.Ø31Û94áD=ÜB:Ù?7Ö<4×90Û81à93æ66ñ7<ô5<ö4<ö4<÷5=ò59ë35ã1/Þ3,Þ5.ß7.á6.ä3+é2,ï2,ö31÷03ø03ú/3ù.1ù//ô1/ì3.å7.à?5Ì8,¿6,¯/&¸;7ÒXWÿ˜šþ{ƒÿm€ÿe|ø\tðPjõHfüCeÿ;dù9`õNlÓRd‰=?R,!C;&CG.KM5Z`DZjEcxMqˆZx’bx–`t•\r–Yq˜Yl•Sm–Rn—So˜Tr™Xt›Zuœ]uœ[t›XtWv Xw¡Wv¢Wv¢Uu¡Tt S{¥[{£]zž^u—[m‹Ub~MXrCSj>AW0?U/<R,\L<^N>`P@aQA`P@_O?`P@aQAcSC`P@_O?`P@bRBcSCbRB`P@fVG_QF]PH^TSaZad`omkƒvw–xz£{}®}¸x|¹rv¶lo´mp·oq¾uvÐ|~Չ‰Ó˜˜Î¨§Ç¹¶ÁÉźÓαÛתâÛ¥çÞ¥æÝ¢äÛ¤âØ£ÛÑ 
ÔǛӼœ»œ€©|eŋwÊ}m´VJÒa[örpìZ[çKLã?@è>?ë>@è<<â66Û55Ö=8É83Ê?8¹6.¸>3¾LA¶H;È]MÝo`Ô`QÆH:½5)Å1'Ô6-Ü5-Û0)â51å95ä=7à<3Ü8.Û5)ß4*ã5*ì7.í5-í2-ë0+ç0,ã2,á4.Ý52Ö87Ø>@åJNóW[ú]dü\dþ]eÿaiôS[ïNVñQ[ÿ_gÿjqÿflõZ`óNTí4:ñ,5ô*6÷-9ñ'5å!/æ'6í7DþO\ÿZdÿblÿdmÿenÿcmý[hõQZåEEÞ<7Û64Þ63ß55Ü30Ú2/Û62à>9Û>7Ù<5×:3Û81ß82å95ç:6ë8;ë7:í6:ï6;ï79ì57å31ß2,Ý4-Ý6-ß7.â6,å2+ê1,ò1,õ20÷03÷03ú/3ù.1÷//ô1/ì3.å7.ß<3Ô=2Â8.¶1*·74ØZ[ÿ“ûs}ÿfzÿ^vùVq÷NkúGgü@cü<cõAdåOhºLWy<9L2#<:#<B(EG/PT9VdA`rJnƒXu_v’_t“Zq“Wp”Tk’Ok”Pl•Qm–Rp—Vr™Xsš[sšYuœYuWuŸWuŸWtžTržQqRq›Qo—Pl”NiŒLbFXu?Ni6F]/@W+@W-AX.AX.YL<[N>^QA^QA^QA^QA_RB`SCaTD`SC_RB`SCbUEbUE`SC^PCbUM\QM[QRcYbkdtqm„vu•z|¥y{¬y~¶x|»ot¶ei°bf¯jn¸ruÆ}}ׇ„Ý—”Û©§Ø»¹ÏÍÉÈÜØ¿åá´éäªíè¦ðé¥íå¤çá§âÛ§×Ï¡ËÞ¸¯’©››‚l¨ƒp§o`œRGº]UÝmi÷uuï__äJLæAEëADì>@è8;ã68ß=;Ô;6ÔA:Ã81·7,·>3®=/·G9Ûj\ßi[Ú\NÊB6Æ3)Ñ4+Ù6-Ù2,Þ52éA>ìGCà=6Õ2)Õ1'Û3(Þ0'ç4-è1+ç.)æ/)ã2,ã80á=4àB9Æ-(Ê43ÜCEðVXú^b÷ZaóS[ñQ[éGRêHSïO[ü\fÿmvÿntôY_ã>Dé.5ô-4û2<ü2>õ+9í'6ö6EÿKW÷ISõPWøW_þ^fÿckÿ`i÷RYéBIÞ97Þ71ã75é<8ì<<æ95á51Ý60Ý:3Ù;2×90×90Ü71â94è;7é<8ã99ã99æ87è88è88æ66á40Û2+Ú6-Û7-Þ7.â6,ç2+ë0)ó0,÷10õ13õ13ø03÷/1÷//ó1/î3.ç6.Þ:1ÜD9È91»4.µ/.ä_bÿƒˆølwþbxþWsúPmþLlÿBh÷:`ô<`ìIfÔSgDJm<5O>,=<':?(ED/FH0Q\<YkEg|Qr‰[t\qWmRlPiMi‘Kj“Mk”Pn•To–Up—Xq˜YuœYuWtœVrœTpšRm—Mk•Mj”LfŽHaˆE\~AUt;Li3E`-AX*>U'D[/E_2Ic6WJ:ZM=]P@^QA^QA^QA_RBaTD`SC`SCaTDaTDaTDaTD_RB]QEZOM[RWbXcjcsqm†vu•xz£z|­w|´sy·mr´dh¯]d¬ag±lr¾x{ʉ‰ß•‘夡䷴áÇÄÙÕÑÎáÝÄçä¹ëæ¯îê­îè¬éâ«ãÛªÚÓ©Ë¡»¶™¥¥›˜…“ˆv˜q™rc›bW³i`Ïrmû‹‰õutê[]æLNéDHí@Dì;Aé;=ß:8Û<8ßF@ÓC;Ã:0º:/°6)§1#¿I;ÛaRêj]ÛSGÊ:/Ì4)Ô6+×4-Ù42ñMKþZXíKFÚ91×4+Û4+Þ2(ß.&â-&á,%Þ-%Ü1)Ú6,Ù;0×?4Â/'Ä4,Î:8ÙCBßFHàEIäGNêMVñQ]öVbøXdù\gÿgrÿmuôYaÞ;@ë28ñ.4ó.7ò*4ë%2í)5û<KÿS_í?IèAIêGLôQVÿY`ÿX]ôGMå79á53å84ë<9ò@>ô@?ï=;æ95à72Ý82Ú91Ù80Ù80Ü71ä84è96ê;8â:7à;9ã:7æ:8ç98å84à5.Ú3+Ú6,Ü8.Þ8,â6,ç2)ë0)ò/+ö0/ô23ô23÷03ö01ö0/ó1/î3.ç6.Ü8/àF<É91¿4/¶,,îfjÿy€øgtÿbyýTqþKkÿGjÿ=eó6\ë=^ãQhÄXeˆBBgC7[N;GF1AC-JH3EE-LU6Sc>btLlƒUp‹XoŒTmŒPiŽKiŽIhJh‘Kj“Ml“Rn•To•Xp—Xt›Xs›Uq™So—Qk”Nh‘KeŽHdG`‡D]AWy=Rq8Mi6Lf6Oe7Of8Ne7Oi9Sm>UH8XK;[N>\O?]P@]P@_RBaTD_RB`SCaTDaTD`SC`SC_RB^QIVMR_Xhnf{uqŠxt—xw¡xz«y{´tx·lq³bf­\c«`f°kq»v|ƀ„Γ•àŸžä®­ç½¼æËÉß
ÔÓØÝÛÎáßÆåá¾æâ¼äß¹à׶ÚеÎƱ¾µ¦­§›™™ŒŽ‹…yŠ|q‘vk›si¨meºoi扄î~ósrñedñWYðMNîDGì?Aã:7à;7äB=ÞE?Ì<3Æ=3ÄB5°2$®0"ÈJ<ÜXKÖN@Ì=/Î7,Ñ7+Ñ3*Ö42õUUÿkhú]XãD>Ü;3ß;2ã80â4-ã2,â1+Ý0)Ô0'Í/$Æ/$Â0#Å9,À4'Á2*Ì;6×CAÛEGÛBGÚ@HàDOìP[òVcòVaú^iÿgoóYaáBGê:<í57ï28ï28ò2=÷9CÿHUÿVaå7@Ý4;ß6;ëADùJOüIMõ<Bé13é54ë95ð=9ó=:ó;9ð:7é73ã81ß;2Ü;1Û:2Ü92ß82ä73è64é75æ:8ä;8æ:8è:9è;7æ95á6/Ü3,Û7-Ü8.ß9-á6,å3)é1)ñ0)ô1-ó23ñ33ô23ô01ô1/ó1/î3.ç6.Þ7/àC:Ê70À1-À13økqÿqzúftû]túPmÿBhÿ>gÿ9cõ8^æEdÙZm«V[wE>dJ=aYFTO;KI4PI7GE0GP3M]9ZlFf{Pm…SmŠRlŒMhJiŽIgHgJi’Lk”Rl•Sn”Wo–Wt˜Xs˜Un•Rk’OfKcŒHaŠF_ˆF[‚CY}@St;Pn8Ok:Rl=Xn@ZqCWo?Vq>WqARE5UH8XK;ZM=[N>\O?^QA`SC`SCaTDaTDaTD`SC`SCbUEbWQ^Vckf}yu~z›zy¡xy©vy°txµlq³cj°\c«^d®io¹w}ǁ‡ÑˆŒÓ™Ý£¥à°²ã¼¾åÆÇãÏÎÞÔÓØ×ÖÑÙ×ËÙÖÇØÐÅÒÉÀÌþŹ¹´¨¬£šŠ‹†yztyvquleleŽphŽa[`[ÀsmØyuð~}ü|yÿrqýccöSTðHHðDBê>:á<6áB<Ñ:3ÓC:ÞUKÂ>2¸6)½9,Â=.È?/ÐB4×C5Ö@1Ñ7-Ñ30êLMú_]òWSßD?Ù:4Û81á81æ93ç92ç92â:1Û:0Ñ9,Ê8+Å9*±)¸3"È@2ÓI>ÒD@Ë;:Ì7;Ï:@Ò8BâHRíUaðXdõ^g÷`iêU[ÜAEå@>ì?;ó@CûGJÿLTÿQYÿT]ÿV`á3<Ú/5Ý04è8;õ>Bø;?÷48ó04í42ï96ñ97î53ë20ê20ç40á4.ã<4à=4ß<5Þ;4á83ä73è43è43é99ç98è88ë99ë97é75â5/Þ3,Ü8.Ý9-Ý9-ß7*ä3)è1)ï0(ò1,ñ33ð43ó23ó11ó1/ò2/ì3.ç6.à91Ù<3Ì71Á0-Ñ@CÿnuýkuùbqðRiõHfÿ;cÿ5aÿ6bü?eäNiË]j‹GFjF:^M=^WDYR?RJ7OF5ID0DJ.GU2Sc>^sHgMj‡OjŠKgŒGgŒFfŽGfIh‘Ki’Nk”Rm“Vm”Uq•Up•Rk’OgŽKcŒH`‰E_ˆF_†G^‚E[~DYwAVr?Uo@YpB]rG^uG[sCYtAXs@NB2QE5UI9WK;XL<ZN>\P@^RBaUEbVFbVFaUE`TDaUEfZJi^ZngwupŽ|x™{y wx¦vw­pu¯jo¯ch¬`g¯`f°em¶qxÀ~…͉Ø—Ø™ 
Ö¡¨Öª±Û´¹ßº¿ßÀÁÝÃÃÛÇÆØÆÃÔÅÁкɼ³Ä»°Áµ¨¼¦˜¯–‹œˆ…Œqqquqppfewhe†nj„_Y•d_›ZT¹f`Ùsoðzvÿ{yÿvuÿhhüZXþRNõHBæ=8èE>Ù<5ÛG=ê\RÅ;0Ä<0½7+¿7)Ç>.ÑC5ÔB3ÔA1ØB4Ò86Ø>>ÝCCÜB@Ö=7Õ81Ø7/Þ7/å:3æ93å:3â;3Û>5ÖB6ÑE8ÌG6®-¼;%ÍJ8ÑL=Æ=5»0+À00Ê9>×AJâLWëU`ðZeð]eìYaßLTÔ>@Ï4/Ø7/à;9ê@@òCHõDJñBIî?Fâ5;ß26ã36ï8<ö8:ø14û/2ü14ï20ð95ñ85ì0.è,*ç0,æ3/á4.ä=5á>5à=4à<3ã:5æ95ê65ì65ì57ì57í57î68î66ë54ä2.Þ1+Þ7.Ý9-Ý9-ß7*á3(æ1&í1(ñ1,ð43î53ñ33ò21ó1/ò2/ì3.ç6.â;3Ò4+Ò:5Æ20èVYÿpxùcnòZiçI`óEfÿ8bÿ0^ÿ5bÿBhÜLe±PYk60^G7XK:UN;[O?VI9M@0JC0@F,AO.L\7Xj@bxGfJhˆIf‹FeŠDc‹BdŽFeŽHgLh‘OjSk’Sn’Rm‘QhNeŒKb‰HaˆG`‡H`‡Ha…H_‚H_}I]yH]wH`wKcxOczN]uC[vA[vAJC3MF6RI:TK<WK=YM?\O?^QAdWFeXGdXHbVF_UIbXNg^Ulcfwr‰zv›zužtsŸss¥tu­lp¯bg©\`©ae¯fl¸ntÀx{ȃ†Ó“Ý—œÞ›¡Õ §Ó©­Ú®³Ý±´Ý²´Û´´Ú¶´Ûµ±Ö³­Ñ®§É«¡ÄªžÂ¥™½™‹¯ˆ~™|x†dbgiefdZ[j[X{c_yVP’c]ŠNF¢UM½`YÖkeñwrÿ{ÿzwÿnjþa\÷TOéGDîOKÝB=ÞG@êWOº,"¾1'¾4'È?/ÑE6Ë>-Á2"Ê7'ÛE7ÞE?Ó97Ì3.Î61Ô;5×=5Ý<4á=4ä;4ä92Þ5.×4+Ï5)Ê8+Å<,Â?-ÕT?ÈG2½<)¿;.Ã=2Ä;5Â74Â35ÜKPÞMRáPUåTYèY]åVXÜMQÓCBÉ5+Ñ7+Ù80Ý52â24å26æ18ç/9ì4>ì3;ó5?û9Bý4>ú+5ÿ(3ÿ0:ø-3ù59ù59ð./ë*+ê1/è43â51ã<6à=4à=4á=4ä;6ç:6ì87ï77ì46ë35ì25í34í55ê41ã1-Û0(Þ8,Ý:+Ý:+Þ8(à4&ã3$é3&ì4,î52î53ð42ð3/ò2/ð3-ì3.ç6.ä=4Í/&×>9Ì35ùbgÿoy÷[iðNcêC]ùEhÿ:eÿ0]ÿ6aöFjÉJ]“BHS-$WJ9SL:OH6[O?UI9F?/HF1>B'@J(IU1Sd:_rEe}Kg„JeˆHcˆCa‰Bc‹DcEeŽJgŽMhPiQlPkMfJcŠIaˆGb†Fb†Hc‡J_€G^H_}I`|Ka{Kd{Mf|Nf~N]xC]z@^{CDB6FD8KG<PG>RH>XJ?]M>`P@`Q>aR?`SB_VGcZSgb_ojnso~wq“zt 
~x¨yw©pp¦gg£`a¢\_¤`b­fhµopÀyz̄‚ÖŽŠß–’眙栞ݡ¡×££Ù¤¤Ú¦£Ø¦¡×¥¡Ô¦ŸÓ¥žÑ¤œÍ¢šÉ¢™ÈŸ–ٍ»Ž‚°ƒ|žrn}igld^`g[[kYUrWP}WNŒWO•RI©[QµXPÁZSØkdìyrù‚|ÿ‹†ùuqÿusí_]àQMÙECïZTÌ71Å2*Å7+Ã7(À7'Ã7&Æ9(Ê;+Ñ=/Õ?1Ò8.Ö<2Ø@5Ô<1Í5(Î4(Ø:/ãA6à90á90Þ7.×7+Ï7)É:*Æ?,ÄA/Â=.Æ@4ËE:ÍG<ÌC;Å<4¼3-¶+&Ç<9ÐEBÚPMàVSãYVáXRØNKÐC:Ì;*Ó:(Ü8,â5.è./ì*2ð(3ô'8ú->û,@û*?ý)?ÿ(?ÿ&?ÿ%=ÿ%=ÿ#:ÿ':ÿ+<þ,9ô,6í.5è45ç;9Ý84Ý:3Þ;4à;5ã:5æ95é73ë54ë35ì46ë54é54ç53â5/Ý5,Û5)Ü8,Û9*Ü9(Þ9&ß8&â7%ã6%æ4&ì5/í41ð50ñ4.ñ4.í5+ë6-å7,à90Ø7/Õ:6Û?BÿmwÿbsûSjõIcÿKjö6[ÿ6_ÿ<gø>cçNj¯IV^$#E-!?:'HE4PM<PI9EB1>B1?G/BH&EK%KS.S]8[kDavKd~NdƒJc†F`ˆBa‰@c‹BeFjJlPn’Rl‘LjIgŒGf‹HfŠJf‰Ic…HaƒGe„KdIbIdJgKg€Ie~Ga}C\|=\=_‚B=?4@B7FC<JE?PE?TG>[K>^N>aP>`Q<^SA^VIc[Xhcinlyso†uo•vpžtp¢pm¢gfŸaaŸ]^¡]]¥ed°kjºusÆ}{φ‚ÙŽˆà”Žæ˜“å›–Ùœ˜Õ™Öžš×ž˜Ö—Õž•Ô•Ñž–Ñœ”Íœ“Ê›’ÇšÄ“‰»ˆ®‚xtk|mdif]^f[YiZSoWM{UJŠVKšVK±]S»ZSÁXRÓfaàqjãvoí~wý‡…ÿ‡…÷uuîgdãUSëZUÇ2,Â/%Å7)Ã7&Á8&Á8&Ä8'È;*Ï;-Ó=/Õ9,Õ9,Ó:,Ò:,Ñ9+Ñ9+Ô8+Ø8,ß;1à90Ü8.Ö8,Ï9*È9(Á:&¾9&¾6*Á80Æ=5É@8ÊA9È?7Å<4Ã:2¿4-Ç>6ÒIAØOGÚQI×NFÌC;Ã7*Ë8&Ó8$Ý7)å4,í//ô+3ù)7þ);ÿ+Bÿ*Aÿ)Aÿ'Bÿ#@ÿ!<ÿ=ÿ;ÿ:ÿ ;ÿ%;ÿ);÷-9ð19ê7:å;;Ü94Ü:5Þ;4à;5ã:5æ95é73ë54ì46ì46ì65è64ä71ß6/Ü5,Ù5)Û9*Û9*Ü9(Ü9&Þ9&ß8%á8%ã6&ç5+ê3-ë4,ë4,ë5*ê5*ä7)â8+à<2Õ3.Ó54óTYÿesÿ[pöHcÿIhÿAdÿ;`þ4\ú<bòMmÍLb‰8?S*&A5)7:);<,B@1F@0@?-=B.?H-=CKP'\b<fpKhvRf{Rf}QeNaƒFa†Aa‰@cŒ@gŒFjJkMm’Om’LjGhHgŒGgŠJgŠJf…Je„IfƒKdIdHdHg€Gg€GfEb~A^?_„?b‡B7:/;=2@@8EB;KB=OE<VH=YL<^N>]P?\SD^WMc]]ifmom{sq‰so”qk™jf˜c`•\[”ZZ˜\] 
\_¦hjµno¿wxʁ҇ƒØ‹‡ÜŠà’Ý“Ó“‘Г‘Г‘ДÑ“ŽÐ“ŒÏ’‹Î•ŒÍ“‹Ç”‹Ä”‹À“‹¾Œ„³ƒz¥€r•|ewu^fi[[e\Wd[RgYLrVH„TH£ZQ½`YÉ\YÐZXÛcbákißokãvqûŠ†ÿŠ†ÿ…ƒÿ}y÷nhîaXÅ5,À2&Å9*Á:'À9&À9%Â9'Æ:)Ì:+Ð:,Ö:-Ô6*Î5'Ð8*Ô>/Ô>/Ï7)Ë/"Ú:.Ú8-Ù7,Ö8,Ð:+É:)Â9&¾7$¾6*À6,Â8.Å;1Ç=3Ê@6ËA7ÌB8Ç:1ËA7ÏE;ÐF<ÒH>ÑG=Ê@6Å7+Ì6%Ö6&ß7,è50ñ03ö-7û+9ÿ*<ü'=ü'=û%=ù#;û!:ú7ü7þ6ÿ8ÿ6ÿ$8ý*;÷2<ñ6=ë8;ä::Ü94Ü:5Þ;6à;7å95ç85é75ë54í36î47í55ê65æ72á6/Ü5,Ù5)Ú8)Ù9)Ú9'Û8%Ý8%Þ7$à7$á6%á5)â4)ã5*ã6(ä7)á7(ß7*Û7+Ý<2Ð1+Ø88ÿkrÿ]nþPiñ<[ÿHlÿ7]ÿ>eù5[î>bæVqª?Qa#(L/+;7+18(37&:;)B=*A<(@>)BB&?FXa4s~T€Œdw‡`i€Tb|MaI`‚E`…@a‰@dAhGjJkMl‘Nn“Mk‘HiŽIgŒGh‹Kh‹Kh‡Lg†KfƒKe‚Jc€HdIf‚Hf‚HeFb€Bc†DeŠEhH25*69.;<4A>7G@:KB=SF>VJ>XL>YM?YQD^WOc^bigron€roŒrm•jg”a_WW‹TUŽWX˜\_¢ac­ln»rsÃ{|΂ƒÕˆ†Ù‹‡ÛŒˆÜŠ×ÒÎŒÏŽ‹Î‰ÏŽˆÐŽ†Ï…Ώ…ÊŽ…Ə…Á‡¼‡¶‰‚¬€xoŒ€_p|Y`lXYd[T^_Q_\IjWF}SEžUL¾\YÑZ\ÙX\äcgçkkáplåxsì|xízuþƒ~ÿ†ÿ{õkaÉ<2Ä8)Â;(À;(¿;&¾:%Á:'Å9(È9)Î8*Ù;/Ô6*Î5'Ï9*Ñ>.Ñ>.Ë8(Ç/!Ò6)Ó5)Ó5)Ó7*Ñ9+Í<+È=*Ä;)Ä:-Ä:/Æ90Æ9/Ç:1È;1É<3Ë=3ÔE=ÓE;ÐA9Ë=3Ì=5ÏA7Ï@8Î;1Ï6(Ù5)ã6/ë31ò/5÷+6ú*:ü)<ù&9÷&;ø'<ø'<û&<ý%:ÿ$:ÿ#:ÿ#8þ#7û&8ù,;÷5>ò9?è8:à87Ü94Ü:5Þ;6à;7å95ç85é75ë54î47ï58î66ê65æ72á6/Ü5,×5*Ù9+Ù9)Ù9)Ú9'Û8'Ý8%Þ7%Þ7%Û5%Ü6(Ý7'Ü9(Ü9(Û:(Ù9)×:+×:1Ñ4/ìILÿoyÿVjùE`ø<_ÿAgÿ3\ÿ8bø>cèMlÅOe€0=J 
C1-11'/7(5<,>A.B@+B<$E>$IF%U\0p~MŸr—¨|ƒ˜mj„W_{J]~E_ƒCa†Ac‹BfCkJm’Mm’Om’Oo”Nl‘KiŽIhHiŒLiŒLj‰NiˆMg„Le‚JdIe‚HgƒIh„IgƒHd‚DgŠHiŽIl‘L.4(36+891<;6B=9H@=MD=QG=SI=SK>UNDZUQa^eigumklkŠki‘b`XVˆPP†QQXYš`cªfj´orÁuxÉ|Ѓ„Ö†‡Ù‰ˆØŠ‰Ù‹ŠÖŒ‹Ï‹ŠÌŠ‰Í‰‡Ð‰…ш„ÑŠƒÑ‰‚ÐŒ„Ï‹„ȍ…Á†º†°‡¥~w–m…„Yj€SZnVTc\RZaOZ_HeZDxUB“ME¸TRÐSWÛRYå^dèejãklæuqäunâoh÷~vÿ‚{ÿ‡~ôl`É?2Á8(À;(¿<(¿='À<'Â;'Å:'È9)Í7)Ø:/Ù8.Ò8,Ï9+Ì:+É:*È9)É7(Ï9+Ñ7+Ò8,Ñ7+Ï7*Ì8*Ë<,Ë>-Ê<0É=0Ê<2É;/È:0Ç9-Å7-Å5*Ð@7Ð@5Í=4É9.Ë80Î;1Í:2Ì5,Ð/%Ù0)ä20ì25ô/8ù-9û+;ü+>ü-?ü/@ü/@ü/@þ/Aÿ/?ÿ.@ÿ.@ÿ)9ü)8ø,8õ0:ñ6=í8=ã77Ü43Ü86Ü:7Þ;6â:7å97ç77ê65ì44ñ48ñ48ð67í76æ74á6/Ú6,×5*Ø8*Ö9(Ö9(Ø9&Ø9&Ù8&Ú7&Ù8&Õ8%Õ:&Ö;'Õ=(Ô=(Ô=*Ô=,Ô<.Ñ7/Û<9ÿ^eÿaqÿPi÷<[ÿAiÿ4_ÿ5aû1YôKlÛYq•>NZ%-C),:20.1*08-8A0=B,<<"A<UH(`[5|†T¢nª¾¨¾Ž¥wo‹[aK^F`„DcˆBgDl’Go”Np•Po“So“So”Ol‘KiŽIgŒGiŒLiŒLi‹OhŠNg†Ke„IdƒHe„Ih†Jh†Hg…Ge…FiŽKi‘Kl”N+1%.4*470894>:7B=:HA;KD<NG=NG=PKEWSR_^ffeugggf†ba‰[YŠRR†NO‡RS“[^£ei³lo¾uwÊz|сփ…؆‡Ù†‡×ˆ‡Õ‰‰Ó‡ˆË†‡È†…Ë…ƒÎ…€Ð…€Ò‡҇€Ð…ÒŒ…ˍ…ÁŒ…¸‹…«„{v}k{„VcPVnUQd^P[eMYcJb^EsXCOE´WRÐUZÛT[ç\cèagågjèpoìyræqh÷|tósjþxmæ\QÄ;+¾5#Ã<)Á=(Á=(Ã<(Ä;(È;)Ë9*Ï7*×6,Ø7-Ô:.Î:,Ç:)Ä8'Ç;*Ê=,Ï=.Ö@2ÙA4Ö>1Î8*É5'Ê8)Ë<,É:,È:,É;/Ê<.Ê:/È9+È5+Ç5(Ê6,Ï;/Ò>4Ó?3Ö?6Õ>3Î7.É,#Ò+%Ü-*ç02ð39ö1;ú0>ý0Aþ1Bú1A÷1@÷1@ö0=÷/<÷-9ú,9ú*7ü0<ù/;ô0:ñ4;í6;æ69ß55Ú53Ü86Ý97ß:6â:7å97ç77ê65ì44ò59ò59ð67í76æ74à72Ú6-Ö6*Õ7+Ô8)Ô8)Ô9'Õ8'Õ8%×7'Ô9%Ð9$Î<%Ï=&Ï?'Î@(Î@*Ð?.Ó<1Ñ61ëHIÿfrÿOdÿHfû7[ÿAmý/]ÿ7aò4XäQk¼Ufm19E(,E697325426926=-5:#38CCja:…TŸ®w©Á‡µÍ™¯Ç•’®}v”be†Q`‚F`…BcˆBiFo•Jr˜Or—Rq•Up”To”Ol‘KhHf‹Fh‹KiŒLjŒOi‹Og†Kf…Je„If…Ii‡IjˆJi‡Ig‡FjJk“Mn–P(0#+1'14-561764<87@<9C>8IE<HE<KHCRPQ\Zecbtbb|``‚_^ˆZXŠTTŠTT\] 
gi³psÄvxÍ{Ø~‚Ûƒ„Þ„†Ý„…؃„Ö…„Ô†…у†Ë‚…ȃƒË‚Íƒ€Ñ…€Ôˆ€Õˆ€Ó…Ò‹„È‹ƒ¾‹ƒ²‰‚¤|“wp€wfp~U[|PQnUNf_O^fNZdIaaGq]E‰TF­]TÈZ[ÔVZâ[aæ]déaeîllõzsírj÷xoÞYPå[PÒD8Ã5'Ç:)Å9(Ä;(Å<)È=(Ê;*Î;+Ñ9,Ô6+Ó0'Õ4*Ò8,Í;,Ç;*Â;(À;(Ã<)È;*ÕC4áK=ÞH:Ó=/Ë5'Ê6(Í;,È9)È9)È9+È9)Ê8+Ë9*Ì8,Ë7)Í6+Ò<.×?4Ø@3ÛA7ÛA5×:1Ò1)Ü0.å14ï5:õ6>ù4>ú2?ú0@ù0@ò.<ï/<í/9í07ï-5ï,2ò+2ò+2ö3;ó4;ò5;í6:å57á55Þ65Ü75Ü86Ý97ß:8ã99æ87é77ê67ì46ô5:ô5:ñ7:î79ç85à72Ú6-Ô6+Ó7+Ñ8*Ð8*Ð9(Ð9(Ð9&Ñ8(Ð9&Ë9"È:"È=&Ç?'È@(È@*Ì=-Ï;1×96üU\ÿ^qÿGcÿ?aÿ;bÿ7gþ3bû8`íFdÅNa‰@IT15A55@:<:46?56>6389+6<"9CSZ.‚Q¢§q±ÅŠ¯ËŽ°Ë”¦Ã’®}{™eh‰R_E^ƒ@c‰@iDo•Js™Ps˜Sq•Uo“So’NlJh‹Gf‰EgŠJiŒLi‹Ni‹NfˆKd†Id†Ie‡Ji‰JjŠIi‰Hf‰Ej’Ll–No™Q&.!)/%-2+13.333764:97=<7GD=DD<HGCPPRZZd_ap``z\\~`_‰][YX‘]]›gg­rtÁz|р܁„ႅ䅆ㄅჃہ‚Õƒ‚Òƒ‚΄‡Ìƒ†É„„̃‚Î…‚Ó‡‚ÖŠ‚׌ƒÔ…ÐŒƒÄ‹‚·Š‚«†€œxˆsnuqbewXVtSLjVKg`NbfO_eIcaHp^F{R@ž\N·ZSÆTS×VZâW\êY`òce÷qnðme÷qhÑD;ÔA7Å1%È4&ÔC2Ç8'Ç:(É;'Í<)Ñ;,Ó:,Ø7-Ù6-Ï,#Ï1&Î6)Ë<,Ç@-Â?+¾;'¼7$À4#ÑB2ãOAåOAØB4Ë7)Ë7)Ð>/Ê;+É:*Ç8(Æ7'È6'Ê6(Î8*Ð8+Ó;.Õ;/Õ9-Ò6*Õ7,Ú<1Ý<2Ü71è88ñ8=ø;Bû9Bú4Aö0=ò,;ï,:í/;ê19ê2:ë48í49ñ48ô36ó57ð37ï6;í9<é69â45Þ44ß76à;9Ý86Þ97á98ã99æ87é77ê67ì46ô5:ô5:ñ7:ì89æ95ß82Ø7/Ó7+Ð8+Í9+Í9+Ì;*Í:*Í:(Í:*Ì;(Å9"Ã;#Ã=$Â>'Ã?(Æ?,Ê<.Ð92ß<?ÿ\fÿQhÿIfÿ9]ÿ=hù.]ý<hé;\å\p¦LV^-0G85BC><89?48F37C41=:)>D(GV-bs?›a­½±ËŒ«É‹¦Ã‹º„Ž¬z{™ef‡PZ|@^?a‡>iDo•Jr˜Or—Rp”Tn’Ro’NlJgŠFeˆDf‰Ih‹Ki‹Ni‹NfˆKe‡Jd†IeˆHgŠHh‹GjŠGg‹Ek“Lm—OpšR)/#)/%(-&+-(///3317759:4==5?@8DE@KKKQQYVXe[[s^]}YX‚[Y‹^]–ee£nn¶wxȁ؄…⌌ðŠŠì†‡ä‚‚Ü€€Ö‚€Óƒ‚΂‚ȃ„ǃƒË„„ΆƒÐˆƒÓ‹„ÒŒ„ÏŠ€ÅŠ€¼‰€¯‡}Ÿ€xwozkegf]Xm[Qm[Mi\Lf^Kd_IeaHh`Io_FoP;…UA¦^PÀbZÑYXÝQTîS[ø\`ùggæYRÔD<Î70Ñ7/×:1Ö<0Ô;-Ï9*Í:(Ð:)Ñ:)Ô8+Ø8,Ü5,Ü5-Ð/%Í3)Ì8,Ê>/Å@/ÂA.¾=*½:(Â9)¾0"éWJàL@Ã/#Ñ=1Î</Ç8(Ê;+É:)È9(Ç8'È7&É6&Ë5&Ì4&Ð7)×;.Ü>2Ü<0Ù7,Ú8-â>4ìC<õBEò9?ñ3=ö4?ö2@ñ-;í-:í2=ì7@æ5;ä5:å78ë8;ð9;ò89ï77ò;=î<<ê::æ87á77ß76Ü75Ü75Þ97á98ã99æ::é9;ì9<í9<ï8<ô7;ó6:ð8:í9:ç;9à;5×90Ï7,Ê8+È9+Ç9+Æ:)Ç:)È;)È;*Ç<)Á9#Ä='¾:#¶4¼9%ÉD3Ï?6Ê0.ÿ^eÿVgÿIbÿ<\ø1X÷0Yû6cõBiçYo¬FQo33N3,B?6>C<A89@-1L/3K2.B9(DI)Zm?~š_ž¸w¨Å‚«É‹Ÿ¾‚˜¶€”²~‚ 
nf„RYwCY{?^?c‡AiFn”Im•Ll”Nk’QlPjIfŠDc†Bc†Bf‰Ih‹Kg‰LfˆKhŒNhŒNg‹Kf‹HeŠGgŒGkIm’Lq›SrUtŸW*0$)/#).',.)//-220561782;<4>?7CD>IIGOPTSUaXYk[[u\Z^]‰db”kj¤utº~·‡ß‹ŒéïŒŒð‰‰ë…„ ؀~ҀρË…†É†‡È‡ˆË‰ˆÎ‹ˆÏŒ†Î†ÌŽ…ÈŠ€¼‰±†|¡‚x‘{s€tknlb`e]Re_Oe^Lf^Kf^Kh]Ij^Hk_Io_Fw_G[C’YF­ZLÇVPÞRSõQXÿX_ðUSãKFÙ>9Õ60Ù6/Ü90Û9.Ö9*Ó7(Ñ8(Ò9)Ô8)×7+Ü5,ß4,Ý4-Î0'É5)È9+Ç>.ÄA/Á@-¿<*¾9(Ä8)Ë<.äREÜH<È4(Ì:-È:,Â4&Ê=,Ë<+Ê;*Ê;*Ë:)Ì9)Î8)Ð7)Ó7*Ö8,Ù7,Ø4*Õ1'Ø1(Þ7.ç<5øDGñ8>í/9ï/:ð0=î.;ë1<ë6?å6=Ü36Ø24ß:8îDDùIIøDCó?>é:7ç98å97â96á77à87á98á98à87â88å99ç9:é9;ì9<í9<ï8<ó6:ò59î68ê88ä;8Ü:5Ó9/Ë7+Æ8*Ã:*Â:*À;*Á<+Á<)Á<+Á<)½9$Â@*»:%³5º;(Á;/Ê94Ø<?ÿ]iÿPdÿA[ù8Wö6[õ;`ô>dãIe«?Lƒ?>`;3G;/:>08>2F<:R>=N0.S8/VK7ciEx[°pÂ}¡Æš¹}š¹€š¶ƒ©xrŽ^YuETp=\{B_@b†@iEm“Hl”Kk“Mj‘PiOgŒGe‰Cc†Bd‡EgŠJiŒLi‹NhŠMiOiMiŽKhHgŒGhGl’Im•Lp›SqœTtŸW-1#-1#,/&,.)//-11/34/56.:<1<>1@B7FGAKMLQRWVVbXWi\Yt^]fdŒpn zx·…ƒÎ‹ßêŽïŽŒíŠ‰ç‡„߁Ó|Ë}Ȁņ…LjˆÆŠŠÊŒŠÉŽ‰É‡ÅŒ„¿‹‚·‰€­†|¡€v{p€ujpoeci_Vc]M_aL_aKc^Jg\Jl[In\Ho]Ir^FwaIxX?‡S=£VDÇXOãUSøOTþMSâ>=Þ<7Ý84Þ71á6/â7/Ý7+Ú7(Ô7&Ó8&Ô9'Õ8'Ø6)Ý5*à3,Ý4-Ë3(Ä6*Â8+Á<-Â>/Â>/Á<-À8*Ã5)ÜI?ÚF<ÕA7Ï<2Ä6*Ä:-¾6&Æ:)Ç:)È;*É:*Ë9*Ë7)Ì6(Î4(Õ9-Õ7,×4+×3*Ú3+ß6/å:3é<8ð<=í49ë07í29î3<í4<ë7@ê=CÝ7;áAAìNMøYUýYWúQNïB>å84â62à72à74à74á85â96ä::ä::á77ä88æ8:é9<ë8<ì9=ë8<ì8;ï58ï58ì57ç77â:7Ù:4Ï8/Ç7,Â8+¿:+½;+»<+»<+»<)»<+»<)¸9&½@*³9$±6$¶;+µ0'Ã40ìPTÿ[iÿJ`õ:Uð7Vñ@]ïHfçHdÍNa‚89e?6R@2DA09=,57)?7,L:0F.$R=,g]Bˆ_“ªt—»{–Áz–¿{”³zœ¸‡š´…€šm]vLHb5Nh8\wB_}?cƒ@hŒDl’Gk“Ji‘JgLgNd‰Dd‡Cc†DeˆFh‹KkŽNkOkOkOkOiMhJgHi‘Hl”Ko™Mu¡Vt¢Wv¤Y24&04&12*01+12-23.34.46+9;-;>-?A3DF9JKENPORQWTT^XVd\Zoda~nl”zw¬„‚ÃŒ‰Ö‘ã”‘ꓐ뎋≅قÌ|Ã|¿€~¿„‚Á†…Á‰ˆÂŠÃŒ‡¿‹„¸‡®…}¤…{€vŽyn~rgmmaak^Vi[Pc\J^aL]aJc^Jh[JmZKqZJtZIv[FsWAxR;ŠQ=«ZGÍ[PãTPðFIñ>AÛ2/Ü5/á51ã60ã5.á5+à6)Û8)Ö6&Ó8&Ò9'Ô9'Ø6)Ü6*à3,Ú6-É7*À8*¼7(½9*¿=-Â>/Ã;-Ä8+Æ3)éUKÔ=4Ì8.ÑA6¾4'À<-À<-À8(Ä8'Å9(Ç:)È9)Ë7)Ì6(Í3'Ò6*Ô3)Õ2)Ü5-ã:3é>7ì?9ì=:è45ì59ï6<î5;ê2:æ39ä7=â<@óUVübbÿmjÿidóVQäA<ß63Þ50ß61Þ71Þ63ß74á85â96å99æ::å78æ89è8;é9<ë8<ë8<ë8<ì7<ï6;î68ê67å97ß<7Ö<4Ì9/Å9,¾9*º;*·<*µ<)µ<+´=)´<+³<(³:'·@,­8&­8'³;-«+"Ã54ý`iÿTdúG]ð<Uë=XèH`áOdÒO_´SZvGAXH9KD2FE1BE0=?*:8#;3?3PG*nkHŠ“f—«v“³tŽ·s‘ºxžº‡¡¸
Œ¦l‚[K_:BW0Mb7Yq?`|Adƒ@iŠClFi‘Hg‘IeKeŽJb‡Bc†Bd‡EgŠHjMlOm‘Qn’RlPj‘Nj’Li‘Ji‘Hi“Gm—KpœOv¤Yw¥Zy§\78(68*77-56056167267/68+;>-=@-@C.DG4IK>MNFQPNRRRVUSYX]a_lkius›€}²ˆ†ÇŒŠÓ•’ᔑ␍܋‡Ó„Ä}º|µ|³~·„º‡„¹‰…·Š„²†¨z›€x€tˆznzrfjm_^j[Ti[PjZKfZJb_Lb_Le^Li\Kn[Lr[Kw\K|[HVB‡S>›RA¶VFÏSIÚG@â88å33Ý1-ß3/â51ã4/á3,ß3)Þ6)Ü9(Ó8$Ð9$Ð;'Ñ:'Õ8)Ù7*Ý5,Ø7-Æ:+»9)µ6%·8'½;+Ã>/Ç;.Ç7,Ð90õ\TÖ=5É5+ÏA5¹4%·:(¼?-À;*Ã:*Æ:+È:,Ê;-Ì:-Ï8-Ñ7-Ð3*Ò1)Ö1+Þ71å<7ê>:é<8æ74é77ï;<ð<?ë6;å28ä5:èBFíMOÿxwÿqnñ`[ÝJCÑ83Ñ2.Ü75ç?<à93à93à72à72â64ã75å76å76ç79ç79ê7;ë8<ë8>ë8>è7=ê7;ï8<î7;ê7:ä::ß=:Ö=7Ë;2Ã;/º8*µ:*³;*¯<)¯<*¬<(¬;)¬;)«<)­>+§9(§9*ª8-¬/)Ë@CÿhrôM^óH[îEZåH[ÙO^ÉT]·TWœXUdM?PM:LG4KF2FD-@C(>E$?I$KV.\g<xTŒ˜h‘¤m¨n²r—¹}Ÿ¶ˆ’§€u†dRcC?P0BS1Oa9Wm>b{Be‚BiŠCkEi‘HfHbHcŽGb‡Bf†CgŠHiŒJkŽNmPn’Ro“Sk’Ok’Ok“Mi“Ki“Ik•Io™MržQv¤Yv¦Zw§[?=.=>0==3==5=<7<=7;=2;=/?B/@D-CG.FJ3KL:NOAQQIRRHYWJZZN`_]gerqn‰{y¡ƒ€·‡…ďŒÑŒÓŠÏŠ…ǃº~y¯zw¦yv¥{v¬|y®€|®ƒ}©ƒ}£x—|t‹zrvksrffn`]j\Sk[NlZLl[Kk[Ki]Mg^Mi]Mj]Mo^Ns^Mz]M‚[J“`O›VG§N@¸J=Å@7Ï60Ø1+Þ1+á51â70â5/â4-à2)Þ4'Ü6&Ú;(Ñ9$Í;$Í<'Î;'Ñ:)Ö9*Ù7,Ó9-Â=,µ:(°5#³6$»9)Ä<.É;/Ë7-Ù?7ø[TãIAÎ:0Ì@3¸6&¬3 µ<)½:(¿7'À7'Ã7(Å7)É7*Î7,Ñ7-Ù<3Ü;3ß:4å<7ê>:ë>:ç85ã41è96ë;;ì::è8:ì>@øORÿbeÿppùheãUQÊ=6À1+É40Ö;7ß=;à;9â;5â;5â94á83ã75ä65å55æ66é69ê7:ê7;ë8<ë8>ë8>è7=ê7=ï8=ì8;è8:ã;:Ý>:Ó>7Ê=3À</¶8)²:)®;)«<)©<(¨;'¦;'¥:(¥<) 9( ;) 9*¢6*µ>8ÛTXûepîK\ïI]éJ^ÛM[ÉQZ´VV 
ZRŠ`RPI7IN:NI5H@+;578>O%Lf6lŠTw”\„›e‰šd›f‘¢l•¬t˜°|ƒ•mn}\Q`C<J09F,AP1O_;Wj=d{EfƒCkŠDlFi‘He‘FaFcŽGcˆCi‰FjKlMlOlOm‘Qn’Rm”Qm•Om–Pl–Ll–Ln˜LržOs¢R{¬]{«_zª^EB1DB3DB6CC;CC;CC;BB6BC3DF0EH-GJ-JM2ON9RQ?TREUSD\ZC\\D\]Oaaaihxrq{z¦~µ†‚¿ˆƒÃˆ„Á„º{®ys¡to—ql•qmŸrn¡vpžwršwr’to…skzrinmb`l_Wj\Qk[Lm\Lo\Mp]Lq[Mq[No[Pm]Nl_Om`Os`O{`O†ZMŸ_S¥QG®A:º;4Æ6.Î1*Ö2)Ý5,â70á6/á4.á4-à4*Ý5(Ù8&Ô9%Ï:"Ê="Ê>%Ê>'Í<)Ò;*Õ9,Ï;-¿@-±<(¬5!®3!º8(Ä<.Ê:/Î5-Ø93ðNIñTMÙB9ÌB5¾?.¦1´=)¿=-Á9+Á7*Ã7*Å7+É9.Ï;1Õ<4âE>ãA<ä?;ç>;é=;é;:è88ç85è96æ95æ66é;<ôJKÿ\]ÿdeøbaÌ=9Á82½4.Ä92ÕA=ÞE@ß=;Ù42â;5â;3â94â94ä84å84ç77ê88ê69ë7:ì7<ë8<ë8>è7=è7?é6<ë6;ê7:æ89à;9Û>9Ñ>6Æ<2¾</³8)®;)ª;(¦;'¤;(£:'¡:'Ÿ:&¡>+–7%œ=+š9)š2'ÃPKíkmî\fìP^éJ\ßHYÎLV¹RS¤[R’cQ€jUJM8DL7JB/H9$B7FH#Up=g“V~²p‚³qƒ¥h}’YƒYŽ–cŠ–d|Œ_XfCHU9:F.7C+:D,=J0JX7Wh>e|FiƒDl‹Em‘Gi“Gd’Ga‘GaGf‹FlŒImNn‘OlOkŽNkOm‘Qp—Tq™Sp™SpšPo›PqPv¢Sw¦V|­^z­^y¬]IC3JD6IE9JF;IF=IG;HF9HG5IH3JJ2LL2NN4RP;TQ>XRDYTA]Y>[Z>\YH_\Udahnk~xs“}x {§ƒ}«…­ƒ|¦}wvooi…lf‚ieˆjf‹mhˆmi‚mh|lfrkbgj`^g]Th\Nh[Kk\Io^Lq_Kq^Mq^Mp]Op]Op]Os]Ow]N~]N‡\L’XL¢VI¤F<«9/º7/Ê7/Ô7.Ú6-Þ6-ß6/à5.ß4,à5-ß7,Þ8*Ø6'Ñ6$Ï:$Ì<$Ë='Ë='Î;)Ñ;*Ó:,Î<-¾A/¯<'©4"¬3 ¶9'Á=.É;/Ì8.Í3+àA;ø[TàI@ÐD7ÊF7«2¹>,ÊF7ÌB5Ê@5Ë?2Î@4ÔA7ÜE<ãF?â@;â=9ã:7ã75ã54ä65æ:8å<9ä?9à=8Ü:5ß@<êLIêQLÓ?;¸)#¸-&¿6.É>7Ñ@;Ö>9Ú;8ã;;ç==â96â96â96ä86å95ç:6é99ê::ê88ê88ë8;ë8;é9<è8;ç6<ç6<é6:æ68â89Þ:8Ö=8Í>6Â<1¹;-®8*ª;*¦;)¢;( ;'Ÿ<)ž;(š;'˜?-Œ5"™=.˜8*—/&Í\X÷y|ßX_æXdÛQ^ÊKT¹LO©VRž`U‘hV€nVPW8?J*<;C=UQ+lvD„¢f”Á~‡¼v†¸s|žbn‚MvR‚‡^u}V[b@;D)4<%5=(=D2<E29C+CP2Vf?g{HiƒFmŒHm‘Gj“Gf’GbGcŽGhJlMo’Po’PlMiŽKjJj’Ls›UsUsSržQržQs¢Ry¥V{ªZv¦Zu¥[s£YRK;RK;RJ=RJ=RJ=RJ=RJ=RK;UN<VO=WP>XQ?YR@[TD\UE\UEc\Ib[IaYLbYRe][kaiqftuj{zn„|pˆs‹s‰|p„uj{mbrf^kc`k`_g_\c_Z^^ZY^YUaZRbZOe[Og[Ki\Kk\Il]Hm]Fm\Hj^HibOqfTqZJuOB‹WJœ\P¤ZM¬ZL¥M?¦E5§<,±7(Â8-Ñ80Ü41Ý1-ß6/Þ7.Þ7.Þ7.Þ7.Û7+Ù7*×7)Õ8)Ô8)Ò9)Ò9)Ò9+Ò9+Ô8+Í;,½;+¶>-®9(©1 «2!¸:+À</Ã9.É6,Î5-Ô:2ÛB:ÝK>ÔH9Â=,·2#ÝPFÙKAÔE=ÕE<ÚG?ÞG@àC>á=;à74æ87í;;ì::ç77ã75à85Ü=7ÙF>ÏB9Ä:0¼3)·1&º6*¿;/Ä@4Ã:0Ê=4Ó@8Û@;â=;ç;;ì8;í9<ç7:å8:å8:å99å97å97å95å95á51á51ä65å76æ89ç9:ç9;ç9;ë;>æ9;ß9;Ü=:ÖA=ÍB;ÁA6·?1©9+£<+ 
=*›<(™:&–9'•8&9&A0‹<-‡/#’0'¼LJämoçloÍVZ½LNµNOªPO¢UO›XP—]R•aTŠjSWZ/:M7FTa)~ŽP›°o¡¼y¼y’µs~ah€N\mCYdBT]BHP9?D0:=,@C2=A2;?1<C3:C0AN2Rd>]sBgƒHpKo“Kl’Gj’Ii“KgLlPm‘So“Sn“Pm’Mk“Jm–Jm˜IqœMqNrŸNr¡Pt£Rv§Vz©Y{©^y¦cm™ZcPWN?WN?WN?WN?WN?WN?WN?WN?XO@YPAZQB[RC\SD]TE^UF_VGd[Jd[LcYMcYOf[Uj_]nbdpdhreltgpvirvirsfmm`gg[_bXY^[VZZRZWPXULXTIXTH\VH^WGcZKcZIf[Ig\Hi]Gj^Fk_IiaJcbMngTv\MSF›WN«VO°RJ¸RF¯G:¬B2¬=,±9)¾8,Ë80Õ62Ù40Ý6.Ü8.Ü8.Ü8.Ü8.Ú8-Ù7,×7+×7+Õ7+Õ7+Õ7+Ô8+Ô8+Ô8+Î:,¾<.¶>.®9(¦1 §2 ²:)¹>.¾<.Æ:-È5+Ë4+Ò;2ÜE:áOBßQCÝOCÛF?Ø@;Ó;6Ò:5Ö=7Ú=8Ü86Þ44è8:ë78î79ë78ç77å97â=9ÛB<ÊA7À@5¸:,±6'¯5&²8)¸=-¿@1ÅA4Í@6×@7ß=8ã:7é77ï58ñ7<é6<ç6<ç7:æ89æ87å95å95ã:5â92ã:3ã:5ä;6ä;8ä;8æ::å8:ä:=ã:=ß<=Ù?=ÐA;Å@7·=2­<.¥;+ =*œ=+—<)–;(’:&‘8&Œ9'€9'€8)‰5*<5¿QP×eeÑ^a¸NN¬NL¤SOXQ˜\R—^S˜_T˜_VŒfQ`_/Sg*^r3zO™¯n¥¾|™²pˆ bj‚HbvCTe;KY8FP7@I69?158-:</AB4=?4:=2>B4:C0@M3Rd>^tEh„IqMp”Ln’Hm“Jk“Lj‘Nm‘Sn’To“So”Ql”Mk”Hl—Hm˜HpJpJp Ls¢Qu¦Tx©Xx©Xx¨^m—Xb‹QY‚H]TE]TE]TE]TE]TE]TE]TE]TE\SD\SD]TE^UF_VG`WHaXIaXGe]Je]Jd[Je[Oe[Qf[Uh]Yi^\j^^j^`k_ak__i^\f[YbWU_VQZUOWTKUQHRNCQMBSOCWQCXRB^WG_XFaYFc[Fg\Hi^Hk`LicMbaMngUy_PˆXN¢[U±VQ²IE¸E@¹@8¸>3·;/¸8+¼8+Ã9.É;/Ñ:/Ù8.Û7-Ü8.Û9.Ú8-Ú8-Ù7,×7+×7+Õ7+Õ7+Ô8+Ô8+Ô8+Ò9+Í;,À>0·?/­:(£2 £2 ª9'³>,º?/Ä?0Ä8+Å2(È4*Ð<0ØF9ÝN@âNDÙ:6Ø43Ô20Ö42Û97á=<æ<=é;<í9<î5:ë27ê37è58å99á<:Ù@:¾8-µ:+±6'®4%­5%°8(µ<+»=.ÈD7ÐC9ÖB8Ý>8â96è66î66ï79ë6;ê7;ê7:è88è88ç:6æ:6æ;4ã:3ä;4ä;6ã<6ã;8â:9á99Þ88Ý9:Ü<<Ú@>ÓB=È?9º;2­7+¥7(¡;,<+™<+”;)“:(9&8%Š9&z6#y6%ˆ9, F=¹SOÀWT¸PO«IF¢MHœSLšZQ—^S—aW—aW˜_V‹ePsrFzV“¦n¤¹€«À‡ž³{|[[o<@S%AS+BQ0?L2:C06<.69058/9;.@A3=?4:=2=A3:C0@M3Qc=`vGi…Js’Or–Np”Jo•Lm•Nl“Pp”Tq•Uo–Sn–Pl”Ml•Il—Hn™IpLpLpŸNu¤Sx©Xyª[u¦Wq X_‰JVGOx@aXIaXIaXIaXIaXIaXIaXIaXI_VG`WH`WHaXIbYJcZKd[Ld[Je]Hf^If^Kg^Mg^Of\Pf\Rf\Sh]Wh]Wh]Wh]Wg]Tf\RdZPc[P]YNZVKVRFRNBPL@PM>TN@UO?XRBYTA[VC]XDaZGd]Jf_LfaMdcQleUv\OŠ[Q©b\¸ZX¶FD¹<:Ã<9Æ;6Ä92Â8-¾8,½;+¾?.Ç?/Ó9-Ù7,Ú8-Ù9-Ú8-Ù9-Ø8,Õ7+Õ7+Ô8+Ô8+Ô8+Ô8+Ô8+Ò9+Í;,Á?1·?/­:(¢3 
ž3¢7#¬=)µ@.¼?-¿:+Ã7*Å5*Ë7+Î:.Î</Ó:2Þ66à24Û12Ý34à88æ<=ê=?ì<?î7<ì38é06æ25å58á77Ú65Î61·5(¯7'®6&¬7&¬7&®9(²:)µ:*ÃA3ÊB6ÔA7Ü?8ã;8æ87ë76î79í6:ë7:ë78ê86ê86è94ç:4æ;4â92á:2ß:4à;7Þ:8Ý;9Ü:8Ù99Ö<:ÖA=ÒC?ÊA;¼<3¯6+¤6'ž7(œ;*˜;)”;)’;(:&Ž9%Œ7#‡9%€=*w6$5'™E:ªPH¬MG©LG¦QLPJ˜UL”ZO”^R•aV–bW—aWgT‰…_™©z­½Ž¦¸ˆ¡ts†YN`8/A->4C&:F.=F3;A3:=2:=4<=5::.?@2<>39<1<@29B/?L2Qc=awHj†Kt“Ps—Or–Lq—No—Pn–Pr—Ts˜Up˜Ro—Pm•Lm–Jn™Jp›KqžMržOt£Sy¨X|«]w§[mSe’M[„HS|DNw?d[Ld[Ld[Ld[Ld[Ld[Ld[Ld[LcZKcZKd[Ld[Le\Mf]Nf]Ng^Mf^Ig_Hh`IiaLiaNi`Oh_Ph_Pj`Tj`Ti`Qh_Ph_Nh_Nh`MhaOd^N`ZL[UGVPBPM>NK:NK:NK:QN=RO>TQ@VS@XWC[ZF]\H^]Kb`Qf^QmWJ†[R­jd¾c`ÁMMÈBCÐ;=Ô89Ó84Ì70Á9-º=+µB-¼B+Î;+×7)Ø8*×:+Ù9+Ö9*Ö9*Ô8)Ô8)Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Í;,Á?1¶>0«:(¡6"š4›7 £=&¯B-³;*º;,Â:,Ç9-Ë8.Î:0Ñ=3Ú;5é9<ì4<æ39ã28â38â59â38â17é6<é49ç4:ç7:ã9<Û76Ð21À/*´6*­9*­:(«:(«:(«:(¬9&¯7&¸:+Á;/Î>3Ø?7á>9ä;8è96ì87î68î66î66í74ê84è:3ç:3ä<3á:2ß;2ß<5Ü=7Û=:Ù><×=;Ô>=Î@<ÊC=ÅA<º>6­7-£5(ž7(š:*–;)“<)‘;*:(Š9&‰8%ˆ7$ƒ8%ƒ@-u4"{3%“G:¥RJ¡NFžNG¡WN™VM“YMZNŽ]O]R”`U—`YhWˆh˜¤|©ƒ€Žj]kHDS42@&$2-:&0;*5=.9?3<?4=>6;<4::099-?@2;=28;0;?18A.>K1Pb<bxIk‡Lu”Qt˜Ps—Ms™Pq™Rp˜RršSršSršSp˜Om—Mm—Kp›LsžOt Qv¢Uz¦Y{©^z¨_qŸVd’JZ‡D]†JW~GRyBe\Me\Me\Me\Me\Me\Me\Me\Me\Mf]Nf]Nf]Ng^Og^Oh_Ph_Nh`Kh`IiaLjbMjbOjbOjaPjaPjaPiaNh`Mh`Kh`Kh`IiaJjbKf_Mc^K^XHXRBSM=MJ9KH7IH6LK9LK9LM;NO=PQ?QTCSVETUE[YL^VIcQEzXN¡ha¶eaÄVU×RSÙ@Cß:>Ü87Õ83Ç;.¼?-±C*µA(Ê;*Õ8)Õ8)Õ9*×:+Õ9*Õ9*Õ9*Ô8)Ò9)Ò9)Ò9)Ò9)Ò9)Ò9)Í;,Á?1¶>0«<)£:%™6•5›;#§A*²B.¹@/¿;,Ã7*Å2(Ë4+Ò;2ß=:ê5:î3<ë6=é8>ç:>ä;>ã:=â9<ß58á7:â9>á=>Û=>Ñ96Ã2/¶0'¯9+«=,«=,©>*ª=)©<(©:'«8%¯6%¹7)Å;0Ð=3Ù<5ß<7ä;8é:7î87ï75ï75î85ë:4é;4ç<4ä=4â>5à?7ÞA:ÛB<ÙA>ÕA?ÒA>ÍB?Á@:»@8³=3ª8-¡5(›5'™9)•<*‘;*<)Š<(‰;'†9'„7%ƒ6$€7&}:)t3!~9*—OA£YNœRG•OE™WK•YN[MŽZMŒ[MŽ\Q“_T™`Y‘gY~wZyƒ`r|ZVaC;E,/;%0;*0<.3=25=27=3<?6@A9?A6<<277+89+>?1:<17:/;?17@->K1Oa;bxIk‡Lu”Qu™Qt˜Ns™PršSq™Rs›RsœPr›Op™Mn˜LpšNsžOv 
Ty¥X{§\|¨]z¥]qŸWg”O]ŠEX‚BaˆOY€ISzCg^Og^Og^Og^Og^Og^Og^Og^Og^Og^Og^Oh_Ph_Ph_Pi`Qi`OjaPjbOjbOjbOi`Oi`OiaNiaNiaNh`Kh`Kh`Ih`IhaGhaGhaGg`Me^Kb[I\WDWR?PM:MJ9IH6IG8GH8GJ9GK:HL;IM>JN?KM?PPDXRF[OCiRD„ZN–VL°SLÔZYÛIJâ?Bá99Û75Ï;1Ã@.´B*³<$É:)Ó7(Ô8)Ó:*Õ9*Ó:*Ó:*Ó:*Ò9)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á=0µ=/¬=,£<)˜8"4•9 ¡A)­C-¶A/¿=/À6+Â/%Ç0'Ð6.Þ:8è59î6>ì;Cê?EæAEàBCÜ@AÚ@@Í12Ð66Ó:<Ñ=;É;9¿82µ4.­4)ª<-¥>+¦?,¥@,¥>+¥?)§<(©:'ª7%²7'¼8+Å8.Ì70Ô94Þ<9æ>;ë97ì95ì95ë:4é;4è;4å=4â>4àB7ÝC9ÚC<ÕB;ÏA=Ê?<Ç=;¿>9±<3©:/¢8+œ6(˜7'”8)‘:):(‹<+‰<*‡<)ƒ:'‚9(€7&6%}6$x3#x5%†C3˜UEWK”PCPD•ZL“YMZN\Q\Q’[T•\U™\WŽbUkbEWa>JS4=G,6?*3>-4>35?66=69@9=B;CF=EG<DE7@>/;9*78*=>0:<17:/:>07@-=J0N`:awHk‡Lu”Qu™Qt˜NtšQršSq™Rq›OrNqœMp›LošKrNu¡Ty¥X«`~©az¥^qœWf‘L_‰G]‡E^ˆIb‰PY€KSzEh_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Pi`Qi`Qi`QmdUlcTj`Ti`Qh_Pg^Oh_Nh`MiaNiaLiaLiaLiaJh`IhaGh`IgaKg`Me^Kb[H\WDUR?QN=ML:EF6EH7DH7DH7CJ:CJ:CI;EI;IK>SQDSPAYM=eK:tD6–I?ÂYSÙQQàDEà::Ý86Ö<4Ê>/º=)µ8"É:)Ð9(Ñ:)Ò;*Ò;*Ò;*Ò;*Ò;*Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á;0¶:.­<,§>+™9#3“7Ÿ?'¢9$¯<)À>0Ç=2Ì8.Ð7/Ù<5ã?=ì=Bî?DêDHâDEØ@?Ê;7À50»1.¹/,¼51¾:6»<6³:2ª8.¤8,¢;,¡=-Ÿ>+ ?,¡@-¡@-¡A+¤?+¦=*©;*­:(²7(¹5)¿5+É90Ö>9àC>å<7è;5è;5ç<4å<5ä=4á>5Þ@5ÜD9ØD:ÒC;ËB:Ä?:½<7¸85°93¢7-™8(•5'“6'‘:)<,‹:)…8&ˆ=*…<+ƒ<*€<)~9)}9&{6&{6&z2$€8*ŒG8–SC“QC‹L=RE™_S‘WL’YN”[R–]T—^W—\V—ZU_RaX;EM(6@8A&>G2=F53=2-7.1817>6CF?GJ?HI;BC1?>*;:&78(=>09;069.:>06?,=J0N`:awHj†Ku”Qt˜Pt˜NtšQršSq™RqœMqœLqœLp›Kp›LsQw£V{§\‚­e€«dx£^j•P]‡EYƒA^ˆHePcŠSZLSzEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj`Wj`Wj_Yj`Wj`Vj`Vj`TjaRjaRjaPjaPjbOjbOjbOjbMjbMg_Jg_Jf^Ie_Ib]IZXCSP=ML:HI9EH7AE6@D5?E7?G8?G:?E9GK=IK=BF5KJ6\Q;aE0ƒJ9Àj]Üg`ãUSäFEåA@àA=Õ>5É=0Æ=-Æ4%Ë5&Ì6'Í7(Í7(Ï9*Ñ;,Ó=.Ò<-Ò<-Ò<-Ò<-Ò<-Ñ;,Ñ;,Î<-»2(¶7.±;/¨:+š7"”5–7!œ9"¬A-²=,º6)Á4*Í6/×<7âC?êHEèDEéFIÝCEÉ98»61´80®8.©4+«9/©9.¥9/ 8-›9,–:+“<+’<+–=+˜=*™>+š?,›@-@. 
A/£@-¢;*¥:(«7(°6)·7,¾8-Ä;3Ì<4Ú=6à=6â?8âA7âA9ÞB6Ú@6×@5ØH=ÒE;ÇA8»;2²5/ª3-¦0,Ÿ2+™9-’;*‘9+:*‹:)ˆ;+‡;+…<+:(~9)|9(|9(|9){8'y6&x3$w- ‹A6˜PD“MAMA’RF“UJŽRG˜\R˜\R‘UMTL—ZU•XS”WT“gZ]T5IQ*BL)BK,>H/7@+2=-4?14<1HPCZ]RX\MMO:DF.AA'==%:;)>?1;=2:=2<@24=*;H.Pb<cyJlˆMu”Qs—Or–Lr˜OršSršSrNqžKrŸNrŸNsŸPv¢U{§\ªb‚­fx£^j“O`‰G^†Ga‰JfŽPi‘Ud‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj_Yj_]j_]j_[j_Yj`Wj`Vj`Tj`TjaRjaRjaRjaRjaRjaPjaPh`Mh`Kg_Jg_Jd]J^YEUR?ON<IJ:FI8BD6@D5?E7?G8>F9=E6@F8>E5>F1HL5MI0U?(P<¼sdËcXà_ZëUTçGGâ@>àA>Ø?9Î:0Í9-Î<-Ï=.Ï=.Ï;-Î:,Î:,Î:,Î:,Ð:,Ð:,Ð:,Ð:,Ð:,Ð:,Í;.É?5ÅB:¼B7®=/ 7$—2–1›2¦5#²7(Ã:0ÑA8ßF@æIDéJFêKHÞCAÛEDÐB@¼;5¯9/©=0¥?1Ÿ=0Ÿ?1œ>2˜?1”>/=/Š>.‡@.ˆ?,Ž=*‘;*’<+“=,”>-–?,—>,š?-Ÿ@.¡=-¤:,¨8,¬8+±8-¶:0¼9/Ê;3Ð;4Ó?5ÖB8×C9ÖC9ÔD9ÑE8ÊB6ÃA4¹=3°:0©6/¢5. 5/™7.’</Ž=,Š;,ˆ;+†:*„;*ƒ;,<,~;*|:*{9)y:)x9(w8'v7(w3&€3)C9˜NC’L@K?QD’TIRG“SJ˜XO–UO˜WQœ]X”WRŽSOŠcT]W5Xc9_jBZfBIT66C)4@*<H4OXGYbQ_fTW\HJN7BE*??#<<"9:(=>0:<1:=2<@25>+;H.Pb<dzKlˆMu”Qt˜Pr–Ls™PršSq›QsŸPrŸLpLqNt Sx¤Y|§_ªc{¦aošUcŒJ]†Da‰JeNeNc‹Od‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Vk`\k__k__k`^k`\k`ZkaXkaWkaWkaUkaUkaUkaUkaUkaWkaUlaOk`LiaNiaNf_Ma\IYVESR@NL=IJ:DF8AE6AE6?F6>D6<D5=F57@-@H1IK3F?%W?'‡ZE·rb¶VJÙbZðebìTSæFHëGHæCDØ:7Ë7-É:,É:,É:,Ë9,Ê8+É7*È6)Ê8+Ë7+Ë7+Ì8,Ì8,Ì8,Ì8,Ë8.Â3-¾5/¶6-­3(¡0"ž. 
¡2!§3$¼@4ÇC7ÖG?áLFèMIéJGåFBßD@×CAÏDAÁ@:±;1¤</¡A1œE4˜E3“D3‘D4B2ŠA0…@0‚A/~A.@.†:*‹9+‹:)Š;*‹<+<+Ž=,<*–?.—>.›;+9* 8+¥9-©:/®9/¸90¾90Á;0Ä>3Å?4ÅA5ÄB4ÁA4¶:.²9.ª8-£7+ž6+š8-™8/“;1‹</‰=-‡;-„;,‚:+€;+€;,<,};+{<-z;,w9*v8)u7(t6)v4&‰<2•G=–MDŽJ?ˆH<OB’TI’RI’OI™TO™SQ˜WSš]Z“YU[Tl[}X~‰^€‹alxRLX46D#=J.LZ@eqYeoW]eMPU>EI0?C(=@#:<$89';<.9;09<1=A36?,<I/Pb<dzKm‰Nv•Rt˜Ps—Ms™PršSpšPu¡Rp LnLpŸOw£V{§\}¨a|§bp™UfM\„E\„EeŒMj‘RfN`†IcˆRZLSxEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Vk`\k__k`^k`\k`\kaXkaXkaWkaUkaUkaUkaUkaWkaWkaXkaWnbRmbPjbOjbOibPe^L_YIYVERO@NL=GH:DF8CE7BD6@D5<C3<H4:C.FJ3MG1P;&qL9šbQª^P¡A5ÆSLçc_ð^^íSUëJOéDJáACÌ92Ç9-Å7-Å7+È8/Ê:/Ë;2Í:0Ñ>6Ñ>4Ò=6Ò>4Ó>7Ó?5Ó>7Ó>7Ô@<ÑB>ËD>ÆF=ÂF<ÃI>ÇM@ÏQEÙSJÞQJâMGäIEäFCâC@àA=ÛB=ÕFBÉD?º?7«=0 ?/šC2–H4”I6ŽG5ŒE3ˆC3…B2€A0~A/zA0}@.‚:,†8,…9+†:,†:,‡;-‡;+ˆ;+‰:+‹9+9*‘9+•9,™;/=1¢<0ª:/®9/°:0°:.°:.°:.¯9+®8*¨7)¥7*ž6)›6*•7+’8-9/Ž<0ˆ<.…<-„;,‚:+:+~9*~;+};+|:,z;,y;,w9,s8*q6(r7)u5)ŒC:“I>‘KAŠH<‡I<‹OD“TK—TL–QL›TP—QO”SO–\X_Xh^—~j””p…’fu‚WYh?CR+?O*P`<crQetU\iKOY>CL1?E+?C*=A(;='78(:;-79.9<1=A37@-<I/Oa;bxIlˆMv•Ru™Qt˜NtšQršSpšRsŸPožMožMs¢R|¨]~©ay¤_tŸZcŒJ^‡E[ƒD_‡HgŽOj‘PfL`‡Hb‡QY~KRwDi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Tk`Zk`\k`ZkaXkaXkaUkaUkbSkbSkbSkbSkaUkaWkaWkaXkaWocUnbRkbQlcRkbShaQd]M`ZJXRBSP?NK<IG8GE6DE5BC3>B1=I1?G/LG1S>+g=-•ZL­_Sœ>6ž3+¶?9ØXUóigödeëRUæJNéOQÚIDÓF<Í@7Ë>4Î?7ÒD:ÕF>×G>ÚJBÚJAÝJCÝJBÞKDÞKCàKEàKEëSRèTTãSRÝSPØSLØSLÛTNàUPàLHáIFãEBâC@äB@âC@äEBáGEÙIHÊDAº?8¬=2¡?2˜B1’E3G6ŒE3‰D4‡B3„A1?1~?0z?1|>/€:.ƒ9.ƒ9.„:/ƒ;-ƒ;-ƒ;-ƒ;-9*ƒ:+…9+‡9,Š;,;-<.“;/›:1ž9/Ÿ9- 
:.¡9,£9,¢8*¢8*Ÿ8)œ8)˜8(”8)9*Œ:,‰:-‡;-„:-‚:,9+€8*}7+}7+|8+{9+{9-y9-x:-v8-q5*o5)q7+u:,ŒH=ŽJ?‰I=„I;†L@ŒRF“WM˜XO˜QMœUQ—SP“VQ’`YˆbWƒg[‡{enrQZi@JY0BS)IZ0WiAdvNj|VXiEL[:?L.:D)<D,>C-<A-:=,89+:;-79.9<1>B47@-;H.L^8_uFi…Jt“Pt˜Pt˜NtšQs›Tq›SnMmžLp¡Pw¨Y|ªaz¨`rXj”RZ‚C\„EaˆIeŒMgŽMgŽKhJg‹Mc†PZ|JRtBi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkaUlbYlbYlbYlbVlbVlcRlcRldQldQldQlcRlcTlbVlbXlbXlbXocUnbRkbQlcRlcTkbSg`Pd]M^WGYSCTN@MJ;KH9GE6FC4@C0;H.@I.OC-a=-„F;¬YS´NJž,+²:9¹=;ÓSRòpnþvvñehëX^ð^_ãUQÚPFÔG@ÐC:ÑD=ÔG>ÖGAÖG?ÙHCÚJBÜHDÝJCÞJFÞKDàKGàKGâHHáGIÜHHÙGGÖHD×HDØGDÛEDàBAäABæBAèBBçCBäB@ßA>ÛA?ÛIIÊC@¹<6®>3¢@3˜@2‘B3D3ŠA2ˆ@1‡?1…=1‚<2€<1=1=1;1;1ƒ:3;1;1€</€<1=/€</€=-;/€;,‚:,9*‚8+ƒ7*Œ:/8.‘9-”:/—;.š=.ž</Ÿ>.™9+—:+“:*:+Š;,‡;+„;,‚:+‚:,9+€8*~6(|6*{5){5){7*x6*x8,x:/s7,p4)o5)r:-w=1‹M@‰K>„J<‚M=…QC‹VH’XM—WN“NI™TO—VR–]Ve\|_QgWGa_HEO->N)?O(J\4_qGk}SewMXj@N`8@Q-6E&7D*=F1=D4:>07;-8:,:;-68-9<1?C57@-9F,I[5[qBeFr‘Ns—Ot˜NtšQs›Tq›SmœLo Os¤Ux¨\z¨`s [i“Q`ŠHYB^†GeŒMgŽMgŽKeGgŒGhŒLa„NXzHQsAi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbXlbVlbVlcRlcRldOldOldMldMldMldOldQlcTlbVlbVlbVocUnbTkbSlcTmdUlcTjaRf_Ob[K^WGXQARL<OI9JG6IC3AC.:G+AG+U@+xG9£PLµII¶8<º39ÈAEÄ?@ÊJIßb`ñqpðnpçbeâZ\ÛQNÕLDÐE@ÍB;ÏD?ÐE>ÐB>ÎA:ÒC?ÒC=ÔC@ÕD?ØDBØD@ÙECÚDCÝADÝADÞDFÝGHßIHàJIãIIåGHçACêADìBEìBEçAAâ@>Û=:Õ=:ÚFFÉ=<¹83±<3¦@4˜?1>0ŽA1‹=0Š<0‰;1ˆ:0‡81†93…:4„;4;3€<3:4€<3<3}=1}=3}=1|<0|=.|<0|=.};-{9)z8*{7*‚:.ƒ9.‡9-‰9.9.‘;.•<.–=-’9+‘9+Ž;+Š;*‡;+ƒ<*€;+<+€8*€8,~6*}5)|3*z4*z4*y5*u2)v6,w9.s7,p4*o5*s:/x@3‹QC†N?N=‚Q@…VF‰XIŽUJ’SJ“PJ—SP”TR‘\VŠe\n[LPK7EK1AN0KZ9WgC`rLgyQdyNWm?G\1AV-7H$1@!6B*=F5<D98=67:3:</;<.79.9<1?C57@-7D*FX2Vl=b~CoŽKq•Ms—MtšQtœUrœTožPq¢Qu¦Wv¦Zs¡Yj—RbŒJ[…E^†H`ˆId‹LfLhJhHf‹Fe‰Ia„NXzHPr@i`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbVlcTlcTlcRldQldOldOldMldMldOldOldQlcRlcTlbVlcTnbTmaSmaSmaSmdUlcTjaRg`Pd]M_YIZSCTN>PK8MH5ID1DC.@F*DD([B,ˆTG¯\X«>A¬/5ÎHOÆ?EÀ;>»;<ÄFGØZ[ãefÛY[ÌGHÑJGÐEBÍB?ÐB>ÔFBÔFBÐB>Ê?8ÏD?ÏD?ÒD@ÔEAÖEBÙECÚDEÝDFß@Dß@DÞBEßEGáGGàFFàBCá>?èBDé@Cè>Aç=>ä>>ãA?áC@ßEC×CCÅ98¶50°;4¦?6š>3=/?2ˆ</‡:0‡:0‡81‡83ˆ94ˆ94ˆ;5ƒ:3;3€:2;2~<0~<0|<0
|<0x8,y9-z:.z:.{<-{<-{<-z;,};/9/€7.7,„6*…5*†7*‡8)Š8,ˆ9*ˆ:-…<-„<-€=,~<,~<,}9,}7-|6,z6+z4*x4)y5,x5,r2(u5+v8-r6+n4)n5*t;0xB6ŠUG„PB€O@„SDˆWIŒVJŒRGNG•TN”UP‹RK„XOcUf[GGK2=J,IX9ZkIhyUcxQZoFSh?Pb:L^6;M'0?,95>)=C5<B8:<79<5;=2;=079.9<1?C58?-8B)EU1Uh;azCnŠMr“Nr–PtšQtœUrœTt Uu£Xv¤[t¢ZošUfN^ˆH[ƒDdŒNa‰K`‡HcŠIhJiŽKeŠEa…E`ƒMWyGPr@i`Qi`QjaRjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbQkbQkbQkbQlcRlcRlcRlcRlcTlcTlcTnbTk]PqaRtdWrdWnbVj`TiaThbTgaSebQc]M[VCVQ=TO9NH2GA)KG,I?$bI3‘gW¶wn»lg¸YWºPR¶BE½BEÂDGÈFHÍHKÐJKÒLMÓKMÏEEÐDEÓEDÖFEÕFBÒC?ÏD=ÏF<ÈC:ÉD;ÊE<ÌE?ÒD@×CCÚADÜ@DáBFâBDáCDàDEàDEáEFãEDåEEãCCäB@ä@?ä@?ã?>â@=ãA>ÞC?ÓB?Æ?;¶93ª70Ÿ:0—=2Ž>3†>2ƒ?4?3=2‚<4„93‡83‰92‰92…;0„<.„<.ƒ;-;/;/€:.€:.~:-~:-~:/~:/};/};/};/};/}90}90~80~8.€7.€7.8/9-€7.~8,}9.|8-{9-z8,x8,x8,x8.x8.w7-v8-v6,t6+s4+q5+m1'r6,n4)m3(o6+m4)q8-I=…SH…SJ‡RJˆQJŒQIQJ”QK•RL“TMTK‰[N‚cQo_HYU:MV7O_;j~YbxQZpIUjCRg@Pa=IY5DR17D&4>#2:#6;'<>0@B5@@6<>358-39-4:.7:/9=/;?.=B,=F)S_;arFo†RtUt”St™SsSsUužZ~§e~§ep˜YbŠK^†G^†H^„G`†IbˆKdŠMfNeŒMcŠK`‡H^„G_‚LTvDKm;i`Qi`QjaRjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrbSqaRrbUrdWrfZogZmeXjdVgcWliZmjYjgThcOc^H[U=RL4PJ0I?&Q?)kP=XI„NDˆE=”EA±WV¸TT»QQÀPOÀNMÁMMÃMKÃKJÐTTÈHIÂ=>Ä=:ÐB@ÓEAÏB;Ç>4ÊD9ÊF:ËF=ÎH?ÔFBØDDÜAEÝAEáBFáCDáCDâDEâDEáEFáFDáFDàEAßD@àB?ßA>àA=àA=àA=ÞC?ÔE?ÈC<º>6­;1 
90”:/Š<0ƒ=1€@4~@5~>4€=4ƒ:3†91‰92‰90‡;.…<-…<-„;,ƒ;-‚:,‚:,‚:,9-9-9/9/~:/~:/~:/~:/}90}90}90}90}90}90|90|90|90z:0z:0z:0y9/y9/x:/w9.w8/w8/t8.s7-r6,r6,p6+p6+l2'q7,m4)l3(o6+l3(p7,~H>…SJ…SLˆQLŒQMQL”PM•QN”SOŠOG…SH€[K{ePsiPilMfqQgxT^tMXnGPf@Ma<M^<L[:HU7CP4<F-8@)6;'7:)<=/?@2>>2;=04:04<15;16<0:</<?.>A,>D*Q[9^mDnSsŒUr’Ss˜SsSsžVxŸ^|¢exžak‘T`†I]ƒF]ƒF\‚E`†Ia‡JdŠMe‹Ne‹NbˆK`†I^„I]LRtBIk9i`Qi`QjaRjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTqcVo_Rm\Rm_Tqd[og\ldYhbVfbWqma{xi~{j€zj{vcqjWe`JTT<FF.C>(MB.S@/U8*]7*m>4n70u91~;5‡@:’IB›PJ¥XP­ZT»^YºRO·GE¼B?ÈDBÑEDÓD@Ï@:ÐA;ÐC<ÒE>ÔG@ÖGCÚFFÛEFÞDFßCDßCDßCDàDEàDEßEEßECàFDßECÞDBÜD?ÛC>ÜC>ÜC>ÜC=ÛC>ÖE@ÏF@ÃB<³>5¢:1”8-‰9.ƒ;/‚@2?3~>2€>2ƒ:1†91‰92‰90‡;.…<-„:-„:-ƒ;-‚:,‚:.‚:.9-9-~:/~:/~:/~:/~:/~:/}90}90}90}90|90|90|90{8/{;1{;1z:0y;0y;0x:/x:/x:/t8.t8.s7-s7-r6,q5+p6+o5*j1&p7,m4)k2'l6*i3'm7+}G=‡PIˆQJŒQKŽQL‘RM“TO”UP‘VPŒWO‡[P~^OtbNoiQorUlwYfwUQeBK_:EY6DU3EV6GV9GT8ER8@J2<D/9>*7;*:<.<>0;=/9;.6<25=26<26<0:</<>0=@-=C)MW5Zi@h{Mn‡PpQs˜RuŸUtŸWz¡`w`n”Wc‰L\‚E\‚E\‚E[D_…Ha‡Jc‰LdŠMc‰La‡J_…H]ƒH[}JPr@Gh9i`Qi`QjaRjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrdWpbUm_Tl_VkaWlcZle[jf]jf]yujˆ„x‘•’Œ~‰‚r}zg`cNOT>DG2GF2HC0F=,L<,VC4P9+R6*V4*Y4+\6+a9/e=3n>4ŠKBRL®UQ·OL¼FFÆABÒDCÙGGÔ@>ÕA?ÖB>ÖE@ÙECÚFDÛEDÝEDÞDDßCDßCDÞDDÞDDÝEDÞFCÞFCÛFBÛFBÚE?ØE>×D=×D=ÙD=ØE>ÔD<ÓF?ÉF>ºA8§;1˜8,Œ8-…;.ƒ@0@1~?0€>0ƒ:1„:/ˆ:0ˆ:.„:-„;,„:-„:-‚:,‚:,9-9-9-9-~:/~:/~:/~:/~:/~:/}90|90}90{8/{8/{8/{8/y9/{;1z<1y;0y;0x:/x:/x:/v:/t8.s7-s7-r6,q5+o5*o5*m4)j1&o6+l3(h2&k5)h2&l6*|F<ŠOGŒOJPKRM‘TO‘VPWPŒZQ‰]R…aUy_Pj[Hd_IdhO\fKN]@EU8AQ4>M0=L/?M3DQ7FR:GS;BK6>G2:A/9=,9=.9=.8<-7;-7=36=56;47=3:<1;=/<?,<A*JS4Ve>dwIl…LpQušTw¡Wx¢Z{¢ar˜[e‹N\‚EZ€C\‚E\‚EZ€C_…H`†Ia‡JbˆKa‡J`†I^„G]ƒHZ|INp>Ef7haQhaQibRibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTrfZth\sf]ndZjaXle]snh{xq~w† 
”¨¥œ«§œ¨¤™£‘™–‡z{kdgTOR?GJ7EH5BE2BE2EH5KL<JH9GD5D>0B:-A7+@6*G5)S5*qB8ŒPHŸPK¯HI¿GIÌEIÓBEÖ@BÙ@BÚBAÜDCÚEAÛFBÝEBÜDAÞDDÞDFÞDFÞDFÞDDÝEDÝEDÛEDÚFB×FAÖF>ÔE=ÔE=ÓF=ÔE=ÕF>ÔA:ÓC;ÎE=ÁB9®>3Ÿ:.‘;.‹=0„?0?/€>.>.;/„:-‡;.‡;.„:-ƒ;-ƒ;-‚:,‚:,9+9-9-9/~:/~:/~:/}:1}:1}:1}:1|91z:1|91y90y90x8/x8/w8/y:1y:1x90v:0v:0u9/u9/t:/r7/q6.q6.p5-o4,o4,n3+m4+j1(m7-j4*h2(j4*f2'j6+}D;NGNHQJTLUOŽWPŠYR„\R{YMw]PkZJ]UBYXDY^HOYA?K3:H/7E.5A+5A+8D.?H3CL7EN9BK8@I6<E4:A19@07>.5<,4:,5<45<56;56;49;0:</;>-;@)GP1Sb;buGk„Kq‘RwœVy£Yy£[xŸ`n‘W`ƒIZ}C\E^G^G]€F`ƒIa„Jb…Kb…Kb…K`ƒI_‚H^IY{INo@Ef9haQhaQibRibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTkaUoe[pf\lcZkd\rmg„|‘Œ§£ µ±®ÄÁ¼ÉÆÁÉľÅÀºÀ¹±¹²¨¤œˆ‚rgdUPQ?FI8>E3;D1:F28E38E38C3:B3<B6@C8CD<HE<>4*P:/e?6€EA¡PO¿X[ËRWÊCIÕFJ×EFÚFFÜFEÞFCÞDBÞE@ÞCAÞDDÞDFÞDFÞDFÜDCÜDCÜDCÚDCÙEA×FAÕF@ÓF=ÓF=ÑG=ÑG=ÔE=Ô?8Õ@9ÐC:ÆC9¶A7¥=0–=/Œ=.†>/‚?/>.>.<-ƒ;-…;.…;.ƒ;-‚:,‚:,‚:,9+:+9-~8,~:/~:/~:/~:/}:1}:1}:1}:1z:1z:1z:1y90x8/v7.v7.t8.w8/u9/u9/t:/t8.s9.s9.r8-q6.q6.p5-o4,o4,m4+l3*j4*h2(m7-i3)f2'i5*e1&i5*{B9ŽMG’MH‘PJTLVO‡XN‚ZP|\Qs\Nl\M`WFVSBQRBJQ?@I64@,3<+2;*09(09(2;*5>-9B1<E4?H7?H7>G6;F5:C25@/3>-1<,3:33954954928919;.;>-;@*CL/O^7`sEk„Mr’SyžYy£Yw¡Yq˜Yf‰OZ}CX{A\E^G^G^G`ƒI`ƒI`ƒI`ƒI`ƒI_‚H^G^KXzHLm@Cd7haQhaQibRibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlbVj`VjaXkdZlh_uplˆ„Ÿžš°°®ÃÂÀÓÒÐâáßçæäçãàâÞÛÜ×ÓØÏÈμ²¸¥——‡zym]_YIKL:AF2<E0?K7>I8>F7;C6;>5:;3983872==3C<2N71a84ˆHH¯]_Å`dÊWZÍQSÓNO×MKÛKJÞHGáFDãDAâBBßCFÞCGÞDFÝCEÝCEÝCEÜBBÛCBÚDCØD@ÕD?ÓF?ÒE>ÐE>ÐG=ÔE=Ù@:Ù@:ÑB:ÉD;¼C:¬@4™;/Œ9+‰=/ƒ>.>->-<,ƒ;,„<-„<-‚:,‚:,‚:.9-9-9-~8.}9.~:/~:/}:1}:1}:1}:1}:2}:2{;2y:1y:1x90w8/t8.s7-r8-t8.s9.r8-q8-r8-p7,p7,p7,o6-o6-o6-n5,m4+l3*l3*i3)h2(k7,h4)f2'h4)b0%f4)zA8MG“NI‘PJŽULˆXNYOxZOr\Nk^N_XHRPAKN=EI:9B13;,08)17+17-06,06,06*17+2:+4<-9D4;F5=H7<I7;F56C13@,1>-.800621622717829;0:<.:?)?H+LZ6]pCjƒLs’Vyž[w¡YsœVgP^IVyAX{C[~F[~F[~F]€H_‚J_‚J_‚J_‚J_‚J^I]€H]€JUvIJk@Ab7haQhaQibRibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlb
Vqh_ng_mhbtqj‡ƒ€¡ ž¼¼ºÍÏÎÏÏÏàààòòòúøùû÷ø÷óòñëëîãßèÐÆÝÁµÁ«¢‘}ub^\GKM7CH2AI2AH6@G7BD9BB:B=9C97?:6:=6>?7C82J.+e77QR´hjÆnmÀ\\ÇZWÐVS×QNÝMLâHHæCDåBEàDGÝDGÝDFÝCEÝCEÜBDÜBBÜBBÜDCÙCB×C?ÓD>ÒE>ÐE>ÑG=ÔE=ÜC=ÛA9ÒB:ÊD;¿E:°A6œ:-Œ6'Š</…=.‚=->-<,<,„<-„<-‚:,‚:,‚:.9-9-~8,}9.}9.~:/~:/}:1}:1}:1}:1}:2{;2z;2w;1x90v:0u9/s9.r8-r8-r8-q8-p7,n8,p7,m7+o6+o6+o6-o6-n5,m4+m4+j4*i3)h4)f2'k7,h4)c1&f4)b0%e3(xB8NH’OIRM‹VN„XOzZOq[Mj]MZTDIJ:@D5>D69A219,/7*6<247058157247025.06,08-2:-6A3:E5=J9>K9<I77F33B-1>-,6..400511606718:/:<.:?+<E*IW4[nAiMr‘Vxœ\užXp™U`†IX{CSv>WzB[~FZ}EY|D\G_‚J_‚J_‚J^I^I]€H]€H]LStIHhA@^8icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSkbSlcTlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldWjc[gd]vsn“’Ž¯¯­ÄÆÅÚÜÛêîïóôöö÷ùüüþÿÿÿÿþÿÿûúüööýòî÷áÔòØÇàɷƵ¡ª ‡ˆ„ibbFEH-EI0CG0FE3GC7F<3C60G53I;:>=9>?:B:7I75cGF’jjœ’è­©Ò‹‡ÇtnÀ]XÆSPÕONàKMçFKéFKßDHßIKÛEGÕ<?Ø>@áGIàDGÖ:=Ú@@Ú@@ÙA@ÖB@ÔC>ÒC=ÐA;ÑA9ÞC>ÛA9Ó@9ÊA9¾B8°@5Ÿ:.8*Œ=0‡>/ƒ>.>-€=,€=,<,<,:+9-9-~8,~8.}9.|8-|8-|90|90|90|90z:1z:1z:1z:1y=5w<4w;3u:2t91q8/p7.p7.o6-m7-m7-l8-m7-l8-m7-m7-i2+m6/p92p92l5.g2*g2*h3+e0(i7.d2)^.$`0&_/%b2(s>6ŒOJQLˆQLRJ}XOx]RhXITN>DE5@D5:B37?24</3;04:049279478366457246116//6./7,.9+2=-6C2:G6:I68G44C.1?.*4+,2.-2./4.45/79.:</:?+6>&ES2YkCj‚Ru“]y_r›Yk”R[~DXyDTu@Tu@XyDZ{F[|GZ{F^JbƒNbƒN^J\}H_€K`L_NTsJFd@<W6icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSlcTlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldYgd]onj†…ƒ¤¤¢ÁÃÂÖÚÛêîï÷ûü÷ûþøüÿüýÿþþþÿþÿÿþüÿüûÿúóÿðàüêÔíÞÇ×ͲÀ»ž¤£„‚ƒcgjKLQ3GJ/EC.GA1H>4I:3L95K<9?:7CB@NIF]RPyjg£Ž‹Î²®èÅ¿ÿ×Ñ벩͂|Àc^ÊVVØRSßHMÚADÖDEÕEEØHHÜJKÛEFÕ<>Ö<>ÛACÙ??Ù??ÙA@×A@ÖB>ÓB=ÒA<ÔA:ÜA<Û@;ÔA:ËB:¿C;±A6 
;1’8-Œ=0‡>/ƒ>.>-€=,€=,<,<-9-9-9-~8,}9.}9.|8-|8-|90|90|90|90{;2{;2{;2z;2w<4u<3u:2s:1r90n8.p7.m7-m7-l8-l8-l8-l8-j8-l8-l8-k6.n70p92n91l7/i4,g2*e3*c1(d4*_/%^.$b2(^.$`0&p>7‚KF‡RN‡XR~XOrVKdRFRI:A?0<@18@15@23>04<14<15:37:379677577557246116/07//7,.9+1<.5B19F59H58G44E24B1.8//6/.3--2+23+46+8:-9=,7?(DQ3YkEjUv”`yœbp˜ZgQY|DWxCTtBTu@VvDYzEYyGYzE[{I_€K`€N\}H[{I^J^~L^{MSnKF_A9R5icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSlcTlcTlcTmdUmdUmdUmdUmdUmdUmdUneVneVneVlfZed_y{x—™˜²¶·ÍÑÒäéìôùüúÿÿûÿÿüÿÿýþÿþþþÿþüÿÿûÿÿúÿÿôþüçúùÝðïÑáåÄÒØ´½Å  ¨ƒ‡‘l`gEQX9EH-DC.JD4LD9KA8J?;C;9SJKia_wvœ’¾³¯ÙÎÈêÜÓþèÝÿóéÿäÛð©£ÃecµBEÇDIÚRVÛUTÑMKÊDAÍCAÔFE×EFØDDÙCDÖ=?×>@Ø@?Ö@?ÖB@ÓB?ÓB?ÕA=Ù@;Ù@:ÓC;ËE<ÀD<²B7¡<2“9.Ž<0‰=0ƒ>/>.=-=-€=-€=-9-9/~:/}9.}9.}9.{8/{8/}:1}:1{;2{;2{;2{;2z;4x<4t;2q;1r90o9/n8.l8-l6,k7,l8-j8-j8-j8-j8-i9-j8-j8-n91m82m82m82m82i70f4-c1*e3,a0)],%_0(c4,\-%_0(sD<}PKVP~YQpSK^J?OC7@=.46(3:*1<,1<.2=/5=26<27:37:379679668368357227018008-.9+0;-2?.6C27E48G49H59F54>33:205.,2(/0(13(57*7;*6>'ER6[lJm„Zy–fyœfl“Z`‡NWwEWuCUrBUsAWtDXvDYvFYwEZwG^|J_|L]{I]zJ_}K^{KZwKPgJAW@3I2icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSlcTlcTlcTmdUmdUmdUmdUmdUmdUneVneVneVofWmg[jkfƒ‡ˆ£§¨¹¾ÁÐÕØåíïóûýõýÿûÿÿûÿÿüÿÿüþûýþùþýøÿþ÷ÿÿóùýæöýÞî÷ØçñÎÞèÃÎÚ´¶À›¡«†€‡efmLNR7FH0JI7KI<HD9D@7OGDj`_‹€¤š™·¯¬ËÆÂÞÛÔêæÝúñèÿûñÿ÷íÿÝÕûµ³åŽÊcf°@?ÃPKÉTMÉPHÄGAÈGBÔMJÖKHÐ@?Ò>>Ó??Õ?>ÕA?ÕA?ÔC@ÓB?ÓB=Ö>9Õ@:ÒC=ÌE?ÀE>²A9¢=5•;2Ž<1‰=0ƒ>/>.=/=-€=-€</9/9/~:/}9.}9.}9.{8/{8/~;2~;2|<3|<3{;2{;2z;4x<4r90o9/o9/m9.l8-k7,j6+h6+i7,i7,i7,h8,h8,h8,h8,h8.m;4l71j5/i70j81j81f4-a0)e4-a2*].&a2*b3+Y*"a2*yNG\VvWRjOHXD;I<3@:.:;-8<.08)/:*3;.4</6<26<27:169279479468349238139/19.19..9+/:,1<,2?.5B17E4;H6<I89C87?428.-3'./'/1&24'48'4<'ER8_pPtŠd}šny›ifXU|GSsBTqASp@Sp@TqAUrBVsCVsCXuE[xH]zJ]zJ^{K_|L\yIWrIK_F<M;.?-gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSlcTlcTlcTmdUmdUmdUmdUmdUneVneVofWofWofWng]qqo‡Œ¤©¬·¿ÂÍÕØãíïòüþôþÿøÿÿûÿÿûÿüýÿúüýõúüñøøìöøê÷ÿèóÿäòüáîùÙêôÒÞèÅÊѯ¶½›£}‚bY]BIM6GJ9FJ;CF;AB:c^X…}z«¡ŸÁ·µÌÂÁ×ÏÍæáÝñîéú÷ðüõíÿñèÿòéÿóíÿåß騤¸id§MBµPDÁYNÈ\RÆQH¿D=ÂA<ËDAÌA>Î@>Ð@?ÑA@ÒC?
ÑC?ÑC?ÒC?Ó?;ÒA<ÐE@ÉE@½C>¯@9 =7•<4Ž<1‰<2ƒ=1=0=1=/=/=/~:/~:/~:/}9.|90|90{8/{8/|<3|<3|<3{;2z;4y:3y:3w;3n70m80m80l7/k6.i7.h6-h6-i7.h8.h8.g8.g8.g8.g8.h8.k92h6/f4-g5.i70h70e4-b1*c2+c4,_0(]1(]1(W+"c7.{ULtXTaNHN?8A7.=6,;9-9;.9<139-3;.5;/6<06<07:/68-47.69049238139/39/2:/19.19,08+.9+/:,1<.3>.6C2:E5<G7=E8:B55;/06(02'02%13%26%2:%GS;dtWyk~šrs”g]ƒRJp?Oo>Qn>Qn>Qn>Qn>Ro?Sp@TqAWtD[xH]zJ]zJ_|L`}MZwGSmFCU?6D7(6)gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSlcTlcTlcTmdUmdUmdUmdUmdUneVneVofWpgXpgXoh^lnmz‚…“š «µ·ÄÎÐÜæèí÷ùôþÿõþýöÿüúÿúûÿùûþóøúíõöèñôãõüêôýèôüåóùßòöÛéìÏØ×¹ÅĦ±°’ŽrggONP;GK:FL>HPEKQG{{sŸš”ž¸ØÎÌßÓÓçÛÛóëéü÷ôðïëüûöÿÿöÿýôÿøñÿóêÿçÞÿ×Ëؓƒ¸eS¤K;´TF½WIµF;·@8ÈKEÇC>ÊC?ÍC@ÎC@ÎC>ÎC>ÍD>ÍB=ÑB<ÒC?ÎDAÅD?¸A=ª=8ž;6•<6Ž;3‰<4ƒ=3=2=1=1=1=/~:/~:/~:/}9.|90|90{8/{8/|<3|<3{;2z:1y:3x92w81u91n70m80l7/j8/i7.i7.h6-g7-h8.h8.g8.g8.g8.g8.g8.g8.h70h6/g5.g5.h70h70g6/f5.a2*e6.^2)\0'`4+a5,i=4uSJXG@E>6<5/:6-;8199/57,36+7:/5;/7:/7:/69.58-57,36+28,19,28.19,19.19,19,19,08+08+.9+/:,0;+3>.6A18C3<D5:C28>05;-57*35(13%04%18&HT>gw\wŒmv’lhˆ_RwKBh9Kk<Nk;Pm=Pm=Ol<Nk;Pm=Qn>WtDZwG]zJ^{K`}M_|LVsCNg@<J90;3%/'gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSlcTlcTlcTmdUmdUmdUmdUmdUneVofWofWpgXqhYoh^hikmtz…Œ”£¬±¿ÈÍÓÞâåðòðüüòüûõþùøÿ÷úÿöüÿòúüîøùçö÷çööêù÷ëú÷èüöæýõâ÷ìØçØÃÕƯÀ±šž“}vn[ZWFPQCSVK\c[fme•˜²²¨ÕÎÈèÝÛóãäúêëÿñôÿ÷ùÿýýþýùùúôøùñüüòÿÿóÿÿñÿûêÿæÑñª”¼o[©VD®RCµPD»LA¿JAÂG@ÆE@ÉE@ÊE@ÊE>ÉD=ÈC<ÉB<ÑC?ÐE@ÌEAÂC=³=9¦;5œ;5•<6Œ<5‡<6…<5=4<3=1=1=1~:/~:/~:/}9.|90|90{80{80{;2{;2y:3x92w81v70v70t80n70k90j8/j8/j8/h8.h8.h8.g7-f7-f7-f7-d8-d8-d8-f7-g6/h70i81i81h70i81j;3l=5g80l=5c7.a5,oC:xLCtI@nMDD:167/45/891:;556.14+25,58/58/57,46+46+46+46)48*08).9)08+08)08+08)08)08)19,08+08+08+08+2:-3;.4<-9B1:C0;B2:A1:<.68*35'/3$29'IU?eu[oƒghƒbYyTInE?d8Hg;Li;Ol>Nk=Mj<Li;Mj<Ol>UrDYvH[xJ]zL^{M\yKQn@G_;6B4+4/#)%gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSjcSkcVlbVlbVmcWmdUmdUmdUleUmfVngWogZoi]oh^mjekormtzƒŠ¥®³ÃÌÑÕßáæîñôüþôýú÷ýùùþ÷úÿöýÿòýÿñüÿîýþðûøïþúñÿùïÿúíÿùéÿòßïàËÝηÁ²›¢—|ubb_NXZL^dXmwn|†}¦¬¢¾¿·ÚÕÏîäâüîîÿôôÿô÷þôõþøøûúøûýøüÿúøÿõôüñ÷ýñÿÿñÿùèÿôãÿÌ»½pœR
E©SF¶TI²C:ÀIAÄH@ÇF@ÉF>ÉF>ÈE=ÆC;ÆC;ÍG>ÌG>ÇG>¼C;­>5 ;3˜;4‘>6Š=5†=6ƒ=5=4€<3€<1;1;1~:/~:/~:1}90|90|90{80{80z:1z:1x92w81v70v70u6/q6.m80k90k90j8/i9/i9/i9/i9/g7-f7-g7-f7-f7-f7-f7-f7-f5.j81m;4k:3j92j:0m>4oC8mA6oD;e=3d>3zVJ†dZyWMbLA>:13814927<569214-25,9<336-25,24)03(13(25*46+47,/7*-8*/7*/7*/7*/7*/7*/7*19.08-08-/7,/7,/7,08-08+7?09B1<E4=D4;A38<.26'-4$2;*JVBcr[i{a^uXRmLGeAAa:Hd;Kh<Nk?Nk?Kh<Jg9Li=Nk=TqEXuIZwK[xL]zNZvMNjAC[;2>4*00!''heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVkeYldYlcZmeZmeXmfVg`NkfSnkXol]he\feasqr‚†Š“–ž¡«³¶ÃÈËØÝàêîïö÷ùùûúüþýþþüÿÿúýþöûüôúüñûýòüþóùúòúúòùùíüúëþüçù÷àéåÊÕÔ¶º¹›˜˜|qtY_dMdkYr}l„‚‘›´¹²ËÌÇãâÞòîë÷óòû÷öýùøüø÷ÿþüþþüþþüýÿúýÿúýÿúýÿúýÿúúü÷ÿÿúÿûöÿêåا £`XœE>µNG¿MCÂF>ÄD9ËF=ÎI@ÎKAËH@ÇG<ÆG8ÃH8½J8³H8¦E5šB4‘B5‹C5†B7„B6ƒA5‚>3‚<2„:1†91†91€:2~:1|91|91{80z7/w7.w7.w7.w7.w8/w8/v7.u6-q5+o4,l7/i81i81h70h70g6/g6/g6/f5.g6/i70i81i81g6/f5.e3,h3-j5/m80k90j:0i;.j>1kA3gB2jI:dH:v_O~k\ŠxjŒoRJ=79.4:04:039/39/39/28.28.17-17-36-06,25,06,14+/5+/4-.5-/4-/4-/4-/4-/4-/4-05.05./4-.3,.3,/4-05./6.1;23=26@59C89D67B45@03>-4?.DQ=XfO[kQRbHEX<BU9BW6Jd?Ke>Ke>Ke>Je<Je:Mh?Oj?UpGYtK\vO]wP]wRWqNHaA;N8&1+&+.$),heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVkdZkdZlc\md[meZmfVidQjhSmjWjk[gg_lll~„“š¨¯µ¸ÂÄÑÖÙâçêòóõûüþÿþÿÿþÿþýûÿþúÿÿúÿþùûüôùúòøûòùüóøùñøùñøúí÷úéùûåòõÚßâÅÌÏ°´·˜‘–vkqUZbJbkVu€oŒ˜Š§œ¿Á¼ÓÓÑççåóóñ÷÷õûûùüüúûûùþþüþþüþþüþþüþþüþþüþþüûÿþõÿÿõÿÿþÿÿÿûúÿíêð¿ºÂ~uŸKA¯MB¹J?ÂL@ÅK>ÃE9¼=4¾?6ÃE9ÄF7ÃJ7½L:³I9¦F6™C4‘C6E7…C7‚B8A7>5‚<4ƒ:3†91„93:3}:2{;2z:1y90x8/v7.v7.v7.v7.u9/u9/t8.r6,p6+m4+l7/i81j81h70i70h70g6/g6/e3,f4-g5.h6/j81j81k92k92k60l71k90k;1l<0j>1j@0iD2gH6lQ>gRAvgTue‹†s†ƒrKL<69.39/39/39/28.28.28.28.17-17-17-06,06,06,/5+/5+/4./4./4./4./4./4./4./4.05//4./4..3-.3-/4./4./6/.80/:22>44@66B66B66B45B16C1CP>P]IR`IIW@AO6BP7FV;Jb@Ke@LfAKe@Ic<Hc:Id;Je<SnEXrK\vQ]wT\uUTmOC\?6I5&1-',0%*.heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVkdZkdZlc\md[meZmfVmhUljUmjWjk[kkcwww‘’—§¬²¿ÆÌÒÜÞìñô÷üÿþÿÿþÿÿÿþÿÿþÿþýûÿþúÿÿúþýøúûóö÷ïóöëóöëõ÷ìöøíöøêõøåõ÷áîîÔÛÛ¿ÉÊ«°±’“–yuy`kpZt{iˆŸ§œ®µ­ÍÏÊÞÞÜïïíøøöûûùþþüþþüýý
ûþþüþþüþþüþþüþþüþþüþþüýÿþûÿÿûÿÿþþþÿûùÿúôÿ÷íýÑÆ؞•M?¥RD©L=§@1·G;ÏYMÏUJ¼@4ÄD7ÆH:ÄK@¹I=«C8?5—@7“D=‰@9…B:@:>8~>5}=4<4}=4z;4x<4x<4w;3u:2t91t91t91r90r90r90r90o9/n8.l6,k5+m80j81l71j81k60i70i70h6/g5.g5.f5.g6/h70i81k:3l;4l;4j;3i:0h<1k?2jC4iD2fE2gJ8lVAjYGujV†m‘Ž{€mDG658-39/28.28.17-17-17-17-17-17-17-06,06,06,/5+/5+/4./4./4./4./4./4./4./4./4./4..3--2,-2,.3-/4..5.+5-+6.-9//;12>25A56B47D38E3?L:ER>CQ:;I28F->L3DT9J_@Jb@LdBKc?Ia=G`9F_8G`9RkDXpL]uS^vVZqTPgK=T8.A.&1-',0%*.heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVkdZkdZlc\md[meZmfVniVljUlkWlm]pqi€‚ž¡¦¹¾ÄÄËÑÛåçõúýûÿÿüýÿüýÿþüýüûùÿþüÿÿûÿþùüüôõöîïñæëîãéìáîðãñóåñôãñôßòñÜêêÐØؼÇÇ«²±•¡¡‰‘“}“€œž‘ª­¢¶»´ÀÅ¿ÜÜÚééçööôûûùýýûÿÿýÿÿýýýûþþüþþüþþüþþüþþüþþüþþüþþþûûýÿþÿÿþÿ÷òïüóìÿüóÿúíÿôäì²¹p•P@¢O?¸XJ·M?µE9ÀG<ÁA6ÈE;ÅIA½F>¯@9¡;6š=8—B??;‡@<@<}@;z?9y@9y@9y@9v=4v=4v=4u<3s=3r<2r<2r<2o;0o;0o;0n:/k9.k9.j8-i7.m82n72n72l71l71k60i70i70l:3j92h70f7/f7/g80h91g;2j?6f>4e=1f>2hC3iE5fG3cG2cJ6hV@i\IskV…‚ozor_9=,28,28.28.17-17-06,06,06,17-17-17-06,06,06,/5+/5+.3-.3-.3-.3-.3-.3-.3-.3-.3-.3--2,-2,-2,-2,.3--4-*4,)4,*6,+7-.:01=14@46B46C2:G5<I78E12?+2@)8F/>N4DX<E]=K`AJb@K`?G_;H^:F^:ShGXpN`uV^uX[oTLcI9M4):(%.+%*.$)-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVkdZkdZlc\md[meZmfVlgTljUnmYop`tum…‡†¤§¬¿ÄÊÎÕÛæðòûÿÿûÿÿûüþýþÿÿþÿþýûÿÿýÿÿûþýøøøðïðèèêßâåÚàäÖçëÜêïÛíðÛìðÙîîÖèèÐÚØ¿Ìʱ½§·µ 
±®¶´§Á¾µÉÈÃÒÓÎÙÙ×èèæòòðúúøüüúýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿþüÿüÿÿüÿÿûúüû÷ýÿùþÿøÿÿôÿþíÿþìøßËŜŠbP–J:¥L<µQA¼L>¿@7ÆC;ÅF@¿D?±>;¦:8ž::™>=‘=;Š?<„@=}@=xA<tB;rC;rC;q?6s>6s>6r=5r=5r=5p>5p>5n>4m=3l<2k;1j;1i:0i:0j:0n72p62p62n72m61l71i70i70l;4k:3h91e90e90e90f;2d<2fB6dB6cC4dD5dG5dI6bI3_I2^L6eV?jbMrmW„ƒo„‡r\aM3:(28,28.28.17-17-06,06,06,17-17-17-06,06,06,/5+/5+.3-.3-.3-.3-.3-.3-.3-.3-.3--2,,1+,1+,1+,1+-2,-4-,6.+5-*4,*4+,6-.8-0:/2=/2=-5@/7B15A-2>*1=)5A-8F/=O7@T9FX>H\@J]?I^=K^>K`?TgI[oScvZauZZlTJ]G6G4'5&$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVkdZkdZlc\md[meZlfVjgTkjUlnYorasvm…„Ÿ¤¨¸¿ÇÖßäí÷ùûÿÿûÿÿûüÿþÿÿÿþÿþýûÿÿûÿÿúüüôôôêëíâäæÙßãÕÞâÓãèÔçìÖéíÖêìÔëëÓèæÏßÚÄÓθÓκÌƶËøÐÉÁ×ÒÎàÛØêæåðïíóòðùù÷ýýûýýûýýûÿÿýþþüüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿûüÿøùÿþýþÿýùÿýôÿøðÿõ÷ÿöð÷çÿÿíÿîÛͧ”¤kX¢[GªWE­L;¹D:ÀD<ÀE>¼E?´A>ª=:¢:9š:;•=<?=…?=~A<wB<sD<oD;oD;p?8q?6q?8q?6q?8q?6p?8o?5o@8n?5m>6k<2i=4h<1h<3l;4o83r73p62p62n72l71j81i81h70g80e90e:1d<2e?4f@5dB6_C5aG8cL:dM;cM8`K6^L4]M4^R:bX?mhRss[€ƒnv{eFO:4=*39-39/28.28.17-17-17-17-17-17-17-06,06,06,/5+/5+.3-.3-.3-.3-.3-.3-.3-.3--2,,1+,1++0*+0*,1+,1+,3,/6/,6.+5-*4,*4++5,-7,.8--8*0;+4?/5@/4?.3?+4?.3@,7F19K3@O8CU;GW<H[?M]BM`DUeJ\nTdtZ`rZXgRGXE4B1$2%$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVkdZkdZlc\md[lfZlfVliVlkVkmXlo^lrhz€~• 
®·¾ÒÛàêóøûÿÿùþÿùúþþÿÿÿþÿúù÷ÿÿûþýøùùñòòèéëÞäæØáåÖâçÓäéÒçíÓèìÓçéÑèèÐçåÎàÛÇ×оÚÓÃÙÑÆÞÔËæÝØîäãóéêúñôÿùûúù÷þþüÿÿýýýûýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿùøÿúùÿþýüÿý÷ÿþîÿùñÿÿòÿý÷ÿúõùëÿýíÿúèôλ«wbQ:ª\H²N>¸J=¹I>¸I@´G@¯D>¥=:œ:7–=9>:ˆ?9€A:yB;tD:pF:pE<p?8q?8q>:q?8q>:q?8p?:p?8qB<pA9n?9l=5j=7j>5j=7n=6o83r73q73p62n72l71j81i81d8/d90d<2d>3d@4cA5cA5_C5ZE4_N<gVBhWCcS<^O8[O5\R7]V<]Y>nlUtv^|kcmU2>(6B.3;.39/39/39/28.28.28.28.17-17-17-06,06,06,/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1+,1++0*+0*+0*+0*,1++2+-4-+5-*4,*4,+5,+5,,6--7,,6+/:,2=/6A17B27B15@04?.2?-4C.:G3=L5AO8DT:JX?L\BTbI\kTcp\^mZUbQDRC2>0#/%'0-&,,$**heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVkdZkdZlc\md[lfZlfVolYmlWjlWgkZgmcr{xŽ˜š¨±¸ÊÓØãìñøýÿ÷üÿúûÿþÿÿÿþÿúùõÿÿúýýõøøîññåéëÝæéØåéØçìÖæìÒéíÒêíÒèèÎèæÏèãÏáÚÈØÐÃØÎÄÝÒÌéÞÚøíëÿôöÿõùÿõûÿùüÿýþÿÿýÿÿýþþüþþüÿÿýÿÿýýýûþþüþþüþþüþþüþþüþþüþþüÿþúÿýûÿýûýüúùýüøÿÿõÿÿóÿÿíýúóÿûüÿúÿÿôÿöæÿóßÿãÍ×­•©oY¨WD®Q@¬O>¯OA±OB­K@¤C<™<5•>7?6‰@9A7{B7uD6qE8qE:p?8q>:s>:q>:q>:q>:r?;q@;sB=qB<p?:m>8l=7k>8m>8o>9o83q73o83m82m82j92h91f:1f;2d>3d@4bB5`C5]A3\@2WB1TG4\T?f^Gj_IbX?ZS9ZS7\W:[W<XW;kmUsw^u}fUaI&28D03;.4:04:039/39/39/28.28.17-17-17-06,06,06,/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1++0*+0**/)*/)+0*+0*,1++2+)3+*4,+5-,6-,6--7.-7.-7./9.2<16A39D69D47B46A10;+2?-6C19F2<I5AO8FT=JX?R`I[hTan\]jYQ]OBN@0<0",#+1/(..&,,ifWheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWle[le[md]ne\mg[mgWqn[kmWknYkr`kshpyv‡‘“Ÿª°»ÄÉÐÙÞêïóöûþüýÿþÿÿÿþÿþýùÿÿúþþöúúð÷÷ëôöèñôãìðßéîØñõÚíòÔëîÓììÔèæÑáÜÉÝÕÈÞÔËáÔÎèÚÙòääúëîþòôÿ÷ûÿùÿÿüÿÿýþþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþÿúÿþúÿþúþþüýÿþûÿÿúÿÿúÿÿûÿÿþÿÿÿÿýÿûõÿûñÿüìÿûåÿóÛÿêѺ‚i¦bK—O9ŸT?¨ZF£RAžK; 
M?Ž<0‹=0†>2=/x=/t?/sB3uE7r=7s>:t=:t?;u@<u@<t?;q>:s@<rA<r?;q@;p?:o@:o>9o>9k92m82l:3l;4l;4h<3g<3d>3c?3^>1dG9cG9T=-N9(M8'@1XR<PN7KI0OK2]Y>ieJeaDVU7XX<^aDpt[z€fdlU>J2-9#2>*4</5;15;14:04:039/39/39/39/39/39/28.28.17-17-06,16016005/05/05//4./4./4.,1+,1+,1+,1+,1+,1+,1++0*.5..5.-4--4--4--4-.5./6.+2*-4,1805=29A6=E8@H=?J<7B26A03>-1=)3?+9E/BN8IU?O[GWcO]hWZeULWI<F;/90)3+)/+(.,'-+heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWle[le[md]ne\mg[khWonZjlVknYlsajtiq|x‰”–£®´¾ÇÌÒÛàëðô÷üÿýþÿþÿÿÿýþþýùÿÿúþþôûûïøùëô÷æñôáìñÝèíÖêïÑæëËäçÊææÎãàÍßÙËÞÔËßÔÎçÙØíßßöçêûïóÿôøÿ÷ûÿúþÿüÿÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿúýÿúýÿüýþÿþýÿÿüÿÿüÿÿûÿÿüÿÿüÿÿûùÿüõÿþñÿÿíÿûåÿòÙÿëÑ౗§u\’\D–ZBšYCšVCšSAŽG5‹F6ˆE4ƒD3|C2wB0s@/o>/u@8t?9u>;u@:t?;p>7p=9tB;q>:p?8p?:o>7o>9m>6n=8m>6k<4l=5m>6k?6j?6gA6eA5bB5dG9[A2^G7_J9O>,F7$G8%?7"LL4HL3DG,CF+MM1[[?baC`aBZZ>dgJsw^sy_X`I:D,/;%7C/6>17=36<26<26<25;15;15;15;15;15;14:04:04:039/39/27127127116016016005/05/.3-.3--2,-2,,1+,1++0*+0**1**1*)0))0)*1**1*+2+,3,.5..5-.5-07/4;3:B7?F>BJ?;F8:E57B14?.3?+6B.<H2@L6LXDS_KYdTWbRLVK=G<1;2+5-+1-*0,)/-heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWmd[md[md]ne\mg[khWnmYimVjoYktaiuiq}yŠ˜™¥²¸¿ÊÐÔÝâíòö÷üÿüýÿýÿþÿýþþýùÿÿ÷ýýóúúîøùëô÷äðóÞéîØåëÑâçÉÞáÂÝÝÁÞÞÆÞÛÈÝ×ËàÕÏäÙ×îâäóçëúîòÿôøÿ÷ûÿùüÿúýÿýýÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüúÿüúÿüûÿþþþþÿüÿÿúÿÿùÿÿøÿÿùÿÿúÿÿúúÿýøÿÿöÿÿóýÿïÿúæÿóÝÿøàÿìÕ㿧¬‚j‡YB†R<’[F—]IS?F2u>)r>)tA,tD0vE6q?4n<3q<6tB9tB;o?5q@9xH>n=6m>4m>6m>4l=5j>3l=5j>5g?5gA6hB7gC7eC7cC6aD6^D5bK;UB1ZI7`S@RG3D<'E=(FA+BF-BH.AE*<@%?B%LO2_`AijKaaEnqTvzagmSJO95=&5=(=F39?38>48>48>47=37=37=37=38>48>48>47=37=37=36<26<25:45:449349338238238238205/05//4..3--2,,1++0*+0*).*).*).*).*).**/++0,,1-162/4.,1+,1+05/6;4=B<AH@@K=>K:<I78E34A-2?+3A*5C,DQ=KXDQ^MQ^MHTH<H<2>4.8/-4--4-,2.gdUgdUheVheVheVifWifWifWifWifWifWifWifWifWifWifWkeWkeWmd[md[md]mf\mg[jiWkmXimVjoYjs`hthn}xˆ˜˜¦³¹¿ÊÐÓÜãëðööûÿüýÿýÿþÿÿýÿþúþþöýýñúûíøùéô÷äïóÜçìÕãçÎÜßÀ×Ú»ÖÖ¼Ù×ÂÝ×ÇÞ×ÍåÛÙìààøìðûðöÿõûÿùþÿúýÿûûÿûûÿýüÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüúÿþøÿþûÿþþþþÿüþÿúÿÿùÿÿøþÿúýÿùüÿùùÿ
ýúÿÿúÿÿøûÿõüþðÿÿïÿûëÿñßÿðÝÿïÚñÒ½´zyS>tJ4uH3vI4zM8~O;yL7nA.e7'{M@tE;qB8sD:oC8g;0d8-g=1i?3i?3i?3g?3g?3f>2f>2e?2cA5bB5`C5`D6_C5[D4ZE4WD3XI6OB/\T?oiSc^HMK4IG0IK3>D*?H-@F*<B&<A#EJ,XY:deFmmQxx\tv^[_FAD/6;%:?+>B19?39?59?58>48>48>47=37=39?59?58>48>48>48>48>48>47<67<67<66;56;56;55:45:438238227105//4.-2,,1+,1++0,+0,+0,+0,+0,,1--2.-2.051.3/+0,+0*-2.3828=9;B:AL>@M;@M;=J68E14A-1?(1?(=J6BO;IVDJWFDPB;G;2>4.:0,6..5.-4-gdUgdUgdUheVheVifWifWifWifWifWifWifWifWifWifWifWkeWldWmd[md[md]mf\mg[jiWlnYinWiqZgs_drejyt…••£²·½ÈÎÒÛâêïõôùýûüÿýÿþÿÿýÿÿúÿÿøÿÿóþÿñüýë÷úåñõÞèíÖãçÌÙÜ¿ÕÕ¹ÒйÔѾÚÔÈàÙÑêàßôéíþóùÿ÷üÿûÿÿýÿÿþÿÿþüÿþúÿþúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿþúÿÿúÿÿûÿÿþþþÿýþÿüüÿûüÿûúÿúøÿù÷ÿúøÿüùÿÿûÿÿûýÿúýþùùôîÿþöÿýôÿüóÿûîÿ÷çûæÕéÒÀÏ´¡¶š…”uawVClI6mH6rM=uP@mF7jC4gB2kF6nI9nJ:oK;pL<eA1cB1cB1cB1cB1cB1bC1`C3]F6\G6]H7ZG6ZG6WH5TG4RG3PH3OI3fdM~~frrZVX@JL4GM3<E*<F+?H+AG+AF(CH*MN/TU6xw[{z^nnVUU=CB.?A,?@.;>-;>39?59?58>48>47=37=37=38>48>48>48>48>48>48>48>49>89>89>89>88=78=77<67<66;56;55:438227105//4./4.-2.-2.,1-,1-,1-,1--2.-2.-10,0/,0/-2..211623764;4;G9=L9@O<@O:<K67F/4C,2A*6E.:I4@O<BP??M>8F71?2-9-,6--4,,3+fcTgdUgdUheVheVifWjgXjgXifWifWifWifWifWifWifWifWldWldWmd[md[md]mf\mg[jiWkmXinWiqZgs_bpcgxr‚””£²·ÀËÑÔÝäëðöõúþûüÿýÿþÿÿýÿÿúÿÿøÿÿôÿÿóÿÿïúýèó÷àéïÕãçÌ×Ú½ÑѵÍË´Î˺ÖÏÅÞ×Ñìâã÷îóÿöþÿúÿÿýÿÿþÿÿÿýþÿúþÿùþÿúþÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþþýþÿýþÿýþÿýÿþýÿüþÿúþÿùÿÿ÷ýýóýýõþýøþýùÿþüþþþÿýÿÿþÿþùÿÿüÿÿûüûöóÿøòÿýôÿþñÿþíÿúéÿóà÷äÓçÒ¿Ôº©ºŸŽŸƒurd|]K}^LtUCbE3X;)Y<*[>,X=*`E2`E2`E2`G3`G3`G3`G3^I4\K7\M:\M:ZM:XM9UM8SM7QL6NL5QQ9jlT|€gmqXSY?FL2>G,<F+<F+?H+DJ.EJ,DG*HI*LM.yx\tsWfdMTR;LI6KJ6FE3<=-<?4:@69?59?58>48>48>48>47=37=38>48>48>49?59?59?5;@::?9:?9:?99>89>89>89>89>88=77<66;55:4493382382/40.3/-2.,1-+0,+0,+0,+0,+/.,0/-10.21/32/32/32.5.4@27F3=L7@O:?N7<K49H18G05D-8G0<K6>M:<J97E61?2-9-+5,-4,,3+fcTfcTgdUheVifWifWjgXjgXifWifWifWifWifWifWifWifWldWldWmd[md[md]mf\mg[jiWjlWhmVhrZfs_cqdhys…——¥·»ÇÒØÚãêðõûøýÿüýÿýÿþÿÿýÿþùÿÿ÷ÿÿóÿÿðýþìøûæðôÝäêÐÞâÇÕÕ¹Î̳ÉÄ°ÊÄ´ÐÉ¿ÚÒÏéàãöíòÿ÷ÿÿúÿÿýÿÿþÿÿÿýýÿúüÿøüÿøýÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþ
üþþþÿüÿÿüÿþýÿýÿþúÿúøÿùøÿõøÿóøÿóøÿõüÿøýÿúÿýþÿüÿÿüÿÿûÿý÷ÿÿúÿÿüÿÿýÿÿþÿþýûûûóúøìÿÿñÿÿïÿÿïÿþíÿýíÿöçÿîâýêÛúãÑÿæÐãÊ´¥xt^GeO8cM6\H0^J2\J2\J2]K3]K3^L4^L4\M6YN8XP;XP;VP:UO9SN8PN7NN6MO7PT;`fLfoTU^CEO4AK0<F+?I.?I.CL/HN2HM/GJ-OP1YX:qmRkgLa\F[V@YTATQ>MJ9BC3>A6;A7;A7;A7:@6:@6:@69?58>48>48>49?59?5:@6:@6:@6;@:;@:;@::?9:?99>89>89>8:?9:?9:?99>88=78=78=77<6495384162/40-2.,1-,0/+/.,0/-10-10.21.23.23-12,2.+9*/@-6H2<N8=O7<N6:L2:L25G/7I1:L6<M:;L:6G70@3-;.,6--4,,3+cdTcdRdeUgfThfWigXigXjhYhfWifWifWifWifWifWifWkeWkeWkeWmeZle[md]mf\kg[jiWikVhmWis[gt`dqgk|v‰›¬»ÀÍØÞàçíôùÿúÿÿûÿÿýÿüþÿúþþöúúðúûíúûëøùçôöáëí×ßãÊÙÜÁÓѸÌDZž¬Æ¾±ÌüÖÎËæÝàôêòþ÷ÿÿúÿÿþÿÿþÿþþþýÿúüÿúüÿúýÿüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüÿþüÿþüÿþüÿýþÿüÿÿüÿÿýÿýÿþûÿüøÿù÷ÿ÷÷ÿõøÿöùÿöüÿøÿÿûÿþýÿüÿÿûÿÿúÿÿúÿÿûÿÿüÿÿýÿþýÿûýüüþûþÿúùúòûûñýúñþúïÿùìÿûìÿýïÿýìÿðÛÿûâìÜގu`P7RB)XH/XH.[M3[M3ZN4ZN4[O5[O5\P6[Q8UO7TO9TO9QO8PN7NN6KM5IM4JN5LR8S\APY>@I,<E(AL.?J,DM0EN1JP4MR4KN1LL0YY=kjNhdIb^E^ZA_ZD`[GZUAQO:KJ8AB4@B5@B5>A6>A6>A6=@7=@79?59?59?5:@6:@6:B7:B7;C8;@9;@9;@::?8:?9:?89>89>7;@:;@9;@:;@9:?9:?8:?9:?98?87=94;4382160/4..3/.3/.21.21-2.,2.,3,,3+,3+)6%$6*>"2F+8L3;O6<N6;M5;M58J29K3;M5=O7<N87H51B0.<--7..5--4,]eP^fO_gRbgQdiUfiVgjWijXfgUhgUheVheVjdVkeWlfZlfXlfXkhYmg[kg\mf^jf]ieZgeVghVfkUenYbo]dqht„–¨¬¸ÇÎÒÛâäéïò÷û÷üÿøüûüþùüÿøüþó÷÷ëùúêø÷åòñßîíÙëêÖáßÊ×Ò¾×ϼËÀ®½´¥½³©Â·±ÌÂÁÛÒ×êàèúóûüöÿþûÿÿýÿþýÿüýÿüþýüþýþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüþýùþýùÿüùÿýúÿþûÿþûÿþÿÿýþÿüýÿýþÿÿÿþÿýýÿüûýøÿÿûüý÷ÿýøÿþúÿýøÿúöÿ÷ôÿùùÿúÿÿüÿÿýÿÿþÿÿÿÿþþüýÿþüþýÿÿÿÿÿÿÿþüÿþùÿþõÿþðÿýêÿýæúôÜÿúàçàƏˆnUN2WM2TK.SJ+WN1WN1UN1UN1TO2TO2TN4SO4QM4RN5QO6PP6MO7KO6HM6GL5JP6FO4AJ-=F)@G(DK*FM+EL*PV4PS4KN/IL/TT8bbFccIZZ@[Y@\ZA][B][B[Y@XV=US:SQ8HF1GD1EB1B@1A?2>>2??5>@5;>59?59?59A69A48C58C59C8:B79A69@88@58?79A69@8:B7:A9:B7:A9:B7;B:<D9=D<>E=8B:8B:8B98@56>14</39/271.40/51070.8-.;)0@&6F)7M';V+:W+;U0;T4;R8=P<>O<>O<:L6@R8EW=FY=DX=@T;5H2+<*,6+-4,+2*ZfN[gO^hP_iQbjSckTglXhkXhiWghVifWifWkeWlfXmeZmgYmgYkhYkg[kg\je_jf]hfZfgWhiWglVfo\erajwnzŠ‰œ«²ºÈÑÐÙàãèìò÷ûøþþùþúúÿøûþõùýïøùë÷øèõôâíìÚéæÓåâÏÜ×ÄÒ˹ʿ­Á´¤¹­¡¼¯¦Á¶²ËÁÀÛÐÖæßæûóþý÷ÿþûÿÿýÿþýÿýüÿüýÿüýÿþþþþþþþþþþþ
þþþþþþþÿþüÿþüÿþüÿþüÿýúÿüùÿüùÿýúÿýùÿþúÿþýÿýüÿüýÿýþÿþÿÿþÿÿýþýûüÿþýÿþûÿüúÿøóøíçòåÝöèßûðêÿúýÿüÿÿýÿÿÿýÿÿýþþüýÿþüýÿþýÿþýÿÿýÿÿþüÿý÷ÿÿðÿþèÿÿäúöÝýûâäàNj‡lRM0SN1RK.QK+UO/UO/TO1TO1SO2SO2QP4QO6QM2PO3PN5NQ6MO7JP6HM6EM5HQ6FO4BK.?H)CH(GM+JP,KQ-OS0PT1VZ9^aB_bCZ]@VV<UU=VV>WU>XV=YW>YW>XW;WV:XT9QK3PJ4LG4ID1DA2B@3A?3>@5<=59?59?59A67B48C57C57C5:B7:B79A68@58@59A6:B7:B7:B7:B7:B7:B7;C8<D9=E:<F;=G?<H><F;;F6<D5:B39?38=67>76=63=52>04C,9L,BY/Ga1Qo;Mm;Li=Fa>BY?<R=<M=:K9=O9DV<K^BNaCL`DH\A:Q71D0+5*+2*)0(ZfN[gO\hP`jRbjSdlUglXilYijXijXkhYkhYmgYmgYog\nhZmgYkhYkg[jf[je_jf]hfZfgWhiWejTdmZgtco|sŽ¬³¶ÄÍÍÖÝÞæéñöúøþþúþýùþøøûòôøêóôæðñáêé×áàÎÜÙÆ×ÔÁÐ˸ÇÀ®¿³£½° ¼°¤Ã¶®ËÀ¾ÕËÌãØÞëäëüôÿý÷ÿþûÿÿýÿþýÿýüÿüýÿýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüÿýúÿýúÿüùÿüùÿüøÿüøÿýüÿüûÿûüÿüýÿþÿÿþÿÿýþþüýÿþýÿþûÿýûüóîêßÙâÕÍëÝÔ÷ìæÿúýÿüÿÿýÿÿÿýÿÿýþþüýÿþüýÿþýÿþýÿÿþÿÿÿýÿþõÿþíÿýçÿþãû÷ÞþüããßƉ…jPK.RM0QJ-QJ-UO/UO/UP2UP2TP3SO2QP4PO3QM2ON2PN5MP5MO7JP6IN7GO7HQ6FO4CL/CL-GL,KQ/OU1SY5QY2X`9jpLy]qvVY^@LO4MP5QQ9QQ9RP9SQ8TR9VT;YU<ZV=XR<VP:RM:NI6HE6DB5CA5?A6<=59?5:@69A67B48C57C57C5:B7:B79A69A69A69A6:B7:B7:B7:B7:B7:B7;C8<D9>F;<F;?IA>J@?I>?J:@H9>F7>D8=B;>E><C<7A95A39H1BU5Pg=XrB`~J\|JXuINiFD[A9O:6G74E3<N8EW=NaESfHSgKNbG@W=6I5/9./6.-4,ZfN[gO\hP^jRblTemVhmYinZklZklZlj[liZnhZnhZph]oi[liZkhYkg[jf[id^ie\hfZghXfgUbgQajWerao|s~Ž–¥¬«¹ÂÇÐ×Úáçîó÷öüüøüûõúôóöíîòäéêÜãäÔÜÛÉÓÒÀÎ˸ÉƳþ«½¶¤Ã·§Ç¹¬ÍÁµÖÉÁßÔÒèÞßóèîøñøýõÿþøÿÿüÿÿüÿýüÿýýÿüþýýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüþýûÿþüÿýúÿýúÿýúÿüùÿû÷ÿû÷ÿýüÿüûþúûÿûüÿýþÿþÿÿýþþüýÿüûÿþûÿýûýôïíâÜæÙÑðâÙüñëÿúýÿüÿÿýþÿÿýÿÿûþÿúýÿþüýÿþýÿþýÿÿþÿÿÿýÿþõÿþíÿüæÿüãþùãÿþçäßɊ†mPJ0RM0SL/SL/UO/UO/UP2VQ3TP3SO2PO3ON2PL1ON2OM4LO4MO7KQ7JO8HP8JS8GP5GP3IR3NS3PV4W]9\d=grHtU‡’jšu‡ŽlnuTZaBSW<MO7NN6MM5NL5OM6RP9WR<YT>ZT>XR<TO<QL9KH9GE8EC7AC8=>6:@6:@69A67B47B46B46B4;C8:B7:B79A69A6:B7:B7;C8;C8:B7:B7;C8<D9=E:>F;=G<=G?=I?>H=>I9?G8>F7>D8=B;=D=;B;6@85A3:I2DW7Ri?[uE\zFXxFTqEIdA>U;2H3/@0.?-6H2@R8L_CReGSgKOcH@W=6I51;007/-4,YgNZhO\hP^jRblTemVinZjo[mn\lm[mk\mj[oi[oi[oi]oi[liZkhYjfZieZid^ie\ig[ghXghVchRajWerao|s{‹ŠŸ¦¢°¹ÁËÔÔÝäéñôòúüôúúñöòíðçèìÞâãÕÚÛËÒÑ¿ÌɸÉıþ«¿¸¦¼³¢Ê¾®ÓŸÜÐÄæÙÑíâàöìíÿôúÿùÿÿ÷ÿÿ
úÿÿüÿÿüÿýüÿüüþüþýþÿÿþþþþþþþþþþþþþþþþþþÿþüÿþüþýûþýûÿýúÿýúÿýúÿüùÿúöþùõÿýüÿûúýùúþúûþüýÿýþÿþÿÿýþÿüûÿýúÿüúÿøó÷ìæòåÝøêáÿôîÿúýÿüÿÿýþÿÿýÿÿûþÿúýÿþüýÿýüÿþýÿÿþÿÿÿýÿþõÿýìÿûåþúáÿüæÿÿéçâ̌ˆoRL2TO2TM0UN1UO/UO/VQ3VQ3UQ4SO2ON2NM1PL1NM1NL3KN3LN6KQ7LQ:JR:LU:IR7JS6OX9TY9U[9]c?fnG‚’c’¦sŸ°ƒ›ª” zˆ”pr{\\bFLP7LN6LK6LJ5LJ5QL8TM:VP:WQ;VP:SN;QL9MJ;IG:GE9CE:=>6:@6:@69A67B47B46B46B4;C8;C8:B7:B7:B7:B7;C8;C8;C8;C8;C8;C8<D9=E:>F;=G<=G?<H>=G<=H8?G8=E6=C7=B;:A:9@96@85A39H1BU5Ne;Vp@Vt@SsAPmAE`=9P6-C.+<,+<*1C-<N4H[?PcERfJNbG@W=5H4,6++2*'.&
\ No newline at end of file
diff --git a/testimages/testorig12.jpg b/testimages/testorig12.jpg
new file mode 100644
index 0000000..861aff9
--- /dev/null
+++ b/testimages/testorig12.jpg
Binary files differ
diff --git a/testimages/vgl_5674_0098.bmp b/testimages/vgl_5674_0098.bmp
new file mode 100644
index 0000000..d74ea64
--- /dev/null
+++ b/testimages/vgl_5674_0098.bmp
Binary files differ
diff --git a/testimages/vgl_6434_0018a.bmp b/testimages/vgl_6434_0018a.bmp
new file mode 100644
index 0000000..25e77b7
--- /dev/null
+++ b/testimages/vgl_6434_0018a.bmp
Binary files differ
diff --git a/testimages/vgl_6548_0026a.bmp b/testimages/vgl_6548_0026a.bmp
new file mode 100644
index 0000000..41e35b5
--- /dev/null
+++ b/testimages/vgl_6548_0026a.bmp
Binary files differ
diff --git a/testimg.bmp b/testimg.bmp
deleted file mode 100644
index 8603d15..0000000
--- a/testimg.bmp
+++ /dev/null
Binary files differ
diff --git a/testimg.jpg b/testimg.jpg
deleted file mode 100644
index b34ca5d..0000000
--- a/testimg.jpg
+++ /dev/null
Binary files differ
diff --git a/testimg.ppm b/testimg.ppm
deleted file mode 100644
index 9d81ce2..0000000
--- a/testimg.ppm
+++ /dev/null
@@ -1,4 +0,0 @@
-P6
-227 149
-255
-0/-0/-10.21/51.51.62/62/83/83/:3-:3-:3-:3-:3-:3-:2/91.91.80-80-91.91.:2/80-80-80-80-80-80-80-80-6.+6.+6.+5-*5-*4,)4,)4,)4,)4,)4,)4,)4,)4,)4,)2-)/*$/,%/,%0-&1.'2/(30)30)63,63,74-85.85.96/:70:7.A:0B<0D>2F@4IA4JB5KC6KC6MD5MD5OC3NB2OC3OC3PD4RE5R?1Y?2b@4nB5}E6‹H8™G9£F7¯G:¸G9¾E:ÅG;ÇG>ÊG?ËH@ÐE@çFLíCLëDKëEIîCIïBDñ>Bô=Aø;A÷:@ô:?ð<?é?@â@>×?<ËA7»=/µ@.µ@.´?-´?-³@-²?-¯@-­@,ªA,¦A-¢B,Ÿ@*›A)˜@*–A,”>-’?/’?/‘>.‘>,=+’<+’<+”?+”?+”=*”=*”=*•>+–?,–@/–?6•>5—=2Ÿ?1©B3³D3¼D4¿D4¹?0µA2¬F8žH;‡H9oA2T8*C3&=5295495473271160050-50-72/72/72/61.61-50,50,41,//-.0-//-//-0/-0/-2.-2.-5,-4+,4*+3)*7(+=.1E69P:<jBC|IHMM•OOŸW[ªdnªoƒŸt”{£‡®€†º~ˆ½sy­`a‘TKvPDhSJgOG^MH^TQbfdo|}‚‘™ž˜£©Ÿ¢¨šž “’{|lfgUXWEQNEUR[UQbUQb0/-0/-10.10.40-51.62/62/83/83/:3-:3-:3-:3-:3-:3-91.91.80-80-80-80-91.91.80-80-80-80-80-80-80-80-6.+6.+5-*5-*5-*4,)4,)4,)5-*5-*5-*5-*5-*5-*5-*3.*0+%0-&0-&1.'2/(30)41*41*63,63,74-74-85.96/:70:7.@9/A;/C=1E?3H@3IA4JB5JB5LC4LC4MA1MA1MA1NB2OC3QD4P>0U?1^A3jC4xD6„E4’E5œC3§C4¯A4µA4¼B7ÀD:ÄE<ÅF=ÍC@áEIçBIèCIêDHíDGïBDó@Cö?Cø;A÷:@ô:?ð<?é?@àA>Õ@<Ê@6¹>/µ@.´?-´?-´?-²?,°?-¯@-­@,©@+¦A-¡A+Ÿ@*›A)˜@*–A,”>-’?/‘>.‘>.‘>,=+’<+’<+”?+“>*”=*”=*”=*•>+–?,–@/”@5•>5˜>3 >1«A3µD4½C4¿D5»A2·C6¬F8œI;…G:l@3S9*B4)>63:6595484382271161.61.72/72/72/61.61-50,50,41,//-.0-//-//-0/-0/-2.-2.-3--5,-4*+3)*5)+<-0C47N8:d>=vEA†JINLšTV¤aj¥l}rŽ‘{¢†€®…¹{„»ou©[[QHuOCiOFeOG_PH_RN_[Yfnotƒ‡ˆ”™•™ž—š ”™‘ƒ~ojkY][LVSJXSZVRaXQa/.,/.,0/-10.40-40-51.51.72.72.72.72.92,92,92,92,91.80.7/-7/-7/-7/-80.91/80.80.80.80.80.80.80.80.6.,5-+5-+5-+4,*4,*4,*4,*5-+5-+5-+5-+5-+5-+5-+3.*2-'1.'2/(30)30)41*41*52+63,63,63,74-85.96/96/:7.?8.@:.B<0D>2G?4H@5H@3H@3I@1I@1K?1K?1K?/L@0MA1NB2MA1QA1YB2dC2qC3|C2‡B2’A0˜<- 
:+§;.¯=2µ@6ºD:¿F=ÅD>ÙCEá@FãBGçBFêDFðCEôADø?Dú;@ù:?õ;@ð=@è@@ÜA=Ñ@;Æ@5·=.³@-³@-²?,²?-°?-¯>,­@,ª?-§@-¥@,¡A+A,š?*˜@*•@+”>-‘>.‘>.‘>.=+=+=+=+‘>,‘>,’<+’<+“=,“=,”?+•?.•A6–?5š>3£>2¯A4¹C5¿D5ÁC5ÀD8¸F;®I=™J=G;h@4Q:,B5,?74=77<66:4494183072/72/62/62/62/51.52-41,41,21,.0-,1-.0-.0-//-//-0/-2.-5//4..5,-4*+4*+9-/>24I56[97l?9|E@†IDOM˜[`›fv”mˆŒwžƒ}­}‚¹u~·fm¤TV‰MEvLAkMAeOFcQHcMH^NK\[[eqty…‰ˆ‡Œ†Šˆ…†Š|xzlfiXZ[MVSLZU[ZT`[S`.-+/.,/.,0/-3/,40-40-40-61-61-61-61-81+81+81+81+7/-7/-6.,6.,6.,6.,7/-7/-80.80.80.80.80.80.80.80.5-+5-+5-+4,*4,*4,*3+)3+)6.,6.,6.,6.,6.,6.,6.,4/,30+30)30)41*41*52+52+52+52+52+63,74-85.85.96/96->7-?9-@:.B<0E=2E=2F>1F>1G=1G>/H<.I=/I=/J>.L@0JA0KD2NE4UD4^D3iD2sB1~A/†?-Œ9)”9'9*£<-¬@3³E8¸H<ÁF>ÒDCÚACÞBCâDCçCDìBCó@C÷?Aú;@ù:?õ;@î>@åA@ÚB=Í@9Â@3µ=-°@,°@,°@,¯>,®?,®?,¬?+©@-¦?,£@+ @*œ@+˜@*–@)”?*‘>,‘>.‘>.=-=+=+<*<*=+=+<*<*’<+‘>,”>-’?-•A6–?5œ>2¦@4²B6¼C8ÁC7ÂB7ÂF<ºJ?¬L@—K>|F:b@4L:.A7-@85>96=77<74:5294183083062/62/62/32.52-21,21,12--2.-2./1./1.00.00.10.10.5106005//5,-4+,6,-:01D22T71c;3qA7{E;‚HD‰RU_l‹i‚ƒs˜}y«x}µowµ`f¢QR‹LEyL@pL@hPEgQFfLC^GBVMLZ^^fjnquyxx}wz€vwzokoa`bTWYLTTL]WY]V]]V^------.-+/.,0/-10.3/,40-3/,3/,4/+4/+4/+4/+6/)6/)4/,4/,3.+3.+3.+3.+4/,4/,50-50-50-50-50-50-50-50-3.+3.+2-*2-*2-*1,)1,)1,)4/,4/,4/,4/,4/,4/,4/,4/,41,41,41,41,41,52-52-52-52-52-63.63.74/85096196/<5-=6,?8.@9/B:/C;0C;0C;.D:.D:.F:.G;-H<.I=/J>0I@1JG6MH5RG5YF5bE3jD1uB/}>,‚;)‹:)“:*š=,£B2¬F8²J=»I?ÌGBÔDCØDBÝEBâBBéAAð=@ô<>ù:?ø:<ô<>í?>áB>ÓC:ÅA5¹?0²?-®?,®?,®?,­>-¬>-¬>-ª?-¨>.¤?- 
?,ž?+š?,—?+•>*“>*‘>,?.>->->-Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,Ž=,<,>-‘>.?.”B4—A4@1¨@3¶A7¿C9ÅB8ÄA7¾C;·H?¦LCJ@tE;Z>2E9-<5+@93@85?75>63=52<4194083/62/43/43/23.32.12-12-02--2.,2.-2.-2./1./1.00.10.3205105104..2,,4+,7./=/.N5.Y9.e=1n@3sB;yKK€Zeƒg€p—zxªu{·ks´_d¦TT”OGƒLBwNAmNBhMAeJA`GBYGFXKKWMPU]cc`fbbia`f\Z`TW[MUXMXXP^YV`WX`WZ,,,,,,.-+.-+/.,0/-3/,3/,2.+2.+3.*3.*3.*3.*5.(5-*3.+3.+2-*1,)1,)2-*3.+3.+3.+3.+3.+3.+3.+3.+3.+3.+2-*2-*2-*2-*1,)1,)1,)0+(3.+3.+3.+3.+3.+3.+3.+3.+41,41,41,41,41,41,41,41,41,52-52-63.74/85085085.;4,<5+=6,>7-@7.A9.A9.A9.C9/C9-E9-F:.G;/H<.J>0HA1JG6IH6NG5VF6\E3dC2n@0v>-{<+‚;)Œ;*”=,œ@1£F5ªJ:´J=ÄH@ÌEAÑFAÖE@ÞCAä@>ì>?ñ:<÷;<ô:;ð<=é@=ÜC=ÍC8¾@2±>,®?,«@,ª?+ª?+ª?-©>,©>,¨?,¥>-¢?, ?,›>,—?+•>*“>)‘?*>+>->->-Ž=,Ž=,Ž=,<+Ž=,‹<+<+‹<+‹<-Œ=,>/Ž?.”B4—A2 @2¬B5¹C9ÂC:ÅB:ÂA;ºA9±I@£NGNEoG=S?4A;/96-@93A75?74>63<4194083/74/43/43/34/23.23.02-02-.3--3/-3/.3/.3/02/02/11/11/21/32032040/2.-1-,4..8.,G4-O4)X8+`<0e?6mGFyYd‚k…€uŸ||²w|¼nu»dh¯[[¡RMLB~OArL@hI=cH>`HB^ECX@BO;@FBGJDMJJQJJQIIQFKQEOUIVWO^YS`YS`XU++++++,,,---/.,/.,0/-0/-1-*1-*1-*1-*2-)2-)2-)2-)2-*2,,1++1++1++1++2,,2,,1++1++1++1++1++1++1++1++2,,2,,2,,1++1++1++0**0**3--3--3--3--3--3--3--3.+41,41,41,30+30+30+30+30+41,41,52-63.74/74/85085.:3+;4,<5-=6.?6/?6-?6-?7,B8.B8.E8/E9-G;/H<0J>2H@3HE6GF4KE5QD4XC2_B2f?.n=,v=,|:*…9)Œ;*“=.›B2¢F7¬F8»F<ÂF>ÉF>ÐE>ÙD@âC?ê@@ð>>ò::ñ;:ì<<äA<ÖC;ÆD6µ@/ª=)ª?-©@-©@-¨?,¨>.¨>.§=-¥>-£=. 
?.ž?-š?-–?,“>*‘?*>)>+>->-Œ=,Œ=.Œ=.‹<-‹<-‹<-Š=-Š;,‰<,Š</‹>.‹=0Œ?/’C2˜B1¡A1®B6¼C:ÂC<ÄC=ÀC=¹FA²QJ¥XRXQsRIWI>CC7<?6>93@72>71=60:5/94.83/63.43.43.34/13.13..3-.3-.3--3/-3/-3/-3/.3/.3/02/02/00.11/22021/0/-/.,2.-4/,?0+D0)K3)T8-Z<4dFFu]jƒs‰€«…„¾~ƒÇtzÆmp½ce¯VSšLC‚K?qI<gG;cE>_FB]DBW?AN;?H:BE>HGDMHGQIGQHJRGNVKUXM^ZOaYNaXO++++++,,,,,,.,-/.,0/-0/-1-*1-*1-*1-*2-)2-)2-)2-*2,,1++1++0**0**1++1++2,,0**0**0**0**0**0**0**0**2,,2,,2,,1++1++0**0**0**2,,2,,2,,2,,2,,2,,2,,2,,3/,30+30+30+30+30+30+30+41,41,52-63.63.74/85085092,:3+;4,<5->5.>5.>5.>5,B8/B8.E8/E8/G:1I=1J>2I?3FC4FC4JB5OA4TA2\@2b>0j<-q<.w9*}8)…8(Œ:,•=/›B4¤B5²F:ºE;ÁF>ÊG?ÔG@ÞFAçCAîB@í;;ë;;ç>;ßB;ÑD:¿D4¯@-¤>(¦A-¦A-¥@,¥@.¥@.¤?-¤?-¤>/¢>.Ÿ@.œ?.˜?-•>+‘?*>)>+>->->-Œ=,Œ=.‹<-‹<-Š=-Š=-ˆ<.ˆ<.ˆ<.ˆ</‡>/ˆ>1‹?1‘D2–C1¢B4­C6ºC;ÁD>ÁD>»EA¹PL²[T¥e\‘f]u_T[UIGMACI?<92?82>71;6094.74-63.43.43.34.24/13./4..3-.3-.3--3/-3/-3/-3/.3/.3/02/02///-00.22022010.0/-0/-3/,8,,;,)C0*K70S<6^IHtbn‡z”Š¶ŒÆ„ˆÏz€ÌrxÆik¶WWŸID„E=nG<dD<aC>^CAY@CV@DP>EKGQRKWUQ^WU`XS_UR^TT^SY_S^[LaZJaZJ,-/,-/,-/,.-------.-+.-+/.,/.,1-*0,)0,)0,)/+(/+(/+*/+*/+*/+*/+*/+*/+*0,+/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+1-,1-,2.-1-,1-,1-,1-,1-,1-,1-,1-,0,)1-*2.+3/,3/,3/,3/,3/,3/,3/,3/,3/,40-51.62/74/80-92,:3-;4.=4/>5.>5.>5.?4.?5,B5-C6.D7/F90G:1F<2G?4H@5J@6P?5T>3X<1^90b6+m9.t8-|8+ƒ9,;/“=0˜?1ž>0§A3­A4µC8¾E:ÊG=ÔG>ÞE?åC@è@?êBAæDAÚE>ÈD8·B1ªA.¢B,¢A.¡@-¢?,¢>.¡=-¡=-¢>. 
?/œ<,š=,˜<-•>-“=,=+Ž=*Œ>*‹<+‹<+Š=-‰<,‰<,‰<,ˆ:-‡;-‰=/†</†</‡=0ˆ>3ˆ@2‰A5‹A4‘E5—D4£E9±I>ºG@»D>»EA¸MG´ZR¯f_£qf‘sh~rdjj^V^SIQFLLBJF=B>5<8/95,74+63,33+43.34.14-14-02-/1,,1+,1+-2.-2.-2.-2./1./1./1./1.02/02/11/11/11/11/11/40/4+0;/3A32C4/J;6]OOymy‹…Ÿ“»”–ϐ•Ùƒ‰ÓtzÆjn·_b§Z[”LItHBdA>]>>X?BUAIVLU\U`bbqnn}xv†|rulyoguh_k_W_P]\Hb\Fc]G,-/,-/,-/,-/------.,-.-+/.,.-+0,)0,)/+(/+(/+(/+(.*).*).*).*)/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*/+*0,+0,+0,+1-,1-,1-,1-,1-,1-,1-,1-,1-,1-,0,+0,)1-*2.+3/,3/,2.+2.+2.+2.+2.+2.+3/,40-51.62/80.91.:2/;4.=4/>50>50=4-?4.?4.A4.B5-C6.E80G:2H;3H>5H>5L=6O>6Q=4V;2Z90_7/h70p7.w7-9-‰9.<1–<1›=1¢@3§A3¬B5´D8¾E:ÉF<ÔE=ÛD=ßD@àE@ÞFAÔG>ÄF8²C2¥B-žB-ŸC.žB-Ÿ@.ž?-ž?-ž?-ž>.ž>.š=,™>,–=-”=,=+>+Œ>*Œ=,‹<+Š=+‰<,‰<,‰<,‡;+‡;-…<-†</…=/…=1ƒ=1„>2†@6‡A7ˆB6“G9—E7£G<¯J@¸IB¸GA·LF³TN±d\ªqfŸ~oo|mmseZfZNXMLNAKI=EC7@=4=:188.44,11)23-23-03,/2+/1,.0+.0+.0+/1.-2./1./1./1./1./1./1.02/02/11/11/11/11/11/2015+49-7<23?53H?:^VTxr|Šˆž““¹”—ʐ–Ô„‹ÏyÂqy¸kt­hnž\_€XZqSUjRWjT^hZgmfvvr‚tˆ~’‡ƒ•‰~Žv†yr€qfteZeT[ZE`Z@b\D,-/,-/,-/,-/,-/,.-------.-+.-+.-+-,*/+(.*'.*'.*',+),*+,*+,*++)*+)*+)*+)*-+,-+,-+,-+,-+,-+,-+,-+,,*+,*+-+,-+,-+,.,-.,-.,-.,-.,-.,-/-./-./-./-./.,0,+0,+1-,2.-2.-2.-2.-1-,1-,1-,1-,1-,2.-3/.40/51.80.91.:2/;30=31=4/=4/=4/?40?4.A4.A4.C60D71F93G:4H;5J;6K<7N=6P;6S:5V72[6.c60k6.t5,}7/‡9/;0”<0–<1?3 @4¢@3¨@3±C6ºD8ÅE:ÍD:ÕF@×H@ÔIBÌI?¾E:®C3¡B.œA.B/œA.œ?.›>-›>-›>-›>/›?0˜<-–=-”<.“=.>-Œ=,‹<+Š=+‰<*‰<*‰<,‡;+‡;-…<-„:-ƒ;-„<0‚<0‚<2‚>3ƒ?4…A8‡C:ˆD9”J=—H; 
H>¬KD²KF³LG²SM®`V­sg¦qŒz‘Ž{‚‰ws€ocqbXcUNRDMN@HI;DD8@@49;.46+/1&01)01)/0*/0*./*./*//-//-//-.0-//-//-//-//-//-//-00.00.00.00.00.00.00.1/26+97+98/4;63HE>_^Yzz|‹š”±’—¿•Ç„ŽÃ}‰»{‰¶|‹²}Œ«}ˆšwƒq~‡n}‚n~~o‚~yŽ…ƒ™‹¡”‘¨˜¥”ˆŠƒ–ƒ|{j{i\hTXX@]Y<_[@-.0-.0-.0-.0-.0-.0.......-+.-+-,*-,*.*'.*'.*'.*),*++)*+)*+)*+)**()*()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+-+,-+,-+,-+,.,-.,-.,-.,-/-./-./-./+*0,+1-,1-,1-,1-,0,+0,+0,+0,+0,+0,+1-,2.-3/.40/91/:20;31<41=31=31=31=4/>3/>3/@2/@3-A4.C60D71E82F93H94I:5J;6L:6N94Q83T50^72e60o6/x8/‚90Š:/<1”>1™?4›?4›?2Ÿ?1¥A2®B5¸D7¿E:ÇG>ËH@ÊJAÃI>¶F:ªB5žA0™@.™A-™A-˜?-—>,—>,™>,™=.—>.•<,”=,“=.=-Ž=,Š=+Š=-‰<,ˆ;)ˆ;)‡;+‡;+„;,„;,ƒ;-€;,;/€<1€<1>5ƒ@7ƒC:‡D<ˆE<”L@–H<žG>§JC®LI®QL­]Vªj^§€o¡yšš‚›ƒ†•€z‹xm{jbm]SZJQUFKO@EI:@D6;=057,13(01)/0(./).-(.-).-)/.,0/-/.,/.,/.,/.,/.,..,/.,..,0/-//-0/-//-//-//-//-2-17,:6*83-1961HJ?bfX{€z‹““£–°Œ•¶ƒ²¯…™²¤¶’¨³“¨«¤£Œ ž‡ž˜‚™}˜‰œ‹ˆ£”°š˜±›”­—‹£…›„’|k|iXfOSV;ZV9^Z=+/2+/2+/2+/2-.0-.0......------.-+-,*-,*,+),+),+),*+,*+,*++)*+)**()*()*(),*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*+,*++)*+)*+)*,*+-+,-+,.,-.,-/-./-./-./+*0,+0,+0,+0,+0,+/+*.*)/+*/+*/+*/+*0,+1-,3/.40/901:20;31<42=32=31<20<20=20=2.?1.?1.@2/A30B5/C60D63C84D95E:6G96H94K84N50X72_60i70r7/}:1†<1Œ>2>2—@6—?5—?5—?3œ@3£C5«C6³E8ºE;½G=¾H>¹G=°D8¦A5›?0—@/—@-—@-–?.•>-”=,•<,–=/–=/•<.“;-’</Ž<.‹<-‰<,‰<,ˆ<,‡;+†:*†:*†:*ƒ:+ƒ:+‚:,€:.€:0€<1€=4?6‚B9ƒD=‡F@ˆH?‘KA’H=šG?£LE¨PL¨XQ©f]¦vh¢Œw›™€•¤‡Œ£‡†ƒ~‘}t‚qjue\eRV_LNUCEL:?F6<@27:/68-01)00(.-(/,'/+(/+*1-,2.-1-,1-,1-,1-,1-,/.,1-,/.,2.-0/-2.-0/-0/-0/-0/-2-18,:5)51++66,GL8`hQzƒpˆ’‡—–š ™¥†–£‡›¢ª©›¹± À³˜·¨š¶§›·¨˜·¥‘±œ‰«’‰«±”’³–•´•­ˆ¢…„›{’xgzdTbIQT9VU9XW;,03,03,03,03./1./1//////....../.,.-+.-+.-+-,*-,*-+,-+,-+,,*++)**()*()*()+)*+)*+)*+)*+)*+)*+)*+)*,*+,*++)*+)*+)**()*()*(),*+,*+,*+-+,.,-.,-/-./-./+*/+*0,+0,+/+*/+*.*)-)(0,+0,+/+*0,+1-,2.-3/.40/:12:12;31<42=32<21<20;1/=20<1/>0->0-?1.@2/A4.?4.@51@72@93A:4B94C84F74H5/Q51X5/a6/l8-u9.€</†>0‹=1“?5•>5“?5“?4•B4šB4 
C4¥D4¬B5°D8´E:±E;ªB7¡?4š>1–>0•?.”?+“=,“=,“<+“<+”<.”<.“;-’<-<.;-Š;,‰<,‡;+‡;+†:*†:*…9)ƒ:)ƒ:+9*9-9-€<1<3?6€A8‚C<…F?ˆIBŠICŽJAG>—HCŸNJ§VR§`Z©pe¤ƒrœ”}–¡ƒ¨Šˆ§ˆƒ „~–~yˆup{jdp\]iSR^JJS@BK:>E5:@29<134,22*1.)/,'/))0**1++2,,1++1++1++1++1++0,+0,+0,+1-,1-,1-,1-,/.,/.,/.,2,.8*75(13+(56&EK1\gGu‚d†“yŽ›Š‘ž”Ÿš‹ž˜¥——´ž¢Å¥©Î­¥É­¦Ê°§Ë±¤È¬—¾Ÿ‹³‘ˆ°²Ž²Ž²¬Š„¡‚€™{wŽrdx]Q_ENR7QQ5SR6,03,03,03,03./1./1./1./1/////////////.,/.,.-+.-+/-..,-.,--+,,*++)*+)**()+)*+)*+)*+)*+)*+)*+)*+)*+)*+)*+)**()*()*())'()'(+)*+)*,*+-+,.,-.,-/-./-./+*/+*/+*/+*/+*.*)-)(,('0,+0,+0,+0,+1-,2.-40/40/:12;23;23<34=32<21<21;10<1/<1/>0/=/.>0->0-?1.>3/=60;60;83<94=:5>93@72C60G4.O4+Y4+d5+n8,x:-;.…;.<4‘<5>3@3A2“B1—A2™@0 >1¦@4ªB7ªB9¥@6Ÿ>5—=2•?2’?-’?-‘>,=+‘;*‘;*’<-’<-‘;.‘;.Ž;-;-Š;.ˆ:-†:,„;,…9)…9)ƒ:)‚9(‚9*9*~8,~8,€<1€=4€@7B9„E>…IAˆKFŒMFŒIAŽH@˜JF¡SO¨]X©hb©{n¤Žy™œ’§†‰¬‹‚ªˆ€¡„™€~‹wxnjxadr[ZhQQ]IITCCL;>D6<?467/44,30+0+'1()1()2)*4+,2)*2)*2)*2)*2)*0**0**0**1++1++0,+0,+0,+0,+0,+1+-6)25)-4-%46!AH&Wc;q€W‚’mœ} †¢Œ‹¡Š¨‰•¸Ž È”¤Ð©Ó«¨Ò®§Ñ­¡Ë¥“½—…¯‡‚¬„‡°†ˆ®…Œ±ˆ‹«†Ÿ{{”tqˆk_sWM]BIO3JM0KK/./1,01./1./1.0/.0/.0////00.00.00.0/-0/-0/+/.,1-,2.-1-,1-,0,+/+,,*++)*+)*+)*+)*+)*+)*+)*+)*+),+)*+),+)*-(,*(),())'()'((&'*()+)*,*+,+)-+,.-+/.,/.,-,*-,*/+*/+*.*)-)(,(',('1-,2-*2-*2-*3.+4/,50-61.;31;31<42<42=31<20;1/;1/<1/<1/=/.=/.=/,>0-?1.=2.=4-=6.;819919:29:2;81?6/C2+J1,T2)^3*g7+o9-u=.z</‡=2‹=3‹?2Œ@2Ž@3@1’?1•=/˜</œ>2¢B6¡C7žB7™?4–>2”>1”>/“=.=-<,Ž=,Œ=,Œ=.>/Ž<.Ž<.‹</Š;.ˆ:-ˆ:-†:,„;,‚:+‚:+:*~;*~;+|:*}9,|:.€>2>5€@6‚C:ƒG=…JB‡NE‹OGŒICŽID“PJ˜\Tžg`uiž‡už˜€”¡…ª‰‰¬‹ƒ¨‡€¡„~›zytˆoi}bby]\pUUgOO\HGRAAH8=A388.85.7/,3+)2()2()3)*4*+0*,/+*0*,0**0*,0*,2),2),3*-1+-1+-1+-0,-0,-0,-1+/4)/4*+4-%46!?F%T`8m|Qg‰™tžz¡~ˆ¡zŠ§{‘¶‚›ÄˆŸËŸÉ™žÇÆš˜Á•Œ·Š‚­€…®‚Œµ‰„«‰®…‰©‚~œxvolƒfZnSJZ?GM3FJ1DF./0+.0+/0+01,01,01,12-21-32.43/43/62/51.41,3/,4/,50-50-4/,3.+2,,1-,0,+0,+.,-.,---/--/,,.++-*).))+.)/.)-/(/.)-/)-.)-.*+.*+/+*/+*-,*.-)--+-.)./*./*------.,-/-./.,0/-2.+2.+2-*4,)5-*6.+8/*:1,:1*;2+=4-=4->50>50>50=4/<3.;2-=2.<1-<1/;0.=/.>0/>0/@1.A0)@2);4*77-39-28.56.:3-?-)F*'L)'
S*&Z/(`5,d<0k@0yA0@/†A1‹B3@3“=0“;/’8-“;/“=.”A1•B2”C2’A0‘>.‘;,—;0—:2’;1Œ=0ˆ?0„?/‚?.ƒ>.ˆ@1ˆ?0Š>1Š<0‰;/ˆ:.†:-ƒ;-{9+~@3w<.q7)w>-w>-v;-}?2{;/‚@4‡E9‰I?ˆLA†MB„PC‡NE‘KI—SP”bY‘paŒ}j‰‰q‰–|Šž‚¤‡¦‰Ž§Š§Œ‡¤ˆ~Ÿ‚uš{o—uiib„_[zXZsUZnSWeNPWEJK=C?6@93;0.6**3')3'+3'+1&*,*-)+***,*(),'+.(,1(-2'-3(.3(.3(02(00)10)1/*1/*0/)-1++0-(//#<?*V\@pzX‹gœu‹sŒ¡vŒ¦wŠ¨v‰ªuŽ´{™¿†ŸÁ¾–ºŠ³ƒˆ®}…«|ƒª{…ª~ƒ¨}ƒ¦~€ž|y”ur‰mh|cYkUMZFIQ<CH4?B//0*01+01+01+12,12,21,32-43.43.74/74/63.52-50,50,7/,7/,6.,6.,3.+2-*0,+0,+0,-/-./-0--/-,1+*/)*.)(-.)//(//(//(//)-/)-.*+.*+/+*/+*.-).-)./*./*./)./*.....0....../.,/.,1-*3.+5-*5.(7.)8/(:/);0*<2)=3*>5,>5,>5.>5.>5.=4-<3.<3.=2.<1-;0.;0.=/.=/.>0/@1.C0*C0)A2+>4+:6-95,:3-<1-?-+D*)I*(N+'T/)Z5-_:1e>/pA/xA-€A0ˆ@2?4‘=3“:2“;19.‹</‹>.Œ@0A1A1ŽA/‘?1•;2•;2<2‹=0†>0ƒ@/?/‚?.…@1†>/‰=0ˆ<.‡;.„;,‚:,€;,x8,x<1s9-n9+s>.s>.r8*u:,}=1ƒA5‰E:‹I=‰K@†NA„PC‡NE‘JH—SR’f]Œwf„†p€“wž„¥†ˆ§ˆ§Š§§Š¤‰Ÿƒwœ}o˜ve’i`‹`Y‚ZY{X\wXZnSSaJNUCFH;C@7<737/-3*+2)*1(+.(*,*+**,+),+),-(,/)-2(02(02(02(02(10)1/*1-*3-*3-*12-12.-0,)--%8:,SXBox]€‹i‡–o†™l†žnˆ£p†¤p„¥pŠ­w“¶€™¹‡–¶„“³Œ®{†«x‚¨w‚¨yƒ¨|ƒ¦|‚¤{x’uqˆnh{eYjWMZHEL<@D5;=/12,12,12,12,23-23-43.43.54/54/85085085074/72.61-80-80-7/,6.+4/,3.+2,,1-,1-./-.0.1/-0--/,,.+*/+)./(/1'//)-/)-/)-/)-.*+.*+0,+0,+/.*/.*/.)0/*0/*0/+//-///0./0./0/-/.,1-*2-*6.+70*90+:1*<1+=3*>4+?5,?6-?6-@7.@7.?6/>5.=4-=4-=2.<1-;0.;0.;0.;0.<1/?1.C2+E0+H/+K--L,/K+.I*/E+.A-,@.*A.(E.(M.)X1*b3-g5,j:,o;-w;0=4‡<6Œ<5:4:4Š<2‡=2†>2…?3†A2‰A3ŒB5C5Ž?2=/Œ>1Š>.‡>/…>,†=.ˆ<,‰;.‰<,ˆ:-†:*ƒ;,<)~=+{<+}=1z<1v:/u;/x@1x@1v<.v;-?5‚B8‰F=‹H?‰JA‡KA‡NCŠNF‘JH˜TQ–f\ve‡…n‚‘t{¤ƒ…¨‡ˆª‰ªŒ©ŒŽ¥‰ˆŸƒš}y˜xi–ma’e\‹a\†`]‚`[|]UrVPhPFYEBP?:D93:2.3,*/)*,)**(0'(1&*1&*1&*0'*1(+0*.0+//*.-+.,+0+,0)-0(-1(-1)-01/23/..*'**"57*PUAmv[|Ši€k~“j™jƒŸn€ n~ 
m€¥q‡¬x¯|‹­zˆªx„©v§v§u¦w¦y¤y€¢}}›yw‘tp‡mh|cYkUMZHDH9=?299-23-23-34.34.34.45/54/54/650761961:72:72961940940:2/91.91.80-50-4/,4..4..3/03/01/01/20.1..0--/-+.0)02).0*.0*.0*.0*,/+,/+*1-,1-,0/+0/+10+10+10+10,11/1111/010.10.2.+3.+3.*92,92,;2+<3,>4+@6-@6-A7.B8/A8/B90A8/A81@70>5.>5.=2.=2.<1/;0.;0.<1/=20=2.B3.E2.L..R+0V'/U&.P'/I).C/.<1+;2)?2)G0(R/)_.*d/)i9/k;/u<3}<6…;8‰;9Š;7ˆ:6‰>8†@8†B9…B9…C7†B7‡A7‰A5ˆ@1ˆA/‡@.‡@.‡>-ˆ<,ˆ;+‰:+Š8*ˆ9*‡:*ƒ:)<){=(x>(x>*‚>1{7.z7.z<1v;-w=/|A3{@2€B7ƒE:†H=ˆJ?†J@‡MBˆODŠQHŒSJŽ\QŽh[‹tb…g€‹m~–vž|ƒ§ƒ„©‡‡ª‰‹ªŠ‹¦‡‡ ‚ƒš~}˜yq•oi”ifgfŒeg‹gd†e_~_Zw[PhPK^JBP?8D62:/.4*,/(+*%2&&5%(4%(2&(1'(/)+/+,.,-+++*+-*+-(,-(,/',/',/*+-.*+1++0+'.+"88,PUAiqYv„c{Œhyh|–iœl}Ÿmyžkz m¥r‚§t€¥q}£p}£p}¥s~¥v~¥x}¤x|¡x}Ÿzz™wuqn‡je|bWkRN[GDF9?=1:8,45/45/45/560560560761761872872;83<94<94<94<73<73<41<41;30;3083072/61.61.5106216213122011/00./1-.2,04+.4+.4+.2,.2,,2,,2,,3/.3/,3/,3/,21,21,32-32.32032032032051051.61.61-;4.<5/=4-?6-A7.B8/E8/C9/D:1D:1E;2D:1C90B8/@5/@5/>3/>3/=2.=2.=20=20>31>31@51E31M02T,4X)3W(2R)1K,1B30:6-77+:6*B4)M2)X/)^/)f:1j;3s<7z=:‚<:†<;‡;;†::‚;7>8A8B9ƒC:ƒC:…@9„@7‚C2‚C1ƒB0„?/†=.ˆ<.‰:-‹9-‰7+‡8+…9+‚:+~=+x>*v?*x>*9-|/'‚8/„>4w4+s5*}A6}C7E:€G<‚I>ƒJ?„KB†MD‡QG…WJ|aNzjQ€pYu]‚|bƒ†i†“uˆ|ƒ£~§‚‚ª…‚¬†©„¥€{ž}z™wz’pzlxlwŒkumr‰lm„gkd`rX[iRR[HHL=@@4;7,70&5*$6('5''3''1'&.)&+*(++)+-*',('+*)+*)+***,+),+),-)*,#$2*'50*86*BC5UZFfpWn}^tˆeqŠbuex˜iw›ks™htšiwŸkz¢nx lwŸmx ny£s{¥u{¥w|£wyžuzœww–tsŽom†hd{_WkPN[GCC7>:195,560560671671782782872872983:94=:5>;6>;6>;6>95>95?74?74>63=52;63:5294194184184395484373243151240/6-06-.6-06-.4..4..4..4/,40-40-40-51.32-32-43.43.43/431542540841850940:5/=60>7/@7.A8/C90D:0G:1H;2F<3F<3F<3F<3E;2C90B71A60@51@51?40>3/>31?42@53?53@72C52I35P16T/6S.5P05J22C52=90<:-=:+C7)I6(Q3)W2)]2+d3,l50v64}77‚87ƒ77‚66~75}:4}<6}>5€?9ƒ@8†?9…A8€B3€C1B1ƒ@0…=/‡;.ˆ:.‡9-…9,ƒ9,‚:,<,|=,y>,x?,|=,‡5*‹2,¡LE§XQ‹A8|90‚F;€K=yH:zJ<{M>|N?}OBQE‡UJ‚_LrkOosR|uX‡w]yb˜iŸ‹s 
”z––z|‹£ˆ¨…§‚€¤~{Ÿy~™vƒ‘p…ŽoƒŽp€pp{ŽpwŒms‡kj~bfv\_hSV[GOM>GA3@6*=0';,'9+(6+'3+(/,',-'+.').().(+-(-,*/+*3)*4(*7'*7'(3($<3,E>4IG8QR@]bKgqVjyZn‚]k„\l‰]p‘bq•eo•do—eršfuŸmrœjq›itžnx¢rz¤vy¢vyŸvvštw™vu”rokj…fc|^UlON\ECC7@91;4,671782782782893893983:94:94;:5>;6?<7?<7?<7@;7@;7B:7B:7A96@85=84=84<73<73<73<74<74<74;639529338308/09/.8/080.80.80.61.61-61-61-52-52-63.63.74/74/540540841952:63<94=84@93@70A8/C90D:0G:1H<0I=1I=1J=4J=4J=4I<3F<3D:1B8/A7.A81@70@72?61?61@72@72A83?74@85B86D97G96H96H96H94E80E8/E9-E9+G9,I9*K9+Q7*Z/&d/'n3-z63ƒ98‰;9‹;:‹=;‹A>‡@:‚=6<3:3‚<4ˆ=7‰@7ƒA5B3‚@2ƒ?2ƒ=1„<0;/€</~<.|=.{=.|>/|>/}>/=/†9/1+¢<8ÍlfÙ~y­ZTŒC<ˆLAN@tJ<qM=pQ?qR@tS@zWD‚[J~eOmsOo{U„y[˜u_©oc¶mf¾qk½wo»‚w±Œz§—~žŸ€–¡¡~…y…™vˆ‘r‹rˆ‘r„“r€–r|–sx“pt’pm‹ii„edx\]kQV^GMP;ED0B;+@3+?2,;0*70*30)00(./'./)/0*2/*6,+:*+>(+C&+E$+C&(F5-LC4VQ>[YD`bJgmQiwVj{Wl‚[g‚Wf†WlŽ\o”an”an–bršfsko›jo™irœnw ty¢xxžwu›vs—su—vs’rn‹li„cb{[TmMM]CGH:E<5@707827828938938939:4:94:94;:5<;6?<7@=8@=8@=8A<8A<8C;8C;8C;8B:7?:6>95>95=84>95>95>95>95=85<73:51;30:0.:0.91.91.91/91.91.72.61-61-63.63.63.74/74/74/540651952;83<94?:4B;5B;3A8/B:/C9/E;/H<0I=1J>2J>2K>5K>5J=4J=4F<3E;2C90B8/B92B92A83@72@72A83B94A:4?82@93B;5D=7F=6G<6K<5N;4M6.N6,Q6+Q6+Q7*P9+P9)V6'f6*r6,~;3‰@9•D@›HDŸJGŸLF 
QJ™LDŽD;…;0„7-…7-Œ91=4ˆ>5‡>7†=6…<5…<5=4}=3z>3x@3vA3x@3z>3<3ƒ:3ˆ73‘31˜(&³=;ì|zý•’Åhc–G@‰K@xH:nM<jQ=fT>hV>lX@t[E`L€hPysQ„wUžt\´l^É__ÓV\ÙQ[×T\äouÙzx̆|À~µ•€«•}£’xŸvœŠrœŠt™u•u‘‘u‹’s†‘s‚‘r|‹ny†ju{amoWgbN_TBUE5R</O4-M1-I0,D/*>/(9/&7.'6/'81):/)=.+A,+F)+H(+K'+H)'TB4YQ<d^FgeLilOnuVm{Xl~Xk„Zg„Vg‡Vm]p•bq–br™duit›lo™ko–jr™mwvyŸxwšyt–us”uu”usqoŠki„ea|[TmMM]BMN@KB9F=69:49:49:49:49:4:;5=<7=<7=<7=<7@=8@=8A>9B?:D?;D?;E@<E@<E@:D?9D?9C>8E>8D=7B;5B;5B;5B;5B:7B;5A:4A:4A83A83A83@72@64>71>71>71<71<71;60:5/85.85.74/74/961961961:70<71=82A:2B;1C:1D<1F<2J>2K?3L@2N@3N@5N@7N@7L?6K>5I<3H;2E;2E;2B90A8/@91?80?80?80@93@93<5/MD=M@:K:3T@9R62O0+a<6\3-`5.`4+^/'^/%f6,m=1q=0‰I=G;—I?ŸKAŸF>›>7š=6žD;¦ND¤PE¨VJ«YK¥OB™A5—;0›>6“98‘98Œ65†52†84„?8|@6r>1rB4oA2q=0v:0‚72Œ43’-1š',ÈHIèbaÅEDÍWUáyv«VO†F<„VFmR=i[AibFjeHj`En^D|dLjT’bL²m]ÑnhÛX]à;Lç-Eò+Hõ0Lö@YìI\ä[eånrâ{|ۀ}ۃ؉‚Ãv½wº„x¶†x²ˆx­Šw¨‹y¥Œxš€o™~m˜xi“oabY„UM{IBxA>u:<i.2d02a11P&'G%#L0-M51G4.E2,D1-F1.F.,G+*K--Q6/YH4`W:f_BgdEkoNu|Zu„]oYlƒWl‡Zp]r’as•bu—dw™fv›hr–js˜os—qq•os•tw™xz˜|y–zx•ysrm‡jk„fi‚dazZTmMO_DNO?SK@PG>9:49:49:49:4:;5;<6>=8?>9>=8>=8A>9A>9B?:C@;D?;D?;FA=E@<E@:E@:D?9D?9E>8E>8E>8E>8E>8D=5C<6C<4D;4C:1B92B92B92B92A83A83@93@93>71<71;60;6096/85.74/74/96196/96/:70<71>:1A:0B<0C;0E;/I=1J>0L@2L@2N@3N@3N@5N@7M?6J=4I<3H;2E;2E;2C:1B90@91@91@91@91A:4B92D93I81L/+V.,j76u99{;;…ECƒD?„H@…IA†G>ŽIB™NH¡PL¢OG“D7”B4šB6ŸC8 >3œ7-œ7-Ÿ=0£C5›?0™@0 
E3¤G6§F6®H:¶NE´LM¯HL¥@D™9;‘98>:ƒ@8x>3o>0zI;…LA„?8„,+’).³9DÒLWÜKNßMMÊ>=À@?Ð`\³YQ„@5‚TDyaIe[@^]?ihIslOvgJ~dK–cN¼dXÖ_[æUZëANò,Eþ%Dÿ&Hÿ'Iÿ.Nù3Nò<RðIZëP^åQ]ãS]àXbÛbiÕflÔjnÒmqÏqrÍutÉxwÇywÊ||ÉyzÈvxÇqtÆkrÅfnÅakÃ_kÅ`n³S_¥KUšGQDK†EIm69HT1-M0*H1+J6/K81K:2O>6UE8[O5cZ9gaAifEnrOz‚]z‰bt†^r‰_p‰_p‹`qŽ`u’dw”dw”dw”fs’ix–rz—xy–x~˜}ƒ„…‡ƒ›…™ƒz’zr‰om„hhd`y[TmMNaERSAVPBUOC8938938939:4;<6<=7?>9@?:@?:@?:C@;C@;C@;C@;D?;D?;FA;FA;FA;E@:E@:E@:G@8F?7JA:I@9H?8G>5F=6E<3F<3E;1D:1D:1D:1D:1D;4D;4D;4C<4?80?80<71;60:5/:5/94.94.96/96/:5/;7.>7/@9/B90C;0E;1F<0I=1K?1M?2M@0NA1M@0P?5M?4L>3K=2I<3H;2E;1D:0C:1C:1A:0A:0?;2@<3@<3D;4UD<R3.j23ŽAG­LWÃU`Ë]fÆ_b›B>—I?–LAœNB¬RJ¾URÌOSÄLK¥@4B0 @0¥?1¥;.£7+¤8+§=/«E6¡>+˜7$:'¥>-¬A/²C2¶D:§02¯:B¹DL½LRºMR­KL–A>‚71{8/v6,x2*„1-œ37¹=GÕGWèM[âFIÏ53Ð:9½31ÈNKÆc]”J?Œ[J}cLj_CgdEtoOpQhJhN°hYäbbõQZôDQö7Iý1Jÿ2Nÿ/Mÿ*Hÿ2Oü0K÷1Jõ5Lò4Mð2Kó3Ló7Pò@XïC[îF]íIaëNcêQeèUhèVkåSjäRiãOiäNiçNlëPoòSsóVuþh…ña|å[tÕUlÍYlÉdr¨R]r-2^&'Y0,W:4Q?5B:-:8)>B1LN9VO3aV6e_?heDquT†e~ŒiwˆdxŒiu‹er‹dr‹cvŒeyhzizj€•v„›~Šž… Š’£—¨–˜©™–¦™‘¢’‡˜†zypƒmg~b^wYTmMPcGSXDXUDYVG7827828939:4;<6=>8@?:A@;BA<BA<DA<DA<DA<DA<E@<E@<GB<GB<GB<FA;FA;FB9HA9HA9LC<KB9JA8I@7H>5G=3F<2E;1D:0D:0E;1E;2D;2E<3E<5E<5@91?80=82<71;60:5/:5/:5/96/96-;7.;7,?8.@:.C;0D</F<0F=.J>0K?/M@0M@0M@/M@0O?2O>4L>3K=2J<3G:1E;1D:0D;2D;2B;1@<1@<3A=4B>5F<3S81[*&‰:?¾T`ÙTeæUfãUcËJOŸ30–:/‘>0™A3®F=ÅIGÕBHÍ>@±B7§F5©B3¬@3¯?3°>3³B4´D6²G5©B/£<)¤;(©<(«:(®6&­3(¶97º9=½7>¼7<¿<BÆJLÄTS¾XS™?7Œ4*‰,%61ÃHKÜOWâAQÛ3@Þ:9Í.*Ó84È74ÆE@È]U©WKWFwW@veI~pSpQ„fJdK©o[ÕuiîSWüDP÷;Jø9Iþ=Nÿ@Qÿ:Lù4Hÿ?Sþ9Mú4Ký3Ký1Jÿ/Iÿ0Nÿ6Sÿ3Tý4Vý6Wú7Wø9Zø=\÷>^õ@aøCfö@fõ?eõ?gø?hüCmÿErÿIsÿOuÿQuþUvòSqçTnåaxÖfv·Wb}15j0/X1*P8,G>/>A.;D/?G/PK._T8g`CokN||`ˆŽr‡‘v}‹qzq|s|szozŠm}Œoƒ’uˆ—z•£Šœ©•£®¦±¡¬´§±¹®°·°«µ­¡«¢” 
”ƒ‘‚s„qh|c]tXSlNPdHRZCWYDYZH671671782893:;5<=7?>9@?:BA<BA<DA<DA<DA<EB=FA=FA=FC<FC<GB<HC=HD;HD;JC;JC9LC:KB9KA8J@6J=4I=1H<0G;/E;/E;/E;/E;1E;1E;1D;2D;2@9/@9/@91?80<71;60;60:5/;7.;7.;7.<8-?8.A;/C;0D</G=1G>/K?1M@0M@0NA0O?/O?0O?2N=3N=3J<1I;0G;/D:0D:0D<1D<1B;1@<1A=2A?3B?6K=4\5.w32­LSÛ]iæM_å@Q×6E·&+¨1)7(’:&—<)¬@3ÃD=Ô>?Í;;±?5¨B4ª@2­?2²@5¸C9¼H;»J<¬>/ª?/¨=+¦;)ª9)°:,·;/¼<1ÊFAÎDAÐ>?Ð79Ö7<ÞBEâLNÝROÄC>ÆKCÍTLÔSMÙKJÛ?Bà5>á27Û4.Ú7.Ð1+Ï:4¿84³@9µ[P–UC{N9‚bI†kP„`F’[F­fTËrdçmhêEKò:Dê9Cç<DçBHçFKèGLêFMôJSòBMò9Iö5Hù2Gý/Fÿ2Lÿ8Rÿ9Vÿ9Xÿ:Xý:Zþ;]ÿ=_ÿ@bÿAeÿCiþDkþDmýBmüCoüCoüBqýBoÿGpÿ@hÿGmÿMpòIhéOkå[rÙcs½YcŽ?D`&$Q+"V@3VO=IL7@D-RJ3eYCujV€{gŽy™œ‹•œŒˆ“ƒ‚Ž€‰—ˆ›Œ™ˆ‡’‚Š“‚—žŽ¤¨™²¶§¸¼®¿Á¶ÀÁ¹ÂýÅÆÁÃÃþ¾¾°µ±¢©¢—Œy‡vi{e\sYTmORfJQ[BUZCW\F560560671782893:;5=<7>=8@?:@?:C@;DA<EB=EB=GB>GB>GD=GD=HC=ID>IE<IE<KD:LE;LC:KC8LB8KA5L@4K?3J>2I=1G=1G=1F<0F<0E;1E;1D;2C:1A:0A:0@91@91=82<71;60;60;7.;7.;7,<8-?9-A;/D</E=0H?0J>0L?/M@0NA0NA0O?/O?/O?2N=3M<2L;1I;0G;/F90C9/C;0B<0B;1@<1@>2A?3B?6N;4m84—FEÄY_ÛWbßCQÞ8FÔ3;½++­1'¡8%˜:!—9 ¤;&¶>.Ã:2¾71§7,¢:-¥9-§7,¯:1¸B8ºE;·E:®>2­?0§<,¤6'ª6)¶>0ÃD;ÊG=¾8-Î@6áD?ìBBô=Aó9>ë27Þ.0Û75âGBèSLåNGÜ=9Ù10ã/2ê67Ù1(Þ</Í,"Ð71¾3,°7/Ég\¸l\ŽWC‡ZCƒV?‰R=©\LÎlaágbäTTðEKñ=Fä;@Ù=>ÒA>ÒGBÝOMëWWíNRïDMð:Gö5F÷2Fø.Dû1Gþ7Nþ8Sý8Tý8Vþ9Wÿ;\ÿ>aÿAeÿCkþ;eý<hý>jý?mú>nø>oô=mö:jÿHrÿ;aÿ?dÿKnÿKn÷NmïUoãZnèr‚Âaj˜ILt:8\6-P9+PC2UM:^QAreUˆ|n•‚Ÿ‘¦§Ÿ¡¦Ÿ–œ˜•ž›ž§¤¦¯¬¦¬¨¢§£¦¨£µ´°ÂÁ¼ÌÉÀÑÎÅÕÐÌÕÐÍÔÎÎÕÏÑÑËÏËÆÊ¿½À°²¯˜Ÿ˜€Œ~m~k`t[WnRVjNS`FV`EX`H560560560560671893:94;:5=<7>=8A>9C@;DA<FC>HC?HC?GD=HE>ID>ID>JF=JF=MF<MF<ME:LD9MC9MC7NB6MA3N@3MA3JA2JA2I@1H?0G=1F<0D<1D<1C:1C:1A:2@91?82?82<71<71<8/<8-<8-<8-@:.A;/D</E=0H?0K?1M@0NA0P@0P@0O@-O?/O=1O=1M=0L</I;0F:.E8/C9/B:/A;/A:0?;0?=1@>2@@4P91r1/¨JKÁSV»>D½06Â03Á//º1)©1!£9#œ=!™;›< ¡=#¥;%¤6%ž6) 
8-¢6,¥4,­81·@:¸C<²@6±B7­A4¨</¦8+¯:0¼C8ÃD=Å@7Ã>-Ï>-Ù5+á*&í"%ú%+ÿ*1ÿ.4í)*ç.+á3,ß4,à3,ã2,é0-ç2+Ú2%Ö6&Ð2&Ï7,Å6.½>7È`UÒq¦eSVBŠM:£WIÈf[ßd_çRTèCIõFMí@Dß??Ô@<É@8Æ@7ÑFAãMLïJNô@Kù9Hý7Hü5Hø3E÷6Gú;Mú8Pü7Rû6Rý6Uÿ6Xÿ9]ÿ;aÿ<gÿ>kÿ?mÿ@qÿBtÿBuþBuüBtùBpÿFmúAaþEeÿKjþKkÿVtÿ_zõ]tòj~æp~Óow®^aƒCAg6/hB7sUJye\Œ}v£–­£¡²®«¶¶¶²¶¹ª¯³°·½¶½Å½ÁÊÀÃÊÃÂÊÉÆÍÕÎÖÝ×ÛâÚØçÝÛéÝÝæÚÜåØßä×ààÔÞÙÏØÊÅ˺ºº¢§£ˆ’‡t‚qexb\sW[oS[jM\iK]gL561561561560561671872983<;6=<7@=8B?:DA<FC<GD=HE>HE>HE>HE<JF=JF;KG;KG;NH:MG;MG;ME:MD;MD;MD=NC?NC=MC7MD5KB3JA2H>2G=1E=2D;2C:1C:3A:2A:4@93?74<73<71>7/>7/<8/=90>:1A:2C<2D=3H@5I?3L@2MA1NA0P@0O@-O@-L?/L>1M=0L;1J91I81E80C90@9/>:/;;/<<0=>0>?1A?0S8-‡<7Ä\[ÊZY®86«.*²1+®.%­2#¬9&¬B*ªF,¦D'£A&£A&¢<#Ÿ9# 9*£;0¦:0¦7.­<4¶C<·E;°@5«=0¨</©;.¬<0»E9ÆLAÅE<¼8,¿6$Í:(Ý;.æ3,ñ+*ù(+ý&+û%'ø**õ.+ï2,ë4,è3*å3)ä2(à3%à:*Ñ2Ü?0Î6)È8-Â?5³A6Ñl`¿gY¥RB¨OAËcZçhbæRRêAFóBJë>Dá;=Ö<<Í@9Á>4¶8,¹6.Ä94âHHé@Eð;Dö:Hú:Iø8G÷;Iû?NþAUÿ?Vþ<Tþ9Tÿ8Uÿ9Zÿ:^ÿ;cÿ>iÿ>lÿ>oÿ>pý>qú@rø@t÷Aq÷CjÿMmÿSs÷Jhë@`ýTsÿg…ÿe‚÷]wï_xçh{Üp}Æqx§gg^X{ZQ—~wª™’¾°­Ä¼ºÆÂÃÈÇÌÇÇÏÃÃÏËÊØÌÊØÑÊÚ×ÎßßÔäæÛéêàëíãëïåæòèæôèèðäæîáèíàéêÝçäØâÑÊÑÁ¿Â««©‘–z†xl{hbu_`rXbpVboScmT21/320431651875984984984;:5<;6==5??7AA9CC9EE;EE;HH@HH>HH<JH;KJ8LK7ML8NK8MJ9JF:GD;JFCNIMNIPGDOD?ENE>LC4J@4KA5MC9LB8F=4B90E<5C:5@93?74@85?75<74<42C:5B94A96>95?:7>:7?;8@<9B?:D@7G@6J@4L@0O@-O@+L?,G@.F?/I;0K81M53K65J88F;9B?:6904</9D4:A/69$?=(bC1¼j^¹JA¦7,¥5)§7+¥5'¥7&¨=+£8$¥<&ª@*­@)¬=)¬9&®6%«6%¤6'¡7)¢8*¦<.ª@2¬B4¬B2¬A1©>,«=,­<,³=/½C4ÄF8À>1º2&Ç9+Ì8*Ð8+Ö8-Û7-â5.é3/î1-ñ.,ô.-ó0,ñ1,ê5*ã7)Ý:)Û:(Ü9&Ô3Ð3 Ï8'Æ7'¾6(ÂB5ÏSIÈNCÏSKá\Wî]ZðPRí>Cò9?÷?GêAFßCDÕAAÈ?9»<3±;/°</²<0ÊL@ÔKCÜGCá>?ë<A÷?Gý?Iù;GþDQüCQþDRÿDUý@Vü<Uÿ<ZÿBdÿFkÿAjú;gõ:gö<mùCsýIyÿKzÿMyÿMtøPsðOpçMiêMjüYxÿgˆÿ_ƒõGlöWwè_yÃ^n½{‡a^xp§‘„»§œÑÄ»ÝÖÐÝÜÚÜÛàÞÜçâÛëæ×ìëØîñÙñöÝó÷âó÷çôõëôôîòùôñùôðùñïöíîóèìîãééÞäæÝâÖÐÔÍÉÊ»º¸£¥ ‹‰yƒxr}op{k}†sxlv}k0./1/0320542653762873872:94;:5==5??7AA7CC9DD8EE;HH>HH>HH<JH9KJ6LK6MM5NM8KJ8KI=KJEPPRWU`YXjVTjRO`OIKMD?H?:F=8G>9H?:F=8B;5D:8A96?74?74@85@85=85;62=4/=4/=52<74=96>:9=<:>=9B?:D?9G@6J@4M@0O@-P?+L@*B?,B?.F<0H:1J65I56F35@65@<9:=6;>5@@4E<-J9)^B4€L>®QB«;- 
2#¡6&£:'ž7$ž9%£>*¡;%§<(®?+³@-¶=,·9*¸6(µ7)ª9)¥:*¤9)¥;+¨>.«A1«B/«@.®?,«:(±<+¿E6ÆH9Á?1¼8+À6)Ë7+Ï7,Ð9.Ñ;-Ô</Ø:.à8/ç4-î1-ô.+ô.+ñ0+ê4)á7(×:'Õ:$Þ<'Ù6#Õ8%Ô=*Ì;*Á5&Ä:/ÏF<ÑF?ÚIDéOMóPQôGIð;@õ:AøCHåBEÚDCÒBAÅ>8·;1®:-ª<-«=.¶E5ÆL?ÔNEÜGCã?>ï?Bú?Fþ@JøCJ÷CLúDPÿFTýBSù>Sü>XÿBaÿFhÿCiü?iù@jùBpüFvþJzÿK{ÿJzÿHtõKpñQsõ\zù_{ûXwüNqýEkúDlïEiÚKgÈ_p±nukLI~t°–‰Ì²¥äÓÉìãÜëçæêéîêçòíâóöãùûáúÿâýÿçÿÿìÿÿòþÿöþÿùûþú÷ýúõü÷ôúõòøïðôëîñèíïæëæÝàÜÖØÊÆų²®ž¡šŽ“Œ‰…ˆƒ•‡‰€†Œ~/.,0/-10.21/43/540762761:94::2<<4>>6@@6BB8CC9DD:IF=IG;JH;LI8MJ7NL7NL7OL9KI:NKBTRS_^fihxmm…lkŠkhƒ`YiYQ\OGRH@KH>GG=EE<A@:<C:;?:7<74<73=82>93>:1=9083-94.;60<92=:3>;4?<5@<3E>4G?4I?3L@0O?0P?/P?-L?,<;&:=(?<+A;-B71A62>42;30=84B;5H94N2.Y,)l/.‡;= FE£;0¢2$Ÿ1 ¤9'¥<)ž7$ž8"¢<&¤;&ª=)±A-¶A/¹>.»<-¾:-¾</²<.¬<.©9+©9+«<+­>-­>-­<,²?-°8(¹>.ËL=ËG:¼6*º2&É9.Ð6,Ô6+Ô8,Ö:.Ø:/Ü8.ã6/è3,ð0-ô.+ô.+ð1)ç2'Ý6&Ô8"Ò7!Ý8$Ü5#Ú9'Ú>/Ó=.È6)Å9,ËA7É<5Í>8ÙEAåKIêJJéCCì?AëCCÞCAÕD?ÍB=Â=4¶:.®8*«:*¬;+¬8)ÀD8ÔNEÝIEà@@è>A÷BIÿHPôAGô@IùCOýGTþEUú@Uû?XþA_ÿDfÿCiÿCkÿFpÿIwÿK{ÿL|ÿJzÿFxÿHwÿKtúNtÿZ|ÿa€ýUvñCdûIkÿTvÜ?\ÍI`Ø{…¸||aC;|n®ŽÕ³§óÛÑûìåúòðù÷ú÷ôýôìûúëÿÿêÿÿìÿÿîÿÿóÿÿ÷ÿÿûÿÿýüÿþûÿýùÿúøÿøöÿõöýóôûðôùðóðçêåßáÓÏξ½¹­®¨¢¥ž£™£—£«žž¦—›£–/.,/.,0/-10,21-32.54/650880991;;3==5??5AA7BB8CC9HE<JF;KH9MJ9PK8OM8QL8OL;LJ>QPL^]bmlzzz’‚¡€‚¨€€¦{u—rkŠe_{YSmTLdMEZG@PB<HD?F@;?<87;63;60<8/=9.;8/:70991;;3>;2?=1@<1?;/A;-F?/H?.K?/M@/O?0O?/P>0L?/@=*?>,@=.?;/?;2>93=:5:94<94D95M51V-+j)-„28ž8C«>C¦7.¥7(¥:(ª?-¨?, 9&¡8#¤;&¥:&«<)³>,¹@/½>/Á=0Å=1Ä>2½?3¶=2²9.®8*¯9+°:,²:,³9*¶;,¹;-ÄB4ËE9È>3¾1'À2(Í9/Ö5+Ø4*Ú6-Û7-Þ7.á6,æ3,ì1*ó0,ô.+ô/)ï0(ä2&Û4$Ò6 
Ð5×2Ú0Ú4$Ü<.Ö=/Ð9.Í=2ÒD:Ä5-Å60Ê;5ØD@âMIåKIÞC?Ö=8Ó@9ÎA8Ç>4¾</µ:+°8*¯9+±;-¬4&½?3ÑH@ÙHCÝB@å@DóFLýNUð?Eð?EöBMþHUþHWüBWú@XüA^ÿAcÿCfÿFmÿIrÿMyÿO|ÿL{ÿIyÿDvÿN~ÿR}þKrøMoÿVuÿUu÷MjðFcïKfÒ>VádtþŸ§ÓŽd92lJ>¥ynÐ¥œõÔËÿéãÿôòÿýÿÿüÿùóÿüïÿÿïÿÿðÿÿòÿÿôÿÿ÷ÿÿûýÿüûÿýúÿüøÿúøÿù÷ÿö÷ÿõöÿôøþó÷öëïìãäÚÕÒÉÆÁº¹´³´¬±´©°¶ª¶¾³°¸­¬´©10,10,0/+0/+10,21,43.54/77/880991;;3==3??5AA7DA8IE:LF:NH:PJ:RK9RM:SL:QK=OJDVTUfdqwx††¨¹“ϐƎ‹À‡ƒ¶{w©pmšid_[~UPnNJaKGXEBM?=B;7895296/85,85,671783891;;1></?<+B=*C<)G?,J?+K@,LA/M@0M?2L@4K>5L?6K>5F<3B92=82:946;47<59<5>:1H4-Y2-w78”?D¦>E§9<ª;0ª<+«@.¬A/©@-¥<)¦;'«>*§8$­:'µ<+¼=.Á<-Å;.Ê<0Ê=3Å?6¾>3º:/·7,·7,¸8+º8+º8+»7+ÇA5ÍC8Å8.Á1&Æ3)Ë7-Ï5+Û4+ß3)á4-â6,ã5,ç5+ì1*ð/*õ/,ö/*ò/)ì0'â2#Ù4!Ð5Ï4Ö1Ù/Ø2"Ø6)×9-Ô:0Ö?6ØE=ÖF>Í@7É<5Ë@9ÖKDÙNGÒE>Å;1Å=1Â<0À</º;,·9*´:+¶<-¹?0µ9-¿<2É@:ÒC?ÛCBãDHíJOöOVì?Eí>CòBLûIUÿJYýFXúCYûC]û@_ûBdÿElÿJsÿNzÿO|ÿLyþIvÿJwÿTÿT}ýJqùImÿStÿVtøNiëD^äF]êXkÿ–¢ÿµ¼óžŽHFk.)¦kc͖óÈÁÿãÞÿñðÿüýÿýÿü÷þþöÿÿöÿÿ÷ÿÿøÿÿúÿÿüÿÿýýÿýúÿüøÿûøÿùøÿø÷ÿöøÿõ÷ÿôøÿôøýñóóéêäÜÚÖÑÍÍÊÃÉÉ¿ÊÊÀÊÍÂÉÐȾȿ·Á¸65143.32.10+10+21,32-43.66.77/880::2<<2>>4@@6B@4JD8ME8OH8RK;TK:TM;SL<RKAQLIZW^li|~Œ¸–™Ì™žØšžÞ™˜Ú”‘ÔŒŠÉ†…¿~µxv§nk–fe‡]ZwVTiMKYDAJ><?;:8;74762555457664872<:.?<+C>*F@*G?*H@+IA,IB0IA4HB6HA9G@:K=<I;;C9:=77875384/83/917<574+C1']80~C=–GCž>?ž51§;/ª<+ª?-ª?-©>,©>*¬?+°?-«8%²9(¹:+¿9-Ã9,È:.Í:0Ï<4Ë>5Æ;4Â91Á8.Á8.Â9/Â8.Ã6,Å7-ÑC9Ð@7Ã0&Ã,#Ð7/Ö<4Ó2*à3,ä2(ç2+è3*ê3+í2+ð/*ó-*ö/*õ.)ñ0)ì1(á4$Ú5"Ñ6 Ð5Ø7#Ù3#Ö3$Õ3&Õ5)Ô8,Ô<1Ö?6ìYQáRJÓHAÌC;ÊE<ÌI?ÈE;À>1¼:*¼;(¼:*¹:)·:(¸:+¼>0¿A3ÁA6Á<3Å<6ÏA=×EEÝGHäIMëLPèBFê@CðCIùKTþNYüJZüF\üF_øA_ùBaýEiÿJpÿNwÿOzÿNxÿLvÿQ{ÿRyþOvýOtÿVyÿ]{ÿSoîD^úTlîQd÷dtÿ’žÿ‹•ôƒ‰Ç`d§MMµjgΏŠï¼¸ÿÞÙÿïîÿúùÿüþýûÿýúÿþûÿÿûÿÿûÿÿüÿÿûýÿûûÿûøÿûøÿüùÿûúÿûúÿøúÿöøÿó÷ýñóùíïòææçÝÛßØÒÛ×ÎÝÛÏàÞÒßáÖÜãÛÏØÓÅÎÉ<94;8185052+41*41*52+63,74-85.96/;81=:1?<3A>5C?4JB7MC7PG8SJ;WK;UL=UK?SJCSJK]Wcnl‚‚‚¦‘•Åš ÚŸ¦ê¢©ñ¡¥ïž 
ë™›å•˜Ý”•Ö‘ʉ‰½‚ƒ±zy¡rq‘fc~XVkPN\IGRFCLCBJ??K==G;:@;9:<94?;/C=-E@,F@*FA+EB/EC4CC7BC;AC>@ACB<FD>LCANEEOCHNAIL>HI>GDCHAA=2L:.gF7ƒOAI<Ž?2”8)¥>/©>,«>*¬?+­@,¯@-°?-±<*±8'¶8)½9*Ã9,È8-Ì8.Ò91Ô;5Ï:4Í:3Ë81Ë81Ì92Í:2Ì70Ë6/Ó<5Õ>5Ò91Ì2(Î4*Ø;2Ú<3×3*ä1*é1'ì1*î2)ð1)ò/)ô-(ö,(÷-)ô/)ð1)é3(â5'Ù6%Ò7!Ï7 Ô9%Õ8%Ó6%Ñ4%Ò4(Ó7+Ò8.Ð7/ãNGèXPçZSÚQIÌG>ÄD9¿@7¹>/¹<*¹<&¹<(¹:'¸9(º;*À>0ÃA4ÉD;Æ@7É@:ÑFCÕIHÖHGØGJßIKæFHèBDíDIõLSüQZûO]ûL]ûK`öD^÷DaùFfýIlÿNuÿOxÿOxÿPw÷VxóUvôTvüYxÿ]{ÿZwûQkòI`ýWköUgêM^ö^mâKZæTaåTa×XaÁefȃ~嫧ÿÔÑÿëèÿóòÿøùÿþÿûüÿûüÿûüÿüüþüúýýùúý÷÷ü÷ôÿú÷ÿûøÿüûÿûúÿøøÿóóýîñúëîðáäéÝÝäÙÕâÙÒæßÕëçÛñíáññåìóìÛæâÎÙÕ@=6=:3:7074-52+52+52+63,74-74-96/;81=:1?<3@=4B>3JB7MC7RF8VJ:WK;XL<WK?TICSJM^Xfpmˆ‚„«‘—Ëœ¤ã£­õ§°ýª±ÿ¦¬ú¡§ó ¤îŸ£ê¡á™›Ö•–Ì’’ĉˆ´yyŸji‹`]|XUpRPhOMeNJcKG^FBS@=H?:>?:6@<1A>/C@/CB0BC3BD7?D=>D@<ED;BJ>CVCIaLRhU\o\br`dobbjd`afXWaJDlG>ƒSE•XF“J7‘@+™>+¨A.®@/®A-¯@-°A.´A/²=+°7&µ7(¼8+Â8+È8-Í6-Ñ7/Õ81Ø;4Ò72Ð72Ð72Ñ82Ô94Ô94Ó83Ó6/ÞA:Õ8/Ñ3*Ö8/Þ=5Þ=5Ú91Ü5,ç2)ë0'ï0(ñ0)ô/)÷-)÷,(÷,(÷-)ô/)î2)é4)á5'Ú7&Ô7$Ï8#Î7"Ï8%Î7$Ï6&Ð7)Ò9+Ñ7+Î4*Ë4+ãNGód\ë`YÕOFÄA7º;2µ9-¸>)¸>'¹<&¹;%º9&½9*À</Ä>2ÊD9ÊA9ÎE?ÕKHÓLIÎGDÍEEÖHGãIIæCDèEHòMSøSZùR\÷O^úOaôH^õG`øGdûIiÿNrÿQxÿRyÿTzðUuïZwù^}ý^|ùUpõMgùOiÿYnÿ[mÿctðM^÷TeôO`üUgõL_äR_À^_»vqל˜úÉÅÿåãÿíëÿôôÿÿýüÿÿûÿÿûÿÿûÿþýýýýüúþùöýøõÿøõÿùöÿúùÿúùÿööüððøéìôææëÝÝèÚÙäÙÓèÞÕðèÝùóåÿúìþþòôúöáëêÓÝÜB>5A=4@<3>:1<8/:6-84+73*62)62)73*84+;60>93A<6E>6I@7MC7RF8UI9WJ:XK;ZLA[NHTIMXR`gd|~§’Ê— ãžªö¥°ÿª³ÿ«²ÿ¬´ÿ¬²üª±ùª¯ó¨¬ì©«èŸ¡Ú™šÒÁ±tr¡he’]Z‡XR~XQzYQvVOnMH_C?M?:@?;:B?8>>4@B5BE:?D=<B>:CB>FH?LUDVnQfƒarŽisŽtvx„o€€^lŠ[c’WYžWU¦VM¦M?Ÿ@.œ9$ 
:$§<(¬=*®?,±@.³@-´?-¶=,¹;,º6'¾6(Ä6*Ë7-Ñ7-Ö8/Ú91Ú83×84Ø95Ù:6Ø93Ø61Ø61Ú83Ý:3æC<à=6Ú70ã@7þ[Rÿlcÿ]Tç?6å3)í2)ñ2*ô/)ô*&õ(%ø(&ø+(÷-+ó0,í2+ã1'Ú0#Ô1"Ô4$Ó:(Ì;&Ê<(Í<)Ï<*Ò:,Ò9+Ò8,Ñ7-Ú@8Ï81ÜGAôc^åXQËB<Å@;·4*·:&¶<$¹<&º<&½:(¿:+Ä<0Æ>2Ç>4ÌC;ÙPJßXRÔOJÃ?:À?:ÏHDÞHGâGEæJKïPTñRWïNVíKXñL\ùRføOf÷Ke÷JfüMlÿQsÿRvýRvôWvîXuõXuû[wÿ\xÿZsÿWnÿVkÿbuýVgÿ\mÿ[lñFXôDXÿQeîVeÆfgªieʏ‹ç¶²øÒÏÿëçýïîüø÷ùýüùÿÿùÿÿúÿþüþýÿþüÿûøþöôþùõÿû÷ÿùöÿöôÿööÿõõöèèæØØãÕÔÞÐÍáÖÐñçÞüôçþøèÿüéÿÿóúÿùèñðØáàC?6B>3A=4?;2=90;7.:6-95,73*73*73*84+:5/=82@;5D=5H?6LB6QE7TH8VI8WJ9YK>ZMEUJNWQ_eby{¤ŠÉ”â›¦õ£®þ«´ÿ¬µÿ®¶ÿ¯·ÿ°·ÿ¯¶ü¯´ø¯²õ©ªë¢£ã˜˜ØŒŠÉ€»uq®ie c^˜`WŽ`Vˆ]TVPtMIbFCTBAIAAC<<:AB=FEACD?A?@DBEOMRWVdch†quš}¢‡}Ÿ’}œ {š¥pŠ¤`u¬Yi¶VaºQU¸HF°@4¬;)¬<&­='¨8$¨7%«8&®9(°8'²9(µ7(¸6(»3%Â4(Ê7-Ó:2Ù<3Þ=5ß<5ß<5á>9ß<7ß<7á>9åB=èC=çB<æA;å@:æB9ä@7Ü8/Õ1(Ø6+ëI>ÿ[Oñ@6î6,ê+#î)#ø-)ÿ/-þ,+ö((ú0.õ4/ï61ç6.Þ3)Ö2&Ô2%Ï6&Ë:'È;'Ê<(Í:(Ï9*Ð7)Ñ5)Ð4(Ó9/Ï5-Ô=6åPJåTQÛLHÍB?·1(¼;(»=&½<'¾;'¾9(À8(Â:,Ä:/È>4ËB8ÕOFÜWPÒRIÃD=¿C;ÌHCÛLHßJFåMLëSRíTWëPVéNVìOZ÷VføUhùSiúRküRmÿSqÿStÿRuòMmøUtÿ\zÿ_{ÿ\vÿUoûRiûReüUf÷RbÿZhÿZhóN^øScþYißS^¶`_—^W¶}Ù©¥îÈÅýáÞùééýøõúüùùÿÿøÿÿ÷ÿýúþýÿÿýÿûûÿ÷õýøôþùõþùõþöóÿ÷öÿõõöêêêÜÛäÖÓÝÐÊßÕÌïåÛüõåÿùçÿýéÿÿïúÿøèñîÛáßEA6EA5C?4A=2?;0=9.<8-;7,84+84+73*84+:5/=82?:4C<4G>5JB7ND8RF6TG6WG7YI<YJCWKKXP]b^ysuœ…ŠÂ™Ü˜¤ð «úª³þ­µþ¯·ÿ±¹ÿ³ºÿ´»ÿµ¹ÿµ¹ÿ¯²ù¬¬ô££ë™—àŽÓƒ€Çxu¼sm³k`¢i]›cZ‘]W…VRwNKhEDV@@L85<A<@KABL@BQ>BZEJpW]‚cs˜r—¥z§®«²{£¸vœÀp“ÂcƒÀTnÂG\ÐK\ÑHPÄ<<º5.¸9*µ<'°<%°;'°;)²:)´;*·<,»<-À<-Ä<.Ä6*Ë8.Ó<1Ù?5ßA8á>7à<3ß:4Ü71Ù4.Ø3-Þ93æA;ìE?éB<ä=5ã<4Û4,Ø4+Û7-Ô2'Ì, Ö6*éG:õK>ò@6ï4-ð-)ö,*ú,,ü,,ø*,ð*)ì/+è2.ã5.Ü4+Ø1(Õ1'Ñ3'Í7(Ë:)Í:(Ð:)Ò;*Ó:*Ô8+Ô8,Ñ4+Ö<4Õ:5Õ<7åONíYWÙEEÂ3/À;,¿<(À;*¿:)Á9)À8(À8*À8,Æ=3ÇA6ÏJAØUKÒRGÆG>ÀD:ÇG>ÖKDØICÝLIäSPåSTâPSâMSåNWòXdöZhý[pþZrüVnúRlûQlÿQpýGmÿPvÿZ}ÿ[{ÿVrýTkþWkÿ\mûYhøZhûamö`kí]gïfnìfmÉ\_ 
]W{OF˜mfÁ˜”ݺ¶ðÔÑôàßÿõôùù÷ùÿýùÿÿ÷ÿýúþýÿÿÿÿýÿÿøùüùôüùòüùôýøôÿùöÿøöùîìïäâåÚÖÛÐÊÜÒÈîäØýöäÿûèÿýçÿÿíúýôêðìÞãßIE9HD8FB6D@4B>3@<1?;0>:/:6-95,95,95,:5/<71>93@;5E>6I@7LD7OF5RE4UE5WG8WI>XMKXNW^Zqpp–€†ºŒ–Ô•¡é©õ¨°ù©±ù­³ý°·ÿ±·ÿ²¹ÿ´¸ÿ´¸ÿ¯±ü«­ø¥§òŸžê—–⌋ׂÍ}yÅum¶pgªf`ž_ZYX„QQuGGcA>Q=5DH8BT=C`@EnCJ€OU–]d«fx¿kÉnšËo˜ÉiÈ`ƒÉWxÆIgÄ;UÆ2HÙAPàEMÑ;<Æ71Ä?0½@,±:$­6"­5$®5$³5&¶7(¼8+Ã;-È</Ó@6×@7Ú@6Ü?6Þ=5Þ93Ý6.Û4,Ó.(Ò-'Ó.(Ø3-á:4ã<4á81Ü5-Ü5-Ð,"Í)Ù7,âB6Ü?0Ð4%Ë.éF7óI<ýH?ü<7ô-*ñ#%õ')û/2ñ-.ë/.ä2.Þ3,Ú1*×0*Ö/)Ó0'Ó7*Ò9+Ö:+Ø<-Ú>/Û?0Ü@3Ü@3Ò6*æLBåJEÎ50ßGFóZ\ßIKÕA?È</Ä;+Ã:*Ã:*Â:*Â:*Á9+¾9*Â<0Â>2ÈF9ÏOBÎPDÆH<¿C7ÁC7ÎH?ÑF?ÕJEÛPKÝROÙMLÚKMÜKPëX`ó[güaqÿauúXmõOgõMgÿNkÿOuÿRyÿUwÿTqüTmý[pÿcuÿjxÿguúboõamídlãflÙhjÎghµfa^TeG<^U©ˆÍ«©åÇÅïÚÙÿóóùøöùýüøÿÿ÷ÿÿûÿÿÿþÿÿüÿÿùúþûöûûóûúõþûöÿýùÿûøþôòøíéêßÙÞÔËÝÓÉíæÖÿøåÿþèÿÿæÿÿëøúïêïèãæßNH:MG9LF8JD6HB6F@4D=3C<2?80>7/=6.=6.=60>71@93?:4C>8EA8KC8NE6QD3RE2VF6VH;ZMGVMR[Whlk‹~‚±Š“Î’Ÿãš¦î¦®÷©¯ù­°ý¯³ý°³ÿ°´þ°³ÿ¯³ý«®û¨«ø¤§ô ¢ïšœé“•âŠŒÙ†…уÉ{u»mk¬dež_a’Y[„RQsNHbWFY_CQmBLEM˜MT¬U]¼[bÊZhÙRpâRußTuÙPlÒJbÍBWÇ8JÅ.?Ô8EãCKæGKØ>>Í>6ÌF:ÃH6¶?+·>-¸=-º<-½>/Ã?0ÊB4ÐD7ÕE:ÙE9ÚB7Û>5Ù;2Ù6-Ù5,Ú3+×3*Ô1*×4-Ú7.Ý90ß80Þ7/Ý6.Ü5,Ö/&Ù5+×7+Ï2#Í1"Ò9)Õ>-Ô;)Ø<-æD7øJAýD?ù64õ*-ö*-÷/2÷67í55â30Ú1,×0*×/,Ù0-Ù0+ã81ä91â:1â;2à<0ß=0Ü?0Ú>1Ñ7+ïWLøaZÓ;6Ñ;:æPQÜFHæPQÏ<4Ê:/Æ8,Å9,Ä:-Ä<.Ã;-À;,¾:-¾<.ÃA3ÇH9ÇI;ÂD6¾@2¼>0ÉF<ÊE<ÎIBÕPI×RMÕNKÓKK×LOéZ`ð^hüfrÿhxü]qòPeóMeýNkÿVzÿTxÿRrøTmö]qûhxûjw÷erüetó_mì`kêkrßruÃjfªd\žla}fVXH9kXJ”|r½ 
œÞÂÁîÖÖþîïûõõûûûûÿÿùÿÿýþÿÿþÿÿûÿÿúýÿþùûþõûüöÿþùÿÿúÿþúÿûõÿ÷òñèáäÜÑáÙÌðé×ÿúäÿÿæÿþåÿÿê÷ùëíðçèéáQK=PJ<OI;MG9KE9IC7G@6G@6B;3A:2@91?80?82?82A:4@;5B?8EB9KE9MF6PE3RE2UF3UH7YLCUKLZScii…|«‰“È“žÞš¤ë¦¬ø©¬û¬­ý­°ÿ®±ÿ®±þ­°ý­°ý©¬ù§ª÷¤¨ò¢¦ðŸ£íšžè•™ã’”Þ’’Ú‰ŠÍ|€Àsx²ou©mpigŽj_}y_x‚WjŽP_£O\»S\ÍT]ÖPWÛHRå@Qè?RäCRÜDPÖCKÏ@DÊ9<É46äJLçIJáAAÕ74Î95ÎA8ÈD8¾>1ÃE6ÃE6ÅF7ÉE8ÎF8ÑG:×G<ÚG=Ö>3Õ;1Ö8/Õ4,Ö3*Ø4+Û4,Ú6-Û81Ý<4à=4ß<3Ý90Ü5-Ü5,Û7-Ò.$Ý;0Ú=.Í1"Æ-Î7&Ò=)Í:&Ì9'Ô8)â:/ó=9ÿ@@ÿ<>þ37ó,/ê,.á+*Ø*)Ô+(×/,Þ44ä88é99ï75ð74î73è71ã7-Ü6*×5(Ñ5&Í5(çPEÿmcÛHAÈ42×CCÓ>BêVVÙA<Ò>4Ë8.Æ6+Æ:-Æ</Ã>/Á=.»9+»<-¾?0ÀA2¿A2½@.¼=.»<-ÃC6ÄD9ÈH?ÐPGÓRLÐOIÒNLÕONé^cîagûitÿnzÿduõVjõQiÿUoÿWvÿVtúXpñ^pônyöw€íksÞXañ`mñ`mìboïs}숊́|ªth™yjskXON:_VGƒqg¯–’ÜÀ¿ïÕØüéëþôõýûüýþÿúþÿþýÿÿýÿÿûÿÿùþÿÿûùÿõúýöÿÿúÿÿúÿÿøÿý÷ÿþöùðçíåÚéáÔôïÜÿûåÿþåÿÿãÿÿèùúêòôçîðåSM=RL<QK=OI;MG9KE7JD8IC7E>4D=3B;3A:2@93A:4A96@;7A@;CC;IE9MG7OG2RF0UF1UH5WK?RHFXR^jg‚}§‰“Ä’žÚ˜¢ç¤§ö¦§ú«©ü«¬þ¬­ÿ­®þ¬®û¬®û¨¬ö¦ªô¤¨ñ£§ð¡¨î §í¤è›¢æ™žâ’šÛ‹“Ò†ÊƒÂ„ˆ·ƒ¨ˆyšmˆšcz©Xi»Q_ÏMYàIRèAIê=Aé=;ã>8ÝC9ÖH:ÎI8ÅF3ÂC0Å@1äTIàG?Ú;7Ø64Ø88Ø:;Ñ98È74À:/¾<.¿;.À:.Â8+Ã7*Æ4'Æ2&Î4*Ð3*Ñ3(Ô3)Ö3*Ù5,Ý6.Ü8/Þ=3Û=2Ü;1Û8/Û7-Ú6,Ø4*×3)Õ3&Ó3%Ð4%Ï6&Ò=)Ñ>*Ê9$À2È:&Ê4%Õ1'ç51ú<<ÿ=@ÿ7=÷37ê-1á-.Ù--×/.Ü43ã9:é;=ð9=ö26ø03ô01í1/ä1*Þ2(Õ1%Ï3$É1$Ñ=1új_äTLÈ95Ð@?Ë:=äRSäJHÜC=Ñ:3É6.Æ8.Å;0Â<0¿=/º;*¼?-½@.¼?-º;*º;(½;+¾<,¼>0¼>2ÀD8ÈLBÌPHÊNFÌLIÐNLèaeìaføhrÿo{ÿhx÷Zk÷UjþZrÿYsû]tîaræjtë}€ñ‡‰èwyÚ_dñgtþm|óeuëlwö‘•ì¢ŸÄ”Š¢|hjUHQ<WUFth\¤ŒˆÛ¿¾òÕÙùãæÿóöÿúüþþÿüýÿþûÿÿüÿÿúÿÿøýÿÿûùÿõ÷ýóýÿ÷ÿÿøÿýöÿý÷ÿÿöÿùïõïáòìÜúõáÿýçÿýäÿþâÿÿçýþìùúì÷÷ëVO?TN>SK>PJ<OG:LF8LD7JD8H@5F@4E<3B;1B92A:2C:3A<6C@9DD:JF:MG7OG2QF0TH2UH5UI=QGEWQ]jh€~‚¨Œ”Å“Ø™¢çŸ ò£ ÷¦£ú§§ý¨ªÿ©«þ©¬ý§­û¥«õ£ªò£§î¢§ë¤©í¦©î¥¨í£§î˜ è”¡å”Ÿß”Ÿ×•›Íš•¾ Œ¯ªƒ ¥e´]pÂQcÐIWÜBNå>Fë<Aì<<ê?8ãA4ÜE4ÔI4ÊI3ÂF.¿B,Â?-âSEÞD<Ý97æ>>êDFå@DÙ8=Î65ÍB;ÈD8ÉC8ÇA5Æ>2Ã9,Â6)Â2'Æ2(Ë3(Î4*Ó5,Ø5.Ù4.Û4.Û4,äB7Ý=1Ù7,Ú6,ß9-ß9-Û5)Õ1%Ø8(Í2 
Ê1!Ï9(Î8'Ç4"Ê7%ÔC0È7&Î8*×6,Þ5.æ3/î53ö:9û?>ÿLKúHFòBBì>=ê<=ç7:å26æ,1ô+1ø)/ô,.ï/.è1+à4*Ù5)Ó7(Æ0!À.ôdYë]SÏ@:ÔDCË;;ßMMêRQàHEÓ;6É6/Æ8.Æ9/Ã;-¾:+¾<,Á@-ÀA.»<)·8%¸9&½<)¾?.µ7)³7+¸>1ÁG:ÅK@ÅIAÈIBËJEåa_æ^`ñdjþnwþkuó]iðXeø]mú^sõbtèdoãlrì~ù‹ö†…éqsøryÿy†õaqàXföˆ‘ÿ´´Ð®¢ ›‡]jPDR9QT?jbU›ˆÚÀ¿òØÙöàãÿô÷ÿúýÿþÿýüÿþüÿÿýÿÿûÿþùýûÿúôÿòòþòùÿôüÿöûýòÿýôÿÿôÿÿóûõç÷ñáþùåÿýçþüãþýáÿÿêÿÿñýýóüüòXO@WP@WN?UN>TK<RK;RI:PH;MC7KC6KA5H@3H>2G?2H>2F@4GC:GE9JG8NI6RJ5UJ4UJ4VK9XNDULMZTbkhƒ{~§‰Ã‘™×˜Ÿç£¤ö¥¢ù¢¢ü¡¢ü ¤ÿ¢¨ÿ£«ÿ¤­ü¡ªõ¡©ñ¡¨ì¥¨íª©ï­©ó±©ö¬©ø ©ø—§ò’¡â–ŸÖ¦£Î¶ž¾¹…œ¸gz¿L]ÑERß>Mç;Gç<Dæ=Bà@@à@@â>?ß<=ÝEBÉ:4ÊD9ÊF:º1)ÛJEèJIéCEë>Bè;?æ<?á=>Ú<=Ô<;ÏB;È?7ÊD;ÑMAÍK>¿=0¸6)½9,¿7+Ã6,È5-Ë2*Ñ/*Ô/+Û2/Þ5.çA5ß9+Ù1$Ý3&å9+ç;-ß8&Õ2Ï2Ì4Ê5!Ç4"Ç4"Ê4%Í5'Ï5)Ê0&Ï2)Ñ5)Ò6*Ò4(Õ3&×3'Ü4)à2)æ3.ë52ë33é/2ç-2è-4í.5ö-3ö+/ï+,ì/-ê5.ã9,Ù7(Ð4%Ã-Ä3"çXHéYNË;3ßNIÅ41ÚHHáOPÜJJÔC@Ì;6È80Æ8,Ç9+È<+Å<*Â;(¾9&¼;(»<)º=)º=+·<,´8,µ;0¹=1»?3»?3¾@4À@5ÄA7ïjaõpiêc_ômjûqqìadôgmÿpyÿlzûhxðdoébiëdjîlnîqoîqoûy{ÿ}…ùZlücwßbpû¬¯¶ªš}‘u]rQJY:LR8snZ¥•ˆË·°íÕÓÿîðÿö÷ÿ÷ûýøüüüþþÿÿþÿÿÿþÿûÿþðÿôåþéæûêïÿîøÿöüÿôþÿóÿþñÿýñÿûîÿúêÿùæÿùãÿùáÿùáüúåÿÿõÿÿûÿÿûYPAXO@XO@VM>UL=TK<SJ;RI:NE6MD5LC4KB3JA2JA2JA2IA4GC8HD9LF8OH6RJ5SK4VK7TK:XNEULMYUckhƒ{}¦†ŒÀ•Ó–šáŸžìŸñžžôžŸù¢ýŸ¦ÿ ªÿ¡¬û ¬ô «íŸ§è¢¥è¥¤ê©£í« î¤Ÿï §÷š¦ðœ¡á£œÐ±˜À¿ªÂq„ÄTbÑCOâ<Fì8Aï7?ê:=å<?Ü@AÛACà<CÚ9?ÚADË;;ÌB?Ê@=Á31åOPæCFè?Dê;Bç8?â8;Û89Õ:8Ï;7ÔE?Æ=5Å<4ÌG>ÌLAÁC5¸:,·9+¹7*¿7+Æ6-Ë4-Ñ2.Ø3/á53ä84ä>2á;-Þ6)ß5&á5'á5'Ú5"Ó4Ñ9$Ë9"Ç9%Ç:&É<+Ë<,Ì8,Î4*Õ3.Ö5-Ó7+Ï7)É6&Ç4"Ë4!Ï4"Ø2$Þ2&ã0+æ.,æ,-æ,/æ-2è.3ì+.í,-ì0.è2.à4*Û5'Õ8'Ò9'Õ?.»*ÖD5Ð>1ÙF>ëVPÚB?ØBAçUVâRRÜKHÔC>Í=5É9.È9+Ç8(È;*Ä;(¿:'½<)½>+º?-¹@/¶>.²:,´;0·=0¸>1º?0½?1ÁB3ÅC5ëi\òmdçb[ðkfõolèbañklütxÿq~ÿo|öirí]fêY`ñbf÷qpþzxûvwÿx~ùRdþ[pä]nö¦©©¦“jŒkZtON_;SZ;us\©ÖĸöáÜÿõñÿúúÿúûÿûüüüüúþýûÿþúþÿõÿúíÿóáÿéáýæéÿëóÿñøÿòýÿòÿÿñÿýïÿýíÿúéÿöåûóàúòÝüôßü÷äýúóüüúÿÿý\PB[OA[OAYM?XL>WK=VJ<VJ<SG9RF8QE7OC5OC5OC5OC5MC7JC9JF;MG9PI7SK6VK5VK7TK:WMCVMNZWbkiy{¡…‰¹‹‘Ë‘–Ö˜˜Þ™™ã˜™éšî› ô£÷Ÿ§ø ©ô 
ªï¨èœ¤ãœ¢àžŸàžá™ã›™â–›ßššÚ¤•Ð±ŒÀ¿‚¬Çr“ÉYqÇBS×ALß>Dã=?å==å<?ã=?á>Aá>Cà<C×8=Õ?AÏA@ÊC?Ã<8Ä96éWWàBCä>Bè;?ã9<Ü68Ö66Ñ96Ì;6ÕHAÄ;1¾5+ÅA5ÍK>ÆH:»=/·8)º6)¿7)Ç7,Ï6.×50Þ71æ95é=9×3*×5*Ú6*Ü6*Ý5*Ý7)Ú:*×>,Å4!¿4¹4!¹6"¼8)½8)½3(À-%ã@;å>8Þ=3Õ<.Ì9)È7$È7"Ì7#Ò7%Õ3$×/&Ù.'Û.*Ü.-Û//Ü./Û,)ß0+à5-Ü5,Õ3&Ð4%Ð9&Ñ>*ÔA/Â1 çSEÒ;0ÚA;ÞC?èJIðTUíUTçSQãOMÝJCÖC;Ï<2Ê8+Å6&Ä7&À7%¼7&¹:'¹<*·>-µ=,²=,°:,±;/´<.µ=/·=.¹?0¾C4ÃE7Ü\Qêg]äaYðlgöpmça`ìfgójqàR^ï^kõhqòemñ`gôeiùqqþxwútuÿnuóJ]øRhä[mñœ¡£œŠa€`UoHOd=W`Aww]­¥’áÑÂÿîæÿøñÿüøÿþúþÿúûÿüûÿýûÿýùÿûóÿöèÿîÜþãÚøÞáùáèûåïüèöýëýþìýúéÿúêÿ÷æúñàôèØòçÕöëÙøñáþúñýüøÿþû\PB\PB[OAZN@YM?YM?XL>XL>UI;TH:SG9RF8RF8RF8RF8PF:LE;KG<OI;RK9TL7WL6WL8WK;VLBUMK\V`jh~yy›‚…²ŠÂ”Γ•Ò•”Ö••Û–˜ã˜›è˜žìš¢í›£ëœ¥è˜¢ß–ŸÚ•›Õ”˜Õ“—Ö’“Ö‘Ò…„½‘ƒ¶¥}±·s¢ÈfÒVzÕGaÔ<KÞAJàBCÞCAàB?áAAä?Cç>Eç=Fã?FÖ:>Ñ?@ÑEDÆA<º61Å>:êZYÝABã@Cæ=@ã:=Û89Ó97Ð<8ÎA8ÏE;Â:.»2(À</ÇE7ÄE6½>/¸9(½8)Â9)Ë8.Ò:/Ù80à91è;7ë>8Ø1+Ù2*Ü3,Ý5,à5-ß7.Û9.Ö=/É7(Æ:)Á<+¾<,¿;.Á;0Å<6Î95ëC@í@<ä@7Ù=1Ï9*Ç9%È:&É;%Ï<(Ð7'Ï1%Ñ1%Ô1(Ö3,Ô1,Ñ/*Ó2*Ó5,Ó7+Ð7)Í5'Ê7%Ì;(Ì>*Ì;(Ï<,ô^PàF<Ò3/Ò.,æ@@ûWXÿusÿroÿjgö^YçPGÕA7Ç5(½. 
Ä8'À8(»8&¸;)·<,µ=-²=,°<-¯;.­<.°</°<-±;-¶>.¼B3ÀF7ËMAß`Wâa[ôpløtræ`aä]aæ]dÖHTçYeógrôgoöelükpþsvþvvûpsûenñDXôKbåXk뒖 •ƒ_zYQlCSh?\gEy|_°ª”çÜÊÿõèÿùðÿûñþþôýÿ÷ûÿúùÿúöÿøòüóêûëÚøÞÎóÒÊëÌÏìÎ×îÒàðÖéóÛò÷áöõáûöãüõãõìÛíáÑéÝÍíáÑñèÙüõíü÷ñþùó\PB\PB[OA[OAZN@YM?YM?YM?WK=VJ<UI;UI;TH:UI;UI;SI=OG<OI=QI<SL:UM8XM7YL9XL<UKAUKI\U]hexut“|~¥…‡·ŒÂŽÃÈ’ϐ”Ô’•Ü“—à”˜ß•šÞ•Ü’›ÖŽ”ΉÇ…‹Å‡Ã}ƒÁ~¸}tŸŽp”¨gÁ\„ÖNvâAcè:Uê9KæAGàECÜGAÛGCßEEæAGî<Jí=JæBI×>@Ð@?ÐIE¿>8³2,ÈC<àUPÜDCáACã?@á>?Ú?=Ô@<ÑD;ÏF<Ç=2À8*½5)½9*¿=-¿=-½;+½<)Â:*Ç;*Ï;/Õ;/Û8/à8/ç83é:5æ95ä52ä20æ21ç32æ40Ý2+Ô0'Ë1'Ë7-È:0Ä7.Á4-Ã40Ì:;Ý?@ë8;í76ä71Ø5,Ì4&Æ5"Ä7#Å:%Ë=)Ë8&Ì4&Î5'Ô8,Ô:0Ó9/Ð8-Ð>1Ê;-Å8'Ä7%Æ9'Ç<)Ç<)Ç:(Í>-Í:*Ø>2áC:Ô/-ä::à24ß56×53Ö;6ÞC>åKCêQIêSHèTHäUGË?0Ä?.¾<,¸=-·>-´?.°?/®>0¬<.¬<.¬<.¬=,­<,±=.·A3ºD6ÆLAÞbXâc]ðnlôrrå`cå`eç`g÷kvûoz÷kví`hð_fýlqÿx{ÿy|úmsø^jóAWôC]èUh懍£“ƒg]YtI^uIhwPˆf³²–èâÌÿúéÿýíÿþïýÿòûÿôøÿôôÿñíûêäòáØî×Åèǻ伺޺¿Þ¼ÇàÀÍâÃØæÌãëÓìîØôñÞ÷òßóêÙìàÒèÚÍêÜÏïáÖòèßñèáòéâ[OA[OA[OAZN@ZN@YM?YM?YM?XL>XL>WK=VJ<VJ<WK=WK=XL>QI>QI>SK>UL;XM9XM7YL9XM;WK?ULGYSWe^nnjƒut–~}¥„…±†‰´‡‰¹Š‹Á‹ÈŽÏŽŽÔÕÓ”Ó‹ÊƒˆÀ|¸u|²ov­io«ljœ€lˆ“cy®YvËPoãBdð7Vö4L÷8Hï@EåFBÝJCÛJEáGGèCJò=Nð>LäCI×CCÌA>ÎIDº;4°4,ÉI@ÕLFØBAÚ?=Û;;Ú<;Ø@=ÔE?ÍG<ËG:À8*Á8(À8*¾9(¼9'»8&½:(À;(Ä;)Ê=,Ò<.×;.Ü8.á6.ä6/è50é32è./é,0î02õ47õ77î45æ21Ú.*×4/Õ62Ñ32Ï/1Ò/4Û6=ê:Dí06î02æ3/Ü3,Ñ5)Ê7'È9(É<(Æ8$É6$Î6(Ó:,Ú>2ÝA5ÝA5ØA6ÓG8ÉA1Â;(À9&Ã<)Æ=*Å<*Å8&Ì:+Í7)Í0'æC<Û2/ï?Aã/2Ú*,Õ1/Ò5.Õ81Ö90Ò8.Ì5*Æ2&À1#ÎE5ÆA2¼=.µ:*±9)¯9+«:,ª:,«;/«=0¬>/¬<.«<+­<,²>/µA2ÉSGàg^àc_êjiînoæchðjqõoxúq{ýt~ömuîbködnÿrzÿx~ÿquúipøWfùAYô@[êQfá~ƒ¬—†ykmˆ]o‰ZyŠ`—r¶·˜ßÞÂùôÞüúåÿÿïûÿïõÿïïÿëçúäÜïÙÐãÍÅÞÁ°Ö­¨Ó¦ªÐ§°Ó«¶Ô®¼Ö±ÇÚºÒßÁàæÌèêÔòíÚòéØîâÔìÛÑêÙÏìÛÓéÚÓæÙÑåØÐ[N>[N>[N>[N>ZM=ZM=ZM=ZM=YL<YL<XK;XK;XK;YL<YL<YM?SI?TJ>UL=XL<YN:ZM:ZM:YN<YM?ULEXOR_Wbd_sjfsoyy|~¤~€©‚²ƒ„¼††Æˆ‡Ë‰ˆÌ‰ˆÊˆˆÈƒƒ¿|}µvw­pq§kl¤fg 
l`Ž‚`q•V^³N^ÒJ^ì@X÷6Kþ3Fþ9Có?BéD@áHBÞICâFIéBJò=Nï>NÝAEÔFDÈA=ÉHB´:/±7,ÎOFÉB<Î=8Ð64Ï10Ð51Ó>8ÏE;ÈF9ÃD5¾9(Ã:(Â:*¿:'½8%¾9&¿:'Á:&Ä9&Ë:)Ó:,Ù9+Ý7+á5+ä3+ê3-ë*+ñ*-ø-3ü/4ÿ17ÿ37ÿ38ý58ù8;õ8<ò9>ò9Aô9D÷7Fû6Hþ5Eô+5ô-2í12ã4/Ù6-Ð8*Î;+Î=,Ë5&Ñ8*Ù;/ß=2â>4ä=4å>5ßA6ÙJ<ÏG7ÉA1Ä<,Â9)Â9'Æ8*È9)É2'Ó9/á@8ëD>Þ0/à..ë46è66Ø3-Õ7.×90×:1Õ;1Ò;0Í:0É;/ÖL?ÌF:¿@1´9*°6)¬6(«7*©8*¬;-­=/®>0­=/«<+«<+­<.±=0ÀKAÚdZÞc^èkiðosêinõrzüvîfp÷oyùryöjsþlvÿwÿv}ùhoùcn÷RbþB[÷<YêKaÝt{µš‰žz}˜k{—f‚•h‘u¬²ŽËÌ­ààÄéìÑîöÞåóÙÙíÑÏæÉÅÞÀ¼Õ·´Ë®ªÈ¤šÁ”–™Ã“¡È™§Ë¬Ë¡µÎ§ÁÒ°ÎÙ»ÙÝÄåãÎìåÕïáÖëÚÒçÒÍãÐÊàÏÈÚÌÃÕǾ[N>ZM=ZM=ZM=ZM=ZM=YL<YL<ZM=YL<YL<XK;XK;YL<ZM=ZL?VJ>VJ>XL>YN<ZM<ZM:ZM:ZM<[OAWMDWML\RZ^Xfb]qjfspwv–xxœ|{§~µ„¿…‚ň‚ȉ„Ç‚}¿y·zu­xq§vo¥tk¢pg za‹„QZ™HE³EHÓHOîBNø:Fþ6Aþ9@õ==îA=æD?ãEBäCHèAKî=Më?MØ?BÑGDÅA<ÃG?²9.³:/ÑUKÃ>5É83Ç/,Å*&È/*Í:3ÌC9ÃC6¼>/À;*Ä;)Ã<)Á:'Â;(Ã<)Â;'Ã8%Ä5$Ê7%Ô8)Ù7*Ý5*á3*å3)ê2*õ33ÿ58ÿ8<ÿ4:ÿ-2ÿ(-ÿ(-þ+1ü-3ö)0ò&1÷)6ÿ,?ÿ*Aü 9ó/ö"0ó(.í,1æ0/Ú1,Ô3+Ñ5)Ð6*Ô6+Ý90æ=6ë>7ë:4è71è50â92ßH=×K<ÓE7Ë=/Ä6(Â3%È6)Ï8-Î4*Ô6-æC<ß82à21Ò  ì89å63Õ1(Ò4)Ò4)Ï5)Î6+Ê6*È5+Å7+ãYNØRFÊH;¾@2·;/µ;.µ<1µ<1¯9-±;/±=0±=.¯;,­9*­9,®:-±<2ÐYQÜb]ìppôvyîmrõq|ör}ÿzƒÿ‰ÿy€ôhq÷epÿq|ÿuÿktø^jöOaÿA^÷8WéG^Ûntº‹™©„‚žn~šg—hœq¡©‚´º–ÈÍ­ÕܽÌÙ»ÁÔ´²Ì©§ÂŸž¼˜š¶–²Œ±†‰³†µ~¹„•À‹œÂŸÄ‘¨Å™²È¡¾ÎªÉÒµÚÙÄæßÍêÞÒéÖÏáÌÉÛÆÃÔÁ»Î½µÈ·°ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;XK;YL<YL<ZM=ZM=ZM=ZL?ZL?ZM=[N=]M=^O<\O<\O>YM=WMCWLHXNOZPX]Ub`Yia\rjfokŽsp›vq§{u³ƒ~ÁŠƒÉŠƒÇˆÀŠ~¼ˆy²ƒr¨†r§t«‡l£†\„HL¤B9¶A:ÍC@Þ@?è:;ô<>ÿDE÷><ó=<ì>=ê@AëBIêCMëANãALÐ>>Å@9¾?6·>3°</²<0ÀD8ÒMDÇ61Ô;6Í2.Ì3.ÖC;Ç?3µ6'»>,º7%Á:'Ã<)Â;(À9%Â9&Å:'Ç:(Ë:'Ð9(Ö6(Ø2$Ú."à/%ì7.õ=5õ82÷40÷0-ø**û&(û%'û%'û%'û&*÷!)ø".ÿ'7ÿ(>ÿ!;ÿ6ý1ÿ'8ï#,æ#+ã+-Ý--Ô+(Ò-)Ü41â62è64î66ò65ò12ï./î,,å/,Ü92Ñ:1Ò91ìSK»"Ä+#ßF>È.&Í.(Þ<7èE@â:7Ø/*Ú.*â51å<7ß>4Õ<.Ï5)Ë3&Ì5*Í;.Ë;0Å8.ìbXàZOÈC:¹6,º:/¸8-±3'´6*°0%°2&®2&¯4%°6'²:*´<.³=1°:0ºC;Ö\Wìppñsvöx|ûyƒõq|ÿ|„ýw€út}üs{ÿr~ÿr}ÿmxÿgsü`nôI\ÿ>\ÿ>[äAVÔek´•€’¡zƒm}™f~”c‡—j”Ÿwž¦ ¨ƒœ¨„’£Š£|€Ÿvzšqyšo|r 
vƒ¤u‚­wƒ³y‹¸‘¼„“¾†—¾‡œ¾Œ¢½§¼“¯½š¾Â§Ï̹ßÓÇãÒÊÜÇÄÔ¿¼Ìº¶Á²«¹ª¥ZM=ZM=ZM=ZM=ZM=ZM=ZM=ZM=XK;XK;XK;YL<YL<ZM=ZM=ZM=ZL?ZM=\L=]M=]N;^O<\O<\O>]P@[OC[NFYNJZPQ\SX_U^_Wfd\tibƒng‘qjxp¬€y¼‡€ÄŠÄ€Á~ºt«Œn¢—q¤¡u¨šjž™W{=Aª5+±5+Â<3Ó@8Ý>8è?:óA=õ=;õ;<ô;@ñ>Bî@IèAIâ@KÚAFÇ<9½=4¶<1±;/«:,®:+½A5ÐJAÚG@ÜA=Ô63Ð51Õ@9ÍC8»9+²5#½8'Â;(Ä=*Ä=*Ä;(Æ;(Ç:(Ê9(Ï9(Ï6&Ô2%Ú2%ã5*ê8.ï80ð91ç1&ç/%ê.%ï,&ö+'û+)þ,+ÿ-.ü*-ú%+ú$.ÿ'7ÿ$;ÿ7ÿ3ü0õ0ö.9õ3<å*1Û(,Ü.0Þ02Û+.ñ:>ô7=÷4:ö26ö/4ø03ù25ô87Û2-Ú;5×82äE?äE?Î/+Ñ2.Á"Ô51á?:èE@à=8Ø3-Ù2,Ý60Ý:1Ô:.Ï9*É5'Ç5(È9+Ê<0Ê<2Æ9/ícYáXNÉ@8º4+Â91Ã:0¿6,Â9/¿5+¾4*½4*»5)¸6)µ5(´6(°6)´;0»B9ÓYTèljïqtöx|þ|„øv€þ|„üyùv~þuÿsÿp}ÿkwüdqý_nöI]ÿ>\ÿ>\éCYÓeh®Žw‰˜oz•bu’\xŽ]cŠ•k˜p‹•p…”mp‡]l‰]h‰\g^k_q•ey›izŸk¬t„±vŠ·|»}¼“¼‚–½†›»‰›µ†¡³‹­¶—¾½¨ÏÆ·ØÇ¿ÖÁ¼Ï¼¸Æ·´º¬©²¤£\L<\L<\L<\L<\L<\L<\L<\L<ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<\L=\L=]K=]M=]N;^O<^O<\O<_RA]QA\PD[NFZOK[PN]QS]RZ_SgcXvh]…kb‘sk¤vµ†}¾‰}½—…Á™€·›v©žpž®u¢ºv¥µi˜±Rt¸<D¾5-º4+À<0ËC7ÓE9ÛC8ä?9ñ=<÷:>ù:Aö=Eï@GæAHÙ@EÏ@BÀ;6¸90±9+­9*©8(­9*»?3ÏF<ÙD>×96Ù74Ú;7ÜC=ÝOEÎH<µ3#¾9(Á:'Ä;)Æ=+É=,È<+É:)Ì9)Ï7)Ï2#Ó/#ß4*î=3ô?6ñ91é4+à4&ß5&â2%å/$ë,$î+%ñ+(ò+(ó+-ð(+ò&/ø(6þ%:ý7ü3ø3ô%7ò0;ó5Aò8Cð;Dð<Eé5>ä,6ð2<ó/;ö.9ö-7õ.3õ.1ó/1í42Ü0,â=9Ú64ß;9ÿmjåCAÑ/-Î/,Ù:7ß@<àA;Û<6Õ7.Ö5-Ö5+Ò6*È6'Ã6%Â6'Ä8)Å;.Æ<1Æ<1Å;1ë`YáTMÉ<3¿0(Ë;3Ñ>6Î;3Ò=6Ø=8×<7Ô=6Ï<2É;1Ã9.¼6*¶6)¹;/¼@6ÏTMãgeíorøz~ÿˆû|ƒû|ƒûyûx€ÿwÿsÿn}ÿgvû`pü[mõG^ÿ<Zÿ>\ìFZÎ`až~ev…Zi„Qh…OmƒRt…X{ˆ]}ˆ^x„\q‚X`zM_Pa…UeYn•`uœg} j~£mƒ¬r„²tˆ¶x‹¹y‹¹y¹z‘º~”º“±•¬€›©†©­’ºµ¢Ç¹®Ë¸²È¶´¸¬¬ª¡¢ 
—š[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<]K=]K=]L<^M=^M;^O<^O<^O<^Q@^Q@]OB[OC\NE[NF\OI]OO^P_bTmdY{i^‰pgœ|t­…{·ˆ{³™…º¥…´¯€ª¸z¡ÆxŸÑuœÌcŒÈMlÜFRßA@ÕA=ÏE;ÌH;ÍG;ÓE9ÞA:î@Aõ<Aú=Dø?GðAHâAFÒ@AÆ?<¿?6¶<1¯9+«:*ª9)®8*¼>0ÎD:Õ<7Õ31à;9åA?áD?èSLßUJ¿:+Á9)¾7$À7%Ä;)É=,Ê=,Ë9*Î8)Í4&Ò2&Ù2)å9/ñ>7õ>6ð5.ä/&à6)Þ6)à4*ã1'ç.)ê-)ì*(ì**ê),è',ë'1ò)9ö&<÷!;û=û%Aû6Hä(6ç.<ÿP\ÿ`lÿP\ò<Ió9Gè)8í*8ñ-9ô0:ñ27ê01á+*×(%Ø0-Ú72Ô2/åC@ÿspõVSÝ>;åGDÛ=:Ú<9Õ:5Ò80Ñ7-Ð6*Í4&Æ3#¾5#¹6$»8&¿;,À</¿9-À:/Å<2åXQÜMGÊ70Ç0)×<7ÞA:Ú;5ß<5ç>9é=9æ?9á@8Ù?5Ð<2È:.Á9-»9,»=1ËNHàc_ìnoú|ÿ„ˆý~…øy€øy€üyÿx‚ÿt‚ÿn}ýetù^n÷VhóE\ý8Vÿ<ZîH\ÇYZŠlRcrG\tB\yCdzIl}PtVvƒXrXoWm‡ZlŽ\o•buh{¤l¨o„ªq…«p…­q…±tˆ´u‰·v‰·v‰·v‹¸w¹z‘´|«{’¤|›¤…­«–¾²¤È·¯Ç¸µ´«®¡ž¥—’™[K;[K;[K;[K;[K;[K;[K;[K;ZJ:ZJ:ZJ:[K;[K;\L<\L<\L<]L<^K<^K<^M=^M;_N<^O<^O<^O<^N>^N>\O?^NA]OB^PE^OJbNYbQdcTqdZ}le‘xr¢w¨ƒv¤”}§¬†«Â‰©Î‚žÚw”ál‰ÛXwÚD_óBTúDPïJPáIHÐE>ËE<ÐF<ÙD>èBBð@Cõ@GõBHíDGßCDÏA=Â?7¿@7µ=/®:+«:*ª9)®8*»;.Ì?6áFAâ=;îBBëAAÞ;6ãJBæXLÕK>È@2À8(¼3#Á8(È<-É:*Ë7)Ð8+Î0%×3)à8/é;4í:5í60ë0+å,'ã0,ã1-ç10ì31ó25ö37÷48÷6;ì-5ê,6í.=ñ0Có-Fö)Gý+Nÿ3Rô9Lå3?ð@Mÿ_lÿlzÿWfõCSõ>Pî3Dí3Aí3>é6<ã99Û83Î4*Ç/$Ë2*Ç.(Ð72òYTö\ZòXVÛA?âHFÜB@Ô<7Î70Í6-Ï8-Ï9+Ê7'Â5#¹8#³9$µ:(¸=-¹;,¶6)¼7.Æ=5ÛNGÙEAÎ50Ð1-á>9ç@:ã81ç51ë20î21ë52ç83á:2Ø:/Ð9.È:.¾6*º8+ÈIBÜ_Yënlû}€ÿ…‰ýƒõv}öw~üyÿx‚ÿsƒÿk}ýdvù^pôSeôF]ý8Vþ=ZïL]ÀTRz_BWh<Yq?[vCfyKn~QtVx…Zyˆ_wŒay•exšhz 
m|§qªr„«r…«p†©o‡­rˆ°t‰³tˆµt‡µt…·r‰¸t‹¸w¶{Ž¬xŽ£x™¤‚­­•À¹§ÎÀµÒÄÁ¼¹À¨ª¶œœ¨ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:ZJ:[K;[K;\L<\L<]L<^K<^K<^K<_L=^M;_N<^O<^O<]N;]N;^O<_O?_O?`P@aQAbPFbNPcN]_Pe`Usga…to—{t}q—v–®ƒŸÌ‹¡Û‚–ær…ëduéPdé;Rù3Lÿ<QûERéDKÕ?@Î@>ÑC?ÖEBãEFèBDîAEíCFçDEÚEAËB:¿?4»?3±;-©8(¨9(ª9)°8*¼:-Ê;3èIEé??ë=>å78Ú2/ÞA:îZPôh[ÚPCÉA3¾5%Â6'Ç9+È9+Ë7+Ò8,Ñ0&Ú6-æ;4é;4ê40è/,ê-+ë--ó49õ3;ø3=ü3=ÿ1>þ0=ý1=ù1>õ0Aò1Bô4Kõ5Nõ0Nö-Oÿ1Xÿ>_æ3HùO\ÿanÿ_nÿWfûP`ôDXé8JóAQê:GÝ2:Ò/2Ê2-Â8-½;+¼:*Ä:/À2(ÙJBúkcÝLGáPKÏ;9Ï;9àLJÔ@<Ê70Ê7-Î</Í<+Ë:)Ä;(´;&­:%¯<)³>-±9)¯3'º:/ÊD;ÖGAØC=Ô72×2.ç;7ì:6è2.í2-ò,-ô,,ò./ï31è71â:1Ú<1Ò>2Â6)º4)ÅB:×XRçjhú|}ÿ†‰þ€„õv}÷xýz‚ÿx‚ÿqÿi{ûbtø]oòQc÷I`ÿ:Xý?[ðO_ºQNqX:Sf8]uEa|InSv†Y{ˆ]~‹`g‚—l€œl}Ÿm|¤o}¨p€©o‚ªnƒ©l…©lˆ«qˆ®qˆ°r†³r„³oƒµp…·r‰¸t‹µv‰¬tŽ¥wš¨„°´™ÈïØ̾ÚÑÌÂÂÌ«±Á›¡±ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8ZK8[L9[L9\M:\M:]L:^K<`J<^K<_L=^M;_N<^O<^O<^O:_P;`O;aP>aP>aP>aP<bOAeMMdMW`O_aUmgb€uq’|v˜€s‘uŽ­€”͆”ß~‡îqyùclúP[ú;Mÿ*Fÿ2Qÿ<Sñ<Mà<E×@EÕCDÔBCÜCEàBCäADâBDÞDBÔC>É@8¾>3¸>1¯9+¨7'©:)­<*´<,Á=1Ï?7ß=:å99ã03â/2à42Û94æMEülaòh[ÙQCÆ</Æ:-È:.É7*Í6+Õ8/Õ4,Þ7/æ93è62é1/è/,î1/õ15ÿ2Cÿ0Eÿ-Fÿ*Bÿ%@ÿ#=ü 9ö 
8÷'Aõ+Eõ.Mõ/Pó+Pô)Tÿ2`ÿAhóCXÿ`mÿhuøVeíKZñL\ëBUÚ2Cë@RÝ7CÉ-1º((±.$«8&¥?&¨@'¸?.¼:,ÝYMá]QÍG>ÍD>Á63É;7äUQÕF@É91È8-É:,È9(È9(Ä=)±?'¦<&¨;&¬=*«7(¬3(¼=4ÓLFÖGAÝE@Û96Ü30é73ì51ë0+ô1-ý,/ÿ+-ú./ô1/í4/æ81ß;1Ø>2É9.½3(À;2ÏNHáa^÷yzÿˆŠÿƒ‡ùzû|ƒÿ{†ÿx„ÿn~þew÷^põZlëL`õIaü9Wû=YîM]²JGhQ1Qd6\tDf~Nuˆ[Žc„h†’jˆ—n‰žsˆ¤t„¦t§r}¨p¨n©mƒ©l…©l…¨n‡«n…­o…¯o°l€²k‚¶n…·r‡³t‡­t§wœ¬…²¹šÈƯ×νÙÒÌ»¿Ê¤¬¿’š­[J8ZK8[J8ZK8[J8ZK8[J8ZK8[J8ZK8[J8[L9\K9\M:]L:]L:^K<^K<^K<^M=^M=^O<^O<\O<`Q>aR=aR?bQ?bQ?aP>aO;bL>hOKgNRbP\cWkjfuv”}|œx—‘z–«•Æ„’Ú}…ðuzþlmÿY[ýDLþ/Iÿ5Rÿ<Uó<Né?LãCKÝBHÖ=@ÛACÜ@AÞ@AÛA?ØC?ÐA9Ç>4½=0µ?1¬;+©8(­<,´?.»@1ÊA7ØC=Ù74á85Þ..â30æ:6Ó0'Ï5)ëUGÿwjë[PÑC7Ê<0Ë;0É7*Í6+Ô:0Ú70ß82å63ç32ê01í12ô36ý4<þ%6ÿ#9ÿ"9ÿ"9ÿ!8ÿ6ý5ø6ï2î9ð%Bï(Gî&Jñ'Mÿ3\ÿEhÿbuÿ`l÷WcñQ]òP]ïMZâ@MÔ2=Ú=FÌ7;»/.®/(¦5'¢;(›?&œ>%ª9'²8+ÒVJ»;2Ä?8¾41½2/ÎC@åWSÕHAÈ91Ä6*Å6(Ä5%Æ5$Ã:(±<(§:&¨7%­9*¬4&¯1%Â?5ÛRJÙIAáHBÞ;6Þ2.ê41í1/ï-+ü22þ,/ý+.ù+-ó++í-*æ1*ß4,Ö8,Ñ=1¿2(¼7.ÈIBÙ\Xówwÿ‰‹ÿ‡Šû€…þ†ÿ}ˆÿv„ÿj|þ_s÷XlöTiêD\õD^õ6Uñ:VàLZ¥HC^K-Mc5Uo?b}JtŒ\€–g‡˜lˆ›nŠ r‹¥vŒ¬z†«wªr~§m|¥i}¥g¥g‚¦i‚¥k‚¨mƒ«m‚¬l®h~°i³j„¶o‰¶s‰¯rŽ¬x™°„­»˜¿Å©ËʵËͶ½Åžªº‹—§]K7\K7]K7\K7]K7\K7]K7\K7^L8]L8^L8]L8^L8]L8^L8]L:]J<]K=^L>\L<\L<[N=]P?^SA`SB_R?`P@bQAcRBdQBdNAdLBjMIhKMdPYf[lnk†w{ž€…­‰ˆ²ƒ©¥‡©¾ˆ Ð‚’ây~ðsqújbÿa^ùJWñ>Që:Lî?NïCQê@Kæ<Gå>Fá<Bß<?Ú<=Ö<:Ï>9Ê=4Á;0¸:,§6&¯A0­<,¬6(¿A5ÌF=Ì=5Ð72Ú85Ü71Þ5.Þ6-Þ8*Ý<*×:'Ó6%Ù;0ðSLòZOÙE9Ç8*Í>.ÔB3Ò:-Ô3+ðGBá//î5:ó6=î,7ÿBMñ'3ú&2ÿ)2ÿ(2ù'ý"*ÿ'/þ'/ð&ð)ù+7ê 
.ì&7ñ-Aç#;ÿXrç-Dÿ]lÿ_iþZcùU\õPWëHMÝ?@Í84Ë>7ÄA7·?1¦7&™2!™6#š;'Ÿ<)§8-®7/ÂE?¿;7½31Ä64Å54ÔDCÛLHÚKEÑD;È:0Ä4)Ç5(Ë4)Å5*¼</³9,­/#«(³-$Ã9/ÒC;ØH?äPFßE=Ú70Ý0*ç0,ò21ù13ý14÷/1ö01ö01ó0.ò0.í2-æ5-Ý9/Õ=0¾2%À>1¹=3Ö_Yésqÿû„†û‡Šÿ„Œÿxˆÿh}ÿ_wÿ[uÿUoøLføEcñ:Yô;[æ?YÛ]i‹@;VF-L^6YuBg†Mw–]}œcg€¡jƒ¦n‚¨o€§n¦m~¥l}¥i|¤h}£f£e£f¥l¦m«l¬k~­g®f±gƒ³i‡µm‹´r°vŽ®|™´‰§½™±Á¤²Á®¦²°˜žz…‹]K7]K7]K7]K7]K7]K7]K7]K7^L8^L8^L8^L8^L8^L8^L8^K:\K;^L>_M?^N>[N=[N=\Q?^RB_SC]QA]OB_OB`NDbPFcOHeNHiMJgNQeS_g_tnny~¨€‡»‡ŠÁž•Ê£‹»«£¹xÓyƒê|{ôvjöi`ø]aóOZìFRòHSøIVõEPï?Jë;Eä9?à:<Ú::Õ=8Î?7Ç>4À</·<,«<+ª?-©8(²:,ÁA6Å<4Ç61Ô<7Ò50Ö5-×5*Ø4(Ù8&Û:&Û;%Ú9'Ý6.æA;ëMDáI<Ð>/Ç8(Ê8)Ó;.Þ;4öJFä01ð6;ö6Añ.<ÿ@Nï'4ó$,÷%(û),ý+,ü*+ø((ö((÷+,ï$'í%(ê%,ý;DÙ'ð4CüARØ&6ÿjuÿbjþZaüW]ôOSåBC×<8Ñ>6¾8,¿D5¹H6©>,™/)(’(ž-%°:6ÑTPÎHGÃ54Æ45Ñ==ëWUØGBÖG?ÐA9Ç9/Æ3)Ê6,Ð7/Î70Ç=3Â91Â5.Ã2-É4.Ñ83Ø?9ÞE=äJ@ÞA8Ú6-Ý0*ç0,ò21û03û03ô02ó12ô01ó/0ó0.î1-ç4-ß9-×=1¿3$¾</·=2Ó^Wévsÿù…ˆø…Šÿ‚Šÿt…ÿf|ÿ\vÿXtÿQoþIhü@aó:Zñ?_áI`ÁS\{?7RF.M_9ZvCe‡Ks•Yxœ_wb{¡f}¥i}¦j|¥i|¥i|¤h{£e{¡d{¡d}¡c~¢e|£j}¦l}©l}ªi|«e}­c¯c‚±c…°h‡²kˆ¯pˆ«s‹«|¬„ª‡Œ¢‹x‡€dppR^^]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8]L8^K:_N>aP@`P@]P?\O>[O?[RC[QE[QG\OG\OI_PMcQOfTTgUUgUUgV\h]nli†sv£|ƒº‚ŠËŠÔ˜‘× ŠÊª‚´¹}ŸÏ€“䄆îvósjûeg÷S\íGQðFOöHR÷EQòAKî=Eä7=à7:Ù99Ô<7Í@7ÅA4¾?0¶>-­>-ª<+«:,¸B6ÇH?À;2»0)Ê:2Í6/Ð6.Õ7,Ø8,Ù7(Ù8&Ü9&à:*Ý4-Û4.âA9éOCÞH:È5%È2#Ù@2â>5õHDè13ò5;ù7@ø4@ÿESø2?ð)0ë#&ì$'ù13õ-/ï''ì&%õ12ë)*ö8:è+1â)1ë3=ÿR^ï=Kÿ[gþ^fñV\îOTðMRêEIÝ:;Õ74Ô?9À6,Á?2¼B5³=1¬7-¥3)¡,#œ' š%©/*ÇECÂ::¼,,È35×ABô^]Ð?:ÒC;ÐA9Ë=3Ì9/Ô=4Ú@8Û@;Û@<Ø=;Ý>;ãA?ä>>à::ã?=ëIDàB9Ý<4Ù5,Ý2+ç1-ð31õ12ø02ô02ô02ô01ó/0ó0.î1-ç4-ß9-Ø>2À4%º8+µ;0Ð[Tízwÿ“’û‡Šúƒ‰ý~‰ÿr‚ÿdyÿ[tÿTqÿNmÿFgû=_ô=\îEbßTg¢AHp;3SH2Qa<]xEf‡Nr“Zu˜^tš_xžc{£g{£gy¢fy¢fy¡cx byŸby a| bz 
cz¢f{¤h{§h{¨e{©a{©`}«`®`‡²jŠµn‹²s‰¬t†¦w‚žuy“no†lSeYAPM2A>^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^M9^L8`N:aP<`Q>]P?ZN>YOCXPEXOH[QO_UTbXYeX_iYcl\fk^glaildqok‚rtšy~µ†Ê…ŒÚŒŽáŽ‡×Ÿ‹Ó­ŠÄ³€©½yŽÌ{Þzò‚wÿsrÿcgùU\ôMTõGPôCMò>Gï<Bå6;à88Û97Ô<7Í@6ÅA4¼A1·?.«:*­>-±=0¼F:ÌRGÇH?º5.º1)È91Í81Ó;0Ù=1Û;-Ù6'Ü6&à8+à5-Û2+à=4ìNBãK=Ò:,Ñ8*àB6ß7.ð>:é/0ð16÷4<ü8BÿNZÿLUý>Eò38â$&í/1ê,,ð22è**ë/.ò::Ñæ37Ý,2è9@ðEMÿXcóMWØ=CÚADáBFá>AÞ9=Ü89Ø88Ó97É83Â91½:2ÀA:ÍNHÖVSÐNNÃC@²72¶95ÏKIÙMLãQRïYZêRQñYVÉ51Î>6ÒB9Ð@7Ó?5ÙB9àC<â@;é?@ç8=ì9=ò?Cï:?è58ì>=öMJÞ93Û60Ú3+Þ3,å4.ì30ò21ô01ô02ö/2ö01õ/0õ/.ð0-é4-à8-Ù?3Â6'·5(³9.ËVOï|yÿ“’üˆ‹ý‰þz‡ýoücwÿXqÿOlÿFhÿ@cû9\ô?^éKdØZh…35g;0WO8WgCa{Kj‰Ps’Yt•\u˜^wby¡ez¢fw dv awŸavž`xŸ`xŸ`{Ÿaz cz¢fz£gz§fz§bz¨`z¨]|«]~­_†²g‹´nŒ³tŠ­s†¦u€qtŽii€dQcUDSL8G@^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M;^M9]K5_M5`P9_P;\O>ZN@XOFXQKYPQ`W\g`hnfsshysg{sg}qh}ol}pq†rw—v}«z‚Á‡Ò„ŠàŠ‹å“ãŒÚ „¿žuŸ l‚¬nq¼vlÒviòsmÿllÿgiþ^`ûRWõFMï<Bê7=æ5;á78Ý98Ö=7ÎA7ÆB5½B0¸@/­7)´@3¯:0°;1ÈRHÕ\QÉMC»;0¾90Á7-Ì9/Ö>3Û=1Ú6*Ü4'â6*à3,â70ä=5åE9âF9ÞB3ÞA2â>2Û0&ê72ì0/ï.1ñ.4ú7?ÿPXÿ]dÿZ_ÿOSá.1ã03à,-÷EEè66Ü,,Û-.ë>@Ü25Ù37ÿbhà=BË*2Ê-4Á&*Õ;=á@EÛ6:Ù37ß9=Ü7;Í/0Ì43Í;;ÕGFÞRSì`cõilôekê^aÄ@>ÆE@å^[øhgÿopÿjjåKK×=;É40Ñ>7×D<ÖC;Õ>5Õ<4Ø:1Ü41î5;ñ-7ð,6ò.8ï,4ë,3ó9<þJKß3/Ü3.Ú3+Ü3,ã5.ê40ï4/ò21ô02ö/2÷/1ö.0õ/.ð0-ê3-â7-Ù?3Æ8*´2%²8-ÃNGï|yÿ’‘ÿˆŒÿ‰ÿw…úl|øatûTnýHgÿ>bÿ:_ú6\óAaàNeÂWap/+_?0[W>^lIe}Mk‰St’\u–_v™_xžcy¡cx buŸ`uŸ`u_vž_wž_xŸ`{Ÿaz¡bz¢d{¥e|¦d|§`y§^y¨Z{ª\}¬^€¬a…¯g‡®m‡ªp…¥sƒ 
r{•nrŠjexe[k^RbU_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N<_N:^L4_N4`N8^O:[N>YOEZSMZTT_ZahbprmzuŒ{u‘{q“zp“vp’qt“rxšt}¨w¶zƒÄ~†Ï„‰Ú‹ŒÞ–Ü™ŠÍ›…¶¢ƒ£ª„‘«~yŸl[šQ>¼MBÔQIâWRëWUñSRñKMí>Cå4:é9<æ9;à;9Ø=8Ó@8ÊB6ÀA0º?/³9,ºA6«5+£.$¾I?Ùg\ÙdZËUI¸<2¸6)À3)Î:0Ù;0Ú6,Þ3)ä6-ß1*ä71ã;2Ý9/Þ>0ãF5äB3ß9+ß1(é4-ð31ï-.í*.ô17ÿEJÿV[ÿ]bÿ\aâ9<ã9<Õ+,ôJKâ89×//Û57Ì()Ì'+øXZÝ>BÕ9=Ñ6:º#Ä(+Ü<>æAEÝ49Ù,2Þ17Ù05Í(.Ò37äKNöaeûhnùcl÷`iú`løbkÖLLÕNJê\ZêVTçMMãHFÏ42Î3/ÙA<àKDäOHßJCÙ@8×:3×6.Ü.-õ2:û-:ø*7õ'4õ)4ø0:ÿ>CÿJJà1.Þ3,Ü3,Ý5,á4-ç4/í4/ð3/ô02÷/2ù.1ø-0ö..ñ/-ë2-â7-Ù=1É;-³1$±7,ºC=ìyvÿÿˆÿ|ˆÿqöhxö_rùRlûDcÿ9^ÿ6]ú6\ðFcÑN`¢IMe5+VD0[Y@^lIe|NkˆRt‘[w•_x™`{ždz cvž`s›]rœ\s›\tœ]v^wž_{Ÿaz¡by¡bz¤b|§b{¦_x¦[x§Yz©Y{ªZ~ª_ƒ­e…¬k‡©m‰§s‰¥u„s|”ru‰pnlfwe_P=_P=_P=_P=_P=_P=_P=_P=^O<^O<^O<^O<^O<^O<^O<_N:aP6aP6aO9^O<[OA[RK\VV_Zakgvso†}z—‚~¡|¥}w£{u¥xv§sy©s}°x¸{„¿~†Ä„‰ÉŽÎ”“Íš“Ç •½«›µ¼¦²É­©Å¦”ªˆmœfJœE2¯A4¶@4ÂB9ÖHDèNNîJKì?Cë=?é;=ä<;ß=:Ù@8Ð@5È>1Á=0½:0¾?6¯4,§1'»I?ÑcVÖh[ÕeWÃOBº>2º4)Ç7,Ö90Ù5,ß2+ç60â0,ã4/â7/Þ7.à>1åC4ä>0Þ4%æ4*é0+ð3/ð..ï-.ò14ó78öBEöMRüY^éFKöQWØ37ëFJÝ8<æAEÚ7:Î.0Ñ05äHKÒ7;Ð7:¹#%Õ;=Û8;å;>ë<Aç6<â-6Ý)2á1;ç<DðLSý]eÿksÿjsú`jöZgùXh÷[fôddèZVèWTØ@=Ï10Ô44Ó33áC@ãJEéRKêSLáJAÙ?7Ø93Û81ã33ó.7ÿ0>ÿ0=ú*7ü.:ÿ;EÿCHûABã1/ß4-Ý4-Ý5,â4-æ3,í4/ñ40ô02÷/2ù.1ø-0ö..ñ/-ë2-ã7-Ø:/Í>0µ1%±7,²;5ésqÿŠŠÿ‡ÿv†þi}óató[p÷PjúCbÿ7`ÿ7aù;_ìLf¿O[~86]=0NF1VX@ZeEcxMj…RrŽ[v”^yša{ždxžas›]p˜Yp™WqšXqšXt›\uœ]y_xŸ`x¡_y¢^z¥^y¤\x¤Yx¤Wy¥V{§X©_‚¬d„«h†¨l‡¥o†¢q€šmx‘jq…ij~cbv]_P=_P=_P=_P=_P=_P=_P=_P=^O<^O<^O<^O<^O<^O<^O<_N:cQ9cQ9aP<]P@ZPG\TR_Zaa_mtrŠyx˜€¨ƒ°~~°yy¯yu°wwµw|¾x€Á}…Ä‚ˆÄ…ŠÁ¿—–¾ 
¼¬§»¶¬µ¾³±È¸«Ð¾¦ÖÁ¢Ñ»–Ьˆ¯oT®\G¤G5¨>0ÂG?àWQïWVñNOé@Cê=?æ<=â=;ß=:×=5Ñ:1É90Á80Á<5¹:3¸=5ÁOEÇXMÇ]OÑeXÓcUÂL>¼:-Å8.Ò91×4-ß2,ç60æ40á2-à5.ã<3çC7æC4â:-ß3%ê5,æ.&ð0-ï/,ð31ñ54ç-.â24çBF÷X]öW\ÿouåDJëHMà<Cþ[`ÿouÞ=Bÿ}‚ÿmqêQTÔ>@Æ02Ö:=òHKî9>ð7?ø=Fò7Bè-8ô>Jÿ]gÿmwÿjrÿcn÷]gú^kÿaoÿ]nùYeò\]ãPIâJGÕ74Ò/0Ù54Ò0.Ø95ßD?ãJDâIAÙ@8Ó6/Ö4/Ü71è66ê'/ü0<þ2=÷+6ù0:ÿ>Eý?Aì44â3.à5.Þ6-ß4,á3*è3,ï4/ô41ö01÷/2ù.1ø-0ø..ò/-ë2-ã7-×9.Ñ?2¶0%²6,°61èpoÿ‡ˆÿ†Žÿp‚ücxñ\pôXn÷OiûBbÿ9aÿ<döAbäRg°QWd/'WD3GG/RT<U^?btLiQq‹[v’_z˜b|dxœ_r™Zn—Un—Un—Uo˜Vr™Zt›\w›]wž]wž[x¡[y£[w£Xv¢Wv¢Uw£Ty¥Xz¤Z~¦_¦c¡d~œf{—frŒ]i‚X`vRZoPRgH\L<^N>`P@`P@_O?_O?_O?aQAbRB_O?]M=]M=aQAcSCbRB`Q>dS?aP>]P@^QI[QPZSZeapsq‰yxšz|¥~€±€¹}ºxz·rs´lo¶twÈv{̀ƒÊ‹Æ–—䤾²°»¼¹´ËƳÓÌ°ÙϬÛϧÜÏ£ÝУ٘٘Ѩˆ¸€g ^FËzgÖue½H?×QN÷ccéKLå?Aâ89ä::â:9Ü75Ù61Ö;6Ë81À5.ÇB;»<3¿I?ÆXK¹OAÎdVÑcTÌXIÅG9Ä:/Í6-Ø7/á6/ã4/ß0-ß2,à70ä=4à<0Ý7)ß5(å7,ä/$ê/&ì/)î1-î20ë40è51â64×79æJNõY]íNS×8=×6<ùX`ÿ~„ÿ~†ÿouÿgoÿipÿflõ\añX[ûZ_ÿUZÿDKü8Bû7Aö2>î+9ì0>ñ=HÿS_ÿ`iÿgrÿepþbmÿcnÿ`oý]gíSSßF@Ö85Ö41×11×/.Ø31Û94áD=ÜB:Ù?7Ö<4×90Û81à93æ66ð7<ó4;ó4;ö4<õ6;ò59ë33ã1/Þ3,Þ5.ß7.à5+ä3+é2,ï2,õ20÷12ø03ú/2ù.1ù//ó0.ì3.å7.à?5Ì8,¿6,¯/&¸;7ÒXWÿ˜šþ{ƒÿm€ÿe|ø\rðPhôGeüCeÿ;dù9`õNlÓRd‰=?P, C;&CG.KM5Y_CZjEcxMqˆZx’bx–`t•\r–Yq˜Yl•Sm–Rn—So˜Tr™Xt›Zuœ]uœ[t›XtWv Xw¡Wv¢Wv¢Uu¡Tt S{¥[{£]zž^u—[m‹Ub~MXrCSj>AW0?U/<R,[K;^N>`P@`P@`P@_O?`P@aQAbRB`P@_O?`P@bRBcSCbRB`P@fVG_QF\OG^TSaZad`omkƒvw–wy¢{}®}¸x|¹quµkp´lo¶nq¾uvÐ|~Չ‰Ó˜˜Î¨§Ç¸¶ÁÉźÓαÛתâÛ¥çÞ¥æÝ¢äÛ¤âØ£ÛÑ 
ÔǛӼš»œ€©}dŋwÊ}m´VJÒa[örpìZ[çKLã?@ç=>ë>@ç;;â66Û64Ö=8É83Ê?8¹6.¸>3½K@¶H;È]MÝo`Ô`QÆH:½5)Ä0&Ô6-Ü5-Û0)â51å95ä=7à<3Ü8.Û5)ß4*ã5*ì7.í5+í2+ë0+ç0,ã2,á4.Ý52Ö87Ø>@äJLóW[ú^bü\dþ]eÿaiôS[ïNVñQYÿ_gÿjqÿflõZ`óNTí4:ñ,5ô*6÷-9ñ'5å!-æ'6í7CþPZÿZdÿblÿdmÿenÿcmý[hõQZåEEÞ<7Û64Þ63ß55Ü30Ú2/Û62à>9Û>7Ø;4×:3Û81ß82å95ç:6é9;ê7:ë7:î79î79ì65å31ß2,Ü3,Ý6-ß7,â6,å2+ê1,ò1,õ20÷12÷03ú/2ù.1÷//ó0.ì3.å7.ß<3Ô=2Â8.¶1*·74ØZ[ÿ“ûs}ÿfzÿ^vùWo÷OiùFfû?bü<cô@cåOh¹KVx;8L2#<:#<B(EG/OS8VdA`rJnƒXu_v“]t“Xq“Vp”Tk’Ok”Pl•Qm–Rp—Vr™Xsš[sšYt›XuWuŸWuŸWtžTržQqRpœQn˜Pl”NhŒLbFXu?Ni6F]/@W+@W-@W-AX.YL<[N>^QA^QA^QA^QA_RB`SCaTD`SC_RB`SCaTDaTD`SC^PCbUM\QM[QRcYbkdtqm„vu•y{¤y{¬y~¶x|»nsµei°`g¯jn¸ruÆ}}ׇ„Ý—”Û©§Ø»¹ÏËÊÈÜØ¿åá´éäªíè¦ðé¥íå¤çá§âÛ§×Ï¡Ëܸ¯©››ƒk¨ƒp§o`œRGº]UÝmi÷uuï__äJLå@DëADì>@è8:â66ß=:Ô;6ÔA:Â70·7,·>3®=/¶F8Ûj\ßi[Ù[MÊB6Å2(Ñ4+Ù6-Ù2,Þ52éA>ìGCà=6Õ2)Õ1'Ú2'Þ0%æ4*è1)ç.)æ/)ã2,â7/á=4àB9Æ-(Ê43ÜDCðVXú^b÷ZaóS[ñQ[èFQêHSïOYü\fÿmvÿntôY_ã>Dé.5ô-4û2<ü2>õ+9í'4ö6EÿKW÷ISõPWøW_þ^fÿckÿ`i÷RYéBIÞ97Þ71ã75é<8ë;;æ95á51Ý60Ý:3Ù;2×90×90Ü71á83ç:6é<8ã99ã99å97ç98è88æ74á40Û2+Ú6-Û7-Þ8,â6,ç2)ë0)ó0,÷10õ12õ13ø02÷/1÷//ò0.î3.ç6.Ý90ÜD9Ç80»4.µ/.ä_bÿƒˆølwþbxþWsúPmþLlÿCfö:]ó;_ìIfÔSgDJl;4O>,=<':?(ED/FH0Q\<YkEg|Qr‰[tZqUmRkOiMi‘Ki’Lk”Pm–Tn—Up—Xq˜WuœYuWtœUrœTpšRm—Mk•Mj”LeGaˆE[AUt;Li3E`-AX*>U'B\/E_2Ic6WJ:ZM=\O?^QA]P@^QA_RBaTD`SC`SC`SCaTDaTD`SC_RB\PDZOMZQVbXcjcsqm†vu•xz£z|­w|´sy·mr´bi¯]d¬ag±lr¾x{ʉ‰ß•‘夡䶵áÇÄÙÕÑÎáÝÄçä¹ëæ¯îê­îè¬éâ«ãÛªÚÓ©Ë¡»¶™¥¥›™„“ˆv˜q™rc›bW³i`Ïrmû‹‰õutê[]æLNèCGí@Dì<?é;<ß:8Û<8ÞE?ÓC;Ã:0º:/°6)§1#¿I;ÛaRêk\ÛSEÉ:,Ì4'Ô6+×4-Ù42ñMKþZXíKFÙ80Ö3*Û4+Þ2(ß.&á,%á,%Þ-%Ü1)Ú6,Ù;0×?4Â/'Ä4,Î:6ÙCBßFHàEIäGNêMVñQ]öVbøXbù\eÿgpÿmuôYaÞ;@ê17ñ.4ò-6ð+4ë%2ì(4û=IÿS_í?IèAIêGLôQVÿY`ÿX]ôGMä68á53ä73ê;8ò@>ô@?ï=;æ95à72Ý82Ú91Ù80Ù80Ü71ã73è96é:7â:7à;9ã:7æ:8ç:6å84à5.Ú3+Ú6,Û7-Þ8,â6,ç2)ë0)ò/+ö0/ô22ô23÷12ö01ö0/ò0.î3.ç6.Ü8/àF<É91¾3.¶,,îfjÿy€øgtÿbyýTqþKkÿGjÿ>cò6Yë=^ãQhÄXeˆBBgC7[N;GF1AC-JH3EE-LU6Sc>btLlƒUp‹XnRkPiŽKgIhJh‘Kj“Mk”Pm–To–Wp—Vt›Xs›Uq™So—Pj“MgJeŽHdG`‡D]AWy=Rq8Mi6Lf6Ne7Of8Lf7Oi9Sm>UH8WJ:[N>\O?]P@]P@_RBaTD_RB`SCaTDaTD`SC_RB_RB^QIVMR_Xhnf{tp‰xt—xw¡xz«y{´sy·jr³`g­[c«_g°jr»v|ƀ„Γ•àžžä­®ç»½æËÉßÔÓ
ØÝÛÎáßÆåá¾æâ¼äß¹à׶ÚеÎƱ¾µ¦­§›˜šŒŒŽ€‰…yŠ|q‘vk›si¨meºoi扄î~ósrñedñWYðMNîDEì@@ã:7à;7äB=ÞE?Ì<3Æ=3ÄB5¯1#®0"ÇI;ÜXKÖN@Ì=/Î8*Ð6*Ñ3*Ö42õUUÿkhú]XãD>Ü;3ß;2ã80â4-ã2,â1+Ý0)Ô0'Í/$Æ/$Â0#Å9,À4'Á3)Ë:5×CAÛEGÛBGÚ@HàDOëOZòVaòVaú^iÿgoóYaáBGê:<í57í38ï28ñ3=÷9CÿHSÿV_ä6?Ý4;ß6;ëADùJOüIMô;Aé13è51ë95ï<8ò<9ó;9ð:7é73â70ß;2Ü;1Û:0Ü90ß82ä73è64é75æ:8ä;8æ:8è:9è;7æ95á6/Ü3,Û7-Ü8.ß9-á7*å3)é1)ñ0)ô1-ó32ñ33ô22ô01ô1/ò0.î3.ç6.Þ7/àC:Ê70À1-À13÷jpÿqzúftû]túPmÿCfÿ=dÿ9cõ8^æEdÙZm«V[wE>dJ=aYFTO;KI4PI6GE.GP3L\8ZlFf|Nm…SmŠRlŒMhJfŽHgHg‘Ii’Lj“Ol•Sn•Vo–Ut™Vs˜Un•Rj’LfKcŒHaŠF_ˆFZBX|?St;Pn8Ok:Rl=Wn@ZqCUo?Vq>WqARE5UH8XK;ZM=[N>\O?^QA`SC`SCaTDaTDaTD`SC`SCbUEaVP^Vcje|yu}yšzy¡xy©vy°sw´jr³cj°[c«]e®hp¹v~ǁ‡ÑˆŒÓ™Ý£¥à¯³ã»¿åÆÇãÏÎÞÔÓØ×ÖÑÙ×ËÙÖÇ×ÑÅÒÉÀÌþŹ¹´¨¬£šŠ‹†x{tyvquleleogŽa[`[ÀsmØyuð{ü|yÿrpýccöSTðHGïC?ê>:á<6àA;Ñ:1ÓC:ÝTJÂ>2¸6)½9,Â=.È?/ÏB1×C5Ö@1Ñ7-Ð2/êLMú_]òWSßD?Ù:4Û81à70æ93ç92ç92â:1Ú9/Ñ9,Ê8+Ä8)°(¸3"È@2ÓI>ÒD@Ë;:Ì7;Ï:@Ñ7AâHRíV_ðYbõ^g÷`iêU[ÜAEå@>ì?;ñACúGJÿLTÿQYÿT]ÿV^á4:Ú/5Ü/3ç7:ô=Aø;?÷48ó12í42ï95ð86í42ë20ê20ç40á4.ã<4à=4ß<3Þ;2á83ä73è43è43è88ç98è88ë99ë97é75â5/Þ3,Û7-Ü8,Ý:+ß7*ã2(ç0(ï0(ò1,ñ42ð43ó32ó11ó1/ñ1.ì3.ç6.à91Ù<3Ì71Á0-Ñ@CÿnuýkuùbqðRiõHfÿ;aÿ4^ÿ6bû>däNiÊ\i‹GFjF:]L<]VCXQ>RJ7OG4ID.DJ.GU2Sc>^sHgMj‡OjŠKgŒGgŒFfŽGfHgJi’Nk”Rm”Um”Uq•Uo”Qj‘NgŽKcŒH`‰E_ˆF^†G\‚E[~DWxAVr?Uo@YpB]rG^uG[sCYtAXs@NB2QE5UI9WK;XL<ZN>\P@^RBaUEbVFbVFaUE_SCaUEfZJh]YngwupŽ|x™{y wx¦vw­pu¯jo¯ah¬^f®_g°em¶pxÀ}…͉Ø—Ø™ 
Ö¡¨Öª±Û´¹ßº¿ßÀÁÝÃÃÛÇÆØÆÃÔÅÁÐÁ»É¼³Ä»°Áµ¨¼¦˜¯–‹œˆ…Œqqqtpopfewhe†nj„_Y•d_›ZT¹f`Ùsnðzvÿ{wÿwsÿhfüZWþRNõHBæ=6çD=Ø;4ÛG=ê\RÅ;0Ä<0½7+¾6(Ç>.ÑD3ÔB3ÔA1×A3Ò86Ø>>ÝCCÜB@Ö=7Õ81Ø7/Þ7/å:3æ93å:3â;3Û>5ÕA5ÑE8ÌG6®-¼;%ÍJ8ÑL=Æ=5»0+À00Ê9>×AJáKTëU`ðZeð]eìYaßLRÔ>@Ï4/Ø7/à;9ê@@òCHõDJñBIî?Fâ5;ß26ã36ï8<õ79ø14û/2ü14ï20ð95ð74ì0.è,*ç0,æ3.à3,ä=5à=4à=4à<3ã:5æ95ê65ì65ì57ì57í57î68î66ë54ä2.Þ1+Þ7.Ý9-Ý:+ß7*á4&æ1&í1(ñ1,ð42î53ñ42ò21ó1/ñ1.ì3.ç6.â;3Ò4+Ò:5Æ20çUXÿpxùcnòZiçI`óEfÿ8bÿ/]ÿ5bÿBhÜLe±PYj5/^G7XK:UN;[P>VI8M@0JC0@F,AO.K[6Xj@bxGfJhˆIf‹FeŠDc‹BcEeŽHgLh‘Oi‘Rk’Sn’RlPhNd‹JaŠH_ˆF_‡H_‡H_…H_‚H]~G]yF]wH`wKcxOczN]uC[vAZu@JC3MF6QH9TK<WK=YM?\O?^QAdWFeXGdXHbVF_UIaWMg^Ulcfwr‰yušxvžtsŸrr¤tu­lp¯bg©Za©_e¯fl¸ms¿v|ȁ‡Ó“Ý—œÞ›¡Õ §Ó©­Ú®³Ý±´Ý²´Û´´Ú¶´Ûµ±Ö³­Ñ®§É«¡ÄªžÂ¤˜¼™‹¯ˆ~™|x†dbgiefcYZj[X{c_yVP’c]ŠNF¢UM½`XÖkcñwrÿzÿzwÿnjþa\÷TOéGBîOIÝB=ÞG@êWOº,"¾1'¾4'Ç>.ÑE6Ë>-Á2"Ê7'ÛE7ÞE?Ó97Ì3.Í50Ô;5×=5Ý<4á=4ä;4ä92Þ5.×4+Ï5)Ê8+Å<,Â?-ÔS>ÈG2½<)¾:-Ã=2Ä;5Â74Â43ÜKNÞMRáPUåTYèY]åVXÜMOÓD@É5+Ñ7+Ø7/Ý52â24å26æ18æ.6ì4>ì3;ó5?û9Bý4>ú+5ÿ(3ÿ/9ø-3ù57ù59ð./ë*+é0.è51â5/ã<4à=4à=4á=4ä;6ç:6ë76î66ì46ë35ì25í34í55ê41ã1-Û0(Þ8,Ý:+Ý:+Þ8(à4&ã3$é3&ì4*î50î52ð42ð3/ò2/ï2,ì4,ç6.ä=4Í/&×>9Ì35ùbgÿoy÷[iðNcêC]ùEhÿ:eÿ0]ÿ6aöGhÉJ]“CFR,#VI8SL:OH6[O?UI9F?-HF1>B'@J(IU1Sd:_rEe}Ig„JeˆHcˆCa‰BbŠCcEeŽHfMhPiQlPkMfJcŠIaˆG`‡F`‡H`†I^G^F_}G`|Ia{Kd{Mf|Nf~N]xC]z@^{CDB6FD8JF;PG>RH>WI>]M>_O?`Q>aR?`SB_VGbYRgb_ojnso~wq“zt |y¨yw©pp¦gg£`a¢\_¤`b­fhµopÀyz̄‚֍‹ß–’眙栞ݡ¡×££Ù¤¤Ú¦£Ø¥¢×¥¡Ô¤ Ó¥žÑ£Í¢šÉ¢™ÈŸ–ٍ»Ž‚°ƒ|žrn}igld^`fZZkYUrWP}WNŒWO•RI¨ZPµXPÁZSØkdìyrù‚|ÿ‹†ùuqÿusí_]ßPLÙEAïZTÌ71Å2*Ä6*Ã7(À7'Ã7&Æ9(Ê;+Ð<.Õ?1Ò8.Ö<2×?4Ó;0Í5(Í3'Ø:/â@5à90á90Þ7.×7+Ï7)É:*Æ?,ÄA/Â=.Æ@4ËE:ÍG<ÌC;Å<4»2,¶+&Ç<7ÐEBÙOLàVSãYVáXRØOIÐC:Ì;*Ó:(Ü8,â5.è./ì*2ð(3ô'6ú->ú+?û*?ý)?ÿ(?ÿ&?ÿ$<ÿ$<ÿ$8ÿ'9ÿ+<þ,9ó+5í.5è45ç;9Ý84Ý:3Þ;4à;5ã:5æ95é73ë54ë35ì46ë54é54ç53â5/Ý5,Û5)Ü8,Û9*Ü9(Þ9&ß8&â7%ã6%æ4&ì5-í4/ð50ñ4.ñ5,í5+ê5*å7,à90Ø7/Õ:6Û?BÿmwÿbsûSjõIcÿKjö6[ÿ6_ÿ<gø>cçNj¯IV^$"D, 
?:'HE4PM<PI9EB1>C/?G/BH&EK%KS.S]8[kDavKd~NdƒJb†F`ˆBa‰@c‹BeFhJlPm‘Ql‘LiŽHgŒGf‹HfŠJe‰Ic…HaƒGdƒJdIbGc~GdJdHe~Ga}C[{<\=_‚B=?4?A6FC<JE?PE?TG>[K>]M=`O;`Q<^SA^VIc[Xhcinlyso†tp•vpžtp¢pm¢gfŸaaŸ]^¡\\¤ed°kjºtrÅ}{φ‚ٍ‰à”Žæ˜“å›–Ùœ˜Õ™Öžš×ž˜Ö—Õœ–Ô›•Ñž–Ñœ”Íœ“Ê›’ÇšÄ“‰»ˆ®‚xtk|mdif]^f[YiZSoWM{UJŠVKšVK°\RºYRÁXRÓfaàqjãvoí~wý‡…ÿ‡…÷usîgdãUSëZUÇ2,Â/%Å7)Ã7&À7%Á8&Ä8'È;*Ï;-Ó=/Õ9,Õ9,Ó:,Ò:,Ñ9+Ñ9+Ô8+Ø8,ß;1à90Ü8.Ö8,Ï9*È9(Á:&¾9&¾6*Á80Æ=5É@8ÊA9È?7Å<4Â91¿4-Ç>6ÑH@ØOGÚQI×NFÌC;Ã7*Ê7%Ó8$Ý7)å4,í//ô+3ù)7ý(:ÿ+@ÿ*Aÿ)Aÿ&Aÿ#@ÿ!<ÿ=ÿ:ÿ:ÿ 9ÿ%;ÿ);÷-9ð19ê7:å;;Ü94Ü:5Þ;4à;5ã:5æ95é73ë52ë35ì46ë54è64ä71ß6/Ü5,Ù5)Û9*Û9*Ü9(Ü9&Þ9&ß8%á8%ã6%ç5+ê3+ë4,ë5*ë5*ê6(ä7)á7*ß;1Õ4,Ó54óTYÿesÿ[põGbÿIhÿAdÿ;`ý3[ù;aòMmÍLb‰8?S*$A5'69(;<,B@1DA0@?-=B.>G,=CKP'\b<eoJhvRezQd~Od‚LaƒFa†Aa‰@cŒ@gDiŽHkMl‘Nl‘KjGhHf‹FgŠJf‰If…JdƒHfƒKdIdHd€FeGeGfEb~A]€>_„?b‡B69.:<1??7DA:KB;OE<VH=YL<^O<\O<\SB^WMc]]helom{sq‰so”qk™jf˜c`•\[”ZZ˜\] \_¦hjµno¿wxÊ€Ò†„ØŠˆÜ‹à‘ŽÝ“Ó“‘Г‘Г‘Ò”Ñ“ŽÑ“ŒÐ’‹Î•ŒÍ“‹Ç”‹Ä”‹À“‹¾Œ„³ƒz¥€r•|ewu^fi[[d[Vd[RfXKrVH„TH£ZQ½`YÉ\YÏYWÛcbákißokãvqûŠ„ÿŠ†ÿ…ÿ}y÷nhîaXÄ4+À2&Ä8)Á:'À9&À9%Â9&Æ;(Ì:+Ð:,Ö:-Ó5)Î5'Ð8*Ô>/Ô>/Î6(Ë/"Ú:.Ú8-Ù7,Ö8,Ð:+É:)Â9&¾7$½5)À6,Â8.Ä:0Ç=3Ê@6ËA7ÌB8Ç:1Ê@6ÏE;ÐF<ÒH>ÑG=Ê@6Å7+Ì6%Ö6&ß7,ç4/ñ03ö-5û+9ÿ*<ü';ü'=û%=ù#;û!:ú7ü7þ6ÿ8ÿ6ÿ$8ý*;÷2<ñ6=ë8;ä::Ü94Ü:5Þ;4à;5å95ç85é73ë52í36î47í55ê65å61á6/Ü5,Ø4(Ú8)Ù9)Ú9'Û8%Ý8%Þ7$à7$á6%á5'â4)ã5*ã6(ã6(á7(Þ6)Û7+Ý<2Ð1+Ø88ÿkrÿ]nþPiñ<[ÿHlÿ7]ÿ>eù5[î>bæVqª?Qa#(L/);7+07'37&9<)@=*A<(@>)BB&?FXa4s~T€Œdw‡`hSa}La€G`ƒC`…?a‰@cŒ@hŽEjJkMl‘Nn“Mk‘HhHgŒGh‹Kh‹KfˆLd†Je„KdƒJc€FdGf‚Hf‚GeFb€Bc†DeŠEhH25*58-;<4A>7G@:KB;RE<VJ<XL<YM=YQD^WOc^bigron€roŒrm•jg”`^WW‹TUŽWX˜\_¢ac­ln»rsÃ{|΂ƒÕˆ†ÙŠˆÛŒˆÜŠ×ŽÒÎŒÏŽ‹Ð‰ÑŽˆÐŽ†Ï…Ώ…ÊŽ…Ə…Á‡¼‡¶‰‚¬€xoŒ€_p|Y`lXYd[T^_Q_\IjWF}SEžUL¾\YÐY[ÙX\äcgçkkáplåxsì}vízsþƒ~ÿ†ÿ{õkaÉ<2Ä8)Â;(¿:'¿;&¾:%Á:&Å:'È9)Î8*Ø:.Ô6*Î5'Î8)Ñ>.Ñ>.Ë8(Ç/!Ò6)Ó5)Ò4(Ò6)Ñ9+Í<+È=*Ä;)Ä:-Ä:/Æ90Æ9/Ç:1È;1É<3Ë=3ÓD<ÓE;ÐA9Ë=3Ë<4ÏA7Ï@8Î;1Î5'Ù5)â5.ë31ò/5÷+6ù)7ü):ù&9÷&;÷&;ø'<û&<ý%:ÿ$:ÿ#:ÿ"7þ#7ú%7ù,;÷5>ò9?è8:à87Ü94Ü:5Þ;4à;5å95ç85é73ë54î47î47î66ê65æ72á6/Ü5,×5*Ø8*Ø8(Ø8(Ù8&Ú7&Ü7$Ý6$Ý6$Û5%Ü6(Ý7'Ü9(Ü9(Û:(Ù9)×:+×:1Ð3.ëHKÿoyÿVjøD_ø<_ÿAgÿ3\ÿ8bø>cèM
lÅOe€0=J C1-11'.6'4=,=B.B@+B<$E>$HE$U\0p~MŸr—¨|ƒ˜mj„W_{J]~E_ƒCa†@bŠAfCkJl‘Lm’Om’Oo”Nl’IiŽIhHiŒLiŒLhŠNg‰Mf…LdƒJdGe‚HgƒHh„If‚Gd‚DgŠHiŽIl‘L-3'36+891<;6B=9G@:MD=QG=SI=SK>UNDZUQa^eigumklkŠjhb`XVˆPP†QQXYš`cªfj´orÁuxÉ|Ђ…Ö†‡Ù‡ˆØŠ‰Ù‹ŠÖ‹ŒÏ‹ŠÌŠ‰Í‰‡Ð‰…ш„ÑŠƒÑ‰‚ÐŒ„Ï‹„ȍ…Á†º†°‡¥~w–m…„Yj€SZnVTc\RZaOZ_HeZDxUB’LD¸TRÐSWÛRYå^dèejãklæuqäunâoh÷~vÿ‚{ÿ‡~ôl`É?2Á8(À;(¿='¿='À<'Â;'Å:'Ç8(Í7)Ø:/Ù8.Ò8,Ï9+Ì:+É:*È9)É7(Ï9+Ñ7+Ò8,Ñ7+Ï7*Ì8*Ë<,Ë>-Ê<0É=0Ê<2É;/È:0Æ8,Å7-Å5*Ð@7Ð@5Í=4É9.Ë80Î;1Í:2Ì5,Ð/%Ù0)ä20ì25ô/8ù-9û+;ü,<ü-?û.?ü/@ü/@þ/Aÿ/?ÿ-?ÿ.@ÿ)9ü)8÷+7õ0:ñ6=í8=ã77Ü43Ü94Ü:5Þ;6â:7å95ç85ê65ì44ñ48ñ48ï56í76æ72á6/Ú6,×5*Ø8*Ö9(Ö9(Ø9&Ø9&Ù8&Ú7&Ù8&Õ8%Õ:&Ö;'Õ=(Ô=(Ô=*Ô=,Ô<.Ñ7/Ú;8ÿ^eÿaqÿPi÷<[ÿAiÿ4_ÿ4`û1YôKlÛYq•>NZ%-C),:2/-0)08+7@/:B+<<"A<UH(`[5{…S£lª¾‹¨¾Ž¥wo‹[aK^F`„DcˆBeDj“Go”Np•Po“So“So”Nl’IiŽIgŒGiŒLiŒLi‹OhŠNf…Je„IdƒHe„Ig…Ih†Hg…Ge…DiŽIi‘Kl”N+1%.4*470893>:7B=:HA;KD<NH<NG=OJDWSR_^ffeugggf†ba‰[YŠQQ…NO‡RS“[^£ei³lo¾uwÊz|сփ…؆‡Ù†‡×ˆ‡Õ‰‰Ó‡ˆË†‡È†…Ë…ƒÎ…€Ð…€Ò‡҇€Ð…ÒŒ…ˍ…ÁŒ…¸‹…«„{v|jz„VcPVnUQd^P[eMXbIb^EsXCOE´WRÐUZÚSZç\cèagågjèpoìyræqh÷|tósjþxmæ\QÄ;+¾5#Â;'Á=(Á=(Ã<(Ä;(È;)Ë9*Ï7*Ö5+Ø7-Ô:.Î:,Ç:)Ä8'Æ:)Ê=,Ï=.Ö@2ÙA4Ö>1Î8*È4&Ê8)Ë<,É:,È:,É;/É;-Ê:/È9+È5+Ç5(Ê6,Ï;/Ò>4Ó?3Ö?6Õ>3Î7.É,#Ò+%Ü-*ç02ð39ö1;ú0<ý0?ý0?ù0@÷1@÷1@ö0=÷/<÷-9ú,9ú*7ý/<ù/;õ0:ð3:í6;æ69ß55Ù42Ü94Ý:5ß:6â:7å95ç85ê65ì44ò59ò59ð67í76æ74ß6/Ú6-Ö6*Õ7+Ô8)Ô8)Ô9'Õ8'Õ8%×7'Ô9%Ð9$Î<%Ï=&Ï?'Î@(Î@*Ð?.Ò;0Ñ61ëHIÿfrÿOdÿHfû7[ÿAmý/]ÿ7aò4XãPj¼Ufm19E(*E697325406906=-3;#38CCja:…TŸ®w©Á‡µÎ—¯Ç“’®}v”be†Q`‚F`…BcˆBgFm–Jr˜Or—Rq•Up”To”Nl‘KhHf‹Fh‹KiŒLi‹Ni‹Og†Kf…Je„If…IhˆIi‰Hi‡Gf†CjJk“Mn–P(0#+1'/4-350764<85@=8C>8IE<HE<KHCRPQ[[eabtbb|``‚_^ˆZXŠTTŠTT\] 
gi³psÄvxÍ{Ø~‚Ûƒ„Þ„†Ý„…؃„Ö…„Ô†…у†Ë‚…ȃƒË‚Íƒ€Ñ…€Ôˆ€Õˆ€Ó…Ò‹„È‹ƒ¾‹ƒ²‰‚¤|“wp€veo~U[|PQnUNf_O]eMZdIaaGq]E‰TF­]TÈZ[ÔVZâ[aæ]déaeîllõzsírj÷xoÞYPå[PÒD8Ã5'Ç:)Å:'Ä<&Å='È=(Ê;*Í:*Ð8*Ô6*Ó0'Ô3)Ò8,Í;,Ç;*Â;(À;(Ã<)È;*ÕC4áK=ÞH:Ó=/Ê4&É5'Í;,È9)È9)È9+È9)Ê8+Ê8)Ë7+Ë7)Í6+Ò<.×?4Ø@3Ú@6ÛA5×:1Ò1)Ü0.å14î49õ6>ù4>ù1>ú0@ø/?ò.<ï/<í/9í07ï-5ï,2ò+2ñ*1õ2:õ3;ò5;ì59å57á55Þ65Ü75Ü86Ý97ß:6ã:7æ87é77ê67ì46ô5:ô5:ñ7:î87æ74à70Ú6-Ô6*Ó7*Ñ8*Ð8*Ð9(Ð9(Ð9&Ñ8(Ð9&Ë9"È:"È>$Ç?%È@(È@*Ì=-Ï;1×96üU\ÿ^qÿGcÿ?aÿ:aÿ7gþ3bû8`íFdÅNa‰@IS12A64@:<:46?56<7179+4="9CSZ.‚‚P¢§q±ÅŠ¯ËŽ°Ì’¦Ã’®}{™egˆQ_E^ƒ@c‰@iDo•Js™Ps˜Sq•Uo“Sn“NkJgŒGeŠEgŠJh‹Ki‹Ni‹NfˆKd†Id†IeˆHgŠJh‹Ii‰Hf‰Ej’Ll–No™Q&.!(0%-2+13.331764:95<;6GD=DD<GFBOOQZZd_ap^`y\\~`_‰\ZŒYX‘\\šgg­qsÀz|р܁„ႅ䅆ㄅჃہ‚Õƒ‚Òƒ‚΄‡Ìƒ†É„„̃‚Î…‚Ó‡‚ÖŠ‚׌ƒÔ…ÐŒƒÄ‹‚·Š‚«…›xˆsnuqbevWUtSLjVKf_MbfM_eIcbFp^FzQ?[M·ZSÅSR×VZâW\êY`òce÷rmñnf÷qhÑD;ÔA7Å1%È4&ÔC2Ç9%Ç:&É;'Í<)Ð:+Ó:,Ø8,Ù7,Ï,#Î0%Î6)Ë<,Ç@-Á>*¾;'¼7$À4#ÑB2ãOAäN@ØB4Ë7)Ë7)Ð>/Ê;+É:*Ç8(Æ7'È6'Ê6(Î8*Ð8+Ó;.Õ;/Õ9-Ò6*Õ7,Ú<1Ý<2Ü71è88ñ8=÷:Aû9Bú4Aõ/<ò,;ï,:í/;ê19ê38ë48í57ñ56ô36ó57ð37ï6;í9<é69â45Ý33ß76à;9Ý86Þ97á96ã:7æ87é77ê67ì46ô5:ô5:ñ7:ì89æ95ß82Ø7/Ó7+Ð8+Í:*Í9+Ì;*Í:*Í:(Í:*Ì;(Å9"Â:"Ã=$Â>%Ã?(Æ?+Ê<.Ï81ß<?ÿ\fÿQhÿIfÿ9]ÿ=hù.]ý<hé;\å\p¦LV^..G83BC><89?48E26C41=:)>E&FU*bs?›a­½±ËŒ«É‹¦Ã‹º„Ž¬z{™ef‡PZ|@^?a‡>hŽCo•Jr˜Or—Rp”Tn’Rn“NjIf‹Fd‰Df‰Ih‹Ki‹Ni‹NfˆKe‡Jd‡GeˆFgŠHh‹Gh‹GeŠDk“Lm—OpšR(0!'/"(-&).(.0-3317839:4==5?@8DE@JLKORYVXeZ\s]^}YX‚[Y‹^]–ee£nn¶wxȁ؄…⌌ðŠŠì†‡ä‚‚Ü€€Ö‚€Óƒ‚΀ƒÈ„ǃƒË„„ΆƒÐˆƒÓ‹„ÒŒ„ÏŠ€ÅŠ€¼‰€¯‡}Ÿ€xwozkegf]Xm[Qm[Mi\Lf^Kd_IeaHhaGo_FoP;…UA¥]O¿aYÑYXÝQTîS[ø\`ùheæYPÔD<Î70Ñ7/×:1Ö=/Ô;+Ï9(Í:&Ï9(Ñ:)Ô8)Ø8*Ü5,Ü5,Ð/%Í3)Ì8,Ê>/Å@/Á@-¾=*¼9'Â9)¾0"éWJßK?Ã/#Ñ=1Î</Ç8(Ê;+É:)È9(Ç8'È7&É6&Ë5&Ì4&Ð7)×;.Ü>2Ü<0Ù7,Ú8-â>4ìC<õBEò9?ð2<ö4?ö2@ñ-;í-:í2=ì7>æ5;ä68å78ë99ð:9ñ78ï77ò;=î<<ê::æ87á85ß74Ü75Ü75Þ97á98ã99æ::é9;ì9<í9<ï8<ô7;ó6:ð8:í9:ç;9à;5×90Ï7,Ê8+È9)Ç9+Æ:)Ç:)Ç:(È;*Ç<)Á9#Ä='¾:#¶4¼9%ÉD3Ï?6Ê1,ÿ^eÿVgÿIbÿ<\ø1X÷0Yû6cõBiçYo¬FQo33N3,B?6>C<@78@-1L/3K2.A9&DJ(Zn=~š_ž¸w¨Å‚¬ÊŒŸ¾‚˜¶€”²~‚ nf„RXvBY{?]€>c‡AiDn”Im•Ll”Nk’Ol‘NiŽIfŠDc†Bc†Bf‰Ih‹KfŠLdˆJg‹MhŒNg‹Kf‹HeŠEf‹EjIk“Jq›QržSt 
U*0")/#).').(-/,130561782;<4>?7AD=HJGOPTSUaXYkZ\u\Z^]‰db”kj¤utº~·‡ß‹ŒéïŒŒð‰‰ë…„ ؀~Ҁς‚Ì…†É†‡È‡ˆË‰ˆÎ‹ˆÏŒ†Î†ÌŽ…ÈŠ€¼‰±†|¡‚x‘{s€tknlb`e]Re`Me^Kf^Kh]Ki\Ij^Hk_Gn^Ew_G[C’YF­ZLÇVPÞRSõQXÿX_ðUSãLEÙ>9Õ60Ù6/Ü90Û9,Ö9(Ó8&Ñ8&Ñ8(Ô8)×7)Ü6*ß4,Ý5,Î0'É5)È9+Æ=-ÄA/Á@-¿<*¾9(Ä8)Ë<.äREÜH<È4(Ì:-È:,Â4&Ê=,Ë<+Ê;*Ê;*Ë:)Ì9)Î8)Ï6(Ó7*Ö8,Ù7,×3)Õ1'Ø1(Þ7.ç<5øDGñ8>í/9ï/:ð0=î.;ë1<ë6?ä5:Ü36Ø22ß:8îEBøIFøDCó?>é:7ç98å97â96á85à85á98á98à87â88å99ç9:é9;ì9<í9<î7;ó6:ò59î68ê88ä;8Ü:5Ó9/Ë7+Æ8*Ã:*Â:*À;*À;*Á<)Á<+Á<)½9$Á?)»:%³5¹:'Á;/Ê:2Ø<=ÿ^gÿPdÿA[ù8Wö6[õ;`ô>dãIe«?Lƒ?>_:2G;/:>08>2F<:R>=N0.S8/VK5ciEx[°pÂ}¡Æš¹}š¹€š¶ƒ©xrŽ^YuETp=\{B_@b†@iCm“Hl”Kk“LiMiMgŒGe‰Cc†Bc†DgŠJiŒLhŒNg‹MiOiMiŽKgŒGgŒFfŽEl’Im•LpœQoRt U-1"-1#,/&,.)//-11/34/45-9;0<>1@B7EHAKMLQRWUUaVWi\Yt^]fdŒpn zx·…ƒÎ‹ßêŽïŽŒíŠ‰ç‡„߁Ó|Ë}ȀƅLjˆÆŠŠÊŒŠÉŒŠÉŒˆÅŒ„¿‹‚·‰€­†|¡€v{p€ujpndbi_Vc]M_aL_aKc^Jg\Jl[In\Ho]Gq]EwaIxX?‡S=£VDÇXOãUSøOTþMSâ><Þ<7Ý84Þ71á6/á6.Ý7+Ú7(Ô7&Ó8$Ó8&Õ8'Ø6)Ý5*à3,Ý5,Ë3(Ä6(Â9)Á<+Â?-Â?-Á<+À8(Ã5'ÜJ=ÚF:ÕA5Î</Ä6(Ä;+¾6&Æ:)Ç:)Ç:)É:*Ê8)Ë7)Ì6(Í3'Õ9-Õ7,Ö3*×3*Ú3+ß6/å:3é<8ð<=í49ë07ì18î3<í4<ë7@ê=CÝ7;áAAìNKøYUýZUúQLïB>å84â62à72à72à72á85â96ä;8ä;8á77ä88æ8:è8;ë8<ë8<ë8<ì8;ï58î47ì57ç77â:7Ù:4Ï8/Ç7,Â8+¿:+½;+»<+»<+»<)»<+»<)¸9&½@*³9$±6$¶;+µ0'Ã40ìPTÿ[iÿJ`õ:Uð7Vñ@]ïHfçHdÍNa‚89e?6R@2DA09=,57)>6+K9/E-#R=,g]Bˆ_’«t—»{–Áz–¿{”³xœ¸…š´…€šm\uKHb5Nh8[xB_}?cƒ@hŒDl’Gj’Ih’JgLfKd‰Dd‡Cc†DeˆFh‹KkŽNkOjŽNiOiOiMhJgHh’Hl”Ko™Ms¡Vs£Wv¤Y24&04&01)01+12-23.34.46+9;-;>-?A3DF9IJDMONRQWSS]XVd\Zoda~nl”zw¬„‚ÃŒ‰Ö‘ã”‘ꓐ뎋≅قÌ|Ã|¿€~½„‚Á†…Á‰ˆÂŠÃ‹ˆ¿‰…¸‡®…}¤…{€vŽyn~rgmmaak^Vi[Pc\J]`K]aJc^Jh[JmZKqZJtZIv[FsWAxR;ŠQ=ªYFÍ[PãTPðFIñ>AÛ2/Ü5/á51ã60ã5.á5+à6)Û8)Ö7$Ó8$Ñ8&Ô9'Ø6)Ü6*à3,Ú6-È6)½8'¼7&½:(¿>+Â?-Ã;+Ä8)Æ4'éUIÓ<1Ì8,ÑB4¾5%À=+À=+À8(Ä8'Å9(Ç:)È9)Ë7)Ì6(Í3'Ò6*Ô3)Õ2)Û4,ã:3é>7ì?9ì=:è45ì59ï6;î5;ê2:æ39ä7=â<@óUVübbÿmjÿidóVQäA<ß63Ý4/ß61Þ71Þ71ß82á85â96å97æ:8å78æ89è8;é9<ë8<ë8<ë8<ì7<î5:î68ê67å97ß<7Ö<4Ì9/Å9,¾9*º;*¶;)µ<)µ<+´=)³;*³<(³:'·@,­8&­8'³;-«+"Ã54ý`gÿTdúG\ð<Uë=XçG_áOdÒO_´SZvGAXH9KD2FE1BE0=?*:8#;3?3PG*nkHŠ“f•¬t‘´tŽ·s‘ºxž»…¡¸Š§}l‚[K_:AW0Lc7Wr?_{@dƒ@iŠCk‘Fi‘Hg‘GdHeŽJb‡Bc†Bd‡EgŠHjMlOm‘Qn’Rj‘Pj‘Nj’Lh’Jg‘Gi“Gm—KpœO
v¥Wv¦Zx¨\78(68*66,56.56056067/68+;>-=@-@C.DG4IK>MNFQPNRRRVUSYX]a_lkius›€}²ˆ†ÇŒŠÓ•’ᔑ␍܋‡Ó„Ä}º|µ|³~·„º‡„¹‰…·Š„²†¨z›€xs‡znzrfjm_^j[Ti[PjZKfZJb_Lb_Le^Li\Kn[Lr[Kw\K{ZG~UA†R=›RA¶VFÎRHÚG@â88å33Ý1-ß3/â5/ã5.á3,ß3)Ý5(Ü9(Ó8$Ð9$Ï:&Ñ:'Ô8)Ù7*Ý5,Ø7-Ä;+¹:)µ6%·8'½;+Â=.Ç;.Ç7,Ð90õ\TÖ=5É5+ÏA5¸3$·:(¼?+¿:)Ã:*Æ:+È:,Ê;-Ì:-Ï8-Ñ7-Ð3*Ò1)Ö1+Þ71å<7ê>:è;7æ74é75ï;<ð<?ê69ä15ä5:èBFíMOÿxwÿqnñ`[ÝJCÑ83Ñ2.Ü73ç@:à93à91à72à72â62ã73å76å76æ68ç79ê7;ë8<ë8>ë8>è7=ê7;î7;î79ê7:ä::ß=:Ö=7Ë;2Ã;/º8*µ:*³;*¯<)¯<*¬<(¬;)¬;)ª;(¬=*¦8'§9*ª8-¬/)Ë@CÿhrôM^óH[íDYåH[ÙO^ÉT]·TWœXUdM?OL9LG4JE1FD-?B'=D#>H#KV.\g<xT‹™f‘¤m©n²r—¹}Ÿ¶ˆ’§€u†dRcC?P0BS1Oa9Vl=b{Be‚BiŠCjEi’Fe‘FbHcŽGb‡Bf†CgŠHiŒJkŽNmPn’Ro“Sj‘Nk’Oj“Mi“Ki“Ij–InšKržOv¥Wv§Xw¨Y?=.=>.==1==3==5;<4;=2;=/?B/@D-CG.FJ3KL:NOAQQIRRHYWJZZN_^\gerqn‰{y¡ƒ€·‡…ďŒÑŒÓŠÏŠ…ǃº~y¯zw¦yv¥{v¬|y®€|®ƒ}©ƒ}£x—|t‹zrvksrffm_\j\Sk[NlZLl[Kk[Ki]Mg^Mi]Mj]Mn]Ms^Mz]MZI“`O›VG§N@¸J=Å@7Ï6.Ø1+Þ1+á51á6/â5.â4-à2)Þ4'Ü6&Ú;(Ñ9$Í;$Ì;&Î;'Ñ:)Õ9*Ù7,Ó9-Á>,´;(°5#²5#»9)Ä<.É;/Ë7-Ù?7ø[TãIAÎ:0Ì@3¸6&¬3 µ<)¼9'¿7'À7'Ã7(Å7)É7*Î7,Ñ7-Ù<3Û:2Þ93å<7ê>:ê=9ç85ã41è96ë<9ì::è8:ì>@øPPÿbcÿppùheâTPÊ=6À1+É40Ö;7ß=:à;7â;5á:2â94á83ã73ä73å55æ66é69é69ê7;ë8<ë8>ê7=è7=é6:ï8<ì89è8:ã;:Ý>:Ó>7É<2À</¶8)²:)®;)ª;(©<(¨;'¥:&¥:(¥<) 9( ;) 9*¢6*µ>8ÛTXûepîK\ïI]éJ\ÛM[ÉQZ´VV ZRŠ`RPI7HM9NI5H@+;567>O%Lf6lŠTw”\„›e‰šd›f£k”­t˜°|ƒ•mn}\Q`C<J09F,AP1O_;Wj=czDe‚Bj‹Dk‘Fi’Fe‘FaFcŽGcˆCi‰FjKlMlOlOm‘Qn’Rm”Qm•Om–Pl–Lk—Jm™JrŸNs¢Q{¬]{¬]z«\DA0DB3DB6CC9CC;CC;BB6BC3DF0EH-GJ-JM2ON9RQ?TREUSD\ZC\\D\]Oaaaihxrq{z¦~µ†‚¿ˆƒÃˆ„Á„º{®ys¡to—om•plžrn¡vpžwršwr’to…skzrinmb`l_Wj\Qk[Lm\Lo\Mo\Kq[Mq[No\Nm]Nl_Om`Os`Oz_N†ZMŸ_S¤PE®B8º;2Æ6-Î1(Ö2)Ý5,â70á6/á4-à3,à4*Ý5(Ø7%Ô9%Ï:"Ê="É=$Ê>'Í<)Ò;*Õ9,Ï;-¿@-±<(¬5!®3!º8(Ä<.Ê:/Î5-Ø93ðNIñTMÙB9ÌB5¾?.¦1³<(¾<,Á9+Á7*Â6)Å7+É9.Ï;1Ô;3áD=ãA<ä?;ç>;é=;é;:è88ç85ç85æ95æ74é;:ôJJÿ\[ÿdcøc_Ì=9Á82½4.Ã81ÕA=ÞE@ß=;Ù42â;5â;3â94â94ä84å84ç77é77ê69ë7:ë6;ë8<ë8>è7=è7=é6:ë7:é69æ89à;9Û>9Ñ>6Æ<1¾<.³8)®;)ª;(¦;'¤;(£:'£:' 
9&¡>+–7%œ=+š9)š2'ÃPKíkmî\fìP^éJ\ßHWÍKS¹RS¤[R’cQ€jUJM8DL7JB/H9$B7FH#Up=g“V~³o‚³qƒ¥h}’YƒYŽ–c‰—d|Œ_XfCGT89E-6B*:D,=J0JX7Wh>e|Fh‚Cl‹ElFi“Gc‘Fa‘GaGf‹FlŒImNn‘OlOkŽNkOm‘Qp—Tq™Sp™SpšPo›NqNt£Rw¦U|­^z­^y¬]IC3JD6IE9IE:IG;IG;HF9HG5IH3JJ2LL2NN4RP;TQ>XRDYTA]Y>[Z>\YH^[Tdahnk~xs“}x {§ƒ}«…­ƒ|¦}wunoi…kg‚ieˆjf‹mhˆmi‚mh|lfrkbgj`^g]Th\Nh[Kk\In]Kq_Kq_Kp]Lp]Np]Op]Ns]Ow]N~]N‡\L’XL¡UH¤F<«9/º7-Ê7/Ó6-Ú6-Þ6-ß6/ß4-ß4,ß4,ß7,Ý7)Ø6'Ñ6$Ï:$Ì<$Ê<&Ë='Î;)Ñ;*Ó:,Î<-¾A/¯<'¨3!¬3 ¶9'Á=.É;/Ì8.Ì2*àA;ø[TàI@ÐD7ÊF7«2¹>,ÊF7ÌB5Ê@3Ë?2Í?3ÔA7ÜE<ãF?â@;â=9ã:7â64ã54ä65å97å<9ä?9à=6Ü:5ß@:êMHêQLÓ@9¸)#¸-&¾5-É>7Ñ@;Ö>9Ú;8ã;;ç==â96â96â96ä86å95ç:6é99ê::ê88ê88ë8;ë8;é9<è8;ç7:ç7:é6:æ68â88Þ:8Ö=8Ì=5Â<1¹;-®9(ª;*¦;)¢;( ;'ž;&Ÿ:(›:'˜?-Œ5"™=.˜8*—/&Í\X÷y|ßX_æXdÚQ[ÊKR¹LO©VPž`UgU€nVPW8?J*<;C=UQ+lvDƒ¢f”Á~‡¼v†¸s|žan‚Mu~Q‚‡^u}V[b@;D)4<%5=(=D2<E29C+CP2Ue>g{HiƒFl‹Gm‘Gj“Gf’GbGcŽGhJlMo’Pn“PlMiŽKjJj’Ls›TsUsSržQržOs¢Rw¦Uz«Zv¦Zu¥[s£YRK;RK;RJ=RJ=RJ=RJ=RJ=RK;UN<VO=WP>XQ?YR@[TD\UE\UEb[HaZHaYLbYRe][kaiqftuj{zn„|pˆs‹s‰|p„uj{mbre]jc`k`_g_\c_Z^^ZY^YUaZRbZOe[Og[Ki\Kk\Ik\Gm]Fm]Fj^HicMqfTqZJuOB‹WJœ\P£YL¬ZL¥N=¦E4§<,±7(Â8-Ñ80Ü41Ý1-ß6/Þ7.Þ7.Þ7.Þ7.Û7+Ø6)×7)Õ8)Ô8)Ò9)Ò9)Ò9+Ò9+Ô8+Í;,½;+¶>-®9(©1 «2!·9*À</Ã9.È5+Î5-Ô:0ÛC8ÝK>ÔH9Â=,·2#ÝPFØJ@ÔF<ÔD;ÚG?ÞG@àC>à<:à74æ87ì::ì::ç77â64à93Ü=7ÙF>ÏB9Ä:/¼4(·1&¹5)¿;/Ã?3Ã:0É<3Ò?7Ú?:â=;ç;9ì89í9:ç7:å8:å8:å99å97å97å95å95à40á51ä65å76æ89ç9:ç9:ç9;ë;>æ9;ß99Ü=:ÖA=ÍB;ÁA6·?1©:)£<+ =*›<(™:&–:%—8&‘8&A0‹<-‡/#‘/&¼LJämoçloÍVZ½LNµNOªQM¢UO›XO—]R•aTŠjSWZ/:M7FTa)~ŽP›°oŸ¼yœ½x’µs~ah€L\mCYdBT]BHP9?D0:=,@C2<@1:>0<C3:C0AN2Rd>]sBgƒHpKo“Kl’Gj’Ii“KgJlPm‘So“Sl“Pm’Mk“Jm–Jm˜IqœLqžMrŸNr¡Pt£Rv§UyªYzª^y¦cm™ZbŽOWN?WN?WN?WN?WN?WN?WN?WN?XO@YPAYPA[RC\SD]TE^UF_VGd[JcZIcYMcYOf[Uj_]nbdpdhreltgpvirvirsfmm`gg[_bXY^[VZZRYVOWTKXTIXTH\VH^WGcZKcZIf[Ig\Hi]Gj^Fk_Gh`IcbMngTv\MSFšVM«VO°RJ·QE¯H9¬C0¬=,±9)¾8,Ë80Õ62Ù40Ý6.Ü8.Ü8.Ü8.Û7-Ú8-Ù7,×7+×7+Õ7+Õ7+Õ7+Ô8+Ô8+Ô8+Î:,¾<.¶>.®9(¦1 §2 
²:)¹>.¾<.Æ:-Ç4*Ë4)Ò;0ÜE:áOBßQCÝOCÛF?Ø@;Ó<5Ò;4Ö=7Ú=8Ü86Ý33ç77ë78î79ë78ç77å97â=9ÛB:ÊA7¿?4¸:,±6'¯5&²8)¸=-¿@1ÅA4Í@6×@7ß=8ã:7é77ï56ñ7:é6:ç6<ç7:æ89æ87å95å95ã:5â92ã:3ã:5ä;6ä;8ä;8å99å99ä:;ã;;ß<=Ù?=ÐA;Ä?6·=2¬;-¥;+ =*œ=+—<)–;(’:&“8%Œ:%€9'€8)‰5*<5¿QP×eeÑ^a·MM¬NL¤TMYP˜\Q—^S—^S˜_VŒfQ`_/Sg*^r3zO™¯n¥¾|™²pˆ bj‚HauBTe;JX7FP7?H59?158-:</AB4=?4:=2=A3:C0@M3Rd>^tEgƒHqMp”Ln’Hk”Hk“Lh‘Km‘Qn’Tm”Sm”Qk“Lk”Hl—Hm˜HpJpJp Ls¢Qu¦Tx©Xx©Xx¨^m—Xb‹QY‚H]TE]TE]TE]TE]TE]TE]TE]TE[RC\SD]TE^UF_VG`WHaXIaXGe]Je]Hd[Jd[Le[Qf[Uh]Yh][i]]j^^k__k__i^\f[YbWS_VQZVMWTKUQHRNCQMBSOCWQCXRB^WG^WEaYFc[Fg\Hi^Hk`JhbLbaMngUy_PˆXN¢[U±VQ²IE¸E@¹@7¸>1·;/¸8+¼8+Ã9,É;/Ñ:/Ù8.Û7-Û7-Û9.Ú8-Ù7,Ù7,×7+×7+Õ7+Õ7+Ô8+Ô8+Ô8+Ò9+Í;,À>0¶>.­:(£2 £3ª:&³>,¹>.Ä?0Ä8+Å2(È4*Ð<0ØF9ÜM?âNDÙ:6Ø43Ô20Ö42Û97á=<æ<<é;<í9<î5:ë27é26è58å99á<:Ù@:¾8-µ:+±6'®4%­5%°8(µ<+»=.ÈD5ÏC6ÖB8Ü=7â94è64í55ï79ê69é69é69ç77ç77æ95å95å:3ã:3ä;4ä;6ã<6ã;8â:9á99Þ88Ý9:Ü<<Ú@>ÓB=È?9¹:1­7+¥7(¡;,<+™<+”;)“:(9&7%Š9&z6#y6%ˆ9, F=¹SOÀWT¸PO«IF¢MHœSL™YP—^S—aW—aW˜_V‹ePsrFzV“¦n¤¹€«À‡ž³{|[[o<?R$AS+AP/?L2:C06<.69058/9;.@A3=?49<1=A39B/@M3Qc=`vGi…Js’Or–Np”Jm–Jm•Nk”Np”Tq•Uo–Sn–Pl”Kl•Il—Hn™IpLpLpŸNu¤Sx©Xyª[u¦Wq X_‰JVEOx>aXIaXIaXIaXIaXIaXIaXIaXI_VG`WH`WHaXIbYJcZKd[Ld[Je]Hf^Gf^Ig_Lg^Of\Pf\Rf\Sh]Wh]Wh]Wh^Ug]Te[QdZPc[N]YMZVJVRFRNBPL@PM>TN@UO?WQAYTA[VC]XDaZGd]Jf_LfaMdcQleUv\OŠ[Q©b\¸ZX¶FD¹<:Ã<8Æ;6Ä:0Â8-½8)¼:*¾?.Ç?/Ó9-Ù7,Ú8-Ù9-Ú8-Ø8,Ø8,Õ7+Õ7+Ô8+Ô8+Ô8+Ô8+Ô8+Ò9+Í;,Á?1¶>.¬9'¢3 
3¢8"¬=)µ@.¼?-¿:+Ã7*Å5*Ë7+Î:.Î</Ó:2Þ65à24Û12Ý34à88å;<ê=?ì<>î7<ì38é06æ25å58á77Ú65Î61·5(¯7'®6&¬7&¬7&®9(²:)µ:*ÃA1ÊB4ÔA7Ü?8â;5æ95ë76î87í68ë78ë78ê86ê86è94ç:4æ;4â92á:2ß:4à;7Þ:8Ü:8Û97Ù99Ö<:Õ@<ÒC=ÊA;»;2¯6+¤6'ž7(œ;*˜;)”;)’;(:&8$Œ7#‡9%€=*v5#5'™E:ªPH¬MG©LG¦QLPJ˜UL”ZO”^R•aV•aV—aWgT‰…_™©x­½Ž¦¸ˆ¡ts†YN`8/A->3B%:F.=F3;A3:=2:=4<=599-?@2<>39<1<@29B/?L2Pb<awHj†Kt“Ps—Or–Lo˜Lo—Pm—Op—Ts˜Up˜Ro—Pm–Jm–Hn™Jp›KqžMrŸNt£Sy¨X{¬]w§[mSe’M[…FS|BNw=cZKcZKcZKcZKcZKcZKcZKcZKcZKcZKd[Ld[Le\Mf]Nf]Nf]Lf^Gg_Hh`IiaLiaNi`Oh_Pg^Oj`Ti`Qi`Qh_Nh_Nh`Mh`MhaNd^N`ZL[UGVPBPM>NK:NK:NK:PM<RO>TQ@VS@XWC[ZF]\H^]Kb`Qf^QmWJ†[R¬ic¾c`ÁMMÈBCÐ;=Ô89Ó84Ì70Á9+º=)µC+¼B+Î;+×7)Ø8*×:+Ù9+Ö9*Ö9*Ô8)Ô8)Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Í;,Á?1¶>0«:(¡6"š4›7 £=&¯B-³;*º;,Â:,Ç9-Ë8.Î:0Ð<0Ù:4é9<ì4<æ39ã28â38á48â38â25é6<é49ç4:ç7:ã9<Û76Ð21À0(´6(­:(­:(«:(«:(«:(¬9&¯7&¸:+Á;/Î>3Ø?7à=8ä;8è96ì87î66î66î66í74ê84è:3ç:3ä<3á:2ß;2ß<5Ü=7Û=:Ù><×=;Ô>=Ï@<ËB<ÅB:¹=5­7-£5(ž7(š:*–;)“<)‘;*:(Š9&‰8%ˆ7$ƒ8%ƒ@-u4"{3%“G:¤QI¡NFžNG¡WN˜UL’XLZN\N]R”`U—aWhW‰f˜¥z©€Žj]kHDS42@&$2-:&0;*5=.9?3<?4=?4:;3::099->?1;=28;0;?18A.>K1Pb<bxIk‡Lu”Qt˜Ps—MqšNq™Ro™QršSršSršQp™Mm—Km˜Ip›LsžOt 
Qv¢Sx§Y{©^z¨_qŸVd’JY†C]†JW~GRyBe\Me\Me\Me\Me\Me\Me\Me\Me\Mf]Nf]Nf]Ng^Og^Oh_Ph_Ng_Jh`IiaLjbMjbOjbOjaPjaPjaPiaNh`Mh`Kh`Kh`IiaJjbKf_Mb]J^XHXRBSM=MJ9KH7IH6KJ8LK9LM;NO=PQ?QTASVESVE[YL^VIcQEzXN¡ha¶eaÄVUÖQRÙ@Cß:>Ü87Ô70Ç;.¼?+±C*µA(Ê;*Ô7(Õ8)Õ9*Ö9*Õ9*Õ9*Ô8)Ô8)Ò9)Ò9)Ò9)Ò9)Ò9)Ò9)Í;,Á?1µ=/«<)¢9$™6•5›;"§A*²B.¹@/¿;,Ã7*Å2(Ë4+Ò;0ß=8ê5:î3<ë6=é8>ç:>ä;>ã:=â9<ß58á7:â9>á=>Û=>Ñ96Ã2/¶0'¯9+«>*«>*©>*ª=)©<(©:'«8%¯6%¹7)Å;0Ð=3Ù<5ß<7ä;8è;7î87ï75ï75î85ë:4é;4ç<4ä=4â>5à?7ÞA:ÚA;ÙA>ÕA?Ñ@=ÍB?Ã?:»@8³=3ª8-¡5(›5'˜8(•<*‘<(<)Š<(ˆ:&†9'„7%ƒ6$6%}:)t3!~9*—OA£YNœRG•OE˜VJ•YN[MŽZMŒ[MŽ\Q“_T™`W‘gW~xXyƒ^r|ZU`B;E,/;%0;*0<.3=24<16<2<?6@A9?A6<<077+89+>?1:<17:/:>07@-=J0Oa;bxIk‡Lu”Qu™Qt˜NqšNršSpšRršQsœPr›Op™Kn™Jp›LsžOv¡Ry¥X{§Zz¨]x¦]qŸWg”O]ŠEX‚B`‡NY€ISzCg^Og^Og^Og^Og^Og^Og^Og^Og^Og^Og^Oh_Ph_Ph_Ph_Pi`OjaPjbOjbOjbOi`Oi`OiaNiaNiaNh`Kh`Kh`Ih`IhaGhaGhaGf_Le^Kb[I\WDWR?PM:LI8IH6IH6GH6GJ9GK:HL;IM<HO?JN?PQCXRF[OCiRDƒYM–VL°SLÔZYÛIJâ?Bá99Ú72Ï;1Â?-´B*³<$É:)Ó7(Ô8)Ó:*Õ9*Ó:*Ó:*Ó:*Ò9)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á=0µ=/¬=,£<)˜8 4•9 ¡A)­C-¶A/¿=/À6+Â/%Ç0'Ð6.Þ:8è59î6>ì;Aê@CæAEàBCÜ@AÙ??Í12Ð66Ó:<Ñ=;É;9¾71µ4.­4)ª<-¥>+¥>+¥@,¥>+¤>(¦;'©:'ª7%²7'¼8+Å8.Ì70Ô94Ü=9æ>;é:7ì95ì95ë:4é;4è;4å=4â>4àB7ÝC9ÚC<ÕB;ÏA=Ê?<Ç=;¿>9±<3©:/¢8+œ6(˜7'”8)‘:):(‹=)‰<*‡<)ƒ:'‚9(€7&6%}6$x3#x5%…B2˜UEWK”PCPD•ZL“YMZN\Q\Q’[T•\U˜[VŽbUkbEV`=JS4=G,6?*2=,4>35?66=68?8=B;CF=EG<CD6@>/;9*78*=>0:<17:/:>06?,=J0N`:awHk‡Lu”Qu™Qt˜Nr›OršSpšPq›OrNqœMp›LošKrNu¡Ty¥X«`~©az¥]qœUf‘L_‰G]‡E^ˆIb‰PY€ISzCh_Ph_Ph_Ph_Ph_Ph_Ph_Ph_Pg^Oh_Ph_Ph_Ph_Ph_Pi`Qi`QmdUlcTjaRi`Qh_Pg^Og^Mh`MiaNiaLiaLiaLh`Ih`IhaGh`IgaKf_Le^Kb[H\WDUR?PM<ML:EF4DG4DH7DH7BI9BK:BK:CJ:IK>SQDSPAXL<eK:tE5•H>ÂYSÙROàDEà::Ü75Õ;3Ê>/º=)µ8"É:)Ï8'Ð9(Ò;*Ò;*Ò;*Ò;*Ò;*Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Ñ:)Í;,Á;0¶:.­<,§>+™9#3“7Ÿ?'£9#°;)À>0Æ<1Ì8.Ð7/Ù<5ã?=ì=Bî?DéCEâDCØ@?Ê;7À50»1.¹/,¼51¾:6»<6³:2ª8.¤8,¡:+¡=-Ÿ>+ 
?,¡@-¡@-¡A+£>*¦=*©;*­:(²7(¹5)¿5+É90Ô?9àC>ä=7è;5è;5ç<4å<5ä=4á>5Þ@5ÜD9ØD:ÒC;ÊA9Ä?:¼;6¸85°93¢7-™8(•5'“6'‘:)<*‹:)…8&ˆ=*…<+ƒ<*€<)~:'|8%{6&{6&y1#€8*ŒG8–SC’PB‹L=RE™_S‘WL’YN”[R–]T–]V—\V–YTŒ^QaX;EM(6@8A&>G2<E43=2-7.1817>6BE<GJ?HI;BC1?>*;:&78(=>09;069.:>06?,<I/N`:awHj†Ku”Qt˜Pt˜Nr›OršSpšPqœMqœLqœLp›Kp›LsžOw£V{§\‚­e€«dx£\j•P]‡EYƒA^ˆHePcŠQZJSzCi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj`Wj`Wj`Wj`Wj`Vj`Vj`TjaRjaRjaPjaPjbOjbOjbOjbMjbMg_Jg_Jf^Ie_Ib]IZXCSP=ML:HI7EH7AE6@D5?E7?G8?G8>F7GK=IK=AE4KJ6\Q;aE/ƒJ9Àj]Üg`ãUSäFEåA@àA=Õ>5É=0Æ=-Æ4%Ë5&Ì6'Í7(Í7(Î8)Ñ;,Ó=.Ò<-Ò<-Ò<-Ò<-Ñ;,Ñ;,Ñ;,Î<-»2(¶7.±;/¨:)š7"”5•7œ9"®A-³;+º6)Á4*Í6/×<7âC?êHEèDEèEHÝCCÉ:6»61´80®8.©4+«9/©9.¤8. 8-›9,–:+“<+’<+–=+˜=*™>+š?,›@-A, A-£@-¢;*¥:(«7(°6)¶6+¾8-Ä;3Ì<4Ú=6à=6â?8âA7âA9ÞB6Ú@6×@5ØH=ÒE;ÇA8»;2²5/ª3-¦1*Ÿ2+™9-’;*‘9+:*‹:)ˆ;)‡;+…<+€9'}8(|9(|9(|9({8'y6&x3$w- ‹A4—OC“MAŽL@‘QE“UJŽRG˜\R˜\R‘UMTL—ZU•XS”WT“gZ]T5IQ*AK(BK,>H/7@+2=-4?14<1HPCZ^PW[JMO:DF.AA'==%:;)>?1;=2:=2;?14=*;H.Pb<cyJlˆMu”Qs—Or–Lp™MršSq›QpLqžKrŸNrŸNsŸPv¢U{§\ªb‚­fx£^j“O`‰G^‡Ea‰JfŽPhTd‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Sj_Yj_[j_[j_Yj_Yj`Wj`Vj`Tj`TjaRjaRjaRjaRjaRjaPjaPh`Mh`Kg_Jg_Jd]J^YEUR?ON:IJ:EH7BD6@D5?E7>F7>F7=E6@F8>E5>F1HL5MI0U?'Q:¼sbËcXà_ZëUTçGGâ@>àA=Ø?9Î:0Í9+Î=,Ï=.Ï>-Ï;-Î;+Í9+Í:*Î:,Ð:+Ð:,Ð:+Ð:,Ð:+Ð:,Í;.É?5ÅB:¼B7®=/Ÿ6#–1–1›2¨5#´6(Ã;/ÑA8ÞE?æIDéJFêKHÞCAÛEDÐB>¼<3¯9/©=0¥?1Ÿ=0Ÿ?1œ>2—>0”>/=/Š>.‡@.ˆ?,Ž=*‘;*’<+“=,”?+•>+—>,™>,Ÿ@.¡=-¤:,¨8,¬8+±8-¶:0¼9/Ê<2Ð<2Ó?5ÕA7×C9ÖC9ÔD9ÑE8ÊB6ÃA4¹=3°:0©6/¤5.Ÿ4,™7,’:.Œ=,Š;,ˆ;+†:*„;*ƒ<*<,~;*|;){9)y:)x9(w8'u6'w4$€4'D7˜NC‘K?ŒJ>QD’TIRG“SJ˜XO˜UO—VPœ]X”WRŽSOŠcT]W5Xc9_jBZfBIT66C)4@*<H4OXGYbQ_fTW\FJN7BE*??#<<"9:(=>0:<19<1<@25>+;H.Pb<dzKlˆMu”Qt˜Pr–LqšNršSq›Qs OrŸLpLqžMt Qx¤W|§_ªc{¦aošUcŒJ]†Da‰JeNeNc‹Od‰S[€LTyEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`Qi_Uj_[j^^j^^j_]j_Yj`Wj`Vj`Tj`Tj`Tj`Tj`Tj`Tj`Tj`Vj`TlaOk`LiaNiaNf_La\HYVCSR>MK<HI9DF8AE6AE6?F6>D6<D5=F57@-@H1IK3F?%W?'‡ZC·s`¶VJÙbZðebìTSæFHëGHæCDØ;6Ë7+É:*É:,É:*Ë9,Ê8)È6)È6'Ê8+Ë7)Ë7+Ë7)Ë7+Ì8*Ì8,Ë8.Â3-¾5/¶6-­3(¡0"ž/ 1 
§3$¼A2ÇC7ÖG?áLFçLGéJFåFBßD@×CAÏDAÁ@:±;1¤</¡A1œE4˜E3“D3‘D4B2ŠA0…@0‚A/~A.@.†:*‹9+‹:)Š;*‹<+<+Ž=*<*–?.–=-›;+9* 8+¥9-©;.®9/¸:.¾90Á;0Ä>3Å?3ÅA4ÃA3ÁA4¶;,²:,ª8-£7+ž6+›6,˜7.”:/;/ˆ<,‡;-„;,‚:+€;+€;+<,};+z;*z;,w9*v8)u7(t6)v4&ˆ;1”F<–NBŽJ=ˆH<OB‘SH’RI’OI™TO™SQšVSš]Z“YU[Tl[}X~‰^€‹alxRLX46D#=J.LZ@eqYeoW]eMPU>EI0?C(=@#:<$89';<.8:/9<1=A36?,<I/Pb<dzKm‰Nv•Rt˜Ps—MqšNršSpšPu¢Qp LnLpŸOu¤Vy§\}¨`|§`p™UfM\„E\„EeŒMiQfN_…HcˆRZLSxEi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Vk`\k__k`^k`\k`ZkaXkaWkaUkaUkaUkaUkaUkaWkaWkaXkaWnbRmbPjbOjbOhaNe^K_ZGYVERO>MK<GH:DF8CE7BD6@D5<C3<H49B-FJ3LF0O:%qL9šcOª^N¡A5ÆSLçc_ð^^íSUëJOéDHáAAÌ91Ç9-Å7+Å7+È8-Ê:/Ë;0Í:0Ñ>4Ñ>4Ò>4Ò>4Ó?5Ó?5Ó?5Ó>7Ó?;ÑB>ËD>ÆF=ÂF<ÃI>ÇM@ÎPDÙSHÝPGâMGäIEäGBâC?àA=ÛB=ÕFBÉD?º?7«=0Ÿ>.šC0•G3”I6F4ŒE3ˆC3…B1€A0}@.zA.|?-‚:,†8,…9+†:,†:,‡;-‡;+ˆ;+‰:+‹:)Ž8)8*•9,™;/=/¢<0ª:/­8.¯9/°:.°:.°:,¯9+®8*¨7)¥7(ž6)›6*•7+’8-‘9/Ž<0ˆ<.…<-ƒ;,9*:*~9*}:*};+|:,z;,y;,w9*s8*q6(r7)u5)ŒD8’H=‘K?ŠH:‡I<‹OD“TK—TL–QL›TP˜QO”SO–\X_Xh^—~j””p…’fu‚WYh?CR+?O*P`<crQetU\iKNX=CL1?E+>B)=A(;='78(:;-79.9<1=A37@-<I/Oa;bxIlˆMv•Ru™Qt˜Nr›OršSo›Pq OožMožMs¢Rz¨]|ªaz¥^u [cŒJ^‡E[ƒD_‡HgŽOj‘PfL`‡Hb‡QY~KRwDi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QjaRjaRjaRjaRjaRjaRjaRj`Tk`Zk`\k`ZkaXkaWkaUkbSkbSkbSkbSkbSkaUkaWkaWkaXkaWocUnbRkbQkbQkbQhaOd]M`ZJXRBRO>NK<IG8GE6DE5BC3>B1=I1?G/LG1S>+g=-•ZL­_Sœ>4ž3+¶?9ØXUóigödeëRUæJNèNPÙIAÓF<Í@6Ë>4Î@6ÒD:ÕG=×G>ÚJAÚJAÝJBÝJBÞKCÞKCàKDàKEëSRèTTãSRÜROØSLØSLÛULàUNàLHáIFãEBâC@äB@âC@äEBàFDÙJFÊE@¹>7¬=2¡?2˜B1’E3G4ŒE3ˆC3‡B3ƒ@0?1~?0z?/|>/€:.ƒ9.ƒ9.ƒ9.ƒ;-ƒ;-ƒ;-ƒ;-9*‚9*…9+‡:*‰:+;-<.“;/›;/ž9-Ÿ9- 
:,¡9,¢8+¢8*¢8*Ÿ8)œ8)˜8(”8)9*Œ:,‰:-‡;-„:-‚:,9+€8*}8)}7+|8+{9+z8,y9-x:-v8+q5*o5)q7+t9+ŒH;I<‰I=„I;…K?ŒRF“WM˜XO˜QMœUQ˜RP“VQ’`YˆbWƒg[†zdnrQZi@JY0AR(IZ0Wi?dvNj|VXiEL[:?L.:D)<D,>C-<A-:=,89+:;-68-8;0>B47@-;H.L^8_uFi…Jt“Pt˜Pt˜Nr›Os›TpœQnMmžLp¡Pw¨Y|ªaz¨`rXi“QZƒA\„EaˆIeŒMgŽMgŽKhJfŠLc†PY{IRtBi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkaUlbYlbYlbYlbVlbVlcRlcRldQldQldQlcRlcTlbVlbXlbXlbXocUnbRkbQlcRlcRkbQg`Pd]M^WGYSCSM?MJ;KH9GE6EB3?B/;H.?H-OC-a=-ƒE:«XR´NJž,+²:9¹=;ÓSRòpnþvvñehëX^ð^_ãUQÚPFÔG@ÐC:ÑD=ÔG>ÖGAÖG?ÙHCÚJBÜHDÝJCÞJFÞKDàKGàKGâHHáGIÜHHÙGGÖHD×HDØGBÛFBàBAäABæBAèBBçCBäB@ßA>ÛA?ÛJGÊC?¹<6®>3¢@3˜A0A0C3ŠA2ˆ@1‡?1…=1‚<0€<1=1=1;1;1ƒ:3;1;1€</€<1=/€</€=-<-€;,‚:,9*‚8+ƒ7)Œ:.8.‘9-”:/—;.š=.ž</Ÿ>.™9)–9(“:*:+Š;,‡;+„;,‚:+‚:,9+7)~6({5){5){5)z6)x6*x8,w9,s8*p4)o5)r:-v<0‹M@‰K<„J<‚M=…QC‹VH’XM—WN“NI™TO—VR•\Ue\{^PfVF`^GEO->N)?O(J\2_rEk~QexKXj@M_7@Q-6E&7D*=F1=D4:>07;-8:,:;-68-8;0>B47@-9F,I[5[qBeFr‘Ns—Os—Mr›Os›TpœQmœLo Os¤Ux¨\z¨`s [i“Q`ŠHYB^†GeŒMgŽMfJeGgŒGhŒLa„NXzHQsAi`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbXlbVlbVlcRlcRldOldOldMldMldMldOldQlcRlcTlbVlbVocUmaSkbSlcTmdUlcTi`Qf_Ob[K^WGXQARL<OI9JG6IC3AC.:G+@F*U@+xG9£PL´HH¶8<º39ÈAEÄ?@ÊJIßb`ñqpðnpçbeâZ\ÛQNÕLDÏD?ÍB;ÏD?ÐE>ÐB>Í@9ÒC?ÒC=ÔC@ÕD?×CAØD@ÙECÚDCÝADÝADÞDFÝGHßIHàJIãIIåGHçACêADìBEìBEçAAâ@>Û=:Õ=:ÚFFÉ=<¹83°;2¦@4˜?1‘>0@1Š</Š<0‰;1ˆ:0‡81†93…:4„;4;3€<3:4€<3<3}=1}=3|<0|<0|=.|=.{<-|:,{9)z8*{7*‚:.ƒ9.‡9-‰9.9.‘;.•<.–=-’9)‘:):*Š;*‡;+ƒ<*€;+~;*€8*€8,~6*}5)|4(z4(z4*y5*u2)v6,v8-r6+o3)o5*s:/x@3ŠPB†N?N=‚Q@…VF‰XIŽUJ‘RI’OI—TN“TO‘\VŠfZn[JPK7EK1AN0KZ9WgC`rJgyOdzLVl>G]/AV-7H$1@!6B*=F5<D98=67:39;.;<.68-9<1?C57@-7D*FX2Vl=a}BoŽKq•Ms—Mr›OtœUqRožPq¢Qu¦Wv¦Zs¡Yj—RbŒJ[…E^†H`ˆIcŠKeŒKfJfŽHf‹Fe‰I`ƒMWyGPr@i`Qi`Qi`Qi`Qi`Qi`Qi`Qi`QkbSkbSkbSkbSkbSkbSkbSkbSlbVlcTlcTlcRldQldOldOldMldMldOldOldQlcRlcTlcTlcTnbTmaSmaSmaSmdUlcTjaRg`Pd]M`YIZSCUN>PK8MH5ID1DC.@F*DD([B,ˆTG¯\X«>A¬/3ÎINÆ?EÀ;>»;<ÄFGØZ[ãefÛY[ÌGHÑJGÐEBÍB?ÐB>ÔFBÔFBÐB>É>7ÏD=ÏD=ÒD@ÔEAÖEBÙECÚDEÝDFÞ?Cß@DÞBEßEEáGGàFFàBCá>?èBDé@Cè>Aç=>ä>>ãA?áC@ßECÖBBÅ98¶50°;4¦?6š>3=/?2ˆ</‡:0‡:0‡81‡83‡83ˆ94ˆ;5‚92€:2€:2;2~<0~<0|<0
|<0x8,y9-z:.z:.{<-{<-z;,z;,};/9/€7.7,„6*…5*†7*†7(Š8*ˆ9*ˆ;+…<-„<-€=,~<,};+}9,}7-|6,y5*z4*x4)x4)x5,r2(u5+v8-r6+n4)n5*t;0xB6ŠUG„PB€O@„SDˆWIŒVJŒRGNG•TN”UN‹RK„XMdSeZDGK2=J,IX9ZkIgxTcxQYnESh=Nc8J_4:L&0?,95>)=C5<B8:<79<5;=2;=079.9<1?C58?-8B)EU1Uh;azCnŠMr“Nr–NršQsSqRr Uu£Xv¤[t¢ZošUfN^ˆH[ƒDdŒNa‰K`‡HcŠIfJgŽKeŠEa…E`ƒMWyGPr@i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbQkbQkbQkbQlcRlcRlcRlcRlcTlcTlcTnbTk]Pp`QtdWrdWnbVj`Th`ShbTgaSgaQc]M[VCVQ=SN8NH2GA)KG,I?$bI3‘gW¶xm»le¸YW¹OO¶BE½BEÂDGÈFHÍHKÐJKÒLMÓKMÏEEÐDEÓEDÖFEÕFBÒC=ÎC<ÏF<ÈD8ÉE9ÊE<ÌE?ÒD@×CCÚADÛ?CáBFâBDáCDàDEàDEáFDãEDäDDãCCäB@ä@?ã?>ã?>â@=â@=ÞC?ÓB?Å>:¶93ª70Ÿ:0—=2Ž>3†>2ƒ?2?3=2;3„93†93ˆ81ˆ92…;0„<.„<.ƒ;-;/€:.€:.€:.~:-~:-~:-~:-};/};/};/};/}90}90~80~8.€7.€7.€7.~8,€8,~8,}9,|8-{9-z8,x8,x8,x8.x8.w7-v8-v6,t6+s5*q5+m1'r6,n4)m3(o6+m4)q8-I=…SH…SJ†RGˆRH‹PHQJ”QK•RL“TMTK‰[N‚cQo_HYU:MV7O_;j~YbxQYoHUjCRg@Mb;IY5CR17D&4>#2:#6;'<>0@B5@@6<>358-39-4:.7:/9=/;?.=B,<E(S_;arFn†Rs‘Ur•QršSsSrUužZ~§e~§ep˜YbŠK^†G^†H^„G`†IbˆKdŠMfNeŒMcŠK`‡H^„G_‚LTvDKm;i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrbSqaRrbUqcVrfZogZmeXjdVgcWkhYoiYlgThcOc^H[U=RL4PJ0H>%Q?)kP=XG„NBˆE=“D?±WV¸TT»QQÀPOÀNMÁMMÂLJÃKJÐTTÈHGÂ=>Ä=:ÐB@ÓEAÏB;Ç>4ÊD9ÉE9ËF=ÍG>ÔFBØDDÜAEÝAEáBFáCDáCDâDEâDCáFDáFDáFDàEAßD@àB?ßA>àA=àA=àA=ÞC?ÔE?ÈC<º>6¬:0 
90”:/Š<0‚<0€A2~@3~>4<3ƒ:3†91ˆ81‰90‡;.…<-„;,„;,ƒ;-‚:,‚:,‚:,9-9-9-9-~:/~:/~:/~:/}90}90}90}90}90}90|90|90|90z:0z:0y9/y9/y9/w9.w9.w8/w8/t8.s7-r6,r6,p6+p6+k1&q7,m4)l3(o6+l3(p7,~H>„RI…SLˆQJ‹PJQL”PM•QN”SOŠOG…SH€[K{ePsiPhkLfqQgxT^tMXnGPf?La:K_:K\:GV7CP4<F-8@)6;'7:)<=/?@2>>2;=04:03;05;16<0:</<?.>A,>D*Q[9^mDkRpŒRp“Sq™RsSsžVxŸ^|¢exžak‘T`†I]ƒF]ƒF\‚E`†Ia‡Jc‰Le‹NdŠMbˆK`†I^„I]LRtBIk9i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTqdTo_Pm]Pm_RqeYog\ldYhbVfbWpl`{xi~{jyizubpiVe`JTT<FF.C>(MB.S@/U8(]7*l=3n70u91~;5‡@:’IB›PJ¥XP­ZR»^YºSN·GE¼B?ÈDBÑFCÓD@Ï@:ÐA;ÐC<ÒE>ÔG@ÖGCÚFFÛEFÞDFßCDßCDßCDßCDàDEßEEßECßECÞDBÞDBÜD?ÛC>ÜC=ÜC=ÜC=ÛC>ÖE@ÎE?ÃB<³>5¢:1”8-ˆ9,ƒ;-‚@2@1~>2=1ƒ:1†91‡81ˆ:0‡;.„;,„;,„;,‚:,‚:,€:.€:.9-9-~:/~:/~:/~:/~:/~:/}90}90}90}90|90{8/{8/{8/{;1{;1z:0y;0x:/x:/x:/x:/t8.t8.s7-s7-r6,q5+o5*o5*j1&o6+m4)k2'l6*i3'm7+}G=‡PI‡PI‹PJŽQL‘RM’SN”UP‘VPŒWO‡[P~^OtbNoiQorUlwYfwUQeBK_:EY6DU3EV6GV9GT8ER8?I0<D-9>*7;*:<.<>0;=/9;.6<25=26<26<0:</<>0=@-=C)MW5Zi@f|MlˆNn‘Qq™RuŸUtŸWz¡`w`n”Wc‰L\‚E\‚E\‚EZ€C_…Ha‡JbˆKdŠMc‰La‡J_…H]ƒH[}JPr@Gi7i`Qi`Qi`QjaRjaRkbSkbSkbSjaRjaRjaRjaRjaRjaRjaRjaRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTnbTrdWoaTm_Rk_SkaWlcZle[jf]jf]xtiˆ„x‘•’Œ~‰‚r}zg`cNOT>DG2FE1HC0F=,L<,VC4P9+R6*V4*Y5)\6+a9/e=1m=1ŠKBSJ®UO¶NK¼FDÆABÒDCØFFÔ@>ÕA?ÖB>ÖE@ÙECÚFDÛEDÝEDÞDDßCDßCDÞDDÞDDÝEDÝEBÞFCÛFBÚEAÙD>ØE>×D<×D<ÙD=ØE>ÔD<ÓF?ÉF>ºA8§;1˜8,Œ9+…<-ƒ@0@1~?0=/ƒ:1„:/‡9/ˆ:.„:-„;,„;,ƒ:+‚:,‚:,9-9-9-9-~:/~:/~:/~:/~:/~:/}90|90}90{8/{8/{8/{8/x8.{;1y;0y;0y;0x:/x:/x:/v:/s7-s7-s7-r6,q5+o5*o5*m4)j1&o6+l3(h2&k5)h2&l6*|F<ŠOGŒOJPKRMSN‘VPWPŒZQ‰]R…aUy_PiZGd_IdhO\fKN]@EU8AQ4=M0<K.?M3CQ7FR:GS;BL4>G2:A/9=,9=.9=.8<-7;-7=36=56;46<2:<1;=/<?,<A*JS4Ve>bxIj†Ln‘Qs›Tw¡Wx¢Z{¢ar˜[e‹N\‚EZ€C\‚E\‚EZ€C_…H`†Ia‡JbˆKa‡J`†I^„G]ƒHY{HNp>Eg5haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTrfXth\sg[mcYjaXle]snhzwp~w† 
”¨¥œ«§œ¨¤™£‘™–‡z{kcfSOR?GJ7EH5BE2BE2EH5KL<JH9GD5D>0A9,A7+@6*F4(S5*qB8ŒPHŸPK¯IG¿GIÌEIÓBEÖ@BÙ@BÚBAÛCBÚEAÚEAÜDAÜDAÞDDÞDFÞDFÞDFÞDDÝEDÝEDÚDCÚFB×FAÖF>ÔE=ÔE=ÓF=ÔE=ÕF>ÔA:ÓC;ÎE=ÁB9®>3ž9-‘;.Š</„?0?/€>.€=-;/„:-†:-‡;.„:-ƒ;-‚:,‚:,‚:,9+9-9-~:/~:/~:/~:/}:1}:1}:1}:1|91z:1|91y90y90x8/x8/v7.y:1y:1x90v:0v:0u9/u9/t:/r7/q6.q6.p5-o4,n3+n3+m4+j1(m7-j4*h2(j4*f2'j6+}D;NGNHQJTLUOŽWPŠYRƒ[Q{YMv\OkZJ]UBYXDY^HOYA?K3:H/7E.4B+5A+8D.=I3CL7EN9BK6?H5<E2:A/8?/7>.5<,4:,5<45<56;56;49;0:</;>-;@)GP1Ra:`vGi…Ko’RuVy£Yy£[xŸ`n‘W`ƒIZ}C[~D^G^G]€F`ƒIa„Jb…Kb…Ka„J`ƒI_‚H^IW|ILp@Bf6haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlcTkaUndZpf\lcZkd\rmg„|‘Œ§£ µ±®ÄÁ¼ÉÆÁÉľÅÀºÀ¹±¹²¨¤œˆ‚rfcTPQ?FI8>E3;D1:F28E38E38C2:B3<B4@C8CE:GE9>4(P:/e?6€EA¡PO¿X[ÊQVÊCIÕFJ×EFÚFFÛEDÝEBÞDBÝD?ÞCAÞDDÞDFÞDFÞDFÜDCÜDCÜDCÚDCÙEA×FAÕF@ÓF=ÒE<ÐF<ÑG=ÔE=Ô?8Õ@9ÐC:ÆC9¶A7¥=0–=/Œ=.†>/‚?/>.€=-<-ƒ;-„:-…;.‚:,‚:,‚:,‚:,9+:+9-~8,~:/~:/~:/~:/}:1}:1}:1}:1z:1z:1y90y90x8/v7.v7.t8.w8/u9/u9/s9.t8.s9.r8-r8-q6.q6.p5-o4,n3+m4+l3*j4*h2(l6,i3)f2'h4)e1&i5*{B9ŽMG’MH‘PJTLVO‡XNYO|\Qs\Nk[L`WFVSBPSBJQ?@I64@,3<+2;*09(09(2;*5>-9B1<E4?H7?H5>G4:F2:C25@/3>-1<,3:33954954928919;.;>-;@*CL/O^7]sDi„Mp“SwŸYy£Yw¡Yq˜Yf‰OZ}CX{A\E]€F]€F^G`ƒI`ƒI`ƒI`ƒI`ƒI_‚H^G]€JV{HJn@Ae7haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcTlbVj`VjaXkdZkg^upjˆ…€Ÿžš°°®ÃÂÀÓÒÐâáßçæäçãàãÞÚÞÖÓÙÎÈμ²¸¥——‡zym]_YIKL:@E1;D/>J6=H7=F5;C4;>5:;3983880==3C<2N71a84ˆHH¯]_Å`dÊWZÍQSÓNO×MKÛKJÞHGáFDãDAâBBßCDÞDFÞDFÝCEÝCEÜBDÜBBÛCBÚDCØD@ÕD?ÓF?ÒE<ÐF<ÐG=ÔE=Ù@:Ù@:ÑB:ÈC:¼C:¬@4™;/Œ9+‰=/ƒ>.>-€=,<,ƒ;,ƒ;,„<-‚:,‚:,‚:,9+9-9-~8.}9.~:/~:/}:1}:1}:1}:1}:1}:1z:1y:1x90x90w8/t8.s7-r8-t8.s9.r8-q8-r8-p7,p7,p7,o6-o6-n5,n5,m4+l3*k2)i3)g1'k7,h4)e1&h4)b0%f4)zA8MG’MH‘PJŽULˆXNYOxZOr\Nk^N^WGRPAKN=CJ:9B12:+08)17+17-06,/5+/5)08+2:+4<-9D4;F5=H7<I7;F56C13@,1>-.800621622717829;0:<.:?)?H+KY5[qChƒLq“Vwž[w¡YsœVgP^IUx@WzB[~F[~F[~F]€H_‚J_‚J_‚J_‚J^I^I]€H]€JUvIJk@@a6haQhaQhaQibRibRjcSjcSjcSibRibRibRibRibRibRibRibRkbSkbSkbSkbSkbSkbSkbSkbSlcTlcTlcTlcTlcTlcTlcT
lbVpg^ng_lgatqj‡„¡ œ¼¼ºÎÐÏÏÏÏàààòòòúøùû÷ö÷óðñìéðâßèÐÆÝÁµÁ«¢‘}ub^\GKM7CH2AI2AI4@G5BD7AA7B=9C97?:69<5>?7B71J.+d66QR³giÆnmÀ\\ÇZWÐVS×QNÜLKâHHæCDåBCßCFÝDFÝDFÝCEÝCEÜBDÜBBÜBBÜDCÙCB×C?ÓD>ÒE>ÐF<ÐF<ÔE=ÜC=ÛA9ÒB:ÊD;¿E:°A6œ:-‹5&Š</…=.‚=-€=,<,<,ƒ<*„<-‚:,‚:,‚:,:+9-~8,}9.}9.~:/~:/}:1}:1}:1}:1}:1{;1y:1w;1x90u9/t8.s9.r8-q7,r8-q8-p7,n8,p7,m7+o6+o6+o6-o6-n5,m4+l3*j4*i3)h4)f2'k7,h4)c1&e3(b0%e3(xB8ŽMG’OIŽRJŠUMƒXOzZOq[Mh\LZTDIJ:?C4>D69A208+/7*6<247058157247025.06,08-19,6A3:E5=J9>K9<I77F32A,1>-,6..400511606718:/:<.:?+<E*IW4YoAg‚Mp’Vv\užXp™U`†IX{CSv>WzB[~FY|DY|D\G_‚J_‚J^I^I^I]€H]€H]LStIHhA?]7icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldWibZgd]vsn“’Ž¯¯­ÄÆÅÚÜÛêîïóôöö÷ùüüþÿÿÿÿÿýÿûúü÷ôýòì÷áÔòØÇàʵƶŸª ‡ˆ„ibbFEH-DH/CG0FE1GD5F<3C60F42H:9>=9>?:B:7I75cGF’jjœ’è­©Ò‹‡ÇtnÀ]XÆSPÕONßJLçFKéFKßEGßIKÚDFÔ;>Ø>@áGIàDGÖ:=Ú@@Ú@@ÙA@ÖB>ÔC>ÑB<ÐA;ÑA9ÞC>ÛA9Ó@9ÊA9¾B8°@5Ÿ:.8*Œ=0‡>/ƒ>.>-€=,<+=*<,:+9-9-~8,~8.|8-|8-|8-|90|90|90|90z:1z:1z:1z:1y=5w<4w;3u:2t91q8/p7.p7.o6-m7-m7-l8-m7-l8-m7-m7-i2+m6/p92o81k4-g2*g2*h3+e0(i7.d2)]-#`0&_/%b2(s>6ŒOJPKˆQJRJ}YMv_QfYITN>DE5>E5:C25@04</3;04:04:068378366446135016//6..6+.9+2=-6C2:G6:I68G44C.1?.*4+,2.-2..3-45/79.:</:?+6>&DR1WlCh‚Rs”]wž_r›Wk”R[~DXyDTu@Tu@WxCZ{F[|GZ{F^JbƒNbƒN^J\}H_€K`L^~MTsJFd@<W4icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUmdUmdUldYgd]onj†…ƒ¤¤¢ÁÃÂÖÚÛêîï÷ûü÷ûþøüÿüýÿþþþÿÿýÿþúÿüùÿúóÿðàûëÔíßÅ×ͲÀ»¤£„‚ƒcgjKLQ3GJ/EC.GB/H>2I:3L95K<9?:7BA?MHE]RPyjg£Ž‹Î²®èÅ¿ÿ×Ñ쳪͂|Àc^ÊVVØRSßHMÚADÖDEÕEEØHHÜJKÛEFÕ<>Ö<>ÛACÙ??Ù??ÙA@×A@ÖB>ÓB=ÒA<Ó@9ÜA<Û@;Ó@9ËB:¿C;±A6 
;1’8-Œ=0‡>/ƒ>.>-€=,<+<,<,:+9-9-~8,}9.|8-|8-|8-|90|90|90|90z:1{;2{;2z;2w<4u<3u:2s:1r90n8.o6-m7-m7-l8-l8-l8-l8-j8-l8-l8-k6.n70o81n91l7/i4,g2*e3*c1(d4*_/%^.$b2(^.$`0&p>5JE‡RL‡XR~XOrVKdRDQJ:>?/:A16A05@03>04<14<15;17:379477577557246116//6./7,.9+1<.4A08E49H58G44E24B1.8/.5..3--2+23+46+8:-9=,7?(DQ3WlEh‚Ut•`wbp˜YgPY|DWxCTu@Tu@VwBYzEYzEYzE[|G_€K_€K\}H[|G^J^J^{MSnKF_A9R4icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSkbSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUmdUmdUneVneVlfZed_y{x—™˜²¶·ÍÑÒäéìôùüúÿÿûÿÿüÿÿýþÿþþþÿþüÿÿûÿÿúÿÿôþüçúùÝïðÑáåÄÒØ´¼ÄŸ ¨ƒ‡‘l`gEQX7DG,DC.JD4LD7KA8I>:C;9SJKia_wvœ’¾³¯ÙÎÈêÜÓþèÝÿóéÿäÛð©£ÃecµBEÇDIÚRVÛUTÑMKÊDAÍCAÔFE×EFØDDÙCDÖ=?×>@Ø@?Ö@?ÖB@ÓB?ÓB=ÔA:Ù@:Ù@:ÒB:ËE<ÀD<²B7¡<2“9.Ž<0‰=0ƒ>/>.=-~<,€=-€=-9-9-~:/}9.}9.|8-{8/{8/}:1}:1{;2{;2{;2{;2z;4x<4t;2q;1r90o9/n8.l8-l6,k7,l8-j8-j8-j8-j8-i9-j8-j8-n91m80m80m80m80i7.f4+c1(d2)a1']-#_0&c4*\-#_0&sD<}PJVP~YQpTI^J?OC7?<-46(2;*0;+1<.2=/4<14<15;17:379479668357257227007/08-.9+0;-2?.6C27E48G48G49F54>33:2/4-,2(/0(13(57*6:)6>'DQ5YmJk„Zw—fwœfl“Z_‡KWxCWuCTr@Tr@VtBXvDYwEYwEZxF]{I_}K]{I]{I_}K^|JZwKPgJAW@3I2icSicSicSicSicSicSicSicSicSicSicSicSicSicSicSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUneVneVneVneVmg[jkfƒ‡ˆ£§¨¹¾ÁÐÕØåíïóûýõýÿûÿÿûÿÿüÿÿüþûýþùþýøÿþ÷ÿÿóøýæôýÞî÷ØæòÎÝéÃÎÚ´¶Á™¡«†€‡efmLMQ6FH0HJ5KI:HD9D@7OGDj`_‹€¤š™·¯¬ËÆÂÞÛÔêæÝúñèÿûñÿ÷íÿÝÕû¶±åŽÊcd°@?ÃPKÉTMÉPHÃF@ÈGBÔMJÖKHÐ@?Ò>>Ó??Õ?>ÕA?ÕA?ÓB?ÓB=ÓB=Ö>9Õ@:ÒC=ÌE?ÀE>²A9¢=5•;2Ž<1‰=0ƒ>/>.=/~<,€=-€=-9-9-~:/}9.}9.|8-{8/{8/~;2~;2|<3{;2{;2{;2z;4x<4r90o9/o9/m9.l8-k7,j6+h6+i7,i7,i7,h8,h8,h8,h8,h8,m;2l7/j5-i7.j8/j8/f4+a1'e5+a2(].$a2(b3)Y* 
a2(yNE\VvWRiNGWC:I<3@:.:;-8<.08)/:*3;.4</5=25;169069079468368349238139/19.19..9+/:,0=,2?.5B17E4;H6<I89C87?428.-3'.0%/1$24&48'4<'ER8]pPr‹d{šnwœifXU|ESsATr@Sq?Sq?Tr@UsAVtBVtBWuC[yG]{I\zH^|J_}K\zHWrIJ^E<M;.?-gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUmdUneVneVofWofWng]qqo‡Œ¤©¬·¿ÂÍÕØãíïòüþôþÿøÿÿúÿÿûÿüýÿúüýõúüñùùíöøêöÿèóÿäðýáîùÙéôÒÞèÅÊÒ­¶¾™£}‚bY]BGL5FK7FJ9CG9AC8c^X…}z«¡ŸÁ·µÌÂÁ×ÏÍæáÝñîéú÷ðüõëÿñæÿòéÿôìÿåß騤·hc¦LAµPDÁYNÇ[QÆQH¿D=ÂA<ËDAË@=Î@>Ð@?ÑA@ÒC?ÑC?ÑC?ÒC=Ó?;ÒA<ÐE@ÉE@½C>¯@9 >5•=3Ž<1‰<2ƒ=1=0=1~<.=/=/~:/~:/~:/}9.|90{8/{8/{8/|<3|<3|<3{;2z;4y:3y:3w;3n70m80l7/l7/k6.h6-h6-h6-i7.h8.h8.g8.g8.g8.g8.h8.k92h6/f4-g5.i70h70e4-b1*c2+b3+_0(]1(]1(V*!b6-{ULtYRaNHN?8A7.=6,;9-9;.9<139-3;.5;/6<06<07:/58-47.69049238139/39/2:/19.19,.9+.9+/:,0;-3>.6C2:E5<G7=E8:B55;/06(02%/1$13%15$2:%GS;buWwk|›rq•g\‚QJp=Oo>Qn>Qn>Qn>Qn>Qn>Sp@Sp@WtD[xH]zJ]zJ_|L_|LZwGRlECU?6D7(6)gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUneVneVofWpgXpgXng]lnmz‚…“š «µ·ÄÎÐÜæèí÷ùôþÿõþýöÿüúÿúûÿùûþóøúíõöèñôãôýêôýèôüåóùßòöÛéìÍØ×¹ÅÄ¥±°’ŽregOMP;GK:FM=GOBKQG{{sŸš”ž¸ØÎÌßÓÓçÛÛóëéü÷ôðïêüüôÿÿöÿýóÿùïÿóêÿçÞÿ×Ëؓƒ¸eS£J:´TF½WIµF;·@8ÈKEÇC>ÊC?ÌB?ÎC@ÎC>ÎC>ÍD>ÍB=ÑB<ÒC?ÎDAÅD?¸A=ª=8ž;5•<4Ž;3‰<2ƒ=3=2=1~<0=1=/~:/~:/~:/}9.|90{8/{8/{8/|<3{;2{;2z:1y:3x92w81u91n70l7/l7/j8/i7.i7.h6-g7-g7-g7-f7-f7-f7-f7-f7-f7-h70h6/g5.g5.g6/h70g6/f5.a2*e6.^2)\0'`4+a5,i=4uSJXG@E>6<5/95,;8/8:/57,36+69.5;/7:/69.69.58-57,36+28,19,28.19,19.19,19,19,.9+.9+.9+/:,0;+3>.6A17B2<D5:C28?/5;-46)24'13%04%07%HT>ex\umt’lf‰_RwKBh9Kk<Nk;Pm=Pm=Nk;Nk;Ol<Qn>VsCZwG]zJ^{K_|L_|LVsCNg@<J9/:2$.&gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSkbSkbSlcTlcTmdUmdUmdUmdUmdUneVneVofWpgXqhYoh^hikmtz…Œ”£¬±¿ÈÍÓÞâåðòðüüòüûõþùøÿ÷úÿöüÿòúýìøùçö÷çööê÷÷ëú÷èüöæýõâ÷ìÖçØÃÕƯÀ±šž“}un[ZWFPQCSVK[bZeld•˜²²¨ÕÎÆèÝÙóããúêëÿñôÿ÷ùÿþûþýùùúôøúïüüðÿÿóÿÿñÿûêÿæÑñª”¼o[©VD®RCµPD»LA¿JAÂG?ÆE?ÉE@ÊE@ÊE>ÉD=ÈC<ÉB<ÑC?ÐE@ÌEAÂC=³=9¦;5œ;5•<6Œ<3‡=4…<5=4<3~<0=1=1~:/~:/~:/}9.|90{8/{8/{8/{;2{;2y:3x92w81v70v70s7/n70j8/j8/j8/j8/h8.h8.h8.g7-f7-f7-f7-d8-d8-d8-f7-g6/h70i81i81h
70i81j;3l=5g80k<4b6-a5,oC:xLCtI@nMDC9056.45/8918;247.14+25,58-58-57,46+46+46+46)37)/7(.9)08+.9).9+.9)08)08)19*08)08+/7*08+19,3;.4<-9B1:C0;B0:A1:<.68*24&/3$29'IU?cu[m„gf„bWyTInE?d8Hg;Li;Nk=Nk=Li;Li;Mj<Ol>UrDYvH[xJ]zL^{M\yKQn@G_;6B4+4/#)%gdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSgdSicSjcSjcSjbUlbVlbVmcWmdUmdUmdUleUmfVngWogZnh\oh^mjekormtzƒŠ¥®³ÃÌÑÕßáæîñôüþôýú÷ýùùþ÷úÿôüÿòüÿñüÿîýþðûøïþúñÿùïÿúíÿùéÿòßïàËÝηÁ²›¢—|ubb_NXZL]cWmwn|†}¦¬¢¾ÀµÚÕÏîäâüîîÿôôÿô÷þôõÿùùûúøûýøüÿúøÿõôüñ÷ýñÿÿñÿùèÿôãÿÌ»½p›QD©SFµSH²C:ÀJ@ÄH@ÇG>ÉF>ÈE=ÇD<ÆC;ÆC;ÍG>ÌG>ÇG>¼C;­>5Ÿ:2—:3‘>6Š=5†=4ƒ=5=4€<3;0;1;1~:/~:/~:1}90|90{8/{8/{8/z:1z:1x92w81v70u6/u6/q6.m80k90j8/j8/i9/i9/i9/h8.g7-f7-g7-f7-f7-f7-f7-f7-f5.j81m;4k:3j92j:0m>4oC8l@5oD;d<2c=2zVJ†dZyWMbLA>:13814927<54:0/5+25,9<336+25*24)03(13(25*46+47,.6),7)/7*-8*/7*/7*/7*/7*19,08+08-/7,/7,/7,08-08+7?09B1<E4=D4;A38<.26'-4$2;*JVBcr[h|a]vXOmKFfAAa:Hd;Kh<Nk?Nk?Kh<Jg9Li=Nk=TqEWtHZwK[xL]zNZvMNjAC[;2>4*00!''heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjdXldYlcZmeZmeXmfVg`NjeRnkXol]he\feasqr‚†Š“–ž¡«³¶ÃÈËØÝàêîïö÷ùùûúüþýþþüÿÿúýþöúýòùüñúýòûþóùúòúúòùùíüúëþüçù÷ÞéåÊÕÔ¶º¹›˜˜|qtY_dMbkXr}l„‚‘›´¹²ËÌÇãâÞòîë÷óòû÷öýùøüø÷ÿþüþþüþþüýÿúýÿúýÿúýÿúýÿúúü÷ÿÿúÿûöÿêåا 
£`X›D=µNE¿MCÂF<ÄD9ÊE<ÎI@ÎKAËH@ÇG<ÆG8ÃH8½J8³H8¦E5šB4‘B5‹C5†B5„B6‚@4‚>3‚<2„:1†91†9191~:1|91{80z7/z7/w7.v6-w7.w7.w8/w8/v7.u6-q5+n3+l7/i81i81h70h70g6/g6/g6/f5.g6/i70i81h70g6/e4-e3,h3-j5/m80k90j:0i;.i=0kA3gB2jI:dH:v_O~k\ŠxjŒoRJ=79.4:04:039/39/39/28.28.17-17-36-06,25,/5+14+/5+/4-.5-/4-.5-/4-/4-/4-/4-05./4-/4-.3,.3,/4-/4-/6.1;23=26@58B79D67B45@03>-4?.DQ=WeN[kQPbHEX<@T8BW6Ic>Ke>Kf=Kf=Je<Je:Lg>Oj?UpGYtK\vO]wP]wRWqNHb?;O6&1+&+.$),heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVidQjhSmjWjk[gg_lll~„“š¨¯µ¸ÂÄÑÖÙâçêòóõûüþÿþÿÿþÿþýûÿþúÿÿúþÿùûüôøûòöüò÷ýó÷úñøúïøúí÷úéùûåòõÚßâÃÌЯ´¸—‘–vkqUYaI_kUtoŒ˜Š§œ¿Á¼ÓÓÑççåóóñ÷÷õûûùüüúûûùþþüþþüþþüþþüþþüþþüþþüûÿþõÿÿõÿÿþÿÿÿûúÿíêð¿ºÂ~uŸKA¯M@¹K>ÂL@ÅK>ÃE9¼=4¾?6ÃE9ÄF7ÃJ7½L:³J7¦F6™C2‘C6E7…C7‚B8A7>5‚<4ƒ:3†91„93~92}:2z:1z:1y90x8/v7.v7.u6-v7.t8.t8.t8.r6,o5*m4+l7/i81j81h70i70h70g6/g6/e3,e3,g5.h6/j81j81j81k92k60l71k90k;1k;/j>1j@0iD2gH6kP=gRAufSueŒ…s†ƒrKL<69.39/39/39/28.28.28.17-17-17-17-06,06,/5+/5+/5+/4./4./4./4./4./4./4./4.05//4..3-.3-.3-.3-/4./6/.80/:21=34@66B66B66B45B16C1CP>P]IR`IIW@AO6BP7EU:Jb@Jd?Ke@Ke>Ic<Hc:Id;Je<SnEXrK\vQ]wT[tTTmOC\>6I5&1-',0$)-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVmhUkiTmjWjk[kkcwww‘’—§¬²¿ÆÌÒÜÞìñô÷üÿþÿÿþÿÿÿþÿÿþÿþýûÿþúÿÿúýþøúûóõøïñ÷ëñ÷ëô÷ìõùëöøêõøåõ÷áîîÔÛÛ¿ÉÊ«°±’“–yuy`kpZt{iˆŸ§œ®µ­ÍÏÊÞÞÜïïíøøöûûùþþüþþüýýûþþüþþüþþüþþüþþüþþüþþüýÿþûÿÿûÿÿÿÿÿÿûùÿúôÿ÷íýÑÆ؞”L=¥RB©L;§@1·G;ÏYMÏUJ¼@4ÄD7ÆH:ÃK=¹I;ªB7?3—@7“E;‰@9„A9A8?6~>5}=4<4}=4z;4x<4w;3w;3u:2t91t91t91r90r90r90r90o9/n8.l6,k5+l7/j81l71j81k60i70i70h6/g5.g5.f5.g6/h70i81k:3k:3l;4j;3i:0h<1k?2jC4iD2fE2fI7lVAjYGujV†m‘Ž{€mEF658-28.28.28.17-17-17-06,17-17-17-06,06,/5+/5+/5+/4./4./4./4./4./4./4./4./4./4..3--2,-2,.3-/4..5.+5-+6.-9//;12>24@46B47D38E3?L:ER>CQ:;I28F->L3DT9J_@Jb@LdBKc?Ia=G`9F_8G`9RkDXpL]uS^vVZqTPgK=T8.A.&1-',0%*.heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVniVljUlkWlm]pqi€‚ž¡¦¹¾ÄÄËÑÛåçõúýûÿÿüýÿüýÿþüýüûùÿþüÿÿûÿþùûüôõöîîñæéïãçíßíñâðôåñôãñôßòñÜêêÐØؼÇÇ«²±•¡¡‰‘“}“€œž‘ª­¢¶»´ÀÅ¿ÜÜÚééçööôûûùýýûÿÿýÿÿýýýûþþüþþü
þþüþþüþþüþþüþþüþþþûûýÿþÿÿþÿ÷òïüóìÿüóÿûíÿôäìñ¹p•P@¢O?¸XJ·M?µE9ÀG<ÁA6ÈE;ÅI?½G=¯@9¡<4š=6–A<?;‡@<@:}@;z?9y@9x?8x?8v=4v=4v=4u<3s=3r<2r<2q;1o;0o;0o;0n:/k9.j8-j8-i7.m82n72m61l71l71k60i70i70l:3j92h70f7/f7/f7/h91f:1j?6f>4e=1f>2hC3iE5fG3cG2cJ6hV@i\IskV…‚oŽzpq_9=,47,28.17-17-17-06,06,06,17-17-17-06,06,/5+/5+/5+.3-.3-.3-.3-.3-.3-.3-.3-.3-.3--2,-2,-2,-2,.3--4-*4,)4,*6,+7-.:.1=14@25A36C2:G5<I78E12?+2@)8F/>N4DX<E]=K`AJb@K`?G_;H^:F^:SiEXpN`uV^uXZnSLcI9M4):(%.+%*.$)-heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZmfVlgTkiTnmYop`tum…‡†¤§¬¿ÄÊÎÕÛæðòûÿÿûÿÿûüþýþÿÿþÿþýûÿÿýÿÿûþýø÷øðïðèçêßàæÚÞåÕçëÚêïÛíðÛìðÙîîÖèèÐÚØ¿Ìʱ½§·µ ±®¶´§Á¾µÉÈÃÒÓÎÙÙ×èèæòòðúúøüüúýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿþüÿüÿÿüÿÿûúüû÷ýÿùþÿøþÿôÿÿíÿþìøßËŜŠbP–J:¥L<´P@¼L>¿@7ÆC;ÅF?¿D=±>;¦:7ž:8™>;‘=;Š?<„@=}@=xA<tB;rC;rC;q?6s>6s>6r=5r=5r=5p>5o=4n>4m=3l<2k;1j;1i:0i:0j:0n72p62p62m61m61l71i70i70l;4k:3h91e90e90e90e:1d<2fB6cA5cC4dD5dG5dI6bI3_I2^L6eV?jbMrmW„ƒo„‡r[`L3:(28,28.17-17-17-06,06,06,17-17-17-06,06,/5+/5+/5+.3-.3-.3-.3-.3-.3-.3-.3--2,-2,,1+,1+,1+,1+-2,,3,,6.+5-*4+*4++5,.8-0;-2=/2=-5@/6A05A-1=)1=)5A-8F/=O5@T8FY=H\@J]?I^=K^>K`?TgG[pQcvZauZYkSJ]G6G4&4%$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjcYkdZlc\md[meZlfVjgTkjUlnYorasvm…„Ÿ¤¨¸¿ÇÖßäí÷ùûÿÿûÿÿûüÿþÿÿÿþÿþýûÿÿûÿÿúüüôóõêëíâãçÙÝãÕÜãÑäéÓçìÕéíÖêìÔëëÓèæÏßÚÄÓθÓκÌƶËøÐÉÁ×ÒÎàÛØêæåðïíóòðùù÷ýýûýýûýýûÿÿýþþüüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿûüÿøùÿþýþÿýùÿýôÿøïÿõ÷ÿöð÷çÿÿíÿîÛͧ”¤kX¢[GªWE­L;¹D:ÀD<ÀE>¼E?´A>ª=:¢:9š;9•=<?=…?=}@;wB<rC;oD;oD;p?8q?6q?6q?6q?6p>5o?5o?5o@6n?5m>4k<2h<1h<1h<1l<2o83q62p62p62m61l71j81i81h70g80e90e:1d<2e?4f@5dB6^B4aG8cL:dM;cM8`K6]K3]M4]Q9bX?mhRss[€ƒnv{eFO:4=*39-28.28.28.17-17-17-06,17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,-2,,1++0*+0*+0*+0*,1+,3,.5.,6.*4+*4+*4++5,-7,.9+-8*0;+4?/5@/4?.3?+3>-3@,7F/9K3@O8BT:GW<H[?M]BM`DUeJ\oSdtZ`rZXgRGXE4B1$2%$-*$),"'*heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjcYkdZlc\md[lfZlfVliVlkVkmXkn]lrhy}• 
®·¾ÒÛàêóøûÿÿùþÿùúþþÿÿÿþÿúù÷ÿÿûþýøùùññóèéëÞãçØßæÖàèÓäéÒçíÓèìÓçéÑèèÐçåÎàÛÇ×оÚÓÃÙÑÆÞÔËæÝØîäãóéêúñôÿùûúù÷þþüÿÿýýýûýýûÿÿýÿÿýüüúþþüþþüþþüþþüþþüþþüþþüÿýüÿùøÿúùÿþýüÿý÷ÿþîÿùñÿÿòÿýõÿúóùëÿýíÿúèôλ«wbQ:ª\H²N>¸J=¹I>¸I@´G@¯D>¥=:œ:7–=9>:ˆ?9€A:xA:tD:pF:pE<p?8q?8q?8q?8q?8q?8p?8p?8qB:pA9n?7l=5i=4j>5j>5n=6o83r73p62p62n72l71j81i81d8/d90c;1d>3d@4cA5cA5_C5ZE4_N<gVBhWCcS<^O8[O5\R7]V<]Y>nlUtv^|kcmU2>(6B.3;.39/39/39/28.28.28.17-17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1+,1++0**/)*/)+0*,1++2+-4-+5-*4,*4,+5,+5,,6+-7,,6+/:,2=/6A17B27B15@04?.2?-4C.:G3=L5AO8DT:JX?L\BTbI\kTcqZ^mXUbPDRA2>0#/%&/,&,,$**heVheVheVheVheVheVheVheVheVheVheVheVheVheVheVheVjdVjdVjcYkdZlc\md[lfZlfVolYmlWjlWgkZflbr{xŽ˜š¨±¸ÊÓØãìñøýÿ÷üÿúûÿþÿÿÿþÿúùõÿÿúýýõøøîðòåéëÝåéØãêØåíÖæìÒéíÒêíÒèèÎèæÏèãÏáÚÈØÐÃØÎÄÝÒÌêßÛøíëÿôöÿõùÿõûÿùüþüýÿÿýÿÿýþþüþþüÿÿýÿÿýýýûþþüþþüþþüþþüþþüþþüþþüÿþúÿýûÿýûýüúùýüøÿÿõÿÿóÿÿíýúóÿûûÿúÿÿôÿöæÿóßÿãÍ×­•©oY¨WD®Q@¬O>¯OA±OB­K@¤C<™<5•>7?6ˆ?8A7{B7uD6qE8qE8p?8q?8s>8q?8q?8q?8q?8p?8sB;qB:o@8m>6l=5j>5m>6o>7o83q73o83m82m82i81h91f:1f;2d>3d@4bB5`C5]A3\@2WB1TG4[S>f^Gi^HbX?ZS9ZS7\W:ZV;XW;kmUsw^u}fUaI&28D03;.4:04:039/39/39/28.28.17-17-17-06,06,/5+/5+/5+-2,-2,-2,-2,-2,-2,-2,-2,,1++0*+0**/)*/)+0*+0*,1++2+)3+*4,+5-+5,,6--7.-7,,6+.8-2<16A39D69D47B46A10;+2?-5B09F2<I5@N7FT=JX?R`I[hTanZ\iWQ^MBN@/;/",#+1-(.,&,,heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWkdZle[md]ne\mg[mgWqn[kmWknYkr`jrgpyv‡‘“Ÿª°»ÄÉÐÙÞêïóöûþüýÿþÿÿÿþÿþýùÿÿúþþöúúðöøëôöèðôãêñßçïØðöÚíòÔëîÑììÒèæÑáÜÉÝÕÈÞÔÊáÔÎèÚÙòääúëîþòôÿ÷ûÿùÿÿüÿÿýþþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþÿúÿþúÿþúþþüýÿþûÿÿúÿÿúÿÿûÿÿþÿÿÿÿýÿûõÿûñÿüìÿûåÿóÛÿêѺ‚i¦bK—O9ŸT?¨ZF£RAJ: 
M?Ž<0‹=0†>2=/x=/t?/sB3tD6r=5r=7t=8s>8u@:u@:t?9q?8sA:q@9q@9q@9p?8n?7o>7o>7k92m82l:3l;4l;4h<3g<3c=2c?3^>1dG9cG9T=-N9(M8'@1WQ;PN7KI0OK2]Y>ieJeaDVU7XX<]`CosZz€fdlU=I1-9#2>*4</5;14:04:04:039/39/39/39/39/39/28.17-17-17-06,16016005/05//4./4./4./4.,1+,1+,1+,1+,1+,1++0*+0*.5.-4--4-,3,-4--4,.5-.5-+2*-4,07/5=29A6=E8?G<?J<7B25@/3>-1=)2>*9E/BN8IU?O[GVbN]hWYdTLWI<F;/9.)3*)/+(.*'-+heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWkdZle[md]ne\mg[khWonZjlVknYkr`jtiq|x‰”–£®´¾ÇÌÒÛàëðô÷üÿýþÿþÿÿÿþüþýùÿÿúþþôûûï÷ùëô÷æðõáêòÝèíÖêïÑæëËäçÊææÌãáÌßÙÉÞÔÊßÔÎçÙØíßßöçêûïóÿôøÿ÷ûÿúþÿüÿÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüûÿúûÿúýÿüýþÿþýÿÿüÿÿüÿÿûÿÿüÿÿüÿÿûùÿüõÿþñÿÿíÿûåÿòÙÿëÑ౗§u\‘[C–ZBšYCšVC™R@ŽG5‹F6ˆE4ƒD3|C2wB0s@/o>/u@8s>8u>9u@:t?9o=6p>7sA:q?8p?8p?8n?7o>7m>6n=6m>6k<4l=5m>6k?6j?6gA6eA5bB5dG9[A2^G7^I8N=+F7$G8%>6!LL4GK2DG,DD*MM1\[?baC`aBYY=dgJsw^sy_X`I:D,/;%7C/6>16<26<26<25;15;15;15;15;15;15;14:04:039/39/39/27127127116016016005/05/.3-.3--2,-2,,1+,1++0*+0**1**1*)0))0))0)*1*+2*,3+.5--4,.5-07/4;39A6?F>BJ?;F8:E57B14?.3?+6B.<H2@L6LXDS_KYdTWbRLVK=G<1;2+5,+2+*0,)/+heVheVheVheVheVheVheVheVifWifWifWifWifWifWifWifWkeWkeWlcZmd[md]ne\mg[khWmlXimVjoYktaiuiq}yŠ˜™¥²¸¿ÊÐÔÝâíòö÷üÿüýÿýÿþÿþüþýøÿÿ÷ýýóúûí÷úéô÷äïôÞçïØåëÑâçÉÞáÂÝÝÁÞÞÆÞÛÈÝ×ËàÕÏäÙ×îââóçëúîòÿôøÿ÷ûÿùüÿúýÿýýÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüøÿüøÿüûÿþþþþÿüÿÿúÿÿùÿÿøÿÿùÿÿúÿÿúúÿýøÿÿöÿÿóýÿïÿúæÿôÜÿøàÿìÓ㿧¬‚j‡YB…Q;’[F—]IS?F2u>)r>)s@+tD0vE4q?4n<3q<4tB9sA8o?5qA7xH>n>4m>4m>4k?4l=3j>3l=3j>3g?5gA6gA6gC7eC7cC6`C5^D5bK;UB1ZI7`S@RG3C;&E=(FA+AE,BH.AE*<@%?B%NN2_`AijKaaEnqTvzagmSJO94<%4=(=F38>28>48>48>47=37=37=36<28>48>47=37=37=36<26<26<25:449349349338238238227105/05//4..3--2,,1++0*+0*).().().().().(*/)+0*,1+160/4.,1+,1+/4.6;4=B<AH@@K=>K:<I78E34A-2?+3A*5C,DQ=KXDQ^MQ^MHTH<H<2>4.8/-4--4-,3,gdUgdUheVheVheVheVifWifWifWifWifWifWifWifWifWifWkeWkeWlcZmd[md]mf\mg[jiWkmXimVjoYjs`hthm|wˆ˜˜¦³¹¿ÊÐÓÜãëðööûÿüýÿýÿþÿÿýÿþùþþöýýñúûí÷úçô÷âîóÜåíÕâèÎÛàÀ×Ú»ÖÖ¼Ù×ÂÝ×ÇÞ×ÍåÛÙìààøìðûðöÿõûÿùþÿúýÿûûÿûûÿýüÿþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿüøÿþøÿþûÿþþþþÿüþÿúÿÿùÿÿøþÿúýÿùüÿùùÿ
ýúÿÿøÿÿøûÿõüþðÿÿíÿüéÿñßÿðÝÿïÚñÒ½´z{R>tJ4uH3vI4zM8}P;yL7nA.e7'{M@sD:qB8sD:oC8g;0d8-g=1i?3i?3i?3g?3f>2f>2f>2e?2b@4bB5`C5_C5_C5[D4ZE4WD3XI6OB/\T?oiSc^HMK4IG0IK3>D*?H-@F*<B&<A#FI,XY:deFmmQxx\tv^[_F@C.6;%8@+<C19?39?59?58>48>47=37=37=39?58>48>48>48>48>48>48>47<67<67<66;56;56;55:45:438238216005//4.-2,,1+,1++0*+0**/)*/)+0*,1+-2,-2,05/.3-+0*+0*-2.2718=9;B:@K=@M;@M;=J68E13@,1?(0>'<I5BO;HUCIVEDPB;G;2>4.:0,6..5.-4-gdUgdUgdUheVheVifWifWifWifWifWifWifWifWifWifWifWkeWldWlcZmd[md]mf\mg[jiWkmXinWiqZgs_drejyt…••£²·½ÈÎÒÛâêïõôùýûüÿýÿþÿÿýÿÿúÿÿøÿÿóþÿñûþë÷úåðõÞæîÖâèÌÙÜ¿ÕÕ¹ÒйÔѾÚÔÈàÙÑêàßôéíþóùÿ÷üÿûÿÿýÿÿþÿÿþüÿþúÿþúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüýÿþúÿÿúÿÿûÿÿþþþÿýþÿüüÿûüÿûúÿúøÿù÷ÿú÷ÿýøÿÿûÿÿûýÿúýþùùôîÿþöÿýôÿüóÿûîÿ÷çûæÕéÒÀÏ´¡¶š…”uawVCjI6lI6rM;uP>mF7iB3gB2jE5nI9nJ:oK;oK;eA1cB1cB1cB1cB1cB1bC1`C3]F6\G6\G6ZG6ZG6WH5TG4RG3PH3NH2fdM~~frrZVX@JL4GM3<E*<F+?H+AG+AF(DG*MN/TU6xw[{z^nnVUU=CB.?A,>A.:>-;>39?59?58>48>47=37=37=38>48>48>48>48>48>48>48>49>89>89>88=78=78=77<67<66;55:449338227105//4./4.-2.-2.,1-+0,+0,,1--2.-2.,1-,1-,1-,1-.210513764;4;G9=L9@O<@O:<K67F/4C,2A*6E.:I4@O<BP??M>8F71?2-9-,6--4,,3+fcTfcTgdUheVheVifWjgXjgXifWifWifWifWifWifWifWifWldWldWlcZmd[md]mf\mg[jiWkmXinWiqZgs_bpcgxr‚””£²·ÀËÑÔÝäëðöõúþûüÿýÿþÿÿýÿÿúÿÿøÿÿôÿÿóÿÿïúýèò÷àçðÕâèÌ×Ú½ÑѵÍË´Î˺ÖÏÅÞ×Ñìâã÷îóÿöþÿúÿÿýÿÿþÿÿÿýþÿúþÿùþÿúþÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþþýþÿýþÿýþÿýÿþýÿüþÿúþÿùÿÿ÷ýýóýýõþýøþýùÿþüþþþÿýÿÿþÿþùÿÿüÿÿûüûöóÿøòÿýôÿþñÿýíÿúéÿòà÷äÓçÒ¿Ò»©¹ŸŽŸ„srb|]K}^LtUCaD2W:(Y<*[>,X=*`E2`E2`E2_F2`G3`G3`G3]H3\K7\M:\M:YL9XM9UM8RL6QL6NL5QQ9ikS|€gmqXSY?FL2>G,<F+<F+?H+DJ.EJ,DG*HI*LM.yx\tsWfdMTR;LI6KJ6FE3<=-<?49?59?59?58>48>48>48>47=37=38>48>48>48>49?59?5:?9:?9:?9:?99>89>89>88=78=78=77<66;55:4493382382/40.3/-2.,1-+0,+0,+0,+0,+0,,1--10.21/32/32/32-4-3?17F3=L7@O:?N7<K49H18G05D-8G0<K6>M:<J97E61?2-9-+5,-4,,3+fcTfcTgdUheVheVifWjgXjgXifWifWifWifWifWifWifWifWldWldWlcZmd[md]mf\mg[jiWjlWhmVhrZfs_cqdhys…——¥·»ÇÒØÚãêðõûøýÿüýÿýÿþÿÿýÿþùÿÿõÿÿòÿÿðüÿìøûæïôÝâëÐÝãÇÕÕ¹Î̳ÉÄ°ÊÄ´ÐÉ¿ÚÒÏéàãöíòÿ÷ÿÿúÿÿýÿÿþÿÿÿýýÿúüÿøüÿøýÿúþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþ
üþþþÿüÿÿüÿþýÿýÿþúÿúøÿùøÿõøÿóøÿóøÿõüÿøýÿúÿýþÿüÿÿüÿÿûÿý÷ÿÿúÿÿüÿÿýÿÿþÿþýûûûóúøìÿÿñÿÿïÿÿíÿþíÿýíÿöçÿîàýêÛúãÑÿæÐãÊ´¥xt^GeO8bL5\H0^J2\J2\J2]K3]K3^L4^L4\M6YN8WO:WO:VP:UO9SN8PN7NN6MO7PT;`fLfoTU^CEO4AK0<F+?I.?I.CL/HN2HM/FI,OP1YX:qmRjfKa\F[V@YTATQ>MJ9BC3=@5;A7;A7;A7:@6:@6:@69?58>48>48>49?59?5:@6:@6:@6;@:;@:;@::?9:?99>89>89>8:?9:?99>89>88=78=77<67<6495273162/40-2.,1-+/.+/.,0/,0/-10.21.23.23-12+1-+9*/@-6H2;M7=O7<N4:L2:L25G/7I1:L6<M::K96G70@3-;.,6--4,,3+cdRcdRdeSefTgeVhfWigXigXhfWifWifWifWifWifWifWkeWkeWkeWldYle[md]mf\kg[jiWikVhmWhrZgt`dqgj{u‰›ª¼ÀÍØÞàçíôùÿúÿÿûÿÿýÿüþÿúþþöúúîúûíúûéøúåóöáêî×ßãÊÙÜÁÓѸÌDZž¬Æ¾±ÌüÖÎËæÝàôêòþ÷ÿÿúÿÿþÿÿÿÿþþþýÿúüÿúüÿúýÿüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüþþüÿþüÿþüÿþüÿýþÿüÿÿüÿÿýÿýÿþûÿüøÿù÷ÿ÷÷ÿõøÿöùÿöüÿøÿÿûÿþýÿüÿÿûÿÿúÿÿúÿÿûÿÿüÿÿþÿþýÿûýüüþûþÿúùúòúüñýûïþúîÿùìÿûìÿýïÿýìÿðÛÿûâìÜގu`P7RB)XH/XH.[M3[M3ZN4ZN4[O5[O5\P6[Q8UO7TO9TO9QO8PN7NN6KM5IM4JN5LR8S\APY<@I,;F(AL.?J,DM0EN1JQ2MR4JM0LL0YY=kjNhdIb^E^ZA_ZD`[GZUAQO:KJ8AB4@B5@B5>A6>A6>A6=@7=@79?59?59?5:@6:@6:B7:B7:B7;@9;@9;@::?8:?99>79>89>7;@:;@9;@:;@9:?9:?8:?9:?98?87=94;4382160/4..3/.3/.21.21,2.,2.+2++2*,4))6%$6)=!2F+8L1;O6;M5;M5;M58J29K3;M5<N6<N87H51B0.<--7,.5--4,]eN^fO_gP`hQdiUfiVgjWijXfgUhgUheVheVjdVkeWkeYlfXlfXkhYmg[kg\mf^jf[ieYgeVghVfkUenYbo]dqhsƒ€–¨¬¹ÈÏÒÛââéïò÷û÷üÿùýüüþùüÿöüþñ÷øêùúêø÷åòñÝîíÙëêÖàßÊ×Ò¾×ϼËÀ®½´¥½³©Â·±ÌÂÁÛÒ×êàèúóûüöÿþûÿÿþÿþýÿüýÿüþýüþýþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüþýùþýùÿüùÿýúÿþûÿþûÿþÿÿýþÿüýÿýþÿÿÿþÿýýÿüûýøÿÿûüý÷ÿýøÿþúÿýøÿúöÿ÷ôÿùùÿúÿÿüÿÿýÿÿÿÿÿÿÿýÿüýÿþüþýþÿÿÿÿÿÿþüÿþùÿþóÿþîÿýêÿýæúôÜÿúàçàƏˆlUN2WN1TK.SJ+WN1WN1UN1UN1TO2TO2TN4SO4QM2RN5QO6PP6MO7KO6HM6GL5JP6FO4AJ-=F'>H&DK*FM+EL*PV4PT3KN/HK,TT8bbFccIZZ@[Y@\ZA][B][B[Y@XV=US:SQ8HF1GD1DA0A?0@>1>>2??5>@5;>59?59?59A69A48C58C58C5:B79A69@88@58?79A69@8:B7:A9:B79@8:B7;B:<D9=D<>E=8B:8B:7A88@56>14</39/271.40/51-7/-9-.;)0@&6G'7M';V+:W+;U0;T4;R8=P:>O<>P::L6@R8EW=FY=DX=@T;5H2+<),6+,3++2*ZgM[hN\hP]iQbjSckTglXhkXghVghVifWifWkeWlfXmeZmgYmgYkhYkg[kg\jf]jf[hfZefVhiWglVfo\erajwnzŠ‰œ«²ºÈÑÐÙàáéìò÷ûøþþùþúúÿøûþóùýïøùë÷øæõôâíìØéæÓåâÏÜ×ÄÒ˹ʿ­Á´¤¹­¡¼¯¦Á¶²ËÁÀÛÐÖæßæûóþý÷ÿþûÿÿþÿþýÿüýÿüýÿüýÿþþþþþþþþþþþ
þþþþþþþÿþüÿþüÿþüÿþüÿýúÿüùÿüùÿýúÿýùÿþúÿþýÿýüÿüýÿýþÿþÿÿþÿÿýþýûüÿþýÿþûÿüúÿøóøíçòåÝöèßûðêÿúýÿüÿÿýþÿÿýÿÿûýÿüýÿþüýÿýþÿýþÿÿýÿÿþüÿþõÿÿîÿþèÿÿäúöÝýûâäàNj‡lRM0SN0QK+QK+UO/UO/TO1TO1SO2SO2QP4QP4QM2PO3PN5NQ6MO7JP6HM6EM5HQ6EN1BK.?I'BI(FM+JP,KQ-NR/PT1VZ7^bA_bCY\?TW<UU=VV>WU>XV=YW>XV=XW;WV:XT9QK3OI3LG4ID1DA2B@3A?3>@5<=59?59?59A67B48C57D37D3:B79A69A68@58@59A69A6:B7:B7:B7:B7:B7;C8<D9=E:<F;=G?<H><F;;F6<D5:B39?38=67>74>63=40>-3C)9M*BY/Ga1Op;Ln;Li=Fa>BY?<R=;L::K8=O9DV<K^BM`BL`DH\@:Q71D.+5*+2*)0(ZgM[hN\hP^jRbjSdlUglXilYijXijXkhYkhYlfXmgYnf[nhZmgYkhYkg[jf[jf]ieZhfZfgWhiWejTdmZgtco|sŽ¬³¶ÄÍÍÖÝÞæéñöúøþþúþýùþøøûòôøêóôæðñßêé×áàÌÜÙÆ×ÔÁÐ˸ÇÀ®¿³£½° ¼°¤Ã¶®ËÀ¼ÕËÊãØÞëäëüôÿý÷ÿþûÿÿþÿþýÿüýÿüýÿýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüÿþüÿþüÿýúÿýúÿüùÿüùÿüøÿüøÿýüÿüûÿûüÿüýÿþÿÿþÿÿýþþüýÿþýÿþûÿýûüóîêßÙâÕÍëÝÔ÷ìæÿúýÿüÿÿýþÿÿýÿÿûýÿüýÿþüýÿýþÿýþÿÿþÿÿÿýÿþõÿþíÿýçÿþãû÷ÞþüããßƉ…jPK.QL.QK+QK+TN.UO/TO1UP2TP3SO2QP4PO3QM2ON2PN5MP5MO7JP6IN7FN6HQ6FO2CL/CM+FM,JQ/OU1SY5QY2W_8jpJy[qvVY^@KO4MP5QQ9QQ9RP9SQ8TR9UT8YU:ZV;XR:VP:RM:NI6HE6DB5CA5?A6<=59?5:@69A67B47B47D37D3:B7:B79A69A69A69A6:B7:B7:B7:B7:B7:B7;C8<D9>F;<F;?IA>J@?I>>I9?G8>F7>D8=B;>E>9C;6@74B19I/BV3Pg=XrB^J[}JXuIMhECZ@9O:6G54E2<N8EW=NaESfHSgKNbF@W=6I3/9./6.-4,ZgM[hN\hP^jRblTemVhmYinZklZklZlj[liZnhZnhZog\oi[liZkhYjfZjf[ie\ieZhfZfgWfgUbgQajWerao|s~Ž–¥¬«¹ÂÇÐ×Úáçîó÷öüüøüûõúôóöíîòäéêÜãäÒÜÛÉÓÒ¾Î˸ÉƳþ«½¶¤Ã·§Ç¹¬ÍÁµÖÉÁßÔÐéßÞóèîøñøýõÿþøÿÿüÿþýÿýüÿüýÿüþýýÿþþþþþþþþþþþþþþþþþþþÿþüÿþüþýûÿþüÿýúÿýúÿýúÿüùÿû÷ÿû÷ÿýüÿüûþúûÿûüÿýþÿþÿÿýþþüýÿüûÿþûÿýûýôïíâÜæÙÑðâÙüñëÿúýÿüÿÿýþÿÿýÿÿûýÿúýÿþüýÿýþÿýþÿÿþÿÿÿýÿþõÿþíÿüæÿüáþùãÿþçäßɊ†mPJ0RM0SL/SM-TN.UO/UP2UP2TP3SO2PO3ON2PL1NM1OM4LO4LN6JP6JO8HP8JS8GP3GP3IS1MT3OV4V\8\d=grHtUˆ“i›s…lnuTZaBRV;LN6NN6MM5NL5OM6RP7WS:YU<ZT>XR<TO<QL9KH9GE8EC7@B7=>6:@6:@69A67B47B46C26C2;C8:B79A69A69A69A6:B7;C8;C8:B7:B7;C8;C8=E:>F;=G<=G?<H>>H=>I9?G8>F7>D8=B;=D=9C;6@74B19I/DX5Ri?[uEZ{FWyFTqEIdA=T:2H3/@.->+6H2@R8L_CReGSgKOcG@W=6I30:/07/-4,YgMZhN\hP^jRblTemVinZjo[mn\lm[mk\mj[nhZnhZoi]oi[liZkhYjfZieZie\ieZig[ghXghVchRajWerao|s{‹ŠŸ¦¢°¹ÁËÔÔÝäéñôòúüôúøñöðíðçèìÞâãÕÚÛÉÒÑ¿ÌɶÉıþ«¿¸¦¼³¢Ê¾®ÓŸÜÐÄæÙÑíâàöìíÿôúÿùÿÿ÷ÿÿú
ÿÿüÿþýÿýüÿûüþüþýþÿÿþþþþþþþþþþþþþþþþþþÿþüÿþüþýûþýûÿýúÿýúÿýúÿûøÿúöþùõÿýüÿûúýùúþúûþüýÿýþÿþÿÿýþÿüûÿýúÿüúÿøó÷ìæòåÝøêáÿôîÿúýÿüÿÿýþÿÿýÿÿûýÿúýÿþüýÿüýÿýþÿÿþÿÿÿýÿþõÿýìÿûãþúßÿüæÿÿéçâ̌ˆoRL2TO2TM0UN1TN.UO/VQ3VQ3UQ4SO2ON2NM1PL1NM1NL3KN3LN6KQ7KP9JR:LU:IR5JS6OY7SZ9T[9]c?fnG‚’c’¦sŸ±›ª’¡zˆ”pr{\\bFLP7KM5LK6KI4LJ5PK5TN8VP:WQ;VP:SN;QL9LI:IG:GE9CE:=>6:@6:@69A67B47B46C26C2;C8;C8:B79A69A6:B7;C8;C8;C8;C8;C8;C8<D9=E:>F;=G<=G?<H>=G<=H8>F7=E6=C7=B;:A:7A95?64B19I/BV3Ne;Uo?Tu@RtAPmAE`=9P6-C.+<*+<)1C-<N4H[?PcERfJNbF@W=5H2,6++2*'.&
\ No newline at end of file
diff --git a/testimgp.jpg b/testimgp.jpg
deleted file mode 100644
index 8cbb658..0000000
--- a/testimgp.jpg
+++ /dev/null
Binary files differ
diff --git a/testprog.jpg b/testprog.jpg
deleted file mode 100644
index 920fee2..0000000
--- a/testprog.jpg
+++ /dev/null
Binary files differ
diff --git a/tjbench.c b/tjbench.c
new file mode 100644
index 0000000..14ccd00
--- /dev/null
+++ b/tjbench.c
@@ -0,0 +1,966 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <cdjpeg.h>
+#include "./bmp.h"
+#include "./tjutil.h"
+#include "./turbojpeg.h"
+
+
+#define _throw(op, err) {  \
+	printf("ERROR in line %d while %s:\n%s\n", __LINE__, op, err);  \
+  retval=-1;  goto bailout;}  /* needs retval and a bailout: label in scope */
+#define _throwunix(m) _throw(m, strerror(errno))  /* reason taken from errno */
+#define _throwtj(m) _throw(m, tjGetErrorStr())  /* reason from tjGetErrorStr() */
+#define _throwbmp(m) _throw(m, bmpgeterr())  /* reason from bmpgeterr() */
+
+int flags=TJFLAG_NOREALLOC, componly=0, decomponly=0, doyuv=0, quiet=0,
+	dotile=0, pf=TJPF_BGR, yuvpad=1, warmup=1;  /* overridden by main() options */
+char *ext="ppm";  /* file extension used for the images decomp() writes */
+const char *pixFormatStr[TJ_NUMPF]=
+{
+	"RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY"
+};
+const char *subNameLong[TJ_NUMSAMP]=
+{
+	"4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
+};
+const char *csName[TJ_NUMCS]=
+{
+	"RGB", "YCbCr", "GRAY", "CMYK", "YCCK"
+};
+const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"};
+tjscalingfactor *scalingfactors=NULL, sf={1, 1};  int nsf=0;  /* sf: -scale */
+int xformop=TJXOP_NONE, xformopt=0;  /* transform op/options for decompTest() */
+int (*customFilter)(short *, tjregion, tjregion, int, int, tjtransform *);
+double benchtime=5.0;  /* minimum seconds per benchmark loop (-benchtime) */
+
+
+char *formatName(int subsamp, int cs, char *buf)
+{
+	/* Only YCCK needs buf: "<colorspace> <subsampling>", capped at 80 bytes */
+	if(cs==TJCS_YCCK)
+	{
+		snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]);
+		return buf;
+	}
+	return (char *)(cs==TJCS_YCbCr? subNameLong[subsamp]:csName[cs]);
+}
+
+
+char *sigfig(double val, int figs, char *buf, int len)
+{
+	/* Write val into buf (len bytes) with roughly figs significant digits */
+	char format[80];  double mag=ceil(log10(fabs(val)));
+	/* val of 0, Inf, or NaN gives a non-finite mag; (int)mag would be undefined */
+	int digitsafterdecimal=(mag>-400. && mag<400.)? figs-(int)mag:0;
+	snprintf(format, 80, "%%.%df", digitsafterdecimal<0? 0:digitsafterdecimal);
+	snprintf(buf, len, format, val);  return buf;
+}
+
+
+/* Custom DCT filter: negates every coefficient to produce an image negative */
+int dummyDCTFilter(short *coeffs, tjregion arrayRegion, tjregion planeRegion,
+	int componentIndex, int transformIndex, tjtransform *transform)
+{
+	int remaining=arrayRegion.w*arrayRegion.h;
+	while(remaining>0) {remaining--;  coeffs[remaining]=-coeffs[remaining];}
+	return 0;
+}
+
+
+/* Decompress the tiles in jpegbuf repeatedly, report speed, and save the result */
+int decomp(unsigned char *srcbuf, unsigned char **jpegbuf,
+	unsigned long *jpegsize, unsigned char *dstbuf, int w, int h,
+	int subsamp, int jpegqual, char *filename, int tilew, int tileh)
+{
+	char tempstr[1024], sizestr[20]="\0", qualstr[6]="\0", *ptr;
+	FILE *file=NULL;  tjhandle handle=NULL;
+	int row, col, iter=0, dstbufalloc=0, retval=0;
+	double elapsed, elapsedDecode;
+	int ps=tjPixelSize[pf];
+	int scaledw=TJSCALED(w, sf);
+	int scaledh=TJSCALED(h, sf);
+	int pitch=scaledw*ps;  /* bytes per row of dstbuf */
+	int ntilesw=(w+tilew-1)/tilew, ntilesh=(h+tileh-1)/tileh;
+	unsigned char *dstptr, *dstptr2, *yuvbuf=NULL;
+
+	if(jpegqual>0)
+	{
+		snprintf(qualstr, 6, "_Q%d", jpegqual);
+		qualstr[5]=0;
+	}
+
+	if((handle=tjInitDecompress())==NULL)
+		_throwtj("executing tjInitDecompress()");
+
+	if(dstbuf==NULL)  /* caller supplied no buffer, so own one locally */
+	{
+		if((dstbuf=(unsigned char *)malloc(pitch*scaledh))==NULL)
+			_throwunix("allocating destination buffer");
+		dstbufalloc=1;
+	}
+	/* Set the destination buffer to gray so we know whether the decompressor
+	   attempted to write to it */
+	memset(dstbuf, 127, pitch*scaledh);
+
+	if(doyuv)
+	{
+		int width=dotile? tilew:scaledw;
+		int height=dotile? tileh:scaledh;
+		int yuvsize=tjBufSizeYUV2(width, yuvpad, height, subsamp);
+		if((yuvbuf=(unsigned char *)malloc(yuvsize))==NULL)
+			_throwunix("allocating YUV buffer");
+		memset(yuvbuf, 127, yuvsize);
+	}
+
+	/* Benchmark */
+	iter=-warmup;  /* passes that end with iter<1 are untimed warmup runs */
+	elapsed=elapsedDecode=0.;
+	while(1)
+	{
+		int tile=0;
+		double start=gettime();
+		for(row=0, dstptr=dstbuf; row<ntilesh; row++, dstptr+=pitch*tileh)
+		{
+			for(col=0, dstptr2=dstptr; col<ntilesw; col++, tile++, dstptr2+=ps*tilew)
+			{
+				int width=dotile? min(tilew, w-col*tilew):scaledw;
+				int height=dotile? min(tileh, h-row*tileh):scaledh;
+				if(doyuv)
+				{
+					double startDecode;
+					if(tjDecompressToYUV2(handle, jpegbuf[tile], jpegsize[tile], yuvbuf,
+						width, yuvpad, height, flags)==-1)
+						_throwtj("executing tjDecompressToYUV2()");
+					startDecode=gettime();
+					if(tjDecodeYUV(handle, yuvbuf, yuvpad, subsamp, dstptr2, width,
+						pitch, height, pf, flags)==-1)
+						_throwtj("executing tjDecodeYUV()");
+					if(iter>=0) elapsedDecode+=gettime()-startDecode;
+				}
+				else
+					if(tjDecompress2(handle, jpegbuf[tile], jpegsize[tile], dstptr2,
+						width, pitch, height, pf, flags)==-1)
+						_throwtj("executing tjDecompress2()");
+			}
+		}
+		iter++;
+		if(iter>=1)
+		{
+			elapsed+=gettime()-start;
+			if(elapsed>=benchtime) break;
+		}
+	}
+	if(doyuv) elapsed-=elapsedDecode;  /* exclude the tjDecodeYUV() time */
+
+	if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()");
+	handle=NULL;
+
+	if(quiet)
+	{
+		printf("%-6s%s",
+			sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 1024),
+			quiet==2? "\n":"  ");
+		if(doyuv)
+			printf("%s\n",
+				sigfig((double)(w*h)/1000000.*(double)iter/elapsedDecode, 4, tempstr,
+					1024));
+		else if(quiet!=2) printf("\n");
+	}
+	else
+	{
+		printf("%s --> Frame rate:         %f fps\n",
+			doyuv? "Decomp to YUV":"Decompress   ", (double)iter/elapsed);
+		printf("                  Throughput:         %f Megapixels/sec\n",
+			(double)(w*h)/1000000.*(double)iter/elapsed);
+		if(doyuv)
+		{
+			printf("YUV Decode    --> Frame rate:         %f fps\n",
+				(double)iter/elapsedDecode);
+			printf("                  Throughput:         %f Megapixels/sec\n",
+				(double)(w*h)/1000000.*(double)iter/elapsedDecode);
+		}
+	}
+	if(sf.num!=1 || sf.denom!=1)  /* sizestr tags the output file name */
+		snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom);
+	else if(tilew!=w || tileh!=h)
+		snprintf(sizestr, 20, "%dx%d", tilew, tileh);
+	else snprintf(sizestr, 20, "full");
+	if(decomponly)
+		snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext);
+	else
+		snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp],
+			qualstr, sizestr, ext);
+
+	if(savebmp(tempstr, dstbuf, scaledw, scaledh, pf,
+		(flags&TJFLAG_BOTTOMUP)!=0)==-1)
+		_throwbmp("saving bitmap");
+	ptr=strrchr(tempstr, '.');
+	snprintf(ptr, 1024-(ptr-tempstr), "-err.%s", ext);  /* name of error image */
+	if(srcbuf && sf.num==1 && sf.denom==1)  /* needs a source and no scaling */
+	{
+		if(!quiet) printf("Compression error written to %s.\n", tempstr);
+		if(subsamp==TJ_GRAYSCALE)
+		{
+			int index, index2;
+			for(row=0, index=0; row<h; row++, index+=pitch)
+			{
+				for(col=0, index2=index; col<w; col++, index2+=ps)
+				{
+					int rindex=index2+tjRedOffset[pf];
+					int gindex=index2+tjGreenOffset[pf];
+					int bindex=index2+tjBlueOffset[pf];
+					int y=(int)((double)srcbuf[rindex]*0.299
+						+ (double)srcbuf[gindex]*0.587
+						+ (double)srcbuf[bindex]*0.114 + 0.5);
+					if(y>255) y=255;  if(y<0) y=0;
+					dstbuf[rindex]=abs(dstbuf[rindex]-y);
+					dstbuf[gindex]=abs(dstbuf[gindex]-y);
+					dstbuf[bindex]=abs(dstbuf[bindex]-y);
+				}
+			}
+		}
+		else
+		{
+			for(row=0; row<h; row++)
+				for(col=0; col<w*ps; col++)
+					dstbuf[pitch*row+col]
+						=abs(dstbuf[pitch*row+col]-srcbuf[pitch*row+col]);
+		}
+		if(savebmp(tempstr, dstbuf, w, h, pf,
+			(flags&TJFLAG_BOTTOMUP)!=0)==-1)
+			_throwbmp("saving bitmap");
+	}
+
+	bailout:
+	if(file) fclose(file);
+	if(handle) tjDestroy(handle);
+	if(dstbuf && dstbufalloc) free(dstbuf);  /* never free the caller's buffer */
+	if(yuvbuf) free(yuvbuf);
+	return retval;  /* 0, or -1 if one of the _throw macros fired */
+}
+
+
+/* Benchmark compression of srcbuf (w x h, pixel format pf), then hand the JPEG
+   data to decomp() unless componly is set.  Returns 0, or -1 on failure. */
+int fullTest(unsigned char *srcbuf, int w, int h, int subsamp, int jpegqual,
+	char *filename)
+{
+	char tempstr[1024], tempstr2[80];
+	FILE *file=NULL;  tjhandle handle=NULL;
+	unsigned char **jpegbuf=NULL, *yuvbuf=NULL, *tmpbuf=NULL, *srcptr, *srcptr2;
+	double start, elapsed, elapsedEncode;
+	int totaljpegsize=0, row, col, i, tilew=w, tileh=h, retval=0;
+	int iter, yuvsize=0;
+	unsigned long *jpegsize=NULL;
+	int ps=tjPixelSize[pf];
+	int ntilesw=1, ntilesh=1, pitch=w*ps;
+	const char *pfStr=pixFormatStr[pf];
+
+	if((tmpbuf=(unsigned char *)malloc(pitch*h)) == NULL)
+		_throwunix("allocating temporary image buffer");
+
+	if(!quiet)
+		printf(">>>>>  %s (%s) <--> JPEG %s Q%d  <<<<<\n", pfStr,
+			(flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down", subNameLong[subsamp],
+			jpegqual);
+
+	for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2)
+	{
+		if(tilew>w) tilew=w;  if(tileh>h) tileh=h;
+		ntilesw=(w+tilew-1)/tilew;  ntilesh=(h+tileh-1)/tileh;
+
+		if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
+			*ntilesw*ntilesh))==NULL)
+			_throwunix("allocating JPEG tile array");
+		memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh);
+		if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long)
+			*ntilesw*ntilesh))==NULL)
+			_throwunix("allocating JPEG size array");
+		memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh);
+
+		if((flags&TJFLAG_NOREALLOC)!=0)
+			for(i=0; i<ntilesw*ntilesh; i++)
+			{
+				if((jpegbuf[i]=(unsigned char *)malloc(tjBufSize(tilew, tileh,
+					subsamp)))==NULL)
+					_throwunix("allocating JPEG tiles");
+			}
+
+		/* Compression test */
+		if(quiet==1)
+			printf("%-4s (%s)  %-5s    %-3d   ", pfStr,
+				(flags&TJFLAG_BOTTOMUP)? "BU":"TD", subNameLong[subsamp], jpegqual);
+		for(i=0; i<h; i++)
+			memcpy(&tmpbuf[pitch*i], &srcbuf[w*ps*i], w*ps);
+		if((handle=tjInitCompress())==NULL)
+			_throwtj("executing tjInitCompress()");
+
+		if(doyuv)
+		{
+			yuvsize=tjBufSizeYUV2(tilew, yuvpad, tileh, subsamp);
+			if((yuvbuf=(unsigned char *)malloc(yuvsize))==NULL)
+				_throwunix("allocating YUV buffer");
+			memset(yuvbuf, 127, yuvsize);
+		}
+
+		/* Benchmark */
+		iter=-warmup;
+		elapsed=elapsedEncode=0.;
+		while(1)
+		{
+			int tile=0;
+			totaljpegsize=0;
+			start=gettime();
+			for(row=0, srcptr=srcbuf; row<ntilesh; row++, srcptr+=pitch*tileh)
+			{
+				for(col=0, srcptr2=srcptr; col<ntilesw; col++, tile++,
+					srcptr2+=ps*tilew)
+				{
+					int width=min(tilew, w-col*tilew);
+					int height=min(tileh, h-row*tileh);
+					if(doyuv)
+					{
+						double startEncode=gettime();
+						if(tjEncodeYUV3(handle, srcptr2, width, pitch, height, pf, yuvbuf,
+							yuvpad, subsamp, flags)==-1)
+							_throwtj("executing tjEncodeYUV3()");
+						if(iter>=0) elapsedEncode+=gettime()-startEncode;
+						if(tjCompressFromYUV(handle, yuvbuf, width, yuvpad, height,
+							subsamp, &jpegbuf[tile], &jpegsize[tile], jpegqual, flags)==-1)
+							_throwtj("executing tjCompressFromYUV()");
+					}
+					else
+					{
+						if(tjCompress2(handle, srcptr2, width, pitch, height, pf,
+							&jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1)
+							_throwtj("executing tjCompress2()");
+					}
+					totaljpegsize+=jpegsize[tile];
+				}
+			}
+			iter++;
+			if(iter>=1)
+			{
+				elapsed+=gettime()-start;
+				if(elapsed>=benchtime) break;
+			}
+		}
+		if(doyuv) elapsed-=elapsedEncode;
+
+		if(tjDestroy(handle)==-1) _throwtj("executing tjDestroy()");
+		handle=NULL;
+
+		if(quiet==1) printf("%-5d  %-5d   ", tilew, tileh);
+		if(quiet)
+		{
+			if(doyuv)
+				printf("%-6s%s",
+					sigfig((double)(w*h)/1000000.*(double)iter/elapsedEncode, 4, tempstr,
+						1024), quiet==2? "\n":"  ");
+			printf("%-6s%s",
+				sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4,	tempstr, 1024),
+				quiet==2? "\n":"  ");
+			printf("%-6s%s",
+				sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80),
+				quiet==2? "\n":"  ");
+		}
+		else
+		{
+			printf("\n%s size: %d x %d\n", dotile? "Tile":"Image", tilew, tileh);
+			if(doyuv)
+			{
+				printf("Encode YUV    --> Frame rate:         %f fps\n",
+					(double)iter/elapsedEncode);
+				printf("                  Output image size:  %d bytes\n", yuvsize);
+				printf("                  Compression ratio:  %f:1\n",
+					(double)(w*h*ps)/(double)yuvsize);
+				printf("                  Throughput:         %f Megapixels/sec\n",
+					(double)(w*h)/1000000.*(double)iter/elapsedEncode);
+				printf("                  Output bit stream:  %f Megabits/sec\n",
+					(double)yuvsize*8./1000000.*(double)iter/elapsedEncode);
+			}
+			printf("%s --> Frame rate:         %f fps\n",
+				doyuv? "Comp from YUV":"Compress     ", (double)iter/elapsed);
+			printf("                  Output image size:  %d bytes\n", totaljpegsize);
+			printf("                  Compression ratio:  %f:1\n",
+				(double)(w*h*ps)/(double)totaljpegsize);
+			printf("                  Throughput:         %f Megapixels/sec\n",
+				(double)(w*h)/1000000.*(double)iter/elapsed);
+			printf("                  Output bit stream:  %f Megabits/sec\n",
+				(double)totaljpegsize*8./1000000.*(double)iter/elapsed);
+		}
+		if(tilew==w && tileh==h)
+		{
+			snprintf(tempstr, 1024, "%s_%s_Q%d.jpg", filename, subName[subsamp],
+				jpegqual);
+			if((file=fopen(tempstr, "wb"))==NULL)
+				_throwunix("opening reference image");
+			if(fwrite(jpegbuf[0], jpegsize[0], 1, file)!=1)
+				_throwunix("writing reference image");
+			fclose(file);  file=NULL;
+			if(!quiet) printf("Reference image written to %s\n", tempstr);
+		}
+
+		/* Decompression test */
+		if(!componly)
+		{
+			if(decomp(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual,
+				filename, tilew, tileh)==-1)
+				{retval=-1;  goto bailout;}  /* decomp() already printed the error */
+		}
+
+		for(i=0; i<ntilesw*ntilesh; i++)
+		{
+			if(jpegbuf[i]) free(jpegbuf[i]);  jpegbuf[i]=NULL;
+		}
+		free(jpegbuf);  jpegbuf=NULL;
+		free(jpegsize);  jpegsize=NULL;
+		if(doyuv)
+		{
+			free(yuvbuf);  yuvbuf=NULL;
+		}
+
+		if(tilew==w && tileh==h) break;
+	}
+
+	bailout:
+	if(file) {fclose(file);  file=NULL;}
+	if(jpegbuf)
+	{
+		for(i=0; i<ntilesw*ntilesh; i++)
+		{
+			if(jpegbuf[i]) free(jpegbuf[i]);  jpegbuf[i]=NULL;
+		}
+		free(jpegbuf);  jpegbuf=NULL;
+	}
+	if(yuvbuf) {free(yuvbuf);  yuvbuf=NULL;}
+	if(jpegsize) {free(jpegsize);  jpegsize=NULL;}
+	if(tmpbuf) {free(tmpbuf);  tmpbuf=NULL;}
+	if(handle) {tjDestroy(handle);  handle=NULL;}
+	return retval;
+}
+
+
+/* Benchmark lossless transforms (if requested) and decompression of the JPEG
+   file named filename.  Returns 0 on success, or -1 if any step fails. */
+int decompTest(char *filename)
+{
+	FILE *file=NULL;  tjhandle handle=NULL;
+	unsigned char **jpegbuf=NULL, *srcbuf=NULL;
+	unsigned long *jpegsize=NULL, srcsize, totaljpegsize;
+	tjtransform *t=NULL;
+	int w=0, h=0, subsamp=-1, cs=-1, _w, _h, _tilew, _tileh,
+		_ntilesw, _ntilesh, _subsamp;
+	char *temp=NULL, tempstr[80], tempstr2[80];
+	int row, col, i, iter, tilew, tileh, ntilesw=1, ntilesh=1, retval=0;
+	double start, elapsed;
+	int ps=tjPixelSize[pf], tile;
+
+	if((file=fopen(filename, "rb"))==NULL)
+		_throwunix("opening file");
+	if(fseek(file, 0, SEEK_END)<0 || (srcsize=ftell(file))==(unsigned long)-1)
+		_throwunix("determining file size");
+	if((srcbuf=(unsigned char *)malloc(srcsize))==NULL)
+		_throwunix("allocating memory");
+	if(fseek(file, 0, SEEK_SET)<0)
+		_throwunix("setting file position");
+	if(fread(srcbuf, srcsize, 1, file)<1)
+		_throwunix("reading JPEG data");
+	fclose(file);  file=NULL;
+
+	temp=strrchr(filename, '.');
+	if(temp!=NULL) *temp='\0';  /* strip the extension in place */
+
+	if((handle=tjInitTransform())==NULL)
+		_throwtj("executing tjInitTransform()");
+	if(tjDecompressHeader3(handle, srcbuf, srcsize, &w, &h, &subsamp, &cs)==-1)
+		_throwtj("executing tjDecompressHeader3()");
+
+	if(quiet==1)
+	{
+		printf("All performance values in Mpixels/sec\n\n");
+		printf("Bitmap     JPEG   JPEG     %s  %s   Xform   Comp    Decomp  ",
+			dotile? "Tile ":"Image", dotile? "Tile ":"Image");
+		if(doyuv) printf("Decode");
+		printf("\n");
+		printf("Format     CS     Subsamp  Width  Height  Perf    Ratio   Perf    ");
+		if(doyuv) printf("Perf");
+		printf("\n\n");
+	}
+	else if(!quiet)
+		printf(">>>>>  JPEG %s --> %s (%s)  <<<<<\n",
+			formatName(subsamp, cs, tempstr), pixFormatStr[pf],
+			(flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down");
+
+	for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2)
+	{
+		if(tilew>w) tilew=w;  if(tileh>h) tileh=h;
+		ntilesw=(w+tilew-1)/tilew;  ntilesh=(h+tileh-1)/tileh;
+
+		if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *)
+			*ntilesw*ntilesh))==NULL)
+			_throwunix("allocating JPEG tile array");
+		memset(jpegbuf, 0, sizeof(unsigned char *)*ntilesw*ntilesh);
+		if((jpegsize=(unsigned long *)malloc(sizeof(unsigned long)
+			*ntilesw*ntilesh))==NULL)
+			_throwunix("allocating JPEG size array");
+		memset(jpegsize, 0, sizeof(unsigned long)*ntilesw*ntilesh);
+
+		if((flags&TJFLAG_NOREALLOC)!=0 || !dotile)
+			for(i=0; i<ntilesw*ntilesh; i++)
+			{
+				if((jpegbuf[i]=(unsigned char *)malloc(tjBufSize(tilew, tileh,
+					subsamp)))==NULL)
+					_throwunix("allocating JPEG tiles");
+			}
+
+		_w=w;  _h=h;  _tilew=tilew;  _tileh=tileh;
+		if(!quiet)
+		{
+			printf("\n%s size: %d x %d", dotile? "Tile":"Image", _tilew, _tileh);
+			if(sf.num!=1 || sf.denom!=1)
+				printf(" --> %d x %d", TJSCALED(_w, sf), TJSCALED(_h, sf));
+			printf("\n");
+		}
+		else if(quiet==1)
+		{
+			printf("%-4s (%s)  %-5s  %-5s    ", pixFormatStr[pf],
+				(flags&TJFLAG_BOTTOMUP)? "BU":"TD", csName[cs], subNameLong[subsamp]);
+			printf("%-5d  %-5d   ", tilew, tileh);
+		}
+
+		_subsamp=subsamp;
+		if(dotile || xformop!=TJXOP_NONE || xformopt!=0 || customFilter)
+		{
+			if((t=(tjtransform *)malloc(sizeof(tjtransform)*ntilesw*ntilesh))==NULL)
+				_throwunix("allocating image transform array");
+
+			if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE
+				|| xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270)
+			{
+				_w=h;  _h=w;  _tilew=tileh;  _tileh=tilew;
+			}
+
+			if(xformopt&TJXOPT_GRAY) _subsamp=TJ_GRAYSCALE;
+			if(xformop==TJXOP_HFLIP || xformop==TJXOP_ROT180)
+				_w=_w-(_w%tjMCUWidth[_subsamp]);
+			if(xformop==TJXOP_VFLIP || xformop==TJXOP_ROT180)
+				_h=_h-(_h%tjMCUHeight[_subsamp]);
+			if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT90)
+				_w=_w-(_w%tjMCUHeight[_subsamp]);
+			if(xformop==TJXOP_TRANSVERSE || xformop==TJXOP_ROT270)
+				_h=_h-(_h%tjMCUWidth[_subsamp]);
+			_ntilesw=(_w+_tilew-1)/_tilew;
+			_ntilesh=(_h+_tileh-1)/_tileh;
+
+			if(xformop==TJXOP_TRANSPOSE || xformop==TJXOP_TRANSVERSE
+				|| xformop==TJXOP_ROT90 || xformop==TJXOP_ROT270)
+			{
+				if(_subsamp==TJSAMP_422) _subsamp=TJSAMP_440;
+				else if(_subsamp==TJSAMP_440) _subsamp=TJSAMP_422;
+			}
+
+			for(row=0, tile=0; row<_ntilesh; row++)
+			{
+				for(col=0; col<_ntilesw; col++, tile++)
+				{
+					t[tile].r.w=min(_tilew, _w-col*_tilew);
+					t[tile].r.h=min(_tileh, _h-row*_tileh);
+					t[tile].r.x=col*_tilew;
+					t[tile].r.y=row*_tileh;
+					t[tile].op=xformop;
+					t[tile].options=xformopt|TJXOPT_TRIM;
+					t[tile].customFilter=customFilter;
+					if(t[tile].options&TJXOPT_NOOUTPUT && jpegbuf[tile])
+					{
+						free(jpegbuf[tile]);  jpegbuf[tile]=NULL;
+					}
+				}
+			}
+
+			iter=-warmup;
+			elapsed=0.;
+			while(1)
+			{
+				start=gettime();
+				if(tjTransform(handle, srcbuf, srcsize, _ntilesw*_ntilesh, jpegbuf,
+					jpegsize, t, flags)==-1)
+					_throwtj("executing tjTransform()");
+				iter++;
+				if(iter>=1)
+				{
+					elapsed+=gettime()-start;
+					if(elapsed>=benchtime) break;
+				}
+			}
+
+			free(t);  t=NULL;
+
+			for(tile=0, totaljpegsize=0; tile<_ntilesw*_ntilesh; tile++)
+				totaljpegsize+=jpegsize[tile];
+
+			/* elapsed spans iter timed runs, so every rate is scaled by iter */
+			if(quiet)
+			{
+				printf("%-6s%s%-6s%s",
+					sigfig((double)(w*h)/1000000.*(double)iter/elapsed, 4, tempstr, 80),
+					quiet==2? "\n":"  ",
+					sigfig((double)(w*h*ps)/(double)totaljpegsize, 4, tempstr2, 80),
+					quiet==2? "\n":"  ");
+			}
+			else if(!quiet)
+			{
+				printf("Transform     --> Frame rate:         %f fps\n", (double)iter/elapsed);
+				printf("                  Output image size:  %lu bytes\n", totaljpegsize);
+				printf("                  Compression ratio:  %f:1\n",
+					(double)(w*h*ps)/(double)totaljpegsize);
+				printf("                  Throughput:         %f Megapixels/sec\n",
+					(double)(w*h)/1000000.*(double)iter/elapsed);
+				printf("                  Output bit stream:  %f Megabits/sec\n",
+					(double)totaljpegsize*8./1000000.*(double)iter/elapsed);
+			}
+		}
+		else
+		{
+			if(quiet==1) printf("N/A     N/A     ");
+			/* The file can exceed tjBufSize() (e.g. large metadata), so size the
+			   buffer from the file itself before copying into it */
+			free(jpegbuf[0]);
+			if((jpegbuf[0]=(unsigned char *)malloc(srcsize))==NULL)
+				_throwunix("allocating JPEG tiles");
+			jpegsize[0]=srcsize;
+			memcpy(jpegbuf[0], srcbuf, srcsize);
+		}
+
+		if(w==tilew) _tilew=_w;  if(h==tileh) _tileh=_h;
+		if(!(xformopt&TJXOPT_NOOUTPUT))
+		{
+			if(decomp(NULL, jpegbuf, jpegsize, NULL, _w, _h, _subsamp, 0,
+				filename, _tilew, _tileh)==-1)
+				{retval=-1;  goto bailout;}  /* decomp() already printed the error */
+		}
+		else if(quiet==1) printf("N/A\n");
+
+		for(i=0; i<ntilesw*ntilesh; i++)
+			{free(jpegbuf[i]);  jpegbuf[i]=NULL;}
+		free(jpegbuf);  jpegbuf=NULL;
+		if(jpegsize) {free(jpegsize);  jpegsize=NULL;}
+
+		if(tilew==w && tileh==h) break;
+	}
+
+	bailout:
+	if(file) {fclose(file);  file=NULL;}
+	if(jpegbuf)
+	{
+		for(i=0; i<ntilesw*ntilesh; i++)
+			{free(jpegbuf[i]);  jpegbuf[i]=NULL;}
+		free(jpegbuf);  jpegbuf=NULL;
+	}
+	if(jpegsize) {free(jpegsize);  jpegsize=NULL;}
+	if(srcbuf) {free(srcbuf);  srcbuf=NULL;}
+	if(t) {free(t);  t=NULL;}
+	if(handle) {tjDestroy(handle);  handle=NULL;}
+	return retval;
+}
+
+
+void usage(char *progname)  /* print the help text, then exit(1) */
+{
+	int i;  /* index into scalingfactors[] */
+	printf("USAGE: %s\n", progname);
+	printf("       <Inputfile (BMP|PPM)> <Quality> [options]\n\n");
+	printf("       %s\n", progname);
+	printf("       <Inputfile (JPG)> [options]\n\n");
+	printf("Options:\n\n");
+	printf("-alloc = Dynamically allocate JPEG image buffers\n");
+	printf("-bmp = Generate output images in Windows Bitmap format (default = PPM)\n");
+	printf("-bottomup = Test bottom-up compression/decompression\n");
+	printf("-tile = Test performance of the codec when the image is encoded as separate\n");
+	printf("     tiles of varying sizes.\n");
+	printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n");
+	printf("     Test the specified color conversion path in the codec (default = BGR)\n");
+	printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n");
+	printf("     the underlying codec\n");
+	printf("-fastdct = Use the fastest DCT/IDCT algorithms available in the underlying\n");
+	printf("     codec\n");
+	printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n");
+	printf("     underlying codec\n");
+	printf("-subsamp <s> = When testing JPEG compression, this option specifies the level\n");
+	printf("     of chrominance subsampling to use (<s> = 444, 422, 440, 420, 411, or\n");
+	printf("     GRAY).  The default is to test Grayscale, 4:2:0, 4:2:2, and 4:4:4 in\n");
+	printf("     sequence.\n");
+	printf("-quiet = Output results in tabular rather than verbose format\n");
+	printf("-yuv = Test YUV encoding/decoding functions\n");
+	printf("-yuvpad <p> = If testing YUV encoding/decoding, this specifies the number of\n");
+	printf("     bytes to which each row of each plane in the intermediate YUV image is\n");
+	printf("     padded (default = 1)\n");
+	printf("-scale M/N = Scale down the width/height of the decompressed JPEG image by a\n");
+	printf("     factor of M/N (M/N = ");
+	for(i=0; i<nsf; i++)  /* list the factors as "a or b" / "a, b, or c" */
+	{
+		printf("%d/%d", scalingfactors[i].num, scalingfactors[i].denom);
+		if(nsf==2 && i!=nsf-1) printf(" or ");
+		else if(nsf>2)
+		{
+			if(i!=nsf-1) printf(", ");
+			if(i==nsf-2) printf("or ");
+		}
+		if(i%8==0 && i!=0) printf("\n     ");  /* wrap the list after i=8, 16, ... */
+	}
+	printf(")\n");
+	printf("-hflip, -vflip, -transpose, -transverse, -rot90, -rot180, -rot270 =\n");
+	printf("     Perform the corresponding lossless transform prior to\n");
+	printf("     decompression (these options are mutually exclusive)\n");
+	printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n");
+	printf("     test (can be combined with the other transforms above)\n");
+	printf("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n");
+	printf("-warmup <w> = Execute each benchmark <w> times to prime the cache before\n");
+	printf("     taking performance measurements (default = 1)\n");
+	printf("-componly = Stop after running compression tests.  Do not test decompression.\n\n");
+	printf("NOTE:  If the quality is specified as a range (e.g. 90-100), a separate\n");
+	printf("test will be performed for all quality values in the range.\n\n");
+	exit(1);  /* usage() never returns to its caller */
+}
+
+
+/* Parse the command line, then run decompTest() or the fullTest() series */
+int main(int argc, char *argv[])
+{
+	unsigned char *srcbuf=NULL;  int w=0, h=0, i, j;  char *temp;
+	int minqual=-1, maxqual=-1, minarg=2, retval=0, subsamp=-1;
+
+	if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0)
+		_throwtj("executing tjGetScalingFactors()");
+
+	if(argc<minarg) usage(argv[0]);
+
+	temp=strrchr(argv[1], '.');
+	if(temp!=NULL)
+	{
+		if(!strcasecmp(temp, ".bmp")) ext="bmp";
+		if(!strcasecmp(temp, ".jpg") || !strcasecmp(temp, ".jpeg")) decomponly=1;
+	}
+
+	printf("\n");
+
+	if(!decomponly)
+	{
+		minarg=3;
+		if(argc<minarg) usage(argv[0]);
+		if((minqual=atoi(argv[2]))<1 || minqual>100)
+		{
+			puts("ERROR: Quality must be between 1 and 100.");
+			exit(1);
+		}
+		if((temp=strchr(argv[2], '-'))!=NULL && strlen(temp)>1
+			&& sscanf(&temp[1], "%d", &maxqual)==1 && maxqual>minqual && maxqual>=1
+			&& maxqual<=100) {}
+		else maxqual=minqual;
+	}
+
+	if(argc>minarg)
+	{
+		for(i=minarg; i<argc; i++)
+		{
+			if(!strcasecmp(argv[i], "-tile"))
+			{
+				dotile=1;  xformopt|=TJXOPT_CROP;
+			}
+			if(!strcasecmp(argv[i], "-fastupsample"))
+			{
+				printf("Using fast upsampling code\n\n");
+				flags|=TJFLAG_FASTUPSAMPLE;
+			}
+			if(!strcasecmp(argv[i], "-fastdct"))
+			{
+				printf("Using fastest DCT/IDCT algorithm\n\n");
+				flags|=TJFLAG_FASTDCT;
+			}
+			if(!strcasecmp(argv[i], "-accuratedct"))
+			{
+				printf("Using most accurate DCT/IDCT algorithm\n\n");
+				flags|=TJFLAG_ACCURATEDCT;
+			}
+			if(!strcasecmp(argv[i], "-rgb")) pf=TJPF_RGB;
+			if(!strcasecmp(argv[i], "-rgbx")) pf=TJPF_RGBX;
+			if(!strcasecmp(argv[i], "-bgr")) pf=TJPF_BGR;
+			if(!strcasecmp(argv[i], "-bgrx")) pf=TJPF_BGRX;
+			if(!strcasecmp(argv[i], "-xbgr")) pf=TJPF_XBGR;
+			if(!strcasecmp(argv[i], "-xrgb")) pf=TJPF_XRGB;
+			if(!strcasecmp(argv[i], "-bottomup")) flags|=TJFLAG_BOTTOMUP;
+			if(!strcasecmp(argv[i], "-quiet")) quiet=1;
+			if(!strcasecmp(argv[i], "-qq")) quiet=2;
+			if(!strcasecmp(argv[i], "-scale") && i<argc-1)
+			{
+				int temp1=0, temp2=0, match=0;
+				if(sscanf(argv[++i], "%d/%d", &temp1, &temp2)==2)
+				{
+					for(j=0; j<nsf; j++)
+					{
+						if((double)temp1/(double)temp2
+							== (double)scalingfactors[j].num/(double)scalingfactors[j].denom)
+						{
+							sf=scalingfactors[j];
+							match=1;  break;
+						}
+					}
+					if(!match) usage(argv[0]);
+				}
+				else usage(argv[0]);
+			}
+			if(!strcasecmp(argv[i], "-hflip")) xformop=TJXOP_HFLIP;
+			if(!strcasecmp(argv[i], "-vflip")) xformop=TJXOP_VFLIP;
+			if(!strcasecmp(argv[i], "-transpose")) xformop=TJXOP_TRANSPOSE;
+			if(!strcasecmp(argv[i], "-transverse")) xformop=TJXOP_TRANSVERSE;
+			if(!strcasecmp(argv[i], "-rot90")) xformop=TJXOP_ROT90;
+			if(!strcasecmp(argv[i], "-rot180")) xformop=TJXOP_ROT180;
+			if(!strcasecmp(argv[i], "-rot270")) xformop=TJXOP_ROT270;
+			if(!strcasecmp(argv[i], "-grayscale")) xformopt|=TJXOPT_GRAY;
+			if(!strcasecmp(argv[i], "-custom")) customFilter=dummyDCTFilter;
+			if(!strcasecmp(argv[i], "-nooutput")) xformopt|=TJXOPT_NOOUTPUT;
+			if(!strcasecmp(argv[i], "-benchtime") && i<argc-1)
+			{
+				double temp=atof(argv[++i]);
+				if(temp>0.0) benchtime=temp;
+				else usage(argv[0]);
+			}
+			if(!strcasecmp(argv[i], "-warmup") && i<argc-1)
+			{
+				int temp=atoi(argv[++i]);
+				if(temp>=0)
+				{
+					warmup=temp;
+					printf("Warmup runs = %d\n\n", warmup);
+				}
+				else usage(argv[0]);
+			}
+			if(!strcmp(argv[i], "-?")) usage(argv[0]);
+			if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC);
+			if(!strcasecmp(argv[i], "-bmp")) ext="bmp";
+			if(!strcasecmp(argv[i], "-yuv"))
+			{
+				printf("Testing YUV planar encoding/decoding\n\n");
+				doyuv=1;
+			}
+			if(!strcasecmp(argv[i], "-yuvpad") && i<argc-1)
+			{
+				int temp=atoi(argv[++i]);
+				if(temp>=1) yuvpad=temp;
+			}
+			if(!strcasecmp(argv[i], "-subsamp") && i<argc-1)
+			{
+				i++;
+				if(toupper(argv[i][0])=='G') subsamp=TJSAMP_GRAY;
+				else
+				{
+					int temp=atoi(argv[i]);
+					switch(temp)
+					{
+						case 444:  subsamp=TJSAMP_444;  break;
+						case 422:  subsamp=TJSAMP_422;  break;
+						case 440:  subsamp=TJSAMP_440;  break;
+						case 420:  subsamp=TJSAMP_420;  break;
+						case 411:  subsamp=TJSAMP_411;  break;
+					}
+				}
+			}
+			if(!strcasecmp(argv[i], "-componly")) componly=1;
+		}
+	}
+
+	if((sf.num!=1 || sf.denom!=1) && dotile)
+	{
+		printf("Disabling tiled compression/decompression tests, because those tests do not\n");
+		printf("work when scaled decompression is enabled.\n");
+		dotile=0;
+	}
+
+	if(!decomponly)
+	{
+		if(loadbmp(argv[1], &srcbuf, &w, &h, pf, (flags&TJFLAG_BOTTOMUP)!=0)==-1)
+			_throwbmp("loading bitmap");
+		temp=strrchr(argv[1], '.');
+		if(temp!=NULL) *temp='\0';
+	}
+
+	if(quiet==1 && !decomponly)
+	{
+		printf("All performance values in Mpixels/sec\n\n");
+		printf("Bitmap     JPEG     JPEG  %s  %s   ",
+			dotile? "Tile ":"Image", dotile? "Tile ":"Image");
+		if(doyuv) printf("Encode  ");
+		printf("Comp    Comp    Decomp  ");
+		if(doyuv) printf("Decode");
+		printf("\n");
+		printf("Format     Subsamp  Qual  Width  Height  ");
+		if(doyuv) printf("Perf    ");
+		printf("Perf    Ratio   Perf    ");
+		if(doyuv) printf("Perf");
+		printf("\n\n");
+	}
+
+	if(decomponly)
+	{
+		retval=decompTest(argv[1]);  /* report a failed run via the exit status */
+		printf("\n");
+		goto bailout;
+	}
+	if(subsamp>=0 && subsamp<TJ_NUMSAMP)
+	{
+		for(i=maxqual; i>=minqual; i--)
+			if(fullTest(srcbuf, w, h, subsamp, i, argv[1])==-1) retval=-1;
+		printf("\n");
+	}
+	else
+	{
+		for(i=maxqual; i>=minqual; i--)
+			if(fullTest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1])==-1) retval=-1;
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			if(fullTest(srcbuf, w, h, TJSAMP_420, i, argv[1])==-1) retval=-1;
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			if(fullTest(srcbuf, w, h, TJSAMP_422, i, argv[1])==-1) retval=-1;
+		printf("\n");
+		for(i=maxqual; i>=minqual; i--)
+			if(fullTest(srcbuf, w, h, TJSAMP_444, i, argv[1])==-1) retval=-1;
+		printf("\n");
+	}
+
+	bailout:
+	if(srcbuf) free(srcbuf);
+	return retval;
+}
diff --git a/tjbenchtest.in b/tjbenchtest.in
new file mode 100755
index 0000000..5e08c9b
--- /dev/null
+++ b/tjbenchtest.in
@@ -0,0 +1,208 @@
+#!/bin/bash
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+onexit()
+{
+	if [ -d $OUTDIR ]; then
+		rm -rf $OUTDIR
+	fi
+}
+
+runme()  # Echo a command line (prefixed with "***") to stdout, then run it.
+{
+	echo "*** $*"
+	"$@"  # "$@" passes each argument through intact (no re-splitting/globbing)
+}
+
+EXT=bmp
+IMAGES="vgl_5674_0098.${EXT} vgl_6434_0018a.${EXT} vgl_6548_0026a.${EXT} nightshot_iso_100.${EXT}"
+IMGDIR=@srcdir@/testimages
+OUTDIR=__tjbenchtest_output
+EXEDIR=.
+BMPARG=
+NSARG=
+YUVARG=
+if [ "$EXT" = "bmp" ]; then BMPARG=-bmp; fi
+
+# Start from an empty scratch directory: discard any directory left over from
+# an earlier run, then create it afresh.
+[ ! -d $OUTDIR ] || rm -rf $OUTDIR
+mkdir -p $OUTDIR
+
+exec >$EXEDIR/tjbenchtest.log
+
+if [ $# -gt 0 ]; then
+	if [ "$1" = "-yuv" ]; then
+		NSARG=-nosmooth
+		YUVARG=-yuv
+
+# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not
+# always produce bitwise-identical results to tjCompress*() if subsampling is
+# enabled.  In both cases, if the image width or height is not evenly
+# divisible by the MCU width/height, then the bottom and/or right edge is
+# expanded.  However, the libjpeg code performs this expansion prior to
+# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is
+# after downsampling.  Thus, the two will agree only if the width/height along
+# each downsampled dimension is an odd number or is evenly divisible by the MCU
+# width/height.  This disagreement basically amounts to a round-off error, but
+# there is no easy way around it, so for now, we just test the only image that
+# works.  (NOTE: nightshot_iso_100 does not suffer from the above issue, but
+# it suffers from an unrelated problem whereby the combination of
+# tjDecompressToYUV*() and tjDecodeYUV*() does not produce bitwise-identical
+# results to tjDecompress*() if decompression scaling is enabled.  This latter
+# phenomenon is not yet fully understood but is also believed to be some sort
+# of round-off error.)
+		IMAGES="vgl_6548_0026a.${EXT}"
+	fi
+fi
+
+# Standard tests
+for image in $IMAGES; do
+
+	cp $IMGDIR/$image $OUTDIR
+	basename=`basename $image .${EXT}`
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.${EXT}
+	for samp in GRAY 420 422 444; do
+		runme $EXEDIR/djpeg -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -rgb $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
+	done
+	for samp in 420 422; do
+		runme $EXEDIR/djpeg -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -nosmooth $BMPARG -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.${EXT} $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
+	done
+
+	# Compression
+	for dct in accurate fast; do
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG
+		for samp in GRAY 420 422 444; do
+			runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
+		done
+	done
+
+	for dct in fast accurate default; do
+		dctarg=-${dct}dct
+		if [ "${dct}" = "default" ]; then
+			dctarg=
+		fi
+
+		# Tiled compression & decompression
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
+		for samp in GRAY 444; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+				rm $i
+			done
+		done
+		runme $EXEDIR/tjbench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
+		for samp in 420 422; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+				rm $i
+			done
+		done
+
+		# Tiled decompression
+		for samp in GRAY 444; do
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.${EXT}
+				rm $i
+			done
+		done
+		for samp in 420 422; do
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.${EXT}
+				rm $i
+			done
+		done
+	done
+
+	# Scaled decompression
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
+		for samp in GRAY 420 422 444; do
+			runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
+			runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${scale}_djpeg.${EXT}
+			rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT}
+		done
+	done
+
+	# Transforms
+	for samp in GRAY 420 422 444; do
+		runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+	done
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444; do
+			runme $EXEDIR/djpeg -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+				rm $i
+			done
+		done
+		for samp in 420 422; do
+			runme $EXEDIR/djpeg -nosmooth -rgb $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.${EXT}
+				rm $i
+			done
+		done
+	done
+
+	# Grayscale transform
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].${EXT} \
+				$OUTDIR/${basename}_${samp}_Q95_full.${EXT}; do
+				runme cmp $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.${EXT}
+				rm $i
+			done
+		done
+	done
+
+	# Transforms with scaling
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG $BMPARG -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+				runme $EXEDIR/tjbench $OUTDIR/${basename}_${samp}_Q95.jpg $BMPARG -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
+				runme cmp $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT} $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.${EXT}
+				rm $OUTDIR/${basename}_${samp}_Q95_${scale}.${EXT}
+			done
+		done
+	done
+
+done
+
+echo SUCCESS!
diff --git a/tjbenchtest.java.in b/tjbenchtest.java.in
new file mode 100755
index 0000000..2054603
--- /dev/null
+++ b/tjbenchtest.java.in
@@ -0,0 +1,207 @@
+#!/bin/bash
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+onexit()
+{
+	if [ -d $OUTDIR ]; then
+		rm -rf $OUTDIR
+	fi
+}
+
+runme()  # Echo a command line (prefixed with "***") to stdout, then run it.
+{
+	echo "*** $*"
+	"$@"  # "$@" passes each argument through intact (no re-splitting/globbing)
+}
+
+IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp"
+IMGDIR=@srcdir@/testimages
+OUTDIR=__tjbenchtest_java_output
+EXEDIR=.
+JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs"
+BMPARG=
+NSARG=
+YUVARG=
+
+# Start from an empty scratch directory: discard any directory left over from
+# an earlier run, then create it afresh.
+[ ! -d $OUTDIR ] || rm -rf $OUTDIR
+mkdir -p $OUTDIR
+
+exec >$EXEDIR/tjbenchtest-java.log
+
+if [ $# -gt 0 ]; then
+	if [ "$1" = "-yuv" ]; then
+		NSARG=-nosmooth
+		YUVARG=-yuv
+
+# NOTE: The combination of tjEncodeYUV*() and tjCompressFromYUV*() does not
+# always produce bitwise-identical results to tjCompress*() if subsampling is
+# enabled.  In both cases, if the image width or height is not evenly
+# divisible by the MCU width/height, then the bottom and/or right edge is
+# expanded.  However, the libjpeg code performs this expansion prior to
+# downsampling, and TurboJPEG performs it in tjCompressFromYUV*(), which is
+# after downsampling.  Thus, the two will agree only if the width/height along
+# each downsampled dimension is an odd number or is evenly divisible by the MCU
+# width/height.  This disagreement basically amounts to a round-off error, but
+# there is no easy way around it, so for now, we just test the only image that
+# works.  (NOTE: nightshot_iso_100 does not suffer from the above issue, but
+# it suffers from an unrelated problem whereby the combination of
+# tjDecompressToYUV*() and tjDecodeYUV*() does not produce bitwise-identical
+# results to tjDecompress*() if decompression scaling is enabled.  This latter
+# phenomenon is not yet fully understood but is also believed to be some sort
+# of round-off error.)
+		IMAGES="vgl_6548_0026a.bmp"
+	fi
+fi
+
+# Standard tests
+for image in $IMAGES; do
+
+	cp $IMGDIR/$image $OUTDIR
+	basename=`basename $image .bmp`
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -grayscale -outfile $OUTDIR/${basename}_GRAY_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 -outfile $OUTDIR/${basename}_420_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 -outfile $OUTDIR/${basename}_422_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 -outfile $OUTDIR/${basename}_444_fast_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -grayscale -outfile $OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 -outfile $OUTDIR/${basename}_420_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 -outfile $OUTDIR/${basename}_422_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	runme $EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 -outfile $OUTDIR/${basename}_444_accurate_cjpeg.jpg $IMGDIR/${basename}.bmp
+	for samp in GRAY 420 422 444; do
+		runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_default_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_fast_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
+	done
+	for samp in 420 422; do
+		runme $EXEDIR/djpeg -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct fast -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+		runme $EXEDIR/djpeg -dct int -nosmooth -bmp -outfile $OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg
+	done
+
+	# Compression
+	for dct in accurate fast; do
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -quiet -benchtime 0.01 -warmup 0 -${dct}dct $YUVARG
+		for samp in GRAY 420 422 444; do
+			runme cmp $OUTDIR/${basename}_${samp}_Q95.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
+		done
+	done
+
+	for dct in fast accurate default; do
+		dctarg=-${dct}dct
+		if [ "${dct}" = "default" ]; then
+			dctarg=
+		fi
+
+		# Tiled compression & decompression
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
+		for samp in GRAY 444; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
+				rm $i
+			done
+		done
+		runme $JAVA TJBench $OUTDIR/$image 95 -rgb -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
+		for samp in 420 422; do
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp
+				rm $i
+			done
+		done
+
+		# Tiled decompression
+		for samp in GRAY 444; do
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 ${dctarg} $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
+				rm $i
+			done
+		done
+		for samp in 420 422; do  # subsampled JPEGs: checked against the djpeg -nosmooth references
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample ${dctarg} $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp  # options before operands; skips the first 54 bytes of each file (presumably the BMP header)
+				rm $i  # remove the verified output so that it cannot match a later glob
+			done
+		done
+	done
+
+	# Scaled decompression
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
+		for samp in GRAY 420 422 444; do
+			runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
+		done
+	done
+
+	# Transforms
+	for samp in GRAY 420 422 444; do
+		runme $EXEDIR/jpegtran -flip horizontal -trim -outfile $OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -flip vertical -trim -outfile $OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transpose -trim -outfile $OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -transverse -trim -outfile $OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 90 -trim -outfile $OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 180 -trim -outfile $OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+		runme $EXEDIR/jpegtran -rotate 270 -trim -outfile $OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg $OUTDIR/${basename}_${samp}_Q95.jpg
+	done
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444; do
+			runme $EXEDIR/djpeg -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+		for samp in 420 422; do
+			runme $EXEDIR/djpeg -nosmooth -rgb -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -fastupsample $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+	done
+
+	# Grayscale transform
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -tile -quiet -benchtime 0.01 -warmup 0 -grayscale $YUVARG
+			for i in $OUTDIR/${basename}_${samp}_Q95_[0-9]*[0-9]x[0-9]*[0-9].bmp \
+				$OUTDIR/${basename}_${samp}_Q95_full.bmp; do
+				runme cmp -i 54:54 $i $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp
+				rm $i
+			done
+		done
+	done
+
+	# Transforms with scaling
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				runme $EXEDIR/djpeg -rgb -scale ${scalearg} $NSARG -bmp -outfile $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+				runme $JAVA TJBench $OUTDIR/${basename}_${samp}_Q95.jpg -$xform -scale ${scalearg} -quiet -benchtime 0.01 -warmup 0 $YUVARG
+				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				rm $OUTDIR/${basename}_${samp}_Q95_${scale}.bmp
+			done
+		done
+	done
+
+done
+
+echo SUCCESS!
diff --git a/tjexampletest.in b/tjexampletest.in
new file mode 100755
index 0000000..40b342e
--- /dev/null
+++ b/tjexampletest.in
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+set -u
+set -e
+trap onexit INT
+trap onexit TERM
+trap onexit EXIT
+
+onexit()
+{
+	if [ -d $OUTDIR ]; then
+		rm -rf $OUTDIR
+	fi
+}
+
+runme()  # Echo a command line (prefixed with "***") to stdout, then run it.
+{
+	echo "*** $*"
+	"$@"  # "$@" passes each argument through intact (no re-splitting/globbing)
+}
+
+IMAGES="vgl_5674_0098.bmp vgl_6434_0018a.bmp vgl_6548_0026a.bmp nightshot_iso_100.bmp"
+IMGDIR=@srcdir@/testimages
+OUTDIR=__tjexampletest_output
+EXEDIR=.
+JAVA="@JAVA@ -cp java/turbojpeg.jar -Djava.library.path=.libs"
+
+# Start from an empty scratch directory: discard any directory left over from
+# an earlier run, then create it afresh.
+[ ! -d $OUTDIR ] || rm -rf $OUTDIR
+mkdir -p $OUTDIR
+
+exec >$EXEDIR/tjexampletest.log
+
+for image in $IMAGES; do
+
+	cp $IMGDIR/$image $OUTDIR
+	basename=`basename $image .bmp`
+	$EXEDIR/cjpeg -quality 95 -dct fast -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct fast -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_fast_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -grayscale $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_GRAY_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x2 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_420_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 2x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_422_accurate_cjpeg.jpg
+	$EXEDIR/cjpeg -quality 95 -dct int -sample 1x1 $IMGDIR/${basename}.bmp >$OUTDIR/${basename}_444_accurate_cjpeg.jpg
+	for samp in GRAY 420 422 444; do
+		$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_djpeg.bmp
+		$EXEDIR/djpeg -dct fast -rgb -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_djpeg.bmp
+		$EXEDIR/djpeg -dct int -rgb -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_djpeg.bmp
+	done
+	for samp in 420 422; do
+		$EXEDIR/djpeg -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_default_nosmooth_djpeg.bmp
+		$EXEDIR/djpeg -dct fast -nosmooth -bmp $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_fast_nosmooth_djpeg.bmp
+		$EXEDIR/djpeg -dct int -nosmooth -bmp $OUTDIR/${basename}_${samp}_accurate_cjpeg.jpg >$OUTDIR/${basename}_${samp}_accurate_nosmooth_djpeg.bmp
+	done
+
+	# Compression
+	for dct in fast accurate; do
+		for samp in GRAY 420 422 444; do
+			runme $JAVA TJExample $OUTDIR/$image $OUTDIR/${basename}_${samp}_${dct}.jpg -q 95 -samp ${samp} -${dct}dct
+			runme cmp $OUTDIR/${basename}_${samp}_${dct}.jpg $OUTDIR/${basename}_${samp}_${dct}_cjpeg.jpg
+		done
+	done
+
+	# Decompression
+	for dct in fast accurate default; do
+		srcdct=${dct}
+		dctarg=-${dct}dct
+		if [ "${dct}" = "default" ]; then
+			srcdct=fast
+			dctarg=
+		fi
+		for samp in GRAY 420 422 444; do
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}.bmp ${dctarg}
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}.bmp $OUTDIR/${basename}_${samp}_${dct}_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_${dct}.bmp
+		done
+		for samp in 420 422; do
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_${srcdct}.jpg $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp -fastupsample ${dctarg}
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp $OUTDIR/${basename}_${samp}_${dct}_nosmooth_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_${dct}_nosmooth.bmp
+		done
+	done
+
+	# Scaled decompression
+	for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+		scalearg=`echo $scale | sed s@_@/@g`
+		for samp in GRAY 420 422 444; do
+			$EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_fast_cjpeg.jpg >$OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${scale}.bmp -scale ${scalearg}
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${scale}.bmp $OUTDIR/${basename}_${samp}_${scale}_djpeg.bmp
+			rm $OUTDIR/${basename}_${samp}_${scale}.bmp
+		done
+	done
+
+	# Transforms
+	for samp in GRAY 420 422 444; do
+		$EXEDIR/jpegtran -crop 70x60+16+16 -flip horizontal -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_hflip_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -flip vertical -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_vflip_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -transpose -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transpose_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -transverse -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_transverse_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -rotate 90 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot90_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -rotate 180 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot180_jpegtran.jpg
+		$EXEDIR/jpegtran -crop 70x60+16+16 -rotate 270 -trim $OUTDIR/${basename}_${samp}_fast.jpg >$OUTDIR/${basename}_${samp}_rot270_jpegtran.jpg
+	done
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 420 422 444; do
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -crop 16,16,70x60
+			runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg
+			$EXEDIR/djpeg -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			rm $OUTDIR/${basename}_${samp}_${xform}.bmp
+		done
+		for samp in 420 422; do
+			$EXEDIR/djpeg -nosmooth -rgb -bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -crop 16,16,70x60 -fastupsample
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_${samp}_${xform}_jpegtran.bmp
+			rm $OUTDIR/${basename}_${samp}_${xform}.bmp
+		done
+	done
+
+	# Grayscale transform
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.jpg -$xform -grayscale -crop 16,16,70x60
+			runme cmp $OUTDIR/${basename}_${samp}_${xform}.jpg $OUTDIR/${basename}_GRAY_${xform}_jpegtran.jpg
+			runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}.bmp -$xform -grayscale -crop 16,16,70x60
+			runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}.bmp $OUTDIR/${basename}_GRAY_${xform}_jpegtran.bmp
+			rm $OUTDIR/${basename}_${samp}_${xform}.bmp
+		done
+	done
+
+	# Transforms with scaling
+	for xform in hflip vflip transpose transverse rot90 rot180 rot270; do
+		for samp in GRAY 444 422 420; do
+			for scale in 2_1 15_8 7_4 13_8 3_2 11_8 5_4 9_8 7_8 3_4 5_8 1_2 3_8 1_4 1_8; do
+				scalearg=`echo $scale | sed s@_@/@g`
+				$EXEDIR/djpeg -rgb -bmp -scale ${scalearg} $OUTDIR/${basename}_${samp}_${xform}_jpegtran.jpg >$OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				runme $JAVA TJExample $OUTDIR/${basename}_${samp}_fast.jpg $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp -$xform -scale ${scalearg} -crop 16,16,70x60
+				runme cmp -i 54:54 $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp $OUTDIR/${basename}_${samp}_${xform}_${scale}_jpegtran.bmp
+				rm $OUTDIR/${basename}_${samp}_${xform}_${scale}.bmp
+			done
+		done
+	done
+
+done
+
+echo SUCCESS!
diff --git a/tjunittest.c b/tjunittest.c
new file mode 100644
index 0000000..dc84bba
--- /dev/null
+++ b/tjunittest.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This program tests the various code paths in the TurboJPEG C Wrapper
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "./tjutil.h"
+#include "./turbojpeg.h"
+#ifdef _WIN32
+ #include <time.h>
+ #define random() rand()
+#endif
+
+/* Print the command-line options, then terminate the program with status 1. */
+void usage(char *progName)
+{
+	printf("\nUSAGE: %s [options]\n", progName);
+	printf("Options:\n");
+	printf("-yuv = test YUV encoding/decoding support\n");
+	printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n");
+	printf("            4-byte boundary\n");
+	printf("-alloc = test automatic buffer allocation\n");
+	exit(1);
+}
+
+/* Error helpers: print a message, then jump to the enclosing bailout label. */
+#define _throwtj() {printf("TurboJPEG ERROR:\n%s\n", tjGetErrorStr());  \
+	bailout();}
+#define _tj(f) {if((f)==-1) _throwtj();}
+#define _throw(m) {printf("ERROR: %s\n", m);  bailout();}
+/* Subsampling names indexed by subsamp: display form, then file-name form. */
+const char *subNameLong[TJ_NUMSAMP]=
+{
+	"4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
+};
+const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"};
+/* Pixel format names, indexed by pixel format. */
+const char *pixFormatStr[TJ_NUMPF]=
+{
+	"RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale",
+	"RGBA", "BGRA", "ABGR", "ARGB", "CMYK"
+};
+/* Byte offset of the alpha sample for each pixel format; -1 means none. */
+const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1};
+
+const int _3byteFormats[]={TJPF_RGB, TJPF_BGR};  /* format lists for doTest() */
+const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB,
+	TJPF_CMYK};
+const int _onlyGray[]={TJPF_GRAY};
+const int _onlyRGB[]={TJPF_RGB};
+/* Set from the command line: -yuv, -alloc, and YUV padding (1 with -noyuvpad). */
+int doyuv=0, alloc=0, pad=4;
+
+int exitStatus=0;  /* returned by main(); bailout() sets it to -1 */
+#define bailout() {exitStatus=-1;  goto bailout;}
+
+/* Fill buf (w x h pixels of format pf) with the checkerboard test pattern. */
+void initBuf(unsigned char *buf, int w, int h, int pf, int flags)
+{
+	int roffset=tjRedOffset[pf];
+	int goffset=tjGreenOffset[pf];
+	int boffset=tjBlueOffset[pf];
+	int ps=tjPixelSize[pf];
+	int index, row, col, halfway=16;  /* values change from row 16 on */
+
+	if(pf==TJPF_GRAY)
+	{
+		memset(buf, 0, w*h*ps);
+		for(row=0; row<h; row++)
+		{
+			for(col=0; col<w; col++)  /* 8x8 blocks alternate */
+			{
+				if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;  /* row 0 stored last */
+				else index=row*w+col;
+				if(((row/8)+(col/8))%2==0) buf[index]=(row<halfway)? 255:0;
+				else buf[index]=(row<halfway)? 76:226;
+			}
+		}
+	}
+	else if(pf==TJPF_CMYK)
+	{
+		memset(buf, 255, w*h*ps);  /* start with every sample at 255 */
+		for(row=0; row<h; row++)
+		{
+			for(col=0; col<w; col++)
+			{
+				if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+				else index=row*w+col;
+				if(((row/8)+(col/8))%2==0)
+				{
+					if(row>=halfway) buf[index*ps+3]=0;
+				}
+				else
+				{
+					buf[index*ps+2]=0;
+					if(row<halfway) buf[index*ps+1]=0;
+				}
+			}
+		}
+	}
+	else
+	{
+		memset(buf, 0, w*h*ps);  /* start with every sample at 0 */
+		for(row=0; row<h; row++)
+		{
+			for(col=0; col<w; col++)
+			{
+				if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+				else index=row*w+col;
+				if(((row/8)+(col/8))%2==0)
+				{
+					if(row<halfway)
+					{
+						buf[index*ps+roffset]=255;
+						buf[index*ps+goffset]=255;
+						buf[index*ps+boffset]=255;
+					}
+				}
+				else
+				{
+					buf[index*ps+roffset]=255;
+					if(row>=halfway) buf[index*ps+goffset]=255;
+				}
+			}
+		}
+	}
+}
+
+/* checkval(): fail (retval=0, goto bailout) unless v is within 1 of cv. */
+#define checkval(v, cv) { \
+	if(v<cv-1 || v>cv+1) { \
+		printf("\nComp. %s at %d,%d should be %d, not %d\n",  \
+			#v, row, col, cv, v); \
+		retval=0;  exitStatus=-1;  goto bailout; \
+	}}
+/* checkval0(): same failure path, taken when v is greater than 1. */
+#define checkval0(v) { \
+	if(v>1) { \
+		printf("\nComp. %s at %d,%d should be 0, not %d\n", #v, row, col, v); \
+		retval=0;  exitStatus=-1;  goto bailout; \
+	}}
+/* checkval255(): same failure path, taken when v is less than 254. */
+#define checkval255(v) { \
+	if(v<254) { \
+		printf("\nComp. %s at %d,%d should be 255, not %d\n", #v, row, col, v); \
+		retval=0;  exitStatus=-1;  goto bailout; \
+	}}
+
+/* Check a decoded w x h image against the test pattern; 1=pass, 0=fail. */
+int checkBuf(unsigned char *buf, int w, int h, int pf, int subsamp,
+	tjscalingfactor sf, int flags)
+{
+	int roffset=tjRedOffset[pf];
+	int goffset=tjGreenOffset[pf];
+	int boffset=tjBlueOffset[pf];
+	int aoffset=alphaOffset[pf];
+	int ps=tjPixelSize[pf];
+	int index, row, col, retval=1;
+	int halfway=16*sf.num/sf.denom;  /* row 16, scaled by sf */
+	int blocksize=8*sf.num/sf.denom;  /* 8-pixel block, scaled by sf */
+
+	if(pf==TJPF_CMYK)
+	{
+		for(row=0; row<h; row++)
+		{
+			for(col=0; col<w; col++)
+			{
+				unsigned char c, m, y, k;
+				if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+				else index=row*w+col;
+				c=buf[index*ps];
+				m=buf[index*ps+1];
+				y=buf[index*ps+2];
+				k=buf[index*ps+3];
+				if(((row/blocksize)+(col/blocksize))%2==0)
+				{
+					checkval255(c);  checkval255(m);  checkval255(y);
+					if(row<halfway) checkval255(k)
+					else checkval0(k)
+				}
+				else
+				{
+					checkval255(c);  checkval0(y);  checkval255(k);
+					if(row<halfway) checkval0(m)
+					else checkval255(m)
+				}
+			}
+		}
+		return 1;  /* every CMYK sample matched */
+	}
+
+	for(row=0; row<h; row++)
+	{
+		for(col=0; col<w; col++)
+		{
+			unsigned char r, g, b, a;
+			if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+			else index=row*w+col;
+			r=buf[index*ps+roffset];
+			g=buf[index*ps+goffset];
+			b=buf[index*ps+boffset];
+			a=aoffset>=0? buf[index*ps+aoffset]:0xFF;  /* no alpha: use 255 */
+			if(((row/blocksize)+(col/blocksize))%2==0)
+			{
+				if(row<halfway)
+				{
+					checkval255(r);  checkval255(g);  checkval255(b);
+				}
+				else
+				{
+					checkval0(r);  checkval0(g);  checkval0(b);
+				}
+			}
+			else
+			{
+				if(subsamp==TJSAMP_GRAY)
+				{
+					if(row<halfway)
+					{
+						checkval(r, 76);  checkval(g, 76);  checkval(b, 76);
+					}
+					else
+					{
+						checkval(r, 226);  checkval(g, 226);  checkval(b, 226);
+					}
+				}
+				else
+				{
+					if(row<halfway)
+					{
+						checkval255(r);  checkval0(g);  checkval0(b);
+					}
+					else
+					{
+						checkval255(r);  checkval255(g);  checkval0(b);
+					}
+				}
+			}
+			checkval255(a);
+		}
+	}
+
+	bailout:
+	if(retval==0)  /* on failure, dump every pixel in memory order */
+	{
+		for(row=0; row<h; row++)
+		{
+			for(col=0; col<w; col++)
+			{
+				if(pf==TJPF_CMYK)
+					printf("%.3d/%.3d/%.3d/%.3d ", buf[(row*w+col)*ps],
+						buf[(row*w+col)*ps+1], buf[(row*w+col)*ps+2],
+						buf[(row*w+col)*ps+3]);
+				else
+					printf("%.3d/%.3d/%.3d ", buf[(row*w+col)*ps+roffset],
+						buf[(row*w+col)*ps+goffset], buf[(row*w+col)*ps+boffset]);
+			}
+			printf("\n");
+		}
+	}
+	return retval;
+}
+
+/* Round v up to the nearest multiple of p (p must be a power of 2). */
+#define PAD(v, p) (((v)+(p)-1)&(~((p)-1)))
+/* Check a planar YUV image against the test pattern; 1=pass, 0=fail. */
+int checkBufYUV(unsigned char *buf, int w, int h, int subsamp,
+	tjscalingfactor sf)
+{
+	int row, col;
+	int hsf=tjMCUWidth[subsamp]/8, vsf=tjMCUHeight[subsamp]/8;
+	int pw=PAD(w, hsf), ph=PAD(h, vsf);  /* padded Y plane size */
+	int cw=pw/hsf, ch=ph/vsf;  /* U and V plane size */
+	int ypitch=PAD(pw, pad), uvpitch=PAD(cw, pad);  /* bytes per plane row */
+	int retval=1;
+	int halfway=16*sf.num/sf.denom;
+	int blocksize=8*sf.num/sf.denom;
+
+	for(row=0; row<ph; row++)
+	{
+		for(col=0; col<pw; col++)
+		{
+			unsigned char y=buf[ypitch*row+col];
+			if(((row/blocksize)+(col/blocksize))%2==0)
+			{
+				if(row<halfway) checkval255(y)  else checkval0(y);
+			}
+			else
+			{
+				if(row<halfway) checkval(y, 76)  else checkval(y, 226);
+			}
+		}
+	}
+	if(subsamp!=TJSAMP_GRAY)  /* U plane, then V plane, follow the Y plane */
+	{
+		int halfway=16/vsf*sf.num/sf.denom;  /* halfway row in chroma rows */
+		for(row=0; row<ch; row++)
+		{
+			for(col=0; col<cw; col++)
+			{
+				unsigned char u=buf[ypitch*ph + (uvpitch*row+col)],
+					v=buf[ypitch*ph + uvpitch*ch + (uvpitch*row+col)];
+				if(((row*vsf/blocksize)+(col*hsf/blocksize))%2==0)
+				{
+					checkval(u, 128);  checkval(v, 128);
+				}
+				else
+				{
+					if(row<halfway)
+					{
+						checkval(u, 85);  checkval255(v);
+					}
+					else
+					{
+						checkval0(u);  checkval(v, 149);
+					}
+				}
+			}
+		}
+	}
+
+	bailout:
+	if(retval==0)  /* on failure, dump the Y, U and V planes in turn */
+	{
+		for(row=0; row<ph; row++)
+		{
+			for(col=0; col<pw; col++)
+				printf("%.3d ", buf[ypitch*row+col]);
+			printf("\n");
+		}
+		printf("\n");
+		for(row=0; row<ch; row++)
+		{
+			for(col=0; col<cw; col++)
+				printf("%.3d ", buf[ypitch*ph + (uvpitch*row+col)]);
+			printf("\n");
+		}
+		printf("\n");
+		for(row=0; row<ch; row++)
+		{
+			for(col=0; col<cw; col++)
+				printf("%.3d ", buf[ypitch*ph + uvpitch*ch + (uvpitch*row+col)]);
+			printf("\n");
+		}
+	}
+
+	return retval;
+}
+
+/* Write jpegSize bytes from jpegBuf to filename; failure sets exitStatus. */
+void writeJPEG(unsigned char *jpegBuf, unsigned long jpegSize, char *filename)
+{
+	FILE *file=fopen(filename, "wb");
+	if(!file || fwrite(jpegBuf, jpegSize, 1, file)!=1)
+	{
+		printf("ERROR: Could not write to %s.\n%s\n", filename, strerror(errno));
+		bailout();
+	}
+
+	bailout:
+	if(file) fclose(file);
+}
+
+/* Compress the w x h test pattern (format pf) into *dstBuf and save the JPEG. */
+void compTest(tjhandle handle, unsigned char **dstBuf,
+	unsigned long *dstSize, int w, int h, int pf, char *basename,
+	int subsamp, int jpegQual, int flags)
+{
+	char tempStr[1024];  unsigned char *srcBuf=NULL, *yuvBuf=NULL;
+	const char *pfStr=pixFormatStr[pf];
+	const char *buStrLong=(flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ";
+	const char *buStr=(flags&TJFLAG_BOTTOMUP)? "BU":"TD";
+	tjhandle handle2=NULL;  /* YUV encoder; bailout releases it on error */
+
+	if((srcBuf=(unsigned char *)malloc(w*h*tjPixelSize[pf]))==NULL)
+		_throw("Memory allocation failure");
+	initBuf(srcBuf, w, h, pf, flags);
+
+	if(*dstBuf && *dstSize>0) memset(*dstBuf, 0, *dstSize);
+
+	if(!alloc) flags|=TJFLAG_NOREALLOC;
+	if(doyuv)
+	{
+		unsigned long yuvSize=tjBufSizeYUV2(w, pad, h, subsamp);
+		tjscalingfactor sf={1, 1};
+		handle2=tjInitCompress();
+		if(!handle2) _throwtj();
+		if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL)
+			_throw("Memory allocation failure");
+		memset(yuvBuf, 0, yuvSize);
+
+		printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]);
+		_tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp,
+			flags));
+		tjDestroy(handle2);  handle2=NULL;
+		if(checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n");
+		else printf("FAILED!\n");
+
+		printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong,
+			jpegQual);
+		_tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf,
+			dstSize, jpegQual, flags));
+	}
+	else
+	{
+		printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp],
+			jpegQual);
+		_tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp,
+			jpegQual, flags));
+	}
+
+	snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr,
+		subName[subsamp], jpegQual);
+	writeJPEG(*dstBuf, *dstSize, tempStr);
+	printf("Done.\n  Result in %s\n", tempStr);
+
+	bailout:
+	if(handle2) tjDestroy(handle2);
+	if(yuvBuf) free(yuvBuf);
+	if(srcBuf) free(srcBuf);
+}
+
+/* Decompress jpegBuf at scaling factor sf and verify the resulting image. */
+void _decompTest(tjhandle handle, unsigned char *jpegBuf,
+	unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp,
+	int flags, tjscalingfactor sf)
+{
+	unsigned char *dstBuf=NULL, *yuvBuf=NULL;
+	int _hdrw=0, _hdrh=0, _hdrsubsamp=-1;
+	int scaledWidth=TJSCALED(w, sf);
+	int scaledHeight=TJSCALED(h, sf);
+	unsigned long dstSize=0;  tjhandle handle2=NULL;
+
+	_tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh,
+		&_hdrsubsamp));
+	if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp)
+		_throw("Incorrect JPEG header");
+
+	dstSize=scaledWidth*scaledHeight*tjPixelSize[pf];
+	if((dstBuf=(unsigned char *)malloc(dstSize))==NULL)
+		_throw("Memory allocation failure");
+	memset(dstBuf, 0, dstSize);
+
+	if(doyuv)
+	{
+		unsigned long yuvSize=tjBufSizeYUV2(scaledWidth, pad, scaledHeight,
+			subsamp);
+		handle2=tjInitDecompress();  /* YUV decoder; bailout releases it */
+		if(!handle2) _throwtj();
+		if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL)
+			_throw("Memory allocation failure");
+		memset(yuvBuf, 0, yuvSize);
+
+		printf("JPEG -> YUV %s ", subNameLong[subsamp]);
+		if(sf.num!=1 || sf.denom!=1)
+			printf("%d/%d ... ", sf.num, sf.denom);
+		else printf("... ");
+		_tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth,
+			pad, scaledHeight, flags));
+		if(checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf))
+			printf("Passed.\n");
+		else printf("FAILED!\n");
+
+		printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf],
+			(flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ");
+		_tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0,
+			scaledHeight, pf, flags));
+		tjDestroy(handle2);  handle2=NULL;
+	}
+	else
+	{
+		printf("JPEG -> %s %s ", pixFormatStr[pf],
+			(flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ");
+		if(sf.num!=1 || sf.denom!=1)
+			printf("%d/%d ... ", sf.num, sf.denom);
+		else printf("... ");
+		_tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0,
+			scaledHeight, pf, flags));
+	}
+
+	if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags))
+		printf("Passed.");
+	else printf("FAILED!");
+	printf("\n");
+
+	bailout:
+	if(handle2) tjDestroy(handle2);
+	if(yuvBuf) free(yuvBuf);
+	if(dstBuf) free(dstBuf);
+}
+
+/* Run _decompTest() for each scaling factor permitted for this subsampling. */
+void decompTest(tjhandle handle, unsigned char *jpegBuf,
+	unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp,
+	int flags)
+{
+	int i, n=0;
+	tjscalingfactor *sf=tjGetScalingFactors(&n);
+	if(!sf || !n) _throwtj();
+
+	for(i=0; i<n; i++)
+	{
+		if(subsamp==TJSAMP_444 || subsamp==TJSAMP_GRAY ||  /* every factor */
+			(subsamp==TJSAMP_411 && sf[i].num==1 &&  /* 1/1 and 1/2 only */
+				(sf[i].denom==2 || sf[i].denom==1)) ||
+			(subsamp!=TJSAMP_411 && sf[i].num==1 &&  /* 1/1, 1/2 and 1/4 only */
+				(sf[i].denom==4 || sf[i].denom==2 || sf[i].denom==1)))
+			_decompTest(handle, jpegBuf, jpegSize, w, h, pf, basename, subsamp,
+				flags, sf[i]);
+	}
+
+	bailout:
+	return;
+}
+
+/* Compress and decompress a w x h image for each format, in both row orders. */
+void doTest(int w, int h, const int *formats, int nformats, int subsamp,
+	char *basename)
+{
+	tjhandle chandle=NULL, dhandle=NULL;
+	unsigned char *dstBuf=NULL;
+	unsigned long size=0;  int pfi, pf, i;
+
+	if(!alloc)  /* without -alloc the JPEG buffer is allocated here, up front */
+		size=tjBufSize(w, h, subsamp);
+	if(size!=0)
+		if((dstBuf=(unsigned char *)tjAlloc(size))==NULL)
+			_throw("Memory allocation failure.");
+
+	if((chandle=tjInitCompress())==NULL || (dhandle=tjInitDecompress())==NULL)
+		_throwtj();
+
+	for(pfi=0; pfi<nformats; pfi++)
+	{
+		for(i=0; i<2; i++)  /* i==0: top-down rows, i==1: bottom-up rows */
+		{
+			int flags=0;
+			if(subsamp==TJSAMP_422 || subsamp==TJSAMP_420 || subsamp==TJSAMP_440 ||
+				subsamp==TJSAMP_411)
+				flags|=TJFLAG_FASTUPSAMPLE;
+			if(i==1) flags|=TJFLAG_BOTTOMUP;
+			pf=formats[pfi];
+			compTest(chandle, &dstBuf, &size, w, h, pf, basename, subsamp, 100,
+				flags);
+			decompTest(dhandle, dstBuf, size, w, h, pf, basename, subsamp,
+				flags);
+			if(pf>=TJPF_RGBX && pf<=TJPF_XRGB)  /* also decode to alpha variant */
+			{
+				printf("\n");
+				decompTest(dhandle, dstBuf, size, w, h, pf+(TJPF_RGBA-TJPF_RGBX),
+					basename, subsamp, flags);
+			}
+			printf("\n");
+		}
+	}
+	printf("--------------------\n\n");
+
+	bailout:
+	if(chandle) tjDestroy(chandle);
+	if(dhandle) tjDestroy(dhandle);
+
+	if(dstBuf) tjFree(dstBuf);
+}
+
+/* Compress random images of many sizes to exercise the buffer size functions. */
+void bufSizeTest(void)
+{
+	int w, h, i, subsamp;
+	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+	tjhandle handle=NULL;
+	unsigned long dstSize=0;
+
+	if((handle=tjInitCompress())==NULL) _throwtj();
+
+	printf("Buffer size regression test\n");
+	for(subsamp=0; subsamp<TJ_NUMSAMP; subsamp++)
+	{
+		for(w=1; w<48; w++)
+		{
+			int maxh=(w==1)? 2048:48;
+			for(h=1; h<maxh; h++)
+			{
+				if(h%100==0) printf("%.4d x %.4d\b\b\b\b\b\b\b\b\b\b\b", w, h);
+				if((srcBuf=(unsigned char *)malloc(w*h*4))==NULL)
+					_throw("Memory allocation failure");
+				if(!alloc || doyuv)
+				{
+					if(doyuv) dstSize=tjBufSizeYUV2(w, pad, h, subsamp);
+					else dstSize=tjBufSize(w, h, subsamp);
+					if((dstBuf=(unsigned char *)tjAlloc(dstSize))==NULL)
+						_throw("Memory allocation failure");
+				}
+
+				for(i=0; i<w*h*4; i++)
+				{
+					if(random()<RAND_MAX/2) srcBuf[i]=0;
+					else srcBuf[i]=255;
+				}
+
+				if(doyuv)
+				{
+					_tj(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, pad,
+						subsamp, 0));
+				}
+				else
+				{
+					_tj(tjCompress2(handle, srcBuf, w, 0, h, TJPF_BGRX, &dstBuf,
+						&dstSize, subsamp, 100, alloc? 0:TJFLAG_NOREALLOC));
+				}
+				free(srcBuf);  srcBuf=NULL;
+				tjFree(dstBuf);  dstBuf=NULL;
+				/* Repeat with the width and height swapped (h x w) */
+				if((srcBuf=(unsigned char *)malloc(h*w*4))==NULL)
+					_throw("Memory allocation failure");
+				if(!alloc || doyuv)
+				{
+					if(doyuv) dstSize=tjBufSizeYUV2(h, pad, w, subsamp);
+					else dstSize=tjBufSize(h, w, subsamp);
+					if((dstBuf=(unsigned char *)tjAlloc(dstSize))==NULL)
+						_throw("Memory allocation failure");
+				}
+
+				for(i=0; i<h*w*4; i++)
+				{
+					if(random()<RAND_MAX/2) srcBuf[i]=0;
+					else srcBuf[i]=255;
+				}
+
+				if(doyuv)
+				{
+					_tj(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, pad,
+						subsamp, 0));
+				}
+				else
+				{
+					_tj(tjCompress2(handle, srcBuf, h, 0, w, TJPF_BGRX, &dstBuf,
+						&dstSize, subsamp, 100, alloc? 0:TJFLAG_NOREALLOC));
+				}
+				free(srcBuf);  srcBuf=NULL;
+				tjFree(dstBuf);  dstBuf=NULL;
+			}
+		}
+	}
+	printf("Done.      \n");
+
+	bailout:
+	if(srcBuf) free(srcBuf);
+	if(dstBuf) tjFree(dstBuf);  /* tjAlloc()/TurboJPEG buffers need tjFree() */
+	if(handle) tjDestroy(handle);
+}
+
+/* Parse the options, run the test matrix, and return exitStatus. */
+int main(int argc, char *argv[])
+{
+	int i, num4bf=5;
+	#ifdef _WIN32
+	srand((unsigned int)time(NULL));
+	#endif
+	if(argc>1)
+	{
+		for(i=1; i<argc; i++)
+		{
+			if(!strcasecmp(argv[i], "-yuv")) doyuv=1;
+			if(!strcasecmp(argv[i], "-noyuvpad")) pad=1;
+			if(!strcasecmp(argv[i], "-alloc")) alloc=1;
+			if(!strncasecmp(argv[i], "-h", 2) || !strcasecmp(argv[i], "-?"))
+				usage(argv[0]);
+		}
+	}
+	if(alloc) printf("Testing automatic buffer allocation\n");
+	if(doyuv) num4bf=4;  /* leaves out TJPF_CMYK, the 5th _4byteFormats entry */
+	doTest(35, 39, _3byteFormats, 2, TJSAMP_444, "test");
+	doTest(39, 41, _4byteFormats, num4bf, TJSAMP_444, "test");
+	doTest(41, 35, _3byteFormats, 2, TJSAMP_422, "test");
+	doTest(35, 39, _4byteFormats, num4bf, TJSAMP_422, "test");
+	doTest(39, 41, _3byteFormats, 2, TJSAMP_420, "test");
+	doTest(41, 35, _4byteFormats, num4bf, TJSAMP_420, "test");
+	doTest(35, 39, _3byteFormats, 2, TJSAMP_440, "test");
+	doTest(39, 41, _4byteFormats, num4bf, TJSAMP_440, "test");
+	doTest(41, 35, _3byteFormats, 2, TJSAMP_411, "test");
+	doTest(35, 39, _4byteFormats, num4bf, TJSAMP_411, "test");
+	doTest(39, 41, _onlyGray, 1, TJSAMP_GRAY, "test");
+	doTest(41, 35, _3byteFormats, 2, TJSAMP_GRAY, "test");
+	doTest(35, 39, _4byteFormats, 4, TJSAMP_GRAY, "test");
+	bufSizeTest();
+	if(doyuv)  /* extra 48x48 runs, written with the "test_yuv0" prefix */
+	{
+		printf("\n--------------------\n\n");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_444, "test_yuv0");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_422, "test_yuv0");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_420, "test_yuv0");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_440, "test_yuv0");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_411, "test_yuv0");
+		doTest(48, 48, _onlyRGB, 1, TJSAMP_GRAY, "test_yuv0");
+		doTest(48, 48, _onlyGray, 1, TJSAMP_GRAY, "test_yuv0");
+	}
+
+	return exitStatus;
+}
diff --git a/tjutil.c b/tjutil.c
new file mode 100644
index 0000000..6618d15
--- /dev/null
+++ b/tjutil.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+
+#include <windows.h>
+/* Return the performance counter frequency, or 0.0 if the query fails. */
+static double getfreq(void)
+{
+	LARGE_INTEGER freq;
+	if(!QueryPerformanceFrequency(&freq)) return 0.0;
+	return (double)freq.QuadPart;
+}
+/* Cached getfreq() result; negative until gettime() is first called. */
+static double f=-1.0;
+/* Return a timestamp in seconds; uses GetTickCount() when f is 0.0. */
+double gettime(void)
+{
+	LARGE_INTEGER t;
+	if(f<0.0) f=getfreq();
+	if(f==0.0) return (double)GetTickCount()/1000.;
+	else
+	{
+		QueryPerformanceCounter(&t);
+		return (double)t.QuadPart/f;  /* counter value divided by frequency */
+	}
+}
+
+#else
+
+#include <stdlib.h>
+#include <sys/time.h>
+/* Return the time of day in seconds, or 0.0 if gettimeofday() fails. */
+double gettime(void)
+{
+	struct timeval tv;
+	if(gettimeofday(&tv, NULL)<0) return 0.0;
+	else return (double)tv.tv_sec+((double)tv.tv_usec/1000000.);
+}
+
+#endif
diff --git a/tjutil.h b/tjutil.h
new file mode 100644
index 0000000..bdad348
--- /dev/null
+++ b/tjutil.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C)2011 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/* On Windows, map snprintf/strcasecmp/strncasecmp onto the names below. */
+#ifdef _WIN32
+	#ifndef __MINGW32__
+		#include <stdio.h>
+		#define snprintf(str, n, format, ...)  \
+			_snprintf_s(str, n, _TRUNCATE, format, __VA_ARGS__)
+	#endif
+	#define strcasecmp stricmp
+	#define strncasecmp strnicmp
+#endif
+/* min()/max() are defined only if not already provided; args evaluated twice. */
+#ifndef min
+ #define min(a,b) ((a)<(b)?(a):(b))
+#endif
+
+#ifndef max
+ #define max(a,b) ((a)>(b)?(a):(b))
+#endif
+/* Timestamp in seconds; implemented in tjutil.c. */
+extern double gettime(void);
diff --git a/transupp.c b/transupp.c
index e5ec564..93444e3 100644
--- a/transupp.c
+++ b/transupp.c
@@ -1,8 +1,10 @@
 /*
  * transupp.c
  *
- * Copyright (C) 1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, D. R. Commander.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains image transformation routines and other utility code
@@ -19,7 +21,18 @@
 
 #include "jinclude.h"
 #include "jpeglib.h"
-#include "transupp.h"		/* My own external interface */
+#include "transupp.h"           /* My own external interface */
+#include "jpegcomp.h"
+#include <ctype.h>              /* to declare isdigit() */
+
+
+#if JPEG_LIB_VERSION >= 70
+#define dstinfo_min_DCT_h_scaled_size dstinfo->min_DCT_h_scaled_size
+#define dstinfo_min_DCT_v_scaled_size dstinfo->min_DCT_v_scaled_size
+#else
+#define dstinfo_min_DCT_h_scaled_size DCTSIZE
+#define dstinfo_min_DCT_v_scaled_size DCTSIZE
+#endif
 
 
 #if TRANSFORMS_SUPPORTED
@@ -28,7 +41,8 @@
  * Lossless image transformation routines.  These routines work on DCT
  * coefficient arrays and thus do not require any lossy decompression
  * or recompression of the image.
- * Thanks to Guido Vollbeding for the initial design and code of this feature.
+ * Thanks to Guido Vollbeding for the initial design and code of this feature,
+ * and to Ben Jackson for introducing the cropping feature.
  *
  * Horizontal flipping is done in-place, using a single top-to-bottom
  * pass through the virtual source array.  It will thus be much the
@@ -42,6 +56,13 @@
  * arrays for most of the transforms.  That could result in much thrashing
  * if the image is larger than main memory.
  *
+ * If cropping or trimming is involved, the destination arrays may be smaller
+ * than the source arrays.  Note it is not possible to do horizontal flip
+ * in-place when a nonzero Y crop offset is specified, since we'd have to move
+ * data from one block row to another but the virtual array manager doesn't
+ * guarantee we can touch more than one row at a time.  So in that case,
+ * we have to use a separate destination array.
+ *
  * Some notes about the operating environment of the individual transform
  * routines:
  * 1. Both the source and destination virtual arrays are allocated from the
@@ -54,20 +75,65 @@
  *    and we may as well take that as the effective iMCU size.
  * 4. When "trim" is in effect, the destination's dimensions will be the
  *    trimmed values but the source's will be untrimmed.
- * 5. All the routines assume that the source and destination buffers are
+ * 5. When "crop" is in effect, the destination's dimensions will be the
+ *    cropped values but the source's will be uncropped.  Each transform
+ *    routine is responsible for picking up source data starting at the
+ *    correct X and Y offset for the crop region.  (The X and Y offsets
+ *    passed to the transform routines are measured in iMCU blocks of the
+ *    destination.)
+ * 6. All the routines assume that the source and destination buffers are
  *    padded out to a full iMCU boundary.  This is true, although for the
  *    source buffer it is an undocumented property of jdcoefct.c.
- * Notes 2,3,4 boil down to this: generally we should use the destination's
- * dimensions and ignore the source's.
  */
 
 
 LOCAL(void)
-do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   jvirt_barray_ptr *src_coef_arrays)
-/* Horizontal flip; done in-place, so no separate dest array is required */
+do_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+         JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+         jvirt_barray_ptr *src_coef_arrays,
+         jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop. */
 {
-  JDIMENSION MCU_cols, comp_width, blk_x, blk_y;
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  /* We simply have to copy the right amount of data (the destination's
+   * image size) starting at the given X and Y offsets in the source.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+         dst_blk_y + y_crop_blocks,
+         (JDIMENSION) compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                        dst_buffer[offset_y],
+                        compptr->width_in_blocks);
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                   JDIMENSION x_crop_offset,
+                   jvirt_barray_ptr *src_coef_arrays)
+/* Horizontal flip; done in-place, so no separate dest array is required.
+ * NB: this only works when y_crop_offset is zero.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
   int ci, k, offset_y;
   JBLOCKARRAY buffer;
   JCOEFPTR ptr1, ptr2;
@@ -79,32 +145,109 @@
    * mirroring by changing the signs of odd-numbered columns.
    * Partial iMCUs at the right edge are left untouched.
    */
-  MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
     for (blk_y = 0; blk_y < compptr->height_in_blocks;
-	 blk_y += compptr->v_samp_factor) {
+         blk_y += compptr->v_samp_factor) {
       buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
-	  ptr1 = buffer[offset_y][blk_x];
-	  ptr2 = buffer[offset_y][comp_width - blk_x - 1];
-	  /* this unrolled loop doesn't need to know which row it's on... */
-	  for (k = 0; k < DCTSIZE2; k += 2) {
-	    temp1 = *ptr1;	/* swap even column */
-	    temp2 = *ptr2;
-	    *ptr1++ = temp2;
-	    *ptr2++ = temp1;
-	    temp1 = *ptr1;	/* swap odd column with sign change */
-	    temp2 = *ptr2;
-	    *ptr1++ = -temp2;
-	    *ptr2++ = -temp1;
-	  }
-	}
+        /* Do the mirroring */
+        for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
+          ptr1 = buffer[offset_y][blk_x];
+          ptr2 = buffer[offset_y][comp_width - blk_x - 1];
+          /* this unrolled loop doesn't need to know which row it's on... */
+          for (k = 0; k < DCTSIZE2; k += 2) {
+            temp1 = *ptr1;      /* swap even column */
+            temp2 = *ptr2;
+            *ptr1++ = temp2;
+            *ptr2++ = temp1;
+            temp1 = *ptr1;      /* swap odd column with sign change */
+            temp2 = *ptr2;
+            *ptr1++ = -temp2;
+            *ptr2++ = -temp1;
+          }
+        }
+        if (x_crop_blocks > 0) {
+          /* Now left-justify the portion of the data to be kept.
+           * We can't use a single jcopy_block_row() call because that routine
+           * depends on memcpy(), whose behavior is undefined for overlapping
+           * source and destination areas.  Sigh.
+           */
+          for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+            jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+                            buffer[offset_y] + blk_x,
+                            (JDIMENSION) 1);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
+/* Horizontal flip in general cropping case */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Here we must output into a separate array because we can't touch
+   * different rows of a single virtual array simultaneously.  Otherwise,
+   * this is essentially the same as the routine above.
+   */
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+         dst_blk_y + y_crop_blocks,
+         (JDIMENSION) compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        dst_row_ptr = dst_buffer[offset_y];
+        src_row_ptr = src_buffer[offset_y];
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Do the mirrorable blocks */
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+            /* this unrolled loop doesn't need to know which row it's on... */
+            for (k = 0; k < DCTSIZE2; k += 2) {
+              *dst_ptr++ = *src_ptr++;   /* copy even column */
+              *dst_ptr++ = - *src_ptr++; /* copy odd column with sign change */
+            }
+          } else {
+            /* Copy last partial block(s) verbatim */
+            jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                            dst_row_ptr + dst_blk_x,
+                            (JDIMENSION) 1);
+          }
+        }
       }
     }
   }
@@ -113,11 +256,13 @@
 
 LOCAL(void)
 do_flip_v (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
 /* Vertical flip */
 {
   JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JBLOCKROW src_row_ptr, dst_row_ptr;
@@ -131,51 +276,58 @@
    * of odd-numbered rows.
    * Partial iMCUs at the bottom edge are copied verbatim.
    */
-  MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (dst_blk_y < comp_height) {
-	/* Row is within the mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - dst_blk_y - (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION) compptr->v_samp_factor,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       } else {
-	/* Bottom-edge blocks will be copied verbatim. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_y,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Bottom-edge blocks will be copied verbatim. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       }
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	       dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      /* copy even row */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = *src_ptr++;
-	      /* copy odd row with sign change */
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	} else {
-	  /* Just copy row verbatim. */
-	  jcopy_block_row(src_buffer[offset_y], dst_buffer[offset_y],
-			  compptr->width_in_blocks);
-	}
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          dst_row_ptr = dst_buffer[offset_y];
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          src_row_ptr += x_crop_blocks;
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[dst_blk_x];
+            for (i = 0; i < DCTSIZE; i += 2) {
+              /* copy even row */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = *src_ptr++;
+              /* copy odd row with sign change */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = - *src_ptr++;
+            }
+          }
+        } else {
+          /* Just copy row verbatim. */
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y],
+                          compptr->width_in_blocks);
+        }
       }
     }
   }
@@ -184,11 +336,12 @@
 
 LOCAL(void)
 do_transpose (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	      jvirt_barray_ptr *src_coef_arrays,
-	      jvirt_barray_ptr *dst_coef_arrays)
+              JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+              jvirt_barray_ptr *src_coef_arrays,
+              jvirt_barray_ptr *dst_coef_arrays)
 /* Transpose source into destination */
 {
-  JDIMENSION dst_blk_x, dst_blk_y;
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_x, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JCOEFPTR src_ptr, dst_ptr;
@@ -201,25 +354,28 @@
    */
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    for (i = 0; i < DCTSIZE; i++)
-	      for (j = 0; j < DCTSIZE; j++)
-		dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION) compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            src_ptr = src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
+            for (i = 0; i < DCTSIZE; i++)
+              for (j = 0; j < DCTSIZE; j++)
+                dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+          }
+        }
       }
     }
   }
@@ -228,8 +384,9 @@
 
 LOCAL(void)
 do_rot_90 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	   jvirt_barray_ptr *src_coef_arrays,
-	   jvirt_barray_ptr *dst_coef_arrays)
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
 /* 90 degree rotation is equivalent to
  *   1. Transposing the image;
  *   2. Horizontal mirroring.
@@ -237,6 +394,7 @@
  */
 {
   JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_x, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JCOEFPTR src_ptr, dst_ptr;
@@ -246,44 +404,59 @@
    * at the (output) right edge properly.  They just get transposed and
    * not mirrored.
    */
-  MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
+  MCU_cols = srcinfo->output_height /
+    (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
-	    if (dst_blk_x < comp_width) {
-	      /* Block is within the mirrorable area. */
-	      dst_ptr = dst_buffer[offset_y]
-		[comp_width - dst_blk_x - offset_x - 1];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		i++;
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION) compptr->h_samp_factor,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          } else {
+            /* Edge blocks are transposed but not mirrored. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                i++;
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+            }
+          }
+        }
       }
     }
   }
@@ -292,8 +465,9 @@
 
 LOCAL(void)
 do_rot_270 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
+            JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+            jvirt_barray_ptr *src_coef_arrays,
+            jvirt_barray_ptr *dst_coef_arrays)
 /* 270 degree rotation is equivalent to
  *   1. Horizontal mirroring;
  *   2. Transposing the image.
@@ -301,6 +475,7 @@
  */
 {
   JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_x, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JCOEFPTR src_ptr, dst_ptr;
@@ -310,44 +485,49 @@
    * at the (output) bottom edge properly.  They just get transposed and
    * not mirrored.
    */
-  MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+  MCU_rows = srcinfo->output_width /
+    (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-	    if (dst_blk_y < comp_height) {
-	      /* Block is within the mirrorable area. */
-	      src_ptr = src_buffer[offset_x]
-		[comp_height - dst_blk_y - offset_y - 1];
-	      for (i = 0; i < DCTSIZE; i++) {
-		for (j = 0; j < DCTSIZE; j++) {
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  j++;
-		  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      }
-	    } else {
-	      /* Edge blocks are transposed but not mirrored. */
-	      src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
-	      for (i = 0; i < DCTSIZE; i++)
-		for (j = 0; j < DCTSIZE; j++)
-		  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION) compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[offset_x]
+                [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++) {
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  j++;
+                  dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                }
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+            }
+          }
+        }
       }
     }
   }
@@ -356,8 +536,9 @@
 
 LOCAL(void)
 do_rot_180 (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	    jvirt_barray_ptr *src_coef_arrays,
-	    jvirt_barray_ptr *dst_coef_arrays)
+            JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+            jvirt_barray_ptr *src_coef_arrays,
+            jvirt_barray_ptr *dst_coef_arrays)
 /* 180 degree rotation is equivalent to
  *   1. Vertical mirroring;
  *   2. Horizontal mirroring.
@@ -365,90 +546,96 @@
  */
 {
   JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JBLOCKROW src_row_ptr, dst_row_ptr;
   JCOEFPTR src_ptr, dst_ptr;
   jpeg_component_info *compptr;
 
-  MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
-  MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_width = MCU_cols * compptr->h_samp_factor;
     comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
-      if (dst_blk_y < comp_height) {
-	/* Row is within the vertically mirrorable area. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
-	   comp_height - dst_blk_y - (JDIMENSION) compptr->v_samp_factor,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the vertically mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION) compptr->v_samp_factor,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       } else {
-	/* Bottom-edge rows are only mirrored horizontally. */
-	src_buffer = (*srcinfo->mem->access_virt_barray)
-	  ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_y,
-	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+        /* Bottom-edge rows are only mirrored horizontally. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION) compptr->v_samp_factor, FALSE);
       }
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	if (dst_blk_y < comp_height) {
-	  /* Row is within the mirrorable area. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
-	  /* Process the blocks that can be mirrored both ways. */
-	  for (dst_blk_x = 0; dst_blk_x < comp_width; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[comp_width - dst_blk_x - 1];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      /* For even row, negate every odd column. */
-	      for (j = 0; j < DCTSIZE; j += 2) {
-		*dst_ptr++ = *src_ptr++;
-		*dst_ptr++ = - *src_ptr++;
-	      }
-	      /* For odd row, negate every even column. */
-	      for (j = 0; j < DCTSIZE; j += 2) {
-		*dst_ptr++ = - *src_ptr++;
-		*dst_ptr++ = *src_ptr++;
-	      }
-	    }
-	  }
-	  /* Any remaining right-edge blocks are only mirrored vertically. */
-	  for (; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE; i += 2) {
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = *src_ptr++;
-	      for (j = 0; j < DCTSIZE; j++)
-		*dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	} else {
-	  /* Remaining rows are just mirrored horizontally. */
-	  dst_row_ptr = dst_buffer[offset_y];
-	  src_row_ptr = src_buffer[offset_y];
-	  /* Process the blocks that can be mirrored. */
-	  for (dst_blk_x = 0; dst_blk_x < comp_width; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[comp_width - dst_blk_x - 1];
-	    for (i = 0; i < DCTSIZE2; i += 2) {
-	      *dst_ptr++ = *src_ptr++;
-	      *dst_ptr++ = - *src_ptr++;
-	    }
-	  }
-	  /* Any remaining right-edge blocks are only copied. */
-	  for (; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
-	    dst_ptr = dst_row_ptr[dst_blk_x];
-	    src_ptr = src_row_ptr[dst_blk_x];
-	    for (i = 0; i < DCTSIZE2; i++)
-	      *dst_ptr++ = *src_ptr++;
-	  }
-	}
+        dst_row_ptr = dst_buffer[offset_y];
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored both ways. */
+              src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                /* For even row, negate every odd column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = *src_ptr++;
+                  *dst_ptr++ = - *src_ptr++;
+                }
+                /* For odd row, negate every even column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = - *src_ptr++;
+                  *dst_ptr++ = *src_ptr++;
+                }
+              }
+            } else {
+              /* Any remaining right-edge blocks are only mirrored vertically. */
+              src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = *src_ptr++;
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = - *src_ptr++;
+              }
+            }
+          }
+        } else {
+          /* Remaining rows are just mirrored horizontally. */
+          src_row_ptr = src_buffer[offset_y];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored. */
+              dst_ptr = dst_row_ptr[dst_blk_x];
+              src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE2; i += 2) {
+                *dst_ptr++ = *src_ptr++;
+                *dst_ptr++ = - *src_ptr++;
+              }
+            } else {
+              /* Any remaining right-edge blocks are only copied. */
+              jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                              dst_row_ptr + dst_blk_x,
+                              (JDIMENSION) 1);
+            }
+          }
+        }
       }
     }
   }
@@ -457,8 +644,9 @@
 
 LOCAL(void)
 do_transverse (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	       jvirt_barray_ptr *src_coef_arrays,
-	       jvirt_barray_ptr *dst_coef_arrays)
+               JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+               jvirt_barray_ptr *src_coef_arrays,
+               jvirt_barray_ptr *dst_coef_arrays)
 /* Transverse transpose is equivalent to
  *   1. 180 degree rotation;
  *   2. Transposition;
@@ -470,167 +658,471 @@
  */
 {
   JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
   int ci, i, j, offset_x, offset_y;
   JBLOCKARRAY src_buffer, dst_buffer;
   JCOEFPTR src_ptr, dst_ptr;
   jpeg_component_info *compptr;
 
-  MCU_cols = dstinfo->image_width / (dstinfo->max_h_samp_factor * DCTSIZE);
-  MCU_rows = dstinfo->image_height / (dstinfo->max_v_samp_factor * DCTSIZE);
+  MCU_cols = srcinfo->output_height /
+    (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_width /
+    (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
 
   for (ci = 0; ci < dstinfo->num_components; ci++) {
     compptr = dstinfo->comp_info + ci;
     comp_width = MCU_cols * compptr->h_samp_factor;
     comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
     for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
-	 dst_blk_y += compptr->v_samp_factor) {
+         dst_blk_y += compptr->v_samp_factor) {
       dst_buffer = (*srcinfo->mem->access_virt_barray)
-	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
-	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+        ((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION) compptr->v_samp_factor, TRUE);
       for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
-	for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
-	     dst_blk_x += compptr->h_samp_factor) {
-	  src_buffer = (*srcinfo->mem->access_virt_barray)
-	    ((j_common_ptr) srcinfo, src_coef_arrays[ci], dst_blk_x,
-	     (JDIMENSION) compptr->h_samp_factor, FALSE);
-	  for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
-	    if (dst_blk_y < comp_height) {
-	      src_ptr = src_buffer[offset_x]
-		[comp_height - dst_blk_y - offset_y - 1];
-	      if (dst_blk_x < comp_width) {
-		/* Block is within the mirrorable area. */
-		dst_ptr = dst_buffer[offset_y]
-		  [comp_width - dst_blk_x - offset_x - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      } else {
-		/* Right-edge blocks are mirrored in y only */
-		dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++) {
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		    j++;
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		  }
-		}
-	      }
-	    } else {
-	      src_ptr = src_buffer[offset_x][dst_blk_y + offset_y];
-	      if (dst_blk_x < comp_width) {
-		/* Bottom-edge blocks are mirrored in x only */
-		dst_ptr = dst_buffer[offset_y]
-		  [comp_width - dst_blk_x - offset_x - 1];
-		for (i = 0; i < DCTSIZE; i++) {
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-		  i++;
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
-		}
-	      } else {
-		/* At lower right corner, just transpose, no mirroring */
-		dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
-		for (i = 0; i < DCTSIZE; i++)
-		  for (j = 0; j < DCTSIZE; j++)
-		    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
-	      }
-	    }
-	  }
-	}
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION) compptr->h_samp_factor,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          } else {
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION) compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Block is within the mirrorable area. */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                  }
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  }
+                }
+              } else {
+                /* Right-edge blocks are mirrored in y only */
+                src_ptr = src_buffer[offset_x]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                    j++;
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                  }
+                }
+              }
+            } else {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Bottom-edge blocks are mirrored in x only */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = -src_ptr[i*DCTSIZE+j];
+                }
+              } else {
+                /* At lower right corner, just transpose, no mirroring */
+                src_ptr = src_buffer[offset_x]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++)
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j*DCTSIZE+i] = src_ptr[i*DCTSIZE+j];
+              }
+            }
+          }
+        }
       }
     }
   }
 }
 
 
+/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
+ * Returns TRUE after advancing *strptr over the digits, or FALSE (leaving
+ * *strptr alone) if there are none.  *result gets the value, 0 if none.
+ */
+
+LOCAL(boolean)
+jt_read_integer (const char ** strptr, JDIMENSION * result)
+{
+  const char * ptr = *strptr;
+  JDIMENSION val = 0;
+
+  /* <ctype.h> needs an unsigned char value; a negative plain char is UB. */
+  for (; isdigit((unsigned char) *ptr); ptr++)
+    val = val * 10 + (JDIMENSION) (*ptr - '0');
+  *result = val;
+  if (ptr == *strptr)
+    return FALSE;               /* oops, no digits */
+  *strptr = ptr;
+  return TRUE;
+}
+
+
+/* Parse a crop specification (written in X11 geometry style).
+ * Returns TRUE and sets info->crop if the spec string is valid; returns
+ * FALSE (leaving info->crop FALSE) if not.  The crop spec has the format
+ *      <width>[f]x<height>[f]{+-}<xoffset>{+-}<yoffset>
+ * where width, height, xoffset, and yoffset are unsigned integers.
+ * Each element can be omitted to indicate a default value; the matching
+ * info->crop_*_set field records which elements were actually given.
+ * (A weakness of this style is that it is not possible to omit xoffset
+ * while specifying yoffset, since they look alike.)
+ *
+ * This code is loosely based on XParseGeometry from the X11 distribution.
+ */
+
+GLOBAL(boolean)
+jtransform_parse_crop_spec (jpeg_transform_info *info, const char *spec)
+{
+  info->crop = FALSE;
+  info->crop_width_set = JCROP_UNSET;
+  info->crop_height_set = JCROP_UNSET;
+  info->crop_xoffset_set = JCROP_UNSET;
+  info->crop_yoffset_set = JCROP_UNSET;
+
+  if (isdigit((unsigned char) *spec)) {
+    /* fetch width; the cast keeps isdigit() defined for negative chars */
+    if (! jt_read_integer(&spec, &info->crop_width))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_width_set = JCROP_FORCE;
+    } else
+      info->crop_width_set = JCROP_POS;
+  }
+  if (*spec == 'x' || *spec == 'X') {
+    /* fetch height */
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_height))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_height_set = JCROP_FORCE;
+    } else
+      info->crop_height_set = JCROP_POS;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch xoffset */
+    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_xoffset))
+      return FALSE;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch yoffset */
+    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (! jt_read_integer(&spec, &info->crop_yoffset))
+      return FALSE;
+  }
+  /* We had better have gotten to the end of the string. */
+  if (*spec != '\0')
+    return FALSE;
+  info->crop = TRUE;
+  return TRUE;
+}
+
+
+/* Trim off any partial iMCUs on the indicated destination edge */
+
+LOCAL(void)
+trim_right_edge (jpeg_transform_info *info, JDIMENSION full_width)
+{
+  JDIMENSION out_cols = info->output_width / info->iMCU_sample_width;
+  JDIMENSION full_cols = full_width / info->iMCU_sample_width;
+
+  /* Drop the partial iMCU only if the output ends at the last whole column. */
+  if (out_cols > 0 && info->x_crop_offset + out_cols == full_cols)
+    info->output_width = out_cols * info->iMCU_sample_width;
+}
+
+LOCAL(void)
+trim_bottom_edge (jpeg_transform_info *info, JDIMENSION full_height)
+{
+  JDIMENSION out_rows = info->output_height / info->iMCU_sample_height;
+  JDIMENSION full_rows = full_height / info->iMCU_sample_height;
+
+  /* Drop the partial iMCU only if the output ends at the last whole row. */
+  if (out_rows > 0 && info->y_crop_offset + out_rows == full_rows)
+    info->output_height = out_rows * info->iMCU_sample_height;
+}
+
+
 /* Request any required workspace.
  *
+ * This routine figures out the size that the output image will be
+ * (which implies that all the transform parameters must be set before
+ * it is called).
+ *
  * We allocate the workspace virtual arrays from the source decompression
  * object, so that all the arrays (both the original data and the workspace)
  * will be taken into account while making memory management decisions.
  * Hence, this routine must be called after jpeg_read_header (which reads
  * the image dimensions) and before jpeg_read_coefficients (which realizes
  * the source's virtual arrays).
+ *
+ * This function returns FALSE right away if info->perfect is set
+ * and the transformation is not perfect.  Otherwise it returns TRUE.
  */
 
-GLOBAL(void)
+GLOBAL(boolean)
 jtransform_request_workspace (j_decompress_ptr srcinfo,
-			      jpeg_transform_info *info)
+                              jpeg_transform_info *info)
 {
-  jvirt_barray_ptr *coef_arrays = NULL;
+  jvirt_barray_ptr *coef_arrays;
+  boolean need_workspace, transpose_it;
   jpeg_component_info *compptr;
-  int ci;
+  JDIMENSION xoffset, yoffset;
+  JDIMENSION width_in_iMCUs, height_in_iMCUs;
+  JDIMENSION width_in_blocks, height_in_blocks;
+  int ci, h_samp_factor, v_samp_factor;
 
+  /* Determine number of components in output image */
   if (info->force_grayscale &&
       srcinfo->jpeg_color_space == JCS_YCbCr &&
-      srcinfo->num_components == 3) {
+      srcinfo->num_components == 3)
     /* We'll only process the first component */
     info->num_components = 1;
-  } else {
+  else
     /* Process all the components */
     info->num_components = srcinfo->num_components;
+
+  /* Compute output image dimensions and related values. */
+#if JPEG_LIB_VERSION >= 80
+  jpeg_core_output_dimensions(srcinfo);
+#else
+  srcinfo->output_width = srcinfo->image_width;
+  srcinfo->output_height = srcinfo->image_height;
+#endif
+
+  /* Return right away if -perfect is given and transformation is not perfect.
+   */
+  if (info->perfect) {
+    if (info->num_components == 1) {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    } else {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    }
   }
 
+  /* With a single output component, the iMCU is forced to one DCT block;
+   * otherwise the source iMCU size is used.  (This allows us to do the right
+   * thing when reducing color to grayscale, and also provides a handy way of
+   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
+   */
   switch (info->transform) {
-  case JXFORM_NONE:
-  case JXFORM_FLIP_H:
-    /* Don't need a workspace array */
-    break;
-  case JXFORM_FLIP_V:
-  case JXFORM_ROT_180:
-    /* Need workspace arrays having same dimensions as source image.
-     * Note that we allocate arrays padded out to the next iMCU boundary,
-     * so that transform routines need not worry about missing edge blocks.
-     */
-    coef_arrays = (jvirt_barray_ptr *)
-      (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
-	SIZEOF(jvirt_barray_ptr) * info->num_components);
-    for (ci = 0; ci < info->num_components; ci++) {
-      compptr = srcinfo->comp_info + ci;
-      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
-	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) compptr->v_samp_factor);
-    }
-    break;
   case JXFORM_TRANSPOSE:
   case JXFORM_TRANSVERSE:
   case JXFORM_ROT_90:
   case JXFORM_ROT_270:
-    /* Need workspace arrays having transposed dimensions.
-     * Note that we allocate arrays padded out to the next iMCU boundary,
-     * so that transform routines need not worry about missing edge blocks.
-     */
-    coef_arrays = (jvirt_barray_ptr *)
-      (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
-	SIZEOF(jvirt_barray_ptr) * info->num_components);
-    for (ci = 0; ci < info->num_components; ci++) {
-      compptr = srcinfo->comp_info + ci;
-      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
-	((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
-	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
-				(long) compptr->v_samp_factor),
-	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
-				(long) compptr->h_samp_factor),
-	 (JDIMENSION) compptr->h_samp_factor);
+    info->output_width = srcinfo->output_height;
+    info->output_height = srcinfo->output_width;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+    }
+    break;
+  default:
+    info->output_width = srcinfo->output_width;
+    info->output_height = srcinfo->output_height;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
     }
     break;
   }
-  info->workspace_coef_arrays = coef_arrays;
+
+  /* If cropping has been requested, compute the crop area's position and
+   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
+   */
+  if (info->crop) {
+    /* Insert default values for unset crop parameters */
+    if (info->crop_xoffset_set == JCROP_UNSET)
+      info->crop_xoffset = 0;   /* default to +0 */
+    if (info->crop_yoffset_set == JCROP_UNSET)
+      info->crop_yoffset = 0;   /* default to +0 */
+    if (info->crop_xoffset >= info->output_width ||
+        info->crop_yoffset >= info->output_height)
+      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    if (info->crop_width_set == JCROP_UNSET)
+      info->crop_width = info->output_width - info->crop_xoffset;
+    if (info->crop_height_set == JCROP_UNSET)
+      info->crop_height = info->output_height - info->crop_yoffset;
+    /* Ensure parameters are valid */
+    if (info->crop_width <= 0 || info->crop_width > info->output_width ||
+        info->crop_height <= 0 || info->crop_height > info->output_height ||
+        info->crop_xoffset > info->output_width - info->crop_width ||
+        info->crop_yoffset > info->output_height - info->crop_height)
+      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    /* Convert negative crop offsets into regular offsets */
+    if (info->crop_xoffset_set == JCROP_NEG)
+      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    else
+      xoffset = info->crop_xoffset;
+    if (info->crop_yoffset_set == JCROP_NEG)
+      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+    else
+      yoffset = info->crop_yoffset;
+    /* Now adjust so that upper left corner falls at an iMCU boundary */
+    if (info->crop_width_set == JCROP_FORCE)
+      info->output_width = info->crop_width;
+    else
+      info->output_width =
+        info->crop_width + (xoffset % info->iMCU_sample_width);
+    if (info->crop_height_set == JCROP_FORCE)
+      info->output_height = info->crop_height;
+    else
+      info->output_height =
+        info->crop_height + (yoffset % info->iMCU_sample_height);
+    /* Save x/y offsets measured in iMCUs */
+    info->x_crop_offset = xoffset / info->iMCU_sample_width;
+    info->y_crop_offset = yoffset / info->iMCU_sample_height;
+  } else {
+    info->x_crop_offset = 0;
+    info->y_crop_offset = 0;
+  }
+
+  /* Figure out whether we need workspace arrays,
+   * and if so whether they are transposed relative to the source.
+   */
+  need_workspace = FALSE;
+  transpose_it = FALSE;
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      need_workspace = TRUE;
+    /* No workspace needed if neither cropping nor transforming */
+    break;
+  case JXFORM_FLIP_H:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_width);
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      need_workspace = TRUE;
+    /* do_flip_h_no_crop doesn't need a workspace array */
+    break;
+  case JXFORM_FLIP_V:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_TRANSPOSE:
+    /* transpose does NOT have to trim anything */
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_height);
+      trim_bottom_edge(info, srcinfo->output_width);
+    }
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_90:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_180:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_width);
+      trim_bottom_edge(info, srcinfo->output_height);
+    }
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_ROT_270:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_width);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  }
+
+  /* Allocate workspace if needed.
+   * Note that we allocate arrays padded out to the next iMCU boundary,
+   * so that transform routines need not worry about missing edge blocks.
+   */
+  if (need_workspace) {
+    coef_arrays = (jvirt_barray_ptr *)
+      (*srcinfo->mem->alloc_small) ((j_common_ptr) srcinfo, JPOOL_IMAGE,
+                sizeof(jvirt_barray_ptr) * info->num_components);
+    width_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long) info->output_width,
+                    (long) info->iMCU_sample_width);
+    height_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long) info->output_height,
+                    (long) info->iMCU_sample_height);
+    for (ci = 0; ci < info->num_components; ci++) {
+      compptr = srcinfo->comp_info + ci;
+      if (info->num_components == 1) {
+        /* we're going to force samp factors to 1x1 in this case */
+        h_samp_factor = v_samp_factor = 1;
+      } else if (transpose_it) {
+        h_samp_factor = compptr->v_samp_factor;
+        v_samp_factor = compptr->h_samp_factor;
+      } else {
+        h_samp_factor = compptr->h_samp_factor;
+        v_samp_factor = compptr->v_samp_factor;
+      }
+      width_in_blocks = width_in_iMCUs * h_samp_factor;
+      height_in_blocks = height_in_iMCUs * v_samp_factor;
+      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
+        ((j_common_ptr) srcinfo, JPOOL_IMAGE, FALSE,
+         width_in_blocks, height_in_blocks, (JDIMENSION) v_samp_factor);
+    }
+    info->workspace_coef_arrays = coef_arrays;
+  } else
+    info->workspace_coef_arrays = NULL;
+
+  return TRUE;
 }
 
 
@@ -642,13 +1134,18 @@
   int tblno, i, j, ci, itemp;
   jpeg_component_info *compptr;
   JQUANT_TBL *qtblptr;
-  JDIMENSION dtemp;
+  JDIMENSION jtemp;
   UINT16 qtemp;
 
-  /* Transpose basic image dimensions */
-  dtemp = dstinfo->image_width;
+  /* Transpose image dimensions */
+  jtemp = dstinfo->image_width;
   dstinfo->image_width = dstinfo->image_height;
-  dstinfo->image_height = dtemp;
+  dstinfo->image_height = jtemp;
+#if JPEG_LIB_VERSION >= 70
+  itemp = dstinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = itemp;
+#endif
 
   /* Transpose sampling factors */
   for (ci = 0; ci < dstinfo->num_components; ci++) {
@@ -663,58 +1160,173 @@
     qtblptr = dstinfo->quant_tbl_ptrs[tblno];
     if (qtblptr != NULL) {
       for (i = 0; i < DCTSIZE; i++) {
-	for (j = 0; j < i; j++) {
-	  qtemp = qtblptr->quantval[i*DCTSIZE+j];
-	  qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
-	  qtblptr->quantval[j*DCTSIZE+i] = qtemp;
-	}
+        for (j = 0; j < i; j++) {
+          qtemp = qtblptr->quantval[i*DCTSIZE+j];
+          qtblptr->quantval[i*DCTSIZE+j] = qtblptr->quantval[j*DCTSIZE+i];
+          qtblptr->quantval[j*DCTSIZE+i] = qtemp;
+        }
       }
     }
   }
 }
 
 
-/* Trim off any partial iMCUs on the indicated destination edge */
+/* Adjust Exif image parameters.
+ *
+ * Overwrites the ExifImageWidth and ExifImageHeight tags in place if found.
+ */
 
+#if JPEG_LIB_VERSION >= 70
 LOCAL(void)
-trim_right_edge (j_compress_ptr dstinfo)
+adjust_exif_parameters (JOCTET * data, unsigned int length,
+                        JDIMENSION new_width, JDIMENSION new_height)
 {
-  int ci, max_h_samp_factor;
-  JDIMENSION MCU_cols;
+  boolean is_motorola; /* Flag for byte order */
+  unsigned int number_of_tags, tagnum;
+  unsigned int firstoffset, offset;
+  JDIMENSION new_value;
 
-  /* We have to compute max_h_samp_factor ourselves,
-   * because it hasn't been set yet in the destination
-   * (and we don't want to use the source's value).
-   */
-  max_h_samp_factor = 1;
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    int h_samp_factor = dstinfo->comp_info[ci].h_samp_factor;
-    max_h_samp_factor = MAX(max_h_samp_factor, h_samp_factor);
+  if (length < 12) return; /* Length of an IFD entry */
+
+  /* Discover byte order */
+  if (GETJOCTET(data[0]) == 0x49 && GETJOCTET(data[1]) == 0x49)
+    is_motorola = FALSE;
+  else if (GETJOCTET(data[0]) == 0x4D && GETJOCTET(data[1]) == 0x4D)
+    is_motorola = TRUE;
+  else
+    return;
+
+  /* Check Tag Mark */
+  if (is_motorola) {
+    if (GETJOCTET(data[2]) != 0) return;
+    if (GETJOCTET(data[3]) != 0x2A) return;
+  } else {
+    if (GETJOCTET(data[3]) != 0) return;
+    if (GETJOCTET(data[2]) != 0x2A) return;
   }
-  MCU_cols = dstinfo->image_width / (max_h_samp_factor * DCTSIZE);
-  if (MCU_cols > 0)		/* can't trim to 0 pixels */
-    dstinfo->image_width = MCU_cols * (max_h_samp_factor * DCTSIZE);
-}
 
-LOCAL(void)
-trim_bottom_edge (j_compress_ptr dstinfo)
-{
-  int ci, max_v_samp_factor;
-  JDIMENSION MCU_rows;
-
-  /* We have to compute max_v_samp_factor ourselves,
-   * because it hasn't been set yet in the destination
-   * (and we don't want to use the source's value).
-   */
-  max_v_samp_factor = 1;
-  for (ci = 0; ci < dstinfo->num_components; ci++) {
-    int v_samp_factor = dstinfo->comp_info[ci].v_samp_factor;
-    max_v_samp_factor = MAX(max_v_samp_factor, v_samp_factor);
+  /* Get first IFD offset (offset to IFD0) */
+  if (is_motorola) {
+    if (GETJOCTET(data[4]) != 0) return;
+    if (GETJOCTET(data[5]) != 0) return;
+    firstoffset = GETJOCTET(data[6]);
+    firstoffset <<= 8;
+    firstoffset += GETJOCTET(data[7]);
+  } else {
+    if (GETJOCTET(data[7]) != 0) return;
+    if (GETJOCTET(data[6]) != 0) return;
+    firstoffset = GETJOCTET(data[5]);
+    firstoffset <<= 8;
+    firstoffset += GETJOCTET(data[4]);
   }
-  MCU_rows = dstinfo->image_height / (max_v_samp_factor * DCTSIZE);
-  if (MCU_rows > 0)		/* can't trim to 0 pixels */
-    dstinfo->image_height = MCU_rows * (max_v_samp_factor * DCTSIZE);
+  if (firstoffset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this IFD */
+  if (is_motorola) {
+    number_of_tags = GETJOCTET(data[firstoffset]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[firstoffset+1]);
+  } else {
+    number_of_tags = GETJOCTET(data[firstoffset+1]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[firstoffset]);
+  }
+  if (number_of_tags == 0) return;
+  firstoffset += 2;
+
+  /* Search for ExifSubIFD offset Tag in IFD0 */
+  for (;;) {
+    if (firstoffset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = GETJOCTET(data[firstoffset]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[firstoffset+1]);
+    } else {
+      tagnum = GETJOCTET(data[firstoffset+1]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[firstoffset]);
+    }
+    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
+    if (--number_of_tags == 0) return;
+    firstoffset += 12;
+  }
+
+  /* Get the ExifSubIFD offset */
+  if (is_motorola) {
+    if (GETJOCTET(data[firstoffset+8]) != 0) return;
+    if (GETJOCTET(data[firstoffset+9]) != 0) return;
+    offset = GETJOCTET(data[firstoffset+10]);
+    offset <<= 8;
+    offset += GETJOCTET(data[firstoffset+11]);
+  } else {
+    if (GETJOCTET(data[firstoffset+11]) != 0) return;
+    if (GETJOCTET(data[firstoffset+10]) != 0) return;
+    offset = GETJOCTET(data[firstoffset+9]);
+    offset <<= 8;
+    offset += GETJOCTET(data[firstoffset+8]);
+  }
+  if (offset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this SubIFD */
+  if (is_motorola) {
+    number_of_tags = GETJOCTET(data[offset]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[offset+1]);
+  } else {
+    number_of_tags = GETJOCTET(data[offset+1]);
+    number_of_tags <<= 8;
+    number_of_tags += GETJOCTET(data[offset]);
+  }
+  if (number_of_tags < 2) return;
+  offset += 2;
+
+  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
+  do {
+    if (offset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = GETJOCTET(data[offset]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[offset+1]);
+    } else {
+      tagnum = GETJOCTET(data[offset+1]);
+      tagnum <<= 8;
+      tagnum += GETJOCTET(data[offset]);
+    }
+    if (tagnum == 0xA002 || tagnum == 0xA003) {
+      if (tagnum == 0xA002)
+        new_value = new_width; /* ExifImageWidth Tag */
+      else
+        new_value = new_height; /* ExifImageHeight Tag */
+      if (is_motorola) {
+        data[offset+2] = 0; /* Format = unsigned long (4 octets) */
+        data[offset+3] = 4;
+        data[offset+4] = 0; /* Number Of Components = 1 */
+        data[offset+5] = 0;
+        data[offset+6] = 0;
+        data[offset+7] = 1;
+        data[offset+8] = 0;
+        data[offset+9] = 0;
+        data[offset+10] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset+11] = (JOCTET)(new_value & 0xFF);
+      } else {
+        data[offset+2] = 4; /* Format = unsigned long (4 octets) */
+        data[offset+3] = 0;
+        data[offset+4] = 1; /* Number Of Components = 1 */
+        data[offset+5] = 0;
+        data[offset+6] = 0;
+        data[offset+7] = 0;
+        data[offset+8] = (JOCTET)(new_value & 0xFF);
+        data[offset+9] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset+10] = 0;
+        data[offset+11] = 0;
+      }
+    }
+    offset += 12;
+  } while (--number_of_tags);
 }
+#endif
 
 
 /* Adjust output image parameters as needed.
@@ -730,24 +1342,28 @@
 
 GLOBAL(jvirt_barray_ptr *)
 jtransform_adjust_parameters (j_decompress_ptr srcinfo,
-			      j_compress_ptr dstinfo,
-			      jvirt_barray_ptr *src_coef_arrays,
-			      jpeg_transform_info *info)
+                              j_compress_ptr dstinfo,
+                              jvirt_barray_ptr *src_coef_arrays,
+                              jpeg_transform_info *info)
 {
   /* If force-to-grayscale is requested, adjust destination parameters */
   if (info->force_grayscale) {
-    /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
-     * properly.  Among other things, the target h_samp_factor & v_samp_factor
-     * will get set to 1, which typically won't match the source.
-     * In fact we do this even if the source is already grayscale; that
-     * provides an easy way of coercing a grayscale JPEG with funny sampling
-     * factors to the customary 1,1.  (Some decoders fail on other factors.)
+    /* First, ensure we have YCbCr or grayscale data, and that the source's
+     * Y channel is full resolution.  (No reasonable person would make Y
+     * be less than full resolution, so actually coping with that case
+     * isn't worth extra code space.  But we check it to avoid crashing.)
      */
-    if ((dstinfo->jpeg_color_space == JCS_YCbCr &&
-	 dstinfo->num_components == 3) ||
-	(dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
-	 dstinfo->num_components == 1)) {
-      /* We have to preserve the source's quantization table number. */
+    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+          dstinfo->num_components == 3) ||
+         (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+          dstinfo->num_components == 1)) &&
+        srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+        srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
+       * properly.  Among other things, it sets the target h_samp_factor &
+       * v_samp_factor to 1, which typically won't match the source.
+       * We have to preserve the source's quantization table number, however.
+       */
       int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
       jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
       dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
@@ -755,48 +1371,64 @@
       /* Sorry, can't do it */
       ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
     }
+  } else if (info->num_components == 1) {
+    /* For a single-component source, we force the destination sampling factors
+     * to 1x1, with or without force_grayscale.  This is useful because some
+     * decoders choke on grayscale images with other sampling factors.
+     */
+    dstinfo->comp_info[0].h_samp_factor = 1;
+    dstinfo->comp_info[0].v_samp_factor = 1;
   }
 
-  /* Correct the destination's image dimensions etc if necessary */
+  /* Correct the destination's image dimensions as necessary
+   * for rotate/flip, resize, and crop operations.
+   */
+#if JPEG_LIB_VERSION >= 70
+  dstinfo->jpeg_width = info->output_width;
+  dstinfo->jpeg_height = info->output_height;
+#endif
+
+  /* Transpose destination image parameters */
   switch (info->transform) {
-  case JXFORM_NONE:
-    /* Nothing to do */
-    break;
-  case JXFORM_FLIP_H:
-    if (info->trim)
-      trim_right_edge(dstinfo);
-    break;
-  case JXFORM_FLIP_V:
-    if (info->trim)
-      trim_bottom_edge(dstinfo);
-    break;
   case JXFORM_TRANSPOSE:
-    transpose_critical_parameters(dstinfo);
-    /* transpose does NOT have to trim anything */
-    break;
   case JXFORM_TRANSVERSE:
-    transpose_critical_parameters(dstinfo);
-    if (info->trim) {
-      trim_right_edge(dstinfo);
-      trim_bottom_edge(dstinfo);
-    }
-    break;
   case JXFORM_ROT_90:
-    transpose_critical_parameters(dstinfo);
-    if (info->trim)
-      trim_right_edge(dstinfo);
-    break;
-  case JXFORM_ROT_180:
-    if (info->trim) {
-      trim_right_edge(dstinfo);
-      trim_bottom_edge(dstinfo);
-    }
-    break;
   case JXFORM_ROT_270:
+#if JPEG_LIB_VERSION < 70
+    dstinfo->image_width = info->output_height;
+    dstinfo->image_height = info->output_width;
+#endif
     transpose_critical_parameters(dstinfo);
-    if (info->trim)
-      trim_bottom_edge(dstinfo);
     break;
+  default:
+#if JPEG_LIB_VERSION < 70
+    dstinfo->image_width = info->output_width;
+    dstinfo->image_height = info->output_height;
+#endif
+    break;
+  }
+
+  /* Adjust Exif properties */
+  if (srcinfo->marker_list != NULL &&
+      srcinfo->marker_list->marker == JPEG_APP0+1 &&
+      srcinfo->marker_list->data_length >= 6 &&
+      GETJOCTET(srcinfo->marker_list->data[0]) == 0x45 &&
+      GETJOCTET(srcinfo->marker_list->data[1]) == 0x78 &&
+      GETJOCTET(srcinfo->marker_list->data[2]) == 0x69 &&
+      GETJOCTET(srcinfo->marker_list->data[3]) == 0x66 &&
+      GETJOCTET(srcinfo->marker_list->data[4]) == 0 &&
+      GETJOCTET(srcinfo->marker_list->data[5]) == 0) {
+    /* Suppress output of JFIF marker */
+    dstinfo->write_JFIF_header = FALSE;
+#if JPEG_LIB_VERSION >= 70
+    /* Adjust Exif image parameters */
+    if (dstinfo->jpeg_width != srcinfo->image_width ||
+        dstinfo->jpeg_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+        srcinfo->marker_list->data_length - 6,
+        dstinfo->jpeg_width, dstinfo->jpeg_height);
+#endif
   }
 
   /* Return the appropriate output data set */
@@ -816,40 +1448,110 @@
  */
 
 GLOBAL(void)
-jtransform_execute_transformation (j_decompress_ptr srcinfo,
-				   j_compress_ptr dstinfo,
-				   jvirt_barray_ptr *src_coef_arrays,
-				   jpeg_transform_info *info)
+jtransform_execute_transform (j_decompress_ptr srcinfo,
+                              j_compress_ptr dstinfo,
+                              jvirt_barray_ptr *src_coef_arrays,
+                              jpeg_transform_info *info)
 {
   jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
 
+  /* Dispatch to the worker routine for the requested transform.  Note: the
+   * conditions tested here must match the switch in
+   * jtransform_request_workspace(). */
   switch (info->transform) {
   case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_FLIP_H:
-    do_flip_h(srcinfo, dstinfo, src_coef_arrays);
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                src_coef_arrays, dst_coef_arrays);
+    else
+      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
+                        src_coef_arrays);  /* no destination arrays passed */
     break;
   case JXFORM_FLIP_V:
-    do_flip_v(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_TRANSPOSE:
-    do_transpose(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                 src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_TRANSVERSE:
-    do_transverse(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                  src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_90:
-    do_rot_90(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_180:
-    do_rot_180(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
     break;
   case JXFORM_ROT_270:
-    do_rot_270(srcinfo, dstinfo, src_coef_arrays, dst_coef_arrays);
+    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
     break;
   }
 }
 
+/* jtransform_perfect_transform
+ *
+ * Report whether a lossless transformation can be applied to an image of
+ * the given size without any partial MCU at the edges it depends on.
+ *
+ * Arguments:
+ *   image_width, image_height: dimensions of the source image.
+ *   MCU_width, MCU_height: dimensions of one MCU, in pixels.
+ *   transform: code of the transformation to be checked.
+ * Once the source header has been read, the arguments can be taken from
+ * the initialized jpeg_struct:
+ *   image_width = cinfo.image_width
+ *   image_height = cinfo.image_height
+ *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
+ *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
+ * Returns:
+ *   TRUE = the transformation can be performed perfectly
+ *   FALSE = the transformation cannot be performed perfectly
+ *           (the caller may then take some custom action)
+ */
+
+GLOBAL(boolean)
+jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
+                             int MCU_width, int MCU_height,
+                             JXFORM_CODE transform)
+{
+  boolean need_w = FALSE, need_h = FALSE; /* dimensions that must be whole MCUs */
+
+  switch (transform) {
+  case JXFORM_FLIP_H:
+  case JXFORM_ROT_270:
+    need_w = TRUE;
+    break;
+  case JXFORM_FLIP_V:
+  case JXFORM_ROT_90:
+    need_h = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_180:
+    need_w = TRUE;
+    need_h = TRUE;
+    break;
+  default:
+    break;
+  }
+
+  if (need_w && image_width % (JDIMENSION) MCU_width)
+    return FALSE;
+  if (need_h && image_height % (JDIMENSION) MCU_height)
+    return FALSE;
+  return TRUE;
+}
+
 #endif /* TRANSFORMS_SUPPORTED */
 
 
@@ -884,7 +1586,7 @@
 
 GLOBAL(void)
 jcopy_markers_execute (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-		       JCOPY_OPTION option)
+                       JCOPY_OPTION option)
 {
   jpeg_saved_marker_ptr marker;
 
@@ -895,34 +1597,24 @@
    */
   for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
     if (dstinfo->write_JFIF_header &&
-	marker->marker == JPEG_APP0 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x4A &&
-	GETJOCTET(marker->data[1]) == 0x46 &&
-	GETJOCTET(marker->data[2]) == 0x49 &&
-	GETJOCTET(marker->data[3]) == 0x46 &&
-	GETJOCTET(marker->data[4]) == 0)
-      continue;			/* reject duplicate JFIF */
+        marker->marker == JPEG_APP0 &&
+        marker->data_length >= 5 &&
+        GETJOCTET(marker->data[0]) == 0x4A &&
+        GETJOCTET(marker->data[1]) == 0x46 &&
+        GETJOCTET(marker->data[2]) == 0x49 &&
+        GETJOCTET(marker->data[3]) == 0x46 &&
+        GETJOCTET(marker->data[4]) == 0)
+      continue;                 /* reject duplicate JFIF */
     if (dstinfo->write_Adobe_marker &&
-	marker->marker == JPEG_APP0+14 &&
-	marker->data_length >= 5 &&
-	GETJOCTET(marker->data[0]) == 0x41 &&
-	GETJOCTET(marker->data[1]) == 0x64 &&
-	GETJOCTET(marker->data[2]) == 0x6F &&
-	GETJOCTET(marker->data[3]) == 0x62 &&
-	GETJOCTET(marker->data[4]) == 0x65)
-      continue;			/* reject duplicate Adobe */
-#ifdef NEED_FAR_POINTERS
-    /* We could use jpeg_write_marker if the data weren't FAR... */
-    {
-      unsigned int i;
-      jpeg_write_m_header(dstinfo, marker->marker, marker->data_length);
-      for (i = 0; i < marker->data_length; i++)
-	jpeg_write_m_byte(dstinfo, marker->data[i]);
-    }
-#else
+        marker->marker == JPEG_APP0+14 &&
+        marker->data_length >= 5 &&
+        GETJOCTET(marker->data[0]) == 0x41 &&
+        GETJOCTET(marker->data[1]) == 0x64 &&
+        GETJOCTET(marker->data[2]) == 0x6F &&
+        GETJOCTET(marker->data[3]) == 0x62 &&
+        GETJOCTET(marker->data[4]) == 0x65)
+      continue;                 /* reject duplicate Adobe */
     jpeg_write_marker(dstinfo, marker->marker,
-		      marker->data, marker->data_length);
-#endif
+                      marker->data, marker->data_length);
   }
 }
diff --git a/transupp.h b/transupp.h
index 5c2d32a..8fe9071 100644
--- a/transupp.h
+++ b/transupp.h
@@ -1,8 +1,10 @@
 /*
  * transupp.h
  *
- * Copyright (C) 1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains declarations for image transformation routines and
@@ -19,35 +21,9 @@
 
 /* If you happen not to want the image transform support, disable it here */
 #ifndef TRANSFORMS_SUPPORTED
-#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
+#define TRANSFORMS_SUPPORTED 1          /* 0 disables transform code */
 #endif
 
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_request_workspace		jTrRequest
-#define jtransform_adjust_parameters		jTrAdjust
-#define jtransform_execute_transformation	jTrExec
-#define jcopy_markers_setup			jCMrkSetup
-#define jcopy_markers_execute			jCMrkExec
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * Codes for supported types of image transformations.
- */
-
-typedef enum {
-	JXFORM_NONE,		/* no transformation */
-	JXFORM_FLIP_H,		/* horizontal flip */
-	JXFORM_FLIP_V,		/* vertical flip */
-	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
-	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
-	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
-	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
-} JXFORM_CODE;
-
 /*
  * Although rotating and flipping data expressed as DCT coefficients is not
  * hard, there is an asymmetry in the JPEG format specification for images
@@ -75,6 +51,25 @@
  * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
  * followed by -rot 180 -trim trims both edges.)
  *
+ * We also offer a lossless-crop option, which discards data outside a given
+ * image region but losslessly preserves what is inside.  Like the rotate and
+ * flip transforms, lossless crop is restricted by the JPEG format: the upper
+ * left corner of the selected region must fall on an iMCU boundary.  If this
+ * does not hold for the given crop parameters, we silently move the upper left
+ * corner up and/or left to make it so, simultaneously increasing the region
+ * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * output image covers at least the requested region, but may cover more.)
+ * The adjustment of the region dimensions may be optionally disabled.
+ *
+ * We also provide a lossless-resize option, which is kind of a lossless-crop
+ * operation in the DCT coefficient block domain - it discards higher-order
+ * coefficients and losslessly preserves lower-order coefficients of a
+ * sub-block.
+ *
+ * Rotate/flip transform, resize, and crop can be requested together in a
+ * single invocation.  The crop is applied last --- that is, the crop region
+ * is specified in terms of the destination image after transform/resize.
+ *
  * We also offer a "force to grayscale" option, which simply discards the
  * chrominance channels of a YCbCr image.  This is lossless in the sense that
  * the luminance channel is preserved exactly.  It's not the same kind of
@@ -83,33 +78,109 @@
  * be aware of the option to know how many components to work on.
  */
 
+
+/*
+ * Codes for supported types of image transformations.
+ */
+
+typedef enum {
+  JXFORM_NONE,            /* no transformation */
+  JXFORM_FLIP_H,          /* horizontal flip */
+  JXFORM_FLIP_V,          /* vertical flip */
+  JXFORM_TRANSPOSE,       /* transpose across upper-left-to-lower-right axis */
+  JXFORM_TRANSVERSE,      /* transpose across upper-right-to-lower-left axis */
+  JXFORM_ROT_90,          /* 90-degree clockwise rotation */
+  JXFORM_ROT_180,         /* 180-degree rotation */
+  JXFORM_ROT_270          /* 270-degree clockwise (= 90 counterclockwise) */
+} JXFORM_CODE;
+
+/*
+ * Codes for crop parameters, which can individually be unspecified,
+ * positive or negative for xoffset or yoffset,
+ * positive or forced for width or height.
+ */
+
+typedef enum {
+  JCROP_UNSET,            /* parameter was not specified */
+  JCROP_POS,              /* positive value was given */
+  JCROP_NEG,              /* negative offset (from right or bottom edge) */
+  JCROP_FORCE             /* forced width/height (disables adjustment) */
+} JCROP_CODE;
+
+/*
+ * Transform parameters struct.
+ * NB: application must not change any elements of this struct after
+ * calling jtransform_request_workspace.
+ */
+
 typedef struct {
   /* Options: set by caller */
-  JXFORM_CODE transform;	/* image transform operator */
-  boolean trim;			/* if TRUE, trim partial MCUs as needed */
-  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
+  JXFORM_CODE transform;        /* image transform operator */
+  boolean perfect;              /* if TRUE, fail if partial MCUs are requested */
+  boolean trim;                 /* if TRUE, trim partial MCUs as needed */
+  boolean force_grayscale;      /* if TRUE, convert color image to grayscale */
+  boolean crop;                 /* if TRUE, crop source image */
+  boolean slow_hflip;  /* For best performance, the JXFORM_FLIP_H transform
+                          normally modifies the source coefficients in place.
+                          Setting this to TRUE will instead use a slower,
+                          double-buffered algorithm, which leaves the source
+                          coefficients intact (necessary if other transformed
+                          images must be generated from the same set of
+                          coefficients). */
+
+  /* Crop parameters: application need not set these unless crop is TRUE.
+   * These can be filled in by jtransform_parse_crop_spec().
+   */
+  JDIMENSION crop_width;        /* Width of selected region */
+  JCROP_CODE crop_width_set;    /* (forced disables adjustment) */
+  JDIMENSION crop_height;       /* Height of selected region */
+  JCROP_CODE crop_height_set;   /* (forced disables adjustment) */
+  JDIMENSION crop_xoffset;      /* X offset of selected region */
+  JCROP_CODE crop_xoffset_set;  /* (negative measures from right edge) */
+  JDIMENSION crop_yoffset;      /* Y offset of selected region */
+  JCROP_CODE crop_yoffset_set;  /* (negative measures from bottom edge) */
 
   /* Internal workspace: caller should not touch these */
-  int num_components;		/* # of components in workspace */
+  int num_components;           /* # of components in workspace */
   jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
+  JDIMENSION output_width;      /* cropped destination dimensions */
+  JDIMENSION output_height;
+  JDIMENSION x_crop_offset;     /* destination crop offsets measured in iMCUs */
+  JDIMENSION y_crop_offset;
+  int iMCU_sample_width;        /* destination iMCU size */
+  int iMCU_sample_height;
 } jpeg_transform_info;
 
 
 #if TRANSFORMS_SUPPORTED
 
+/* Parse a crop specification (written in X11 geometry style) */
+EXTERN(boolean) jtransform_parse_crop_spec
+        (jpeg_transform_info *info, const char *spec);
 /* Request any required workspace */
-EXTERN(void) jtransform_request_workspace
-	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
+EXTERN(boolean) jtransform_request_workspace
+        (j_decompress_ptr srcinfo, jpeg_transform_info *info);
 /* Adjust output image parameters */
 EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
+        (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+         jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info);
 /* Execute the actual transformation, if any */
-EXTERN(void) jtransform_execute_transformation
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
+EXTERN(void) jtransform_execute_transform
+        (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+         jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info);
+/* Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ */
+EXTERN(boolean) jtransform_perfect_transform
+        (JDIMENSION image_width, JDIMENSION image_height, int MCU_width,
+         int MCU_height, JXFORM_CODE transform);
+
+/* jtransform_execute_transform used to be called
+ * jtransform_execute_transformation, but some compilers complain about
+ * routine names that long.  This macro is here to avoid breaking any
+ * old source code that uses the original name...
+ */
+#define jtransform_execute_transformation       jtransform_execute_transform
 
 #endif /* TRANSFORMS_SUPPORTED */
 
@@ -119,17 +190,17 @@
  */
 
 typedef enum {
-	JCOPYOPT_NONE,		/* copy no optional markers */
-	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
-	JCOPYOPT_ALL		/* copy all optional markers */
+  JCOPYOPT_NONE,          /* copy no optional markers */
+  JCOPYOPT_COMMENTS,      /* copy only comment (COM) markers */
+  JCOPYOPT_ALL            /* copy all optional markers */
 } JCOPY_OPTION;
 
-#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
+#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS     /* recommended default */
 
 /* Setup decompression object to save desired markers in memory */
 EXTERN(void) jcopy_markers_setup
-	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
+        (j_decompress_ptr srcinfo, JCOPY_OPTION option);
 /* Copy markers saved in the given source object to the destination object */
 EXTERN(void) jcopy_markers_execute
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     JCOPY_OPTION option));
+        (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+         JCOPY_OPTION option);
diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c
new file mode 100644
index 0000000..4b5578f
--- /dev/null
+++ b/turbojpeg-jni.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (C)2011-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "turbojpeg.h"
+#ifdef WIN32
+#include "tjutil.h"
+#endif
+#include <jni.h>
+#include "java/org_libjpegturbo_turbojpeg_TJCompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJDecompressor.h"
+#include "java/org_libjpegturbo_turbojpeg_TJ.h"
+/* Throw java.lang.Exception(msg), then goto bailout; uses the caller's env */
+#define _throw(msg) {  \
+	jclass _exccls=(*env)->FindClass(env, "java/lang/Exception");  \
+	if(!_exccls) goto bailout;  \
+	(*env)->ThrowNew(env, _exccls, msg);  \
+	goto bailout;  \
+}
+/* If f evaluates to 0/NULL, _throw an exception that names the source line */
+#define bailif0(f) {if(!(f)) {  \
+	char temps[80];  \
+	snprintf(temps, 80, "Unexpected NULL condition in line %d", __LINE__);  \
+	_throw(temps);  \
+}}
+/* Declare _cls/_fid and load the object's long "handle" field into handle */
+#define gethandle()  \
+	jclass _cls=(*env)->GetObjectClass(env, obj);  \
+	jfieldID _fid;  \
+	if(!_cls) goto bailout;  \
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "handle", "J"));  \
+	handle=(tjhandle)(size_t)(*env)->GetLongField(env, obj, _fid);
+
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSize
+	(JNIEnv *env, jclass cls, jint width, jint height, jint jpegSubsamp)
+{
+	jint retval=(jint)tjBufSize(width, height, jpegSubsamp);
+	if(retval==-1) _throw(tjGetErrorStr());  /* -1 is treated as failure */
+	/* retval is returned whether or not an exception was raised above */
+	bailout:
+	return retval;
+}
+/* TJ.bufSizeYUV(width, pad, height, subsamp): wraps tjBufSizeYUV2() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
+	(JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp)
+{
+	jint retval=(jint)tjBufSizeYUV2(width, pad, height, subsamp);
+	if(retval==-1) _throw(tjGetErrorStr());  /* -1 is treated as failure */
+
+	bailout:
+	return retval;
+}
+/* TJ.bufSizeYUV(width, height, subsamp): forwards to the 4-int form, pad=4 */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III
+	(JNIEnv *env, jclass cls, jint width, jint height, jint subsamp)
+{
+	return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width,
+		4, height, subsamp);
+}
+/* TJCompressor.init(): create a compressor and store it in this.handle */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
+	(JNIEnv *env, jobject obj)
+{
+	jclass cls;
+	jfieldID fid;
+	tjhandle handle;
+
+	/* Resolve the field first so that a lookup failure cannot leak a handle */
+	bailif0(cls=(*env)->GetObjectClass(env, obj));
+	bailif0(fid=(*env)->GetFieldID(env, cls, "handle", "J"));
+	if((handle=tjInitCompress())==NULL)
+		_throw(tjGetErrorStr());
+	(*env)->SetLongField(env, obj, fid, (jlong)handle);
+
+	bailout:
+	return;
+}
+/* TJCompressor.compress(byte[] src, x, y, ...): compress a region to JPEG */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width,
+		jint pitch, jint height, jint pf, jbyteArray dst, jint jpegSubsamp,
+		jint jpegQual, jint flags)
+{
+	tjhandle handle=0;
+	unsigned long jpegSize=0;
+	jsize arraySize=0, actualPitch;
+	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
+
+	gethandle();
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
+		|| pitch<0 || x<0 || y<0)
+		_throw("Invalid argument in compress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	/* x and width are in pixels; scale to bytes to match the array length */
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];
+	if((*env)->GetArrayLength(env, src)<arraySize)
+		_throw("Source buffer is not large enough");
+	jpegSize=tjBufSize(width, height, jpegSubsamp);
+	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjCompress2(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width,
+		pitch, height, pf, &jpegBuf, &jpegSize, jpegSubsamp, jpegQual,
+		flags|TJFLAG_NOREALLOC)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		jpegBuf=srcBuf=NULL;  /* already released: skip the release below */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return (jint)jpegSize;
+}
+/* TJCompressor.compress(byte[] src, width, ...): forwards with x=0, y=0 */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
+		jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual,
+		jint flags)
+{
+	return Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII(
+		env, obj, src, 0, 0, width, pitch, height, pf, dst, jpegSubsamp, jpegQual,
+		flags);
+}
+/* TJCompressor.compress(int[] src, x, y, ...): compress a region to JPEG */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width,
+		jint stride, jint height, jint pf, jbyteArray dst, jint jpegSubsamp,
+		jint jpegQual, jint flags)
+{
+	tjhandle handle=0;
+	unsigned long jpegSize=0;
+	jsize arraySize=0, actualStride;
+	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
+
+	gethandle();
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
+		|| stride<0 || x<0 || y<0)
+		_throw("Invalid argument in compress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when compressing from an integer buffer.");
+	/* stride, x and width are counted in pixels, i.e. in jint elements */
+	actualStride=(stride==0)? width:stride;
+	arraySize=(y+height-1)*actualStride + x+width;
+	if((*env)->GetArrayLength(env, src)<arraySize)
+		_throw("Source buffer is not large enough");
+	jpegSize=tjBufSize(width, height, jpegSubsamp);
+	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjCompress2(handle, &srcBuf[(y*actualStride + x)*sizeof(jint)], width,
+		stride*sizeof(jint), height, pf, &jpegBuf, &jpegSize, jpegSubsamp,
+		jpegQual, flags|TJFLAG_NOREALLOC)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		jpegBuf=srcBuf=NULL;  /* already released: skip the release below */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return (jint)jpegSize;
+}
+/* TJCompressor.compress(int[] src, width, ...): forwards with x=0, y=0 */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII
+	(JNIEnv *env, jobject obj, jintArray src, jint width, jint pitch,
+		jint height, jint pf, jbyteArray dst, jint jpegSubsamp, jint jpegQual,
+		jint flags)
+{
+	return Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII(
+		env, obj, src, 0, 0, width, pitch, height, pf, dst, jpegSubsamp, jpegQual,
+		flags);
+}
+/* TJCompressor.compressFromYUV(): YUV buffer to JPEG, tjCompressFromYUV() */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pad, jint height,
+		jint subsamp, jbyteArray dst, jint jpegQual, jint flags)
+{
+	tjhandle handle=0;
+	unsigned long jpegSize=0;
+	jsize arraySize=0;
+	unsigned char *srcBuf=NULL, *jpegBuf=NULL;
+
+	gethandle();
+	/* Both Java arrays must be at least as large as the computed sizes */
+	arraySize=tjBufSizeYUV2(width, pad, height, subsamp);
+	if((*env)->GetArrayLength(env, src)<arraySize)
+		_throw("Source buffer is not large enough");
+	jpegSize=tjBufSize(width, height, subsamp);
+	if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjCompressFromYUV(handle, srcBuf, width, pad, height, subsamp, &jpegBuf,
+		&jpegSize, jpegQual, flags|TJFLAG_NOREALLOC)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		jpegBuf=srcBuf=NULL;  /* already released: skip the release below */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return (jint)jpegSize;
+}
+/* TJCompressor.encodeYUV(byte[] src, x, y, ...): encode a region to YUV */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint x, jint y, jint width,
+		jint pitch, jint height, jint pf, jbyteArray dst, jint pad, jint subsamp,
+		jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualPitch, yuvSize;
+	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+
+	gethandle();
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
+		|| pitch<0 || x<0 || y<0)
+		_throw("Invalid argument in encodeYUV()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	/* x and width are in pixels; scale to bytes to match the array length */
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];
+	if((*env)->GetArrayLength(env, src)<arraySize)
+		_throw("Source buffer is not large enough");
+	yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+	if(yuvSize==(jsize)-1)
+		_throw(tjGetErrorStr());
+	if((*env)->GetArrayLength(env, dst)<yuvSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjEncodeYUV3(handle, &srcBuf[y*actualPitch + x*tjPixelSize[pf]], width,
+		pitch, height, pf, dstBuf, pad, subsamp, flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		dstBuf=srcBuf=NULL;  /* already released: skip the release below */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return;
+}
+/* TJCompressor.encodeYUV(byte[] src, width, ...): forwards, x=0 y=0 pad=4 */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
+		jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+{
+	Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3BIII(
+		env, obj, src, 0, 0, width, pitch, height, pf, dst, 4, subsamp, flags);
+}
+/* TJCompressor.encodeYUV(int[] src, x, y, ...): encode a region to YUV */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3BIII
+	(JNIEnv *env, jobject obj, jintArray src, jint x, jint y, jint width,
+		jint stride, jint height, jint pf, jbyteArray dst, jint pad, jint subsamp,
+		jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualStride, yuvSize;
+	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+
+	gethandle();
+
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || width<1 || height<1
+		|| stride<0 || x<0 || y<0)
+		_throw("Invalid argument in encodeYUV()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when encoding from an integer buffer.");
+	/* stride, x and width are counted in pixels, i.e. in jint elements */
+	actualStride=(stride==0)? width:stride;
+	arraySize=(y+height-1)*actualStride + x+width;
+	if((*env)->GetArrayLength(env, src)<arraySize)
+		_throw("Source buffer is not large enough");
+	yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+	if(yuvSize==(jsize)-1)
+		_throw(tjGetErrorStr());
+	if((*env)->GetArrayLength(env, dst)<yuvSize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjEncodeYUV3(handle, &srcBuf[(y*actualStride + x)*sizeof(jint)], width,
+		stride*sizeof(jint), height, pf, dstBuf, pad, subsamp, flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		dstBuf=srcBuf=NULL;  /* already released: skip the release below */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
+	(JNIEnv *env, jobject obj, jintArray src, jint width, jint pitch,
+		jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+{  /* Offset-less variant: forwards with x=0, y=0, pad=4; pitch is passed as stride */
+	Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3BIII
+		(env, obj, src, 0, 0, width, pitch, height, pf, dst, 4, subsamp, flags);
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
+	(JNIEnv *env, jobject obj)
+{
+	tjhandle handle=0;
+	/* Destroys the TurboJPEG instance behind obj, then stores 0 in the field named by _fid. */
+	gethandle();
+	/* NOTE(review): gethandle() is defined outside this chunk; presumably it sets handle and _fid from obj. */
+	if(tjDestroy(handle)==-1) _throw(tjGetErrorStr());
+	(*env)->SetLongField(env, obj, _fid, 0);
+
+	bailout:
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_init
+	(JNIEnv *env, jobject obj)
+{
+	tjhandle tjInstance=tjInitDecompress();
+	jclass objClass;
+	jfieldID handleField;
+
+	if(tjInstance==NULL) _throw(tjGetErrorStr());
+	/* Store the new decompressor instance in the Java object's long "handle" field */
+	bailif0(objClass=(*env)->GetObjectClass(env, obj));
+	bailif0(handleField=(*env)->GetFieldID(env, objClass, "handle", "J"));
+	(*env)->SetLongField(env, obj, handleField, (jlong)tjInstance);
+
+	bailout:
+	return;
+}
+
+JNIEXPORT jobjectArray JNICALL Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors
+	(JNIEnv *env, jclass cls)
+{
+	jclass sfcls=NULL;  jfieldID numFid=0, denomFid=0;
+	tjscalingfactor *sf=NULL;  int n=0, i;
+	jobject sfobj=NULL;
+	jobjectArray sfjava=NULL;
+	/* Returns a TJScalingFactor[] mirroring the table from tjGetScalingFactors(). */
+	if((sf=tjGetScalingFactors(&n))==NULL || n==0)
+		_throw(tjGetErrorStr());
+	bailif0(sfcls=(*env)->FindClass(env, "org/libjpegturbo/turbojpeg/TJScalingFactor"));
+	bailif0(sfjava=(jobjectArray)(*env)->NewObjectArray(env, n, sfcls, 0));
+	/* The field IDs do not change between elements, so look them up once. */
+	bailif0(numFid=(*env)->GetFieldID(env, sfcls, "num", "I"));
+	bailif0(denomFid=(*env)->GetFieldID(env, sfcls, "denom", "I"));
+	for(i=0; i<n; i++)
+	{
+		bailif0(sfobj=(*env)->AllocObject(env, sfcls));
+		(*env)->SetIntField(env, sfobj, numFid, sf[i].num);
+		(*env)->SetIntField(env, sfobj, denomFid, sf[i].denom);
+		(*env)->SetObjectArrayElement(env, sfjava, i, sfobj);
+		(*env)->DeleteLocalRef(env, sfobj);  /* the array holds it now; stay within the local-ref budget */
+	}
+
+	bailout:
+	return sfjava;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize)
+{
+	tjhandle handle=0;
+	unsigned char *jpegBuf=NULL;
+	int width=0, height=0, jpegSubsamp=-1, jpegColorspace=-1;
+	/* Parses the JPEG header in src and stores the results in obj's src* int fields. */
+	gethandle();
+
+	if((*env)->GetArrayLength(env, src)<jpegSize)
+		_throw("Source buffer is not large enough");
+
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	/* The array is released on both the failure path and the success path below. */
+	if(tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize,
+		&width, &height, &jpegSubsamp, &jpegColorspace)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+		_throw(tjGetErrorStr());
+	}
+	(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);  jpegBuf=NULL;
+	/* NOTE(review): _cls and _fid are not declared in this function; presumably gethandle() provides them. */
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcSubsamp", "I"));
+	(*env)->SetIntField(env, obj, _fid, jpegSubsamp);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcColorspace", "I"));
+	(*env)->SetIntField(env, obj, _fid, jpegColorspace);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcWidth", "I"));
+	(*env)->SetIntField(env, obj, _fid, width);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcHeight", "I"));
+	(*env)->SetIntField(env, obj, _fid, height);
+
+	bailout:
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+		jint x, jint y, jint width, jint pitch, jint height, jint pf, jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualPitch;
+	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
+	/* Decompresses the JPEG image in src into dst, starting at pixel offset (x, y). */
+	gethandle();
+	/* Negative offsets would shrink arraySize and point before dst, so reject them. */
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || x<0 || y<0)
+		_throw("Invalid argument in decompress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	/* NOTE(review): width/height have no lower bound here; with 0 the arraySize below is not an image size -- confirm. */
+	if((*env)->GetArrayLength(env, src)<jpegSize)
+		_throw("Source buffer is not large enough");
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];
+	if((*env)->GetArrayLength(env, dst)<arraySize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize,
+		&dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf,
+		flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+		dstBuf=jpegBuf=NULL;  /* so the bailout code does not release them again */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+		jint width, jint pitch, jint height, jint pf, jint flags)
+{  /* Offset-less variant: forwards with x=0 and y=0 */
+	Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII(
+		env, obj, src, jpegSize, dst, 0, 0, width, pitch, height, pf, flags);
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst,
+		jint x, jint y, jint width, jint stride, jint height, jint pf, jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualStride;
+	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
+	/* Decompresses the JPEG image in src into the int[] dst at pixel offset (x, y). */
+	gethandle();
+	/* Negative offsets would shrink arraySize and point before dst, so reject them. */
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || x<0 || y<0)
+		_throw("Invalid argument in decompress()");
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when decompressing to an integer buffer.");
+	/* NOTE(review): width/height have no lower bound here; with 0 the arraySize below is not an image size -- confirm. */
+	if((*env)->GetArrayLength(env, src)<jpegSize)
+		_throw("Source buffer is not large enough");
+	actualStride=(stride==0)? width:stride;
+	arraySize=(y+height-1)*actualStride + x+width;
+	if((*env)->GetArrayLength(env, dst)<arraySize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+	/* dstBuf is a byte pointer, so element offsets and the stride are scaled by sizeof(jint). */
+	if(tjDecompress2(handle, jpegBuf, (unsigned long)jpegSize,
+		&dstBuf[(y*actualStride + x)*sizeof(jint)], width, stride*sizeof(jint),
+		height, pf, flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+		dstBuf=jpegBuf=NULL;  /* so the bailout code does not release them again */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jintArray dst,
+		jint width, jint stride, jint height, jint pf, jint flags)
+{
+	const jint x=0, y=0;  /* offset-less variant: start at the first element of dst */
+	Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII(
+		env, obj, src, jpegSize, dst, x, y, width, stride, height, pf, flags);
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+		jint desiredWidth, jint pad, jint desiredHeight, jint flags)
+{
+	tjhandle handle=0;
+	unsigned char *jpegBuf=NULL, *dstBuf=NULL;
+	int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0;
+	jsize yuvSize;
+	/* Decompresses the JPEG image in src into the YUV image buffer dst. */
+	gethandle();
+	/* The JPEG's subsampling and dimensions are read from obj's int fields (written by decompressHeader). */
+	if((*env)->GetArrayLength(env, src)<jpegSize)
+		_throw("Source buffer is not large enough");
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcSubsamp", "I"));
+	jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcWidth", "I"));
+	jpegWidth=(int)(*env)->GetIntField(env, obj, _fid);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcHeight", "I"));
+	jpegHeight=(int)(*env)->GetIntField(env, obj, _fid);
+	/* A desired width/height of 0 falls back to the JPEG's own dimension when sizing dst. */
+	yuvSize=(jsize)tjBufSizeYUV2(desiredWidth==0? jpegWidth:desiredWidth,
+		pad, desiredHeight==0? jpegHeight:desiredHeight, jpegSubsamp);
+	if(yuvSize==(unsigned long)-1)
+		_throw(tjGetErrorStr());
+	if((*env)->GetArrayLength(env, dst)<yuvSize)
+		_throw("Destination buffer is not large enough");
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+	if(tjDecompressToYUV2(handle, jpegBuf, (unsigned long)jpegSize, dstBuf,
+		desiredWidth, pad, desiredHeight, flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+		dstBuf=jpegBuf=NULL;
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI
+	(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+		jint flags)
+{  /* Forwards with desiredWidth=0, pad=4, desiredHeight=0 */
+	Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII
+		(env, obj, src, jpegSize, dst, 0, 4, 0, flags);
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3BIIIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint pad, jint subsamp,
+		jbyteArray dst, jint x, jint y, jint width, jint pitch, jint height,
+		jint pf, jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualPitch, yuvSize;
+	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+	/* Decodes the YUV image in src into the packed-pixel byte[] dst at offset (x, y). */
+	gethandle();
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || x<0 || y<0)
+		_throw("Invalid argument in decodeYUV()");  /* x/y<0 would defeat the size checks */
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	/* (unsigned long)-1 is tjBufSizeYUV2()'s error return; catch it before using the size. */
+	yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+	if(yuvSize==(unsigned long)-1)
+		_throw(tjGetErrorStr());
+	if((*env)->GetArrayLength(env, src)<yuvSize)
+		_throw("Source buffer is not large enough");
+	actualPitch=(pitch==0)? width*tjPixelSize[pf]:pitch;
+	arraySize=(y+height-1)*actualPitch + (x+width)*tjPixelSize[pf];
+	if((*env)->GetArrayLength(env, dst)<arraySize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+	if(tjDecodeYUV(handle, srcBuf, pad, subsamp,
+		&dstBuf[y*actualPitch + x*tjPixelSize[pf]], width, pitch, height, pf,
+		flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		dstBuf=srcBuf=NULL;  /* so the bailout code does not release them again */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3IIIIIIII
+	(JNIEnv *env, jobject obj, jbyteArray src, jint pad, jint subsamp,
+		jintArray dst, jint x, jint y, jint width, jint stride, jint height,
+		jint pf, jint flags)
+{
+	tjhandle handle=0;
+	jsize arraySize=0, actualStride, yuvSize;
+	unsigned char *srcBuf=NULL, *dstBuf=NULL;
+	/* Decodes the YUV image in src into the int[] image dst at pixel offset (x, y). */
+	gethandle();
+	if(pf<0 || pf>=org_libjpegturbo_turbojpeg_TJ_NUMPF || x<0 || y<0)
+		_throw("Invalid argument in decodeYUV()");  /* x/y<0 would defeat the size checks */
+	if(org_libjpegturbo_turbojpeg_TJ_NUMPF!=TJ_NUMPF)
+		_throw("Mismatch between Java and C API");
+	if(tjPixelSize[pf]!=sizeof(jint))
+		_throw("Pixel format must be 32-bit when decoding to an integer buffer.");
+	/* (unsigned long)-1 is tjBufSizeYUV2()'s error return; catch it before using the size. */
+	yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+	if(yuvSize==(unsigned long)-1)
+		_throw(tjGetErrorStr());
+	if((*env)->GetArrayLength(env, src)<yuvSize)
+		_throw("Source buffer is not large enough");
+	actualStride=(stride==0)? width:stride;
+	arraySize=(y+height-1)*actualStride + x+width;
+	if((*env)->GetArrayLength(env, dst)<arraySize)
+		_throw("Destination buffer is not large enough");
+
+	bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+	bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+	if(tjDecodeYUV(handle, srcBuf, pad, subsamp,
+		&dstBuf[(y*actualStride + x)*sizeof(jint)], width, stride*sizeof(jint),
+		height, pf, flags)==-1)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+		(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+		dstBuf=srcBuf=NULL;  /* so the bailout code does not release them again */
+		_throw(tjGetErrorStr());
+	}
+
+	bailout:
+	if(dstBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
+	if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+	return;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init
+	(JNIEnv *env, jobject obj)
+{
+	tjhandle tjInstance=tjInitTransform();
+	jclass objClass;
+	jfieldID handleField;
+
+	if(tjInstance==NULL) _throw(tjGetErrorStr());
+	/* Store the new transformer instance in the Java object's long "handle" field */
+	bailif0(objClass=(*env)->GetObjectClass(env, obj));
+	bailif0(handleField=(*env)->GetFieldID(env, objClass, "handle", "J"));
+	(*env)->SetLongField(env, obj, handleField, (jlong)tjInstance);
+
+	bailout:
+	return;
+}
+
+typedef struct _JNICustomFilterParams
+{
+	JNIEnv *env;  /* JNI environment JNICustomFilter() uses for its Java calls */
+	jobject tobj;  /* Java transform object handed back as customFilter()'s last argument */
+	jobject cfobj;  /* Java object whose customFilter() method is invoked */
+} JNICustomFilterParams;
+
+static int JNICustomFilter(short *coeffs, tjregion arrayRegion,
+	tjregion planeRegion, int componentIndex, int transformIndex,
+	tjtransform *transform)
+{
+	JNICustomFilterParams *params=(JNICustomFilterParams *)transform->data;
+	JNIEnv *env=params->env;
+	jobject tobj=params->tobj, cfobj=params->cfobj;
+  jobject arrayRegionObj, planeRegionObj, bufobj, borobj;
+	jclass cls;  jmethodID mid;  jfieldID fid;
+	/* C-side filter callback: wraps its arguments in Java objects and calls cfobj.customFilter(). */
+	bailif0(bufobj=(*env)->NewDirectByteBuffer(env, coeffs,
+		sizeof(short)*arrayRegion.w*arrayRegion.h));
+	bailif0(cls=(*env)->FindClass(env, "java/nio/ByteOrder"));
+  bailif0(mid=(*env)->GetStaticMethodID(env, cls, "nativeOrder",
+		"()Ljava/nio/ByteOrder;"));
+	bailif0(borobj=(*env)->CallStaticObjectMethod(env, cls, mid));
+	bailif0(cls=(*env)->GetObjectClass(env, bufobj));
+	bailif0(mid=(*env)->GetMethodID(env, cls, "order",
+		"(Ljava/nio/ByteOrder;)Ljava/nio/ByteBuffer;"));
+	(*env)->CallObjectMethod(env, bufobj, mid, borobj);
+  bailif0(mid=(*env)->GetMethodID(env, cls, "asShortBuffer",
+		"()Ljava/nio/ShortBuffer;"));
+	bailif0(bufobj=(*env)->CallObjectMethod(env, bufobj, mid));
+	/* bufobj is now a ShortBuffer view of coeffs in native byte order; next, mirror arrayRegion in a Rectangle. */
+	bailif0(cls=(*env)->FindClass(env, "java/awt/Rectangle"));
+	bailif0(arrayRegionObj=(*env)->AllocObject(env, cls));
+	bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I"));
+	(*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.x);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I"));
+	(*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.y);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I"));
+	(*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.w);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I"));
+	(*env)->SetIntField(env, arrayRegionObj, fid, arrayRegion.h);
+	/* Same again for planeRegion, using a second Rectangle. */
+	bailif0(planeRegionObj=(*env)->AllocObject(env, cls));
+	bailif0(fid=(*env)->GetFieldID(env, cls, "x", "I"));
+	(*env)->SetIntField(env, planeRegionObj, fid, planeRegion.x);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "y", "I"));
+	(*env)->SetIntField(env, planeRegionObj, fid, planeRegion.y);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "width", "I"));
+	(*env)->SetIntField(env, planeRegionObj, fid, planeRegion.w);
+	bailif0(fid=(*env)->GetFieldID(env, cls, "height", "I"));
+	(*env)->SetIntField(env, planeRegionObj, fid, planeRegion.h);
+	/* Look up customFilter() on cfobj's class and invoke it with the wrapped arguments. */
+	bailif0(cls=(*env)->GetObjectClass(env, cfobj));
+	bailif0(mid=(*env)->GetMethodID(env, cls, "customFilter",
+		"(Ljava/nio/ShortBuffer;Ljava/awt/Rectangle;Ljava/awt/Rectangle;IILorg/libjpegturbo/turbojpeg/TJTransform;)V"));
+	(*env)->CallVoidMethod(env, cfobj, mid, bufobj, arrayRegionObj,
+		planeRegionObj, componentIndex, transformIndex, tobj);
+	/* NOTE(review): no exception check follows the call, so 0 is returned even if customFilter() threw -- confirm intent. */
+	return 0;
+
+	bailout:
+	return -1;
+}
+
+JNIEXPORT jintArray JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_transform
+	(JNIEnv *env, jobject obj, jbyteArray jsrcBuf, jint jpegSize,
+		jobjectArray dstobjs, jobjectArray tobjs, jint flags)
+{
+	tjhandle handle=0;  int i, tjStatus=0;
+	unsigned char *jpegBuf=NULL, **dstBufs=NULL;  jsize n=0;
+	unsigned long *dstSizes=NULL;  tjtransform *t=NULL;
+	jbyteArray *jdstBufs=NULL;
+	int jpegWidth=0, jpegHeight=0, jpegSubsamp;
+	jintArray jdstSizes=0;  jint *dstSizesi=NULL;
+	JNICustomFilterParams *params=NULL;
+	/* Applies the n transforms described by tobjs to the JPEG image in jsrcBuf, */
+	/* writing result i into dstobjs[i]; returns an int[] of the n output sizes. */
+	gethandle();
+	if((*env)->GetArrayLength(env, jsrcBuf)<jpegSize)
+		_throw("Source buffer is not large enough");
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcWidth", "I"));
+	jpegWidth=(int)(*env)->GetIntField(env, obj, _fid);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcHeight", "I"));
+	jpegHeight=(int)(*env)->GetIntField(env, obj, _fid);
+	bailif0(_fid=(*env)->GetFieldID(env, _cls, "srcSubsamp", "I"));
+	jpegSubsamp=(int)(*env)->GetIntField(env, obj, _fid);
+
+	n=(*env)->GetArrayLength(env, dstobjs);
+	if(n!=(*env)->GetArrayLength(env, tobjs))
+		_throw("Mismatch between size of transforms array and destination buffers array");
+
+	/* calloc() so the bailout code never reads uninitialized pointers if a later allocation fails. */
+	if((dstBufs=(unsigned char **)calloc(n, sizeof(unsigned char *)))==NULL)
+		_throw("Memory allocation failure");
+	if((jdstBufs=(jbyteArray *)calloc(n, sizeof(jbyteArray)))==NULL)
+		_throw("Memory allocation failure");
+	if((dstSizes=(unsigned long *)calloc(n, sizeof(unsigned long)))==NULL)
+		_throw("Memory allocation failure");
+	if((t=(tjtransform *)calloc(n, sizeof(tjtransform)))==NULL)
+		_throw("Memory allocation failure");
+	if((params=(JNICustomFilterParams *)calloc(n, sizeof(JNICustomFilterParams)))
+		==NULL)
+		_throw("Memory allocation failure");
+	/* Copy the fields of each Java transform object into the matching C tjtransform. */
+	for(i=0; i<n; i++)
+	{
+		jobject tobj, cfobj;
+
+		bailif0(tobj=(*env)->GetObjectArrayElement(env, tobjs, i));
+		bailif0(_cls=(*env)->GetObjectClass(env, tobj));
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "op", "I"));
+		t[i].op=(*env)->GetIntField(env, tobj, _fid);
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "options", "I"));
+		t[i].options=(*env)->GetIntField(env, tobj, _fid);
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "x", "I"));
+		t[i].r.x=(*env)->GetIntField(env, tobj, _fid);
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "y", "I"));
+		t[i].r.y=(*env)->GetIntField(env, tobj, _fid);
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "width", "I"));
+		t[i].r.w=(*env)->GetIntField(env, tobj, _fid);
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "height", "I"));
+		t[i].r.h=(*env)->GetIntField(env, tobj, _fid);
+
+		bailif0(_fid=(*env)->GetFieldID(env, _cls, "cf",
+			"Lorg/libjpegturbo/turbojpeg/TJCustomFilter;"));
+		cfobj=(*env)->GetObjectField(env, tobj, _fid);
+		if(cfobj)
+		{
+			params[i].env=env;
+			params[i].tobj=tobj;
+			params[i].cfobj=cfobj;
+			t[i].customFilter=JNICustomFilter;
+			t[i].data=(void *)&params[i];
+		}
+	}
+
+	/* Validate every destination array before entering a critical region: other */
+	/* JNI calls (including throwing) must not be made while an array is held. */
+	for(i=0; i<n; i++)
+	{
+		int w=jpegWidth, h=jpegHeight;
+		if(t[i].r.w!=0) w=t[i].r.w;
+		if(t[i].r.h!=0) h=t[i].r.h;
+		bailif0(jdstBufs[i]=(*env)->GetObjectArrayElement(env, dstobjs, i));
+		if((unsigned long)(*env)->GetArrayLength(env, jdstBufs[i])
+			<tjBufSize(w, h, jpegSubsamp))
+			_throw("Destination buffer is not large enough");
+	}
+	bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, jsrcBuf, 0));
+	for(i=0; i<n; i++)
+	{
+		bailif0(dstBufs[i]=(*env)->GetPrimitiveArrayCritical(env, jdstBufs[i], 0));
+	}
+
+	/* NOTE(review): tjTransform() presumably calls JNICustomFilter, which makes JNI calls while these arrays are held -- confirm. */
+	tjStatus=tjTransform(handle, jpegBuf, jpegSize, n, dstBufs, dstSizes, t,
+		flags|TJFLAG_NOREALLOC);
+	(*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
+	jpegBuf=NULL;
+	for(i=0; i<n; i++)
+	{
+		(*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0);
+		dstBufs[i]=NULL;
+	}
+	if(tjStatus==-1) _throw(tjGetErrorStr());
+
+	bailif0(jdstSizes=(*env)->NewIntArray(env, n));
+	bailif0(dstSizesi=(*env)->GetIntArrayElements(env, jdstSizes, 0));
+	for(i=0; i<n; i++) dstSizesi[i]=(int)dstSizes[i];
+
+	bailout:
+	if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, jsrcBuf, jpegBuf, 0);
+	if(dstBufs)
+	{
+		for(i=0; i<n; i++)
+		{
+			if(dstBufs[i] && jdstBufs && jdstBufs[i])
+				(*env)->ReleasePrimitiveArrayCritical(env, jdstBufs[i], dstBufs[i], 0);
+		}
+		free(dstBufs);
+	}
+	if(jdstBufs) free(jdstBufs);
+	if(dstSizes) free(dstSizes);
+	if(dstSizesi) (*env)->ReleaseIntArrayElements(env, jdstSizes, dstSizesi, 0);
+	if(t) free(t);
+	if(params) free(params);
+	return jdstSizes;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy
+	(JNIEnv *env, jobject obj)
+{
+	Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy(env, obj);  /* reuses the compressor's teardown path */
+}
diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile
new file mode 100755
index 0000000..7d174ca
--- /dev/null
+++ b/turbojpeg-mapfile
@@ -0,0 +1,49 @@
+TURBOJPEG_1.0
+{
+	global:
+		tjInitCompress;
+		tjCompress;
+		TJBUFSIZE;
+		tjInitDecompress;
+		tjDecompressHeader;
+		tjDecompress;
+		tjDestroy;
+		tjGetErrorStr;
+	local:
+		*;
+};
+
+TURBOJPEG_1.1
+{
+	global:
+		TJBUFSIZEYUV;
+		tjDecompressHeader2;
+		tjDecompressToYUV;
+		tjEncodeYUV;
+} TURBOJPEG_1.0;
+
+TURBOJPEG_1.2
+{
+	global:
+		tjAlloc;
+		tjBufSize;
+		tjBufSizeYUV;
+		tjCompress2;
+		tjDecompress2;
+		tjEncodeYUV2;
+		tjFree;
+		tjGetScalingFactors;
+		tjInitTransform;
+		tjTransform;
+} TURBOJPEG_1.1;
+
+TURBOJPEG_1.4
+{
+	global:
+		tjBufSizeYUV2;
+		tjCompressFromYUV;
+		tjDecodeYUV;
+		tjDecompressHeader3;
+		tjDecompressToYUV2;
+		tjEncodeYUV3;
+} TURBOJPEG_1.2;
diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni
new file mode 100755
index 0000000..f9fc7e5
--- /dev/null
+++ b/turbojpeg-mapfile.jni
@@ -0,0 +1,82 @@
+TURBOJPEG_1.0
+{
+	global:
+		tjInitCompress;
+		tjCompress;
+		TJBUFSIZE;
+		tjInitDecompress;
+		tjDecompressHeader;
+		tjDecompress;
+		tjDestroy;
+		tjGetErrorStr;
+	local:
+		*;
+};
+
+TURBOJPEG_1.1
+{
+	global:
+		TJBUFSIZEYUV;
+		tjDecompressHeader2;
+		tjDecompressToYUV;
+		tjEncodeYUV;
+} TURBOJPEG_1.0;
+
+TURBOJPEG_1.2
+{
+	global:
+		tjAlloc;
+		tjBufSize;
+		tjBufSizeYUV;
+		tjCompress2;
+		tjDecompress2;
+		tjEncodeYUV2;
+		tjFree;
+		tjGetScalingFactors;
+		tjInitTransform;
+		tjTransform;
+		Java_org_libjpegturbo_turbojpeg_TJ_bufSize;
+		Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III;
+		Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_init;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_init;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;
+		Java_org_libjpegturbo_turbojpeg_TJTransformer_init;
+		Java_org_libjpegturbo_turbojpeg_TJTransformer_transform;
+} TURBOJPEG_1.1;
+
+TURBOJPEG_1.3
+{
+	global:
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3IIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII;
+} TURBOJPEG_1.2;
+
+TURBOJPEG_1.4
+{
+	global:
+		tjBufSizeYUV2;
+		tjCompressFromYUV;
+		tjDecodeYUV;
+		tjDecompressHeader3;
+		tjDecompressToYUV2;
+		tjEncodeYUV3;
+		Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIIIII_3BIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3BIIIIIII;
+		Java_org_libjpegturbo_turbojpeg_TJDecompressor_decodeYUV___3BII_3IIIIIIII;
+} TURBOJPEG_1.3;
diff --git a/turbojpeg.c b/turbojpeg.c
new file mode 100644
index 0000000..2490836
--- /dev/null
+++ b/turbojpeg.c
@@ -0,0 +1,1805 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TurboJPEG/LJT:  this implements the TurboJPEG API using libjpeg or
+   libjpeg-turbo */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <jinclude.h>
+#define JPEG_INTERNALS
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+#include "./turbojpeg.h"
+#include "./tjutil.h"
+#include "transupp.h"
+#include "./jpegcomp.h"
+
+extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **,
+	unsigned long *, boolean);
+extern void jpeg_mem_src_tj(j_decompress_ptr, unsigned char *, unsigned long);
+
+#define PAD(v, p) (((v)+(p)-1)&(~((p)-1)))  /* round v up to a multiple of p; p must be a power of 2 */
+#define isPow2(x) (((x)&((x)-1))==0)  /* nonzero if x has at most one bit set (so also for 0) */
+
+
+/* Error handling (based on example in example.c) */
+
+static char errStr[JMSG_LENGTH_MAX]="No error";
+
+struct my_error_mgr
+{
+	struct jpeg_error_mgr pub;  /* libjpeg error manager; my_error_exit() casts cinfo->err to my_error_ptr, presumably relying on this being first */
+	jmp_buf setjmp_buffer;  /* longjmp() target used by my_error_exit() */
+};
+typedef struct my_error_mgr *my_error_ptr;
+
+static void my_error_exit(j_common_ptr cinfo)
+{
+	/* Report through output_message, then jump back to the saved setjmp_buffer */
+	(*cinfo->err->output_message)(cinfo);
+	longjmp(((my_error_ptr)cinfo->err)->setjmp_buffer, 1);
+}
+
+/* Based on output_message() in jerror.c */
+
+static void my_output_message(j_common_ptr cinfo)
+{
+	cinfo->err->format_message(cinfo, errStr);  /* message is formatted into errStr, not printed */
+}
+
+
+/* Global structures, macros, etc. */
+
+enum {COMPRESS=1, DECOMPRESS=2};
+/* State behind a handle: the get*instance() macros cast the handle to tjinstance *. */
+typedef struct _tjinstance
+{
+	struct jpeg_compress_struct cinfo;
+	struct jpeg_decompress_struct dinfo;
+	struct my_error_mgr jerr;
+	int init;  /* presumably a mask of COMPRESS/DECOMPRESS -- TODO confirm against the init code */
+} tjinstance;
+
+static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3, 3};  /* per-subsampling-option value that getSubsamp() matches against num_components */
+
+static const JXFORM_CODE xformtypes[TJ_NUMXOP]=
+{
+	JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE,
+	JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270
+};  /* one JXFORM_CODE per TJ_NUMXOP entry; presumably indexed by the TurboJPEG transform op -- TODO confirm */
+
+#define NUMSF 16
+static const tjscalingfactor sf[NUMSF]={  /* 16 two-value entries, listed from {2, 1} down to {1, 8} */
+	{2, 1},
+	{15, 8},
+	{7, 4},
+	{13, 8},
+	{3, 2},
+	{11, 8},
+	{5, 4},
+	{9, 8},
+	{1, 1},
+	{7, 8},
+	{3, 4},
+	{5, 8},
+	{1, 2},
+	{3, 8},
+	{1, 4},
+	{1, 8}
+};
+
+#define _throw(m) {snprintf(errStr, JMSG_LENGTH_MAX, "%s", m);  \
+	retval=-1;  goto bailout;}  /* copies m into errStr; the caller must have a local retval and a bailout label */
+#define getinstance(handle) tjinstance *this=(tjinstance *)handle;  \
+	j_compress_ptr cinfo=NULL;  j_decompress_ptr dinfo=NULL;  \
+	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
+		return -1;}  \
+	cinfo=&this->cinfo;  dinfo=&this->dinfo;  /* declares this, cinfo and dinfo; makes the caller return -1 on a NULL handle */
+#define getcinstance(handle) tjinstance *this=(tjinstance *)handle;  \
+	j_compress_ptr cinfo=NULL;  \
+	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
+		return -1;}  \
+	cinfo=&this->cinfo;  /* compress-only form: declares this and cinfo */
+#define getdinstance(handle) tjinstance *this=(tjinstance *)handle;  \
+	j_decompress_ptr dinfo=NULL;  \
+	if(!this) {snprintf(errStr, JMSG_LENGTH_MAX, "Invalid handle");  \
+		return -1;}  \
+	dinfo=&this->dinfo;  /* decompress-only form: declares this and dinfo */
+
+static int getPixelFormat(int pixelSize, int flags)
+{
+	/* Maps a pixel size in bytes plus the TJ_BGR/TJ_ALPHAFIRST flags onto a */
+	/* TJPF_* pixel format; returns -1 for any pixel size other than 1, 3 or 4. */
+	const int bgr=(flags&TJ_BGR)!=0;
+	const int alphaFirst=(flags&TJ_ALPHAFIRST)!=0;
+
+	switch(pixelSize)
+	{
+		/* 1 byte per pixel: the flags are not consulted */
+		case 1:
+			return TJPF_GRAY;
+		/* 3 bytes per pixel: only the component order matters */
+		case 3:
+			return bgr? TJPF_BGR:TJPF_RGB;
+		/* 4 bytes per pixel: TJ_ALPHAFIRST selects the X-first formats */
+		case 4:
+			if(alphaFirst) return bgr? TJPF_XBGR:TJPF_XRGB;
+			return bgr? TJPF_BGRX:TJPF_RGBX;
+		default:
+			return -1;
+	}
+}
+
+static int setCompDefaults(struct jpeg_compress_struct *cinfo,
+	int pixelFormat, int subsamp, int jpegQual, int flags)
+{
+	int retval=0;
+	/* Configures cinfo's input colorspace, quality/DCT method, JPEG colorspace and sampling factors. */
+	switch(pixelFormat)
+	{
+		case TJPF_GRAY:
+			cinfo->in_color_space=JCS_GRAYSCALE;  break;
+		#if JCS_EXTENSIONS==1
+		case TJPF_RGB:
+			cinfo->in_color_space=JCS_EXT_RGB;  break;
+		case TJPF_BGR:
+			cinfo->in_color_space=JCS_EXT_BGR;  break;
+		case TJPF_RGBX:
+		case TJPF_RGBA:
+			cinfo->in_color_space=JCS_EXT_RGBX;  break;
+		case TJPF_BGRX:
+		case TJPF_BGRA:
+			cinfo->in_color_space=JCS_EXT_BGRX;  break;
+		case TJPF_XRGB:
+		case TJPF_ARGB:
+			cinfo->in_color_space=JCS_EXT_XRGB;  break;
+		case TJPF_XBGR:
+		case TJPF_ABGR:
+			cinfo->in_color_space=JCS_EXT_XBGR;  break;
+		#else
+		case TJPF_RGB:
+		case TJPF_BGR:
+		case TJPF_RGBX:
+		case TJPF_BGRX:
+		case TJPF_XRGB:
+		case TJPF_XBGR:
+		case TJPF_RGBA:
+		case TJPF_BGRA:
+		case TJPF_ARGB:
+		case TJPF_ABGR:
+			cinfo->in_color_space=JCS_RGB;  pixelFormat=TJPF_RGB;
+			break;  /* input_components below then uses the TJPF_RGB pixel size */
+		#endif
+		case TJPF_CMYK:
+			cinfo->in_color_space=JCS_CMYK;  break;
+	}
+	/* NOTE(review): there is no default case; an unlisted pixelFormat still indexes tjPixelSize[] below. */
+	cinfo->input_components=tjPixelSize[pixelFormat];
+	jpeg_set_defaults(cinfo);
+	if(jpegQual>=0)
+	{
+		jpeg_set_quality(cinfo, jpegQual, TRUE);
+		if(jpegQual>=96 || flags&TJFLAG_ACCURATEDCT) cinfo->dct_method=JDCT_ISLOW;
+		else cinfo->dct_method=JDCT_FASTEST;
+	}
+	if(subsamp==TJSAMP_GRAY)
+		jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+	else if(pixelFormat==TJPF_CMYK)
+		jpeg_set_colorspace(cinfo, JCS_YCCK);
+	else jpeg_set_colorspace(cinfo, JCS_YCbCr);
+	/* Component 0 (and 3, when present) takes the MCU size / 8 as its factor; components 1 and 2 are set to 1. */
+	cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8;
+	cinfo->comp_info[1].h_samp_factor=1;
+	cinfo->comp_info[2].h_samp_factor=1;
+	if(cinfo->num_components>3)
+		cinfo->comp_info[3].h_samp_factor=tjMCUWidth[subsamp]/8;
+	cinfo->comp_info[0].v_samp_factor=tjMCUHeight[subsamp]/8;
+	cinfo->comp_info[1].v_samp_factor=1;
+	cinfo->comp_info[2].v_samp_factor=1;
+	if(cinfo->num_components>3)
+		cinfo->comp_info[3].v_samp_factor=tjMCUHeight[subsamp]/8;
+
+	return retval;  /* always 0: nothing above modifies retval */
+}
+
+static int setDecompDefaults(struct jpeg_decompress_struct *dinfo,
+	int pixelFormat, int flags)
+{
+	int retval=0;  /* presumably changed by _throw(), whose definition is outside this view */
+	/* Choose dinfo->out_color_space for pixelFormat, then apply TJFLAG_FASTDCT */
+	switch(pixelFormat)
+	{
+		case TJPF_GRAY:
+			dinfo->out_color_space=JCS_GRAYSCALE;  break;
+		#if JCS_EXTENSIONS==1  /* one extended colorspace per packed RGB layout */
+		case TJPF_RGB:
+			dinfo->out_color_space=JCS_EXT_RGB;  break;
+		case TJPF_BGR:
+			dinfo->out_color_space=JCS_EXT_BGR;  break;
+		case TJPF_RGBX:
+			dinfo->out_color_space=JCS_EXT_RGBX;  break;
+		case TJPF_BGRX:
+			dinfo->out_color_space=JCS_EXT_BGRX;  break;
+		case TJPF_XRGB:
+			dinfo->out_color_space=JCS_EXT_XRGB;  break;
+		case TJPF_XBGR:
+			dinfo->out_color_space=JCS_EXT_XBGR;  break;
+		#if JCS_ALPHA_EXTENSIONS==1  /* without this, the alpha formats fall to default: */
+		case TJPF_RGBA:
+			dinfo->out_color_space=JCS_EXT_RGBA;  break;
+		case TJPF_BGRA:
+			dinfo->out_color_space=JCS_EXT_BGRA;  break;
+		case TJPF_ARGB:
+			dinfo->out_color_space=JCS_EXT_ARGB;  break;
+		case TJPF_ABGR:
+			dinfo->out_color_space=JCS_EXT_ABGR;  break;
+		#endif
+		#else  /* no colorspace extensions: every RGB-family format decodes as JCS_RGB */
+		case TJPF_RGB:
+		case TJPF_BGR:
+		case TJPF_RGBX:
+		case TJPF_BGRX:
+		case TJPF_XRGB:
+		case TJPF_XBGR:
+		case TJPF_RGBA:
+		case TJPF_BGRA:
+		case TJPF_ARGB:
+		case TJPF_ABGR:
+			dinfo->out_color_space=JCS_RGB;  break;
+		#endif
+		case TJPF_CMYK:
+			dinfo->out_color_space=JCS_CMYK;  break;
+		default:
+			_throw("Unsupported pixel format");
+	}
+
+	if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST;  /* otherwise dct_method is left as it was */
+
+	bailout:
+	return retval;
+}
+
+
+static int getSubsamp(j_decompress_ptr dinfo)
+{
+	/* Returns the index of the first subsampling option whose component count
+	   and per-component sampling factors match the JPEG header, or -1. */
+	int opt, c;
+	int fourComp=(dinfo->jpeg_color_space==JCS_YCCK
+		|| dinfo->jpeg_color_space==JCS_CMYK) && dinfo->num_components==4;
+
+	for(opt=0; opt<NUMSUBOPT; opt++)
+	{
+		int hs=tjMCUWidth[opt]/8, vs=tjMCUHeight[opt]/8, ok=1;
+
+		/* 4-component CMYK/YCCK images are matched against 3-sample options */
+		if(dinfo->num_components!=pixelsize[opt]
+			&& !(fourComp && pixelsize[opt]==3))
+			continue;
+		if(dinfo->comp_info[0].h_samp_factor!=hs
+			|| dinfo->comp_info[0].v_samp_factor!=vs)
+			continue;
+
+		for(c=1; c<dinfo->num_components && ok; c++)
+		{
+			/* Only the 4th component of YCCK shares the first one's factors */
+			int full=(dinfo->jpeg_color_space==JCS_YCCK && c==3);
+			int href=full? hs:1, vref=full? vs:1;
+			if(dinfo->comp_info[c].h_samp_factor!=href
+				|| dinfo->comp_info[c].v_samp_factor!=vref)
+				ok=0;
+		}
+		if(ok) return opt;
+	}
+
+	return -1;
+}
+
+
+#ifndef JCS_EXTENSIONS
+
+/* Conversion functions to emulate the colorspace extensions.  This allows the
+   TurboJPEG wrapper to be used with libjpeg */
+
+#define TORGB(PS, ROFFSET, GOFFSET, BOFFSET) {  \
+	/* src, dst, width, pitch and height are the caller's variables */  \
+	int srcSkip=pitch-width*PS;  \
+	int col;  \
+	while(height--)  \
+	{  \
+		for(col=0; col<width; col++, src+=PS, dst+=RGB_PIXELSIZE)  \
+		{  \
+			dst[RGB_RED]=src[ROFFSET];  \
+			dst[RGB_GREEN]=src[GOFFSET];  \
+			dst[RGB_BLUE]=src[BOFFSET];  \
+		}  \
+		src+=srcSkip;  \
+	}  \
+}
+
+static unsigned char *toRGB(unsigned char *src, int width, int pitch,
+	int height, int pixelFormat, unsigned char *dst)
+{  /* Repack src into libjpeg's compiled-in RGB layout; returns the buffer that holds the result */
+	unsigned char *retval=src;  /* stays src when the selected case compiles to nothing */
+	switch(pixelFormat)  /* each case converts only if its layout differs from RGB_RED/GREEN/BLUE/PIXELSIZE */
+	{
+		case TJPF_RGB:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=3
+			retval=dst;  TORGB(3, 0, 1, 2);
+			#endif
+			break;
+		case TJPF_BGR:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=3
+			retval=dst;  TORGB(3, 2, 1, 0);
+			#endif
+			break;
+		case TJPF_RGBX:
+		case TJPF_RGBA:  /* the 4th byte is not copied */
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 0, 1, 2);
+			#endif
+			break;
+		case TJPF_BGRX:
+		case TJPF_BGRA:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 2, 1, 0);
+			#endif
+			break;
+		case TJPF_XRGB:
+		case TJPF_ARGB:
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 1, 2, 3);
+			#endif
+			break;
+		case TJPF_XBGR:
+		case TJPF_ABGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			retval=dst;  TORGB(4, 3, 2, 1);
+			#endif
+			break;
+	}
+	return retval;  /* formats without a case (e.g. gray) also return src */
+}
+
+#define FROMRGB(PS, ROFFSET, GOFFSET, BOFFSET, SETALPHA) {  \
+	/* src, dst, width, pitch and height are the caller's variables */  \
+	int dstSkip=pitch-width*PS;  \
+	int col;  \
+	while(height--)  \
+	{  \
+		for(col=0; col<width; col++, dst+=PS, src+=RGB_PIXELSIZE)  \
+		{  \
+			dst[ROFFSET]=src[RGB_RED];  \
+			dst[GOFFSET]=src[RGB_GREEN];  \
+			dst[BOFFSET]=src[RGB_BLUE];  \
+			SETALPHA  \
+		}  \
+		dst+=dstSkip;  \
+	}  \
+}
+
+static void fromRGB(unsigned char *src, unsigned char *dst, int width,
+	int pitch, int height, int pixelFormat)
+{	/* Unpack libjpeg-layout RGB pixels from src into dst's pixelFormat */
+	switch(pixelFormat)	/* a case does nothing when the layouts already agree */
+	{
+		case TJPF_RGB:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=3
+			FROMRGB(3, 0, 1, 2,);
+			#endif
+			break;
+		case TJPF_BGR:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=3
+			FROMRGB(3, 2, 1, 0,);
+			#endif
+			break;
+		case TJPF_RGBX:
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 0, 1, 2,);
+			#endif
+			break;
+		case TJPF_RGBA:	/* alpha byte is forced to opaque */
+			#if RGB_RED!=0 || RGB_GREEN!=1 || RGB_BLUE!=2 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 0, 1, 2, dst[3]=0xFF;);
+			#endif
+			break;
+		case TJPF_BGRX:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 2, 1, 0,);
+			#endif
+			break;
+		case TJPF_BGRA:
+			#if RGB_RED!=2 || RGB_GREEN!=1 || RGB_BLUE!=0 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 2, 1, 0, dst[3]=0xFF;);
+			#endif
+			break;
+		case TJPF_XRGB:
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 1, 2, 3,);
+			#endif
+			break;
+		case TJPF_ARGB:	/* alpha leads in this layout */
+			#if RGB_RED!=1 || RGB_GREEN!=2 || RGB_BLUE!=3 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 1, 2, 3, dst[0]=0xFF;);
+			#endif
+			break;
+		case TJPF_XBGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 3, 2, 1,);
+			#endif
+			break;
+		case TJPF_ABGR:
+			#if RGB_RED!=3 || RGB_GREEN!=2 || RGB_BLUE!=1 || RGB_PIXELSIZE!=4
+			FROMRGB(4, 3, 2, 1, dst[0]=0xFF;);
+			#endif
+			break;
+	}
+}
+
+#endif
+
+
+/* General API functions */
+
+DLLEXPORT char* DLLCALL tjGetErrorStr(void)
+{
+	return errStr;  /* the buffer that functions in this file snprintf() their error messages into */
+}
+
+/* Destroy whichever libjpeg objects the instance initialized, then free the instance */
+DLLEXPORT int DLLCALL tjDestroy(tjhandle handle)
+{
+	getinstance(handle);  /* macro defined outside this view; presumably derives this/cinfo/dinfo from handle */
+	if(setjmp(this->jerr.setjmp_buffer)) return -1;  /* NOTE(review): this error return skips free(this) */
+	if(this->init&COMPRESS) jpeg_destroy_compress(cinfo);
+	if(this->init&DECOMPRESS) jpeg_destroy_decompress(dinfo);
+	free(this);
+	return 0;
+}
+
+
+/* These are exposed mainly because Windows can't malloc() and free() across
+   DLL boundaries except when the CRT DLL is used, and we don't use the CRT DLL
+   with turbojpeg.dll for compatibility reasons.  However, these functions
+   can potentially be used for other purposes by different implementations. */
+
+DLLEXPORT void DLLCALL tjFree(unsigned char *buf)
+{
+	free(buf);  /* free(NULL) is a no-op, so no guard is needed */
+}
+
+/* Allocate a buffer with this library's malloc(); the counterpart of tjFree() */
+DLLEXPORT unsigned char *DLLCALL tjAlloc(int bytes)
+{
+	return (unsigned char *)malloc(bytes);  /* NULL when the allocation fails */
+}
+
+
+/* Compressor  */
+
+static tjhandle _tjInitCompress(tjinstance *this)
+{
+	unsigned char buffer[1], *buf=buffer;  unsigned long size=1;  /* dummy 1-byte destination for the priming call below */
+	/* Sets up the compressor in 'this'; if libjpeg signals an error, frees 'this' and returns NULL */
+	/* This is also straight out of example.c */
+	this->cinfo.err=jpeg_std_error(&this->jerr.pub);
+	this->jerr.pub.error_exit=my_error_exit;
+	this->jerr.pub.output_message=my_output_message;
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		if(this) free(this);  return NULL;  /* only the free() is conditional; the return always runs */
+	}
+
+	jpeg_create_compress(&this->cinfo);
+	/* Make an initial call so it will create the destination manager */
+	jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0);
+
+	this->init|=COMPRESS;  /* flag tested by the compression entry points and tjDestroy() */
+	return (tjhandle)this;
+}
+
+DLLEXPORT tjhandle DLLCALL tjInitCompress(void)
+{
+	tjinstance *inst=(tjinstance *)malloc(sizeof(tjinstance));  /* zeroed below */
+	if(inst!=NULL)
+	{
+		MEMZERO(inst, sizeof(tjinstance));
+		return _tjInitCompress(inst);
+	}
+	snprintf(errStr, JMSG_LENGTH_MAX,
+		"tjInitCompress(): Memory allocation failure");
+	return NULL;
+}
+
+/* Worst-case size of a JPEG image with the given dimensions and subsampling */
+DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height,
+	int jpegSubsamp)
+{
+	unsigned long retval=0, pw, ph;  int mcuw, mcuh, chromasf;
+	if(width<1 || height<1 || jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT)
+		_throw("tjBufSize(): Invalid argument");
+	/* This allows for rare corner cases in which a JPEG image can actually be
+	   larger than the uncompressed input (we wouldn't mention it if it hadn't
+	   happened before.) */
+	mcuw=tjMCUWidth[jpegSubsamp];
+	mcuh=tjMCUHeight[jpegSubsamp];
+	chromasf=jpegSubsamp==TJSAMP_GRAY? 0: 4*64/(mcuw*mcuh);
+	/* Multiply as unsigned long: the int product overflows for large images */
+	pw=PAD(width, mcuw);  ph=PAD(height, mcuh);
+	retval=pw * ph * (2 + chromasf) + 2048;
+	bailout:
+	return retval;
+}
+
+DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height)
+{
+	unsigned long retval=0, pw, ph;
+	if(width<1 || height<1)
+		_throw("TJBUFSIZE(): Invalid argument");
+
+	/* This allows for rare corner cases in which a JPEG image can actually be
+	   larger than the uncompressed input (we wouldn't mention it if it hadn't
+	   happened before.)  The product is formed in unsigned long so that large
+	   dimensions cannot overflow int. */
+	pw=PAD(width, 16);  ph=PAD(height, 16);
+	retval=pw * ph * 6 + 2048;
+	bailout:  return retval;
+}
+
+/* Byte size of a planar YUV image whose rows are padded to 'pad' bytes */
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height,
+	int subsamp)
+{
+	unsigned long retval=0;  int pw, ph, cw, ch;
+	if(width<1 || height<1 || pad<1 || !isPow2(pad) || subsamp<0
+		|| subsamp>=NUMSUBOPT)
+		_throw("tjBufSizeYUV2(): Invalid argument");
+	pw=PAD(width, tjMCUWidth[subsamp]/8);
+	ph=PAD(height, tjMCUHeight[subsamp]/8);
+	cw=(int)((unsigned long)pw*8/tjMCUWidth[subsamp]);  /* widened: no int overflow */
+	ch=(int)((unsigned long)ph*8/tjMCUHeight[subsamp]);
+	retval=(unsigned long)ph*PAD(pw, pad)  /* luma plane, then two chroma planes */
+		+ (subsamp==TJSAMP_GRAY? 0:(unsigned long)ch*PAD(cw, pad)*2);
+	bailout:
+	return retval;
+}
+
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+	int subsamp)
+{
+	return tjBufSizeYUV2(width, 4, height, subsamp);  /* same computation with pad fixed at 4 */
+}
+
+DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
+	int subsamp)
+{
+	return tjBufSizeYUV(width, height, subsamp);  /* upper-case alias; arguments are forwarded unchanged */
+}
+
+
+DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, unsigned char *srcBuf,
+	int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf,
+	unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
+{
+	int i, retval=0, alloc=1;  JSAMPROW *row_pointer=NULL;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	#endif
+
+	getcinstance(handle)
+	if((this->init&COMPRESS)==0)
+		_throw("tjCompress2(): Instance has not been initialized for compression");
+
+	if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
+		|| pixelFormat>=TJ_NUMPF || jpegBuf==NULL || jpegSize==NULL
+		|| jpegSubsamp<0 || jpegSubsamp>=NUMSUBOPT || jpegQual<0 || jpegQual>100)
+		_throw("tjCompress2(): Invalid argument");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
+
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK)
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
+		if(!rgbBuf) _throw("tjCompress2(): Memory allocation failure");
+		srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
+		pitch=width*RGB_PIXELSIZE;
+	}
+	#endif
+
+	cinfo->image_width=width;
+	cinfo->image_height=height;
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	if(flags&TJFLAG_NOREALLOC)
+	{
+		alloc=0;  *jpegSize=tjBufSize(width, height, jpegSubsamp);
+	}
+	jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+	if(setCompDefaults(cinfo, pixelFormat, jpegSubsamp, jpegQual, flags)==-1)
+		return -1;
+
+	jpeg_start_compress(cinfo, TRUE);
+	if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*height))==NULL)
+		_throw("tjCompress2(): Memory allocation failure");
+	for(i=0; i<height; i++)
+	{
+		if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&srcBuf[(height-i-1)*pitch];
+		else row_pointer[i]=&srcBuf[i*pitch];
+	}
+	while(cinfo->next_scanline<cinfo->image_height)
+	{
+		jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline],
+			cinfo->image_height-cinfo->next_scanline);
+	}
+	jpeg_finish_compress(cinfo);
+
+	bailout:
+	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
+	if(row_pointer) free(row_pointer);
+	return retval;
+}
+
+DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf,
+	int width, int pitch, int height, int pixelSize, unsigned char *jpegBuf,
+	unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)
+{	/* Legacy entry point: forwards to tjEncodeYUV2() or tjCompress2() */
+	int retval=0;  unsigned long size=0;  /* 0 if tjCompress2() fails before setting it */
+	if(flags&TJ_YUV)
+	{
+		size=tjBufSizeYUV(width, height, jpegSubsamp);
+		retval=tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+			getPixelFormat(pixelSize, flags), jpegBuf, jpegSubsamp, flags);
+	}
+	else
+	{
+		retval=tjCompress2(handle, srcBuf, width, pitch, height,
+			getPixelFormat(pixelSize, flags), &jpegBuf, &size, jpegSubsamp, jpegQual,
+			flags|TJFLAG_NOREALLOC);  /* caller owns jpegBuf, so never reallocate it */
+	}
+	*jpegSize=size;
+	return retval;
+}
+
+
+DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, unsigned char *srcBuf,
+	int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf,
+	int pad, int subsamp, int flags)
+{
+	int i, retval=0;  JSAMPROW *row_pointer=NULL;
+	JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
+	JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS];
+	JSAMPROW *outbuf[MAX_COMPONENTS];
+	int row, pw, ph, cw[MAX_COMPONENTS], ch[MAX_COMPONENTS];
+	JSAMPLE *ptr=dstBuf;
+	unsigned long yuvsize=0;
+	jpeg_component_info *compptr;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	#endif
+
+	getcinstance(handle);
+
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		tmpbuf[i]=NULL;  _tmpbuf[i]=NULL;
+		tmpbuf2[i]=NULL;  _tmpbuf2[i]=NULL;  outbuf[i]=NULL;
+	}
+
+	if((this->init&COMPRESS)==0)
+		_throw("tjEncodeYUV3(): Instance has not been initialized for compression");
+
+	if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
+		|| pixelFormat>=TJ_NUMPF || dstBuf==NULL || pad<0 || !isPow2(pad)
+		|| subsamp<0 || subsamp>=NUMSUBOPT)
+		_throw("tjEncodeYUV3(): Invalid argument");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	if(pixelFormat==TJPF_CMYK)
+		_throw("tjEncodeYUV3(): Cannot generate YUV images from CMYK pixels");
+
+	if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
+
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK)
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
+		if(!rgbBuf) _throw("tjEncodeYUV3(): Memory allocation failure");
+		srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
+		pitch=width*RGB_PIXELSIZE;
+	}
+	#endif
+
+	cinfo->image_width=width;
+	cinfo->image_height=height;
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	yuvsize=tjBufSizeYUV2(width, pad, height, subsamp);
+	if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1;
+
+	/* Execute only the parts of jpeg_start_compress() that we need.  If we
+	   were to call the whole jpeg_start_compress() function, then it would try
+	   to write the file headers, which could overflow the output buffer if the
+	   YUV image were very small. */
+	if(cinfo->global_state!=CSTATE_START)
+		_throw("tjEncodeYUV3(): libjpeg API is in the wrong state");
+	(*cinfo->err->reset_error_mgr)((j_common_ptr)cinfo);
+	jinit_c_master_control(cinfo, FALSE);
+	jinit_color_converter(cinfo);
+	jinit_downsampler(cinfo);
+	(*cinfo->cconvert->start_pass)(cinfo);
+
+	pw=PAD(width, cinfo->max_h_samp_factor);
+	ph=PAD(height, cinfo->max_v_samp_factor);
+
+	if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph))==NULL)
+		_throw("tjEncodeYUV3(): Memory allocation failure");
+	for(i=0; i<height; i++)
+	{
+		if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&srcBuf[(height-i-1)*pitch];
+		else row_pointer[i]=&srcBuf[i*pitch];
+	}
+	if(height<ph)
+		for(i=height; i<ph; i++) row_pointer[i]=row_pointer[height-1];
+
+	for(i=0; i<cinfo->num_components; i++)
+	{
+		compptr=&cinfo->comp_info[i];
+		_tmpbuf[i]=(JSAMPLE *)malloc(
+			PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE)
+				/compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16);
+		if(!_tmpbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
+		tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor);
+		if(!tmpbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
+		for(row=0; row<cinfo->max_v_samp_factor; row++)
+		{
+			unsigned char *_tmpbuf_aligned=
+				(unsigned char *)PAD((size_t)_tmpbuf[i], 16);
+			tmpbuf[i][row]=&_tmpbuf_aligned[
+				PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE)
+					/compptr->h_samp_factor, 16) * row];
+		}
+		_tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
+			* compptr->v_samp_factor + 16);
+		if(!_tmpbuf2[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
+		tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
+		if(!tmpbuf2[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
+		for(row=0; row<compptr->v_samp_factor; row++)
+		{
+			unsigned char *_tmpbuf2_aligned=
+				(unsigned char *)PAD((size_t)_tmpbuf2[i], 16);
+			tmpbuf2[i][row]=&_tmpbuf2_aligned[
+				PAD(compptr->width_in_blocks*DCTSIZE, 16) * row];
+		}
+		cw[i]=pw*compptr->h_samp_factor/cinfo->max_h_samp_factor;
+		ch[i]=ph*compptr->v_samp_factor/cinfo->max_v_samp_factor;
+		outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]);
+		if(!outbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
+		for(row=0; row<ch[i]; row++)
+		{
+			outbuf[i][row]=ptr;
+			ptr+=PAD(cw[i], pad);
+		}
+	}
+	if(yuvsize!=(unsigned long)(ptr-dstBuf))
+		_throw("tjEncodeYUV3(): Generated image is not the correct size");
+
+	for(row=0; row<ph; row+=cinfo->max_v_samp_factor)
+	{
+		(*cinfo->cconvert->color_convert)(cinfo, &row_pointer[row], tmpbuf, 0,
+			cinfo->max_v_samp_factor);
+		(cinfo->downsample->downsample)(cinfo, tmpbuf, 0, tmpbuf2, 0);
+		for(i=0, compptr=cinfo->comp_info; i<cinfo->num_components; i++, compptr++)
+			jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i],
+				row*compptr->v_samp_factor/cinfo->max_v_samp_factor,
+				compptr->v_samp_factor, cw[i]);
+	}
+	cinfo->next_scanline+=height;
+	jpeg_abort_compress(cinfo);
+
+	bailout:
+	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
+	if(row_pointer) free(row_pointer);
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		if(tmpbuf[i]!=NULL) free(tmpbuf[i]);
+		if(_tmpbuf[i]!=NULL) free(_tmpbuf[i]);
+		if(tmpbuf2[i]!=NULL) free(tmpbuf2[i]);
+		if(_tmpbuf2[i]!=NULL) free(_tmpbuf2[i]);
+		if(outbuf[i]!=NULL) free(outbuf[i]);
+	}
+	return retval;
+}
+
+DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf,
+	int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf,
+	int subsamp, int flags)
+{
+	return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
+		dstBuf, 4, subsamp, flags);  /* 4 is passed as tjEncodeYUV3()'s pad argument */
+}
+
+DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf,
+	int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
+	int subsamp, int flags)
+{
+	return tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+		getPixelFormat(pixelSize, flags), dstBuf, subsamp, flags);  /* legacy pixelSize+flags -> TJPF_* format */
+}
+
+/* Compress a planar YUV image (rows padded to 'pad' bytes) into a JPEG image */
+DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, unsigned char *srcBuf,
+	int width, int pad, int height, int subsamp, unsigned char **jpegBuf,
+	unsigned long *jpegSize, int jpegQual, int flags)
+{
+	int i, row, retval=0, alloc=1;  JSAMPROW *inbuf[MAX_COMPONENTS];
+	int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+		tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
+	JSAMPLE *_tmpbuf=NULL, *ptr=srcBuf;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
+
+	getcinstance(handle)
+
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		tmpbuf[i]=NULL;  inbuf[i]=NULL;
+	}
+
+	if((this->init&COMPRESS)==0)
+		_throw("tjCompressFromYUV(): Instance has not been initialized for compression");
+	/* pad must be a positive power of 2, matching what tjBufSizeYUV2() accepts */
+	if(srcBuf==NULL || width<=0 || pad<1 || !isPow2(pad) || height<=0
+		|| subsamp<0 || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL
+		|| jpegQual<0 || jpegQual>100)
+		_throw("tjCompressFromYUV(): Invalid argument");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	cinfo->image_width=width;
+	cinfo->image_height=height;
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	if(flags&TJFLAG_NOREALLOC)
+	{
+		alloc=0;  *jpegSize=tjBufSize(width, height, subsamp);
+	}
+	jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+	if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1)
+		{retval=-1;  goto bailout;}  /* go through cleanup instead of returning */
+	cinfo->raw_data_in=TRUE;
+
+	jpeg_start_compress(cinfo, TRUE);
+	for(i=0; i<cinfo->num_components; i++)
+	{
+		jpeg_component_info *compptr=&cinfo->comp_info[i];
+		int ih;
+		iw[i]=compptr->width_in_blocks*DCTSIZE;
+		ih=compptr->height_in_blocks*DCTSIZE;
+		cw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor)
+			*compptr->h_samp_factor/cinfo->max_h_samp_factor;
+		ch[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor)
+			*compptr->v_samp_factor/cinfo->max_v_samp_factor;
+		/* A plane that is not a whole number of blocks is staged via tmpbuf */
+		if(iw[i]!=cw[i] || ih!=ch[i]) usetmpbuf=1;
+		th[i]=compptr->v_samp_factor*DCTSIZE;
+		tmpbufsize+=iw[i]*th[i];
+		if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]))==NULL)
+			_throw("tjCompressFromYUV(): Memory allocation failure");
+		for(row=0; row<ch[i]; row++)
+		{
+			inbuf[i][row]=ptr;
+			ptr+=PAD(cw[i], pad);
+		}
+	}
+	if(usetmpbuf)
+	{
+		if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
+			_throw("tjCompressFromYUV(): Memory allocation failure");
+		ptr=_tmpbuf;
+		for(i=0; i<cinfo->num_components; i++)
+		{
+			if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
+				_throw("tjCompressFromYUV(): Memory allocation failure");
+			for(row=0; row<th[i]; row++)
+			{
+				tmpbuf[i][row]=ptr;
+				ptr+=iw[i];
+			}
+		}
+	}
+	/* Feed libjpeg max_v_samp_factor*DCTSIZE image rows of raw data per pass */
+	for(row=0; row<(int)cinfo->image_height;
+		row+=cinfo->max_v_samp_factor*DCTSIZE)
+	{
+		JSAMPARRAY yuvptr[MAX_COMPONENTS];
+		int crow[MAX_COMPONENTS];
+		for(i=0; i<cinfo->num_components; i++)
+		{
+			jpeg_component_info *compptr=&cinfo->comp_info[i];
+			crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor;
+			if(usetmpbuf)
+			{
+				int j, k;
+				for(j=0; j<min(th[i], ch[i]-crow[i]); j++)
+				{
+					memcpy(tmpbuf[i][j], inbuf[i][crow[i]+j], cw[i]);
+					/* Duplicate last sample in row to fill out MCU */
+					for(k=cw[i]; k<iw[i]; k++) tmpbuf[i][j][k]=tmpbuf[i][j][cw[i]-1];
+				}
+				/* Duplicate last row to fill out MCU */
+				for(j=ch[i]-crow[i]; j<th[i]; j++)
+					memcpy(tmpbuf[i][j], tmpbuf[i][ch[i]-crow[i]-1], iw[i]);
+				yuvptr[i]=tmpbuf[i];
+			}
+			else
+				yuvptr[i]=&inbuf[i][crow[i]];
+		}
+		jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor*DCTSIZE);
+	}
+	jpeg_finish_compress(cinfo);
+
+	bailout:
+	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		if(tmpbuf[i]) free(tmpbuf[i]);
+		if(inbuf[i]) free(inbuf[i]);
+	}
+	if(_tmpbuf) free(_tmpbuf);
+	return retval;
+}
+
+
+/* Decompressor */
+
+static tjhandle _tjInitDecompress(tjinstance *this)
+{
+	unsigned char buffer[1];  /* dummy 1-byte source for the priming call below */
+	/* Sets up the decompressor in 'this'; if libjpeg signals an error, frees 'this' and returns NULL */
+	/* This is also straight out of example.c */
+	this->dinfo.err=jpeg_std_error(&this->jerr.pub);
+	this->jerr.pub.error_exit=my_error_exit;
+	this->jerr.pub.output_message=my_output_message;
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		if(this) free(this);  return NULL;  /* only the free() is conditional; the return always runs */
+	}
+
+	jpeg_create_decompress(&this->dinfo);
+	/* Make an initial call so it will create the source manager */
+	jpeg_mem_src_tj(&this->dinfo, buffer, 1);
+
+	this->init|=DECOMPRESS;  /* flag tested by the decompression entry points and tjDestroy() */
+	return (tjhandle)this;
+}
+
+DLLEXPORT tjhandle DLLCALL tjInitDecompress(void)
+{
+	tjinstance *inst=(tjinstance *)malloc(sizeof(tjinstance));  /* zeroed below */
+	if(inst!=NULL)
+	{
+		MEMZERO(inst, sizeof(tjinstance));
+		return _tjInitDecompress(inst);
+	}
+	snprintf(errStr, JMSG_LENGTH_MAX,
+		"tjInitDecompress(): Memory allocation failure");
+	return NULL;
+}
+
+
+DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle,
+	unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+	int *jpegSubsamp, int *jpegColorspace)
+{
+	int retval=0;
+	/* Reads only the JPEG header and reports dimensions, subsampling and colorspace */
+	getdinstance(handle);
+	if((this->init&DECOMPRESS)==0)
+		_throw("tjDecompressHeader3(): Instance has not been initialized for decompression");
+
+	if(jpegBuf==NULL || jpegSize<=0 || width==NULL || height==NULL
+		|| jpegSubsamp==NULL || jpegColorspace==NULL)
+		_throw("tjDecompressHeader3(): Invalid argument");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		return -1;
+	}
+
+	jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+	jpeg_read_header(dinfo, TRUE);
+	/* Copy the header fields out; -1 marks a value that could not be mapped */
+	*width=dinfo->image_width;
+	*height=dinfo->image_height;
+	*jpegSubsamp=getSubsamp(dinfo);
+	switch(dinfo->jpeg_color_space)
+	{
+		case JCS_GRAYSCALE:  *jpegColorspace=TJCS_GRAY;  break;
+		case JCS_RGB:        *jpegColorspace=TJCS_RGB;  break;
+		case JCS_YCbCr:      *jpegColorspace=TJCS_YCbCr;  break;
+		case JCS_CMYK:       *jpegColorspace=TJCS_CMYK;  break;
+		case JCS_YCCK:       *jpegColorspace=TJCS_YCCK;  break;
+		default:             *jpegColorspace=-1;  break;
+	}
+
+	jpeg_abort_decompress(dinfo);  /* no image data is decoded by this function */
+	/* The outputs above have already been written when one of these checks fails */
+	if(*jpegSubsamp<0)
+		_throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image");
+	if(*jpegColorspace<0)
+		_throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image");
+	if(*width<1 || *height<1)
+		_throw("tjDecompressHeader3(): Invalid data returned in header");
+
+	bailout:
+	return retval;
+}
+
+DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+	unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+	int *jpegSubsamp)
+{
+	int jpegColorspace;  /* receives the colorspace, which this entry point does not report */
+	return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height,
+		jpegSubsamp, &jpegColorspace);
+}
+
+DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle,
+	unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height)
+{
+	int jpegSubsamp;  /* receives the subsampling, which this entry point does not report */
+	return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height,
+		&jpegSubsamp);
+}
+
+
+DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors)
+{
+	/* Stores the table length in *numscalingfactors and returns the table */
+	if(numscalingfactors!=NULL)
+	{
+		*numscalingfactors=NUMSF;
+		return (tjscalingfactor *)sf;
+	}
+	snprintf(errStr, JMSG_LENGTH_MAX,
+		"tjGetScalingFactors(): Invalid argument");
+	return NULL;
+}
+
+
+DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle, unsigned char *jpegBuf,
+	unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch,
+	int height, int pixelFormat, int flags)
+{
+	int i, retval=0;  JSAMPROW *row_pointer=NULL;
+	int jpegwidth, jpegheight, scaledw, scaledh;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	unsigned char *_dstBuf=NULL;  int _pitch=0;
+	#endif
+
+	getdinstance(handle);
+	if((this->init&DECOMPRESS)==0)
+		_throw("tjDecompress2(): Instance has not been initialized for decompression");
+
+	if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pitch<0
+		|| height<0 || pixelFormat<0 || pixelFormat>=TJ_NUMPF)
+		_throw("tjDecompress2(): Invalid argument");
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+	jpeg_read_header(dinfo, TRUE);
+	if(setDecompDefaults(dinfo, pixelFormat, flags)==-1)
+	{
+		retval=-1;  goto bailout;
+	}
+
+	if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE;
+
+	jpegwidth=dinfo->image_width;  jpegheight=dinfo->image_height;
+	if(width==0) width=jpegwidth;
+	if(height==0) height=jpegheight;
+	for(i=0; i<NUMSF; i++)
+	{
+		scaledw=TJSCALED(jpegwidth, sf[i]);
+		scaledh=TJSCALED(jpegheight, sf[i]);
+		if(scaledw<=width && scaledh<=height)
+			break;
+	}
+	if(scaledw>width || scaledh>height)
+		_throw("tjDecompress2(): Could not scale down to desired image dimensions");
+	width=scaledw;  height=scaledh;
+	dinfo->scale_num=sf[i].num;
+	dinfo->scale_denom=sf[i].denom;
+
+	jpeg_start_decompress(dinfo);
+	if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat];
+
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK &&
+		(RGB_RED!=tjRedOffset[pixelFormat] ||
+			RGB_GREEN!=tjGreenOffset[pixelFormat] ||
+			RGB_BLUE!=tjBlueOffset[pixelFormat] ||
+			RGB_PIXELSIZE!=tjPixelSize[pixelFormat]))
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*3);
+		if(!rgbBuf) _throw("tjDecompress2(): Memory allocation failure");
+		_pitch=pitch;  pitch=width*3;
+		_dstBuf=dstBuf;  dstBuf=rgbBuf;
+	}
+	#endif
+
+	if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)
+		*dinfo->output_height))==NULL)
+		_throw("tjDecompress2(): Memory allocation failure");
+	for(i=0; i<(int)dinfo->output_height; i++)
+	{
+		if(flags&TJFLAG_BOTTOMUP)
+			row_pointer[i]=&dstBuf[(dinfo->output_height-i-1)*pitch];
+		else row_pointer[i]=&dstBuf[i*pitch];
+	}
+	while(dinfo->output_scanline<dinfo->output_height)
+	{
+		jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline],
+			dinfo->output_height-dinfo->output_scanline);
+	}
+	jpeg_finish_decompress(dinfo);
+
+	#ifndef JCS_EXTENSIONS
+	fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat);
+	#endif
+
+	bailout:
+	if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
+	if(row_pointer) free(row_pointer);
+	return retval;
+}
+
+DLLEXPORT int DLLCALL tjDecompress(tjhandle handle, unsigned char *jpegBuf,
+	unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch,
+	int height, int pixelSize, int flags)
+{
+	if(flags&TJ_YUV)  /* YUV path: width, pitch, height and pixelSize are not forwarded */
+		return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags);
+	else
+		return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch,
+			height, getPixelFormat(pixelSize, flags), flags);  /* legacy pixelSize+flags -> TJPF_* format */
+}
+
+/* Hand-populate dinfo for a 1- or 3-component image with the given subsampling */
+static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
+	int pixelFormat, int subsamp, int flags)
+{
+	int c, ncomp=(subsamp==TJSAMP_GRAY)? 1:3;
+
+	dinfo->scale_num=dinfo->scale_denom=1;
+	dinfo->num_components=dinfo->comps_in_scan=ncomp;
+	dinfo->jpeg_color_space=(ncomp==1)? JCS_GRAYSCALE:JCS_YCbCr;
+
+	/* The component array is obtained from libjpeg's memory manager
+	   (JPOOL_IMAGE pool), so it is not freed explicitly here */
+	dinfo->comp_info=(jpeg_component_info *)
+		(*dinfo->mem->alloc_small)((j_common_ptr)dinfo, JPOOL_IMAGE,
+			ncomp*sizeof(jpeg_component_info));
+
+	for(c=0; c<ncomp; c++)
+	{
+		jpeg_component_info *comp=&dinfo->comp_info[c];
+		int first=(c==0);
+
+		/* Only the first component carries the subsampling factors */
+		comp->h_samp_factor=first? tjMCUWidth[subsamp]/8:1;
+		comp->v_samp_factor=first? tjMCUHeight[subsamp]/8:1;
+		comp->component_index=c;
+		comp->component_id=c+1;
+
+		/* Table 0 for the first component, table 1 for the rest */
+		comp->ac_tbl_no=first? 0:1;
+		comp->dc_tbl_no=comp->ac_tbl_no;
+		comp->quant_tbl_no=comp->dc_tbl_no;
+		dinfo->cur_comp_info[c]=comp;
+	}
+	dinfo->data_precision=8;
+
+	/* Allocate whichever of the first two quantization tables is missing */
+	for(c=0; c<2; c++)
+	{
+		if(dinfo->quant_tbl_ptrs[c]==NULL)
+			dinfo->quant_tbl_ptrs[c]=jpeg_alloc_quant_table((j_common_ptr)dinfo);
+	}
+	return 0;
+}
+
+
+int my_read_markers(j_decompress_ptr dinfo)
+{
+	return JPEG_REACHED_SOS;  /* stub installed by tjDecodeYUV(): reads nothing, reports start-of-scan */
+}
+
+void my_reset_marker_reader(j_decompress_ptr dinfo)
+{  /* intentionally empty; tjDecodeYUV() installs it as the marker reader's reset hook */
+}
+
+/* Decode the planar YUV image in srcBuf into pixels in dstBuf by feeding its
+   rows to libjpeg's upsampler.  Returns 0 on success or -1 on error. */
+DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, unsigned char *srcBuf,
+	int pad, int subsamp, unsigned char *dstBuf, int width, int pitch,
+	int height, int pixelFormat, int flags)
+{
+	int i, retval=0;  JSAMPROW *row_pointer=NULL;
+	JSAMPLE *_tmpbuf[MAX_COMPONENTS];
+	JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
+	int row, pw, ph, cw[MAX_COMPONENTS], ch[MAX_COMPONENTS];
+	JSAMPLE *ptr=srcBuf;
+	unsigned long yuvsize=0;
+	jpeg_component_info *compptr;
+	#ifndef JCS_EXTENSIONS
+	unsigned char *rgbBuf=NULL;
+	unsigned char *_dstBuf=NULL;  int _pitch=0;
+	#endif
+	int (*old_read_markers)(j_decompress_ptr);
+	void (*old_reset_marker_reader)(j_decompress_ptr);
+
+	getdinstance(handle);
+
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		tmpbuf[i]=NULL;  _tmpbuf[i]=NULL;  inbuf[i]=NULL;
+	}
+
+	if((this->init&DECOMPRESS)==0)
+		_throw("tjDecodeYUV(): Instance has not been initialized for decompression");
+	/* pad<1 is rejected here, the same rule tjDecompressToYUV2() applies. */
+	if(srcBuf==NULL || pad<1 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT
+		|| dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
+		|| pixelFormat>=TJ_NUMPF)
+		_throw("tjDecodeYUV(): Invalid argument");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	if(pixelFormat==TJPF_CMYK)
+		_throw("tjDecodeYUV(): Cannot decode YUV images into CMYK pixels.");
+
+	if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
+	dinfo->image_width=width;
+	dinfo->image_height=height;
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	yuvsize=tjBufSizeYUV2(width, pad, height, subsamp);
+	if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1)
+	{
+		retval=-1;  goto bailout;
+	}
+	old_read_markers=dinfo->marker->read_markers;
+	dinfo->marker->read_markers=my_read_markers;
+	old_reset_marker_reader=dinfo->marker->reset_marker_reader;
+	dinfo->marker->reset_marker_reader=my_reset_marker_reader;
+	jpeg_read_header(dinfo, TRUE);  /* stubs above: no markers are parsed */
+	dinfo->marker->read_markers=old_read_markers;
+	dinfo->marker->reset_marker_reader=old_reset_marker_reader;
+
+	if(setDecompDefaults(dinfo, pixelFormat, flags)==-1)
+	{
+		retval=-1;  goto bailout;
+	}
+	dinfo->do_fancy_upsampling=FALSE;
+	jinit_master_decompress(dinfo);
+	(*dinfo->upsample->start_pass)(dinfo);
+
+	pw=PAD(width, dinfo->max_h_samp_factor);
+	ph=PAD(height, dinfo->max_v_samp_factor);
+
+	#ifndef JCS_EXTENSIONS
+	if(pixelFormat!=TJPF_GRAY && pixelFormat!=TJPF_CMYK &&
+		(RGB_RED!=tjRedOffset[pixelFormat] ||
+			RGB_GREEN!=tjGreenOffset[pixelFormat] ||
+			RGB_BLUE!=tjBlueOffset[pixelFormat] ||
+			RGB_PIXELSIZE!=tjPixelSize[pixelFormat]))
+	{
+		rgbBuf=(unsigned char *)malloc(width*height*3);
+		if(!rgbBuf) _throw("tjDecodeYUV(): Memory allocation failure");
+		_pitch=pitch;  pitch=width*3;
+		_dstBuf=dstBuf;  dstBuf=rgbBuf;
+	}
+	#endif
+
+	if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph))==NULL)
+		_throw("tjDecodeYUV(): Memory allocation failure");
+	for(i=0; i<height; i++)
+	{
+		if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&dstBuf[(height-i-1)*pitch];
+		else row_pointer[i]=&dstBuf[i*pitch];
+	}
+	if(height<ph)
+		for(i=height; i<ph; i++) row_pointer[i]=row_pointer[height-1];
+
+	for(i=0; i<dinfo->num_components; i++)
+	{
+		compptr=&dinfo->comp_info[i];
+		_tmpbuf[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
+			* compptr->v_samp_factor + 16);
+		if(!_tmpbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+		tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
+		if(!tmpbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+		for(row=0; row<compptr->v_samp_factor; row++)
+		{
+			unsigned char *_tmpbuf_aligned=
+				(unsigned char *)PAD((size_t)_tmpbuf[i], 16);
+			tmpbuf[i][row]=&_tmpbuf_aligned[
+				PAD(compptr->width_in_blocks*DCTSIZE, 16) * row];
+		}
+		cw[i]=pw*compptr->h_samp_factor/dinfo->max_h_samp_factor;
+		ch[i]=ph*compptr->v_samp_factor/dinfo->max_v_samp_factor;
+		inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]);
+		if(!inbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+		for(row=0; row<ch[i]; row++)
+		{
+			inbuf[i][row]=ptr;
+			ptr+=PAD(cw[i], pad);
+		}
+	}
+
+	if(yuvsize!=(unsigned long)(ptr-srcBuf))
+		_throw("tjDecodeYUV(): YUV image is not the correct size");
+
+	for(row=0; row<ph; row+=dinfo->max_v_samp_factor)
+	{
+		JDIMENSION inrow=0, outrow=0;
+		for(i=0, compptr=dinfo->comp_info; i<dinfo->num_components; i++, compptr++)
+			jcopy_sample_rows(inbuf[i],
+				row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0,
+				compptr->v_samp_factor, cw[i]);
+		(dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow,
+			dinfo->max_v_samp_factor, &row_pointer[row], &outrow,
+			dinfo->max_v_samp_factor);
+	}
+	jpeg_abort_decompress(dinfo);
+
+	#ifndef JCS_EXTENSIONS
+	fromRGB(rgbBuf, _dstBuf, width, _pitch, height, pixelFormat);
+	#endif
+
+	bailout:
+	if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+	#ifndef JCS_EXTENSIONS
+	if(rgbBuf) free(rgbBuf);
+	#endif
+	if(row_pointer) free(row_pointer);
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		if(tmpbuf[i]!=NULL) free(tmpbuf[i]);
+		if(_tmpbuf[i]!=NULL) free(_tmpbuf[i]);
+		if(inbuf[i]!=NULL) free(inbuf[i]);
+	}
+	return retval;
+}
+
+
+DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle,
+	unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+	int width, int pad, int height, int flags)
+{
+	int i, sfi, row, retval=0;  JSAMPROW *outbuf[MAX_COMPONENTS];
+	int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh;
+	int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+		tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
+	JSAMPLE *_tmpbuf=NULL, *ptr=dstBuf;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
+	int dctsize;
+	/* Decompress the JPEG into planes in dstBuf, scaled to fit width x height. */
+	getdinstance(handle);
+
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		tmpbuf[i]=NULL;  outbuf[i]=NULL;
+	}
+
+	if((this->init&DECOMPRESS)==0)
+		_throw("tjDecompressToYUV2(): Instance has not been initialized for decompression");
+
+	if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1
+		|| !isPow2(pad) || height<0)
+		_throw("tjDecompressToYUV2(): Invalid argument");
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+	jpeg_read_header(dinfo, TRUE);
+	jpegSubsamp=getSubsamp(dinfo);
+	if(jpegSubsamp<0)
+		_throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image");
+	/* A width or height of 0 means "use the JPEG image's own dimension". */
+	jpegwidth=dinfo->image_width;  jpegheight=dinfo->image_height;
+	if(width==0) width=jpegwidth;
+	if(height==0) height=jpegheight;
+	for(i=0; i<NUMSF; i++)  /* stop at the first scaling factor that fits */
+	{
+		scaledw=TJSCALED(jpegwidth, sf[i]);
+		scaledh=TJSCALED(jpegheight, sf[i]);
+		if(scaledw<=width && scaledh<=height)
+			break;
+	}
+	if(scaledw>width || scaledh>height)
+		_throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions");
+	if(dinfo->num_components>3)
+		_throw("tjDecompressToYUV2(): JPEG image must have 3 or fewer components");
+
+	width=scaledw;  height=scaledh;
+	dinfo->scale_num=sf[i].num;
+	dinfo->scale_denom=sf[i].denom;
+	sfi=i;
+	jpeg_calc_output_dimensions(dinfo);
+
+	dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom;
+	/* iw/ih: component size in whole (scaled) blocks; cw/ch: output plane size */
+	for(i=0; i<dinfo->num_components; i++)
+	{
+		jpeg_component_info *compptr=&dinfo->comp_info[i];
+		int ih;
+		iw[i]=compptr->width_in_blocks*dctsize;
+		ih=compptr->height_in_blocks*dctsize;
+		cw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor)
+			*compptr->h_samp_factor/dinfo->max_h_samp_factor;
+		ch[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor)
+			*compptr->v_samp_factor/dinfo->max_v_samp_factor;
+		if(iw[i]!=cw[i] || ih!=ch[i]) usetmpbuf=1;
+		th[i]=compptr->v_samp_factor*dctsize;
+		tmpbufsize+=iw[i]*th[i];
+		if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]))==NULL)
+			_throw("tjDecompressToYUV2(): Memory allocation failure");
+		for(row=0; row<ch[i]; row++)
+		{
+			outbuf[i][row]=ptr;
+			ptr+=PAD(cw[i], pad);
+		}
+	}
+	if(usetmpbuf)  /* sizes differ: decode into scratch rows, then copy */
+	{
+		if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
+			_throw("tjDecompressToYUV2(): Memory allocation failure");
+		ptr=_tmpbuf;
+		for(i=0; i<dinfo->num_components; i++)
+		{
+			if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
+				_throw("tjDecompressToYUV2(): Memory allocation failure");
+			for(row=0; row<th[i]; row++)
+			{
+				tmpbuf[i][row]=ptr;
+				ptr+=iw[i];
+			}
+		}
+	}
+
+	if(flags&TJFLAG_FASTUPSAMPLE) dinfo->do_fancy_upsampling=FALSE;
+	if(flags&TJFLAG_FASTDCT) dinfo->dct_method=JDCT_FASTEST;
+	dinfo->raw_data_out=TRUE;
+
+	jpeg_start_decompress(dinfo);
+	for(row=0; row<(int)dinfo->output_height;
+		row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size)
+	{
+		JSAMPARRAY yuvptr[MAX_COMPONENTS];
+		int crow[MAX_COMPONENTS];
+		for(i=0; i<dinfo->num_components; i++)
+		{
+			jpeg_component_info *compptr=&dinfo->comp_info[i];
+			if(jpegSubsamp==TJ_420)
+			{
+				/* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
+				   to be clever and use the IDCT to perform upsampling on the U and V
+				   planes.  For instance, if the output image is to be scaled by 1/2
+				   relative to the JPEG image, then the scaling factor and upsampling
+				   effectively cancel each other, so a normal 8x8 IDCT can be used.
+				   However, this is not desirable when using the decompress-to-YUV
+				   functionality in TurboJPEG, since we want to output the U and V
+				   planes in their subsampled form.  Thus, we have to override some
+				   internal libjpeg parameters to force it to use the "scaled" IDCT
+				   functions on the U and V planes. */
+				compptr->_DCT_scaled_size=dctsize;
+				compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]*
+					sf[sfi].num/sf[sfi].denom*
+					compptr->v_samp_factor/dinfo->max_v_samp_factor;
+				dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
+			}
+			crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor;
+			if(usetmpbuf) yuvptr[i]=tmpbuf[i];
+			else yuvptr[i]=&outbuf[i][crow[i]];
+		}
+		jpeg_read_raw_data(dinfo, yuvptr,
+			dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size);
+		if(usetmpbuf)
+		{
+			int j;
+			for(i=0; i<dinfo->num_components; i++)
+			{
+				for(j=0; j<min(th[i], ch[i]-crow[i]); j++)
+				{
+					memcpy(outbuf[i][crow[i]+j], tmpbuf[i][j], cw[i]);
+				}
+			}
+		}
+	}
+	jpeg_finish_decompress(dinfo);
+
+	bailout:
+	if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+	for(i=0; i<MAX_COMPONENTS; i++)
+	{
+		if(tmpbuf[i]) free(tmpbuf[i]);
+		if(outbuf[i]) free(outbuf[i]);
+	}
+	if(_tmpbuf) free(_tmpbuf);
+	return retval;
+}
+
+DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
+	unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+	int flags)
+{  /* width=0 and height=0 (use the JPEG's own size), pad=4 */
+	return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
+}
+
+
+/* Transformer */
+
+DLLEXPORT tjhandle DLLCALL tjInitTransform(void)
+{
+	/* One zeroed tjinstance is initialized for compression first and then
+	   for decompression; the decompression init result is the handle. */
+	tjinstance *inst=(tjinstance *)malloc(sizeof(tjinstance));
+	if(inst==NULL)
+	{
+		snprintf(errStr, JMSG_LENGTH_MAX,
+			"tjInitTransform(): Memory allocation failure");
+		return NULL;
+	}
+	MEMZERO(inst, sizeof(tjinstance));
+	if(_tjInitCompress(inst)==NULL) return NULL;
+	return _tjInitDecompress(inst);
+}
+
+
+/* Apply each of the n transforms in t[] to the JPEG image in jpegBuf. */
+DLLEXPORT int DLLCALL tjTransform(tjhandle handle, unsigned char *jpegBuf,
+	unsigned long jpegSize, int n, unsigned char **dstBufs,
+	unsigned long *dstSizes, tjtransform *t, int flags)
+{
+	jpeg_transform_info *xinfo=NULL;
+	jvirt_barray_ptr *srccoefs, *dstcoefs;
+	int retval=0, i, jpegSubsamp;
+
+	getinstance(handle);
+	if((this->init&COMPRESS)==0 || (this->init&DECOMPRESS)==0)
+		_throw("tjTransform(): Instance has not been initialized for transformation");
+
+	if(jpegBuf==NULL || jpegSize<=0 || n<1 || dstBufs==NULL || dstSizes==NULL
+		|| t==NULL || flags<0)
+		_throw("tjTransform(): Invalid argument");
+
+	if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+	else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+	else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+	if(setjmp(this->jerr.setjmp_buffer))
+	{
+		/* If we get here, the JPEG code has signaled an error. */
+		retval=-1;
+		goto bailout;
+	}
+
+	jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+
+	if((xinfo=(jpeg_transform_info *)malloc(sizeof(jpeg_transform_info)*n))
+		==NULL)
+		_throw("tjTransform(): Memory allocation failure");
+	MEMZERO(xinfo, sizeof(jpeg_transform_info)*n);
+
+	for(i=0; i<n; i++)
+	{
+		if(t[i].op<0 || t[i].op>=TJ_NUMXOP)  /* op indexes xformtypes[] */
+			_throw("tjTransform(): Invalid argument");
+		xinfo[i].transform=xformtypes[t[i].op];
+		xinfo[i].perfect=(t[i].options&TJXOPT_PERFECT)? 1:0;
+		xinfo[i].trim=(t[i].options&TJXOPT_TRIM)? 1:0;
+		xinfo[i].force_grayscale=(t[i].options&TJXOPT_GRAY)? 1:0;
+		xinfo[i].crop=(t[i].options&TJXOPT_CROP)? 1:0;
+		xinfo[i].slow_hflip=(n!=1 && t[i].op==TJXOP_HFLIP)? 1:0;
+
+		if(xinfo[i].crop)
+		{
+			xinfo[i].crop_xoffset=t[i].r.x;  xinfo[i].crop_xoffset_set=JCROP_POS;
+			xinfo[i].crop_yoffset=t[i].r.y;  xinfo[i].crop_yoffset_set=JCROP_POS;
+			if(t[i].r.w!=0)
+			{
+				xinfo[i].crop_width=t[i].r.w;  xinfo[i].crop_width_set=JCROP_POS;
+			}
+			else xinfo[i].crop_width_set=JCROP_UNSET;  /* the flag, not the size */
+			if(t[i].r.h!=0)
+			{
+				xinfo[i].crop_height=t[i].r.h;  xinfo[i].crop_height_set=JCROP_POS;
+			}
+			else xinfo[i].crop_height_set=JCROP_UNSET;  /* the flag, not the size */
+		}
+	}
+
+	jcopy_markers_setup(dinfo, JCOPYOPT_ALL);
+	jpeg_read_header(dinfo, TRUE);
+	jpegSubsamp=getSubsamp(dinfo);
+	if(jpegSubsamp<0)
+		_throw("tjTransform(): Could not determine subsampling type for JPEG image");
+
+	for(i=0; i<n; i++)
+	{
+		if(!jtransform_request_workspace(dinfo, &xinfo[i]))
+			_throw("tjTransform(): Transform is not perfect");
+
+		if(xinfo[i].crop)
+		{
+			if((t[i].r.x%xinfo[i].iMCU_sample_width)!=0
+				|| (t[i].r.y%xinfo[i].iMCU_sample_height)!=0)
+			{
+				snprintf(errStr, JMSG_LENGTH_MAX,
+					"To crop this JPEG image, x must be a multiple of %d\n"
+					"and y must be a multiple of %d.\n",
+					xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height);
+				retval=-1;  goto bailout;
+			}
+		}
+	}
+
+	srccoefs=jpeg_read_coefficients(dinfo);
+
+	for(i=0; i<n; i++)
+	{
+		int w, h, alloc=1;
+		if(!xinfo[i].crop)
+		{
+			w=dinfo->image_width;  h=dinfo->image_height;
+		}
+		else
+		{
+			w=xinfo[i].crop_width;  h=xinfo[i].crop_height;
+		}
+		if(flags&TJFLAG_NOREALLOC)
+		{
+			alloc=0;  dstSizes[i]=tjBufSize(w, h, jpegSubsamp);
+		}
+		if(!(t[i].options&TJXOPT_NOOUTPUT))
+			jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
+		jpeg_copy_critical_parameters(dinfo, cinfo);
+		dstcoefs=jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]);
+		if(!(t[i].options&TJXOPT_NOOUTPUT))
+		{
+			jpeg_write_coefficients(cinfo, dstcoefs);
+			jcopy_markers_execute(dinfo, cinfo, JCOPYOPT_ALL);
+		}
+		else jinit_c_master_control(cinfo, TRUE);
+		jtransform_execute_transformation(dinfo, cinfo, srccoefs, &xinfo[i]);
+		if(t[i].customFilter)
+		{
+			int ci, y;  JDIMENSION by;
+			for(ci=0; ci<cinfo->num_components; ci++)
+			{
+				jpeg_component_info *compptr=&cinfo->comp_info[ci];
+				tjregion arrayRegion={0, 0, compptr->width_in_blocks*DCTSIZE,
+					DCTSIZE};
+				tjregion planeRegion={0, 0, compptr->width_in_blocks*DCTSIZE,
+					compptr->height_in_blocks*DCTSIZE};
+				for(by=0; by<compptr->height_in_blocks; by+=compptr->v_samp_factor)
+				{
+					JBLOCKARRAY barray=(dinfo->mem->access_virt_barray)
+						((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor,
+						TRUE);
+					for(y=0; y<compptr->v_samp_factor; y++)
+					{
+						if(t[i].customFilter(barray[y][0], arrayRegion, planeRegion,
+							ci, i, &t[i])==-1)
+							_throw("tjTransform(): Error in custom filter");
+						arrayRegion.y+=DCTSIZE;
+					}
+				}
+			}
+		}
+		if(!(t[i].options&TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo);
+	}
+
+	jpeg_finish_decompress(dinfo);
+
+	bailout:
+	if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+	if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+	if(xinfo) free(xinfo);
+	return retval;
+}
diff --git a/turbojpeg.h b/turbojpeg.h
new file mode 100644
index 0000000..818ca6e
--- /dev/null
+++ b/turbojpeg.h
@@ -0,0 +1,1165 @@
+/*
+ * Copyright (C)2009-2014 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TURBOJPEG_H__
+#define __TURBOJPEG_H__
+
+#if defined(_WIN32) && defined(DLLDEFINE)
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+#define DLLCALL
+
+
+/**
+ * @addtogroup TurboJPEG
+ * TurboJPEG API.  This API provides an interface for generating, decoding, and
+ * transforming planar YUV and JPEG images in memory.
+ *
+ * @{
+ */
+
+
+/**
+ * The number of chrominance subsampling options
+ */
+#define TJ_NUMSAMP 6
+
+/**
+ * Chrominance subsampling options.
+ * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK
+ * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of
+ * the Cb and Cr (chrominance) components can be discarded or averaged together
+ * to produce a smaller image with little perceptible loss of image clarity
+ * (the human eye is more sensitive to small changes in brightness than to
+ * small changes in color.)  This is called "chrominance subsampling".
+ * <p>
+ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
+ */
+enum TJSAMP
+{
+  /**
+   * 4:4:4 chrominance subsampling (no chrominance subsampling).  The JPEG or
+   * YUV image will contain one chrominance component for every pixel in the
+   * source image.
+   */
+  TJSAMP_444=0,
+  /**
+   * 4:2:2 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x1 block of pixels in the source image.
+   */
+  TJSAMP_422,
+  /**
+   * 4:2:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 2x2 block of pixels in the source image.
+   */
+  TJSAMP_420,
+  /**
+   * Grayscale.  The JPEG or YUV image will contain no chrominance components.
+   */
+  TJSAMP_GRAY,
+  /**
+   * 4:4:0 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 1x2 block of pixels in the source image.
+   * Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_440,
+  /**
+   * 4:1:1 chrominance subsampling.  The JPEG or YUV image will contain one
+   * chrominance component for every 4x1 block of pixels in the source image.
+   * JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+   * same size as those compressed with 4:2:0 subsampling, and in the
+   * aggregate, both subsampling methods produce approximately the same
+   * perceptual quality.  However, 4:1:1 is better able to reproduce sharp
+   * horizontal features.  Note that 4:1:1 subsampling is not fully accelerated
+   * in libjpeg-turbo.
+   */
+  TJSAMP_411
+};
+
+/**
+ * MCU block width (in pixels) for a given level of chrominance subsampling.
+ * MCU block sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ */
+static const int tjMCUWidth[TJ_NUMSAMP]  = {8, 16, 16, 8, 8, 32};
+
+/**
+ * MCU block height (in pixels) for a given level of chrominance subsampling.
+ * MCU block sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ */
+static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16, 8};
+
+
+/**
+ * The number of pixel formats
+ */
+#define TJ_NUMPF 12
+
+/**
+ * Pixel formats
+ */
+enum TJPF
+{
+  /**
+   * RGB pixel format.  The red, green, and blue components in the image are
+   * stored in 3-byte pixels in the order R, G, B from lowest to highest byte
+   * address within each pixel.
+   */
+  TJPF_RGB=0,
+  /**
+   * BGR pixel format.  The red, green, and blue components in the image are
+   * stored in 3-byte pixels in the order B, G, R from lowest to highest byte
+   * address within each pixel.
+   */
+  TJPF_BGR,
+  /**
+   * RGBX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order R, G, B from lowest to highest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  TJPF_RGBX,
+  /**
+   * BGRX pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order B, G, R from lowest to highest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  TJPF_BGRX,
+  /**
+   * XBGR pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order R, G, B from highest to lowest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  TJPF_XBGR,
+  /**
+   * XRGB pixel format.  The red, green, and blue components in the image are
+   * stored in 4-byte pixels in the order B, G, R from highest to lowest byte
+   * address within each pixel.  The X component is ignored when compressing
+   * and undefined when decompressing.
+   */
+  TJPF_XRGB,
+  /**
+   * Grayscale pixel format.  Each 1-byte pixel represents a luminance
+   * (brightness) level from 0 to 255.
+   */
+  TJPF_GRAY,
+  /**
+   * RGBA pixel format.  This is the same as @ref TJPF_RGBX, except that when
+   * decompressing, the X component is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  TJPF_RGBA,
+  /**
+   * BGRA pixel format.  This is the same as @ref TJPF_BGRX, except that when
+   * decompressing, the X component is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  TJPF_BGRA,
+  /**
+   * ABGR pixel format.  This is the same as @ref TJPF_XBGR, except that when
+   * decompressing, the X component is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  TJPF_ABGR,
+  /**
+   * ARGB pixel format.  This is the same as @ref TJPF_XRGB, except that when
+   * decompressing, the X component is guaranteed to be 0xFF, which can be
+   * interpreted as an opaque alpha channel.
+   */
+  TJPF_ARGB,
+  /**
+   * CMYK pixel format.  Unlike RGB, which is an additive color model used
+   * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+   * color model used primarily for printing.  In the CMYK color model, the
+   * value of each color component typically corresponds to an amount of cyan,
+   * magenta, yellow, or black ink that is applied to a white background.  In
+   * order to convert between CMYK and RGB, it is necessary to use a color
+   * management system (CMS.)  A CMS will attempt to map colors within the
+   * printer's gamut to perceptually similar colors in the display's gamut and
+   * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+   * be defined with a simple formula.  Thus, such a conversion is out of scope
+   * for a codec library.  However, the TurboJPEG API allows for compressing
+   * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK
+   * JPEG images into CMYK pixels.
+   */
+  TJPF_CMYK
+};
+
+
+/**
+ * Red offset (in bytes) for a given pixel format.  This specifies the number
+ * of bytes that the red component is offset from the start of the pixel.  For
+ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>,
+ * then the red component will be <tt>pixel[tjRedOffset[TJPF_BGRX]]</tt>.
+ */
+static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1};
+/**
+ * Green offset (in bytes) for a given pixel format.  This specifies the number
+ * of bytes that the green component is offset from the start of the pixel.
+ * For instance, if a pixel of format TJPF_BGRX is stored in
+ * <tt>char pixel[]</tt>, then the green component will be
+ * <tt>pixel[tjGreenOffset[TJPF_BGRX]]</tt>.
+ */
+static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1};
+/**
+ * Blue offset (in bytes) for a given pixel format.  This specifies the number
+ * of bytes that the blue component is offset from the start of the pixel.  For
+ * instance, if a pixel of format TJPF_BGRX is stored in <tt>char pixel[]</tt>,
+ * then the blue component will be <tt>pixel[tjBlueOffset[TJPF_BGRX]]</tt>.
+ */
+static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1};
+
+/**
+ * Pixel size (in bytes) for a given pixel format.
+ */
+static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4};
+
+
+/**
+ * The number of JPEG colorspaces
+ */
+#define TJ_NUMCS 5
+
+/**
+ * JPEG colorspaces
+ */
+enum TJCS
+{
+  /**
+   * RGB colorspace.  When compressing the JPEG image, the R, G, and B
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  RGB JPEG images can be
+   * decompressed to any of the extended RGB pixel formats or grayscale, but
+   * they cannot be decompressed to YUV images.
+   */
+  TJCS_RGB=0,
+  /**
+   * YCbCr colorspace.  YCbCr is not an absolute colorspace but rather a
+   * mathematical transformation of RGB designed solely for storage and
+   * transmission.  YCbCr images must be converted to RGB before they can
+   * actually be displayed.  In the YCbCr colorspace, the Y (luminance)
+   * component represents the black & white portion of the original image, and
+   * the Cb and Cr (chrominance) components represent the color portion of the
+   * original image.  Originally, the analog equivalent of this transformation
+   * allowed the same signal to drive both black & white and color televisions,
+   * but JPEG images use YCbCr primarily because it allows the color data to be
+   * optionally subsampled for the purposes of reducing bandwidth or disk
+   * space.  YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+   * can be compressed from and decompressed to any of the extended RGB pixel
+   * formats or grayscale, or they can be decompressed to YUV planar images.
+   */
+  TJCS_YCbCr,
+  /**
+   * Grayscale colorspace.  The JPEG image retains only the luminance data (Y
+   * component), and any color data from the source image is discarded.
+   * Grayscale JPEG images can be compressed from and decompressed to any of
+   * the extended RGB pixel formats or grayscale, or they can be decompressed
+   * to YUV planar images.
+   */
+  TJCS_GRAY,
+  /**
+   * CMYK colorspace.  When compressing the JPEG image, the C, M, Y, and K
+   * components in the source image are reordered into image planes, but no
+   * colorspace conversion or subsampling is performed.  CMYK JPEG images can
+   * only be decompressed to CMYK pixels.
+   */
+  TJCS_CMYK,
+  /**
+   * YCCK colorspace.  YCCK (AKA "YCbCrK") is not an absolute colorspace but
+   * rather a mathematical transformation of CMYK designed solely for storage
+   * and transmission.  It is to CMYK as YCbCr is to RGB.  CMYK pixels can be
+   * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+   * components in the YCCK pixels can be subsampled without incurring major
+   * perceptual loss.  YCCK JPEG images can only be compressed from and
+   * decompressed to CMYK pixels.
+   */
+  TJCS_YCCK
+};
+
+
+/**
+ * The uncompressed source/destination image is stored in bottom-up (Windows,
+ * OpenGL) order, not top-down (X11) order.
+ */
+#define TJFLAG_BOTTOMUP        2
+/**
+ * When decompressing an image that was compressed using chrominance
+ * subsampling, use the fastest chrominance upsampling algorithm available in
+ * the underlying codec.  The default is to use smooth upsampling, which
+ * creates a smooth transition between neighboring chrominance components in
+ * order to reduce upsampling artifacts in the decompressed image.
+ */
+#define TJFLAG_FASTUPSAMPLE  256
+/**
+ * Disable buffer (re)allocation.  If passed to #tjCompress2() or
+ * #tjTransform(), this flag will cause those functions to generate an error if
+ * the JPEG image buffer is invalid or too small rather than attempting to
+ * allocate or reallocate that buffer.  This reproduces the behavior of earlier
+ * versions of TurboJPEG.
+ */
+#define TJFLAG_NOREALLOC     1024
+/**
+ * Use the fastest DCT/IDCT algorithm available in the underlying codec.  The
+ * default if this flag is not specified is implementation-specific.  For
+ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ * algorithm by default when compressing, because this has been shown to have
+ * only a very slight effect on accuracy, but it uses the accurate algorithm
+ * when decompressing, because this has been shown to have a larger effect.
+ */
+#define TJFLAG_FASTDCT       2048
+/**
+ * Use the most accurate DCT/IDCT algorithm available in the underlying codec.
+ * The default if this flag is not specified is implementation-specific.  For
+ * example, the implementation of TurboJPEG for libjpeg[-turbo] uses the fast
+ * algorithm by default when compressing, because this has been shown to have
+ * only a very slight effect on accuracy, but it uses the accurate algorithm
+ * when decompressing, because this has been shown to have a larger effect.
+ */
+#define TJFLAG_ACCURATEDCT   4096
+
+
+/**
+ * The number of transform operations
+ */
+#define TJ_NUMXOP 8
+
+/**
+ * Transform operations for #tjTransform()
+ */
+enum TJXOP
+{
+  /**
+   * Do not transform the position of the image pixels
+   */
+  TJXOP_NONE=0,
+  /**
+   * Flip (mirror) image horizontally.  This transform is imperfect if there
+   * are any partial MCU blocks on the right edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_HFLIP,
+  /**
+   * Flip (mirror) image vertically.  This transform is imperfect if there are
+   * any partial MCU blocks on the bottom edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_VFLIP,
+  /**
+   * Transpose image (flip/mirror along upper left to lower right axis.)  This
+   * transform is always perfect.
+   */
+  TJXOP_TRANSPOSE,
+  /**
+   * Transverse transpose image (flip/mirror along upper right to lower left
+   * axis.)  This transform is imperfect if there are any partial MCU blocks in
+   * the image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_TRANSVERSE,
+  /**
+   * Rotate image clockwise by 90 degrees.  This transform is imperfect if
+   * there are any partial MCU blocks on the bottom edge (see
+   * #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT90,
+  /**
+   * Rotate image 180 degrees.  This transform is imperfect if there are any
+   * partial MCU blocks in the image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT180,
+  /**
+   * Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
+   * if there are any partial MCU blocks on the right edge (see
+   * #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT270
+};
+
+
+/**
+ * This option will cause #tjTransform() to return an error if the transform is
+ * not perfect.  Lossless transforms operate on MCU blocks, whose size depends
+ * on the level of chrominance subsampling used (see #tjMCUWidth
+ * and #tjMCUHeight.)  If the image's width or height is not evenly divisible
+ * by the MCU block size, then there will be partial MCU blocks on the right
+ * and/or bottom edges.  It is not possible to move these partial MCU blocks to
+ * the top or left of the image, so any transform that would require that is
+ * "imperfect."  If this option is not specified, then any partial MCU blocks
+ * that cannot be transformed will be left in place, which will create
+ * odd-looking strips on the right or bottom edge of the image.
+ */
+#define TJXOPT_PERFECT  1
+/**
+ * This option will cause #tjTransform() to discard any partial MCU blocks that
+ * cannot be transformed.
+ */
+#define TJXOPT_TRIM     2
+/**
+ * This option will enable lossless cropping.  See #tjTransform() for more
+ * information.
+ */
+#define TJXOPT_CROP     4
+/**
+ * This option will discard the color data in the input image and produce
+ * a grayscale output image.
+ */
+#define TJXOPT_GRAY     8
+/**
+ * This option will prevent #tjTransform() from outputting a JPEG image for
+ * this particular transform (this can be used in conjunction with a custom
+ * filter to capture the transformed DCT coefficients without transcoding
+ * them.)
+ */
+#define TJXOPT_NOOUTPUT 16
+
+
+/**
+ * Scaling factor
+ */
+typedef struct
+{
+  /**
+   * Numerator
+   */
+  int num;
+  /**
+   * Denominator
+   */
+  int denom;
+} tjscalingfactor;
+
+/**
+ * Cropping region
+ */
+typedef struct
+{
+  /**
+   * The left boundary of the cropping region.  This must be evenly divisible
+   * by the MCU block width (see #tjMCUWidth.)
+   */
+  int x;
+  /**
+   * The upper boundary of the cropping region.  This must be evenly divisible
+   * by the MCU block height (see #tjMCUHeight.)
+   */
+  int y;
+  /**
+   * The width of the cropping region. Setting this to 0 is the equivalent of
+   * setting it to the width of the source JPEG image - x.
+   */
+  int w;
+  /**
+   * The height of the cropping region. Setting this to 0 is the equivalent of
+   * setting it to the height of the source JPEG image - y.
+   */
+  int h;
+} tjregion;
+
+/**
+ * Lossless transform
+ */
+typedef struct tjtransform
+{
+  /**
+   * Cropping region
+   */
+  tjregion r;
+  /**
+   * One of the @ref TJXOP "transform operations"
+   */
+  int op;
+  /**
+   * The bitwise OR of one or more of the @ref TJXOPT_CROP "transform options"
+   */
+  int options;
+  /**
+   * Arbitrary data that can be accessed within the body of the callback
+   * function
+   */
+  void *data;
+  /**
+   * A callback function that can be used to modify the DCT coefficients
+   * after they are losslessly transformed but before they are transcoded to a
+   * new JPEG image.  This allows for custom filters or other transformations
+   * to be applied in the frequency domain.
+   *
+   * @param coeffs pointer to an array of transformed DCT coefficients.  (NOTE:
+   *        this pointer is not guaranteed to be valid once the callback
+   *        returns, so applications wishing to hand off the DCT coefficients
+   *        to another function or library should make a copy of them within
+   *        the body of the callback.)
+   * @param arrayRegion #tjregion structure containing the width and height of
+   *        the array pointed to by <tt>coeffs</tt> as well as its offset
+   *        relative to the component plane.  TurboJPEG implementations may
+   *        choose to split each component plane into multiple DCT coefficient
+   *        arrays and call the callback function once for each array.
+   * @param planeRegion #tjregion structure containing the width and height of
+   *        the component plane to which <tt>coeffs</tt> belongs
+   * @param componentIndex ID number of the component plane to which
+   *        <tt>coeffs</tt> belongs (Y, Cb, and Cr have, respectively, ID's of
+   *        0, 1, and 2 in typical JPEG images.)
+   * @param transformIndex ID number of the transformed image to which
+   *        <tt>coeffs</tt> belongs.  This is the same as the index of the
+   *        transform in the <tt>transforms</tt> array that was passed to
+   *        #tjTransform().
+   * @param transform a pointer to a #tjtransform structure that specifies the
+   *        parameters and/or cropping region for this transform
+   *
+   * @return 0 if the callback was successful, or -1 if an error occurred.
+   */
+  int (*customFilter)(short *coeffs, tjregion arrayRegion,
+    tjregion planeRegion, int componentIndex, int transformIndex,
+    struct tjtransform *transform);
+} tjtransform;
+
+/**
+ * TurboJPEG instance handle
+ */
+typedef void* tjhandle;
+
+
+/**
+ * Round the given width (in bytes) up to the nearest 32-bit (4-byte) boundary
+ */
+#define TJPAD(width) (((width)+3)&(~3))
+
+/**
+ * Compute the scaled value of <tt>dimension</tt>, i.e. the integer equivalent
+ * of <tt>ceil(dimension * scalingFactor)</tt>.  <tt>scalingFactor</tt> is
+ * evaluated more than once, so it must not have side effects.
+ */
+#define TJSCALED(dimension, scalingFactor) (((dimension) * \
+  (scalingFactor).num + (scalingFactor).denom - 1) / (scalingFactor).denom)
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Create a TurboJPEG compressor instance.
+ *
+ * @return a handle to the newly-created instance, or NULL if an error
+ * occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT tjhandle DLLCALL tjInitCompress(void);
+
+
+/**
+ * Compress an RGB, grayscale, or CMYK image into a JPEG image.
+ *
+ * @param handle a handle to a TurboJPEG compressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing RGB, grayscale, or
+ *        CMYK pixels to be compressed
+ * @param width width (in pixels) of the source image
+ * @param pitch bytes per line of the source image.  Normally, this should be
+ *        <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded,
+ *        or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of
+ *        the image is padded to the nearest 32-bit boundary, as is the case
+ *        for Windows bitmaps.  You can also be clever and use this parameter
+ *        to skip lines, etc.  Setting this parameter to 0 is the equivalent of
+ *        setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ * @param height height (in pixels) of the source image
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ *        "Pixel formats".)
+ * @param jpegBuf address of a pointer to an image buffer that will receive the
+ *        JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer
+ *        to accommodate the size of the JPEG image.  Thus, you can choose to:
+ *        -# pre-allocate the JPEG buffer with an arbitrary size using
+ *        #tjAlloc() and let TurboJPEG grow the buffer as needed,
+ *        -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the
+ *        buffer for you, or
+ *        -# pre-allocate the buffer to a "worst case" size determined by
+ *        calling #tjBufSize().  This should ensure that the buffer never has
+ *        to be re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
+ *        .
+ *        If you choose option 1, <tt>*jpegSize</tt> should be set to the
+ *        size of your pre-allocated buffer.  In any case, unless you have
+ *        set #TJFLAG_NOREALLOC, you should always check <tt>*jpegBuf</tt> upon
+ *        return from this function, as it may have changed.
+ * @param jpegSize pointer to an unsigned long variable that holds the size of
+ *        the JPEG image buffer.  If <tt>*jpegBuf</tt> points to a
+ *        pre-allocated buffer, then <tt>*jpegSize</tt> should be set to the
+ *        size of the buffer.  Upon return, <tt>*jpegSize</tt> will contain the
+ *        size of the JPEG image (in bytes.)
+ * @param jpegSubsamp the level of chrominance subsampling to be used when
+ *        generating the JPEG image (see @ref TJSAMP
+ *        "Chrominance subsampling options".)
+ * @param jpegQual the image quality of the generated JPEG image (1 = worst,
+ *        100 = best)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjCompress2(tjhandle handle, unsigned char *srcBuf,
+  int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf,
+  unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags);
+
+
+/**
+ * Compress a YUV planar image into a JPEG image.  Note that, if the width or
+ * height of the YUV image is not an even multiple of the MCU block size
+ * (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer copy will be
+ * performed within TurboJPEG.
+ *
+ * @param handle a handle to a TurboJPEG compressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing a YUV planar image
+ *        to be compressed.  The Y, U (Cb), and V (Cr) image planes should be
+ *        stored sequentially in the buffer, and the size of each plane
+ *        is determined by the specified width, height, padding, and level of
+ *        chrominance subsampling.  If the chrominance components are
+ *        subsampled along the horizontal dimension, then the width of the
+ *        luminance plane should be padded to the nearest multiple of 2 (same
+ *        goes for the height of the luminance plane, if the chrominance
+ *        components are subsampled along the vertical dimension.)  This is
+ *        irrespective of any additional padding specified in the <tt>pad</tt>
+ *        parameter.
+ * @param width width (in pixels) of the source image
+ * @param pad the line padding used in the source image.  For instance, if each
+ *        line in each plane of the YUV image is padded to the nearest multiple
+ *        of 4 bytes, then <tt>pad</tt> should be set to 4.
+ * @param height height (in pixels) of the source image
+ * @param subsamp the level of chrominance subsampling used in the source
+ *        image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param jpegBuf address of a pointer to an image buffer that will receive the
+ *        JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer
+ *        to accommodate the size of the JPEG image.  Thus, you can choose to:
+ *        -# pre-allocate the JPEG buffer with an arbitrary size using
+ *        #tjAlloc() and let TurboJPEG grow the buffer as needed,
+ *        -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the
+ *        buffer for you, or
+ *        -# pre-allocate the buffer to a "worst case" size determined by
+ *        calling #tjBufSize().  This should ensure that the buffer never has
+ *        to be re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
+ *        .
+ *        If you choose option 1, <tt>*jpegSize</tt> should be set to the
+ *        size of your pre-allocated buffer.  In any case, unless you have
+ *        set #TJFLAG_NOREALLOC, you should always check <tt>*jpegBuf</tt> upon
+ *        return from this function, as it may have changed.
+ * @param jpegSize pointer to an unsigned long variable that holds the size of
+ *        the JPEG image buffer.  If <tt>*jpegBuf</tt> points to a
+ *        pre-allocated buffer, then <tt>*jpegSize</tt> should be set to the
+ *        size of the buffer.  Upon return, <tt>*jpegSize</tt> will contain the
+ *        size of the JPEG image (in bytes.)
+ * @param jpegQual the image quality of the generated JPEG image (1 = worst,
+ *        100 = best)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, unsigned char *srcBuf,
+  int width, int pad, int height, int subsamp, unsigned char **jpegBuf,
+  unsigned long *jpegSize, int jpegQual, int flags);
+
+
+/**
+ * The maximum size of the buffer (in bytes) required to hold a JPEG image with
+ * the given parameters.  The number of bytes returned by this function is
+ * larger than the size of the uncompressed source image.  The reason for this
+ * is that the JPEG format uses 16-bit coefficients, and it is thus possible
+ * for a very high-quality JPEG image with very high-frequency content to
+ * expand rather than compress when converted to the JPEG format.  Such images
+ * represent a very rare corner case, but since there is no way to predict the
+ * size of a JPEG image prior to compression, the corner case has to be
+ * handled.
+ *
+ * @param width width of the image (in pixels)
+ * @param height height of the image (in pixels)
+ * @param jpegSubsamp the level of chrominance subsampling to be used when
+ *        generating the JPEG image (see @ref TJSAMP
+ *        "Chrominance subsampling options".)
+ *
+ * @return the maximum size of the buffer (in bytes) required to hold the
+ * image, or (unsigned long)-1 if the arguments are out of bounds.
+ */
+DLLEXPORT unsigned long DLLCALL tjBufSize(int width, int height,
+  int jpegSubsamp);
+
+
+/**
+ * The size of the buffer (in bytes) required to hold a YUV planar image with
+ * the given parameters.
+ *
+ * @param width width of the image (in pixels)
+ * @param pad the width of each line in each plane of the image is padded to
+ *        the nearest multiple of this number of bytes (must be a power of 2.)
+ * @param height height of the image (in pixels)
+ * @param subsamp level of chrominance subsampling in the image (see
+ *        @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the size of the buffer (in bytes) required to hold the image, or
+ * (unsigned long)-1 if the arguments are out of bounds.
+ */
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height,
+  int subsamp);
+
+
+/**
+ * Encode an RGB or grayscale image into a YUV planar image.  This function
+ * uses the accelerated color conversion routines in the underlying
+ * codec but does not execute any of the other steps in the JPEG compression
+ * process.  The Y, U (Cb), and V (Cr) image planes are stored sequentially
+ * into the destination buffer, and the size of each plane is determined by the
+ * width and height of the source image, as well as the specified padding and
+ * level of chrominance subsampling.  If the chrominance components are
+ * subsampled along the horizontal dimension, then the width of the luminance
+ * plane is padded to the nearest multiple of 2 in the output image (same goes
+ * for the height of the luminance plane, if the chrominance components are
+ * subsampled along the vertical dimension.)
+ * <p>
+ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
+ *
+ * @param handle a handle to a TurboJPEG compressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
+ *        to be encoded
+ * @param width width (in pixels) of the source image
+ * @param pitch bytes per line of the source image.  Normally, this should be
+ *        <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded,
+ *        or <tt>#TJPAD(width * #tjPixelSize[pixelFormat])</tt> if each line of
+ *        the image is padded to the nearest 32-bit boundary, as is the case
+ *        for Windows bitmaps.  You can also be clever and use this parameter
+ *        to skip lines, etc.  Setting this parameter to 0 is the equivalent of
+ *        setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ * @param height height (in pixels) of the source image
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ *        "Pixel formats".)
+ * @param dstBuf pointer to an image buffer that will receive the YUV image.
+ *        Use #tjBufSizeYUV2() to determine the appropriate size for this
+ *        buffer based on the image width, height, padding, and level of
+ *        chrominance subsampling.
+ * @param pad the width of each line in each plane of the YUV image will be
+ *        padded to the nearest multiple of this number of bytes (must be a
+ *        power of 2.)  To generate images suitable for X Video, <tt>pad</tt>
+ *        should be set to 4.
+ * @param subsamp the level of chrominance subsampling to be used when
+ *        generating the YUV image (see @ref TJSAMP
+ *        "Chrominance subsampling options".)  To generate images suitable for
+ *        X Video, <tt>subsamp</tt> should be set to @ref TJSAMP_420.  This
+ *        produces an image compatible with the I420 (AKA "YUV420P") format.
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle,
+  unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat,
+  unsigned char *dstBuf, int pad, int subsamp, int flags);
+
+
+/**
+ * Create a TurboJPEG decompressor instance.
+ *
+ * @return a handle to the newly-created instance, or NULL if an error
+ * occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT tjhandle DLLCALL tjInitDecompress(void);
+
+
+/**
+ * Retrieve information about a JPEG image without decompressing it.
+ *
+ * @param handle a handle to a TurboJPEG decompressor or transformer instance
+ * @param jpegBuf pointer to a buffer containing a JPEG image
+ * @param jpegSize size of the JPEG image (in bytes)
+ * @param width pointer to an integer variable that will receive the width (in
+ *        pixels) of the JPEG image
+ * @param height pointer to an integer variable that will receive the height
+ *        (in pixels) of the JPEG image
+ * @param jpegSubsamp pointer to an integer variable that will receive the
+ *        level of chrominance subsampling used when compressing the JPEG image
+ *        (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param jpegColorspace pointer to an integer variable that will receive one
+ *        of the JPEG colorspace constants, indicating the colorspace of the
+ *        JPEG image (see @ref TJCS "JPEG colorspaces".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+  int *jpegSubsamp, int *jpegColorspace);
+
+
+/**
+ * Return a list of fractional scaling factors that the JPEG decompressor in
+ * this implementation of TurboJPEG supports.
+ *
+ * @param numscalingfactors pointer to an integer variable that will receive
+ *        the number of elements in the list
+ *
+ * @return a pointer to a list of fractional scaling factors, or NULL if an
+ * error is encountered (see #tjGetErrorStr().)
+ */
+DLLEXPORT tjscalingfactor* DLLCALL tjGetScalingFactors(int *numscalingfactors);
+
+
+/**
+ * Decompress a JPEG image to an RGB, grayscale, or CMYK image.
+ *
+ * @param handle a handle to a TurboJPEG decompressor or transformer instance
+ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
+ * @param jpegSize size of the JPEG image (in bytes)
+ * @param dstBuf pointer to an image buffer that will receive the decompressed
+ *        image.  This buffer should normally be <tt>pitch * scaledHeight</tt>
+ *        bytes in size, where <tt>scaledHeight</tt> can be determined by
+ *        calling #TJSCALED() with the JPEG image height and one of the scaling
+ *        factors returned by #tjGetScalingFactors().  The <tt>dstBuf</tt>
+ *        pointer may also be used to decompress into a specific region of a
+ *        larger buffer.
+ * @param width desired width (in pixels) of the destination image.  If this is
+ *        different than the width of the JPEG image being decompressed, then
+ *        TurboJPEG will use scaling in the JPEG decompressor to generate the
+ *        largest possible image that will fit within the desired width.  If
+ *        <tt>width</tt> is set to 0, then only the height will be considered
+ *        when determining the scaled image size.
+ * @param pitch bytes per line of the destination image.  Normally, this is
+ *        <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt> if the decompressed
+ *        image is unpadded, else <tt>#TJPAD(scaledWidth *
+ *        #tjPixelSize[pixelFormat])</tt> if each line of the decompressed
+ *        image is padded to the nearest 32-bit boundary, as is the case for
+ *        Windows bitmaps.  (NOTE: <tt>scaledWidth</tt> can be determined by
+ *        calling #TJSCALED() with the JPEG image width and one of the scaling
+ *        factors returned by #tjGetScalingFactors().)  You can also be clever
+ *        and use the pitch parameter to skip lines, etc.  Setting this
+ *        parameter to 0 is the equivalent of setting it to
+ *        <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt>.
+ * @param height desired height (in pixels) of the destination image.  If this
+ *        is different than the height of the JPEG image being decompressed,
+ *        then TurboJPEG will use scaling in the JPEG decompressor to generate
+ *        the largest possible image that will fit within the desired height.
+ *        If <tt>height</tt> is set to 0, then only the width will be
+ *        considered when determining the scaled image size.
+ * @param pixelFormat pixel format of the destination image (see @ref
+ *        TJPF "Pixel formats".)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDecompress2(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+  int width, int pitch, int height, int pixelFormat, int flags);
+
+
+/**
+ * Decompress a JPEG image to a YUV planar image.  This function performs JPEG
+ * decompression but leaves out the color conversion step, so a planar YUV
+ * image is generated instead of an RGB image.  The structure of the planes in
+ * this image is the same as in the images generated by #tjEncodeYUV3().  Note
+ * that, if the width or height of the JPEG image is not an even multiple of
+ * the MCU block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate
+ * buffer copy will be performed within TurboJPEG.
+ * <p>
+ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
+ *
+ * @param handle a handle to a TurboJPEG decompressor or transformer instance
+ * @param jpegBuf pointer to a buffer containing the JPEG image to decompress
+ * @param jpegSize size of the JPEG image (in bytes)
+ * @param dstBuf pointer to an image buffer that will receive the YUV image.
+ *        Use #tjBufSizeYUV2() to determine the appropriate size for this
+ *        buffer based on the image width, height, padding, and level of
+ *        subsampling.
+ * @param width desired width (in pixels) of the YUV image.  If this is
+ *        different than the width of the JPEG image being decompressed, then
+ *        TurboJPEG will use scaling in the JPEG decompressor to generate the
+ *        largest possible image that will fit within the desired width.  If
+ *        <tt>width</tt> is set to 0, then only the height will be considered
+ *        when determining the scaled image size.
+ * @param pad the width of each line in each plane of the YUV image will be
+ *        padded to the nearest multiple of this number of bytes (must be a
+ *        power of 2.)  To generate images suitable for X Video, <tt>pad</tt>
+ *        should be set to 4.
+ * @param height desired height (in pixels) of the YUV image.  If this is
+ *        different than the height of the JPEG image being decompressed, then
+ *        TurboJPEG will use scaling in the JPEG decompressor to generate the
+ *        largest possible image that will fit within the desired height.  If
+ *        <tt>height</tt> is set to 0, then only the width will be considered
+ *        when determining the scaled image size.
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+  int width, int pad, int height, int flags);
+
+
+/**
+ * Decode a YUV planar image into an RGB or grayscale image.  This function
+ * uses the accelerated color conversion routines in the underlying
+ * codec but does not execute any of the other steps in the JPEG decompression
+ * process.  The Y, U (Cb), and V (Cr) image planes should be stored
+ * sequentially in the source buffer, and the size of each plane is determined
+ * by the width and height of the source image, as well as the specified
+ * padding and level of chrominance subsampling.  If the chrominance components
+ * are subsampled along the horizontal dimension, then the width of the
+ * luminance plane should be padded to the nearest multiple of 2 in the input
+ * image (same goes for the height of the luminance plane, if the chrominance
+ * components are subsampled along the vertical dimension.)
+ * <p>
+ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
+ *
+ * @param handle a handle to a TurboJPEG decompressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing a YUV planar image to be
+ *        decoded.  The size of this buffer should match the value returned
+ *        by #tjBufSizeYUV2() for the given image width, height, padding, and
+ *        level of chrominance subsampling.
+ * @param pad the width of each line in each plane of the YUV source image is
+ *        padded to the nearest multiple of this number of bytes (must be a
+ *        power of 2.)
+ * @param subsamp the level of chrominance subsampling used in the YUV source
+ *        image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param dstBuf pointer to an image buffer that will receive the decoded
+ *        image.  This buffer should normally be <tt>pitch * height</tt>
+ *        bytes in size, but the <tt>dstBuf</tt> pointer can also be used to
+ *        decode into a specific region of a larger buffer.
+ * @param width width (in pixels) of the source and destination images
+ * @param pitch bytes per line of the destination image.  Normally, this should
+ *        be <tt>width * #tjPixelSize[pixelFormat]</tt> if the destination
+ *        image is unpadded, or <tt>#TJPAD(width *
+ *        #tjPixelSize[pixelFormat])</tt> if each line of the destination
+ *        image should be padded to the nearest 32-bit boundary, as is the case
+ *        for Windows bitmaps.  You can also be clever and use the pitch
+ *        parameter to skip lines, etc.  Setting this parameter to 0 is the
+ *        equivalent of setting it to <tt>width *
+ *        #tjPixelSize[pixelFormat]</tt>.
+ * @param height height (in pixels) of the source and destination images
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ *        "Pixel formats".)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, unsigned char *srcBuf,
+  int pad, int subsamp, unsigned char *dstBuf, int width, int pitch,
+  int height, int pixelFormat, int flags);
+
+
+/**
+ * Create a new TurboJPEG transformer instance.
+ *
+ * @return a handle to the newly-created instance, or NULL if an error
+ * occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT tjhandle DLLCALL tjInitTransform(void);
+
+
+/**
+ * Losslessly transform a JPEG image into another JPEG image.  Lossless
+ * transforms work by moving the raw coefficients from one JPEG image structure
+ * to another without altering the values of the coefficients.  While this is
+ * typically faster than decompressing the image, transforming it, and
+ * re-compressing it, lossless transforms are not free.  Each lossless
+ * transform requires reading and performing Huffman decoding on all of the
+ * coefficients in the source image, regardless of the size of the destination
+ * image.  Thus, this function provides a means of generating multiple
+ * transformed images from the same source or applying multiple
+ * transformations simultaneously, in order to eliminate the need to read the
+ * source coefficients multiple times.
+ *
+ * @param handle a handle to a TurboJPEG transformer instance
+ * @param jpegBuf pointer to a buffer containing the JPEG image to transform
+ * @param jpegSize size of the JPEG image (in bytes)
+ * @param n the number of transformed JPEG images to generate
+ * @param dstBufs pointer to an array of n image buffers.  <tt>dstBufs[i]</tt>
+ *        will receive a JPEG image that has been transformed using the
+ *        parameters in <tt>transforms[i]</tt>.  TurboJPEG has the ability to
+ *        reallocate the JPEG buffer to accommodate the size of the JPEG image.
+ *        Thus, you can choose to:
+ *        -# pre-allocate the JPEG buffer with an arbitrary size using
+ *        #tjAlloc() and let TurboJPEG grow the buffer as needed,
+ *        -# set <tt>dstBufs[i]</tt> to NULL to tell TurboJPEG to allocate the
+ *        buffer for you, or
+ *        -# pre-allocate the buffer to a "worst case" size determined by
+ *        calling #tjBufSize() with the transformed or cropped width and
+ *        height.  This should ensure that the buffer never has to be
+ *        re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
+ *        .
+ *        If you choose option 1, <tt>dstSizes[i]</tt> should be set to
+ *        the size of your pre-allocated buffer.  In any case, unless you have
+ *        set #TJFLAG_NOREALLOC, you should always check <tt>dstBufs[i]</tt>
+ *        upon return from this function, as it may have changed.
+ * @param dstSizes pointer to an array of n unsigned long variables that will
+ *        receive the actual sizes (in bytes) of each transformed JPEG image.
+ *        If <tt>dstBufs[i]</tt> points to a pre-allocated buffer, then
+ *        <tt>dstSizes[i]</tt> should be set to the size of the buffer.  Upon
+ *        return, <tt>dstSizes[i]</tt> will contain the size of the JPEG image
+ *        (in bytes.)
+ * @param transforms pointer to an array of n #tjtransform structures, each of
+ *        which specifies the transform parameters and/or cropping region for
+ *        the corresponding transformed output image.
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ *        "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjTransform(tjhandle handle, unsigned char *jpegBuf,
+  unsigned long jpegSize, int n, unsigned char **dstBufs,
+  unsigned long *dstSizes, tjtransform *transforms, int flags);
+
+
+/**
+ * Destroy a TurboJPEG compressor, decompressor, or transformer instance.
+ *
+ * @param handle a handle to a TurboJPEG compressor, decompressor, or
+ *        transformer instance
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDestroy(tjhandle handle);
+
+
+/**
+ * Allocate an image buffer for use with TurboJPEG.  You should always use
+ * this function to allocate the JPEG destination buffer(s) for #tjCompress2()
+ * and #tjTransform() unless you are disabling automatic buffer
+ * (re)allocation (by setting #TJFLAG_NOREALLOC.)
+ *
+ * @param bytes the number of bytes to allocate
+ *
+ * @return a pointer to a newly-allocated buffer with the specified number of
+ *         bytes
+ *
+ * @sa tjFree()
+ */
+DLLEXPORT unsigned char* DLLCALL tjAlloc(int bytes);
+
+
+/**
+ * Free an image buffer previously allocated by TurboJPEG.  You should always
+ * use this function to free JPEG destination buffer(s) that were automatically
+ * (re)allocated by #tjCompress2() or #tjTransform() or that were manually
+ * allocated using #tjAlloc().
+ *
+ * @param buffer address of the buffer to free
+ *
+ * @sa tjAlloc()
+ */
+DLLEXPORT void DLLCALL tjFree(unsigned char *buffer);
+
+
+/**
+ * Returns a descriptive error message explaining why the last command failed.
+ *
+ * @return a descriptive error message explaining why the last command failed.
+ */
+DLLEXPORT char* DLLCALL tjGetErrorStr(void);
+
+
+/* Deprecated functions and macros */
+#define TJFLAG_FORCEMMX        8
+#define TJFLAG_FORCESSE       16
+#define TJFLAG_FORCESSE2      32
+#define TJFLAG_FORCESSE3     128
+
+
+/* Backward compatibility functions and macros (nothing to see here) */
+#define NUMSUBOPT TJ_NUMSAMP
+#define TJ_444 TJSAMP_444
+#define TJ_422 TJSAMP_422
+#define TJ_420 TJSAMP_420
+#define TJ_411 TJSAMP_420
+#define TJ_GRAYSCALE TJSAMP_GRAY
+
+#define TJ_BGR 1
+#define TJ_BOTTOMUP TJFLAG_BOTTOMUP
+#define TJ_FORCEMMX TJFLAG_FORCEMMX
+#define TJ_FORCESSE TJFLAG_FORCESSE
+#define TJ_FORCESSE2 TJFLAG_FORCESSE2
+#define TJ_ALPHAFIRST 64
+#define TJ_FORCESSE3 TJFLAG_FORCESSE3
+#define TJ_FASTUPSAMPLE TJFLAG_FASTUPSAMPLE
+#define TJ_YUV 512
+
+DLLEXPORT unsigned long DLLCALL TJBUFSIZE(int width, int height);
+
+DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
+  int jpegSubsamp);
+
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+  int subsamp);
+
+DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf,
+  int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
+  unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags);
+
+DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle,
+  unsigned char *srcBuf, int width, int pitch, int height, int pixelSize,
+  unsigned char *dstBuf, int subsamp, int flags);
+
+DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle,
+  unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat,
+  unsigned char *dstBuf, int subsamp, int flags);
+
+DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height);
+
+DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+  int *jpegSubsamp);
+
+DLLEXPORT int DLLCALL tjDecompress(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+  int width, int pitch, int height, int pixelSize, int flags);
+
+DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
+  unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+  int flags);
+
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/usage.doc b/usage.doc
deleted file mode 100644
index 8c4970a..0000000
--- a/usage.doc
+++ /dev/null
@@ -1,562 +0,0 @@
-USAGE instructions for the Independent JPEG Group's JPEG software
-=================================================================
-
-This file describes usage of the JPEG conversion programs cjpeg and djpeg,
-as well as the utility programs jpegtran, rdjpgcom and wrjpgcom.  (See
-the other documentation files if you wish to use the JPEG library within
-your own programs.)
-
-If you are on a Unix machine you may prefer to read the Unix-style manual
-pages in files cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1.
-
-
-INTRODUCTION
-
-These programs implement JPEG image compression and decompression.  JPEG
-(pronounced "jay-peg") is a standardized compression method for full-color
-and gray-scale images.  JPEG is designed to handle "real-world" scenes,
-for example scanned photographs.  Cartoons, line drawings, and other
-non-realistic images are not JPEG's strong suit; on that sort of material
-you may get poor image quality and/or little compression.
-
-JPEG is lossy, meaning that the output image is not necessarily identical to
-the input image.  Hence you should not use JPEG if you have to have identical
-output bits.  However, on typical real-world images, very good compression
-levels can be obtained with no visible change, and amazingly high compression
-is possible if you can tolerate a low-quality image.  You can trade off image
-quality against file size by adjusting the compressor's "quality" setting.
-
-
-GENERAL USAGE
-
-We provide two programs, cjpeg to compress an image file into JPEG format,
-and djpeg to decompress a JPEG file back into a conventional image format.
-
-On Unix-like systems, you say:
-	cjpeg [switches] [imagefile] >jpegfile
-or
-	djpeg [switches] [jpegfile]  >imagefile
-The programs read the specified input file, or standard input if none is
-named.  They always write to standard output (with trace/error messages to
-standard error).  These conventions are handy for piping images between
-programs.
-
-On most non-Unix systems, you say:
-	cjpeg [switches] imagefile jpegfile
-or
-	djpeg [switches] jpegfile  imagefile
-i.e., both the input and output files are named on the command line.  This
-style is a little more foolproof, and it loses no functionality if you don't
-have pipes.  (You can get this style on Unix too, if you prefer, by defining
-TWO_FILE_COMMANDLINE when you compile the programs; see install.doc.)
-
-You can also say:
-	cjpeg [switches] -outfile jpegfile  imagefile
-or
-	djpeg [switches] -outfile imagefile  jpegfile
-This syntax works on all systems, so it is useful for scripts.
-
-The currently supported image file formats are: PPM (PBMPLUS color format),
-PGM (PBMPLUS gray-scale format), BMP, Targa, and RLE (Utah Raster Toolkit
-format).  (RLE is supported only if the URT library is available.)
-cjpeg recognizes the input image format automatically, with the exception
-of some Targa-format files.  You have to tell djpeg which format to generate.
-
-JPEG files are in the defacto standard JFIF file format.  There are other,
-less widely used JPEG-based file formats, but we don't support them.
-
-All switch names may be abbreviated; for example, -grayscale may be written
--gray or -gr.  Most of the "basic" switches can be abbreviated to as little as
-one letter.  Upper and lower case are equivalent (-BMP is the same as -bmp).
-British spellings are also accepted (e.g., -greyscale), though for brevity
-these are not mentioned below.
-
-
-CJPEG DETAILS
-
-The basic command line switches for cjpeg are:
-
-	-quality N	Scale quantization tables to adjust image quality.
-			Quality is 0 (worst) to 100 (best); default is 75.
-			(See below for more info.)
-
-	-grayscale	Create monochrome JPEG file from color input.
-			Be sure to use this switch when compressing a grayscale
-			BMP file, because cjpeg isn't bright enough to notice
-			whether a BMP file uses only shades of gray.  By
-			saying -grayscale, you'll get a smaller JPEG file that
-			takes less time to process.
-
-	-optimize	Perform optimization of entropy encoding parameters.
-			Without this, default encoding parameters are used.
-			-optimize usually makes the JPEG file a little smaller,
-			but cjpeg runs somewhat slower and needs much more
-			memory.  Image quality and speed of decompression are
-			unaffected by -optimize.
-
-	-progressive	Create progressive JPEG file (see below).
-
-	-targa		Input file is Targa format.  Targa files that contain
-			an "identification" field will not be automatically
-			recognized by cjpeg; for such files you must specify
-			-targa to make cjpeg treat the input as Targa format.
-			For most Targa files, you won't need this switch.
-
-The -quality switch lets you trade off compressed file size against quality of
-the reconstructed image: the higher the quality setting, the larger the JPEG
-file, and the closer the output image will be to the original input.  Normally
-you want to use the lowest quality setting (smallest file) that decompresses
-into something visually indistinguishable from the original image.  For this
-purpose the quality setting should be between 50 and 95; the default of 75 is
-often about right.  If you see defects at -quality 75, then go up 5 or 10
-counts at a time until you are happy with the output image.  (The optimal
-setting will vary from one image to another.)
-
--quality 100 will generate a quantization table of all 1's, minimizing loss
-in the quantization step (but there is still information loss in subsampling,
-as well as roundoff error).  This setting is mainly of interest for
-experimental purposes.  Quality values above about 95 are NOT recommended for
-normal use; the compressed file size goes up dramatically for hardly any gain
-in output image quality.
-
-In the other direction, quality values below 50 will produce very small files
-of low image quality.  Settings around 5 to 10 might be useful in preparing an
-index of a large image library, for example.  Try -quality 2 (or so) for some
-amusing Cubist effects.  (Note: quality values below about 25 generate 2-byte
-quantization tables, which are considered optional in the JPEG standard.
-cjpeg emits a warning message when you give such a quality value, because some
-other JPEG programs may be unable to decode the resulting file.  Use -baseline
-if you need to ensure compatibility at low quality values.)
-
-The -progressive switch creates a "progressive JPEG" file.  In this type of
-JPEG file, the data is stored in multiple scans of increasing quality.  If the
-file is being transmitted over a slow communications link, the decoder can use
-the first scan to display a low-quality image very quickly, and can then
-improve the display with each subsequent scan.  The final image is exactly
-equivalent to a standard JPEG file of the same quality setting, and the total
-file size is about the same --- often a little smaller.  CAUTION: progressive
-JPEG is not yet widely implemented, so many decoders will be unable to view a
-progressive JPEG file at all.
-
-Switches for advanced users:
-
-	-dct int	Use integer DCT method (default).
-	-dct fast	Use fast integer DCT (less accurate).
-	-dct float	Use floating-point DCT method.
-			The float method is very slightly more accurate than
-			the int method, but is much slower unless your machine
-			has very fast floating-point hardware.  Also note that
-			results of the floating-point method may vary slightly
-			across machines, while the integer methods should give
-			the same results everywhere.  The fast integer method
-			is much less accurate than the other two.
-
-	-restart N	Emit a JPEG restart marker every N MCU rows, or every
-			N MCU blocks if "B" is attached to the number.
-			-restart 0 (the default) means no restart markers.
-
-	-smooth N	Smooth the input image to eliminate dithering noise.
-			N, ranging from 1 to 100, indicates the strength of
-			smoothing.  0 (the default) means no smoothing.
-
-	-maxmemory N	Set limit for amount of memory to use in processing
-			large images.  Value is in thousands of bytes, or
-			millions of bytes if "M" is attached to the number.
-			For example, -max 4m selects 4000000 bytes.  If more
-			space is needed, temporary files will be used.
-
-	-verbose	Enable debug printout.  More -v's give more printout.
-	or  -debug	Also, version information is printed at startup.
-
-The -restart option inserts extra markers that allow a JPEG decoder to
-resynchronize after a transmission error.  Without restart markers, any damage
-to a compressed file will usually ruin the image from the point of the error
-to the end of the image; with restart markers, the damage is usually confined
-to the portion of the image up to the next restart marker.  Of course, the
-restart markers occupy extra space.  We recommend -restart 1 for images that
-will be transmitted across unreliable networks such as Usenet.
-
-The -smooth option filters the input to eliminate fine-scale noise.  This is
-often useful when converting dithered images to JPEG: a moderate smoothing
-factor of 10 to 50 gets rid of dithering patterns in the input file, resulting
-in a smaller JPEG file and a better-looking image.  Too large a smoothing
-factor will visibly blur the image, however.
-
-Switches for wizards:
-
-	-baseline	Force baseline-compatible quantization tables to be
-			generated.  This clamps quantization values to 8 bits
-			even at low quality settings.  (This switch is poorly
-			named, since it does not ensure that the output is
-			actually baseline JPEG.  For example, you can use
-			-baseline and -progressive together.)
-
-	-qtables file	Use the quantization tables given in the specified
-			text file.
-
-	-qslots N[,...] Select which quantization table to use for each color
-			component.
-
-	-sample HxV[,...]  Set JPEG sampling factors for each color component.
-
-	-scans file	Use the scan script given in the specified text file.
-
-The "wizard" switches are intended for experimentation with JPEG.  If you
-don't know what you are doing, DON'T USE THEM.  These switches are documented
-further in the file wizard.doc.
-
-
-DJPEG DETAILS
-
-The basic command line switches for djpeg are:
-
-	-colors N	Reduce image to at most N colors.  This reduces the
-	or -quantize N	number of colors used in the output image, so that it
-			can be displayed on a colormapped display or stored in
-			a colormapped file format.  For example, if you have
-			an 8-bit display, you'd need to reduce to 256 or fewer
-			colors.  (-colors is the recommended name, -quantize
-			is provided only for backwards compatibility.)
-
-	-fast		Select recommended processing options for fast, low
-			quality output.  (The default options are chosen for
-			highest quality output.)  Currently, this is equivalent
-			to "-dct fast -nosmooth -onepass -dither ordered".
-
-	-grayscale	Force gray-scale output even if JPEG file is color.
-			Useful for viewing on monochrome displays; also,
-			djpeg runs noticeably faster in this mode.
-
-	-scale M/N	Scale the output image by a factor M/N.  Currently
-			the scale factor must be 1/1, 1/2, 1/4, or 1/8.
-			Scaling is handy if the image is larger than your
-			screen; also, djpeg runs much faster when scaling
-			down the output.
-
-	-bmp		Select BMP output format (Windows flavor).  8-bit
-			colormapped format is emitted if -colors or -grayscale
-			is specified, or if the JPEG file is gray-scale;
-			otherwise, 24-bit full-color format is emitted.
-
-	-gif		Select GIF output format.  Since GIF does not support
-			more than 256 colors, -colors 256 is assumed (unless
-			you specify a smaller number of colors).  If you
-			specify -fast, the default number of colors is 216.
-
-	-os2		Select BMP output format (OS/2 1.x flavor).  8-bit
-			colormapped format is emitted if -colors or -grayscale
-			is specified, or if the JPEG file is gray-scale;
-			otherwise, 24-bit full-color format is emitted.
-
-	-pnm		Select PBMPLUS (PPM/PGM) output format (this is the
-			default format).  PGM is emitted if the JPEG file is
-			gray-scale or if -grayscale is specified; otherwise
-			PPM is emitted.
-
-	-rle		Select RLE output format.  (Requires URT library.)
-
-	-targa		Select Targa output format.  Gray-scale format is
-			emitted if the JPEG file is gray-scale or if
-			-grayscale is specified; otherwise, colormapped format
-			is emitted if -colors is specified; otherwise, 24-bit
-			full-color format is emitted.
-
-Switches for advanced users:
-
-	-dct int	Use integer DCT method (default).
-	-dct fast	Use fast integer DCT (less accurate).
-	-dct float	Use floating-point DCT method.
-			The float method is very slightly more accurate than
-			the int method, but is much slower unless your machine
-			has very fast floating-point hardware.  Also note that
-			results of the floating-point method may vary slightly
-			across machines, while the integer methods should give
-			the same results everywhere.  The fast integer method
-			is much less accurate than the other two.
-
-	-dither fs	Use Floyd-Steinberg dithering in color quantization.
-	-dither ordered	Use ordered dithering in color quantization.
-	-dither none	Do not use dithering in color quantization.
-			By default, Floyd-Steinberg dithering is applied when
-			quantizing colors; this is slow but usually produces
-			the best results.  Ordered dither is a compromise
-			between speed and quality; no dithering is fast but
-			usually looks awful.  Note that these switches have
-			no effect unless color quantization is being done.
-			Ordered dither is only available in -onepass mode.
-
-	-map FILE	Quantize to the colors used in the specified image
-			file.  This is useful for producing multiple files
-			with identical color maps, or for forcing a predefined
-			set of colors to be used.  The FILE must be a GIF
-			or PPM file.  This option overrides -colors and
-			-onepass.
-
-	-nosmooth	Use a faster, lower-quality upsampling routine.
-
-	-onepass	Use one-pass instead of two-pass color quantization.
-			The one-pass method is faster and needs less memory,
-			but it produces a lower-quality image.  -onepass is
-			ignored unless you also say -colors N.  Also,
-			the one-pass method is always used for gray-scale
-			output (the two-pass method is no improvement then).
-
-	-maxmemory N	Set limit for amount of memory to use in processing
-			large images.  Value is in thousands of bytes, or
-			millions of bytes if "M" is attached to the number.
-			For example, -max 4m selects 4000000 bytes.  If more
-			space is needed, temporary files will be used.
-
-	-verbose	Enable debug printout.  More -v's give more printout.
-	or  -debug	Also, version information is printed at startup.
-
-
-HINTS FOR CJPEG
-
-Color GIF files are not the ideal input for JPEG; JPEG is really intended for
-compressing full-color (24-bit) images.  In particular, don't try to convert
-cartoons, line drawings, and other images that have only a few distinct
-colors.  GIF works great on these, JPEG does not.  If you want to convert a
-GIF to JPEG, you should experiment with cjpeg's -quality and -smooth options
-to get a satisfactory conversion.  -smooth 10 or so is often helpful.
-
-Avoid running an image through a series of JPEG compression/decompression
-cycles.  Image quality loss will accumulate; after ten or so cycles the image
-may be noticeably worse than it was after one cycle.  It's best to use a
-lossless format while manipulating an image, then convert to JPEG format when
-you are ready to file the image away.
-
-The -optimize option to cjpeg is worth using when you are making a "final"
-version for posting or archiving.  It's also a win when you are using low
-quality settings to make very small JPEG files; the percentage improvement
-is often a lot more than it is on larger files.  (At present, -optimize
-mode is always selected when generating progressive JPEG files.)
-
-GIF input files are no longer supported, to avoid the Unisys LZW patent.
-Use a Unisys-licensed program if you need to read a GIF file.  (Conversion
-of GIF files to JPEG is usually a bad idea anyway.)
-
-
-HINTS FOR DJPEG
-
-To get a quick preview of an image, use the -grayscale and/or -scale switches.
-"-grayscale -scale 1/8" is the fastest case.
-
-Several options are available that trade off image quality to gain speed.
-"-fast" turns on the recommended settings.
-
-"-dct fast" and/or "-nosmooth" gain speed at a small sacrifice in quality.
-When producing a color-quantized image, "-onepass -dither ordered" is fast but
-much lower quality than the default behavior.  "-dither none" may give
-acceptable results in two-pass mode, but is seldom tolerable in one-pass mode.
-
-If you are fortunate enough to have very fast floating point hardware,
-"-dct float" may be even faster than "-dct fast".  But on most machines
-"-dct float" is slower than "-dct int"; in this case it is not worth using,
-because its theoretical accuracy advantage is too small to be significant
-in practice.
-
-Two-pass color quantization requires a good deal of memory; on MS-DOS machines
-it may run out of memory even with -maxmemory 0.  In that case you can still
-decompress, with some loss of image quality, by specifying -onepass for
-one-pass quantization.
-
-To avoid the Unisys LZW patent, djpeg produces uncompressed GIF files.  These
-are larger than they should be, but are readable by standard GIF decoders.
-
-
-HINTS FOR BOTH PROGRAMS
-
-If more space is needed than will fit in the available main memory (as
-determined by -maxmemory), temporary files will be used.  (MS-DOS versions
-will try to get extended or expanded memory first.)  The temporary files are
-often rather large: in typical cases they occupy three bytes per pixel, for
-example 3*800*600 = 1.44Mb for an 800x600 image.  If you don't have enough
-free disk space, leave out -progressive and -optimize (for cjpeg) or specify
--onepass (for djpeg).
-
-On MS-DOS, the temporary files are created in the directory named by the TMP
-or TEMP environment variable, or in the current directory if neither of those
-exist.  Amiga implementations put the temp files in the directory named by
-JPEGTMP:, so be sure to assign JPEGTMP: to a disk partition with adequate free
-space.
-
-The default memory usage limit (-maxmemory) is set when the software is
-compiled.  If you get an "insufficient memory" error, try specifying a smaller
--maxmemory value, even -maxmemory 0 to use the absolute minimum space.  You
-may want to recompile with a smaller default value if this happens often.
-
-On machines that have "environment" variables, you can define the environment
-variable JPEGMEM to set the default memory limit.  The value is specified as
-described for the -maxmemory switch.  JPEGMEM overrides the default value
-specified when the program was compiled, and itself is overridden by an
-explicit -maxmemory switch.
-
-On MS-DOS machines, -maxmemory is the amount of main (conventional) memory to
-use.  (Extended or expanded memory is also used if available.)  Most
-DOS-specific versions of this software do their own memory space estimation
-and do not need you to specify -maxmemory.
-
-
-JPEGTRAN
-
-jpegtran performs various useful transformations of JPEG files.
-It can translate the coded representation from one variant of JPEG to another,
-for example from baseline JPEG to progressive JPEG or vice versa.  It can also
-perform some rearrangements of the image data, for example turning an image
-from landscape to portrait format by rotation.
-
-jpegtran works by rearranging the compressed data (DCT coefficients), without
-ever fully decoding the image.  Therefore, its transformations are lossless:
-there is no image degradation at all, which would not be true if you used
-djpeg followed by cjpeg to accomplish the same conversion.  But by the same
-token, jpegtran cannot perform lossy operations such as changing the image
-quality.
-
-jpegtran uses a command line syntax similar to cjpeg or djpeg.
-On Unix-like systems, you say:
-	jpegtran [switches] [inputfile] >outputfile
-On most non-Unix systems, you say:
-	jpegtran [switches] inputfile outputfile
-where both the input and output files are JPEG files.
-
-To specify the coded JPEG representation used in the output file,
-jpegtran accepts a subset of the switches recognized by cjpeg:
-	-optimize	Perform optimization of entropy encoding parameters.
-	-progressive	Create progressive JPEG file.
-	-restart N	Emit a JPEG restart marker every N MCU rows, or every
-			N MCU blocks if "B" is attached to the number.
-	-scans file	Use the scan script given in the specified text file.
-See the previous discussion of cjpeg for more details about these switches.
-If you specify none of these switches, you get a plain baseline-JPEG output
-file.  The quality setting and so forth are determined by the input file.
-
-The image can be losslessly transformed by giving one of these switches:
-	-flip horizontal	Mirror image horizontally (left-right).
-	-flip vertical		Mirror image vertically (top-bottom).
-	-rotate 90		Rotate image 90 degrees clockwise.
-	-rotate 180		Rotate image 180 degrees.
-	-rotate 270		Rotate image 270 degrees clockwise (or 90 ccw).
-	-transpose		Transpose image (across UL-to-LR axis).
-	-transverse		Transverse transpose (across UR-to-LL axis).
-
-The transpose transformation has no restrictions regarding image dimensions.
-The other transformations operate rather oddly if the image dimensions are not
-a multiple of the iMCU size (usually 8 or 16 pixels), because they can only
-transform complete blocks of DCT coefficient data in the desired way.
-
-jpegtran's default behavior when transforming an odd-size image is designed
-to preserve exact reversibility and mathematical consistency of the
-transformation set.  As stated, transpose is able to flip the entire image
-area.  Horizontal mirroring leaves any partial iMCU column at the right edge
-untouched, but is able to flip all rows of the image.  Similarly, vertical
-mirroring leaves any partial iMCU row at the bottom edge untouched, but is
-able to flip all columns.  The other transforms can be built up as sequences
-of transpose and flip operations; for consistency, their actions on edge
-pixels are defined to be the same as the end result of the corresponding
-transpose-and-flip sequence.
-
-For practical use, you may prefer to discard any untransformable edge pixels
-rather than having a strange-looking strip along the right and/or bottom edges
-of a transformed image.  To do this, add the -trim switch:
-	-trim		Drop non-transformable edge blocks.
-Obviously, a transformation with -trim is not reversible, so strictly speaking
-jpegtran with this switch is not lossless.  Also, the expected mathematical
-equivalences between the transformations no longer hold.  For example,
-"-rot 270 -trim" trims only the bottom edge, but "-rot 90 -trim" followed by
-"-rot 180 -trim" trims both edges.
-
-Another not-strictly-lossless transformation switch is:
-	-grayscale	Force grayscale output.
-This option discards the chrominance channels if the input image is YCbCr
-(ie, a standard color JPEG), resulting in a grayscale JPEG file.  The
-luminance channel is preserved exactly, so this is a better method of reducing
-to grayscale than decompression, conversion, and recompression.  This switch
-is particularly handy for fixing a monochrome picture that was mistakenly
-encoded as a color JPEG.  (In such a case, the space savings from getting rid
-of the near-empty chroma channels won't be large; but the decoding time for
-a grayscale JPEG is substantially less than that for a color JPEG.)
-
-jpegtran also recognizes these switches that control what to do with "extra"
-markers, such as comment blocks:
-	-copy none	Copy no extra markers from source file.  This setting
-			suppresses all comments and other excess baggage
-			present in the source file.
-	-copy comments	Copy only comment markers.  This setting copies
-			comments from the source file, but discards
-			any other inessential data. 
-	-copy all	Copy all extra markers.  This setting preserves
-			miscellaneous markers found in the source file, such
-			as JFIF thumbnails and Photoshop settings.  In some
-			files these extra markers can be sizable.
-The default behavior is -copy comments.  (Note: in IJG releases v6 and v6a,
-jpegtran always did the equivalent of -copy none.)
-
-Additional switches recognized by jpegtran are:
-	-outfile filename
-	-maxmemory N
-	-verbose
-	-debug
-These work the same as in cjpeg or djpeg.
-
-
-THE COMMENT UTILITIES
-
-The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file.
-Although the standard doesn't actually define what COM blocks are for, they
-are widely used to hold user-supplied text strings.  This lets you add
-annotations, titles, index terms, etc to your JPEG files, and later retrieve
-them as text.  COM blocks do not interfere with the image stored in the JPEG
-file.  The maximum size of a COM block is 64K, but you can have as many of
-them as you like in one JPEG file.
-
-We provide two utility programs to display COM block contents and add COM
-blocks to a JPEG file.
-
-rdjpgcom searches a JPEG file and prints the contents of any COM blocks on
-standard output.  The command line syntax is
-	rdjpgcom [-verbose] [inputfilename]
-The switch "-verbose" (or just "-v") causes rdjpgcom to also display the JPEG
-image dimensions.  If you omit the input file name from the command line,
-the JPEG file is read from standard input.  (This may not work on some
-operating systems, if binary data can't be read from stdin.)
-
-wrjpgcom adds a COM block, containing text you provide, to a JPEG file.
-Ordinarily, the COM block is added after any existing COM blocks, but you
-can delete the old COM blocks if you wish.  wrjpgcom produces a new JPEG
-file; it does not modify the input file.  DO NOT try to overwrite the input
-file by directing wrjpgcom's output back into it; on most systems this will
-just destroy your file.
-
-The command line syntax for wrjpgcom is similar to cjpeg's.  On Unix-like
-systems, it is
-	wrjpgcom [switches] [inputfilename]
-The output file is written to standard output.  The input file comes from
-the named file, or from standard input if no input file is named.
-
-On most non-Unix systems, the syntax is
-	wrjpgcom [switches] inputfilename outputfilename
-where both input and output file names must be given explicitly.
-
-wrjpgcom understands three switches:
-	-replace		 Delete any existing COM blocks from the file.
-	-comment "Comment text"	 Supply new COM text on command line.
-	-cfile name		 Read text for new COM block from named file.
-(Switch names can be abbreviated.)  If you have only one line of comment text
-to add, you can provide it on the command line with -comment.  The comment
-text must be surrounded with quotes so that it is treated as a single
-argument.  Longer comments can be read from a text file.
-
-If you give neither -comment nor -cfile, then wrjpgcom will read the comment
-text from standard input.  (In this case an input image file name MUST be
-supplied, so that the source JPEG file comes from somewhere else.)  You can
-enter multiple lines, up to 64KB worth.  Type an end-of-file indicator
-(usually control-D or control-Z) to terminate the comment text entry.
-
-wrjpgcom will not add a COM block if the provided comment string is empty.
-Therefore -replace -comment "" can be used to delete all COM blocks from a
-file.
-
-These utility programs do not depend on the IJG JPEG library.  In
-particular, the source code for rdjpgcom is intended as an illustration of
-the minimum amount of code required to parse a JPEG file header correctly.
diff --git a/usage.txt b/usage.txt
new file mode 100644
index 0000000..ef8e6d0
--- /dev/null
+++ b/usage.txt
@@ -0,0 +1,649 @@
+NOTE:  This file was modified by The libjpeg-turbo Project to include only
+information relevant to libjpeg-turbo and to wordsmith certain sections.
+
+USAGE instructions for the Independent JPEG Group's JPEG software
+=================================================================
+
+This file describes usage of the JPEG conversion programs cjpeg and djpeg,
+as well as the utility programs jpegtran, rdjpgcom and wrjpgcom.  (See
+the other documentation files if you wish to use the JPEG library within
+your own programs.)
+
+If you are on a Unix machine you may prefer to read the Unix-style manual
+pages in files cjpeg.1, djpeg.1, jpegtran.1, rdjpgcom.1, wrjpgcom.1.
+
+
+INTRODUCTION
+
+These programs implement JPEG image encoding, decoding, and transcoding.
+JPEG (pronounced "jay-peg") is a standardized compression method for
+full-color and grayscale images.
+
+
+GENERAL USAGE
+
+We provide two programs, cjpeg to compress an image file into JPEG format,
+and djpeg to decompress a JPEG file back into a conventional image format.
+
+On Unix-like systems, you say:
+        cjpeg [switches] [imagefile] >jpegfile
+or
+        djpeg [switches] [jpegfile]  >imagefile
+The programs read the specified input file, or standard input if none is
+named.  They always write to standard output (with trace/error messages to
+standard error).  These conventions are handy for piping images between
+programs.
+
+On most non-Unix systems, you say:
+        cjpeg [switches] imagefile jpegfile
+or
+        djpeg [switches] jpegfile  imagefile
+i.e., both the input and output files are named on the command line.  This
+style is a little more foolproof, and it loses no functionality if you don't
+have pipes.  (You can get this style on Unix too, if you prefer, by defining
+TWO_FILE_COMMANDLINE when you compile the programs; see install.txt.)
+
+You can also say:
+        cjpeg [switches] -outfile jpegfile  imagefile
+or
+        djpeg [switches] -outfile imagefile  jpegfile
+This syntax works on all systems, so it is useful for scripts.
+
+The currently supported image file formats are: PPM (PBMPLUS color format),
+PGM (PBMPLUS grayscale format), BMP, Targa, and RLE (Utah Raster Toolkit
+format).  (RLE is supported only if the URT library is available.)
+cjpeg recognizes the input image format automatically, with the exception
+of some Targa-format files.  You have to tell djpeg which format to generate.
+
+JPEG files are in the de facto standard JFIF file format.  There are other,
+less widely used JPEG-based file formats, but we don't support them.
+
+All switch names may be abbreviated; for example, -grayscale may be written
+-gray or -gr.  Most of the "basic" switches can be abbreviated to as little as
+one letter.  Upper and lower case are equivalent (-BMP is the same as -bmp).
+British spellings are also accepted (e.g., -greyscale), though for brevity
+these are not mentioned below.
+
+
+CJPEG DETAILS
+
+The basic command line switches for cjpeg are:
+
+        -quality N[,...]  Scale quantization tables to adjust image quality.
+                          Quality is 0 (worst) to 100 (best); default is 75.
+                          (See below for more info.)
+
+        -grayscale      Create monochrome JPEG file from color input.
+                        Be sure to use this switch when compressing a grayscale
+                        BMP file, because cjpeg isn't bright enough to notice
+                        whether a BMP file uses only shades of gray.  By
+                        saying -grayscale, you'll get a smaller JPEG file that
+                        takes less time to process.
+
+        -rgb            Create RGB JPEG file.
+                        Using this switch suppresses the conversion from RGB
+                        colorspace input to the default YCbCr JPEG colorspace.
+
+        -optimize       Perform optimization of entropy encoding parameters.
+                        Without this, default encoding parameters are used.
+                        -optimize usually makes the JPEG file a little smaller,
+                        but cjpeg runs somewhat slower and needs much more
+                        memory.  Image quality and speed of decompression are
+                        unaffected by -optimize.
+
+        -progressive    Create progressive JPEG file (see below).
+
+        -targa          Input file is Targa format.  Targa files that contain
+                        an "identification" field will not be automatically
+                        recognized by cjpeg; for such files you must specify
+                        -targa to make cjpeg treat the input as Targa format.
+                        For most Targa files, you won't need this switch.
+
+The -quality switch lets you trade off compressed file size against quality of
+the reconstructed image: the higher the quality setting, the larger the JPEG
+file, and the closer the output image will be to the original input.  Normally
+you want to use the lowest quality setting (smallest file) that decompresses
+into something visually indistinguishable from the original image.  For this
+purpose the quality setting should be between 50 and 95; the default of 75 is
+often about right.  If you see defects at -quality 75, then go up 5 or 10
+counts at a time until you are happy with the output image.  (The optimal
+setting will vary from one image to another.)
+
+-quality 100 will generate a quantization table of all 1's, minimizing loss
+in the quantization step (but there is still information loss in subsampling,
+as well as roundoff error).  This setting is mainly of interest for
+experimental purposes.  Quality values above about 95 are NOT recommended for
+normal use; the compressed file size goes up dramatically for hardly any gain
+in output image quality.
+
+In the other direction, quality values below 50 will produce very small files
+of low image quality.  Settings around 5 to 10 might be useful in preparing an
+index of a large image library, for example.  Try -quality 2 (or so) for some
+amusing Cubist effects.  (Note: quality values below about 25 generate 2-byte
+quantization tables, which are considered optional in the JPEG standard.
+cjpeg emits a warning message when you give such a quality value, because some
+other JPEG programs may be unable to decode the resulting file.  Use -baseline
+if you need to ensure compatibility at low quality values.)
+
+The -quality option has been extended in this version of cjpeg to support
+separate quality settings for luminance and chrominance (or, in general,
+separate settings for every quantization table slot.)  The principle is the
+same as chrominance subsampling:  since the human eye is more sensitive to
+spatial changes in brightness than spatial changes in color, the chrominance
+components can be quantized more than the luminance components without
+incurring any visible image quality loss.  However, unlike subsampling, this
+feature reduces data in the frequency domain instead of the spatial domain,
+which allows for more fine-grained control.  This option is useful in
+quality-sensitive applications, for which the artifacts generated by
+subsampling may be unacceptable.
+
+The -quality option accepts a comma-separated list of parameters, which
+respectively refer to the quality levels that should be assigned to the
+quantization table slots.  If there are more q-table slots than parameters,
+then the last parameter is replicated.  Thus, if only one quality parameter is
+given, this is used for both luminance and chrominance (slots 0 and 1,
+respectively), preserving the legacy behavior of cjpeg v6b and prior.  More (or
+customized) quantization tables can be set with the -qtables option and
+assigned to components with the -qslots option (see the "wizard" switches
+below.)
+
+JPEG files generated with separate luminance and chrominance quality are
+fully compliant with standard JPEG decoders.
+
+CAUTION: For this setting to be useful, be sure to pass an argument of
+-sample 1x1 to cjpeg to disable chrominance subsampling.  Otherwise, the
+default subsampling level (2x2, AKA "4:2:0") will be used.
+
+The -progressive switch creates a "progressive JPEG" file.  In this type of
+JPEG file, the data is stored in multiple scans of increasing quality.  If the
+file is being transmitted over a slow communications link, the decoder can use
+the first scan to display a low-quality image very quickly, and can then
+improve the display with each subsequent scan.  The final image is exactly
+equivalent to a standard JPEG file of the same quality setting, and the total
+file size is about the same --- often a little smaller.
+
+Switches for advanced users:
+
+        -arithmetic     Use arithmetic coding.  CAUTION: arithmetic coded JPEG
+                        is not yet widely implemented, so many decoders will
+                        be unable to view an arithmetic coded JPEG file at
+                        all.
+
+        -dct int        Use integer DCT method (default).
+        -dct fast       Use fast integer DCT (less accurate).
+                        In libjpeg-turbo, the fast method is generally about
+                        5-15% faster than the int method when using the
+                        x86/x86-64 SIMD extensions (results may vary with other
+                        SIMD implementations, or when using libjpeg-turbo
+                        without SIMD extensions.)  For quality levels of 90 and
+                        below, there should be little or no perceptible
+                        difference between the two algorithms.  For quality
+                        levels above 90, however, the difference between
+                        the fast and the int methods becomes more pronounced.
+                        With quality=97, for instance, the fast method incurs
+                        generally about a 1-3 dB loss (in PSNR) relative to
+                        the int method, but this can be larger for some images.
+                        Do not use the fast method with quality levels above
+                        97.  The algorithm often degenerates at quality=98 and
+                        above and can actually produce a more lossy image than
+                        if lower quality levels had been used.  Also, in
+                        libjpeg-turbo, the fast method is not fully accelerated
+                        for quality levels above 97, so it will be slower than
+                        the int method.
+        -dct float      Use floating-point DCT method.
+                        The float method is mainly a legacy feature.  It does
+                        not produce significantly more accurate results than
+                        the int method, and it is much slower.  The float
+                        method may also give different results on different
+                        machines due to varying roundoff behavior, whereas the
+                        integer methods should give the same results on all
+                        machines.
+
+        -restart N      Emit a JPEG restart marker every N MCU rows, or every
+                        N MCU blocks if "B" is attached to the number.
+                        -restart 0 (the default) means no restart markers.
+
+        -smooth N       Smooth the input image to eliminate dithering noise.
+                        N, ranging from 1 to 100, indicates the strength of
+                        smoothing.  0 (the default) means no smoothing.
+
+        -maxmemory N    Set limit for amount of memory to use in processing
+                        large images.  Value is in thousands of bytes, or
+                        millions of bytes if "M" is attached to the number.
+                        For example, -max 4m selects 4000000 bytes.  If more
+                        space is needed, temporary files will be used.
+
+        -verbose        Enable debug printout.  More -v's give more printout.
+        or  -debug      Also, version information is printed at startup.
+
+The -restart option inserts extra markers that allow a JPEG decoder to
+resynchronize after a transmission error.  Without restart markers, any damage
+to a compressed file will usually ruin the image from the point of the error
+to the end of the image; with restart markers, the damage is usually confined
+to the portion of the image up to the next restart marker.  Of course, the
+restart markers occupy extra space.  We recommend -restart 1 for images that
+will be transmitted across unreliable networks such as Usenet.
+
+The -smooth option filters the input to eliminate fine-scale noise.  This is
+often useful when converting dithered images to JPEG: a moderate smoothing
+factor of 10 to 50 gets rid of dithering patterns in the input file, resulting
+in a smaller JPEG file and a better-looking image.  Too large a smoothing
+factor will visibly blur the image, however.
+
+Switches for wizards:
+
+        -baseline       Force baseline-compatible quantization tables to be
+                        generated.  This clamps quantization values to 8 bits
+                        even at low quality settings.  (This switch is poorly
+                        named, since it does not ensure that the output is
+                        actually baseline JPEG.  For example, you can use
+                        -baseline and -progressive together.)
+
+        -qtables file   Use the quantization tables given in the specified
+                        text file.
+
+        -qslots N[,...] Select which quantization table to use for each color
+                        component.
+
+        -sample HxV[,...]  Set JPEG sampling factors for each color component.
+
+        -scans file     Use the scan script given in the specified text file.
+
+The "wizard" switches are intended for experimentation with JPEG.  If you
+don't know what you are doing, DON'T USE THEM.  These switches are documented
+further in the file wizard.txt.
+
+
+DJPEG DETAILS
+
+The basic command line switches for djpeg are:
+
+        -colors N       Reduce image to at most N colors.  This reduces the
+        or -quantize N  number of colors used in the output image, so that it
+                        can be displayed on a colormapped display or stored in
+                        a colormapped file format.  For example, if you have
+                        an 8-bit display, you'd need to reduce to 256 or fewer
+                        colors.  (-colors is the recommended name, -quantize
+                        is provided only for backwards compatibility.)
+
+        -fast           Select recommended processing options for fast, low
+                        quality output.  (The default options are chosen for
+                        highest quality output.)  Currently, this is equivalent
+                        to "-dct fast -nosmooth -onepass -dither ordered".
+
+        -grayscale      Force grayscale output even if JPEG file is color.
+                        Useful for viewing on monochrome displays; also,
+                        djpeg runs noticeably faster in this mode.
+
+        -scale M/N      Scale the output image by a factor M/N.  Currently
+                        the scale factor must be M/8, where M is an integer
+                        between 1 and 16 inclusive, or any reduced fraction
+                        thereof (such as 1/2, 3/4, etc.)  Scaling is handy if
+                        the image is larger than your screen; also, djpeg runs
+                        much faster when scaling down the output.
+
+        -bmp            Select BMP output format (Windows flavor).  8-bit
+                        colormapped format is emitted if -colors or -grayscale
+                        is specified, or if the JPEG file is grayscale;
+                        otherwise, 24-bit full-color format is emitted.
+
+        -gif            Select GIF output format.  Since GIF does not support
+                        more than 256 colors, -colors 256 is assumed (unless
+                        you specify a smaller number of colors).  If you
+                        specify -fast, the default number of colors is 216.
+
+        -os2            Select BMP output format (OS/2 1.x flavor).  8-bit
+                        colormapped format is emitted if -colors or -grayscale
+                        is specified, or if the JPEG file is grayscale;
+                        otherwise, 24-bit full-color format is emitted.
+
+        -pnm            Select PBMPLUS (PPM/PGM) output format (this is the
+                        default format).  PGM is emitted if the JPEG file is
+                        grayscale or if -grayscale is specified; otherwise
+                        PPM is emitted.
+
+        -rle            Select RLE output format.  (Requires URT library.)
+
+        -targa          Select Targa output format.  Grayscale format is
+                        emitted if the JPEG file is grayscale or if
+                        -grayscale is specified; otherwise, colormapped format
+                        is emitted if -colors is specified; otherwise, 24-bit
+                        full-color format is emitted.
+
+Switches for advanced users:
+
+        -dct int        Use integer DCT method (default).
+        -dct fast       Use fast integer DCT (less accurate).
+                        In libjpeg-turbo, the fast method is generally about
+                        5-15% faster than the int method when using the
+                        x86/x86-64 SIMD extensions (results may vary with other
+                        SIMD implementations, or when using libjpeg-turbo
+                        without SIMD extensions.)  If the JPEG image was
+                        compressed using a quality level of 85 or below, then
+                        there should be little or no perceptible difference
+                        between the two algorithms.  When decompressing images
+                        that were compressed using quality levels above 85,
+                        however, the difference between the fast and int
+                        methods becomes more pronounced.  With images
+                        compressed using quality=97, for instance, the fast
+                        method incurs generally about a 4-6 dB loss (in PSNR)
+                        relative to the int method, but this can be larger for
+                        some images.  If you can avoid it, do not use the fast
+                        method when decompressing images that were compressed
+                        using quality levels above 97.  The algorithm often
+                        degenerates for such images and can actually produce
+                        a more lossy output image than if the JPEG image had
+                        been compressed using lower quality levels.
+        -dct float      Use floating-point DCT method.
+                        The float method is mainly a legacy feature.  It does
+                        not produce significantly more accurate results than
+                        the int method, and it is much slower.  The float
+                        method may also give different results on different
+                        machines due to varying roundoff behavior, whereas the
+                        integer methods should give the same results on all
+                        machines.
+
+        -dither fs      Use Floyd-Steinberg dithering in color quantization.
+        -dither ordered Use ordered dithering in color quantization.
+        -dither none    Do not use dithering in color quantization.
+                        By default, Floyd-Steinberg dithering is applied when
+                        quantizing colors; this is slow but usually produces
+                        the best results.  Ordered dither is a compromise
+                        between speed and quality; no dithering is fast but
+                        usually looks awful.  Note that these switches have
+                        no effect unless color quantization is being done.
+                        Ordered dither is only available in -onepass mode.
+
+        -map FILE       Quantize to the colors used in the specified image
+                        file.  This is useful for producing multiple files
+                        with identical color maps, or for forcing a predefined
+                        set of colors to be used.  The FILE must be a GIF
+                        or PPM file.  This option overrides -colors and
+                        -onepass.
+
+        -nosmooth       Use a faster, lower-quality upsampling routine.
+
+        -onepass        Use one-pass instead of two-pass color quantization.
+                        The one-pass method is faster and needs less memory,
+                        but it produces a lower-quality image.  -onepass is
+                        ignored unless you also say -colors N.  Also,
+                        the one-pass method is always used for grayscale
+                        output (the two-pass method is no improvement then).
+
+        -maxmemory N    Set limit for amount of memory to use in processing
+                        large images.  Value is in thousands of bytes, or
+                        millions of bytes if "M" is attached to the number.
+                        For example, -max 4m selects 4000000 bytes.  If more
+                        space is needed, temporary files will be used.
+
+        -verbose        Enable debug printout.  More -v's give more printout.
+        or  -debug      Also, version information is printed at startup.
+
+
+HINTS FOR CJPEG
+
+Color GIF files are not the ideal input for JPEG; JPEG is really intended for
+compressing full-color (24-bit) images.  In particular, don't try to convert
+cartoons, line drawings, and other images that have only a few distinct
+colors.  GIF works great on these, JPEG does not.  If you want to convert a
+GIF to JPEG, you should experiment with cjpeg's -quality and -smooth options
+to get a satisfactory conversion.  -smooth 10 or so is often helpful.
+
+Avoid running an image through a series of JPEG compression/decompression
+cycles.  Image quality loss will accumulate; after ten or so cycles the image
+may be noticeably worse than it was after one cycle.  It's best to use a
+lossless format while manipulating an image, then convert to JPEG format when
+you are ready to file the image away.
+
+The -optimize option to cjpeg is worth using when you are making a "final"
+version for posting or archiving.  It's also a win when you are using low
+quality settings to make very small JPEG files; the percentage improvement
+is often a lot more than it is on larger files.  (At present, -optimize
+mode is always selected when generating progressive JPEG files.)
+
+Support for GIF input files was removed in cjpeg v6b due to concerns over
+the Unisys LZW patent.  Although this patent expired in 2006, cjpeg still
+lacks GIF support, for these historical reasons.  (Conversion of GIF files to
+JPEG is usually a bad idea anyway.)
+
+
+HINTS FOR DJPEG
+
+To get a quick preview of an image, use the -grayscale and/or -scale switches.
+"-grayscale -scale 1/8" is the fastest case.
+
+Several options are available that trade off image quality to gain speed.
+"-fast" turns on the recommended settings.
+
+"-dct fast" and/or "-nosmooth" gain speed at a small sacrifice in quality.
+When producing a color-quantized image, "-onepass -dither ordered" is fast but
+much lower quality than the default behavior.  "-dither none" may give
+acceptable results in two-pass mode, but is seldom tolerable in one-pass mode.
+
+Two-pass color quantization requires a good deal of memory; on MS-DOS machines
+it may run out of memory even with -maxmemory 0.  In that case you can still
+decompress, with some loss of image quality, by specifying -onepass for
+one-pass quantization.
+
+To avoid the Unisys LZW patent, djpeg produces uncompressed GIF files.  These
+are larger than they should be, but are readable by standard GIF decoders.
+
+
+HINTS FOR BOTH PROGRAMS
+
+If more space is needed than will fit in the available main memory (as
+determined by -maxmemory), temporary files will be used.  (MS-DOS versions
+will try to get extended or expanded memory first.)  The temporary files are
+often rather large: in typical cases they occupy three bytes per pixel, for
+example 3*800*600 = 1.44MB for an 800x600 image.  If you don't have enough
+free disk space, leave out -progressive and -optimize (for cjpeg) or specify
+-onepass (for djpeg).
+
+On MS-DOS, the temporary files are created in the directory named by the TMP
+or TEMP environment variable, or in the current directory if neither of those
+exists.  Amiga implementations put the temp files in the directory named by
+JPEGTMP:, so be sure to assign JPEGTMP: to a disk partition with adequate free
+space.
+
+The default memory usage limit (-maxmemory) is set when the software is
+compiled.  If you get an "insufficient memory" error, try specifying a smaller
+-maxmemory value, even -maxmemory 0 to use the absolute minimum space.  You
+may want to recompile with a smaller default value if this happens often.
+
+On machines that have "environment" variables, you can define the environment
+variable JPEGMEM to set the default memory limit.  The value is specified as
+described for the -maxmemory switch.  JPEGMEM overrides the default value
+specified when the program was compiled, and itself is overridden by an
+explicit -maxmemory switch.
+
+On MS-DOS machines, -maxmemory is the amount of main (conventional) memory to
+use.  (Extended or expanded memory is also used if available.)  Most
+DOS-specific versions of this software do their own memory space estimation
+and do not need you to specify -maxmemory.
+
+
+JPEGTRAN
+
+jpegtran performs various useful transformations of JPEG files.
+It can translate the coded representation from one variant of JPEG to another,
+for example from baseline JPEG to progressive JPEG or vice versa.  It can also
+perform some rearrangements of the image data, for example turning an image
+from landscape to portrait format by rotation.
+
+jpegtran works by rearranging the compressed data (DCT coefficients), without
+ever fully decoding the image.  Therefore, its transformations are lossless:
+there is no image degradation at all, which would not be true if you used
+djpeg followed by cjpeg to accomplish the same conversion.  But by the same
+token, jpegtran cannot perform lossy operations such as changing the image
+quality.
+
+jpegtran uses a command line syntax similar to cjpeg or djpeg.
+On Unix-like systems, you say:
+        jpegtran [switches] [inputfile] >outputfile
+On most non-Unix systems, you say:
+        jpegtran [switches] inputfile outputfile
+where both the input and output files are JPEG files.
+
+To specify the coded JPEG representation used in the output file,
+jpegtran accepts a subset of the switches recognized by cjpeg:
+        -optimize       Perform optimization of entropy encoding parameters.
+        -progressive    Create progressive JPEG file.
+        -arithmetic     Use arithmetic coding.
+        -restart N      Emit a JPEG restart marker every N MCU rows, or every
+                        N MCU blocks if "B" is attached to the number.
+        -scans file     Use the scan script given in the specified text file.
+See the previous discussion of cjpeg for more details about these switches.
+If you specify none of these switches, you get a plain baseline-JPEG output
+file.  The quality setting and so forth are determined by the input file.
+
+The image can be losslessly transformed by giving one of these switches:
+        -flip horizontal        Mirror image horizontally (left-right).
+        -flip vertical          Mirror image vertically (top-bottom).
+        -rotate 90              Rotate image 90 degrees clockwise.
+        -rotate 180             Rotate image 180 degrees.
+        -rotate 270             Rotate image 270 degrees clockwise (or 90 ccw).
+        -transpose              Transpose image (across UL-to-LR axis).
+        -transverse             Transverse transpose (across UR-to-LL axis).
+
+The transpose transformation has no restrictions regarding image dimensions.
+The other transformations operate rather oddly if the image dimensions are not
+a multiple of the iMCU size (usually 8 or 16 pixels), because they can only
+transform complete blocks of DCT coefficient data in the desired way.
+
+jpegtran's default behavior when transforming an odd-size image is designed
+to preserve exact reversibility and mathematical consistency of the
+transformation set.  As stated, transpose is able to flip the entire image
+area.  Horizontal mirroring leaves any partial iMCU column at the right edge
+untouched, but is able to flip all rows of the image.  Similarly, vertical
+mirroring leaves any partial iMCU row at the bottom edge untouched, but is
+able to flip all columns.  The other transforms can be built up as sequences
+of transpose and flip operations; for consistency, their actions on edge
+pixels are defined to be the same as the end result of the corresponding
+transpose-and-flip sequence.
+
+For practical use, you may prefer to discard any untransformable edge pixels
+rather than having a strange-looking strip along the right and/or bottom edges
+of a transformed image.  To do this, add the -trim switch:
+        -trim           Drop non-transformable edge blocks.
+Obviously, a transformation with -trim is not reversible, so strictly speaking
+jpegtran with this switch is not lossless.  Also, the expected mathematical
+equivalences between the transformations no longer hold.  For example,
+"-rot 270 -trim" trims only the bottom edge, but "-rot 90 -trim" followed by
+"-rot 180 -trim" trims both edges.
+
+If you are only interested in perfect transformations, add the -perfect switch:
+        -perfect        Fail with an error if the transformation is not
+                        perfect.
+For example, you may want to do
+  jpegtran -rot 90 -perfect foo.jpg || djpeg foo.jpg | pnmflip -r90 | cjpeg
+to do a perfect rotation, if available, or an approximated one if not.
+
+This version of jpegtran also offers a lossless crop option, which discards
+data outside of a given image region but losslessly preserves what is inside.
+Like the rotate and flip transforms, lossless crop is restricted by the current
+JPEG format; the upper left corner of the selected region must fall on an iMCU
+boundary.  If it doesn't, then it is silently moved up and/or left to the
+nearest iMCU boundary (the lower right corner is unchanged.)
+
+The image can be losslessly cropped by giving the switch:
+        -crop WxH+X+Y   Crop to a rectangular region of width W and height H,
+                        starting at point X,Y.
+
+Other not-strictly-lossless transformation switches are:
+
+        -grayscale      Force grayscale output.
+This option discards the chrominance channels if the input image is YCbCr
+(i.e., a standard color JPEG), resulting in a grayscale JPEG file.  The
+luminance channel is preserved exactly, so this is a better method of reducing
+to grayscale than decompression, conversion, and recompression.  This switch
+is particularly handy for fixing a monochrome picture that was mistakenly
+encoded as a color JPEG.  (In such a case, the space savings from getting rid
+of the near-empty chroma channels won't be large; but the decoding time for
+a grayscale JPEG is substantially less than that for a color JPEG.)
+
+jpegtran also recognizes these switches that control what to do with "extra"
+markers, such as comment blocks:
+        -copy none      Copy no extra markers from source file.  This setting
+                        suppresses all comments and other excess baggage
+                        present in the source file.
+        -copy comments  Copy only comment markers.  This setting copies
+                        comments from the source file but discards
+                        any other data that is inessential for image display.
+        -copy all       Copy all extra markers.  This setting preserves
+                        miscellaneous markers found in the source file, such
+                        as JFIF thumbnails, Exif data, and Photoshop settings.
+                        In some files, these extra markers can be sizable.
+The default behavior is -copy comments.  (Note: in IJG releases v6 and v6a,
+jpegtran always did the equivalent of -copy none.)
+
+Additional switches recognized by jpegtran are:
+        -outfile filename
+        -maxmemory N
+        -verbose
+        -debug
+These work the same as in cjpeg or djpeg.
+
+
+THE COMMENT UTILITIES
+
+The JPEG standard allows "comment" (COM) blocks to occur within a JPEG file.
+Although the standard doesn't actually define what COM blocks are for, they
+are widely used to hold user-supplied text strings.  This lets you add
+annotations, titles, index terms, etc. to your JPEG files, and later retrieve
+them as text.  COM blocks do not interfere with the image stored in the JPEG
+file.  The maximum size of a COM block is 64K, but you can have as many of
+them as you like in one JPEG file.
+
+We provide two utility programs to display COM block contents and add COM
+blocks to a JPEG file.
+
+rdjpgcom searches a JPEG file and prints the contents of any COM blocks on
+standard output.  The command line syntax is
+        rdjpgcom [-raw] [-verbose] [inputfilename]
+The switch "-raw" (or just "-r") causes rdjpgcom to output non-printable
+characters in JPEG comments.  These characters are normally escaped for
+security reasons.
+The switch "-verbose" (or just "-v") causes rdjpgcom to also display the JPEG
+image dimensions.  If you omit the input file name from the command line,
+the JPEG file is read from standard input.  (This may not work on some
+operating systems, if binary data can't be read from stdin.)
+
+wrjpgcom adds a COM block, containing text you provide, to a JPEG file.
+Ordinarily, the COM block is added after any existing COM blocks, but you
+can delete the old COM blocks if you wish.  wrjpgcom produces a new JPEG
+file; it does not modify the input file.  DO NOT try to overwrite the input
+file by directing wrjpgcom's output back into it; on most systems this will
+just destroy your file.
+
+The command line syntax for wrjpgcom is similar to cjpeg's.  On Unix-like
+systems, it is
+        wrjpgcom [switches] [inputfilename]
+The output file is written to standard output.  The input file comes from
+the named file, or from standard input if no input file is named.
+
+On most non-Unix systems, the syntax is
+        wrjpgcom [switches] inputfilename outputfilename
+where both input and output file names must be given explicitly.
+
+wrjpgcom understands three switches:
+        -replace                 Delete any existing COM blocks from the file.
+        -comment "Comment text"  Supply new COM text on command line.
+        -cfile name              Read text for new COM block from named file.
+(Switch names can be abbreviated.)  If you have only one line of comment text
+to add, you can provide it on the command line with -comment.  The comment
+text must be surrounded with quotes so that it is treated as a single
+argument.  Longer comments can be read from a text file.
+
+If you give neither -comment nor -cfile, then wrjpgcom will read the comment
+text from standard input.  (In this case an input image file name MUST be
+supplied, so that the source JPEG file comes from somewhere else.)  You can
+enter multiple lines, up to 64KB worth.  Type an end-of-file indicator
+(usually control-D or control-Z) to terminate the comment text entry.
+
+wrjpgcom will not add a COM block if the provided comment string is empty.
+Therefore -replace -comment "" can be used to delete all COM blocks from a
+file.
+
+These utility programs do not depend on the IJG JPEG library.  In
+particular, the source code for rdjpgcom is intended as an illustration of
+the minimum amount of code required to parse a JPEG file header correctly.
diff --git a/win/jconfig.h.in b/win/jconfig.h.in
new file mode 100644
index 0000000..8783900
--- /dev/null
+++ b/win/jconfig.h.in
@@ -0,0 +1,50 @@
+/* jconfig.vc --- jconfig.h for Microsoft Visual C++ on Windows 95 or NT. */
+/* see jconfig.txt for explanations */
+
+#define JPEG_LIB_VERSION @JPEG_LIB_VERSION@
+#define LIBJPEG_TURBO_VERSION @VERSION@
+#cmakedefine C_ARITH_CODING_SUPPORTED
+#cmakedefine D_ARITH_CODING_SUPPORTED
+#cmakedefine MEM_SRCDST_SUPPORTED
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  @BITS_IN_JSAMPLE@      /* use 8 or 12 */
+
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+/* #define void char */
+/* #define const */
+#undef __CHAR_UNSIGNED__
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+#undef NEED_BSD_STRINGS
+#undef NEED_SYS_TYPES_H
+#undef NEED_FAR_POINTERS	/* we presume a 32-bit flat memory model */
+#undef INCOMPLETE_TYPES_BROKEN
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#ifndef __RPCNDR_H__		/* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif /* __RPCNDR_H__ */
+#define HAVE_BOOLEAN		/* prevent jmorecfg.h from redefining boolean */
+
+/* Define "INT16" as short and "INT32" as int (not long), per Windows custom */
+#if !(defined(_BASETSD_H_) || defined(_BASETSD_H))   /* don't conflict if basetsd.h already read */
+typedef short INT16;
+typedef signed int INT32;
+#endif /* !(_BASETSD_H_ || _BASETSD_H) */
+#define XMD_H                   /* prevent jmorecfg.h from redefining INT16/INT32 */
+
+#ifdef JPEG_INTERNALS
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+#endif /* JPEG_INTERNALS */
diff --git a/win/jconfigint.h.in b/win/jconfigint.h.in
new file mode 100644
index 0000000..2131bf5
--- /dev/null
+++ b/win/jconfigint.h.in
@@ -0,0 +1,13 @@
+#define VERSION "@VERSION@"
+#define BUILD "@BUILD@"
+#define PACKAGE_NAME "@CMAKE_PROJECT_NAME@"
+
+#ifndef INLINE                  /* keep any INLINE defined before this header */
+#if defined(__GNUC__)
+#define INLINE inline __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define INLINE __forceinline
+#else
+#define INLINE                  /* other compilers: expands to nothing */
+#endif /* compiler selection */
+#endif /* INLINE */
diff --git a/win/jpeg62-memsrcdst.def b/win/jpeg62-memsrcdst.def
new file mode 100755
index 0000000..4511c8e
--- /dev/null
+++ b/win/jpeg62-memsrcdst.def
@@ -0,0 +1,104 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_output_dimensions @ 38 ; 
+	jpeg_consume_input @ 39 ; 
+	jpeg_copy_critical_parameters @ 40 ; 
+	jpeg_default_colorspace @ 41 ; 
+	jpeg_destroy @ 42 ; 
+	jpeg_destroy_compress @ 43 ; 
+	jpeg_destroy_decompress @ 44 ; 
+	jpeg_fdct_float @ 45 ; 
+	jpeg_fdct_ifast @ 46 ; 
+	jpeg_fdct_islow @ 47 ; 
+	jpeg_fill_bit_buffer @ 48 ; 
+	jpeg_finish_compress @ 49 ; 
+	jpeg_finish_decompress @ 50 ; 
+	jpeg_finish_output @ 51 ; 
+	jpeg_free_large @ 52 ; 
+	jpeg_free_small @ 53 ; 
+	jpeg_gen_optimal_table @ 54 ; 
+	jpeg_get_large @ 55 ; 
+	jpeg_get_small @ 56 ; 
+	jpeg_has_multiple_scans @ 57 ; 
+	jpeg_huff_decode @ 58 ; 
+	jpeg_idct_1x1 @ 59 ; 
+	jpeg_idct_2x2 @ 60 ; 
+	jpeg_idct_4x4 @ 61 ; 
+	jpeg_idct_float @ 62 ; 
+	jpeg_idct_ifast @ 63 ; 
+	jpeg_idct_islow @ 64 ; 
+	jpeg_input_complete @ 65 ; 
+	jpeg_make_c_derived_tbl @ 66 ; 
+	jpeg_make_d_derived_tbl @ 67 ; 
+	jpeg_mem_available @ 68 ; 
+	jpeg_mem_init @ 69 ; 
+	jpeg_mem_term @ 70 ; 
+	jpeg_new_colormap @ 71 ; 
+	jpeg_open_backing_store @ 72 ; 
+	jpeg_quality_scaling @ 73 ; 
+	jpeg_read_coefficients @ 74 ; 
+	jpeg_read_header @ 75 ; 
+	jpeg_read_raw_data @ 76 ; 
+	jpeg_read_scanlines @ 77 ; 
+	jpeg_resync_to_restart @ 78 ; 
+	jpeg_save_markers @ 79 ; 
+	jpeg_set_colorspace @ 80 ; 
+	jpeg_set_defaults @ 81 ; 
+	jpeg_set_linear_quality @ 82 ; 
+	jpeg_set_marker_processor @ 83 ; 
+	jpeg_set_quality @ 84 ; 
+	jpeg_simple_progression @ 85 ; 
+	jpeg_start_compress @ 86 ; 
+	jpeg_start_decompress @ 87 ; 
+	jpeg_start_output @ 88 ; 
+	jpeg_std_error @ 89 ; 
+	jpeg_stdio_dest @ 90 ; 
+	jpeg_stdio_src @ 91 ; 
+	jpeg_suppress_tables @ 92 ; 
+	jpeg_write_coefficients @ 93 ; 
+	jpeg_write_m_byte @ 94 ; 
+	jpeg_write_m_header @ 95 ; 
+	jpeg_write_marker @ 96 ; 
+	jpeg_write_raw_data @ 97 ; 
+	jpeg_write_scanlines @ 98 ; 
+	jpeg_write_tables @ 99 ; 
+	jround_up @ 100 ; 
+	jzero_far @ 101 ; 
+	jpeg_mem_dest @ 102 ; 
+	jpeg_mem_src @ 103 ; 
diff --git a/win/jpeg62.def b/win/jpeg62.def
new file mode 100755
index 0000000..3c33fbf
--- /dev/null
+++ b/win/jpeg62.def
@@ -0,0 +1,102 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_output_dimensions @ 38 ; 
+	jpeg_consume_input @ 39 ; 
+	jpeg_copy_critical_parameters @ 40 ; 
+	jpeg_default_colorspace @ 41 ; 
+	jpeg_destroy @ 42 ; 
+	jpeg_destroy_compress @ 43 ; 
+	jpeg_destroy_decompress @ 44 ; 
+	jpeg_fdct_float @ 45 ; 
+	jpeg_fdct_ifast @ 46 ; 
+	jpeg_fdct_islow @ 47 ; 
+	jpeg_fill_bit_buffer @ 48 ; 
+	jpeg_finish_compress @ 49 ; 
+	jpeg_finish_decompress @ 50 ; 
+	jpeg_finish_output @ 51 ; 
+	jpeg_free_large @ 52 ; 
+	jpeg_free_small @ 53 ; 
+	jpeg_gen_optimal_table @ 54 ; 
+	jpeg_get_large @ 55 ; 
+	jpeg_get_small @ 56 ; 
+	jpeg_has_multiple_scans @ 57 ; 
+	jpeg_huff_decode @ 58 ; 
+	jpeg_idct_1x1 @ 59 ; 
+	jpeg_idct_2x2 @ 60 ; 
+	jpeg_idct_4x4 @ 61 ; 
+	jpeg_idct_float @ 62 ; 
+	jpeg_idct_ifast @ 63 ; 
+	jpeg_idct_islow @ 64 ; 
+	jpeg_input_complete @ 65 ; 
+	jpeg_make_c_derived_tbl @ 66 ; 
+	jpeg_make_d_derived_tbl @ 67 ; 
+	jpeg_mem_available @ 68 ; 
+	jpeg_mem_init @ 69 ; 
+	jpeg_mem_term @ 70 ; 
+	jpeg_new_colormap @ 71 ; 
+	jpeg_open_backing_store @ 72 ; 
+	jpeg_quality_scaling @ 73 ; 
+	jpeg_read_coefficients @ 74 ; 
+	jpeg_read_header @ 75 ; 
+	jpeg_read_raw_data @ 76 ; 
+	jpeg_read_scanlines @ 77 ; 
+	jpeg_resync_to_restart @ 78 ; 
+	jpeg_save_markers @ 79 ; 
+	jpeg_set_colorspace @ 80 ; 
+	jpeg_set_defaults @ 81 ; 
+	jpeg_set_linear_quality @ 82 ; 
+	jpeg_set_marker_processor @ 83 ; 
+	jpeg_set_quality @ 84 ; 
+	jpeg_simple_progression @ 85 ; 
+	jpeg_start_compress @ 86 ; 
+	jpeg_start_decompress @ 87 ; 
+	jpeg_start_output @ 88 ; 
+	jpeg_std_error @ 89 ; 
+	jpeg_stdio_dest @ 90 ; 
+	jpeg_stdio_src @ 91 ; 
+	jpeg_suppress_tables @ 92 ; 
+	jpeg_write_coefficients @ 93 ; 
+	jpeg_write_m_byte @ 94 ; 
+	jpeg_write_m_header @ 95 ; 
+	jpeg_write_marker @ 96 ; 
+	jpeg_write_raw_data @ 97 ; 
+	jpeg_write_scanlines @ 98 ; 
+	jpeg_write_tables @ 99 ; 
+	jround_up @ 100 ; 
+	jzero_far @ 101 ; 
diff --git a/win/jpeg7-memsrcdst.def b/win/jpeg7-memsrcdst.def
new file mode 100644
index 0000000..8c9f517
--- /dev/null
+++ b/win/jpeg7-memsrcdst.def
@@ -0,0 +1,106 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_jpeg_dimensions @ 38 ; 
+	jpeg_calc_output_dimensions @ 39 ; 
+	jpeg_consume_input @ 40 ; 
+	jpeg_copy_critical_parameters @ 41 ; 
+	jpeg_default_colorspace @ 42 ; 
+	jpeg_default_qtables @ 43 ;
+	jpeg_destroy @ 44 ; 
+	jpeg_destroy_compress @ 45 ; 
+	jpeg_destroy_decompress @ 46 ; 
+	jpeg_fdct_float @ 47 ; 
+	jpeg_fdct_ifast @ 48 ; 
+	jpeg_fdct_islow @ 49 ; 
+	jpeg_fill_bit_buffer @ 50 ; 
+	jpeg_finish_compress @ 51 ; 
+	jpeg_finish_decompress @ 52 ; 
+	jpeg_finish_output @ 53 ; 
+	jpeg_free_large @ 54 ; 
+	jpeg_free_small @ 55 ; 
+	jpeg_gen_optimal_table @ 56 ; 
+	jpeg_get_large @ 57 ; 
+	jpeg_get_small @ 58 ; 
+	jpeg_has_multiple_scans @ 59 ; 
+	jpeg_huff_decode @ 60 ; 
+	jpeg_idct_1x1 @ 61 ; 
+	jpeg_idct_2x2 @ 62 ; 
+	jpeg_idct_4x4 @ 63 ; 
+	jpeg_idct_float @ 64 ; 
+	jpeg_idct_ifast @ 65 ; 
+	jpeg_idct_islow @ 66 ; 
+	jpeg_input_complete @ 67 ; 
+	jpeg_make_c_derived_tbl @ 68 ; 
+	jpeg_make_d_derived_tbl @ 69 ; 
+	jpeg_mem_available @ 70 ; 
+	jpeg_mem_init @ 71 ; 
+	jpeg_mem_term @ 72 ; 
+	jpeg_new_colormap @ 73 ; 
+	jpeg_open_backing_store @ 74 ; 
+	jpeg_quality_scaling @ 75 ; 
+	jpeg_read_coefficients @ 76 ; 
+	jpeg_read_header @ 77 ; 
+	jpeg_read_raw_data @ 78 ; 
+	jpeg_read_scanlines @ 79 ; 
+	jpeg_resync_to_restart @ 80 ; 
+	jpeg_save_markers @ 81 ; 
+	jpeg_set_colorspace @ 82 ; 
+	jpeg_set_defaults @ 83 ; 
+	jpeg_set_linear_quality @ 84 ; 
+	jpeg_set_marker_processor @ 85 ; 
+	jpeg_set_quality @ 86 ; 
+	jpeg_simple_progression @ 87 ; 
+	jpeg_start_compress @ 88 ; 
+	jpeg_start_decompress @ 89 ; 
+	jpeg_start_output @ 90 ; 
+	jpeg_std_error @ 91 ; 
+	jpeg_stdio_dest @ 92 ; 
+	jpeg_stdio_src @ 93 ; 
+	jpeg_suppress_tables @ 94 ; 
+	jpeg_write_coefficients @ 95 ; 
+	jpeg_write_m_byte @ 96 ; 
+	jpeg_write_m_header @ 97 ; 
+	jpeg_write_marker @ 98 ; 
+	jpeg_write_raw_data @ 99 ; 
+	jpeg_write_scanlines @ 100 ; 
+	jpeg_write_tables @ 101 ; 
+	jround_up @ 102 ; 
+	jzero_far @ 103 ; 
+	jpeg_mem_dest @ 104 ; 
+	jpeg_mem_src @ 105 ; 
diff --git a/win/jpeg7.def b/win/jpeg7.def
new file mode 100644
index 0000000..5ca227b
--- /dev/null
+++ b/win/jpeg7.def
@@ -0,0 +1,104 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_jpeg_dimensions @ 38 ; 
+	jpeg_calc_output_dimensions @ 39 ; 
+	jpeg_consume_input @ 40 ; 
+	jpeg_copy_critical_parameters @ 41 ; 
+	jpeg_default_colorspace @ 42 ; 
+	jpeg_default_qtables @ 43 ;
+	jpeg_destroy @ 44 ; 
+	jpeg_destroy_compress @ 45 ; 
+	jpeg_destroy_decompress @ 46 ; 
+	jpeg_fdct_float @ 47 ; 
+	jpeg_fdct_ifast @ 48 ; 
+	jpeg_fdct_islow @ 49 ; 
+	jpeg_fill_bit_buffer @ 50 ; 
+	jpeg_finish_compress @ 51 ; 
+	jpeg_finish_decompress @ 52 ; 
+	jpeg_finish_output @ 53 ; 
+	jpeg_free_large @ 54 ; 
+	jpeg_free_small @ 55 ; 
+	jpeg_gen_optimal_table @ 56 ; 
+	jpeg_get_large @ 57 ; 
+	jpeg_get_small @ 58 ; 
+	jpeg_has_multiple_scans @ 59 ; 
+	jpeg_huff_decode @ 60 ; 
+	jpeg_idct_1x1 @ 61 ; 
+	jpeg_idct_2x2 @ 62 ; 
+	jpeg_idct_4x4 @ 63 ; 
+	jpeg_idct_float @ 64 ; 
+	jpeg_idct_ifast @ 65 ; 
+	jpeg_idct_islow @ 66 ; 
+	jpeg_input_complete @ 67 ; 
+	jpeg_make_c_derived_tbl @ 68 ; 
+	jpeg_make_d_derived_tbl @ 69 ; 
+	jpeg_mem_available @ 70 ; 
+	jpeg_mem_init @ 71 ; 
+	jpeg_mem_term @ 72 ; 
+	jpeg_new_colormap @ 73 ; 
+	jpeg_open_backing_store @ 74 ; 
+	jpeg_quality_scaling @ 75 ; 
+	jpeg_read_coefficients @ 76 ; 
+	jpeg_read_header @ 77 ; 
+	jpeg_read_raw_data @ 78 ; 
+	jpeg_read_scanlines @ 79 ; 
+	jpeg_resync_to_restart @ 80 ; 
+	jpeg_save_markers @ 81 ; 
+	jpeg_set_colorspace @ 82 ; 
+	jpeg_set_defaults @ 83 ; 
+	jpeg_set_linear_quality @ 84 ; 
+	jpeg_set_marker_processor @ 85 ; 
+	jpeg_set_quality @ 86 ; 
+	jpeg_simple_progression @ 87 ; 
+	jpeg_start_compress @ 88 ; 
+	jpeg_start_decompress @ 89 ; 
+	jpeg_start_output @ 90 ; 
+	jpeg_std_error @ 91 ; 
+	jpeg_stdio_dest @ 92 ; 
+	jpeg_stdio_src @ 93 ; 
+	jpeg_suppress_tables @ 94 ; 
+	jpeg_write_coefficients @ 95 ; 
+	jpeg_write_m_byte @ 96 ; 
+	jpeg_write_m_header @ 97 ; 
+	jpeg_write_marker @ 98 ; 
+	jpeg_write_raw_data @ 99 ; 
+	jpeg_write_scanlines @ 100 ; 
+	jpeg_write_tables @ 101 ; 
+	jround_up @ 102 ; 
+	jzero_far @ 103 ; 
diff --git a/win/jpeg8.def b/win/jpeg8.def
new file mode 100644
index 0000000..3fa6111
--- /dev/null
+++ b/win/jpeg8.def
@@ -0,0 +1,107 @@
+EXPORTS
+	jcopy_block_row @ 1 ; 
+	jcopy_sample_rows @ 2 ; 
+	jdiv_round_up @ 3 ; 
+	jinit_1pass_quantizer @ 4 ; 
+	jinit_2pass_quantizer @ 5 ; 
+	jinit_c_coef_controller @ 6 ; 
+	jinit_c_main_controller @ 7 ; 
+	jinit_c_master_control @ 8 ; 
+	jinit_c_prep_controller @ 9 ; 
+	jinit_color_converter @ 10 ; 
+	jinit_color_deconverter @ 11 ; 
+	jinit_compress_master @ 12 ; 
+	jinit_d_coef_controller @ 13 ; 
+	jinit_d_main_controller @ 14 ; 
+	jinit_d_post_controller @ 15 ; 
+	jinit_downsampler @ 16 ; 
+	jinit_forward_dct @ 17 ; 
+	jinit_huff_decoder @ 18 ; 
+	jinit_huff_encoder @ 19 ; 
+	jinit_input_controller @ 20 ; 
+	jinit_inverse_dct @ 21 ; 
+	jinit_marker_reader @ 22 ; 
+	jinit_marker_writer @ 23 ; 
+	jinit_master_decompress @ 24 ; 
+	jinit_memory_mgr @ 25 ; 
+	jinit_merged_upsampler @ 26 ; 
+	jinit_phuff_decoder @ 27 ; 
+	jinit_phuff_encoder @ 28 ; 
+	jinit_upsampler @ 29 ; 
+	jpeg_CreateCompress @ 30 ; 
+	jpeg_CreateDecompress @ 31 ; 
+	jpeg_abort @ 32 ; 
+	jpeg_abort_compress @ 33 ; 
+	jpeg_abort_decompress @ 34 ; 
+	jpeg_add_quant_table @ 35 ; 
+	jpeg_alloc_huff_table @ 36 ; 
+	jpeg_alloc_quant_table @ 37 ; 
+	jpeg_calc_jpeg_dimensions @ 38 ; 
+	jpeg_calc_output_dimensions @ 39 ; 
+	jpeg_consume_input @ 40 ; 
+	jpeg_copy_critical_parameters @ 41 ; 
+	jpeg_core_output_dimensions @ 42 ; 
+	jpeg_default_colorspace @ 43 ; 
+	jpeg_default_qtables @ 44 ;
+	jpeg_destroy @ 45 ; 
+	jpeg_destroy_compress @ 46 ; 
+	jpeg_destroy_decompress @ 47 ; 
+	jpeg_fdct_float @ 48 ; 
+	jpeg_fdct_ifast @ 49 ; 
+	jpeg_fdct_islow @ 50 ; 
+	jpeg_fill_bit_buffer @ 51 ; 
+	jpeg_finish_compress @ 52 ; 
+	jpeg_finish_decompress @ 53 ; 
+	jpeg_finish_output @ 54 ; 
+	jpeg_free_large @ 55 ; 
+	jpeg_free_small @ 56 ; 
+	jpeg_gen_optimal_table @ 57 ; 
+	jpeg_get_large @ 58 ; 
+	jpeg_get_small @ 59 ; 
+	jpeg_has_multiple_scans @ 60 ; 
+	jpeg_huff_decode @ 61 ; 
+	jpeg_idct_1x1 @ 62 ; 
+	jpeg_idct_2x2 @ 63 ; 
+	jpeg_idct_4x4 @ 64 ; 
+	jpeg_idct_float @ 65 ; 
+	jpeg_idct_ifast @ 66 ; 
+	jpeg_idct_islow @ 67 ; 
+	jpeg_input_complete @ 68 ; 
+	jpeg_make_c_derived_tbl @ 69 ; 
+	jpeg_make_d_derived_tbl @ 70 ; 
+	jpeg_mem_available @ 71 ; 
+	jpeg_mem_dest @ 72 ;
+	jpeg_mem_init @ 73 ; 
+	jpeg_mem_src @ 74 ;
+	jpeg_mem_term @ 75 ; 
+	jpeg_new_colormap @ 76 ; 
+	jpeg_open_backing_store @ 77 ; 
+	jpeg_quality_scaling @ 78 ; 
+	jpeg_read_coefficients @ 79 ; 
+	jpeg_read_header @ 80 ; 
+	jpeg_read_raw_data @ 81 ; 
+	jpeg_read_scanlines @ 82 ; 
+	jpeg_resync_to_restart @ 83 ; 
+	jpeg_save_markers @ 84 ; 
+	jpeg_set_colorspace @ 85 ; 
+	jpeg_set_defaults @ 86 ; 
+	jpeg_set_linear_quality @ 87 ; 
+	jpeg_set_marker_processor @ 88 ; 
+	jpeg_set_quality @ 89 ; 
+	jpeg_simple_progression @ 90 ; 
+	jpeg_start_compress @ 91 ; 
+	jpeg_start_decompress @ 92 ; 
+	jpeg_start_output @ 93 ; 
+	jpeg_std_error @ 94 ; 
+	jpeg_stdio_dest @ 95 ; 
+	jpeg_stdio_src @ 96 ; 
+	jpeg_suppress_tables @ 97 ; 
+	jpeg_write_coefficients @ 98 ; 
+	jpeg_write_m_byte @ 99 ; 
+	jpeg_write_m_header @ 100 ; 
+	jpeg_write_marker @ 101 ; 
+	jpeg_write_raw_data @ 102 ; 
+	jpeg_write_scanlines @ 103 ; 
+	jpeg_write_tables @ 104 ; 
+	jround_up @ 105 ; 
+	jzero_far @ 106 ; 
diff --git a/win/jsimdcfg.inc b/win/jsimdcfg.inc
new file mode 100755
index 0000000..9d4aede
--- /dev/null
+++ b/win/jsimdcfg.inc
@@ -0,0 +1,94 @@
+;
+; Automatically generated include file from jsimdcfg.inc.h
+;
+;
+; -- jpeglib.h
+;
+%define DCTSIZE 8
+%define DCTSIZE2 64
+;
+; -- jmorecfg.h
+;
+%define RGB_RED 0
+%define RGB_GREEN 1
+%define RGB_BLUE 2
+%define RGB_PIXELSIZE 3
+%define EXT_RGB_RED 0
+%define EXT_RGB_GREEN 1
+%define EXT_RGB_BLUE 2
+%define EXT_RGB_PIXELSIZE 3
+%define EXT_RGBX_RED 0
+%define EXT_RGBX_GREEN 1
+%define EXT_RGBX_BLUE 2
+%define EXT_RGBX_PIXELSIZE 4
+%define EXT_BGR_RED 2
+%define EXT_BGR_GREEN 1
+%define EXT_BGR_BLUE 0
+%define EXT_BGR_PIXELSIZE 3
+%define EXT_BGRX_RED 2
+%define EXT_BGRX_GREEN 1
+%define EXT_BGRX_BLUE 0
+%define EXT_BGRX_PIXELSIZE 4
+%define EXT_XBGR_RED 3
+%define EXT_XBGR_GREEN 2
+%define EXT_XBGR_BLUE 1
+%define EXT_XBGR_PIXELSIZE 4
+%define EXT_XRGB_RED 1
+%define EXT_XRGB_GREEN 2
+%define EXT_XRGB_BLUE 3
+%define EXT_XRGB_PIXELSIZE 4
+%define RGBX_FILLER_0XFF 1
+; Representation of a single sample (pixel element value).
+; On this SIMD implementation, this must be 'unsigned char'.
+;
+%define JSAMPLE byte ; unsigned char
+%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)
+%define CENTERJSAMPLE 128
+; Representation of a DCT frequency coefficient.
+; On this SIMD implementation, this must be 'short'.
+;
+%define JCOEF word ; short
+%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)
+; Datatype used for image dimensions.
+; On this SIMD implementation, this must be 'unsigned int'.
+;
+%define JDIMENSION dword ; unsigned int
+%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)
+%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)
+%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)
+%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)
+%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)
+%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)
+%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)
+%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)
+%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)
+;
+; -- jdct.h
+;
+; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+; the DCT is to be performed in-place in that buffer.
+; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
+;
+%define DCTELEM word ; short
+%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)
+%define float FP32 ; float
+%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)
+; To maximize parallelism, the integer multiplier types are defined as short.
+;
+%define ISLOW_MULT_TYPE word ; must be short
+%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)
+%define IFAST_MULT_TYPE word ; must be short
+%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)
+%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors
+%define FLOAT_MULT_TYPE FP32 ; must be float
+%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)
+;
+; -- jsimd.h
+;
+%define JSIMD_NONE 0x00
+%define JSIMD_MMX 0x01
+%define JSIMD_3DNOW 0x02
+%define JSIMD_SSE 0x04
+%define JSIMD_SSE2 0x08
+; Short forms of external names for systems with brain-damaged linkers.
+;
diff --git a/wizard.doc b/wizard.txt
similarity index 69%
rename from wizard.doc
rename to wizard.txt
index 54170b2..ede721e 100644
--- a/wizard.doc
+++ b/wizard.txt
@@ -30,7 +30,7 @@
 You can substitute a different set of quantization values by using the
 -qtables switch:
 
-	-qtables file	Use the quantization tables given in the named file.
+        -qtables file   Use the quantization tables given in the named file.
 
 The specified file should be a text file containing decimal quantization
 values.  The file should contain one to four tables, each of 64 elements.
@@ -43,27 +43,27 @@
 with '#' and extends to the end of the line.  Here is an example file that
 duplicates the default quantization tables:
 
-	# Quantization tables given in JPEG spec, section K.1
+        # Quantization tables given in JPEG spec, section K.1
 
-	# This is table 0 (the luminance table):
-	  16  11  10  16  24  40  51  61
-	  12  12  14  19  26  58  60  55
-	  14  13  16  24  40  57  69  56
-	  14  17  22  29  51  87  80  62
-	  18  22  37  56  68 109 103  77
-	  24  35  55  64  81 104 113  92
-	  49  64  78  87 103 121 120 101
-	  72  92  95  98 112 100 103  99
+        # This is table 0 (the luminance table):
+          16  11  10  16  24  40  51  61
+          12  12  14  19  26  58  60  55
+          14  13  16  24  40  57  69  56
+          14  17  22  29  51  87  80  62
+          18  22  37  56  68 109 103  77
+          24  35  55  64  81 104 113  92
+          49  64  78  87 103 121 120 101
+          72  92  95  98 112 100 103  99
 
-	# This is table 1 (the chrominance table):
-	  17  18  24  47  99  99  99  99
-	  18  21  26  66  99  99  99  99
-	  24  26  56  99  99  99  99  99
-	  47  66  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
-	  99  99  99  99  99  99  99  99
+        # This is table 1 (the chrominance table):
+          17  18  24  47  99  99  99  99
+          18  21  26  66  99  99  99  99
+          24  26  56  99  99  99  99  99
+          47  66  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
+          99  99  99  99  99  99  99  99
 
 If the -qtables switch is used without -quality, then the specified tables
 are used exactly as-is.  If both -qtables and -quality are used, then the
@@ -75,8 +75,8 @@
 table 1 for chrominance components.  To override this choice, use the -qslots
 switch:
 
-	-qslots N[,...]		Select which quantization table to use for
-				each color component.
+        -qslots N[,...]         Select which quantization table to use for
+                                each color component.
 
 The -qslots switch specifies a quantization table number for each color
 component, in the order in which the components appear in the JPEG SOF marker.
@@ -93,8 +93,8 @@
 compressing YCbCr data, and no downsampling for all other color spaces.
 You can override this default with the -sample switch:
 
-	-sample HxV[,...]	Set JPEG sampling factors for each color
-				component.
+        -sample HxV[,...]       Set JPEG sampling factors for each color
+                                component.
 
 The -sample switch specifies the JPEG sampling factors for each color
 component, in the order in which they appear in the JPEG SOF marker.
@@ -119,7 +119,7 @@
 files or progressive JPEG files with custom progression parameters by using
 the -scans switch:
 
-	-scans file	Use the scan sequence given in the named file.
+        -scans file     Use the scan sequence given in the named file.
 
 The specified file should be a text file containing a "scan script".
 The script specifies the contents and ordering of the scans to be emitted.
@@ -138,10 +138,10 @@
 positional indexes.)
 
 The progression parameters for each scan are:
-	Ss	Zigzag index of first coefficient included in scan
-	Se	Zigzag index of last coefficient included in scan
-	Ah	Zero for first scan of a coefficient, else Al of prior scan
-	Al	Successive approximation low bit position for scan
+        Ss      Zigzag index of first coefficient included in scan
+        Se      Zigzag index of last coefficient included in scan
+        Ah      Zero for first scan of a coefficient, else Al of prior scan
+        Al      Successive approximation low bit position for scan
 If the progression parameters are omitted, the values 0,63,0,0 are used,
 producing a sequential JPEG file.  cjpeg automatically determines whether
 the script represents a progressive or sequential file, by observing whether
@@ -156,52 +156,52 @@
 legibility, commas or dashes can be placed between values.  (Actually, any
 single punctuation character other than ':' or ';' can be inserted.)  For
 example, the following two scan definitions are equivalent:
-	0 1 2: 0 63 0 0;
-	0,1,2 : 0-63, 0,0 ;
+        0 1 2: 0 63 0 0;
+        0,1,2 : 0-63, 0,0 ;
 
 Here is an example of a scan script that generates a partially interleaved
 sequential JPEG file:
 
-	0;			# Y only in first scan
-	1 2;			# Cb and Cr in second scan
+        0;                      # Y only in first scan
+        1 2;                    # Cb and Cr in second scan
 
 Here is an example of a progressive scan script using only spectral selection
 (no successive approximation):
 
-	# Interleaved DC scan for Y,Cb,Cr:
-	0,1,2: 0-0,   0, 0 ;
-	# AC scans:
-	0:     1-2,   0, 0 ;	# First two Y AC coefficients
-	0:     3-5,   0, 0 ;	# Three more
-	1:     1-63,  0, 0 ;	# All AC coefficients for Cb
-	2:     1-63,  0, 0 ;	# All AC coefficients for Cr
-	0:     6-9,   0, 0 ;	# More Y coefficients
-	0:     10-63, 0, 0 ;	# Remaining Y coefficients
+        # Interleaved DC scan for Y,Cb,Cr:
+        0,1,2: 0-0,   0, 0 ;
+        # AC scans:
+        0:     1-2,   0, 0 ;    # First two Y AC coefficients
+        0:     3-5,   0, 0 ;    # Three more
+        1:     1-63,  0, 0 ;    # All AC coefficients for Cb
+        2:     1-63,  0, 0 ;    # All AC coefficients for Cr
+        0:     6-9,   0, 0 ;    # More Y coefficients
+        0:     10-63, 0, 0 ;    # Remaining Y coefficients
 
 Here is an example of a successive-approximation script.  This is equivalent
 to the default script used by "cjpeg -progressive" for YCbCr images:
 
-	# Initial DC scan for Y,Cb,Cr (lowest bit not sent)
-	0,1,2: 0-0,   0, 1 ;
-	# First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits:
-	0:     1-5,   0, 2 ;
-	# Send all Cr,Cb AC coefficients, minus lowest bit:
-	# (chroma data is usually too small to be worth subdividing further;
-	#  but note we send Cr first since eye is least sensitive to Cb)
-	2:     1-63,  0, 1 ;
-	1:     1-63,  0, 1 ;
-	# Send remaining Y AC coefficients, minus 2 lowest bits:
-	0:     6-63,  0, 2 ;
-	# Send next-to-lowest bit of all Y AC coefficients:
-	0:     1-63,  2, 1 ;
-	# At this point we've sent all but the lowest bit of all coefficients.
-	# Send lowest bit of DC coefficients
-	0,1,2: 0-0,   1, 0 ;
-	# Send lowest bit of AC coefficients
-	2:     1-63,  1, 0 ;
-	1:     1-63,  1, 0 ;
-	# Y AC lowest bit scan is last; it's usually the largest scan
-	0:     1-63,  1, 0 ;
+        # Initial DC scan for Y,Cb,Cr (lowest bit not sent)
+        0,1,2: 0-0,   0, 1 ;
+        # First AC scan: send first 5 Y AC coefficients, minus 2 lowest bits:
+        0:     1-5,   0, 2 ;
+        # Send all Cr,Cb AC coefficients, minus lowest bit:
+        # (chroma data is usually too small to be worth subdividing further;
+        #  but note we send Cr first since eye is least sensitive to Cb)
+        2:     1-63,  0, 1 ;
+        1:     1-63,  0, 1 ;
+        # Send remaining Y AC coefficients, minus 2 lowest bits:
+        0:     6-63,  0, 2 ;
+        # Send next-to-lowest bit of all Y AC coefficients:
+        0:     1-63,  2, 1 ;
+        # At this point we've sent all but the lowest bit of all coefficients.
+        # Send lowest bit of DC coefficients
+        0,1,2: 0-0,   1, 0 ;
+        # Send lowest bit of AC coefficients
+        2:     1-63,  1, 0 ;
+        1:     1-63,  1, 0 ;
+        # Y AC lowest bit scan is last; it's usually the largest scan
+        0:     1-63,  1, 0 ;
 
 It may be worth pointing out that this script is tuned for quality settings
 of around 50 to 75.  For lower quality settings, you'd probably want to use
diff --git a/wrbmp.c b/wrbmp.c
index 3283b0f..3a85441 100644
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -1,8 +1,10 @@
 /*
  * wrbmp.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, Linaro Limited.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write output images in Microsoft "BMP"
@@ -17,7 +19,7 @@
  * This code contributed by James Arthur Boucher.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef BMP_SUPPORTED
 
@@ -42,15 +44,15 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  boolean is_os2;		/* saves the OS2 format request flag */
+  boolean is_os2;               /* saves the OS2 format request flag */
 
-  jvirt_sarray_ptr whole_image;	/* needed to reverse row order */
-  JDIMENSION data_width;	/* JSAMPLEs per row */
-  JDIMENSION row_width;		/* physical width of one row in the BMP file */
-  int pad_bytes;		/* number of padding bytes needed per row */
-  JDIMENSION cur_output_row;	/* next row# to write to virtual array */
+  jvirt_sarray_ptr whole_image; /* needed to reverse row order */
+  JDIMENSION data_width;        /* JSAMPLEs per row */
+  JDIMENSION row_width;         /* physical width of one row in the BMP file */
+  int pad_bytes;                /* number of padding bytes needed per row */
+  JDIMENSION cur_output_row;    /* next row# to write to virtual array */
 } bmp_dest_struct;
 
 typedef bmp_dest_struct * bmp_dest_ptr;
@@ -58,8 +60,8 @@
 
 /* Forward declarations */
 LOCAL(void) write_colormap
-	JPP((j_decompress_ptr cinfo, bmp_dest_ptr dest,
-	     int map_colors, int map_entry_size));
+        (j_decompress_ptr cinfo, bmp_dest_ptr dest, int map_colors,
+         int map_entry_size);
 
 
 /*
@@ -69,7 +71,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 /* This version is for writing 24-bit pixels */
 {
   bmp_dest_ptr dest = (bmp_dest_ptr) dinfo;
@@ -89,11 +91,30 @@
    */
   inptr = dest->pub.buffer[0];
   outptr = image_ptr[0];
-  for (col = cinfo->output_width; col > 0; col--) {
-    outptr[2] = *inptr++;	/* can omit GETJSAMPLE() safely */
-    outptr[1] = *inptr++;
-    outptr[0] = *inptr++;
-    outptr += 3;
+
+  if(cinfo->out_color_space == JCS_RGB565) { /* packed 5-6-5 input pixels */
+    #define red_mask    0xF800  /* bits 15..11 */
+    #define green_mask  0x7E0   /* bits 10..5 */
+    #define blue_mask   0x1F    /* bits 4..0; NOTE(review): no #undef nearby */
+    unsigned char  r, g, b;
+    unsigned short *inptr2 = (unsigned short *)inptr; /* 1 pixel per short */
+    for (col = cinfo->output_width; col > 0; col--) {
+      r = (*inptr2 & red_mask) >> 11;     /* 5-bit red field */
+      g = (*inptr2 & green_mask) >> 5;    /* 6-bit green field */
+      b = (*inptr2 & blue_mask);          /* 5-bit blue field */
+      outptr[0] = b << 3;         /* blue:  5 -> 8 bits, low 3 bits zero */
+      outptr[1] = g << 2;         /* green: 6 -> 8 bits, low 2 bits zero */
+      outptr[2] = r << 3;         /* red:   5 -> 8 bits, low 3 bits zero */
+      outptr += 3;                /* 3 output bytes (B,G,R) per pixel */
+      inptr2++;
+    }
+  } else {
+    for (col = cinfo->output_width; col > 0; col--) {
+      outptr[2] = *inptr++;       /* can omit GETJSAMPLE() safely */
+      outptr[1] = *inptr++;
+      outptr[0] = *inptr++;
+      outptr += 3;
+    }
   }
 
   /* Zero out the pad bytes. */
@@ -104,7 +125,7 @@
 
 METHODDEF(void)
 put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	       JDIMENSION rows_supplied)
+               JDIMENSION rows_supplied)
 /* This version is for grayscale OR quantized color output */
 {
   bmp_dest_ptr dest = (bmp_dest_ptr) dinfo;
@@ -123,7 +144,7 @@
   inptr = dest->pub.buffer[0];
   outptr = image_ptr[0];
   for (col = cinfo->output_width; col > 0; col--) {
-    *outptr++ = *inptr++;	/* can omit GETJSAMPLE() safely */
+    *outptr++ = *inptr++;       /* can omit GETJSAMPLE() safely */
   }
 
   /* Zero out the pad bytes. */
@@ -160,13 +181,13 @@
   char bmpfileheader[14];
   char bmpinfoheader[40];
 #define PUT_2B(array,offset,value)  \
-	(array[offset] = (char) ((value) & 0xFF), \
-	 array[offset+1] = (char) (((value) >> 8) & 0xFF))
+        (array[offset] = (char) ((value) & 0xFF), \
+         array[offset+1] = (char) (((value) >> 8) & 0xFF))
 #define PUT_4B(array,offset,value)  \
-	(array[offset] = (char) ((value) & 0xFF), \
-	 array[offset+1] = (char) (((value) >> 8) & 0xFF), \
-	 array[offset+2] = (char) (((value) >> 16) & 0xFF), \
-	 array[offset+3] = (char) (((value) >> 24) & 0xFF))
+        (array[offset] = (char) ((value) & 0xFF), \
+         array[offset+1] = (char) (((value) >> 8) & 0xFF), \
+         array[offset+2] = (char) (((value) >> 16) & 0xFF), \
+         array[offset+3] = (char) (((value) >> 24) & 0xFF))
   INT32 headersize, bfSize;
   int bits_per_pixel, cmap_entries;
 
@@ -181,6 +202,9 @@
       bits_per_pixel = 24;
       cmap_entries = 0;
     }
+  } else if (cinfo->out_color_space == JCS_RGB565) {
+    bits_per_pixel = 24;
+    cmap_entries   = 0;
   } else {
     /* Grayscale output.  We need to fake a 256-entry colormap. */
     bits_per_pixel = 8;
@@ -189,23 +213,23 @@
   /* File size */
   headersize = 14 + 40 + cmap_entries * 4; /* Header and colormap */
   bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
-  
+
   /* Set unused fields of header to 0 */
-  MEMZERO(bmpfileheader, SIZEOF(bmpfileheader));
-  MEMZERO(bmpinfoheader, SIZEOF(bmpinfoheader));
+  MEMZERO(bmpfileheader, sizeof(bmpfileheader));
+  MEMZERO(bmpinfoheader, sizeof(bmpinfoheader));
 
   /* Fill the file header */
-  bmpfileheader[0] = 0x42;	/* first 2 bytes are ASCII 'B', 'M' */
+  bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
   bmpfileheader[1] = 0x4D;
   PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */
   /* we leave bfReserved1 & bfReserved2 = 0 */
   PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */
 
   /* Fill the info header (Microsoft calls this a BITMAPINFOHEADER) */
-  PUT_2B(bmpinfoheader, 0, 40);	/* biSize */
+  PUT_2B(bmpinfoheader, 0, 40); /* biSize */
   PUT_4B(bmpinfoheader, 4, cinfo->output_width); /* biWidth */
   PUT_4B(bmpinfoheader, 8, cinfo->output_height); /* biHeight */
-  PUT_2B(bmpinfoheader, 12, 1);	/* biPlanes - must be 1 */
+  PUT_2B(bmpinfoheader, 12, 1); /* biPlanes - must be 1 */
   PUT_2B(bmpinfoheader, 14, bits_per_pixel); /* biBitCount */
   /* we leave biCompression = 0, for none */
   /* we leave biSizeImage = 0; this is correct for uncompressed data */
@@ -246,6 +270,9 @@
       bits_per_pixel = 24;
       cmap_entries = 0;
     }
+  } else if (cinfo->out_color_space == JCS_RGB565) {
+    bits_per_pixel = 24;
+    cmap_entries   = 0;
   } else {
     /* Grayscale output.  We need to fake a 256-entry colormap. */
     bits_per_pixel = 8;
@@ -254,23 +281,23 @@
   /* File size */
   headersize = 14 + 12 + cmap_entries * 3; /* Header and colormap */
   bfSize = headersize + (INT32) dest->row_width * (INT32) cinfo->output_height;
-  
+
   /* Set unused fields of header to 0 */
-  MEMZERO(bmpfileheader, SIZEOF(bmpfileheader));
-  MEMZERO(bmpcoreheader, SIZEOF(bmpcoreheader));
+  MEMZERO(bmpfileheader, sizeof(bmpfileheader));
+  MEMZERO(bmpcoreheader, sizeof(bmpcoreheader));
 
   /* Fill the file header */
-  bmpfileheader[0] = 0x42;	/* first 2 bytes are ASCII 'B', 'M' */
+  bmpfileheader[0] = 0x42;      /* first 2 bytes are ASCII 'B', 'M' */
   bmpfileheader[1] = 0x4D;
   PUT_4B(bmpfileheader, 2, bfSize); /* bfSize */
   /* we leave bfReserved1 & bfReserved2 = 0 */
   PUT_4B(bmpfileheader, 10, headersize); /* bfOffBits */
 
   /* Fill the info header (Microsoft calls this a BITMAPCOREHEADER) */
-  PUT_2B(bmpcoreheader, 0, 12);	/* bcSize */
+  PUT_2B(bmpcoreheader, 0, 12); /* bcSize */
   PUT_2B(bmpcoreheader, 4, cinfo->output_width); /* bcWidth */
   PUT_2B(bmpcoreheader, 6, cinfo->output_height); /* bcHeight */
-  PUT_2B(bmpcoreheader, 8, 1);	/* bcPlanes - must be 1 */
+  PUT_2B(bmpcoreheader, 8, 1);  /* bcPlanes - must be 1 */
   PUT_2B(bmpcoreheader, 10, bits_per_pixel); /* bcBitCount */
 
   if (JFWRITE(dest->pub.output_file, bmpfileheader, 14) != (size_t) 14)
@@ -290,7 +317,7 @@
 
 LOCAL(void)
 write_colormap (j_decompress_ptr cinfo, bmp_dest_ptr dest,
-		int map_colors, int map_entry_size)
+                int map_colors, int map_entry_size)
 {
   JSAMPARRAY colormap = cinfo->colormap;
   int num_colors = cinfo->actual_number_of_colors;
@@ -301,20 +328,20 @@
     if (cinfo->out_color_components == 3) {
       /* Normal case with RGB colormap */
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(colormap[2][i]), outfile);
-	putc(GETJSAMPLE(colormap[1][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	if (map_entry_size == 4)
-	  putc(0, outfile);
+        putc(GETJSAMPLE(colormap[2][i]), outfile);
+        putc(GETJSAMPLE(colormap[1][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        if (map_entry_size == 4)
+          putc(0, outfile);
       }
     } else {
       /* Grayscale colormap (only happens with grayscale quantization) */
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	putc(GETJSAMPLE(colormap[0][i]), outfile);
-	if (map_entry_size == 4)
-	  putc(0, outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        putc(GETJSAMPLE(colormap[0][i]), outfile);
+        if (map_entry_size == 4)
+          putc(0, outfile);
       }
     }
   } else {
@@ -324,10 +351,10 @@
       putc(i, outfile);
       putc(i, outfile);
       if (map_entry_size == 4)
-	putc(0, outfile);
+        putc(0, outfile);
     }
   }
-  /* Pad colormap with zeros to ensure specified number of colormap entries */ 
+  /* Pad colormap with zeros to ensure specified number of colormap entries */
   if (i > map_colors)
     ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, i);
   for (; i < map_colors; i++) {
@@ -395,7 +422,7 @@
   /* Create module interface object, fill in method pointers */
   dest = (bmp_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(bmp_dest_struct));
+                                  sizeof(bmp_dest_struct));
   dest->pub.start_output = start_output_bmp;
   dest->pub.finish_output = finish_output_bmp;
   dest->is_os2 = is_os2;
@@ -407,6 +434,8 @@
       dest->pub.put_pixel_rows = put_gray_rows;
     else
       dest->pub.put_pixel_rows = put_pixel_rows;
+  } else if(cinfo->out_color_space == JCS_RGB565 ) {
+      dest->pub.put_pixel_rows = put_pixel_rows;
   } else {
     ERREXIT(cinfo, JERR_BMP_COLORSPACE);
   }
@@ -415,16 +444,26 @@
   jpeg_calc_output_dimensions(cinfo);
 
   /* Determine width of rows in the BMP file (padded to 4-byte boundary). */
-  row_width = cinfo->output_width * cinfo->output_components;
-  dest->data_width = row_width;
-  while ((row_width & 3) != 0) row_width++;
-  dest->row_width = row_width;
-  dest->pad_bytes = (int) (row_width - dest->data_width);
+  if (cinfo->out_color_space == JCS_RGB565) { /* widths differ: 2 vs 3 B/px */
+    row_width = cinfo->output_width * 2;
+    dest->row_width = dest->data_width = cinfo->output_width * 3;
+  } else {
+    row_width = cinfo->output_width * cinfo->output_components;
+    dest->row_width = dest->data_width = row_width;
+  }
+  while ((dest->row_width & 3) != 0) dest->row_width++; /* round up to 4n */
+  dest->pad_bytes = (int) (dest->row_width - dest->data_width);
+  if (cinfo->out_color_space == JCS_RGB565) {
+    while ((row_width & 3) != 0) row_width++; /* padded independently */
+  } else {
+    row_width = dest->row_width; /* same padded width as the BMP row */
+  }
+
 
   /* Allocate space for inversion array, prepare for write pass */
   dest->whole_image = (*cinfo->mem->request_virt_sarray)
     ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
-     row_width, cinfo->output_height, (JDIMENSION) 1);
+     dest->row_width, cinfo->output_height, (JDIMENSION) 1);
   dest->cur_output_row = 0;
   if (cinfo->progress != NULL) {
     cd_progress_ptr progress = (cd_progress_ptr) cinfo->progress;
diff --git a/wrgif.c b/wrgif.c
index 5fe8328..d260ee0 100644
--- a/wrgif.c
+++ b/wrgif.c
@@ -1,8 +1,10 @@
 /*
  * wrgif.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code relevant
+ * to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write output images in GIF format.
@@ -37,7 +39,7 @@
  *    CompuServe Incorporated."
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef GIF_SUPPORTED
 
@@ -45,31 +47,31 @@
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  j_decompress_ptr cinfo;	/* back link saves passing separate parm */
+  j_decompress_ptr cinfo;       /* back link saves passing separate parm */
 
   /* State for packing variable-width codes into a bitstream */
-  int n_bits;			/* current number of bits/code */
-  int maxcode;			/* maximum code, given n_bits */
-  INT32 cur_accum;		/* holds bits not yet output */
-  int cur_bits;			/* # of bits in cur_accum */
+  int n_bits;                   /* current number of bits/code */
+  int maxcode;                  /* maximum code, given n_bits */
+  INT32 cur_accum;              /* holds bits not yet output */
+  int cur_bits;                 /* # of bits in cur_accum */
 
   /* State for GIF code assignment */
-  int ClearCode;		/* clear code (doesn't change) */
-  int EOFCode;			/* EOF code (ditto) */
-  int code_counter;		/* counts output symbols */
+  int ClearCode;                /* clear code (doesn't change) */
+  int EOFCode;                  /* EOF code (ditto) */
+  int code_counter;             /* counts output symbols */
 
   /* GIF data packet construction buffer */
-  int bytesinpkt;		/* # of bytes in current packet */
-  char packetbuf[256];		/* workspace for accumulating packet */
+  int bytesinpkt;               /* # of bytes in current packet */
+  char packetbuf[256];          /* workspace for accumulating packet */
 
 } gif_dest_struct;
 
 typedef gif_dest_struct * gif_dest_ptr;
 
 /* Largest value that will fit in N bits */
-#define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
+#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
 
 
 /*
@@ -81,10 +83,10 @@
 flush_packet (gif_dest_ptr dinfo)
 /* flush any accumulated data */
 {
-  if (dinfo->bytesinpkt > 0) {	/* never write zero-length packet */
+  if (dinfo->bytesinpkt > 0) {  /* never write zero-length packet */
     dinfo->packetbuf[0] = (char) dinfo->bytesinpkt++;
     if (JFWRITE(dinfo->pub.output_file, dinfo->packetbuf, dinfo->bytesinpkt)
-	!= (size_t) dinfo->bytesinpkt)
+        != (size_t) dinfo->bytesinpkt)
       ERREXIT(dinfo->cinfo, JERR_FILE_WRITE);
     dinfo->bytesinpkt = 0;
   }
@@ -93,10 +95,10 @@
 
 /* Add a character to current packet; flush to disk if necessary */
 #define CHAR_OUT(dinfo,c)  \
-	{ (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c);  \
-	    if ((dinfo)->bytesinpkt >= 255)  \
-	      flush_packet(dinfo);  \
-	}
+        { (dinfo)->packetbuf[++(dinfo)->bytesinpkt] = (char) (c);  \
+            if ((dinfo)->bytesinpkt >= 255)  \
+              flush_packet(dinfo);  \
+        }
 
 
 /* Routine to convert variable-width codes into a byte stream */
@@ -173,7 +175,7 @@
     dinfo->code_counter++;
   } else {
     output(dinfo, dinfo->ClearCode);
-    dinfo->code_counter = dinfo->ClearCode + 2;	/* reset the counter */
+    dinfo->code_counter = dinfo->ClearCode + 2; /* reset the counter */
   }
 }
 
@@ -218,7 +220,7 @@
 LOCAL(void)
 emit_header (gif_dest_ptr dinfo, int num_colors, JSAMPARRAY colormap)
 /* Output the GIF file header, including color map */
-/* If colormap==NULL, synthesize a gray-scale colormap */
+/* If colormap==NULL, synthesize a grayscale colormap */
 {
   int BitsPerPixel, ColorMapSize, InitCodeSize, FlagByte;
   int cshift = dinfo->cinfo->data_precision - 8;
@@ -248,9 +250,9 @@
   /* Write the Logical Screen Descriptor */
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_width);
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
-  FlagByte = 0x80;		/* Yes, there is a global color table */
+  FlagByte = 0x80;              /* Yes, there is a global color table */
   FlagByte |= (BitsPerPixel-1) << 4; /* color resolution */
-  FlagByte |= (BitsPerPixel-1);	/* size of global color table */
+  FlagByte |= (BitsPerPixel-1); /* size of global color table */
   putc(FlagByte, dinfo->pub.output_file);
   putc(0, dinfo->pub.output_file); /* Background color index */
   putc(0, dinfo->pub.output_file); /* Reserved (aspect ratio in GIF89) */
@@ -260,18 +262,18 @@
   for (i=0; i < ColorMapSize; i++) {
     if (i < num_colors) {
       if (colormap != NULL) {
-	if (dinfo->cinfo->out_color_space == JCS_RGB) {
-	  /* Normal case: RGB color map */
-	  putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file);
-	  putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file);
-	  putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file);
-	} else {
-	  /* Grayscale "color map": possible if quantizing grayscale image */
-	  put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift);
-	}
+        if (dinfo->cinfo->out_color_space == JCS_RGB) {
+          /* Normal case: RGB color map */
+          putc(GETJSAMPLE(colormap[0][i]) >> cshift, dinfo->pub.output_file);
+          putc(GETJSAMPLE(colormap[1][i]) >> cshift, dinfo->pub.output_file);
+          putc(GETJSAMPLE(colormap[2][i]) >> cshift, dinfo->pub.output_file);
+        } else {
+          /* Grayscale "color map": possible if quantizing grayscale image */
+          put_3bytes(dinfo, GETJSAMPLE(colormap[0][i]) >> cshift);
+        }
       } else {
-	/* Create a gray-scale map of num_colors values, range 0..255 */
-	put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1));
+        /* Create a grayscale map of num_colors values, range 0..255 */
+        put_3bytes(dinfo, (i * 255 + (num_colors-1)/2) / (num_colors-1));
       }
     } else {
       /* fill out the map to a power of 2 */
@@ -280,7 +282,7 @@
   }
   /* Write image separator and Image Descriptor */
   putc(',', dinfo->pub.output_file); /* separator */
-  put_word(dinfo, 0);		/* left/top offset */
+  put_word(dinfo, 0);           /* left/top offset */
   put_word(dinfo, 0);
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_width); /* image size */
   put_word(dinfo, (unsigned int) dinfo->cinfo->output_height);
@@ -317,7 +319,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 {
   gif_dest_ptr dest = (gif_dest_ptr) dinfo;
   register JSAMPROW ptr;
@@ -364,8 +366,8 @@
   /* Create module interface object, fill in method pointers */
   dest = (gif_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(gif_dest_struct));
-  dest->cinfo = cinfo;		/* make back link for subroutines */
+                                  sizeof(gif_dest_struct));
+  dest->cinfo = cinfo;          /* make back link for subroutines */
   dest->pub.start_output = start_output_gif;
   dest->pub.put_pixel_rows = put_pixel_rows;
   dest->pub.finish_output = finish_output_gif;
diff --git a/wrjpgcom.c b/wrjpgcom.c
index 8c04b05..0a22f62 100644
--- a/wrjpgcom.c
+++ b/wrjpgcom.c
@@ -1,8 +1,10 @@
 /*
  * wrjpgcom.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, D. R. Commander
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains a very simple stand-alone application that inserts
@@ -11,59 +13,50 @@
  * JPEG markers.
  */
 
-#define JPEG_CJPEG_DJPEG	/* to get the command-line config symbols */
-#include "jinclude.h"		/* get auto-config symbols, <stdio.h> */
+#define JPEG_CJPEG_DJPEG        /* to get the command-line config symbols */
+#include "jinclude.h"           /* get auto-config symbols, <stdio.h> */
 
-#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc() */
+#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc() */
 extern void * malloc ();
 #endif
-#include <ctype.h>		/* to declare isupper(), tolower() */
+#include <ctype.h>              /* to declare isupper(), tolower() */
 #ifdef USE_SETMODE
-#include <fcntl.h>		/* to declare setmode()'s parameter macros */
+#include <fcntl.h>              /* to declare setmode()'s parameter macros */
 /* If you have setmode() but not <io.h>, just delete this line: */
-#include <io.h>			/* to declare setmode() */
+#include <io.h>                 /* to declare setmode() */
 #endif
 
-#ifdef USE_CCOMMAND		/* command-line reader for Macintosh */
+#ifdef USE_CCOMMAND             /* command-line reader for Macintosh */
 #ifdef __MWERKS__
 #include <SIOUX.h>              /* Metrowerks needs this */
-#include <console.h>		/* ... and this */
+#include <console.h>            /* ... and this */
 #endif
 #ifdef THINK_C
-#include <console.h>		/* Think declares it here */
+#include <console.h>            /* Think declares it here */
 #endif
 #endif
 
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#define WRITE_BINARY    "w"
 #else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
-#endif
+#define READ_BINARY     "rb"
+#define WRITE_BINARY    "wb"
 #endif
 
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
 #define EXIT_FAILURE  1
 #endif
 #ifndef EXIT_SUCCESS
-#ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
-#else
 #define EXIT_SUCCESS  0
 #endif
-#endif
 
 /* Reduce this value if your malloc() can't allocate blocks up to 64K.
  * On DOS, compiling in large model is usually a better solution.
  */
 
 #ifndef MAX_COM_LENGTH
-#define MAX_COM_LENGTH 65000L	/* must be <= 65533 in any case */
+#define MAX_COM_LENGTH 65000L   /* must be <= 65533 in any case */
 #endif
 
 
@@ -72,12 +65,12 @@
  * To reuse this code in another application, you might need to change these.
  */
 
-static FILE * infile;		/* input JPEG file */
+static FILE * infile;           /* input JPEG file */
 
 /* Return next input byte, or EOF if no more */
 #define NEXTBYTE()  getc(infile)
 
-static FILE * outfile;		/* output JPEG file */
+static FILE * outfile;          /* output JPEG file */
 
 /* Emit an output byte */
 #define PUTBYTE(x)  putc((x), outfile)
@@ -154,11 +147,11 @@
  * in this program.  (See jdmarker.c for a more complete list.)
  */
 
-#define M_SOF0  0xC0		/* Start Of Frame N */
-#define M_SOF1  0xC1		/* N indicates which compression process */
-#define M_SOF2  0xC2		/* Only SOF0-SOF2 are now in common use */
+#define M_SOF0  0xC0            /* Start Of Frame N */
+#define M_SOF1  0xC1            /* N indicates which compression process */
+#define M_SOF2  0xC2            /* Only SOF0-SOF2 are now in common use */
 #define M_SOF3  0xC3
-#define M_SOF5  0xC5		/* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF5  0xC5            /* NB: codes C4 and CC are NOT SOF markers */
 #define M_SOF6  0xC6
 #define M_SOF7  0xC7
 #define M_SOF9  0xC9
@@ -167,10 +160,10 @@
 #define M_SOF13 0xCD
 #define M_SOF14 0xCE
 #define M_SOF15 0xCF
-#define M_SOI   0xD8		/* Start Of Image (beginning of datastream) */
-#define M_EOI   0xD9		/* End Of Image (end of datastream) */
-#define M_SOS   0xDA		/* Start Of Scan (begins compressed data) */
-#define M_COM   0xFE		/* COMment */
+#define M_SOI   0xD8            /* Start Of Image (beginning of datastream) */
+#define M_EOI   0xD9            /* End Of Image (end of datastream) */
+#define M_SOS   0xDA            /* Start Of Scan (begins compressed data) */
+#define M_COM   0xFE            /* COMment */
 
 
 /*
@@ -302,40 +295,40 @@
       /* Note that marker codes 0xC4, 0xC8, 0xCC are not, and must not be,
        * treated as SOFn.  C4 in particular is actually DHT.
        */
-    case M_SOF0:		/* Baseline */
-    case M_SOF1:		/* Extended sequential, Huffman */
-    case M_SOF2:		/* Progressive, Huffman */
-    case M_SOF3:		/* Lossless, Huffman */
-    case M_SOF5:		/* Differential sequential, Huffman */
-    case M_SOF6:		/* Differential progressive, Huffman */
-    case M_SOF7:		/* Differential lossless, Huffman */
-    case M_SOF9:		/* Extended sequential, arithmetic */
-    case M_SOF10:		/* Progressive, arithmetic */
-    case M_SOF11:		/* Lossless, arithmetic */
-    case M_SOF13:		/* Differential sequential, arithmetic */
-    case M_SOF14:		/* Differential progressive, arithmetic */
-    case M_SOF15:		/* Differential lossless, arithmetic */
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+    case M_SOF2:                /* Progressive, Huffman */
+    case M_SOF3:                /* Lossless, Huffman */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_SOF9:                /* Extended sequential, arithmetic */
+    case M_SOF10:               /* Progressive, arithmetic */
+    case M_SOF11:               /* Lossless, arithmetic */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
       return marker;
 
-    case M_SOS:			/* should not see compressed data before SOF */
+    case M_SOS:                 /* should not see compressed data before SOF */
       ERREXIT("SOS without prior SOFn");
       break;
 
-    case M_EOI:			/* in case it's a tables-only JPEG stream */
+    case M_EOI:                 /* in case it's a tables-only JPEG stream */
       return marker;
 
-    case M_COM:			/* Existing COM: conditionally discard */
+    case M_COM:                 /* Existing COM: conditionally discard */
       if (keep_COM) {
-	write_marker(marker);
-	copy_variable();
+        write_marker(marker);
+        copy_variable();
       } else {
-	skip_variable();
+        skip_variable();
       }
       break;
 
-    default:			/* Anything else just gets copied */
+    default:                    /* Anything else just gets copied */
       write_marker(marker);
-      copy_variable();		/* we assume it has a parameter count... */
+      copy_variable();          /* we assume it has a parameter count... */
       break;
     }
   } /* end loop */
@@ -344,7 +337,7 @@
 
 /* Command line parsing code */
 
-static const char * progname;	/* program name for error messages */
+static const char * progname;   /* program name for error messages */
 
 
 static void
@@ -370,7 +363,7 @@
   fprintf(stderr, "If you do not give either -comment or -cfile on the command line,\n");
   fprintf(stderr, "then the comment text is read from standard input.\n");
   fprintf(stderr, "It can be multiple lines, up to %u characters total.\n",
-	  (unsigned int) MAX_COM_LENGTH);
+          (unsigned int) MAX_COM_LENGTH);
 #ifndef TWO_FILE_COMMANDLINE
   fprintf(stderr, "You must specify an input JPEG file name when supplying\n");
   fprintf(stderr, "comment text from standard input.\n");
@@ -391,17 +384,17 @@
 
   while ((ca = *arg++) != '\0') {
     if ((ck = *keyword++) == '\0')
-      return 0;			/* arg longer than keyword, no good */
-    if (isupper(ca))		/* force arg to lcase (assume ck is already) */
+      return 0;                 /* arg longer than keyword, no good */
+    if (isupper(ca))            /* force arg to lcase (assume ck is already) */
       ca = tolower(ca);
     if (ca != ck)
-      return 0;			/* no good */
-    nmatched++;			/* count matched characters */
+      return 0;                 /* no good */
+    nmatched++;                 /* count matched characters */
   }
   /* reached end of argument; fail if it's too short for unique abbrev */
   if (nmatched < minchars)
     return 0;
-  return 1;			/* A-OK */
+  return 1;                     /* A-OK */
 }
 
 
@@ -427,21 +420,21 @@
 
   progname = argv[0];
   if (progname == NULL || progname[0] == 0)
-    progname = "wrjpgcom";	/* in case C library doesn't provide it */
+    progname = "wrjpgcom";      /* in case C library doesn't provide it */
 
   /* Parse switches, if any */
   for (argn = 1; argn < argc; argn++) {
     arg = argv[argn];
     if (arg[0] != '-')
-      break;			/* not switch, must be file name */
-    arg++;			/* advance over '-' */
+      break;                    /* not switch, must be file name */
+    arg++;                      /* advance over '-' */
     if (keymatch(arg, "replace", 1)) {
       keep_COM = 0;
     } else if (keymatch(arg, "cfile", 2)) {
       if (++argn >= argc) usage();
       if ((comment_file = fopen(argv[argn], "r")) == NULL) {
-	fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
-	exit(EXIT_FAILURE);
+        fprintf(stderr, "%s: can't open %s\n", progname, argv[argn]);
+        exit(EXIT_FAILURE);
       }
     } else if (keymatch(arg, "comment", 1)) {
       if (++argn >= argc) usage();
@@ -450,21 +443,36 @@
        * under MS-DOG and must parse out the quoted string ourselves.  Sigh.
        */
       if (comment_arg[0] == '"') {
-	comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH);
-	if (comment_arg == NULL)
-	  ERREXIT("Insufficient memory");
-	strcpy(comment_arg, argv[argn]+1);
-	for (;;) {
-	  comment_length = (unsigned int) strlen(comment_arg);
-	  if (comment_length > 0 && comment_arg[comment_length-1] == '"') {
-	    comment_arg[comment_length-1] = '\0'; /* zap terminating quote */
-	    break;
-	  }
-	  if (++argn >= argc)
-	    ERREXIT("Missing ending quote mark");
-	  strcat(comment_arg, " ");
-	  strcat(comment_arg, argv[argn]);
-	}
+        comment_arg = (char *) malloc((size_t) MAX_COM_LENGTH);
+        if (comment_arg == NULL)
+          ERREXIT("Insufficient memory");
+        if (strlen(argv[argn]) + 2 >= (size_t) MAX_COM_LENGTH) {
+          fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                  (unsigned int) MAX_COM_LENGTH);
+          exit(EXIT_FAILURE);
+        }
+        strcpy(comment_arg, argv[argn]+1);
+        for (;;) {
+          comment_length = (unsigned int) strlen(comment_arg);
+          if (comment_length > 0 && comment_arg[comment_length-1] == '"') {
+            comment_arg[comment_length-1] = '\0'; /* zap terminating quote */
+            break;
+          }
+          if (++argn >= argc)
+            ERREXIT("Missing ending quote mark");
+          if (strlen(comment_arg) + strlen(argv[argn]) + 2 >=
+              (size_t) MAX_COM_LENGTH) {
+            fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                    (unsigned int) MAX_COM_LENGTH);
+            exit(EXIT_FAILURE);
+          }
+          strcat(comment_arg, " ");
+          strcat(comment_arg, argv[argn]);
+        }
+      } else if (strlen(argv[argn]) >= (size_t) MAX_COM_LENGTH) {
+        fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                (unsigned int) MAX_COM_LENGTH);
+        exit(EXIT_FAILURE);
       }
       comment_length = (unsigned int) strlen(comment_arg);
     } else
@@ -488,10 +496,10 @@
     }
   } else {
     /* default input file is stdin */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
     setmode(fileno(stdin), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
     if ((infile = fdopen(fileno(stdin), READ_BINARY)) == NULL) {
       fprintf(stderr, "%s: can't open stdin\n", progname);
       exit(EXIT_FAILURE);
@@ -506,7 +514,7 @@
   /* Must have explicit output file name */
   if (argn != argc-2) {
     fprintf(stderr, "%s: must name one input and one output file\n",
-	    progname);
+            progname);
     usage();
   }
   if ((outfile = fopen(argv[argn+1], WRITE_BINARY)) == NULL) {
@@ -520,10 +528,10 @@
     usage();
   }
   /* default output file is stdout */
-#ifdef USE_SETMODE		/* need to hack file mode? */
+#ifdef USE_SETMODE              /* need to hack file mode? */
   setmode(fileno(stdout), O_BINARY);
 #endif
-#ifdef USE_FDOPEN		/* need to re-open in binary mode? */
+#ifdef USE_FDOPEN               /* need to re-open in binary mode? */
   if ((outfile = fdopen(fileno(stdout), WRITE_BINARY)) == NULL) {
     fprintf(stderr, "%s: can't open stdout\n", progname);
     exit(EXIT_FAILURE);
@@ -545,9 +553,9 @@
     src_file = (comment_file != NULL ? comment_file : stdin);
     while ((c = getc(src_file)) != EOF) {
       if (comment_length >= (unsigned int) MAX_COM_LENGTH) {
-	fprintf(stderr, "Comment text may not exceed %u bytes\n",
-		(unsigned int) MAX_COM_LENGTH);
-	exit(EXIT_FAILURE);
+        fprintf(stderr, "Comment text may not exceed %u bytes\n",
+                (unsigned int) MAX_COM_LENGTH);
+        exit(EXIT_FAILURE);
       }
       comment_arg[comment_length++] = (char) c;
     }
@@ -579,5 +587,5 @@
 
   /* All done. */
   exit(EXIT_SUCCESS);
-  return 0;			/* suppress no-return-value warnings */
+  return 0;                     /* suppress no-return-value warnings */
 }
diff --git a/wrppm.c b/wrppm.c
index 6c6d908..d3a613c 100644
--- a/wrppm.c
+++ b/wrppm.c
@@ -1,8 +1,11 @@
 /*
  * wrppm.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * Modified 2009 by Guido Vollbeding.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write output images in PPM/PGM format.
@@ -15,7 +18,7 @@
  * an ordinary stdio stream.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef PPM_SUPPORTED
 
@@ -40,12 +43,12 @@
 #define BYTESPERSAMPLE 1
 #define PPM_MAXVAL 255
 #else
-/* The word-per-sample format always puts the LSB first. */
-#define PUTPPMSAMPLE(ptr,v)			\
-	{ register int val_ = v;		\
-	  *ptr++ = (char) (val_ & 0xFF);	\
-	  *ptr++ = (char) ((val_ >> 8) & 0xFF);	\
-	}
+/* The word-per-sample format always puts the MSB first. */
+#define PUTPPMSAMPLE(ptr,v)                       \
+        { register int val_ = (v);                \
+          *(ptr)++ = (char) ((val_ >> 8) & 0xFF); \
+          *(ptr)++ = (char) (val_ & 0xFF);        \
+        }
 #define BYTESPERSAMPLE 2
 #define PPM_MAXVAL ((1<<BITS_IN_JSAMPLE)-1)
 #endif
@@ -54,25 +57,20 @@
 
 /*
  * When JSAMPLE is the same size as char, we can just fwrite() the
- * decompressed data to the PPM or PGM file.  On PCs, in order to make this
- * work the output buffer must be allocated in near data space, because we are
- * assuming small-data memory model wherein fwrite() can't reach far memory.
- * If you need to process very wide images on a PC, you might have to compile
- * in large-memory model, or else replace fwrite() with a putc() loop ---
- * which will be much slower.
+ * decompressed data to the PPM or PGM file.
  */
 
 
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
   /* Usually these two pointers point to the same place: */
-  char *iobuffer;		/* fwrite's I/O buffer */
-  JSAMPROW pixrow;		/* decompressor output buffer */
-  size_t buffer_width;		/* width of I/O buffer */
-  JDIMENSION samples_per_row;	/* JSAMPLEs per output row */
+  char *iobuffer;               /* fwrite's I/O buffer */
+  JSAMPROW pixrow;              /* decompressor output buffer */
+  size_t buffer_width;          /* width of I/O buffer */
+  JDIMENSION samples_per_row;   /* JSAMPLEs per output row */
 } ppm_dest_struct;
 
 typedef ppm_dest_struct * ppm_dest_ptr;
@@ -88,7 +86,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
 
@@ -103,7 +101,7 @@
 
 METHODDEF(void)
 copy_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		 JDIMENSION rows_supplied)
+                 JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -126,7 +124,7 @@
 
 METHODDEF(void)
 put_demapped_rgb (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		  JDIMENSION rows_supplied)
+                  JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -151,7 +149,7 @@
 
 METHODDEF(void)
 put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		   JDIMENSION rows_supplied)
+                   JDIMENSION rows_supplied)
 {
   ppm_dest_ptr dest = (ppm_dest_ptr) dinfo;
   register char * bufferptr;
@@ -182,14 +180,14 @@
   case JCS_GRAYSCALE:
     /* emit header for raw PGM format */
     fprintf(dest->pub.output_file, "P5\n%ld %ld\n%d\n",
-	    (long) cinfo->output_width, (long) cinfo->output_height,
-	    PPM_MAXVAL);
+            (long) cinfo->output_width, (long) cinfo->output_height,
+            PPM_MAXVAL);
     break;
   case JCS_RGB:
     /* emit header for raw PPM format */
     fprintf(dest->pub.output_file, "P6\n%ld %ld\n%d\n",
-	    (long) cinfo->output_width, (long) cinfo->output_height,
-	    PPM_MAXVAL);
+            (long) cinfo->output_width, (long) cinfo->output_height,
+            PPM_MAXVAL);
     break;
   default:
     ERREXIT(cinfo, JERR_PPM_COLORSPACE);
@@ -223,21 +221,21 @@
   /* Create module interface object, fill in method pointers */
   dest = (ppm_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(ppm_dest_struct));
+                                  sizeof(ppm_dest_struct));
   dest->pub.start_output = start_output_ppm;
   dest->pub.finish_output = finish_output_ppm;
 
   /* Calculate output image dimensions so we can allocate space */
   jpeg_calc_output_dimensions(cinfo);
 
-  /* Create physical I/O buffer.  Note we make this near on a PC. */
+  /* Create physical I/O buffer */
   dest->samples_per_row = cinfo->output_width * cinfo->out_color_components;
-  dest->buffer_width = dest->samples_per_row * (BYTESPERSAMPLE * SIZEOF(char));
+  dest->buffer_width = dest->samples_per_row * (BYTESPERSAMPLE * sizeof(char));
   dest->iobuffer = (char *) (*cinfo->mem->alloc_small)
     ((j_common_ptr) cinfo, JPOOL_IMAGE, dest->buffer_width);
 
   if (cinfo->quantize_colors || BITS_IN_JSAMPLE != 8 ||
-      SIZEOF(JSAMPLE) != SIZEOF(char)) {
+      sizeof(JSAMPLE) != sizeof(char)) {
     /* When quantizing, we need an output buffer for colormap indexes
      * that's separate from the physical I/O buffer.  We also need a
      * separate buffer if pixel format translation must take place.
@@ -255,7 +253,6 @@
   } else {
     /* We will fwrite() directly from decompressor output buffer. */
     /* Synthesize a JSAMPARRAY pointer structure */
-    /* Cast here implies near->far pointer conversion on PCs */
     dest->pixrow = (JSAMPROW) dest->iobuffer;
     dest->pub.buffer = & dest->pixrow;
     dest->pub.buffer_height = 1;
diff --git a/wrrle.c b/wrrle.c
index a4e7337..6f35ad9 100644
--- a/wrrle.c
+++ b/wrrle.c
@@ -1,8 +1,10 @@
 /*
  * wrrle.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write output images in RLE format.
@@ -16,7 +18,7 @@
  * with updates from Robert Hutchinson.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef RLE_SUPPORTED
 
@@ -47,15 +49,15 @@
  * though not all of the entries need be used.
  */
 
-#define CMAPBITS	8
-#define CMAPLENGTH	(1<<(CMAPBITS))
+#define CMAPBITS        8
+#define CMAPLENGTH      (1<<(CMAPBITS))
 
 typedef struct {
   struct djpeg_dest_struct pub; /* public fields */
 
-  jvirt_sarray_ptr image;	/* virtual array to store the output image */
-  rle_map *colormap;	 	/* RLE-style color map, or NULL if none */
-  rle_pixel **rle_row;		/* To pass rows to rle_putrow() */
+  jvirt_sarray_ptr image;       /* virtual array to store the output image */
+  rle_map *colormap;            /* RLE-style color map, or NULL if none */
+  rle_pixel **rle_row;          /* To pass rows to rle_putrow() */
 
 } rle_dest_struct;
 
@@ -63,8 +65,8 @@
 
 /* Forward declarations */
 METHODDEF(void) rle_put_pixel_rows
-    JPP((j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	 JDIMENSION rows_supplied));
+        (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
+         JDIMENSION rows_supplied);
 
 
 /*
@@ -97,8 +99,8 @@
    */
 
   if (cinfo->output_width > 32767 || cinfo->output_height > 32767)
-    ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width, 
-	     cinfo->output_height);
+    ERREXIT2(cinfo, JERR_RLE_DIMENSIONS, cinfo->output_width,
+             cinfo->output_height);
 
   if (cinfo->out_color_space != JCS_GRAYSCALE &&
       cinfo->out_color_space != JCS_RGB)
@@ -113,7 +115,7 @@
 
   if (cinfo->quantize_colors) {
     /* Allocate storage for RLE-style cmap, zero any extra entries */
-    cmapsize = cinfo->out_color_components * CMAPLENGTH * SIZEOF(rle_map);
+    cmapsize = cinfo->out_color_components * CMAPLENGTH * sizeof(rle_map);
     dest->colormap = (rle_map *) (*cinfo->mem->alloc_small)
       ((j_common_ptr) cinfo, JPOOL_IMAGE, cmapsize);
     MEMZERO(dest->colormap, cmapsize);
@@ -151,7 +153,7 @@
 
 METHODDEF(void)
 rle_put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		    JDIMENSION rows_supplied)
+                    JDIMENSION rows_supplied)
 {
   rle_dest_ptr dest = (rle_dest_ptr) dinfo;
 
@@ -172,7 +174,7 @@
 finish_output_rle (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo)
 {
   rle_dest_ptr dest = (rle_dest_ptr) dinfo;
-  rle_hdr header;		/* Output file information */
+  rle_hdr header;               /* Output file information */
   rle_pixel **rle_row, *red, *green, *blue;
   JSAMPROW output_row;
   char cmapcomment[80];
@@ -207,8 +209,7 @@
   rle_put_setup(&header);
 
   /* Now output the RLE data from our virtual array.
-   * We assume here that (a) rle_pixel is represented the same as JSAMPLE,
-   * and (b) we are not on a machine where FAR pointers differ from regular.
+   * We assume here that rle_pixel is represented the same as JSAMPLE.
    */
 
 #ifdef PROGRESS_REPORT
@@ -223,7 +224,7 @@
     for (row = cinfo->output_height-1; row >= 0; row--) {
       rle_row = (rle_pixel **) (*cinfo->mem->access_virt_sarray)
         ((j_common_ptr) cinfo, dest->image,
-	 (JDIMENSION) row, (JDIMENSION) 1, FALSE);
+         (JDIMENSION) row, (JDIMENSION) 1, FALSE);
       rle_putrow(rle_row, (int) cinfo->output_width, &header);
 #ifdef PROGRESS_REPORT
       if (progress != NULL) {
@@ -237,7 +238,7 @@
       rle_row = (rle_pixel **) dest->rle_row;
       output_row = * (*cinfo->mem->access_virt_sarray)
         ((j_common_ptr) cinfo, dest->image,
-	 (JDIMENSION) row, (JDIMENSION) 1, FALSE);
+         (JDIMENSION) row, (JDIMENSION) 1, FALSE);
       red = rle_row[0];
       green = rle_row[1];
       blue = rle_row[2];
@@ -281,7 +282,7 @@
   /* Create module interface object, fill in method pointers */
   dest = (rle_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-                                  SIZEOF(rle_dest_struct));
+                                  sizeof(rle_dest_struct));
   dest->pub.start_output = start_output_rle;
   dest->pub.finish_output = finish_output_rle;
 
diff --git a/wrtarga.c b/wrtarga.c
index cf104d2..5fbfc53 100644
--- a/wrtarga.c
+++ b/wrtarga.c
@@ -1,8 +1,10 @@
 /*
  * wrtarga.c
  *
+ * This file was part of the Independent JPEG Group's software:
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
  * For conditions of distribution and use, see the accompanying README file.
  *
  * This file contains routines to write output images in Targa format.
@@ -14,7 +16,7 @@
  * Based on code contributed by Lee Daniel Crocker.
  */
 
-#include "cdjpeg.h"		/* Common decls for cjpeg/djpeg applications */
+#include "cdjpeg.h"             /* Common decls for cjpeg/djpeg applications */
 
 #ifdef TARGA_SUPPORTED
 
@@ -28,23 +30,14 @@
   Sorry, this code only copes with 8-bit JSAMPLEs. /* deliberate syntax err */
 #endif
 
-/*
- * The output buffer needs to be writable by fwrite().  On PCs, we must
- * allocate the buffer in near data space, because we are assuming small-data
- * memory model, wherein fwrite() can't reach far memory.  If you need to
- * process very wide images on a PC, you might have to compile in large-memory
- * model, or else replace fwrite() with a putc() loop --- which will be much
- * slower.
- */
-
 
 /* Private version of data destination object */
 
 typedef struct {
-  struct djpeg_dest_struct pub;	/* public fields */
+  struct djpeg_dest_struct pub; /* public fields */
 
-  char *iobuffer;		/* physical I/O buffer */
-  JDIMENSION buffer_width;	/* width of one row */
+  char *iobuffer;               /* physical I/O buffer */
+  JDIMENSION buffer_width;      /* width of one row */
 } tga_dest_struct;
 
 typedef tga_dest_struct * tga_dest_ptr;
@@ -57,30 +50,30 @@
   char targaheader[18];
 
   /* Set unused fields of header to 0 */
-  MEMZERO(targaheader, SIZEOF(targaheader));
+  MEMZERO(targaheader, sizeof(targaheader));
 
   if (num_colors > 0) {
-    targaheader[1] = 1;		/* color map type 1 */
+    targaheader[1] = 1;         /* color map type 1 */
     targaheader[5] = (char) (num_colors & 0xFF);
     targaheader[6] = (char) (num_colors >> 8);
-    targaheader[7] = 24;	/* 24 bits per cmap entry */
+    targaheader[7] = 24;        /* 24 bits per cmap entry */
   }
 
   targaheader[12] = (char) (cinfo->output_width & 0xFF);
   targaheader[13] = (char) (cinfo->output_width >> 8);
   targaheader[14] = (char) (cinfo->output_height & 0xFF);
   targaheader[15] = (char) (cinfo->output_height >> 8);
-  targaheader[17] = 0x20;	/* Top-down, non-interlaced */
+  targaheader[17] = 0x20;       /* Top-down, non-interlaced */
 
   if (cinfo->out_color_space == JCS_GRAYSCALE) {
-    targaheader[2] = 3;		/* image type = uncompressed gray-scale */
-    targaheader[16] = 8;	/* bits per pixel */
-  } else {			/* must be RGB */
+    targaheader[2] = 3;         /* image type = uncompressed grayscale */
+    targaheader[16] = 8;        /* bits per pixel */
+  } else {                      /* must be RGB */
     if (num_colors > 0) {
-      targaheader[2] = 1;	/* image type = colormapped RGB */
+      targaheader[2] = 1;       /* image type = colormapped RGB */
       targaheader[16] = 8;
     } else {
-      targaheader[2] = 2;	/* image type = uncompressed RGB */
+      targaheader[2] = 2;       /* image type = uncompressed RGB */
       targaheader[16] = 24;
     }
   }
@@ -97,7 +90,7 @@
 
 METHODDEF(void)
 put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		JDIMENSION rows_supplied)
+                JDIMENSION rows_supplied)
 /* used for unquantized full-color output */
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
@@ -118,7 +111,7 @@
 
 METHODDEF(void)
 put_gray_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-	       JDIMENSION rows_supplied)
+               JDIMENSION rows_supplied)
 /* used for grayscale OR quantized color output */
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
@@ -142,7 +135,7 @@
 
 METHODDEF(void)
 put_demapped_gray (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
-		   JDIMENSION rows_supplied)
+                   JDIMENSION rows_supplied)
 {
   tga_dest_ptr dest = (tga_dest_ptr) dinfo;
   register JSAMPROW inptr;
@@ -183,14 +176,14 @@
       /* We only support 8-bit colormap indexes, so only 256 colors */
       num_colors = cinfo->actual_number_of_colors;
       if (num_colors > 256)
-	ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors);
+        ERREXIT1(cinfo, JERR_TOO_MANY_COLORS, num_colors);
       write_header(cinfo, dinfo, num_colors);
       /* Write the colormap.  Note Targa uses BGR byte order */
       outfile = dest->pub.output_file;
       for (i = 0; i < num_colors; i++) {
-	putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile);
-	putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile);
-	putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[2][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[1][i]), outfile);
+        putc(GETJSAMPLE(cinfo->colormap[0][i]), outfile);
       }
       dest->pub.put_pixel_rows = put_gray_rows;
     } else {
@@ -229,18 +222,18 @@
   /* Create module interface object, fill in method pointers */
   dest = (tga_dest_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				  SIZEOF(tga_dest_struct));
+                                  sizeof(tga_dest_struct));
   dest->pub.start_output = start_output_tga;
   dest->pub.finish_output = finish_output_tga;
 
   /* Calculate output image dimensions so we can allocate space */
   jpeg_calc_output_dimensions(cinfo);
 
-  /* Create I/O buffer.  Note we make this near on a PC. */
+  /* Create I/O buffer. */
   dest->buffer_width = cinfo->output_width * cinfo->output_components;
   dest->iobuffer = (char *)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				(size_t) (dest->buffer_width * SIZEOF(char)));
+                                (size_t) (dest->buffer_width * sizeof(char)));
 
   /* Create decompressor output buffer. */
   dest->pub.buffer = (*cinfo->mem->alloc_sarray)