Silence warning in CMake 2.8.12 and later
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1151 632fc199-4ca6-4c93-a231-07263d6284db
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d3c0972..d80933e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@
cmake_policy(SET CMP0022 OLD)
project(libjpeg-turbo C)
-set(VERSION 1.3.1)
+set(VERSION 1.3.80)
if(MINGW OR CYGWIN)
execute_process(COMMAND "date" "+%Y%m%d" OUTPUT_VARIABLE BUILD)
@@ -316,12 +316,16 @@
if(WITH_JAVA)
add_test(TJUnitTest ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest)
add_test(TJUnitTest-yuv ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -yuv)
+add_test(TJUnitTest-yuv-nopad ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -yuv -noyuvpad)
add_test(TJUnitTest-bi ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi)
add_test(TJUnitTest-bi-yuv ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi -yuv)
+add_test(TJUnitTest-bi-yuv-nopad ${JAVA_RUNTIME} -cp java/${OBJDIR}turbojpeg.jar -Djava.library.path=${CMAKE_CURRENT_BINARY_DIR}/${OBJDIR} TJUnitTest -bi -yuv -noyuvpad)
endif()
add_test(tjunittest tjunittest)
add_test(tjunittest-alloc tjunittest -alloc)
add_test(tjunittest-yuv tjunittest -yuv)
+add_test(tjunittest-yuv-alloc tjunittest -yuv -alloc)
+add_test(tjunittest-yuv-nopad tjunittest -yuv -noyuvpad)
add_test(cjpeg-int sharedlib/cjpeg -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
add_test(cjpeg-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutint.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
add_test(cjpeg-fast sharedlib/cjpeg -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
@@ -379,6 +383,8 @@
add_test(tjunittest-static tjunittest-static)
add_test(tjunittest-static-alloc tjunittest-static -alloc)
add_test(tjunittest-static-yuv tjunittest-static -yuv)
+add_test(tjunittest-static-yuv-alloc tjunittest-static -yuv -alloc)
+add_test(tjunittest-static-yuv-nopad tjunittest-static -yuv -noyuvpad)
add_test(cjpeg-static-int cjpeg-static -dct int -outfile testoutint.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
add_test(cjpeg-static-int-cmp ${CMAKE_COMMAND} -DMD5=${MD5_JPEG_INT} -DFILE=testoutint.jpg -P ${CMAKE_SOURCE_DIR}/cmakescripts/md5cmp.cmake)
add_test(cjpeg-static-fast cjpeg-static -dct fast -opt -outfile testoutfst.jpg ${CMAKE_SOURCE_DIR}/testimages/testorig.ppm)
diff --git a/ChangeLog.txt b/ChangeLog.txt
index df0041e..5c83016 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,3 +1,38 @@
+1.4 pre-beta
+============
+
+[1] The TurboJPEG API can now be used to generate YUV images with an arbitrary
+line padding (previously, it only supported 4-byte padding, which was
+compatible with X Video.) Also, the decompress-to-YUV function has been
+extended to support image scaling.
+
+[2] Added SIMD acceleration for DSPr2-capable MIPS platforms. This speeds up
+the compression of full-color JPEGs by 70-80% on such platforms and
+decompression by 25-35%.
+
+[3] Added support for 4:1:1 subsampling to the TurboJPEG API. This is mainly
+included for compatibility, since 4:1:1 is not fully accelerated in
+libjpeg-turbo and has no significant advantages relative to 4:2:0.
+
+[4] Added support for CMYK images to the TurboJPEG API. This feature allows
+CMYK source images to be compressed to YCCK JPEGs and YCCK or CMYK JPEGs to be
+decompressed to CMYK destination images. Conversion between CMYK and RGB
+images is not supported. Such conversion requires a color management system
+and is out of scope for a codec library.
+
+[5] The TurboJPEG API can now be used to compress JPEG images from YUV planar
+source images and to decode YUV planar images into RGB, grayscale, or extended
+RGB images.
+
+[6] If an application attempts to decompress a Huffman-coded JPEG image whose
+header does not contain Huffman tables, libjpeg-turbo will now insert the
+default Huffman tables. In order to save space, many motion JPEG video frames
+are encoded without the default Huffman tables, so these frames can now be
+successfully decompressed by libjpeg-turbo without additional work on the part
+of the application. An application can still override the Huffman tables, for
+instance to re-use tables from a previous frame of the same video.
+
+
1.3.1
=====
diff --git a/Makefile.am b/Makefile.am
index 79594bf..2a68a96 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -4,7 +4,7 @@
if WITH_TURBOJPEG
lib_LTLIBRARIES += libturbojpeg.la
-libturbojpeg_la_LDFLAGS = -version-info 0:0 -no-undefined
+libturbojpeg_la_LDFLAGS = -version-info 1:0:1 -no-undefined
include_HEADERS += turbojpeg.h
endif
@@ -201,11 +201,15 @@
$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest
$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -bi
$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv
+ $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -noyuvpad
$(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi
+ $(JAVA) -cp java/turbojpeg.jar -Djava.library.path=.libs TJUnitTest -yuv -bi -noyuvpad
endif
./tjunittest
./tjunittest -alloc
./tjunittest -yuv
+ ./tjunittest -yuv -alloc
+ ./tjunittest -yuv -noyuvpad
endif
./cjpeg -dct int -outfile testoutint.jpg $(srcdir)/testimages/testorig.ppm
md5/md5cmp $(MD5_JPEG_INT) testoutint.jpg
@@ -314,6 +318,11 @@
rm -f *_440_*.ppm
rm -f *_440_*.jpg
rm -f *_440.yuv
+ rm -f *_411_*.bmp
+ rm -f *_411_*.png
+ rm -f *_411_*.ppm
+ rm -f *_411_*.jpg
+ rm -f *_411.yuv
tjtest:
diff --git a/acinclude.m4 b/acinclude.m4
index afb4359..40a9e52 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -180,3 +180,66 @@
$2
fi
])
+
+# AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE(ACTION-IF-OK, ACTION-IF-NOT-OK)
+# --------------------------
+# Test whether the toolchain supports MIPS DSPr2 instructions.  A small C
+# program whose inline assembly uses precr.qb.ph is compiled with CFLAGS
+# temporarily replaced by CCASFLAGS plus -mdspr2.  The first argument is run
+# when the program compiles and the second argument is run otherwise.  The
+# previous CFLAGS value is put back before either action is executed.
+AC_DEFUN([AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE],[
+ # Start pessimistic and flip the result only if the probe compiles.
+ have_mips_dspr2=no
+ ac_save_CFLAGS="$CFLAGS"
+ CFLAGS="$CCASFLAGS -mdspr2"
+
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+
+ int main ()
+ {
+ int c = 0, a = 0, b = 0;
+ __asm__ __volatile__ (
+ "precr.qb.ph %[c], %[a], %[b] \n\t"
+ : [c] "=r" (c)
+ : [a] "r" (a), [b] "r" (b)
+ );
+ return c;
+ }
+ ]])], have_mips_dspr2=yes)
+ # Restore the flags that were saved before the probe.
+ CFLAGS=$ac_save_CFLAGS
+
+ if test "x$have_mips_dspr2" = "xyes" ; then
+ $1
+ else
+ $2
+ fi
+])
+
+# AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE(ACTION-IF-OK, ACTION-IF-NOT-OK)
+# --------------------------
+# Test whether CCAS can assemble a snippet containing "movi v0.16b, #100"
+# when invoked with CCASFLAGS plus -x assembler-with-cpp.  If the direct
+# attempt fails, the same snippet is retried with gas-preprocessor.pl placed
+# in front of CCAS; when that retry succeeds, CCAS is permanently rewritten
+# to include gas-preprocessor.pl and is substituted with AC_SUBST.  The first
+# argument is run when either attempt succeeds and the second argument is run
+# otherwise.  CC and CFLAGS are overridden only for the duration of the test.
+AC_DEFUN([AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE],[
+ ac_good_gnu_arm_assembler=no
+ # Save CC and CFLAGS because both are overridden while probing CCAS.
+ ac_save_CC="$CC"
+ ac_save_CFLAGS="$CFLAGS"
+ CFLAGS="$CCASFLAGS -x assembler-with-cpp"
+ CC="$CCAS"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+ .text
+ movi v0.16b, #100]])], ac_good_gnu_arm_assembler=yes)
+
+ ac_use_gas_preprocessor=no
+ # Fall back to gas-preprocessor.pl only when the direct attempt failed.
+ if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
+ CC="gas-preprocessor.pl $CCAS"
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+ .text
+ movi v0.16b, #100]])], ac_use_gas_preprocessor=yes)
+ fi
+ CFLAGS="$ac_save_CFLAGS"
+ CC="$ac_save_CC"
+
+ # The wrapper was needed so make it part of CCAS from here on.
+ if test "x$ac_use_gas_preprocessor" = "xyes" ; then
+ CCAS="gas-preprocessor.pl $CCAS"
+ AC_SUBST([CCAS])
+ ac_good_gnu_arm_assembler=yes
+ fi
+
+ if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then
+ $1
+ else
+ $2
+ fi
+])
diff --git a/configure.ac b/configure.ac
index 34eea29..d0780c9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.56])
-AC_INIT([libjpeg-turbo], [1.3.1])
+AC_INIT([libjpeg-turbo], [1.3.80])
BUILD=`date +%Y%m%d`
AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2])
@@ -435,6 +435,38 @@
fi
fi
;;
+ aarch64*)
+ AC_MSG_RESULT([yes (arm64)])
+ AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
+ AC_CHECK_COMPATIBLE_ARM64_ASSEMBLER_IFELSE(
+ [AC_MSG_RESULT([yes])
+ simd_arch=aarch64],
+ [AC_MSG_RESULT([no])
+ with_simd=no])
+ if test "x${with_simd}" = "xno"; then
+ if test "x${require_simd}" = "xyes"; then
+ AC_MSG_ERROR([SIMD support can't be enabled.])
+ else
+ AC_MSG_WARN([SIMD support can't be enabled. Performance will suffer.])
+ fi
+ fi
+ ;;
+ mipsel*)
+ AC_MSG_RESULT([yes (mipsel)])
+ AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
+ AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE(
+ [AC_MSG_RESULT([yes])
+ simd_arch=mipsel],
+ [AC_MSG_RESULT([no])
+ with_simd=no])
+ if test "x${with_simd}" = "xno"; then
+ if test "x${require_simd}" = "xyes"; then
+ AC_MSG_ERROR([SIMD support can't be enabled.])
+ else
+ AC_MSG_WARN([SIMD support can't be enabled. Performance will suffer.])
+ fi
+ fi
+ ;;
*)
AC_MSG_RESULT([no ("$host_cpu")])
with_simd=no;
@@ -458,6 +490,8 @@
AM_CONDITIONAL([SIMD_I386], [test "x$simd_arch" = "xi386"])
AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
+AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"])
+AM_CONDITIONAL([SIMD_MIPSEL], [test "x$simd_arch" = "xmipsel"])
AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])
diff --git a/doc/html/annotated.html b/doc/html/annotated.html
index 1e3fbd0..f928720 100644
--- a/doc/html/annotated.html
+++ b/doc/html/annotated.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/classes.html b/doc/html/classes.html
index 4722b14..ad625f1 100644
--- a/doc/html/classes.html
+++ b/doc/html/classes.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/functions.html b/doc/html/functions.html
index 7af0d8e..55ccba0 100644
--- a/doc/html/functions.html
+++ b/doc/html/functions.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/functions_vars.html b/doc/html/functions_vars.html
index e6a6f72..cdc5560 100644
--- a/doc/html/functions_vars.html
+++ b/doc/html/functions_vars.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/group___turbo_j_p_e_g.html b/doc/html/group___turbo_j_p_e_g.html
index b0ab027..28e4926 100644
--- a/doc/html/group___turbo_j_p_e_g.html
+++ b/doc/html/group___turbo_j_p_e_g.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
@@ -109,21 +109,12 @@
<tr class="memitem:ga7010a4402f54a45ba822ad8675a4655e"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e">TJ_NUMPF</a></td></tr>
<tr class="memdesc:ga7010a4402f54a45ba822ad8675a4655e"><td class="mdescLeft"> </td><td class="mdescRight">The number of pixel formats. <a href="#ga7010a4402f54a45ba822ad8675a4655e">More...</a><br/></td></tr>
<tr class="separator:ga7010a4402f54a45ba822ad8675a4655e"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga39f57a6fb02d9cf32e7b6890099b5a71">TJ_NUMCS</a></td></tr>
+<tr class="memdesc:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="mdescLeft"> </td><td class="mdescRight">The number of JPEG colorspaces. <a href="#ga39f57a6fb02d9cf32e7b6890099b5a71">More...</a><br/></td></tr>
+<tr class="separator:ga39f57a6fb02d9cf32e7b6890099b5a71"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">TJFLAG_BOTTOMUP</a></td></tr>
<tr class="memdesc:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="mdescLeft"> </td><td class="mdescRight">The uncompressed source/destination image is stored in bottom-up (Windows, OpenGL) order, not top-down (X11) order. <a href="#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">More...</a><br/></td></tr>
<tr class="separator:ga72ecf4ebe6eb702d3c6f5ca27455e1ec"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:ga4e872f11c82f241736fa8297920f24e5"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4e872f11c82f241736fa8297920f24e5">TJFLAG_FORCEMMX</a></td></tr>
-<tr class="memdesc:ga4e872f11c82f241736fa8297920f24e5"><td class="mdescLeft"> </td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the underlying codec supports it.) <a href="#ga4e872f11c82f241736fa8297920f24e5">More...</a><br/></td></tr>
-<tr class="separator:ga4e872f11c82f241736fa8297920f24e5"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:gae17e63189e8cd730feed3efbd2454f38"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae17e63189e8cd730feed3efbd2454f38">TJFLAG_FORCESSE</a></td></tr>
-<tr class="memdesc:gae17e63189e8cd730feed3efbd2454f38"><td class="mdescLeft"> </td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the underlying codec supports it.) <a href="#gae17e63189e8cd730feed3efbd2454f38">More...</a><br/></td></tr>
-<tr class="separator:gae17e63189e8cd730feed3efbd2454f38"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga8cf0bca96ea4d472563f4b0ebf8c48e7">TJFLAG_FORCESSE2</a></td></tr>
-<tr class="memdesc:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="mdescLeft"> </td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the underlying codec supports it.) <a href="#ga8cf0bca96ea4d472563f4b0ebf8c48e7">More...</a><br/></td></tr>
-<tr class="separator:ga8cf0bca96ea4d472563f4b0ebf8c48e7"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:gaf9d49066633404da4386d70820295dd2"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaf9d49066633404da4386d70820295dd2">TJFLAG_FORCESSE3</a></td></tr>
-<tr class="memdesc:gaf9d49066633404da4386d70820295dd2"><td class="mdescLeft"> </td><td class="mdescRight">Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the underlying codec supports it.) <a href="#gaf9d49066633404da4386d70820295dd2">More...</a><br/></td></tr>
-<tr class="separator:gaf9d49066633404da4386d70820295dd2"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memItemLeft" align="right" valign="top">#define </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d">TJFLAG_FASTUPSAMPLE</a></td></tr>
<tr class="memdesc:ga4ee4506c81177a06f77e2504a22efd2d"><td class="mdescLeft"> </td><td class="mdescRight">When decompressing an image that was compressed using chrominance subsampling, use the fastest chrominance upsampling algorithm available in the underlying codec. <a href="#ga4ee4506c81177a06f77e2504a22efd2d">More...</a><br/></td></tr>
<tr class="separator:ga4ee4506c81177a06f77e2504a22efd2d"><td class="memSeparator" colspan="2"> </td></tr>
@@ -178,7 +169,8 @@
<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737">TJSAMP_420</a>,
<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248">TJSAMP_GRAY</a>,
<br/>
-  <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974">TJSAMP_440</a>
+  <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974">TJSAMP_440</a>,
+<a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2">TJSAMP_411</a>
<br/>
}</td></tr>
<tr class="memdesc:ga1d047060ea80bb9820d540bb928e9074"><td class="mdescLeft"> </td><td class="mdescRight">Chrominance subsampling options. <a href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">More...</a><br/></td></tr>
@@ -196,11 +188,23 @@
<br/>
  <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4">TJPF_BGRA</a>,
<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081">TJPF_ABGR</a>,
-<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c">TJPF_ARGB</a>
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c">TJPF_ARGB</a>,
+<a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b">TJPF_CMYK</a>
<br/>
}</td></tr>
<tr class="memdesc:gac916144e26c3817ac514e64ae5d12e2a"><td class="mdescLeft"> </td><td class="mdescRight">Pixel formats. <a href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">More...</a><br/></td></tr>
<tr class="separator:gac916144e26c3817ac514e64ae5d12e2a"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="memItemLeft" align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">TJCS</a> { <br/>
+  <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555">TJCS_RGB</a>,
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75">TJCS_YCbCr</a>,
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a">TJCS_GRAY</a>,
+<a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53">TJCS_CMYK</a>,
+<br/>
+  <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e">TJCS_YCCK</a>
+<br/>
+ }</td></tr>
+<tr class="memdesc:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="mdescLeft"> </td><td class="mdescRight">JPEG colorspaces. <a href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">More...</a><br/></td></tr>
+<tr class="separator:ga4f83ad3368e0e29d1957be0efa7c3720"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ga2de531af4e7e6c4f124908376b354866"><td class="memItemLeft" align="right" valign="top">enum  </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866">TJXOP</a> { <br/>
  <a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27">TJXOP_NONE</a>,
<a class="el" href="group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce">TJXOP_HFLIP</a>,
@@ -222,32 +226,38 @@
<tr class="memdesc:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="mdescLeft"> </td><td class="mdescRight">Create a TurboJPEG compressor instance. <a href="#ga3d10c47fbe4a2489a2b30c931551d01a">More...</a><br/></td></tr>
<tr class="separator:ga3d10c47fbe4a2489a2b30c931551d01a"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:gaba62b7a98f960839b588579898495cf2"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2">tjCompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegSubsamp, int jpegQual, int flags)</td></tr>
-<tr class="memdesc:gaba62b7a98f960839b588579898495cf2"><td class="mdescLeft"> </td><td class="mdescRight">Compress an RGB or grayscale image into a JPEG image. <a href="#gaba62b7a98f960839b588579898495cf2">More...</a><br/></td></tr>
+<tr class="memdesc:gaba62b7a98f960839b588579898495cf2"><td class="mdescLeft"> </td><td class="mdescRight">Compress an RGB, grayscale, or CMYK image into a JPEG image. <a href="#gaba62b7a98f960839b588579898495cf2">More...</a><br/></td></tr>
<tr class="separator:gaba62b7a98f960839b588579898495cf2"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67">tjCompressFromYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pad, int height, int subsamp, unsigned char **jpegBuf, unsigned long *jpegSize, int jpegQual, int flags)</td></tr>
+<tr class="memdesc:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="mdescLeft"> </td><td class="mdescRight">Compress a YUV planar image into a JPEG image. <a href="#ga0b931126c7a615ddc3bbd0cca6698d67">More...</a><br/></td></tr>
+<tr class="separator:ga0b931126c7a615ddc3bbd0cca6698d67"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b">tjBufSize</a> (int width, int height, int jpegSubsamp)</td></tr>
<tr class="memdesc:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="mdescLeft"> </td><td class="mdescRight">The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters. <a href="#gaccc5bca7f12fcdcc302e6e1c6d4b311b">More...</a><br/></td></tr>
<tr class="separator:gaccc5bca7f12fcdcc302e6e1c6d4b311b"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c">tjBufSizeYUV</a> (int width, int height, int subsamp)</td></tr>
-<tr class="memdesc:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="mdescLeft"> </td><td class="mdescRight">The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. <a href="#ga9d0cb06fd5052d21b6f2b382db8b219c">More...</a><br/></td></tr>
-<tr class="separator:ga9d0cb06fd5052d21b6f2b382db8b219c"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35">tjEncodeYUV2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int subsamp, int flags)</td></tr>
-<tr class="memdesc:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="mdescLeft"> </td><td class="mdescRight">Encode an RGB or grayscale image into a YUV planar image. <a href="#ga0fa4e7b1943687c6a0c0304529c55d35">More...</a><br/></td></tr>
-<tr class="separator:ga0fa4e7b1943687c6a0c0304529c55d35"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="memItemLeft" align="right" valign="top">DLLEXPORT unsigned long DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9">tjBufSizeYUV2</a> (int width, int pad, int height, int subsamp)</td></tr>
+<tr class="memdesc:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="mdescLeft"> </td><td class="mdescRight">The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters. <a href="#gaf451664a62c1f6c7cc5a6401f32908c9">More...</a><br/></td></tr>
+<tr class="separator:gaf451664a62c1f6c7cc5a6401f32908c9"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360">tjEncodeYUV3</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf, int pad, int subsamp, int flags)</td></tr>
+<tr class="memdesc:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="mdescLeft"> </td><td class="mdescRight">Encode an RGB or grayscale image into a YUV planar image. <a href="#ga0a5ffbf7cb58a5b6a8201114fe889360">More...</a><br/></td></tr>
+<tr class="separator:ga0a5ffbf7cb58a5b6a8201114fe889360"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:gae5408179d041e2a2f7199c8283cf649e"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gae5408179d041e2a2f7199c8283cf649e">tjInitDecompress</a> (void)</td></tr>
<tr class="memdesc:gae5408179d041e2a2f7199c8283cf649e"><td class="mdescLeft"> </td><td class="mdescRight">Create a TurboJPEG decompressor instance. <a href="#gae5408179d041e2a2f7199c8283cf649e">More...</a><br/></td></tr>
<tr class="separator:gae5408179d041e2a2f7199c8283cf649e"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:gac5675fceb7997b385516cdffdb34e6aa"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa">tjDecompressHeader2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp)</td></tr>
-<tr class="memdesc:gac5675fceb7997b385516cdffdb34e6aa"><td class="mdescLeft"> </td><td class="mdescRight">Retrieve information about a JPEG image without decompressing it. <a href="#gac5675fceb7997b385516cdffdb34e6aa">More...</a><br/></td></tr>
-<tr class="separator:gac5675fceb7997b385516cdffdb34e6aa"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:gacd0fac3af74b3511d39b4781b7103086"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086">tjDecompressHeader3</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height, int *jpegSubsamp, int *jpegColorspace)</td></tr>
+<tr class="memdesc:gacd0fac3af74b3511d39b4781b7103086"><td class="mdescLeft"> </td><td class="mdescRight">Retrieve information about a JPEG image without decompressing it. <a href="#gacd0fac3af74b3511d39b4781b7103086">More...</a><br/></td></tr>
+<tr class="separator:gacd0fac3af74b3511d39b4781b7103086"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ga6449044b9af402999ccf52f401333be8"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="structtjscalingfactor.html">tjscalingfactor</a> *DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8">tjGetScalingFactors</a> (int *numscalingfactors)</td></tr>
<tr class="memdesc:ga6449044b9af402999ccf52f401333be8"><td class="mdescLeft"> </td><td class="mdescRight">Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of TurboJPEG supports. <a href="#ga6449044b9af402999ccf52f401333be8">More...</a><br/></td></tr>
<tr class="separator:ga6449044b9af402999ccf52f401333be8"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:gada69cc6443d1bb493b40f1626259e5e9"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9">tjDecompress2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
-<tr class="memdesc:gada69cc6443d1bb493b40f1626259e5e9"><td class="mdescLeft"> </td><td class="mdescRight">Decompress a JPEG image to an RGB or grayscale image. <a href="#gada69cc6443d1bb493b40f1626259e5e9">More...</a><br/></td></tr>
+<tr class="memdesc:gada69cc6443d1bb493b40f1626259e5e9"><td class="mdescLeft"> </td><td class="mdescRight">Decompress a JPEG image to an RGB, grayscale, or CMYK image. <a href="#gada69cc6443d1bb493b40f1626259e5e9">More...</a><br/></td></tr>
<tr class="separator:gada69cc6443d1bb493b40f1626259e5e9"><td class="memSeparator" colspan="2"> </td></tr>
-<tr class="memitem:gad7810af095624a4016e72957a50f77d8"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8">tjDecompressToYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int flags)</td></tr>
-<tr class="memdesc:gad7810af095624a4016e72957a50f77d8"><td class="mdescLeft"> </td><td class="mdescRight">Decompress a JPEG image to a YUV planar image. <a href="#gad7810af095624a4016e72957a50f77d8">More...</a><br/></td></tr>
-<tr class="separator:gad7810af095624a4016e72957a50f77d8"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07">tjDecompressToYUV2</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf, int width, int pad, int height, int flags)</td></tr>
+<tr class="memdesc:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="mdescLeft"> </td><td class="mdescRight">Decompress a JPEG image to a YUV planar image. <a href="#ga7c08b340ad7f8e85d407bd9e81d44d07">More...</a><br/></td></tr>
+<tr class="separator:ga7c08b340ad7f8e85d407bd9e81d44d07"><td class="memSeparator" colspan="2"> </td></tr>
+<tr class="memitem:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="memItemLeft" align="right" valign="top">DLLEXPORT int DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed">tjDecodeYUV</a> (<a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> handle, unsigned char *srcBuf, int pad, int subsamp, unsigned char *dstBuf, int width, int pitch, int height, int pixelFormat, int flags)</td></tr>
+<tr class="memdesc:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="mdescLeft"> </td><td class="mdescRight">Decode a YUV planar image into an RGB or grayscale image. <a href="#ga132ae2c2cadcf64c8bb0f3bdf69da3ed">More...</a><br/></td></tr>
+<tr class="separator:ga132ae2c2cadcf64c8bb0f3bdf69da3ed"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:ga3155b775bfbac9dbba869b95a0367902"><td class="memItemLeft" align="right" valign="top">DLLEXPORT <a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> DLLCALL </td><td class="memItemRight" valign="bottom"><a class="el" href="group___turbo_j_p_e_g.html#ga3155b775bfbac9dbba869b95a0367902">tjInitTransform</a> (void)</td></tr>
<tr class="memdesc:ga3155b775bfbac9dbba869b95a0367902"><td class="mdescLeft"> </td><td class="mdescRight">Create a new TurboJPEG transformer instance. <a href="#ga3155b775bfbac9dbba869b95a0367902">More...</a><br/></td></tr>
<tr class="separator:ga3155b775bfbac9dbba869b95a0367902"><td class="memSeparator" colspan="2"> </td></tr>
@@ -292,6 +302,20 @@
<p>TurboJPEG API. </p>
<p>This API provides an interface for generating, decoding, and transforming planar YUV and JPEG images in memory. </p>
<h2 class="groupheader">Macro Definition Documentation</h2>
+<a class="anchor" id="ga39f57a6fb02d9cf32e7b6890099b5a71"></a>
+<div class="memitem">
+<div class="memproto">
+ <table class="memname">
+ <tr>
+ <td class="memname">#define TJ_NUMCS</td>
+ </tr>
+ </table>
+</div><div class="memdoc">
+
+<p>The number of JPEG colorspaces. </p>
+
+</div>
+</div>
<a class="anchor" id="ga7010a4402f54a45ba822ad8675a4655e"></a>
<div class="memitem">
<div class="memproto">
@@ -393,62 +417,6 @@
</div>
</div>
-<a class="anchor" id="ga4e872f11c82f241736fa8297920f24e5"></a>
-<div class="memitem">
-<div class="memproto">
- <table class="memname">
- <tr>
- <td class="memname">#define TJFLAG_FORCEMMX</td>
- </tr>
- </table>
-</div><div class="memdoc">
-
-<p>Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the underlying codec supports it.) </p>
-
-</div>
-</div>
-<a class="anchor" id="gae17e63189e8cd730feed3efbd2454f38"></a>
-<div class="memitem">
-<div class="memproto">
- <table class="memname">
- <tr>
- <td class="memname">#define TJFLAG_FORCESSE</td>
- </tr>
- </table>
-</div><div class="memdoc">
-
-<p>Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the underlying codec supports it.) </p>
-
-</div>
-</div>
-<a class="anchor" id="ga8cf0bca96ea4d472563f4b0ebf8c48e7"></a>
-<div class="memitem">
-<div class="memproto">
- <table class="memname">
- <tr>
- <td class="memname">#define TJFLAG_FORCESSE2</td>
- </tr>
- </table>
-</div><div class="memdoc">
-
-<p>Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the underlying codec supports it.) </p>
-
-</div>
-</div>
-<a class="anchor" id="gaf9d49066633404da4386d70820295dd2"></a>
-<div class="memitem">
-<div class="memproto">
- <table class="memname">
- <tr>
- <td class="memname">#define TJFLAG_FORCESSE3</td>
- </tr>
- </table>
-</div><div class="memdoc">
-
-<p>Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the underlying codec supports it.) </p>
-
-</div>
-</div>
<a class="anchor" id="ga8808d403c68b62aaa58a4c1e58e98963"></a>
<div class="memitem">
<div class="memproto">
@@ -460,7 +428,7 @@
</div><div class="memdoc">
<p>Disable buffer (re)allocation. </p>
-<p>If passed to <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>, this flag will cause those functions to generate an error if the JPEG image buffer is invalid or too small rather than attempting to allocate or reallocate that buffer. This reproduces the behavior of earlier versions of TurboJPEG. </p>
+<p>If passed to <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a>, this flag will cause those functions to generate an error if the JPEG image buffer is invalid or too small rather than attempting to allocate or reallocate that buffer. This reproduces the behavior of earlier versions of TurboJPEG. </p>
</div>
</div>
@@ -613,6 +581,42 @@
</div>
</div>
<h2 class="groupheader">Enumeration Type Documentation</h2>
+<a class="anchor" id="ga4f83ad3368e0e29d1957be0efa7c3720"></a>
+<div class="memitem">
+<div class="memproto">
+ <table class="memname">
+ <tr>
+ <td class="memname">enum <a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">TJCS</a></td>
+ </tr>
+ </table>
+</div><div class="memdoc">
+
+<p>JPEG colorspaces. </p>
+<table class="fieldtable">
+<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555"></a>TJCS_RGB</em> </td><td class="fielddoc">
+<p>RGB colorspace. </p>
+<p>When compressing the JPEG image, the R, G, and B components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. RGB JPEG images can be decompressed to any of the extended RGB pixel formats or grayscale, but they cannot be decompressed to YUV images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75"></a>TJCS_YCbCr</em> </td><td class="fielddoc">
+<p>YCbCr colorspace. </p>
+<p>YCbCr is not an absolute colorspace but rather a mathematical transformation of RGB designed solely for storage and transmission. YCbCr images must be converted to RGB before they can actually be displayed. In the YCbCr colorspace, the Y (luminance) component represents the black &amp; white portion of the original image, and the Cb and Cr (chrominance) components represent the color portion of the original image. Originally, the analog equivalent of this transformation allowed the same signal to drive both black &amp; white and color televisions, but JPEG images use YCbCr primarily because it allows the color data to be optionally subsampled for the purposes of reducing bandwidth or disk space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a"></a>TJCS_GRAY</em> </td><td class="fielddoc">
+<p>Grayscale colorspace. </p>
+<p>The JPEG image retains only the luminance data (Y component), and any color data from the source image is discarded. Grayscale JPEG images can be compressed from and decompressed to any of the extended RGB pixel formats or grayscale, or they can be decompressed to YUV planar images. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53"></a>TJCS_CMYK</em> </td><td class="fielddoc">
+<p>CMYK colorspace. </p>
+<p>When compressing the JPEG image, the C, M, Y, and K components in the source image are reordered into image planes, but no colorspace conversion or subsampling is performed. CMYK JPEG images can only be decompressed to CMYK pixels. </p>
+</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e"></a>TJCS_YCCK</em> </td><td class="fielddoc">
+<p>YCCK colorspace. </p>
+<p>YCCK (AKA "YCbCrK") is not an absolute colorspace but rather a mathematical transformation of CMYK designed solely for storage and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be reversibly transformed into YCCK, and as with YCbCr, the chrominance components in the YCCK pixels can be subsampled without incurring major perceptual loss. YCCK JPEG images can only be compressed from and decompressed to CMYK pixels. </p>
+</td></tr>
+</table>
+
+</div>
+</div>
<a class="anchor" id="gac916144e26c3817ac514e64ae5d12e2a"></a>
<div class="memitem">
<div class="memproto">
@@ -669,6 +673,10 @@
<p>ARGB pixel format. </p>
<p>This is the same as <a class="el" href="group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84">TJPF_XRGB</a>, except that when decompressing, the X component is guaranteed to be 0xFF, which can be interpreted as an opaque alpha channel. </p>
</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b"></a>TJPF_CMYK</em> </td><td class="fielddoc">
+<p>CMYK pixel format. </p>
+<p>Unlike RGB, which is an additive color model used primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily for printing. In the CMYK color model, the value of each color component typically corresponds to an amount of cyan, magenta, yellow, or black ink that is applied to a white background. In order to convert between CMYK and RGB, it is necessary to use a color management system (CMS.) A CMS will attempt to map colors within the printer's gamut to perceptually similar colors in the display's gamut and vice versa, but the mapping is typically not 1:1 or reversible, nor can it be defined with a simple formula. Thus, such a conversion is out of scope for a codec library. However, the TurboJPEG API allows for compressing CMYK pixels into a YCCK JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e" title="YCCK colorspace.">TJCS_YCCK</a>) and decompressing YCCK JPEG images into CMYK pixels. </p>
+</td></tr>
</table>
</div>
@@ -684,7 +692,7 @@
</div><div class="memdoc">
<p>Chrominance subsampling options. </p>
-<p>When an image is converted from the RGB to the YCbCr colorspace as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity (the human eye is more sensitive to small changes in brightness than small changes in color.) This is called "chrominance subsampling". </p>
+<p>When pixels are converted from RGB to YCbCr (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75" title="YCbCr colorspace.">TJCS_YCbCr</a>) or from CMYK to YCCK (see <a class="el" href="group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e" title="YCCK colorspace.">TJCS_YCCK</a>) as part of the JPEG compression process, some of the Cb and Cr (chrominance) components can be discarded or averaged together to produce a smaller image with little perceptible loss of image clarity (the human eye is more sensitive to small changes in brightness than to small changes in color.) This is called "chrominance subsampling". </p>
<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes. </p>
<table class="fieldtable">
<tr><th colspan="2">Enumerator</th></tr><tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074afb8da4f44197837bdec0a4f593dacae3"></a>TJSAMP_444</em> </td><td class="fielddoc">
@@ -707,6 +715,10 @@
<p>4:4:0 chrominance subsampling. </p>
<p>The JPEG or YUV image will contain one chrominance component for every 1x2 block of pixels in the source image. Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo. </p>
</td></tr>
+<tr><td class="fieldname"><em><a class="anchor" id="gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2"></a>TJSAMP_411</em> </td><td class="fielddoc">
+<p>4:1:1 chrominance subsampling. </p>
+<p>The JPEG or YUV image will contain one chrominance component for every 4x1 block of pixels in the source image. JPEG images compressed with 4:1:1 subsampling will be almost exactly the same size as those compressed with 4:2:0 subsampling, and in the aggregate, both subsampling methods produce approximately the same perceptual quality. However, 4:1:1 is better able to reproduce sharp horizontal features. Note that 4:1:1 subsampling is not fully accelerated in libjpeg-turbo. </p>
+</td></tr>
</table>
</div>
@@ -772,7 +784,7 @@
</div><div class="memdoc">
<p>Allocate an image buffer for use with TurboJPEG. </p>
-<p>You should always use this function to allocate the JPEG destination buffer(s) for <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> and <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> unless you are disabling automatic buffer (re)allocation (by setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>.)</p>
+<p>You should always use this function to allocate the JPEG destination buffer(s) for <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> and <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> unless you are disabling automatic buffer (re)allocation (by setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>.)</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">bytes</td><td>the number of bytes to allocate</td></tr>
@@ -828,12 +840,12 @@
</div>
</div>
-<a class="anchor" id="ga9d0cb06fd5052d21b6f2b382db8b219c"></a>
+<a class="anchor" id="gaf451664a62c1f6c7cc5a6401f32908c9"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
- <td class="memname">DLLEXPORT unsigned long DLLCALL tjBufSizeYUV </td>
+ <td class="memname">DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2 </td>
<td>(</td>
<td class="paramtype">int </td>
<td class="paramname"><em>width</em>, </td>
@@ -842,6 +854,12 @@
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int </td>
+ <td class="paramname"><em>pad</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
<td class="paramname"><em>height</em>, </td>
</tr>
<tr>
@@ -862,6 +880,7 @@
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">width</td><td>width of the image (in pixels) </td></tr>
+ <tr><td class="paramname">pad</td><td>the width of each line in each plane of the image is padded to the nearest multiple of this number of bytes (must be a power of 2.) </td></tr>
<tr><td class="paramname">height</td><td>height of the image (in pixels) </td></tr>
<tr><td class="paramname">subsamp</td><td>level of chrominance subsampling in the image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.)</td></tr>
</table>
@@ -949,11 +968,11 @@
</table>
</div><div class="memdoc">
-<p>Compress an RGB or grayscale image into a JPEG image. </p>
+<p>Compress an RGB, grayscale, or CMYK image into a JPEG image. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
- <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB or grayscale pixels to be compressed </td></tr>
+ <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing RGB, grayscale, or CMYK pixels to be compressed </td></tr>
<tr><td class="paramname">width</td><td>width (in pixels) of the source image </td></tr>
<tr><td class="paramname">pitch</td><td>bytes per line of the source image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
<tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
@@ -975,6 +994,197 @@
</div>
</div>
+<a class="anchor" id="ga0b931126c7a615ddc3bbd0cca6698d67"></a>
+<div class="memitem">
+<div class="memproto">
+ <table class="memname">
+ <tr>
+ <td class="memname">DLLEXPORT int DLLCALL tjCompressFromYUV </td>
+ <td>(</td>
+ <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> </td>
+ <td class="paramname"><em>handle</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">unsigned char * </td>
+ <td class="paramname"><em>srcBuf</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>width</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>pad</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>height</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>subsamp</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">unsigned char ** </td>
+ <td class="paramname"><em>jpegBuf</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">unsigned long * </td>
+ <td class="paramname"><em>jpegSize</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>jpegQual</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>flags</em> </td>
+ </tr>
+ <tr>
+ <td></td>
+ <td>)</td>
+ <td></td><td></td>
+ </tr>
+ </table>
+</div><div class="memdoc">
+
+<p>Compress a YUV planar image into a JPEG image. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+ <table class="params">
+ <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG compressor or transformer instance </td></tr>
+ <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing a YUV planar image to be compressed. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the buffer, and the size of each plane is determined by the specified width, height, padding, and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane should be padded to the nearest multiple of 2 (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) This is irrespective of any additional padding specified in the <code>pad</code> parameter. </td></tr>
+ <tr><td class="paramname">width</td><td>width (in pixels) of the source image </td></tr>
+ <tr><td class="paramname">pad</td><td>the line padding used in the source image. For instance, if each line in each plane of the YUV image is padded to the nearest multiple of 4 bytes, then <code>pad</code> should be set to 4. </td></tr>
+ <tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
+ <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling used in the source image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+ <tr><td class="paramname">jpegBuf</td><td>address of a pointer to an image buffer that will receive the JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer to accommodate the size of the JPEG image. Thus, you can choose to:<ol type="1">
+<li>pre-allocate the JPEG buffer with an arbitrary size using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a> and let TurboJPEG grow the buffer as needed,</li>
+<li>set <code>*jpegBuf</code> to NULL to tell TurboJPEG to allocate the buffer for you, or</li>
+<li>pre-allocate the buffer to a "worst case" size determined by calling <a class="el" href="group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b" title="The maximum size of the buffer (in bytes) required to hold a JPEG image with the given parameters...">tjBufSize()</a>. This should ensure that the buffer never has to be re-allocated (setting <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a> guarantees this.)</li>
+</ol>
+If you choose option 1, <code>*jpegSize</code> should be set to the size of your pre-allocated buffer. In any case, unless you have set <a class="el" href="group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963" title="Disable buffer (re)allocation.">TJFLAG_NOREALLOC</a>, you should always check <code>*jpegBuf</code> upon return from this function, as it may have changed. </td></tr>
+ <tr><td class="paramname">jpegSize</td><td>pointer to an unsigned long variable that holds the size of the JPEG image buffer. If <code>*jpegBuf</code> points to a pre-allocated buffer, then <code>*jpegSize</code> should be set to the size of the buffer. Upon return, <code>*jpegSize</code> will contain the size of the JPEG image (in bytes.) </td></tr>
+ <tr><td class="paramname">jpegQual</td><td>the image quality of the generated JPEG image (1 = worst, 100 = best) </td></tr>
+ <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+ </table>
+ </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
+<a class="anchor" id="ga132ae2c2cadcf64c8bb0f3bdf69da3ed"></a>
+<div class="memitem">
+<div class="memproto">
+ <table class="memname">
+ <tr>
+ <td class="memname">DLLEXPORT int DLLCALL tjDecodeYUV </td>
+ <td>(</td>
+ <td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> </td>
+ <td class="paramname"><em>handle</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">unsigned char * </td>
+ <td class="paramname"><em>srcBuf</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>pad</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>subsamp</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">unsigned char * </td>
+ <td class="paramname"><em>dstBuf</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>width</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>pitch</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>height</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>pixelFormat</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>flags</em> </td>
+ </tr>
+ <tr>
+ <td></td>
+ <td>)</td>
+ <td></td><td></td>
+ </tr>
+ </table>
+</div><div class="memdoc">
+
+<p>Decode a YUV planar image into an RGB or grayscale image. </p>
+<p>This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG decompression process. The Y, U (Cb), and V (Cr) image planes should be stored sequentially in the source buffer, and the size of each plane is determined by the width and height of the source image, as well as the specified padding and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane should be padded to the nearest multiple of 2 in the input image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) </p>
+<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
+<dl class="params"><dt>Parameters</dt><dd>
+ <table class="params">
+ <tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
+ <tr><td class="paramname">srcBuf</td><td>pointer to an image buffer containing a YUV planar image to be decoded. The size of this buffer should match the value returned by <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> for the given image width, height, padding, and level of chrominance subsampling. </td></tr>
+ <tr><td class="paramname">pad</td><td>Use this parameter to specify that the width of each line in each plane of the YUV source image is padded to the nearest multiple of this number of bytes (must be a power of 2.) </td></tr>
+ <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling used in the YUV source image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+ <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decoded image. This buffer should normally be <code>pitch * height</code> bytes in size, but the <code>dstBuf</code> pointer can also be used to decode into a specific region of a larger buffer. </td></tr>
+ <tr><td class="paramname">width</td><td>width (in pixels) of the source and destination images </td></tr>
+ <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the destination image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the destination image should be padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+ <tr><td class="paramname">height</td><td>height (in pixels) of the source and destination images </td></tr>
+ <tr><td class="paramname">pixelFormat</td><td>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+ <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
+ </table>
+ </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
+
+</div>
+</div>
<a class="anchor" id="gada69cc6443d1bb493b40f1626259e5e9"></a>
<div class="memitem">
<div class="memproto">
@@ -1041,7 +1251,7 @@
</table>
</div><div class="memdoc">
-<p>Decompress a JPEG image to an RGB or grayscale image. </p>
+<p>Decompress a JPEG image to an RGB, grayscale, or CMYK image. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
@@ -1049,26 +1259,23 @@
<tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
<tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the decompressed image. This buffer should normally be <code>pitch * scaledHeight</code> bytes in size, where <code>scaledHeight</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image height and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>. The <code>dstBuf</code> pointer may also be used to decompress into a specific region of a larger buffer. </td></tr>
<tr><td class="paramname">width</td><td>desired width (in pixels) of the destination image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If <code>width</code> is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
- <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this is <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the decompressed image is unpadded, else <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: <code>scaledWidth</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image width and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>.) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>scaledWidth<ul>
-<li><a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]. </li>
-</ul>
-</code></td></tr>
- <tr><td class="paramname">height</td><td><code>desired height (in pixels) of the destination image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </code></td></tr>
- <tr><td class="paramname">pixelFormat</td><td><code>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </code></td></tr>
- <tr><td class="paramname">flags</td><td><code>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</code></td></tr>
+ <tr><td class="paramname">pitch</td><td>bytes per line of the destination image. Normally, this is <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the decompressed image is unpadded, else <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the decompressed image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. (NOTE: <code>scaledWidth</code> can be determined by calling <a class="el" href="group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df" title="Compute the scaled value of dimension using the given scaling factor.">TJSCALED()</a> with the JPEG image width and one of the scaling factors returned by <a class="el" href="group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8" title="Returns a list of fractional scaling factors that the JPEG decompressor in this implementation of Tur...">tjGetScalingFactors()</a>.) You can also be clever and use the pitch parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>scaledWidth * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
+ <tr><td class="paramname">height</td><td>desired height (in pixels) of the destination image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
+ <tr><td class="paramname">pixelFormat</td><td>pixel format of the destination image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
+ <tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
</table>
</dd>
</dl>
-<dl class="section return"><dt>Returns</dt><dd><code> 0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </code></dd></dl>
+<dl class="section return"><dt>Returns</dt><dd>0 if successful, or -1 if an error occurred (see <a class="el" href="group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf" title="Returns a descriptive error message explaining why the last command failed.">tjGetErrorStr()</a>.) </dd></dl>
</div>
</div>
-<a class="anchor" id="gac5675fceb7997b385516cdffdb34e6aa"></a>
+<a class="anchor" id="gacd0fac3af74b3511d39b4781b7103086"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
- <td class="memname">DLLEXPORT int DLLCALL tjDecompressHeader2 </td>
+ <td class="memname">DLLEXPORT int DLLCALL tjDecompressHeader3 </td>
<td>(</td>
<td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> </td>
<td class="paramname"><em>handle</em>, </td>
@@ -1101,7 +1308,13 @@
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int * </td>
- <td class="paramname"><em>jpegSubsamp</em> </td>
+ <td class="paramname"><em>jpegSubsamp</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int * </td>
+ <td class="paramname"><em>jpegColorspace</em> </td>
</tr>
<tr>
<td></td>
@@ -1119,7 +1332,8 @@
<tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
<tr><td class="paramname">width</td><td>pointer to an integer variable that will receive the width (in pixels) of the JPEG image </td></tr>
<tr><td class="paramname">height</td><td>pointer to an integer variable that will receive the height (in pixels) of the JPEG image </td></tr>
- <tr><td class="paramname">jpegSubsamp</td><td>pointer to an integer variable that will receive the level of chrominance subsampling used when compressing the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.)</td></tr>
+ <tr><td class="paramname">jpegSubsamp</td><td>pointer to an integer variable that will receive the level of chrominance subsampling used when compressing the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+ <tr><td class="paramname">jpegColorspace</td><td>pointer to an integer variable that will receive one of the JPEG colorspace constants, indicating the colorspace of the JPEG image (see <a class="el" href="group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720">JPEG colorspaces</a>.)</td></tr>
</table>
</dd>
</dl>
@@ -1127,12 +1341,12 @@
</div>
</div>
-<a class="anchor" id="gad7810af095624a4016e72957a50f77d8"></a>
+<a class="anchor" id="ga7c08b340ad7f8e85d407bd9e81d44d07"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
- <td class="memname">DLLEXPORT int DLLCALL tjDecompressToYUV </td>
+ <td class="memname">DLLEXPORT int DLLCALL tjDecompressToYUV2 </td>
<td>(</td>
<td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> </td>
<td class="paramname"><em>handle</em>, </td>
@@ -1159,6 +1373,24 @@
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int </td>
+ <td class="paramname"><em>width</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>pad</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
+ <td class="paramname"><em>height</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
<td class="paramname"><em>flags</em> </td>
</tr>
<tr>
@@ -1170,14 +1402,17 @@
</div><div class="memdoc">
<p>Decompress a JPEG image to a YUV planar image. </p>
-<p>This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image. The padding of the planes in this image is the same as in the images generated by <a class="el" href="group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35" title="Encode an RGB or grayscale image into a YUV planar image.">tjEncodeYUV2()</a>. Note that, if the width or height of the image is not an even multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG. </p>
+<p>This function performs JPEG decompression but leaves out the color conversion step, so a planar YUV image is generated instead of an RGB image. The structure of the planes in this image is the same as in the images generated by <a class="el" href="group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360" title="Encode an RGB or grayscale image into a YUV planar image.">tjEncodeYUV3()</a>. Note that, if the width or height of the JPEG image is not an even multiple of the MCU block size (see <a class="el" href="group___turbo_j_p_e_g.html#ga9e61e7cd47a15a173283ba94e781308c" title="MCU block width (in pixels) for a given level of chrominance subsampling.">tjMCUWidth</a> and <a class="el" href="group___turbo_j_p_e_g.html#gabd247bb9fecb393eca57366feb8327bf" title="MCU block height (in pixels) for a given level of chrominance subsampling.">tjMCUHeight</a>), then an intermediate buffer copy will be performed within TurboJPEG. </p>
<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">handle</td><td>a handle to a TurboJPEG decompressor or transformer instance </td></tr>
<tr><td class="paramname">jpegBuf</td><td>pointer to a buffer containing the JPEG image to decompress </td></tr>
<tr><td class="paramname">jpegSize</td><td>size of the JPEG image (in bytes) </td></tr>
- <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV()</a> to determine the appropriate size for this buffer based on the image width, height, and level of subsampling. </td></tr>
+ <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> to determine the appropriate size for this buffer based on the image width, height, padding, and level of subsampling. </td></tr>
+ <tr><td class="paramname">width</td><td>desired width (in pixels) of the YUV image. If this is different than the width of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired width. If <code>width</code> is set to 0, then only the height will be considered when determining the scaled image size. </td></tr>
+ <tr><td class="paramname">pad</td><td>the width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, <code>pad</code> should be set to 4. </td></tr>
+ <tr><td class="paramname">height</td><td>desired height (in pixels) of the YUV image. If this is different than the height of the JPEG image being decompressed, then TurboJPEG will use scaling in the JPEG decompressor to generate the largest possible image that will fit within the desired height. If <code>height</code> is set to 0, then only the width will be considered when determining the scaled image size. </td></tr>
<tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
</table>
</dd>
@@ -1211,12 +1446,12 @@
</div>
</div>
-<a class="anchor" id="ga0fa4e7b1943687c6a0c0304529c55d35"></a>
+<a class="anchor" id="ga0a5ffbf7cb58a5b6a8201114fe889360"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
- <td class="memname">DLLEXPORT int DLLCALL tjEncodeYUV2 </td>
+ <td class="memname">DLLEXPORT int DLLCALL tjEncodeYUV3 </td>
<td>(</td>
<td class="paramtype"><a class="el" href="group___turbo_j_p_e_g.html#ga758d2634ecb4949de7815cba621f5763">tjhandle</a> </td>
<td class="paramname"><em>handle</em>, </td>
@@ -1261,6 +1496,12 @@
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int </td>
+ <td class="paramname"><em>pad</em>, </td>
+ </tr>
+ <tr>
+ <td class="paramkey"></td>
+ <td></td>
+ <td class="paramtype">int </td>
<td class="paramname"><em>subsamp</em>, </td>
</tr>
<tr>
@@ -1278,7 +1519,7 @@
</div><div class="memdoc">
<p>Encode an RGB or grayscale image into a YUV planar image. </p>
-<p>This function uses the accelerated color conversion routines in TurboJPEG's underlying codec to produce a planar YUV image that is suitable for X Video. Specifically, if the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane is padded to the nearest multiple of 2 in the output image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) Also, each line of each plane in the output image is padded to 4 bytes. Although this will work with any subsampling option, it is really only useful in combination with TJ_420, which produces an image compatible with the I420 (AKA "YUV420P") format. </p>
+<p>This function uses the accelerated color conversion routines in the underlying codec but does not execute any of the other steps in the JPEG compression process. The Y, U (Cb), and V (Cr) image planes are stored sequentially into the destination buffer, and the size of each plane is determined by the width and height of the source image, as well as the specified padding and level of chrominance subsampling. If the chrominance components are subsampled along the horizontal dimension, then the width of the luminance plane is padded to the nearest multiple of 2 in the output image (same goes for the height of the luminance plane, if the chrominance components are subsampled along the vertical dimension.) </p>
<p>NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the convention of the digital video community, the TurboJPEG API uses "YUV" to refer to an image format consisting of Y, Cb, and Cr image planes.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
@@ -1288,8 +1529,9 @@
<tr><td class="paramname">pitch</td><td>bytes per line of the source image. Normally, this should be <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code> if the image is unpadded, or <code><a class="el" href="group___turbo_j_p_e_g.html#ga0aba955473315e405295d978f0c16511" title="Pad the given width to the nearest 32-bit boundary.">TJPAD</a>(width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat])</code> if each line of the image is padded to the nearest 32-bit boundary, as is the case for Windows bitmaps. You can also be clever and use this parameter to skip lines, etc. Setting this parameter to 0 is the equivalent of setting it to <code>width * <a class="el" href="group___turbo_j_p_e_g.html#gad77cf8fe5b2bfd3cb3f53098146abb4c" title="Pixel size (in bytes) for a given pixel format.">tjPixelSize</a>[pixelFormat]</code>. </td></tr>
<tr><td class="paramname">height</td><td>height (in pixels) of the source image </td></tr>
<tr><td class="paramname">pixelFormat</td><td>pixel format of the source image (see <a class="el" href="group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a">Pixel formats</a>.) </td></tr>
- <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV()</a> to determine the appropriate size for this buffer based on the image width, height, and level of chrominance subsampling. </td></tr>
- <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling to be used when generating the YUV image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) </td></tr>
+ <tr><td class="paramname">dstBuf</td><td>pointer to an image buffer that will receive the YUV image. Use <a class="el" href="group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9" title="The size of the buffer (in bytes) required to hold a YUV planar image with the given parameters...">tjBufSizeYUV2()</a> to determine the appropriate size for this buffer based on the image width, height, padding, and level of chrominance subsampling. </td></tr>
+ <tr><td class="paramname">pad</td><td>the width of each line in each plane of the YUV image will be padded to the nearest multiple of this number of bytes (must be a power of 2.) To generate images suitable for X Video, <code>pad</code> should be set to 4. </td></tr>
+ <tr><td class="paramname">subsamp</td><td>the level of chrominance subsampling to be used when generating the YUV image (see <a class="el" href="group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074">Chrominance subsampling options</a>.) To generate images suitable for X Video, <code>subsamp</code> should be set to <a class="el" href="group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737">TJSAMP_420</a>. This produces an image compatible with the I420 (AKA "YUV420P") format. </td></tr>
<tr><td class="paramname">flags</td><td>the bitwise OR of one or more of the <a class="el" href="group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec">flags</a>.</td></tr>
</table>
</dd>
@@ -1313,7 +1555,7 @@
</div><div class="memdoc">
<p>Free an image buffer previously allocated by TurboJPEG. </p>
-<p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB or grayscale image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
+<p>You should always use this function to free JPEG destination buffer(s) that were automatically (re)allocated by <a class="el" href="group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2" title="Compress an RGB, grayscale, or CMYK image into a JPEG image.">tjCompress2()</a> or <a class="el" href="group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616" title="Losslessly transform a JPEG image into another JPEG image.">tjTransform()</a> or that were manually allocated using <a class="el" href="group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff" title="Allocate an image buffer for use with TurboJPEG.">tjAlloc()</a>.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">buffer</td><td>address of the buffer to free</td></tr>
@@ -1580,7 +1822,8 @@
<li>8x8 for no subsampling or grayscale</li>
<li>16x8 for 4:2:2</li>
<li>8x16 for 4:4:0</li>
-<li>16x16 for 4:2:0 </li>
+<li>16x16 for 4:2:0</li>
+<li>32x8 for 4:1:1 </li>
</ul>
</div>
@@ -1609,7 +1852,8 @@
<li>8x8 for no subsampling or grayscale</li>
<li>16x8 for 4:2:2</li>
<li>8x16 for 4:4:0</li>
-<li>16x16 for 4:2:0 </li>
+<li>16x16 for 4:2:0</li>
+<li>32x8 for 4:1:1 </li>
</ul>
</div>
diff --git a/doc/html/index.html b/doc/html/index.html
index 72daeb8..139b84c 100644
--- a/doc/html/index.html
+++ b/doc/html/index.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/modules.html b/doc/html/modules.html
index 25b7ec6..6b769e4 100644
--- a/doc/html/modules.html
+++ b/doc/html/modules.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/search/all_74.js b/doc/html/search/all_74.js
index a1927ba..435cec4 100644
--- a/doc/html/search/all_74.js
+++ b/doc/html/search/all_74.js
@@ -1,26 +1,31 @@
var searchData=
[
+ ['tj_5fnumcs',['TJ_NUMCS',['../group___turbo_j_p_e_g.html#ga39f57a6fb02d9cf32e7b6890099b5a71',1,'turbojpeg.h']]],
['tj_5fnumpf',['TJ_NUMPF',['../group___turbo_j_p_e_g.html#ga7010a4402f54a45ba822ad8675a4655e',1,'turbojpeg.h']]],
['tj_5fnumsamp',['TJ_NUMSAMP',['../group___turbo_j_p_e_g.html#ga5ef3d169162ce77ce348e292a0b7477c',1,'turbojpeg.h']]],
['tj_5fnumxop',['TJ_NUMXOP',['../group___turbo_j_p_e_g.html#ga0f6dbd18adf38b7d46ac547f0f4d562c',1,'turbojpeg.h']]],
['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
['tjblueoffset',['tjBlueOffset',['../group___turbo_j_p_e_g.html#ga84e2e35d3f08025f976ec1ec53693dea',1,'turbojpeg.h']]],
['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
- ['tjbufsizeyuv',['tjBufSizeYUV',['../group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c',1,'turbojpeg.h']]],
+ ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]],
['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+ ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67',1,'turbojpeg.h']]],
+ ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]],
+ ['tjcs_5fcmyk',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]],
+ ['tjcs_5fgray',['TJCS_GRAY',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a',1,'turbojpeg.h']]],
+ ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]],
+ ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]],
+ ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]],
+ ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed',1,'turbojpeg.h']]],
['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
- ['tjdecompressheader2',['tjDecompressHeader2',['../group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa',1,'turbojpeg.h']]],
- ['tjdecompresstoyuv',['tjDecompressToYUV',['../group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8',1,'turbojpeg.h']]],
+ ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086',1,'turbojpeg.h']]],
+ ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07',1,'turbojpeg.h']]],
['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
- ['tjencodeyuv2',['tjEncodeYUV2',['../group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35',1,'turbojpeg.h']]],
+ ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360',1,'turbojpeg.h']]],
['tjflag_5faccuratedct',['TJFLAG_ACCURATEDCT',['../group___turbo_j_p_e_g.html#gacb233cfd722d66d1ccbf48a7de81f0e0',1,'turbojpeg.h']]],
['tjflag_5fbottomup',['TJFLAG_BOTTOMUP',['../group___turbo_j_p_e_g.html#ga72ecf4ebe6eb702d3c6f5ca27455e1ec',1,'turbojpeg.h']]],
['tjflag_5ffastdct',['TJFLAG_FASTDCT',['../group___turbo_j_p_e_g.html#gaabce235db80d3f698b27f36cbd453da2',1,'turbojpeg.h']]],
['tjflag_5ffastupsample',['TJFLAG_FASTUPSAMPLE',['../group___turbo_j_p_e_g.html#ga4ee4506c81177a06f77e2504a22efd2d',1,'turbojpeg.h']]],
- ['tjflag_5fforcemmx',['TJFLAG_FORCEMMX',['../group___turbo_j_p_e_g.html#ga4e872f11c82f241736fa8297920f24e5',1,'turbojpeg.h']]],
- ['tjflag_5fforcesse',['TJFLAG_FORCESSE',['../group___turbo_j_p_e_g.html#gae17e63189e8cd730feed3efbd2454f38',1,'turbojpeg.h']]],
- ['tjflag_5fforcesse2',['TJFLAG_FORCESSE2',['../group___turbo_j_p_e_g.html#ga8cf0bca96ea4d472563f4b0ebf8c48e7',1,'turbojpeg.h']]],
- ['tjflag_5fforcesse3',['TJFLAG_FORCESSE3',['../group___turbo_j_p_e_g.html#gaf9d49066633404da4386d70820295dd2',1,'turbojpeg.h']]],
['tjflag_5fnorealloc',['TJFLAG_NOREALLOC',['../group___turbo_j_p_e_g.html#ga8808d403c68b62aaa58a4c1e58e98963',1,'turbojpeg.h']]],
['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
@@ -39,6 +44,7 @@
['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+ ['tjpf_5fcmyk',['TJPF_CMYK',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b',1,'turbojpeg.h']]],
['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
@@ -49,6 +55,7 @@
['tjredoffset',['tjRedOffset',['../group___turbo_j_p_e_g.html#gadd9b446742ac8a3923f7992c7988fea8',1,'turbojpeg.h']]],
['tjregion',['tjregion',['../structtjregion.html',1,'']]],
['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
+ ['tjsamp_5f411',['TJSAMP_411',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2',1,'turbojpeg.h']]],
['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
@@ -56,7 +63,7 @@
['tjsamp_5fgray',['TJSAMP_GRAY',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a3f1c9504842ddc7a48d0f690754b6248',1,'turbojpeg.h']]],
['tjscaled',['TJSCALED',['../group___turbo_j_p_e_g.html#ga84878bb65404204743aa18cac02781df',1,'turbojpeg.h']]],
['tjscalingfactor',['tjscalingfactor',['../structtjscalingfactor.html',1,'']]],
- ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform(): turbojpeg.h'],['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'tjTransform(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags): turbojpeg.h']]],
+ ['tjtransform',['tjtransform',['../structtjtransform.html',1,'tjtransform'],['../group___turbo_j_p_e_g.html#gae403193ceb4aafb7e0f56ab587b48616',1,'tjTransform(tjhandle handle, unsigned char *jpegBuf, unsigned long jpegSize, int n, unsigned char **dstBufs, unsigned long *dstSizes, tjtransform *transforms, int flags): turbojpeg.h'],['../group___turbo_j_p_e_g.html#gaa29f3189c41be12ec5dee7caec318a31',1,'tjtransform(): turbojpeg.h']]],
['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]],
['tjxop_5fhflip',['TJXOP_HFLIP',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aa0df69776caa30f0fa28e26332d311ce',1,'turbojpeg.h']]],
['tjxop_5fnone',['TJXOP_NONE',['../group___turbo_j_p_e_g.html#gga2de531af4e7e6c4f124908376b354866aad88c0366cd3f7d0eac9d7a3fa1c2c27',1,'turbojpeg.h']]],
diff --git a/doc/html/search/enums_74.js b/doc/html/search/enums_74.js
index 20bd4db..276aa24 100644
--- a/doc/html/search/enums_74.js
+++ b/doc/html/search/enums_74.js
@@ -1,5 +1,6 @@
var searchData=
[
+ ['tjcs',['TJCS',['../group___turbo_j_p_e_g.html#ga4f83ad3368e0e29d1957be0efa7c3720',1,'turbojpeg.h']]],
['tjpf',['TJPF',['../group___turbo_j_p_e_g.html#gac916144e26c3817ac514e64ae5d12e2a',1,'turbojpeg.h']]],
['tjsamp',['TJSAMP',['../group___turbo_j_p_e_g.html#ga1d047060ea80bb9820d540bb928e9074',1,'turbojpeg.h']]],
['tjxop',['TJXOP',['../group___turbo_j_p_e_g.html#ga2de531af4e7e6c4f124908376b354866',1,'turbojpeg.h']]]
diff --git a/doc/html/search/enumvalues_74.js b/doc/html/search/enumvalues_74.js
index 55664f1..7dc2f8d 100644
--- a/doc/html/search/enumvalues_74.js
+++ b/doc/html/search/enumvalues_74.js
@@ -1,16 +1,23 @@
var searchData=
[
+ ['tjcs_5fcmyk',['TJCS_CMYK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a6c8b636152ac8195b869587db315ee53',1,'turbojpeg.h']]],
+ ['tjcs_5fgray',['TJCS_GRAY',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720ab3e7d6a87f695e45b81c1b5262b5a50a',1,'turbojpeg.h']]],
+ ['tjcs_5frgb',['TJCS_RGB',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a677cb7ccb85c4038ac41964a2e09e555',1,'turbojpeg.h']]],
+ ['tjcs_5fycbcr',['TJCS_YCbCr',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a7389b8f65bb387ffedce3efd0d78ec75',1,'turbojpeg.h']]],
+ ['tjcs_5fycck',['TJCS_YCCK',['../group___turbo_j_p_e_g.html#gga4f83ad3368e0e29d1957be0efa7c3720a53839e0fe867b76b58d16b0a1a7c598e',1,'turbojpeg.h']]],
['tjpf_5fabgr',['TJPF_ABGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa1ba1a7f1631dbeaa49a0a85fc4a40081',1,'turbojpeg.h']]],
['tjpf_5fargb',['TJPF_ARGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aae8f846ed9d9de99b6e1dfe448848765c',1,'turbojpeg.h']]],
['tjpf_5fbgr',['TJPF_BGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aab10624437fb8ef495a0b153e65749839',1,'turbojpeg.h']]],
['tjpf_5fbgra',['TJPF_BGRA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aac037ff1845cf9b74bb81a3659c2b9fb4',1,'turbojpeg.h']]],
['tjpf_5fbgrx',['TJPF_BGRX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa2a1fbf569ca79897eae886e3376ca4c8',1,'turbojpeg.h']]],
+ ['tjpf_5fcmyk',['TJPF_CMYK',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7f5100ec44c91994e243f1cf55553f8b',1,'turbojpeg.h']]],
['tjpf_5fgray',['TJPF_GRAY',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa5431b54b015337705f13118073711a1a',1,'turbojpeg.h']]],
['tjpf_5frgb',['TJPF_RGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa7ce93230bff449518ce387c17e6ed37c',1,'turbojpeg.h']]],
['tjpf_5frgba',['TJPF_RGBA',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa88d2e88fab67f6503cf972e14851cc12',1,'turbojpeg.h']]],
['tjpf_5frgbx',['TJPF_RGBX',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aa83973bebb7e2dc6fa8bae89ff3f42e01',1,'turbojpeg.h']]],
['tjpf_5fxbgr',['TJPF_XBGR',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aaf6603b27147de47e212e75dac027b2af',1,'turbojpeg.h']]],
['tjpf_5fxrgb',['TJPF_XRGB',['../group___turbo_j_p_e_g.html#ggac916144e26c3817ac514e64ae5d12e2aadae996905efcfa3b42a0bb3bea7f9d84',1,'turbojpeg.h']]],
+ ['tjsamp_5f411',['TJSAMP_411',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a28ec62575e5ea295c3fde3001dc628e2',1,'turbojpeg.h']]],
['tjsamp_5f420',['TJSAMP_420',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a63085dbf683cfe39e513cdb6343e3737',1,'turbojpeg.h']]],
['tjsamp_5f422',['TJSAMP_422',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074a136130902cc578f11f32429b59368404',1,'turbojpeg.h']]],
['tjsamp_5f440',['TJSAMP_440',['../group___turbo_j_p_e_g.html#gga1d047060ea80bb9820d540bb928e9074accf740e6f3aa6ba20ba922cad13cb974',1,'turbojpeg.h']]],
diff --git a/doc/html/search/functions_74.js b/doc/html/search/functions_74.js
index c746a91..0a0e6cd 100644
--- a/doc/html/search/functions_74.js
+++ b/doc/html/search/functions_74.js
@@ -2,13 +2,15 @@
[
['tjalloc',['tjAlloc',['../group___turbo_j_p_e_g.html#ga5c9234bda6d993cdaffdd89bf81a00ff',1,'turbojpeg.h']]],
['tjbufsize',['tjBufSize',['../group___turbo_j_p_e_g.html#gaccc5bca7f12fcdcc302e6e1c6d4b311b',1,'turbojpeg.h']]],
- ['tjbufsizeyuv',['tjBufSizeYUV',['../group___turbo_j_p_e_g.html#ga9d0cb06fd5052d21b6f2b382db8b219c',1,'turbojpeg.h']]],
+ ['tjbufsizeyuv2',['tjBufSizeYUV2',['../group___turbo_j_p_e_g.html#gaf451664a62c1f6c7cc5a6401f32908c9',1,'turbojpeg.h']]],
['tjcompress2',['tjCompress2',['../group___turbo_j_p_e_g.html#gaba62b7a98f960839b588579898495cf2',1,'turbojpeg.h']]],
+ ['tjcompressfromyuv',['tjCompressFromYUV',['../group___turbo_j_p_e_g.html#ga0b931126c7a615ddc3bbd0cca6698d67',1,'turbojpeg.h']]],
+ ['tjdecodeyuv',['tjDecodeYUV',['../group___turbo_j_p_e_g.html#ga132ae2c2cadcf64c8bb0f3bdf69da3ed',1,'turbojpeg.h']]],
['tjdecompress2',['tjDecompress2',['../group___turbo_j_p_e_g.html#gada69cc6443d1bb493b40f1626259e5e9',1,'turbojpeg.h']]],
- ['tjdecompressheader2',['tjDecompressHeader2',['../group___turbo_j_p_e_g.html#gac5675fceb7997b385516cdffdb34e6aa',1,'turbojpeg.h']]],
- ['tjdecompresstoyuv',['tjDecompressToYUV',['../group___turbo_j_p_e_g.html#gad7810af095624a4016e72957a50f77d8',1,'turbojpeg.h']]],
+ ['tjdecompressheader3',['tjDecompressHeader3',['../group___turbo_j_p_e_g.html#gacd0fac3af74b3511d39b4781b7103086',1,'turbojpeg.h']]],
+ ['tjdecompresstoyuv2',['tjDecompressToYUV2',['../group___turbo_j_p_e_g.html#ga7c08b340ad7f8e85d407bd9e81d44d07',1,'turbojpeg.h']]],
['tjdestroy',['tjDestroy',['../group___turbo_j_p_e_g.html#ga674adee917b95ad4a896f1ba39e12540',1,'turbojpeg.h']]],
- ['tjencodeyuv2',['tjEncodeYUV2',['../group___turbo_j_p_e_g.html#ga0fa4e7b1943687c6a0c0304529c55d35',1,'turbojpeg.h']]],
+ ['tjencodeyuv3',['tjEncodeYUV3',['../group___turbo_j_p_e_g.html#ga0a5ffbf7cb58a5b6a8201114fe889360',1,'turbojpeg.h']]],
['tjfree',['tjFree',['../group___turbo_j_p_e_g.html#ga8c4a1231dc06a450514c835f6471f137',1,'turbojpeg.h']]],
['tjgeterrorstr',['tjGetErrorStr',['../group___turbo_j_p_e_g.html#ga9af79c908ec131b1ae8d52fe40375abf',1,'turbojpeg.h']]],
['tjgetscalingfactors',['tjGetScalingFactors',['../group___turbo_j_p_e_g.html#ga6449044b9af402999ccf52f401333be8',1,'turbojpeg.h']]],
diff --git a/doc/html/structtjregion.html b/doc/html/structtjregion.html
index 9ecd917..515686c 100644
--- a/doc/html/structtjregion.html
+++ b/doc/html/structtjregion.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/structtjscalingfactor.html b/doc/html/structtjscalingfactor.html
index 33c7366..f34e150 100644
--- a/doc/html/structtjscalingfactor.html
+++ b/doc/html/structtjscalingfactor.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
diff --git a/doc/html/structtjtransform.html b/doc/html/structtjtransform.html
index 3199343..ef2c8d5 100644
--- a/doc/html/structtjtransform.html
+++ b/doc/html/structtjtransform.html
@@ -23,7 +23,7 @@
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">TurboJPEG
-  <span id="projectnumber">1.2.1</span>
+  <span id="projectnumber">1.4</span>
</div>
</td>
</tr>
@@ -108,7 +108,7 @@
<tr class="memdesc:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="mdescLeft"> </td><td class="mdescRight">Arbitrary data that can be accessed within the body of the callback function. <a href="#a688fe8f1a8ecc12a538d9e561cf338e3">More...</a><br/></td></tr>
<tr class="separator:a688fe8f1a8ecc12a538d9e561cf338e3"><td class="memSeparator" colspan="2"> </td></tr>
<tr class="memitem:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memItemLeft" align="right" valign="top">int(* </td><td class="memItemRight" valign="bottom"><a class="el" href="structtjtransform.html#a43ee1bcdd2a8d7249a756774f78793c1">customFilter</a> )(short *coeffs, <a class="el" href="structtjregion.html">tjregion</a> arrayRegion, <a class="el" href="structtjregion.html">tjregion</a> planeRegion, int componentIndex, int transformIndex, struct <a class="el" href="structtjtransform.html">tjtransform</a> *transform)</td></tr>
-<tr class="memdesc:a43ee1bcdd2a8d7249a756774f78793c1"><td class="mdescLeft"> </td><td class="mdescRight">A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG file. <a href="#a43ee1bcdd2a8d7249a756774f78793c1">More...</a><br/></td></tr>
+<tr class="memdesc:a43ee1bcdd2a8d7249a756774f78793c1"><td class="mdescLeft"> </td><td class="mdescRight">A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image. <a href="#a43ee1bcdd2a8d7249a756774f78793c1">More...</a><br/></td></tr>
<tr class="separator:a43ee1bcdd2a8d7249a756774f78793c1"><td class="memSeparator" colspan="2"> </td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
@@ -124,7 +124,7 @@
</table>
</div><div class="memdoc">
-<p>A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG file. </p>
+<p>A callback function that can be used to modify the DCT coefficients after they are losslessly transformed but before they are transcoded to a new JPEG image. </p>
<p>This allows for custom filters or other transformations to be applied in the frequency domain.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
diff --git a/doxygen.config b/doxygen.config
index b881d82..9680175 100644
--- a/doxygen.config
+++ b/doxygen.config
@@ -1,5 +1,5 @@
PROJECT_NAME = TurboJPEG
-PROJECT_NUMBER = 1.2.1
+PROJECT_NUMBER = 1.4
OUTPUT_DIRECTORY = doc/
USE_WINDOWS_ENCODING = NO
OPTIMIZE_OUTPUT_FOR_C = YES
diff --git a/java/TJBench.java b/java/TJBench.java
index eaf5fa3..5257a1b 100644
--- a/java/TJBench.java
+++ b/java/TJBench.java
@@ -36,20 +36,25 @@
static final int YUVENCODE = 1;
static final int YUVDECODE = 2;
+ static final int YUVCOMPRESS = 3;
- static int flags = 0, yuv = 0, quiet = 0, pf = TJ.PF_BGR;
- static boolean decompOnly, doTile;
+ static int flags = 0, yuv = 0, quiet = 0, pf = TJ.PF_BGR, yuvpad = 1;
+ static boolean compOnly, decompOnly, doTile;
static final String[] pixFormatStr = {
"RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "GRAY"
};
static final String[] subNameLong = {
- "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
+ "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
};
static final String[] subName = {
- "444", "422", "420", "GRAY", "440"
+ "444", "422", "420", "GRAY", "440", "411"
+ };
+
+ static final String[] csName = {
+ "RGB", "YCbCr", "GRAY", "CMYK", "YCCK"
};
static TJScalingFactor sf;
@@ -62,6 +67,16 @@
}
+ static String formatName(int subsamp, int cs) {
+ if (cs == TJ.CS_YCbCr)
+ return subNameLong[subsamp];
+ else if (cs == TJ.CS_YCCK)
+ return csName[cs] + " " + subNameLong[subsamp];
+ else
+ return csName[cs];
+ }
+
+
static String sigFig(double val, int figs) {
String format;
int digitsAfterDecimal = figs - (int)Math.ceil(Math.log10(Math.abs(val)));
@@ -127,9 +142,9 @@
TJDecompressor tjd;
double start, elapsed;
int ps = TJ.getPixelSize(pf), i;
- int yuvSize = TJ.bufSizeYUV(w, h, subsamp), bufsize;
- int scaledw = (yuv == YUVDECODE) ? w : sf.getScaled(w);
- int scaledh = (yuv == YUVDECODE) ? h : sf.getScaled(h);
+ int scaledw = sf.getScaled(w);
+ int scaledh = sf.getScaled(h);
+ int yuvSize = TJ.bufSizeYUV(scaledw, yuvpad, scaledh, subsamp), bufsize;
int pitch = scaledw * ps;
if (jpegQual > 0)
@@ -148,7 +163,7 @@
/* Execute once to preload cache */
tjd.setJPEGImage(jpegBuf[0], jpegSize[0]);
if (yuv == YUVDECODE)
- tjd.decompressToYUV(dstBuf, flags);
+ tjd.decompressToYUV(dstBuf, scaledw, yuvpad, scaledh, flags);
else
tjd.decompress(dstBuf, scaledw, pitch, scaledh, pf, flags);
@@ -157,7 +172,7 @@
i++) {
int tile = 0;
if (yuv == YUVDECODE)
- tjd.decompressToYUV(dstBuf, flags);
+ tjd.decompressToYUV(dstBuf, scaledw, yuvpad, scaledh, flags);
else {
for (int y = 0; y < h; y += tileh) {
for (int x = 0; x < w; x += tilew, tile++) {
@@ -183,23 +198,24 @@
(double)(w * h) / 1000000. * (double)i / elapsed);
}
+ if (sf.getNum() != 1 || sf.getDenom() != 1)
+ sizeStr = new String(sf.getNum() + "_" + sf.getDenom());
+ else if (tilew != w || tileh != h)
+ sizeStr = new String(tilew + "x" + tileh);
+ else
+ sizeStr = new String("full");
+ if (decompOnly)
+ tempStr = new String(fileName + "_" + sizeStr +
+ (yuv != 0 ? ".yuv" : ".bmp"));
+ else
+ tempStr = new String(fileName + "_" + subName[subsamp] + qualStr +
+ "_" + sizeStr + (yuv != 0 ? ".yuv" : ".bmp"));
+
if (yuv == YUVDECODE) {
- tempStr = fileName + "_" + subName[subsamp] + qualStr + ".yuv";
FileOutputStream fos = new FileOutputStream(tempStr);
fos.write(dstBuf, 0, yuvSize);
fos.close();
} else {
- if (sf.getNum() != 1 || sf.getDenom() != 1)
- sizeStr = new String(sf.getNum() + "_" + sf.getDenom());
- else if (tilew != w || tileh != h)
- sizeStr = new String(tilew + "x" + tileh);
- else
- sizeStr = new String("full");
- if (decompOnly)
- tempStr = new String(fileName + "_" + sizeStr + ".bmp");
- else
- tempStr = new String(fileName + "_" + subName[subsamp] + qualStr +
- "_" + sizeStr + ".bmp");
saveImage(tempStr, dstBuf, scaledw, scaledh, pf);
int ndx = tempStr.indexOf('.');
tempStr = new String(tempStr.substring(0, ndx) + "-err.bmp");
@@ -305,7 +321,9 @@
int[] jpegSize;
double start, elapsed;
int totalJpegSize = 0, tilew, tileh, i;
- int ps = TJ.getPixelSize(pf), ntilesw = 1, ntilesh = 1, pitch = w * ps;
+ int ps = (yuv == YUVCOMPRESS ? 3 : TJ.getPixelSize(pf));
+ int ntilesw = 1, ntilesh = 1, pitch = w * ps;
+ String pfStr = (yuv == YUVCOMPRESS ? "YUV" : pixFormatStr[pf]);
if (yuv == YUVENCODE) {
doTestYUV(srcBuf, w, h, subsamp, fileName);
@@ -315,8 +333,7 @@
tmpBuf = new byte[pitch * h];
if (quiet == 0)
- System.out.format(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n",
- pixFormatStr[pf],
+ System.out.format(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr,
(flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down",
subNameLong[subsamp], jpegQual);
@@ -336,12 +353,16 @@
/* Compression test */
if (quiet == 1)
- System.out.format("%s\t%s\t%s\t%d\t", pixFormatStr[pf],
+ System.out.format("%s\t%s\t%s\t%d\t", pfStr,
(flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
subNameLong[subsamp], jpegQual);
- for (i = 0; i < h; i++)
- System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
- tjc.setSourceImage(srcBuf, tilew, pitch, tileh, pf);
+ if (yuv != YUVCOMPRESS)
+ for (i = 0; i < h; i++)
+ System.arraycopy(srcBuf, w * ps * i, tmpBuf, pitch * i, w * ps);
+ if (yuv == YUVCOMPRESS)
+ tjc.setSourceImageYUV(srcBuf, tilew, yuvpad, tileh);
+ else
+ tjc.setSourceImage(srcBuf, tilew, pitch, tileh, pf);
tjc.setJPEGQuality(jpegQual);
tjc.setSubsamp(subsamp);
@@ -357,7 +378,8 @@
for (int x = 0; x < w; x += tilew, tile++) {
int width = Math.min(tilew, w - x);
int height = Math.min(tileh, h - y);
- tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
+ if (yuv != YUVCOMPRESS)
+ tjc.setSourceImage(srcBuf, x, y, width, pitch, height, pf);
tjc.compress(jpegBuf[tile], flags);
jpegSize[tile] = tjc.getCompressedSize();
totalJpegSize += jpegSize[tile];
@@ -398,8 +420,9 @@
}
/* Decompression test */
- decompTest(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
- fileName, tilew, tileh);
+ if (!compOnly)
+ decompTest(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
+ fileName, tilew, tileh);
for (i = 0; i < ntilesw * ntilesh; i++)
jpegBuf[i] = null;
@@ -417,7 +440,7 @@
byte[] srcBuf;
int[] jpegSize;
int totalJpegSize;
- int w = 0, h = 0, subsamp = -1, _w, _h, _tilew, _tileh,
+ int w = 0, h = 0, subsamp = -1, cs = -1, _w, _h, _tilew, _tileh,
_ntilesw, _ntilesh, _subsamp, x, y;
int ntilesw = 1, ntilesh = 1;
double start, elapsed;
@@ -439,17 +462,22 @@
w = tjt.getWidth();
h = tjt.getHeight();
subsamp = tjt.getSubsamp();
+ cs = tjt.getColorspace();
if (quiet == 1) {
System.out.println("All performance values in Mpixels/sec\n");
- System.out.format("Bitmap\tBitmap\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
+ System.out.format("Bitmap\tBitmap\tJPEG\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
(doTile ? "Tile " : "Image"),
(doTile ? "Tile " : "Image"));
- System.out.println("Format\tOrder\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n");
+ System.out.println("Format\tOrder\tCS\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n");
} else if (quiet == 0) {
- System.out.format(">>>>> JPEG %s --> %s (%s) <<<<<",
- subNameLong[subsamp], pixFormatStr[pf],
- (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
+ if (yuv == YUVDECODE)
+ System.out.format(">>>>> JPEG %s --> YUV <<<<<",
+ formatName(subsamp, cs));
+ else
+ System.out.format(">>>>> JPEG %s --> %s (%s) <<<<<",
+ formatName(subsamp, cs), pixFormatStr[pf],
+ (flags & TJ.FLAG_BOTTOMUP) != 0 ? "Bottom-up" : "Top-down");
}
for (int tilew = doTile ? 16 : w, tileh = doTile ? 16 : h; ;
@@ -470,9 +498,9 @@
sf.getScaled(_h));
System.out.println("");
} else if (quiet == 1) {
- System.out.format("%s\t%s\t%s\t", pixFormatStr[pf],
+ System.out.format("%s\t%s\t%s\t%s\t", pixFormatStr[pf],
(flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD",
- subNameLong[subsamp]);
+ csName[cs], subNameLong[subsamp]);
System.out.format("%-4d %-4d\t", tilew, tileh);
}
@@ -582,7 +610,7 @@
String className = new TJBench().getClass().getName();
System.out.println("\nUSAGE: java " + className);
- System.out.println(" <Inputfile (BMP)> <Quality> [options]\n");
+ System.out.println(" <Inputfile (BMP|YUV)> <Quality> [options]\n");
System.out.println(" java " + className);
System.out.println(" <Inputfile (JPG)> [options]\n");
System.out.println("Options:\n");
@@ -600,10 +628,21 @@
System.out.println(" codec");
System.out.println("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the");
System.out.println(" underlying codec");
- System.out.println("-440 = Test 4:4:0 chrominance subsampling instead of 4:2:2");
+ System.out.println("-subsamp <s> = if compressing a JPEG image from a YUV planar source image,");
+ System.out.println(" this specifies the level of chrominance subsampling used in the source");
+ System.out.println(" image. Otherwise, this specifies the level of chrominance subsampling");
+ System.out.println(" to use in the JPEG destination image. <s> = 444, 422, 440, 420, 411,");
+ System.out.println(" or GRAY");
System.out.println("-quiet = Output results in tabular rather than verbose format");
System.out.println("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG");
System.out.println("-yuvdecode = Decode JPEG image to planar YUV rather than RGB");
+ System.out.println("-yuvsize WxH = if compressing a JPEG image from a YUV planar source image, this");
+ System.out.println(" specifies the width and height of the source image.");
+ System.out.println("-yuvpad <p> = if compressing a JPEG image from a YUV planar source image, this");
+ System.out.println(" specifies the number of bytes to which each row of each plane in the");
+ System.out.println(" source image is padded. If decompressing a JPEG image to a YUV planar");
+ System.out.println(" destination image, this specifies the row padding for each plane of the");
+ System.out.println(" destination image. (default=1)");
System.out.println("-scale M/N = scale down the width/height of the decompressed JPEG image by a");
System.out.print (" factor of M/N (M/N = ");
for (i = 0; i < nsf; i++) {
@@ -626,7 +665,8 @@
System.out.println(" decompression (these options are mutually exclusive)");
System.out.println("-grayscale = Perform lossless grayscale conversion prior to decompression");
System.out.println(" test (can be combined with the other transforms above)");
- System.out.println("-benchTime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n");
+ System.out.println("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)");
+ System.out.println("-componly = Stop after running compression tests. Do not test decompression.\n");
System.out.println("NOTE: If the quality is specified as a range (e.g. 90-100), a separate");
System.out.println("test will be performed for all quality values in the range.\n");
System.exit(1);
@@ -637,7 +677,7 @@
byte[] srcBuf = null; int w = 0, h = 0;
int minQual = -1, maxQual = -1;
int minArg = 1; int retval = 0;
- boolean do440 = false;
+ int subsamp = -1;
try {
@@ -647,6 +687,8 @@
String tempStr = argv[0].toLowerCase();
if (tempStr.endsWith(".jpg") || tempStr.endsWith(".jpeg"))
decompOnly = true;
+ if (tempStr.endsWith(".yuv"))
+ yuv = YUVCOMPRESS;
System.out.println("");
@@ -715,8 +757,6 @@
System.out.println("Using most accurate DCT/IDCT algorithm\n");
flags |= TJ.FLAG_ACCURATEDCT;
}
- if (argv[i].equals("-440"))
- do440 = true;
if (argv[i].equalsIgnoreCase("-rgb"))
pf = TJ.PF_RGB;
if (argv[i].equalsIgnoreCase("-rgbx"))
@@ -786,6 +826,44 @@
else
usage();
}
+ if (argv[i].equalsIgnoreCase("-yuvsize") && i < argv.length - 1) {
+ int temp1 = 0, temp2 = 0;
+ Scanner scanner = new Scanner(argv[++i]).useDelimiter("x");
+ try {
+ temp1 = scanner.nextInt();
+ temp2 = scanner.nextInt();
+ } catch(Exception e) {}
+ if (temp1 >= 1 && temp2 >= 1) {
+ w = temp1;
+ h = temp2;
+ } else
+ usage();
+ }
+ if (argv[i].equalsIgnoreCase("-yuvpad") && i < argv.length - 1) {
+ int temp = 0;
+ try {
+ temp = Integer.parseInt(argv[++i]);
+ } catch (NumberFormatException e) {}
+ if (temp >= 1)
+ yuvpad = temp;
+ }
+ if (argv[i].equalsIgnoreCase("-subsamp") && i < argv.length - 1) {
+ i++;
+ if (argv[i].toUpperCase().startsWith("G"))
+ subsamp = TJ.SAMP_GRAY;
+ else if (argv[i].equals("444"))
+ subsamp = TJ.SAMP_444;
+ else if (argv[i].equals("422"))
+ subsamp = TJ.SAMP_422;
+ else if (argv[i].equals("440"))
+ subsamp = TJ.SAMP_440;
+ else if (argv[i].equals("420"))
+ subsamp = TJ.SAMP_420;
+ else if (argv[i].equals("411"))
+ subsamp = TJ.SAMP_411;
+ }
+ if (argv[i].equalsIgnoreCase("-componly"))
+ compOnly = true;
if (argv[i].equalsIgnoreCase("-?"))
usage();
}
@@ -802,17 +880,30 @@
if (yuv != 0 && doTile) {
System.out.println("Disabling tiled compression/decompression tests, because those tests do not");
- System.out.println("work when YUV encoding or decoding is enabled.\n");
+ System.out.println("work when YUV encoding, compression, or decoding is enabled.\n");
doTile = false;
}
if (!decompOnly) {
- int[] width = new int[1], height = new int[1];
- srcBuf = loadImage(argv[0], width, height, pf);
- w = width[0]; h = height[0];
- int index = -1;
- if ((index = argv[0].indexOf('.')) >= 0)
- argv[0] = argv[0].substring(0, index);
+ if(yuv == YUVCOMPRESS) {
+ if (w < 1 || h < 1 || subsamp < 0 || subsamp >= TJ.NUMSAMP)
+ throw new Exception("YUV image size and/or subsampling not specified");
+ FileInputStream fis = new FileInputStream(argv[0]);
+ int srcSize = (int)fis.getChannel().size();
+ if (srcSize != TJ.bufSizeYUV(w, yuvpad, h, subsamp))
+ throw new Exception("YUV image file is the wrong size");
+ srcBuf = new byte[srcSize];
+ fis.read(srcBuf, 0, srcSize);
+ fis.close();
+ }
+ else {
+ int[] width = new int[1], height = new int[1];
+ srcBuf = loadImage(argv[0], width, height, pf);
+ w = width[0]; h = height[0];
+ int index = -1;
+ if ((index = argv[0].indexOf('.')) >= 0)
+ argv[0] = argv[0].substring(0, index);
+ }
}
if (quiet == 1 && !decompOnly) {
@@ -829,21 +920,27 @@
}
System.gc();
- for (int i = maxQual; i >= minQual; i--)
- doTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
- System.out.println("");
- System.gc();
- for (int i = maxQual; i >= minQual; i--)
- doTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
- System.out.println("");
- System.gc();
- for (int i = maxQual; i >= minQual; i--)
- doTest(srcBuf, w, h, do440 ? TJ.SAMP_440 : TJ.SAMP_422, i, argv[0]);
- System.out.println("");
- System.gc();
- for (int i = maxQual; i >= minQual; i--)
- doTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
- System.out.println("");
+ if (yuv == YUVCOMPRESS || (subsamp >= 0 && subsamp < TJ.NUMSAMP)) {
+ for (int i = maxQual; i >= minQual; i--)
+ doTest(srcBuf, w, h, subsamp, i, argv[0]);
+ System.out.println("");
+ } else {
+ for (int i = maxQual; i >= minQual; i--)
+ doTest(srcBuf, w, h, TJ.SAMP_GRAY, i, argv[0]);
+ System.out.println("");
+ System.gc();
+ for (int i = maxQual; i >= minQual; i--)
+ doTest(srcBuf, w, h, TJ.SAMP_420, i, argv[0]);
+ System.out.println("");
+ System.gc();
+ for (int i = maxQual; i >= minQual; i--)
+ doTest(srcBuf, w, h, TJ.SAMP_422, i, argv[0]);
+ System.out.println("");
+ System.gc();
+ for (int i = maxQual; i >= minQual; i--)
+ doTest(srcBuf, w, h, TJ.SAMP_444, i, argv[0]);
+ System.out.println("");
+ }
} catch (Exception e) {
System.out.println("ERROR: " + e.getMessage());
diff --git a/java/TJUnitTest.java b/java/TJUnitTest.java
index ffe82e9..a6ee73e 100644
--- a/java/TJUnitTest.java
+++ b/java/TJUnitTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C)2011-2012, 2014 D. R. Commander.
+ * Copyright (C)2011-2014 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -46,24 +46,26 @@
System.out.println("\nUSAGE: java " + classname + " [options]\n");
System.out.println("Options:\n");
System.out.println("-yuv = test YUV encoding/decoding support\n");
+ System.out.println("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest");
+ System.out.println(" 4-byte boundary\n");
System.out.println("-bi = test BufferedImage support\n");
System.exit(1);
}
private static final String[] subNameLong = {
- "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
+ "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
};
private static final String[] subName = {
- "444", "422", "420", "GRAY", "440"
+ "444", "422", "420", "GRAY", "440", "411"
};
private static final String[] pixFormatStr = {
"RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale",
- "RGBA", "BGRA", "ABGR", "ARGB"
+ "RGBA", "BGRA", "ABGR", "ARGB", "CMYK"
};
private static final int[] alphaOffset = {
- -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0
+ -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
};
private static final int[] _3byteFormats = {
@@ -73,7 +75,7 @@
BufferedImage.TYPE_3BYTE_BGR
};
private static final int[] _4byteFormats = {
- TJ.PF_RGBX, TJ.PF_BGRX, TJ.PF_XBGR, TJ.PF_XRGB
+ TJ.PF_RGBX, TJ.PF_BGRX, TJ.PF_XBGR, TJ.PF_XRGB, TJ.PF_CMYK
};
private static final int[] _4byteFormatsBI = {
BufferedImage.TYPE_INT_BGR, BufferedImage.TYPE_INT_RGB,
@@ -93,6 +95,7 @@
private static final int YUVENCODE = 1;
private static final int YUVDECODE = 2;
private static int yuv = 0;
+ private static int pad = 4;
private static boolean bi = false;
private static int exitStatus = 0;
@@ -162,8 +165,8 @@
int ps = TJ.getPixelSize(pf);
int index, row, col, halfway = 16;
- Arrays.fill(buf, (byte)0);
if (pf == TJ.PF_GRAY) {
+ Arrays.fill(buf, (byte)0);
for (row = 0; row < h; row++) {
for (col = 0; col < w; col++) {
if ((flags & TJ.FLAG_BOTTOMUP) != 0)
@@ -178,6 +181,27 @@
}
return;
}
+ if (pf == TJ.PF_CMYK) {
+ Arrays.fill(buf, (byte)255);
+ for (row = 0; row < h; row++) {
+ for (col = 0; col < w; col++) {
+ if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+ index = (h - row - 1) * w + col;
+ else
+ index = row * w + col;
+ if (((row / 8) + (col / 8)) % 2 == 0) {
+ if (row >= halfway) buf[index * ps + 3] = 0;
+ } else {
+ buf[index * ps + 2] = 0;
+ if (row < halfway)
+ buf[index * ps + 1] = 0;
+ }
+ }
+ }
+ return;
+ }
+
+ Arrays.fill(buf, (byte)0);
for (row = 0; row < h; row++) {
for (col = 0; col < w; col++) {
if ((flags & TJ.FLAG_BOTTOMUP) != 0)
@@ -296,6 +320,39 @@
int blockSize = 8 * sf.getNum() / sf.getDenom();
try {
+
+ if (pf == TJ.PF_CMYK) {
+ for (row = 0; row < h; row++) {
+ for (col = 0; col < w; col++) {
+ if ((flags & TJ.FLAG_BOTTOMUP) != 0)
+ index = (h - row - 1) * w + col;
+ else
+ index = row * w + col;
+ byte c = buf[index * ps];
+ byte m = buf[index * ps + 1];
+ byte y = buf[index * ps + 2];
+ byte k = buf[index * ps + 3];
+ checkVal255(row, col, c, "C");
+ if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+ checkVal255(row, col, m, "M");
+ checkVal255(row, col, y, "Y");
+ if (row < halfway)
+ checkVal255(row, col, k, "K");
+ else
+ checkVal0(row, col, k, "K");
+ } else {
+ checkVal0(row, col, y, "Y");
+ checkVal255(row, col, k, "K");
+ if (row < halfway)
+ checkVal0(row, col, m, "M");
+ else
+ checkVal255(row, col, m, "M");
+ }
+ }
+ }
+ return 1;
+ }
+
for (row = 0; row < halfway; row++) {
for (col = 0; col < w; col++) {
if ((flags & TJ.FLAG_BOTTOMUP) != 0)
@@ -348,13 +405,25 @@
if (retval == 0) {
for (row = 0; row < h; row++) {
for (col = 0; col < w; col++) {
- int r = buf[pitch * row + col * ps + roffset];
- int g = buf[pitch * row + col * ps + goffset];
- int b = buf[pitch * row + col * ps + boffset];
- if (r < 0) r += 256;
- if (g < 0) g += 256;
- if (b < 0) b += 256;
- System.out.format("%3d/%3d/%3d ", r, g, b);
+ if (pf == TJ.PF_CMYK) {
+ int c = buf[pitch * row + col * ps];
+ int m = buf[pitch * row + col * ps + 1];
+ int y = buf[pitch * row + col * ps + 2];
+ int k = buf[pitch * row + col * ps + 3];
+ if (c < 0) c += 256;
+ if (m < 0) m += 256;
+ if (y < 0) y += 256;
+ if (k < 0) k += 256;
+ System.out.format("%3d/%3d/%3d/%3d ", c, m, y, k);
+ } else {
+ int r = buf[pitch * row + col * ps + roffset];
+ int g = buf[pitch * row + col * ps + goffset];
+ int b = buf[pitch * row + col * ps + boffset];
+ if (r < 0) r += 256;
+ if (g < 0) g += 256;
+ if (b < 0) b += 256;
+ System.out.format("%3d/%3d/%3d ", r, g, b);
+ }
}
System.out.print("\n");
}
@@ -469,17 +538,67 @@
return ((v + (p) - 1) & (~((p) - 1)));
}
- private static int checkBufYUV(byte[] buf, int size, int w, int h,
+ private static void initBufYUV(byte[] buf, int w, int pad, int h,
int subsamp) throws Exception {
int row, col;
int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8;
int pw = PAD(w, hsf), ph = PAD(h, vsf);
int cw = pw / hsf, ch = ph / vsf;
- int ypitch = PAD(pw, 4), uvpitch = PAD(cw, 4);
+ int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad);
+ int halfway = 16, blockSize = 8;
+
+ Arrays.fill(buf, (byte)0);
+ for (row = 0; row < ph; row++) {
+ for (col = 0; col < pw; col++) {
+ int index = ypitch * row + col;
+ if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
+ if (row < halfway)
+ buf[index] = (byte)255;
+ else
+ buf[index] = 0;
+ } else {
+ if (row < halfway)
+ buf[index] = 76;
+ else
+ buf[index] = (byte)226;
+ }
+ }
+ }
+ if (subsamp != TJ.SAMP_GRAY) {
+ halfway = 16 / vsf;
+ for (row = 0; row < ch; row++) {
+ for (col = 0; col < cw; col++) {
+ int uindex = ypitch * ph + (uvpitch * row + col),
+ vindex = ypitch * ph + uvpitch * ch + (uvpitch * row + col);
+ if (((row * vsf / blockSize) + (col * hsf / blockSize)) % 2 == 0) {
+ buf[uindex] = buf[vindex] = (byte)128;
+ } else {
+ if (row < halfway) {
+ buf[uindex] = 85;
+ buf[vindex] = (byte)255;
+ } else {
+ buf[uindex] = 0;
+ buf[vindex] = (byte)149;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private static int checkBufYUV(byte[] buf, int size, int w, int h,
+ int subsamp, TJScalingFactor sf)
+ throws Exception {
+ int row, col;
+ int hsf = TJ.getMCUWidth(subsamp) / 8, vsf = TJ.getMCUHeight(subsamp) / 8;
+ int pw = PAD(w, hsf), ph = PAD(h, vsf);
+ int cw = pw / hsf, ch = ph / vsf;
+ int ypitch = PAD(pw, pad), uvpitch = PAD(cw, pad);
int retval = 1;
int correctsize = ypitch * ph +
(subsamp == TJ.SAMP_GRAY ? 0 : uvpitch * ch * 2);
- int halfway = 16;
+ int halfway = 16 * sf.getNum() / sf.getDenom();
+ int blockSize = 8 * sf.getNum() / sf.getDenom();
try {
if (size != correctsize)
@@ -489,7 +608,7 @@
for (row = 0; row < ph; row++) {
for (col = 0; col < pw; col++) {
byte y = buf[ypitch * row + col];
- if (((row / 8) + (col / 8)) % 2 == 0) {
+ if (((row / blockSize) + (col / blockSize)) % 2 == 0) {
if (row < halfway)
checkVal255(row, col, y, "Y");
else
@@ -503,12 +622,12 @@
}
}
if (subsamp != TJ.SAMP_GRAY) {
- halfway = 16 / vsf;
+ halfway = 16 / vsf * sf.getNum() / sf.getDenom();
for (row = 0; row < ch; row++) {
for (col = 0; col < cw; col++) {
byte u = buf[ypitch * ph + (uvpitch * row + col)],
v = buf[ypitch * ph + uvpitch * ch + (uvpitch * row + col)];
- if (((row * vsf / 8) + (col * hsf / 8)) % 2 == 0) {
+ if (((row * vsf / blockSize) + (col * hsf / blockSize)) % 2 == 0) {
checkVal(row, col, u, "U", 128);
checkVal(row, col, v, "V", 128);
} else {
@@ -575,53 +694,64 @@
byte[] srcBuf = null;
BufferedImage img = null;
String pfStr;
+ String buStrLong = (flags & TJ.FLAG_BOTTOMUP) != 0 ?
+ "Bottom-Up" : "Top-Down ";
+ String buStr = (flags & TJ.FLAG_BOTTOMUP) != 0 ? "BU" : "TD";
double t;
int size = 0, ps, imgType = pf;
- if (bi) {
- pf = biTypePF(imgType);
- pfStr = biTypeStr(imgType);
- } else
- pfStr = pixFormatStr[pf];
- ps = TJ.getPixelSize(pf);
-
- System.out.print(pfStr + " ");
- if (bi)
- System.out.print("(" + pixFormatStr[pf] + ") ");
- if ((flags & TJ.FLAG_BOTTOMUP) != 0)
- System.out.print("Bottom-Up");
- else
- System.out.print("Top-Down ");
- System.out.print(" -> " + subNameLong[subsamp] + " ");
- if (yuv == YUVENCODE)
- System.out.print("YUV ... ");
- else
- System.out.print("Q" + jpegQual + " ... ");
-
- if (bi) {
- img = new BufferedImage(w, h, imgType);
- initImg(img, pf, flags);
- tempstr = baseName + "_enc_" + pfStr + "_" +
- (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
- subName[subsamp] + "_Q" + jpegQual + ".png";
- File file = new File(tempstr);
- ImageIO.write(img, "png", file);
+ if (yuv == YUVDECODE) {
+ System.out.format("YUV %s %s --> JPEG Q%d ... ", subNameLong[subsamp],
+ buStrLong, jpegQual);
+ srcBuf = new byte[TJ.bufSizeYUV(w, pad, h, subsamp)];
+ initBufYUV(srcBuf, w, pad, h, subsamp);
+ pfStr = "YUV";
} else {
- srcBuf = new byte[w * h * ps + 1];
- initBuf(srcBuf, w, w * ps, h, pf, flags);
+ if (bi) {
+ pf = biTypePF(imgType);
+ pfStr = biTypeStr(imgType);
+ } else
+ pfStr = pixFormatStr[pf];
+ ps = TJ.getPixelSize(pf);
+
+ System.out.print(pfStr + " ");
+ if (bi)
+ System.out.print("(" + pixFormatStr[pf] + ") ");
+ if (yuv == YUVENCODE)
+ System.out.format("%s -> %s YUV ... ", buStrLong,
+ subNameLong[subsamp]);
+ else
+ System.out.format("%s -> %s Q%d ... ", buStrLong, subNameLong[subsamp],
+ jpegQual);
+
+ if (bi) {
+ img = new BufferedImage(w, h, imgType);
+ initImg(img, pf, flags);
+ tempstr = baseName + "_enc_" + pfStr + "_" + buStr + "_" +
+ subName[subsamp] + "_Q" + jpegQual + ".png";
+ File file = new File(tempstr);
+ ImageIO.write(img, "png", file);
+ } else {
+ srcBuf = new byte[w * h * ps + 1];
+ initBuf(srcBuf, w, w * ps, h, pf, flags);
+ }
}
Arrays.fill(dstBuf, (byte)0);
t = getTime();
tjc.setSubsamp(subsamp);
tjc.setJPEGQuality(jpegQual);
+ tjc.setYUVPad(pad);
if (bi) {
if (yuv == YUVENCODE)
tjc.encodeYUV(img, dstBuf, flags);
else
tjc.compress(img, dstBuf, flags);
} else {
- tjc.setSourceImage(srcBuf, w, 0, h, pf);
+ if (yuv == YUVDECODE)
+ tjc.setSourceImageYUV(srcBuf, w, pad, h);
+ else
+ tjc.setSourceImage(srcBuf, w, 0, h, pf);
if (yuv == YUVENCODE)
tjc.encodeYUV(dstBuf, flags);
else
@@ -631,17 +761,16 @@
t = getTime() - t;
if (yuv == YUVENCODE)
- tempstr = baseName + "_enc_" + pfStr + "_" +
- (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+ tempstr = baseName + "_enc_" + pfStr + "_" + buStr + "_" +
subName[subsamp] + ".yuv";
else
- tempstr = baseName + "_enc_" + pfStr + "_" +
- (((flags & TJ.FLAG_BOTTOMUP) != 0) ? "BU" : "TD") + "_" +
+ tempstr = baseName + "_enc_" + pfStr + "_" + buStr + "_" +
subName[subsamp] + "_Q" + jpegQual + ".jpg";
writeJPEG(dstBuf, size, tempstr);
if (yuv == YUVENCODE) {
- if (checkBufYUV(dstBuf, size, w, h, subsamp) == 1)
+ if (checkBufYUV(dstBuf, size, w, h, subsamp,
+ new TJScalingFactor(1, 1)) == 1)
System.out.print("Passed.");
else {
System.out.print("FAILED!");
@@ -677,7 +806,7 @@
System.out.print("JPEG -> ");
if (yuv == YUVDECODE)
- System.out.print("YUV " + subNameLong[subsamp] + " ... ");
+ System.out.print("YUV " + subNameLong[subsamp] + " ");
else {
System.out.print(pfStr + " ");
if (bi)
@@ -686,11 +815,11 @@
System.out.print("Bottom-Up ");
else
System.out.print("Top-Down ");
- if (!sf.isOne())
- System.out.print(sf.getNum() + "/" + sf.getDenom() + " ... ");
- else
- System.out.print("... ");
}
+ if (!sf.isOne())
+ System.out.print(sf.getNum() + "/" + sf.getDenom() + " ... ");
+ else
+ System.out.print("... ");
t = getTime();
tjd.setJPEGImage(jpegBuf, jpegSize);
@@ -706,7 +835,7 @@
throw new Exception("Scaled size mismatch");
if (yuv == YUVDECODE)
- dstBuf = tjd.decompressToYUV(flags);
+ dstBuf = tjd.decompressToYUV(scaledWidth, pad, scaledHeight, flags);
else {
if (bi)
img = tjd.decompress(scaledWidth, scaledHeight, imgType, flags);
@@ -725,7 +854,8 @@
}
if (yuv == YUVDECODE) {
- if (checkBufYUV(dstBuf, dstBuf.length, w, h, subsamp) == 1)
+ if (checkBufYUV(dstBuf, dstBuf.length, scaledWidth, scaledHeight,
+ subsamp, sf) == 1)
System.out.print("Passed.");
else {
System.out.print("FAILED!"); exitStatus = -1;
@@ -749,14 +879,18 @@
String baseName, int subsamp,
int flags) throws Exception {
int i;
- if ((subsamp == TJ.SAMP_444 || subsamp == TJ.SAMP_GRAY) && yuv == 0) {
- TJScalingFactor[] sf = TJ.getScalingFactors();
- for (i = 0; i < sf.length; i++)
+ TJScalingFactor[] sf = TJ.getScalingFactors();
+ for (i = 0; i < sf.length; i++) {
+ int num = sf[i].getNum();
+ int denom = sf[i].getDenom();
+ if (subsamp == TJ.SAMP_444 || subsamp == TJ.SAMP_GRAY ||
+ (subsamp == TJ.SAMP_411 && num == 1 &&
+ (denom == 2 || denom == 1)) ||
+ (subsamp != TJ.SAMP_411 && num == 1 &&
+ (denom == 4 || denom == 2 || denom == 1)))
decompTest(tjd, jpegBuf, jpegSize, w, h, pf, baseName, subsamp,
flags, sf[i]);
- } else
- decompTest(tjd, jpegBuf, jpegSize, w, h, pf, baseName, subsamp,
- flags, new TJScalingFactor(1, 1));
+ }
}
private static void doTest(int w, int h, int[] formats, int subsamp,
@@ -767,7 +901,7 @@
byte[] dstBuf;
if (yuv == YUVENCODE)
- dstBuf = new byte[TJ.bufSizeYUV(w, h, subsamp)];
+ dstBuf = new byte[TJ.bufSizeYUV(w, pad, h, subsamp)];
else
dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
@@ -776,10 +910,11 @@
tjd = new TJDecompressor();
for (int pf : formats) {
+ if (pf < 0) continue;
for (int i = 0; i < 2; i++) {
int flags = 0;
if (subsamp == TJ.SAMP_422 || subsamp == TJ.SAMP_420 ||
- subsamp == TJ.SAMP_440)
+ subsamp == TJ.SAMP_440 || subsamp == TJ.SAMP_411)
flags |= TJ.FLAG_FASTUPSAMPLE;
if (i == 1) {
if (yuv == YUVDECODE) {
@@ -825,7 +960,7 @@
System.out.format("%04d x %04d\b\b\b\b\b\b\b\b\b\b\b", w, h);
srcBuf = new byte[w * h * 4];
if (yuv == YUVENCODE)
- dstBuf = new byte[TJ.bufSizeYUV(w, h, subsamp)];
+ dstBuf = new byte[TJ.bufSizeYUV(w, pad, h, subsamp)];
else
dstBuf = new byte[TJ.bufSize(w, h, subsamp)];
for (i = 0; i < w * h * 4; i++) {
@@ -834,6 +969,7 @@
tjc.setSourceImage(srcBuf, w, 0, h, TJ.PF_BGRX);
tjc.setSubsamp(subsamp);
tjc.setJPEGQuality(100);
+ tjc.setYUVPad(pad);
if (yuv == YUVENCODE)
tjc.encodeYUV(dstBuf, 0);
else
@@ -841,7 +977,7 @@
srcBuf = new byte[h * w * 4];
if (yuv == YUVENCODE)
- dstBuf = new byte[TJ.bufSizeYUV(h, w, subsamp)];
+ dstBuf = new byte[TJ.bufSizeYUV(h, pad, w, subsamp)];
else
dstBuf = new byte[TJ.bufSize(h, w, subsamp)];
for (i = 0; i < h * w * 4; i++) {
@@ -870,6 +1006,8 @@
for (int i = 0; i < argv.length; i++) {
if (argv[i].equalsIgnoreCase("-yuv"))
doyuv = true;
+ if (argv[i].equalsIgnoreCase("-noyuvpad"))
+ pad = 1;
if (argv[i].substring(0, 1).equalsIgnoreCase("-h") ||
argv[i].equalsIgnoreCase("-?"))
usage();
@@ -878,7 +1016,10 @@
testName = "javabitest";
}
}
- if (doyuv) yuv = YUVENCODE;
+ if (doyuv) {
+ yuv = YUVENCODE;
+ _4byteFormats[4] = -1;
+ }
doTest(35, 39, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_444,
testName);
doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_444,
@@ -895,10 +1036,15 @@
testName);
doTest(39, 41, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_440,
testName);
- doTest(35, 39, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName);
- doTest(39, 41, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY,
+ doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_411,
testName);
- doTest(41, 35, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_GRAY,
+ doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_411,
+ testName);
+ doTest(39, 41, bi ? onlyGrayBI : onlyGray, TJ.SAMP_GRAY, testName);
+ doTest(41, 35, bi ? _3byteFormatsBI : _3byteFormats, TJ.SAMP_GRAY,
+ testName);
+ _4byteFormats[4] = -1;
+ doTest(35, 39, bi ? _4byteFormatsBI : _4byteFormats, TJ.SAMP_GRAY,
testName);
if (!bi)
bufSizeTest();
@@ -913,10 +1059,12 @@
doTest(41, 35, onlyRGB, TJ.SAMP_420, "javatest_yuv1");
doTest(48, 48, onlyRGB, TJ.SAMP_440, "javatest_yuv0");
doTest(35, 39, onlyRGB, TJ.SAMP_440, "javatest_yuv1");
+ doTest(48, 48, onlyRGB, TJ.SAMP_411, "javatest_yuv0");
+ doTest(39, 41, onlyRGB, TJ.SAMP_411, "javatest_yuv1");
doTest(48, 48, onlyRGB, TJ.SAMP_GRAY, "javatest_yuv0");
- doTest(35, 39, onlyRGB, TJ.SAMP_GRAY, "javatest_yuv1");
+ doTest(41, 35, onlyRGB, TJ.SAMP_GRAY, "javatest_yuv1");
doTest(48, 48, onlyGray, TJ.SAMP_GRAY, "javatest_yuv0");
- doTest(39, 41, onlyGray, TJ.SAMP_GRAY, "javatest_yuv1");
+ doTest(35, 39, onlyGray, TJ.SAMP_GRAY, "javatest_yuv1");
}
} catch(Exception e) {
e.printStackTrace();
diff --git a/java/doc/constant-values.html b/java/doc/constant-values.html
index e4adb67..01f950f 100644
--- a/java/doc/constant-values.html
+++ b/java/doc/constant-values.html
@@ -99,6 +99,36 @@
<TH ALIGN="left" COLSPAN="3">org.libjpegturbo.turbojpeg.<A HREF="org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.CS_CMYK"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK">CS_CMYK</A></CODE></TD>
+<TD ALIGN="right"><CODE>3</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.CS_GRAY"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY">CS_GRAY</A></CODE></TD>
+<TD ALIGN="right"><CODE>2</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.CS_RGB"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#CS_RGB">CS_RGB</A></CODE></TD>
+<TD ALIGN="right"><CODE>0</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.CS_YCbCr"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr">CS_YCbCr</A></CODE></TD>
+<TD ALIGN="right"><CODE>1</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.CS_YCCK"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK">CS_YCCK</A></CODE></TD>
+<TD ALIGN="right"><CODE>4</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.FLAG_ACCURATEDCT"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
<CODE>public static final int</CODE></FONT></TD>
<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#FLAG_ACCURATEDCT">FLAG_ACCURATEDCT</A></CODE></TD>
@@ -147,16 +177,22 @@
<TD ALIGN="right"><CODE>128</CODE></TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.NUMCS"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#NUMCS">NUMCS</A></CODE></TD>
+<TD ALIGN="right"><CODE>5</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.NUMPF"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
<CODE>public static final int</CODE></FONT></TD>
<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#NUMPF">NUMPF</A></CODE></TD>
-<TD ALIGN="right"><CODE>11</CODE></TD>
+<TD ALIGN="right"><CODE>12</CODE></TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.NUMSAMP"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
<CODE>public static final int</CODE></FONT></TD>
<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#NUMSAMP">NUMSAMP</A></CODE></TD>
-<TD ALIGN="right"><CODE>5</CODE></TD>
+<TD ALIGN="right"><CODE>6</CODE></TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.PF_ABGR"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
@@ -189,6 +225,12 @@
<TD ALIGN="right"><CODE>3</CODE></TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.PF_CMYK"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK">PF_CMYK</A></CODE></TD>
+<TD ALIGN="right"><CODE>11</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.PF_GRAY"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
<CODE>public static final int</CODE></FONT></TD>
<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY">PF_GRAY</A></CODE></TD>
@@ -225,6 +267,12 @@
<TD ALIGN="right"><CODE>5</CODE></TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<A NAME="org.libjpegturbo.turbojpeg.TJ.SAMP_411"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
+<CODE>public static final int</CODE></FONT></TD>
+<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#SAMP_411">SAMP_411</A></CODE></TD>
+<TD ALIGN="right"><CODE>5</CODE></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<A NAME="org.libjpegturbo.turbojpeg.TJ.SAMP_420"><!-- --></A><TD ALIGN="right"><FONT SIZE="-1">
<CODE>public static final int</CODE></FONT></TD>
<TD ALIGN="left"><CODE><A HREF="org/libjpegturbo/turbojpeg/TJ.html#SAMP_420">SAMP_420</A></CODE></TD>
diff --git a/java/doc/deprecated-list.html b/java/doc/deprecated-list.html
index 37ca515..9f3b4fd 100644
--- a/java/doc/deprecated-list.html
+++ b/java/doc/deprecated-list.html
@@ -81,9 +81,39 @@
</CENTER>
<HR SIZE="4" NOSHADE>
<B>Contents</B><UL>
+<LI><A HREF="#field">Deprecated Fields</A>
<LI><A HREF="#method">Deprecated Methods</A>
</UL>
+<A NAME="field"><!-- --></A>
+<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
+<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
+<TH ALIGN="left" COLSPAN="2"><FONT SIZE="+2">
+<B>Deprecated Fields</B></FONT></TH>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCEMMX</A>
+<BR>
+ </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE</A>
+<BR>
+ </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE2</A>
+<BR>
+ </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE3</A>
+<BR>
+ </TD>
+</TR>
+</TABLE>
+
+<P>
<A NAME="method"><!-- --></A>
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
<TR BGCOLOR="#CCCCFF" CLASS="TableHeadingColor">
@@ -91,12 +121,28 @@
<B>Deprecated Methods</B></FONT></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)">org.libjpegturbo.turbojpeg.TJ.bufSizeYUV(int, int, int)</A>
+<BR>
+ <I>Use <A HREF="org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int, int)</CODE></A> instead.</I> </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<TD><A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompress(byte[], int, int, int, int, int)</A>
<BR>
<I>Use
<A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)"><CODE>TJDecompressor.decompress(byte[], int, int, int, int, int, int, int)</CODE></A> instead.</I> </TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompressToYUV(byte[], int)</A>
+<BR>
+ <I>Use <A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>TJDecompressor.decompressToYUV(byte[], int, int, int, int)</CODE></A>
+ instead.</I> </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD><A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">org.libjpegturbo.turbojpeg.TJDecompressor.decompressToYUV(int)</A>
+<BR>
+ <I>Use <A HREF="org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><CODE>TJDecompressor.decompressToYUV(int, int, int, int)</CODE></A> instead.</I> </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
<TD><A HREF="org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int)">org.libjpegturbo.turbojpeg.TJCompressor.setSourceImage(byte[], int, int, int, int)</A>
<BR>
<I>Use
diff --git a/java/doc/index-all.html b/java/doc/index-all.html
index a534d43..81d9b45 100644
--- a/java/doc/index-all.html
+++ b/java/doc/index-all.html
@@ -82,10 +82,13 @@
Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>Returns the maximum size of the buffer (in bytes) required to hold a JPEG
image with the given width, height, and level of chrominance subsampling.
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><B>bufSizeYUV(int, int, int)</B></A> -
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><B>bufSizeYUV(int, int, int, int)</B></A> -
Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>Returns the size of the buffer (in bytes) required to hold a YUV planar
image with the given width, height, and level of chrominance subsampling.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><B>bufSizeYUV(int, int, int)</B></A> -
+Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD><B>Deprecated.</B> <I>Use <A HREF="./org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int, int)</CODE></A> instead.</I>
</DL>
<HR>
<A NAME="_C_"><!-- --></A><H2>
@@ -116,11 +119,26 @@
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
<DD>Compress the uncompressed source image stored in <code>srcImage</code>
and return a buffer containing a JPEG image.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK"><B>CS_CMYK</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>CMYK colorspace.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY"><B>CS_GRAY</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>Grayscale colorspace.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#CS_RGB"><B>CS_RGB</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>RGB colorspace.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr"><B>CS_YCbCr</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>YCbCr colorspace.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK"><B>CS_YCCK</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>YCCK colorspace.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCustomFilter.html#customFilter(java.nio.ShortBuffer, java.awt.Rectangle, java.awt.Rectangle, int, int, org.libjpegturbo.turbojpeg.TJTransform)"><B>customFilter(ShortBuffer, Rectangle, Rectangle, int, int, TJTransform)</B></A> -
Method in interface org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCustomFilter.html" title="interface in org.libjpegturbo.turbojpeg">TJCustomFilter</A>
<DD>A callback function that can be used to modify the DCT coefficients after
they are losslessly transformed but before they are transcoded to a new
- JPEG file.
+ JPEG image.
</DL>
<HR>
<A NAME="_D_"><!-- --></A><H2>
@@ -152,14 +170,21 @@
<DD>Decompress the JPEG source image associated with this decompressor
instance and return a <code>BufferedImage</code> instance containing the
decompressed image.
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)"><B>decompressToYUV(byte[], int)</B></A> -
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><B>decompressToYUV(byte[], int, int, int, int)</B></A> -
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
<DD>Decompress the JPEG source image associated with this decompressor
instance and output a YUV planar image to the given destination buffer.
-<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)"><B>decompressToYUV(int)</B></A> -
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)"><B>decompressToYUV(byte[], int)</B></A> -
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD><B>Deprecated.</B> <I>Use <A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>TJDecompressor.decompressToYUV(byte[], int, int, int, int)</CODE></A>
+ instead.</I>
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><B>decompressToYUV(int, int, int, int)</B></A> -
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
<DD>Decompress the JPEG source image associated with this decompressor
instance and return a buffer containing a YUV planar image.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)"><B>decompressToYUV(int)</B></A> -
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD><B>Deprecated.</B> <I>Use <A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><CODE>TJDecompressor.decompressToYUV(int, int, int, int)</CODE></A> instead.</I>
</DL>
<HR>
<A NAME="_E_"><!-- --></A><H2>
@@ -214,20 +239,16 @@
the underlying codec.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX"><B>FLAG_FORCEMMX</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use MMX code
- (if the underlying codec supports it.)
+<DD><B>Deprecated.</B>
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE"><B>FLAG_FORCESSE</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE code
- (if the underlying codec supports it.)
+<DD><B>Deprecated.</B>
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2"><B>FLAG_FORCESSE2</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE2 code
- (if the underlying codec supports it.)
+<DD><B>Deprecated.</B>
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3"><B>FLAG_FORCESSE3</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE3 code
- (if the underlying codec supports it.)
+<DD><B>Deprecated.</B>
</DL>
<HR>
<A NAME="_G_"><!-- --></A><H2>
@@ -237,6 +258,10 @@
Static method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>For the given pixel format, returns the number of bytes that the blue
component is offset from the start of the pixel.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()"><B>getColorspace()</B></A> -
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD>Returns the colorspace used in the JPEG image associated with this
+ decompressor instance.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#getCompressedSize()"><B>getCompressedSize()</B></A> -
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
<DD>Returns the size of the image (in bytes) generated by the most recent
@@ -334,6 +359,9 @@
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize"><B>jpegBufSize</B></A> -
Variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
<DD>
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegColorspace"><B>jpegColorspace</B></A> -
+Variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
+<DD>
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegHeight"><B>jpegHeight</B></A> -
Variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A>
<DD>
@@ -348,6 +376,9 @@
<A NAME="_N_"><!-- --></A><H2>
<B>N</B></H2>
<DL>
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#NUMCS"><B>NUMCS</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>The number of JPEG colorspaces
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJTransform.html#NUMOP"><B>NUMOP</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJTransform.html" title="class in org.libjpegturbo.turbojpeg">TJTransform</A>
<DD>The number of lossless transform operations
@@ -432,6 +463,9 @@
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#PF_BGRX"><B>PF_BGRX</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>BGRX pixel format.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK"><B>PF_CMYK</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>CMYK pixel format.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY"><B>PF_GRAY</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>Grayscale pixel format.
@@ -455,6 +489,9 @@
<A NAME="_S_"><!-- --></A><H2>
<B>S</B></H2>
<DL>
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_411"><B>SAMP_411</B></A> -
+Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
+<DD>4:1:1 chrominance subsampling.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJ.html#SAMP_420"><B>SAMP_420</B></A> -
Static variable in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg">TJ</A>
<DD>4:2:0 chrominance subsampling.
@@ -484,10 +521,17 @@
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
<DD><B>Deprecated.</B> <I>Use
<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImage(byte[], int, int, int, int, int, int)"><CODE>TJCompressor.setSourceImage(byte[], int, int, int, int, int, int)</CODE></A> instead.</I>
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImageYUV(byte[], int, int, int)"><B>setSourceImageYUV(byte[], int, int, int)</B></A> -
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
+<DD>Associate an uncompressed YUV planar source image with this compressor
+ instance.
<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)"><B>setSubsamp(int)</B></A> -
Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
<DD>Set the level of chrominance subsampling for subsequent compress/encode
operations.
+<DT><A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html#setYUVPad(int)"><B>setYUVPad(int)</B></A> -
+Method in class org.libjpegturbo.turbojpeg.<A HREF="./org/libjpegturbo/turbojpeg/TJCompressor.html" title="class in org.libjpegturbo.turbojpeg">TJCompressor</A>
+<DD>Set the plane padding for subsequent YUV encode operations.
</DL>
<HR>
<A NAME="_T_"><!-- --></A><H2>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJ.html b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
index f905406..bad022e 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJ.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJ.html
@@ -115,6 +115,46 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_CMYK">CS_CMYK</A></B></CODE>
+
+<BR>
+ CMYK colorspace.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_GRAY">CS_GRAY</A></B></CODE>
+
+<BR>
+ Grayscale colorspace.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_RGB">CS_RGB</A></B></CODE>
+
+<BR>
+ RGB colorspace.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr">CS_YCbCr</A></B></CODE>
+
+<BR>
+ YCbCr colorspace.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK">CS_YCCK</A></B></CODE>
+
+<BR>
+ YCCK colorspace.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_ACCURATEDCT">FLAG_ACCURATEDCT</A></B></CODE>
<BR>
@@ -154,8 +194,7 @@
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCEMMX">FLAG_FORCEMMX</A></B></CODE>
<BR>
- Turn off CPU auto-detection and force TurboJPEG to use MMX code
- (if the underlying codec supports it.)</TD>
+ <B>Deprecated.</B> </TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -163,8 +202,7 @@
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE">FLAG_FORCESSE</A></B></CODE>
<BR>
- Turn off CPU auto-detection and force TurboJPEG to use SSE code
- (if the underlying codec supports it.)</TD>
+ <B>Deprecated.</B> </TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -172,8 +210,7 @@
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE2">FLAG_FORCESSE2</A></B></CODE>
<BR>
- Turn off CPU auto-detection and force TurboJPEG to use SSE2 code
- (if the underlying codec supports it.)</TD>
+ <B>Deprecated.</B> </TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -181,8 +218,15 @@
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#FLAG_FORCESSE3">FLAG_FORCESSE3</A></B></CODE>
<BR>
- Turn off CPU auto-detection and force TurboJPEG to use SSE3 code
- (if the underlying codec supports it.)</TD>
+ <B>Deprecated.</B> </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#NUMCS">NUMCS</A></B></CODE>
+
+<BR>
+ The number of JPEG colorspaces</TD>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
@@ -243,6 +287,14 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_CMYK">PF_CMYK</A></B></CODE>
+
+<BR>
+ CMYK pixel format.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#PF_GRAY">PF_GRAY</A></B></CODE>
<BR>
@@ -291,6 +343,14 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_411">SAMP_411</A></B></CODE>
+
+<BR>
+ 4:1:1 chrominance subsampling.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_420">SAMP_420</A></B></CODE>
<BR>
@@ -373,6 +433,17 @@
int subsamp)</CODE>
<BR>
+ <B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>bufSizeYUV(int, int, int, int)</CODE></A> instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>static int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)">bufSizeYUV</A></B>(int width,
+ int pad,
+ int height,
+ int subsamp)</CODE>
+
+<BR>
Returns the size of the buffer (in bytes) required to hold a YUV planar
image with the given width, height, and level of chrominance subsampling.</TD>
</TR>
@@ -539,6 +610,25 @@
</DL>
<HR>
+<A NAME="SAMP_411"><!-- --></A><H3>
+SAMP_411</H3>
+<PRE>
+public static final int <B>SAMP_411</B></PRE>
+<DL>
+<DD>4:1:1 chrominance subsampling. The JPEG or YUV image will contain one
+ chrominance component for every 4x1 block of pixels in the source image.
+ JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+ same size as those compressed with 4:2:0 subsampling, and in the
+ aggregate, both subsampling methods produce approximately the same
+ perceptual quality. However, 4:1:1 is better able to reproduce sharp
+ horizontal features. Note that 4:1:1 subsampling is not fully accelerated
+ in libjpeg-turbo.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.SAMP_411">Constant Field Values</A></DL>
+</DL>
+<HR>
+
<A NAME="NUMPF"><!-- --></A><H3>
NUMPF</H3>
<PRE>
@@ -708,6 +798,131 @@
</DL>
<HR>
+<A NAME="PF_CMYK"><!-- --></A><H3>
+PF_CMYK</H3>
+<PRE>
+public static final int <B>PF_CMYK</B></PRE>
+<DL>
+<DD>CMYK pixel format. Unlike RGB, which is an additive color model used
+ primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+ color model used primarily for printing. In the CMYK color model, the
+ value of each color component typically corresponds to an amount of cyan,
+ magenta, yellow, or black ink that is applied to a white background. In
+ order to convert between CMYK and RGB, it is necessary to use a color
+ management system (CMS.) A CMS will attempt to map colors within the
+ printer's gamut to perceptually similar colors in the display's gamut and
+ vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+ be defined with a simple formula. Thus, such a conversion is out of scope
+ for a codec library. However, the TurboJPEG API allows for compressing
+ CMYK pixels into a YCCK JPEG image (see <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK"><CODE>CS_YCCK</CODE></A>) and
+ decompressing YCCK JPEG images into CMYK pixels.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.PF_CMYK">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="NUMCS"><!-- --></A><H3>
+NUMCS</H3>
+<PRE>
+public static final int <B>NUMCS</B></PRE>
+<DL>
+<DD>The number of JPEG colorspaces
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.NUMCS">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="CS_RGB"><!-- --></A><H3>
+CS_RGB</H3>
+<PRE>
+public static final int <B>CS_RGB</B></PRE>
+<DL>
+<DD>RGB colorspace. When compressing the JPEG image, the R, G, and B
+ components in the source image are reordered into image planes, but no
+ colorspace conversion or subsampling is performed. RGB JPEG images can be
+ decompressed to any of the extended RGB pixel formats or grayscale, but
+ they cannot be decompressed to YUV images.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_RGB">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="CS_YCbCr"><!-- --></A><H3>
+CS_YCbCr</H3>
+<PRE>
+public static final int <B>CS_YCbCr</B></PRE>
+<DL>
+<DD>YCbCr colorspace. YCbCr is not an absolute colorspace but rather a
+ mathematical transformation of RGB designed solely for storage and
+ transmission. YCbCr images must be converted to RGB before they can
+ actually be displayed. In the YCbCr colorspace, the Y (luminance)
+ component represents the black &amp; white portion of the original image, and
+ the Cb and Cr (chrominance) components represent the color portion of the
+ original image. Originally, the analog equivalent of this transformation
+ allowed the same signal to drive both black &amp; white and color televisions,
+ but JPEG images use YCbCr primarily because it allows the color data to be
+ optionally subsampled for the purposes of reducing bandwidth or disk
+ space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+ can be compressed from and decompressed to any of the extended RGB pixel
+ formats or grayscale, or they can be decompressed to YUV planar images.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_YCbCr">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="CS_GRAY"><!-- --></A><H3>
+CS_GRAY</H3>
+<PRE>
+public static final int <B>CS_GRAY</B></PRE>
+<DL>
+<DD>Grayscale colorspace. The JPEG image retains only the luminance data (Y
+ component), and any color data from the source image is discarded.
+ Grayscale JPEG images can be compressed from and decompressed to any of
+ the extended RGB pixel formats or grayscale, or they can be decompressed
+ to YUV planar images.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_GRAY">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="CS_CMYK"><!-- --></A><H3>
+CS_CMYK</H3>
+<PRE>
+public static final int <B>CS_CMYK</B></PRE>
+<DL>
+<DD>CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K
+ components in the source image are reordered into image planes, but no
+ colorspace conversion or subsampling is performed. CMYK JPEG images can
+ only be decompressed to CMYK pixels.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_CMYK">Constant Field Values</A></DL>
+</DL>
+<HR>
+
+<A NAME="CS_YCCK"><!-- --></A><H3>
+CS_YCCK</H3>
+<PRE>
+public static final int <B>CS_YCCK</B></PRE>
+<DL>
+<DD>YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but
+ rather a mathematical transformation of CMYK designed solely for storage
+ and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be
+ reversibly transformed into YCCK, and as with YCbCr, the chrominance
+ components in the YCCK pixels can be subsampled without incurring major
+ perceptual loss. YCCK JPEG images can only be compressed from and
+ decompressed to CMYK pixels.
+<P>
+<DL>
+<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.CS_YCCK">Constant Field Values</A></DL>
+</DL>
+<HR>
+
<A NAME="FLAG_BOTTOMUP"><!-- --></A><H3>
FLAG_BOTTOMUP</H3>
<PRE>
@@ -724,12 +939,10 @@
<A NAME="FLAG_FORCEMMX"><!-- --></A><H3>
FLAG_FORCEMMX</H3>
<PRE>
-public static final int <B>FLAG_FORCEMMX</B></PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public static final int <B>FLAG_FORCEMMX</B></PRE>
<DL>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use MMX code
- (if the underlying codec supports it.)
-<P>
-<DL>
+<DD><B>Deprecated.</B> <DL>
<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCEMMX">Constant Field Values</A></DL>
</DL>
<HR>
@@ -737,12 +950,10 @@
<A NAME="FLAG_FORCESSE"><!-- --></A><H3>
FLAG_FORCESSE</H3>
<PRE>
-public static final int <B>FLAG_FORCESSE</B></PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public static final int <B>FLAG_FORCESSE</B></PRE>
<DL>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE code
- (if the underlying codec supports it.)
-<P>
-<DL>
+<DD><B>Deprecated.</B> <DL>
<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE">Constant Field Values</A></DL>
</DL>
<HR>
@@ -750,12 +961,10 @@
<A NAME="FLAG_FORCESSE2"><!-- --></A><H3>
FLAG_FORCESSE2</H3>
<PRE>
-public static final int <B>FLAG_FORCESSE2</B></PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public static final int <B>FLAG_FORCESSE2</B></PRE>
<DL>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE2 code
- (if the underlying codec supports it.)
-<P>
-<DL>
+<DD><B>Deprecated.</B> <DL>
<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE2">Constant Field Values</A></DL>
</DL>
<HR>
@@ -763,12 +972,10 @@
<A NAME="FLAG_FORCESSE3"><!-- --></A><H3>
FLAG_FORCESSE3</H3>
<PRE>
-public static final int <B>FLAG_FORCESSE3</B></PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public static final int <B>FLAG_FORCESSE3</B></PRE>
<DL>
-<DD>Turn off CPU auto-detection and force TurboJPEG to use SSE3 code
- (if the underlying codec supports it.)
-<P>
-<DL>
+<DD><B>Deprecated.</B> <DL>
<DT><B>See Also:</B><DD><A HREF="../../../constant-values.html#org.libjpegturbo.turbojpeg.TJ.FLAG_FORCESSE3">Constant Field Values</A></DL>
</DL>
<HR>
@@ -991,10 +1198,11 @@
</DL>
<HR>
-<A NAME="bufSizeYUV(int, int, int)"><!-- --></A><H3>
+<A NAME="bufSizeYUV(int, int, int, int)"><!-- --></A><H3>
bufSizeYUV</H3>
<PRE>
public static int <B>bufSizeYUV</B>(int width,
+ int pad,
int height,
int subsamp)
throws java.lang.Exception</PRE>
@@ -1003,7 +1211,9 @@
image with the given width, height, and level of chrominance subsampling.
<P>
<DD><DL>
-<DT><B>Parameters:</B><DD><CODE>width</CODE> - the width (in pixels) of the YUV image<DD><CODE>height</CODE> - the height (in pixels) of the YUV image<DD><CODE>subsamp</CODE> - the level of chrominance subsampling used in the YUV
+<DT><B>Parameters:</B><DD><CODE>width</CODE> - the width (in pixels) of the YUV image<DD><CODE>pad</CODE> - the width of each line in each plane of the image is padded to
+ the nearest multiple of this number of bytes (must be a power of
+ 2.)<DD><CODE>height</CODE> - the height (in pixels) of the YUV image<DD><CODE>subsamp</CODE> - the level of chrominance subsampling used in the YUV
image (one of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.SAMP_*</CODE></A>)
<DT><B>Returns:</B><DD>the size of the buffer (in bytes) required to hold a YUV planar
image with the given width, height, and level of chrominance subsampling
@@ -1013,6 +1223,25 @@
</DL>
<HR>
+<A NAME="bufSizeYUV(int, int, int)"><!-- --></A><H3>
+bufSizeYUV</H3>
+<PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public static int <B>bufSizeYUV</B>(int width,
+ int height,
+ int subsamp)
+ throws java.lang.Exception</PRE>
+<DL>
+<DD><B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>bufSizeYUV(int, int, int, int)</CODE></A> instead.</I>
+<P>
+<DD><DL>
+
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
<A NAME="getScalingFactors()"><!-- --></A><H3>
getScalingFactors</H3>
<PRE>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
index 7fa3d0f..35114c7 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html
@@ -298,12 +298,32 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE> void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSourceImageYUV(byte[], int, int, int)">setSourceImageYUV</A></B>(byte[] srcImage,
+ int width,
+ int pad,
+ int height)</CODE>
+
+<BR>
+ Associate an uncompressed YUV planar source image with this compressor
+ instance.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE> void</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)">setSubsamp</A></B>(int newSubsamp)</CODE>
<BR>
Set the level of chrominance subsampling for subsequent compress/encode
operations.</TD>
</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE> void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setYUVPad(int)">setYUVPad</A></B>(int pad)</CODE>
+
+<BR>
+ Set the plane padding for subsequent YUV encode operations.</TD>
+</TR>
</TABLE>
<A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A>
<TABLE BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
@@ -412,8 +432,8 @@
<DD>Associate an uncompressed source image with this compressor instance.
<P>
<DD><DL>
-<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - image buffer containing RGB or grayscale pixels to be
- compressed<DD><CODE>x</CODE> - x offset (in pixels) of the region from which the JPEG image
+<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - image buffer containing RGB, grayscale, or CMYK pixels to
+ be compressed<DD><CODE>x</CODE> - x offset (in pixels) of the region from which the JPEG image
should be compressed, relative to the start of <code>srcImage</code>.<DD><CODE>y</CODE> - y offset (in pixels) of the region from which the JPEG image
should be compressed, relative to the start of <code>srcImage</code>.<DD><CODE>width</CODE> - width (in pixels) of the region in the source image from
which the JPEG image should be compressed.<DD><CODE>pitch</CODE> - bytes per line of the source image. Normally, this should be
@@ -435,11 +455,12 @@
<A NAME="setSourceImage(byte[], int, int, int, int)"><!-- --></A><H3>
setSourceImage</H3>
<PRE>
-public void <B>setSourceImage</B>(byte[] srcImage,
- int width,
- int pitch,
- int height,
- int pixelFormat)
+<FONT SIZE="-1">@Deprecated
+</FONT>public void <B>setSourceImage</B>(byte[] srcImage,
+ int width,
+ int pitch,
+ int height,
+ int pixelFormat)
throws java.lang.Exception</PRE>
<DL>
<DD><B>Deprecated.</B> <I>Use
@@ -453,6 +474,38 @@
</DL>
<HR>
+<A NAME="setSourceImageYUV(byte[], int, int, int)"><!-- --></A><H3>
+setSourceImageYUV</H3>
+<PRE>
+public void <B>setSourceImageYUV</B>(byte[] srcImage,
+ int width,
+ int pad,
+ int height)
+ throws java.lang.Exception</PRE>
+<DL>
+<DD>Associate an uncompressed YUV planar source image with this compressor
+ instance.
+<P>
+<DD><DL>
+<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - image buffer containing a YUV planar image to be
+ compressed. The Y, U (Cb), and V (Cr) image planes should be stored
+ sequentially in the buffer, and the size of each plane is determined by
+ the specified width, height, and padding, as well as the level of
+ chrominance subsampling (specified using <A HREF="../../../org/libjpegturbo/turbojpeg/TJCompressor.html#setSubsamp(int)"><CODE>setSubsamp(int)</CODE></A>.) If the
+ chrominance components are subsampled along the horizontal dimension, then
+ the width of the luminance plane should be padded to the nearest multiple
+ of 2 (same goes for the height of the luminance plane, if the chrominance
+ components are subsampled along the vertical dimension.) This is
+ irrespective of any additional padding specified in the <code>pad</code>
+ parameter.<DD><CODE>width</CODE> - width (in pixels) of the source image<DD><CODE>pad</CODE> - the line padding used in the source image. For instance, if
+ each line in each plane of the YUV image is padded to the nearest multiple
+ of 4 bytes, then <code>pad</code> should be set to 4.<DD><CODE>height</CODE> - height (in pixels) of the source image
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
<A NAME="setSubsamp(int)"><!-- --></A><H3>
setSubsamp</H3>
<PRE>
@@ -460,7 +513,17 @@
throws java.lang.Exception</PRE>
<DL>
<DD>Set the level of chrominance subsampling for subsequent compress/encode
- operations.
+ operations. When pixels are converted from RGB to YCbCr (see
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCbCr"><CODE>TJ.CS_YCbCr</CODE></A>) or from CMYK to YCCK (see <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#CS_YCCK"><CODE>TJ.CS_YCCK</CODE></A>) as part
+ of the JPEG compression process, some of the Cb and Cr (chrominance)
+ components can be discarded or averaged together to produce a smaller
+ image with little perceptible loss of image clarity (the human eye is more
+ sensitive to small changes in brightness than to small changes in color.)
+ This is called "chrominance subsampling".
+ <p>
+ NOTE: When compressing a YUV planar image into a JPEG image, this method
+ also specifies the level of chrominance subsampling used in the source
+ image.
<P>
<DD><DL>
<DT><B>Parameters:</B><DD><CODE>newSubsamp</CODE> - the new level of chrominance subsampling (one of
@@ -570,6 +633,25 @@
</DL>
<HR>
+<A NAME="setYUVPad(int)"><!-- --></A><H3>
+setYUVPad</H3>
+<PRE>
+public void <B>setYUVPad</B>(int pad)
+ throws java.lang.Exception</PRE>
+<DL>
+<DD>Set the plane padding for subsequent YUV encode operations.
+<P>
+<DD><DL>
+<DT><B>Parameters:</B><DD><CODE>pad</CODE> - the width of each line in each plane of the YUV image will be
+ padded to the nearest multiple of this number of bytes (must be a
+ power of 2.) The default padding is 4 bytes, which generates
+ images suitable for direct video display.
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
<A NAME="encodeYUV(byte[], int)"><!-- --></A><H3>
encodeYUV</H3>
<PRE>
@@ -579,17 +661,16 @@
<DL>
<DD>Encode the uncompressed source image associated with this compressor
instance and output a YUV planar image to the given destination buffer.
- This method uses the accelerated color conversion routines in
- TurboJPEG's underlying codec to produce a planar YUV image that is
- suitable for direct video display. Specifically, if the chrominance
- components are subsampled along the horizontal dimension, then the width
- of the luminance plane is padded to the nearest multiple of 2 in the
- output image (same goes for the height of the luminance plane, if the
+ This method uses the accelerated color conversion routines in TurboJPEG's
+ underlying codec but does not execute any of the other steps in the JPEG
+ compression process. The Y, U (Cb), and V (Cr) image planes are stored
+ sequentially into the destination buffer, and the size of each plane is
+ determined by the width and height of the source image, as well as the
+ specified padding and level of chrominance subsampling. If the
+ chrominance components are subsampled along the horizontal dimension, then
+ the width of the luminance plane is padded to the nearest multiple of 2 in
+ the output image (same goes for the height of the luminance plane, if the
chrominance components are subsampled along the vertical dimension.)
- Also, each line of each plane in the output image is padded to 4 bytes.
- Although this will work with any subsampling option, it is really only
- useful in combination with <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#SAMP_420"><CODE>TJ.SAMP_420</CODE></A>, which produces an image
- compatible with the I420 (AKA "YUV420P") format.
<p>
NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
convention of the digital video community, the TurboJPEG API uses "YUV" to
@@ -597,7 +678,7 @@
<P>
<DD><DL>
<DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image. Use
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int, int)</CODE></A> to determine the appropriate size for this buffer
based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
<DT><B>Throws:</B>
<DD><CODE>java.lang.Exception</CODE></DL>
@@ -639,7 +720,7 @@
<DD><DL>
<DT><B>Parameters:</B><DD><CODE>srcImage</CODE> - a <code>BufferedImage</code> instance containing RGB or
grayscale pixels to be encoded<DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image. Use
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int, int)</CODE></A> to determine the appropriate size for this buffer
based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
<DT><B>Throws:</B>
<DD><CODE>java.lang.Exception</CODE></DL>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
index 707b58d..3291c71 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html
@@ -122,7 +122,7 @@
<BR>
A callback function that can be used to modify the DCT coefficients after
they are losslessly transformed but before they are transcoded to a new
- JPEG file.</TD>
+ JPEG image.</TD>
</TR>
</TABLE>
@@ -151,7 +151,7 @@
<DL>
<DD>A callback function that can be used to modify the DCT coefficients after
they are losslessly transformed but before they are transcoded to a new
- JPEG file. This allows for custom filters or other transformations to be
+ JPEG image. This allows for custom filters or other transformations to be
applied in the frequency domain.
<P>
<DD><DL>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
index a437c16..2dc3cc6 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html
@@ -142,6 +142,14 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE>protected int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegColorspace">jpegColorspace</A></B></CODE>
+
+<BR>
+ </TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE>protected int</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegHeight">jpegHeight</A></B></CODE>
<BR>
@@ -303,6 +311,19 @@
int flags)</CODE>
<BR>
+ <B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A>
+ instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE> void</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)">decompressToYUV</A></B>(byte[] dstBuf,
+ int desiredWidth,
+ int pad,
+ int desiredHeight,
+ int flags)</CODE>
+
+<BR>
Decompress the JPEG source image associated with this decompressor
instance and output a YUV planar image to the given destination buffer.</TD>
</TR>
@@ -312,6 +333,17 @@
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</A></B>(int flags)</CODE>
<BR>
+ <B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><CODE>decompressToYUV(int, int, int, int)</CODE></A> instead.</I></TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE> byte[]</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)">decompressToYUV</A></B>(int desiredWidth,
+ int pad,
+ int desiredHeight,
+ int flags)</CODE>
+
+<BR>
Decompress the JPEG source image associated with this decompressor
instance and return a buffer containing a YUV planar image.</TD>
</TR>
@@ -326,6 +358,15 @@
<TR BGCOLOR="white" CLASS="TableRowColor">
<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
<CODE> int</CODE></FONT></TD>
+<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()">getColorspace</A></B>()</CODE>
+
+<BR>
+ Returns the colorspace used in the JPEG image associated with this
+ decompressor instance.</TD>
+</TR>
+<TR BGCOLOR="white" CLASS="TableRowColor">
+<TD ALIGN="right" VALIGN="top" WIDTH="1%"><FONT SIZE="-1">
+<CODE> int</CODE></FONT></TD>
<TD><CODE><B><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</A></B>()</CODE>
<BR>
@@ -480,6 +521,16 @@
<DL>
</DL>
</DL>
+<HR>
+
+<A NAME="jpegColorspace"><!-- --></A><H3>
+jpegColorspace</H3>
+<PRE>
+protected int <B>jpegColorspace</B></PRE>
+<DL>
+<DL>
+</DL>
+</DL>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
@@ -614,7 +665,7 @@
throws java.lang.Exception</PRE>
<DL>
<DD>Returns the level of chrominance subsampling used in the JPEG image
- associated with this decompressor instance.
+ associated with this decompressor instance. See <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.SAMP_*</CODE></A>.
<P>
<DD><DL>
@@ -626,6 +677,25 @@
</DL>
<HR>
+<A NAME="getColorspace()"><!-- --></A><H3>
+getColorspace</H3>
+<PRE>
+public int <B>getColorspace</B>()
+ throws java.lang.Exception</PRE>
+<DL>
+<DD>Returns the colorspace used in the JPEG image associated with this
+ decompressor instance. See <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.CS_*</CODE></A>.
+<P>
+<DD><DL>
+
+<DT><B>Returns:</B><DD>the colorspace used in the JPEG image associated with this
+ decompressor instance
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
<A NAME="getJPEGBuf()"><!-- --></A><H3>
getJPEGBuf</H3>
<PRE>
@@ -777,12 +847,13 @@
<A NAME="decompress(byte[], int, int, int, int, int)"><!-- --></A><H3>
decompress</H3>
<PRE>
-public void <B>decompress</B>(byte[] dstBuf,
- int desiredWidth,
- int pitch,
- int desiredHeight,
- int pixelFormat,
- int flags)
+<FONT SIZE="-1">@Deprecated
+</FONT>public void <B>decompress</B>(byte[] dstBuf,
+ int desiredWidth,
+ int pitch,
+ int desiredHeight,
+ int pixelFormat,
+ int flags)
throws java.lang.Exception</PRE>
<DL>
<DD><B>Deprecated.</B> <I>Use
@@ -825,10 +896,13 @@
</DL>
<HR>
-<A NAME="decompressToYUV(byte[], int)"><!-- --></A><H3>
+<A NAME="decompressToYUV(byte[], int, int, int, int)"><!-- --></A><H3>
decompressToYUV</H3>
<PRE>
public void <B>decompressToYUV</B>(byte[] dstBuf,
+ int desiredWidth,
+ int pad,
+ int desiredHeight,
int flags)
throws java.lang.Exception</PRE>
<DL>
@@ -848,8 +922,64 @@
<P>
<DD><DL>
<DT><B>Parameters:</B><DD><CODE>dstBuf</CODE> - buffer that will receive the YUV planar image. Use
- <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int)</CODE></A> to determine the appropriate size for this buffer
- based on the image width, height, and level of chrominance subsampling.<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html#bufSizeYUV(int, int, int, int)"><CODE>TJ.bufSizeYUV(int, int, int, int)</CODE></A> to determine the appropriate size for this buffer
+ based on the image width, height, and level of chrominance subsampling.<DD><CODE>desiredWidth</CODE> - desired width (in pixels) of the YUV image. If the
+ desired image dimensions are different than the dimensions of the JPEG
+ image being decompressed, then TurboJPEG will use scaling in the JPEG
+ decompressor to generate the largest possible image that will fit within
+ the desired dimensions. Setting this to 0 is the same as setting it to
+ the width of the JPEG image (in other words, the width will not be
+ considered when determining the scaled image size.)<DD><CODE>pad</CODE> - the width of each line in each plane of the YUV image will be
+ padded to the nearest multiple of this number of bytes (must be a power of
+ 2.)<DD><CODE>desiredHeight</CODE> - desired height (in pixels) of the YUV image. If the
+ desired image dimensions are different than the dimensions of the JPEG
+ image being decompressed, then TurboJPEG will use scaling in the JPEG
+ decompressor to generate the largest possible image that will fit within
+ the desired dimensions. Setting this to 0 is the same as setting it to
+ the height of the JPEG image (in other words, the height will not be
+ considered when determining the scaled image size.)<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
+<A NAME="decompressToYUV(byte[], int)"><!-- --></A><H3>
+decompressToYUV</H3>
+<PRE>
+<FONT SIZE="-1">@Deprecated
+</FONT>public void <B>decompressToYUV</B>(byte[] dstBuf,
+ int flags)
+ throws java.lang.Exception</PRE>
+<DL>
+<DD><B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A>
+ instead.</I>
+<P>
+<DD><DL>
+
+<DT><B>Throws:</B>
+<DD><CODE>java.lang.Exception</CODE></DL>
+</DD>
+</DL>
+<HR>
+
+<A NAME="decompressToYUV(int, int, int, int)"><!-- --></A><H3>
+decompressToYUV</H3>
+<PRE>
+public byte[] <B>decompressToYUV</B>(int desiredWidth,
+ int pad,
+ int desiredHeight,
+ int flags)
+ throws java.lang.Exception</PRE>
+<DL>
+<DD>Decompress the JPEG source image associated with this decompressor
+ instance and return a buffer containing a YUV planar image. See <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A> for more detail.
+<P>
+<DD><DL>
+<DT><B>Parameters:</B><DD><CODE>desiredWidth</CODE> - see
+ <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>pad</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A> for
+ description<DD><CODE>desiredHeight</CODE> - see <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)"><CODE>decompressToYUV(byte[], int, int, int, int)</CODE></A> for description<DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
+<DT><B>Returns:</B><DD>a buffer containing a YUV planar image
<DT><B>Throws:</B>
<DD><CODE>java.lang.Exception</CODE></DL>
</DD>
@@ -859,15 +989,14 @@
<A NAME="decompressToYUV(int)"><!-- --></A><H3>
decompressToYUV</H3>
<PRE>
-public byte[] <B>decompressToYUV</B>(int flags)
+<FONT SIZE="-1">@Deprecated
+</FONT>public byte[] <B>decompressToYUV</B>(int flags)
throws java.lang.Exception</PRE>
<DL>
-<DD>Decompress the JPEG source image associated with this decompressor
- instance and return a buffer containing a YUV planar image. See <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)"><CODE>decompressToYUV(byte[], int)</CODE></A> for more detail.
+<DD><B>Deprecated.</B> <I>Use <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)"><CODE>decompressToYUV(int, int, int, int)</CODE></A> instead.</I>
<P>
<DD><DL>
-<DT><B>Parameters:</B><DD><CODE>flags</CODE> - the bitwise OR of one or more of <A HREF="../../../org/libjpegturbo/turbojpeg/TJ.html" title="class in org.libjpegturbo.turbojpeg"><CODE>TJ.FLAG_*</CODE></A>
-<DT><B>Returns:</B><DD>a buffer containing a YUV planar image
+
<DT><B>Throws:</B>
<DD><CODE>java.lang.Exception</CODE></DL>
</DD>
diff --git a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
index 1e76ac8..0811b51 100644
--- a/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
+++ b/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html
@@ -120,7 +120,7 @@
<TH ALIGN="left"><B>Fields inherited from class org.libjpegturbo.turbojpeg.<A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A></B></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
-<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#handle">handle</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBuf">jpegBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize">jpegBufSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegHeight">jpegHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegSubsamp">jpegSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegWidth">jpegWidth</A></CODE></TD>
+<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#handle">handle</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBuf">jpegBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegBufSize">jpegBufSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegColorspace">jpegColorspace</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegHeight">jpegHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegSubsamp">jpegSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#jpegWidth">jpegWidth</A></CODE></TD>
</TR>
</TABLE>
@@ -203,7 +203,7 @@
<TH ALIGN="left"><B>Methods inherited from class org.libjpegturbo.turbojpeg.<A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html" title="class in org.libjpegturbo.turbojpeg">TJDecompressor</A></B></TH>
</TR>
<TR BGCOLOR="white" CLASS="TableRowColor">
-<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</A></CODE></TD>
+<TD><CODE><A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#close()">close</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(java.awt.image.BufferedImage, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(byte[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int[], int, int, int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompress(int, int, int, int, int)">decompress</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(byte[], int, int, int, int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#decompressToYUV(int, int, int, int)">decompressToYUV</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#finalize()">finalize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getColorspace()">getColorspace</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getHeight()">getHeight</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGBuf()">getJPEGBuf</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getJPEGSize()">getJPEGSize</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledHeight(int, int)">getScaledHeight</A>, <A 
HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getScaledWidth(int, int)">getScaledWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getSubsamp()">getSubsamp</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#getWidth()">getWidth</A>, <A HREF="../../../org/libjpegturbo/turbojpeg/TJDecompressor.html#setJPEGImage(byte[], int)">setJPEGImage</A></CODE></TD>
</TR>
</TABLE>
<A NAME="methods_inherited_from_class_java.lang.Object"><!-- --></A>
diff --git a/java/org/libjpegturbo/turbojpeg/TJ.java b/java/org/libjpegturbo/turbojpeg/TJ.java
index 6c6a95d..ac4a4dd 100644
--- a/java/org/libjpegturbo/turbojpeg/TJ.java
+++ b/java/org/libjpegturbo/turbojpeg/TJ.java
@@ -37,7 +37,7 @@
/**
* The number of chrominance subsampling options
*/
- public static final int NUMSAMP = 5;
+ public static final int NUMSAMP = 6;
/**
* 4:4:4 chrominance subsampling (no chrominance subsampling). The JPEG
* or YUV image will contain one chrominance component for every pixel in the
@@ -64,6 +64,17 @@
* Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
*/
public static final int SAMP_440 = 4;
+ /**
+ * 4:1:1 chrominance subsampling. The JPEG or YUV image will contain one
+ * chrominance component for every 4x1 block of pixels in the source image.
+ * JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+ * same size as those compressed with 4:2:0 subsampling, and in the
+ * aggregate, both subsampling methods produce approximately the same
+ * perceptual quality. However, 4:1:1 is better able to reproduce sharp
+ * horizontal features. Note that 4:1:1 subsampling is not fully accelerated
+ * in libjpeg-turbo.
+ */
+ public static final int SAMP_411 = 5;
/**
@@ -82,7 +93,7 @@
}
private static final int[] mcuWidth = {
- 8, 16, 16, 8, 8
+ 8, 16, 16, 8, 8, 32
};
@@ -103,14 +114,14 @@
}
private static final int[] mcuHeight = {
- 8, 8, 16, 8, 16
+ 8, 8, 16, 8, 16, 8
};
/**
* The number of pixel formats
*/
- public static final int NUMPF = 11;
+ public static final int NUMPF = 12;
/**
* RGB pixel format. The red, green, and blue components in the image are
* stored in 3-byte pixels in the order R, G, B from lowest to highest byte
@@ -180,6 +191,22 @@
* interpreted as an opaque alpha channel.
*/
public static final int PF_ARGB = 10;
+ /**
+ * CMYK pixel format. Unlike RGB, which is an additive color model used
+ * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+ * color model used primarily for printing. In the CMYK color model, the
+ * value of each color component typically corresponds to an amount of cyan,
+ * magenta, yellow, or black ink that is applied to a white background. In
+ * order to convert between CMYK and RGB, it is necessary to use a color
+ * management system (CMS.) A CMS will attempt to map colors within the
+ * printer's gamut to perceptually similar colors in the display's gamut and
+ * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+ * be defined with a simple formula. Thus, such a conversion is out of scope
+ * for a codec library. However, the TurboJPEG API allows for compressing
+ * CMYK pixels into a YCCK JPEG image (see {@link #CS_YCCK}) and
+ * decompressing YCCK JPEG images into CMYK pixels.
+ */
+ public static final int PF_CMYK = 11;
/**
@@ -196,7 +223,7 @@
}
private static final int[] pixelSize = {
- 3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4
+ 3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4
};
@@ -218,7 +245,7 @@
}
private static final int[] redOffset = {
- 0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1
+ 0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1
};
@@ -240,7 +267,7 @@
}
private static final int[] greenOffset = {
- 1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2
+ 1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1
};
@@ -262,35 +289,80 @@
}
private static final int[] blueOffset = {
- 2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3
+ 2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1
};
/**
+ * The number of JPEG colorspaces
+ */
+ public static final int NUMCS = 5;
+ /**
+ * RGB colorspace. When compressing the JPEG image, the R, G, and B
+ * components in the source image are reordered into image planes, but no
+ * colorspace conversion or subsampling is performed. RGB JPEG images can be
+ * decompressed to any of the extended RGB pixel formats or grayscale, but
+ * they cannot be decompressed to YUV images.
+ */
+ public static final int CS_RGB = 0;
+ /**
+ * YCbCr colorspace. YCbCr is not an absolute colorspace but rather a
+ * mathematical transformation of RGB designed solely for storage and
+ * transmission. YCbCr images must be converted to RGB before they can
+ * actually be displayed. In the YCbCr colorspace, the Y (luminance)
+ * component represents the black &amp; white portion of the original image, and
+ * the Cb and Cr (chrominance) components represent the color portion of the
+ * original image. Originally, the analog equivalent of this transformation
+ * allowed the same signal to drive both black &amp; white and color televisions,
+ * but JPEG images use YCbCr primarily because it allows the color data to be
+ * optionally subsampled for the purposes of reducing bandwidth or disk
+ * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+ * can be compressed from and decompressed to any of the extended RGB pixel
+ * formats or grayscale, or they can be decompressed to YUV planar images.
+ */
+ public static final int CS_YCbCr = 1;
+ /**
+ * Grayscale colorspace. The JPEG image retains only the luminance data (Y
+ * component), and any color data from the source image is discarded.
+ * Grayscale JPEG images can be compressed from and decompressed to any of
+ * the extended RGB pixel formats or grayscale, or they can be decompressed
+ * to YUV planar images.
+ */
+ public static final int CS_GRAY = 2;
+ /**
+ * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K
+ * components in the source image are reordered into image planes, but no
+ * colorspace conversion or subsampling is performed. CMYK JPEG images can
+ * only be decompressed to CMYK pixels.
+ */
+ public static final int CS_CMYK = 3;
+ /**
+ * YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but
+ * rather a mathematical transformation of CMYK designed solely for storage
+ * and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be
+ * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+ * components in the YCCK pixels can be subsampled without incurring major
+ * perceptual loss. YCCK JPEG images can only be compressed from and
+ * decompressed to CMYK pixels.
+ */
+ public static final int CS_YCCK = 4;
+
+
+ /**
* The uncompressed source/destination image is stored in bottom-up (Windows,
* OpenGL) order, not top-down (X11) order.
*/
public static final int FLAG_BOTTOMUP = 2;
- /**
- * Turn off CPU auto-detection and force TurboJPEG to use MMX code
- * (if the underlying codec supports it.)
- */
+
+ @Deprecated
public static final int FLAG_FORCEMMX = 8;
- /**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE code
- * (if the underlying codec supports it.)
- */
+ @Deprecated
public static final int FLAG_FORCESSE = 16;
- /**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE2 code
- * (if the underlying codec supports it.)
- */
+ @Deprecated
public static final int FLAG_FORCESSE2 = 32;
- /**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE3 code
- * (if the underlying codec supports it.)
- */
+ @Deprecated
public static final int FLAG_FORCESSE3 = 128;
+
/**
* When decompressing an image that was compressed using chrominance
* subsampling, use the fastest chrominance upsampling algorithm available in
@@ -343,6 +415,10 @@
*
* @param width the width (in pixels) of the YUV image
*
+ * @param pad the width of each line in each plane of the image is padded to
+ * the nearest multiple of this number of bytes (must be a power of
+ * 2.)
+ *
* @param height the height (in pixels) of the YUV image
*
* @param subsamp the level of chrominance subsampling used in the YUV
@@ -351,6 +427,14 @@
* @return the size of the buffer (in bytes) required to hold a YUV planar
* image with the given width, height, and level of chrominance subsampling
*/
+ public static native int bufSizeYUV(int width, int pad, int height,
+ int subsamp)
+ throws Exception;
+
+ /**
+ * @deprecated Use {@link #bufSizeYUV(int, int, int, int)} instead.
+ */
+ @Deprecated
public static native int bufSizeYUV(int width, int height, int subsamp)
throws Exception;
diff --git a/java/org/libjpegturbo/turbojpeg/TJCompressor.java b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
index 52ae613..63a7fa5 100644
--- a/java/org/libjpegturbo/turbojpeg/TJCompressor.java
+++ b/java/org/libjpegturbo/turbojpeg/TJCompressor.java
@@ -95,8 +95,8 @@
/**
* Associate an uncompressed source image with this compressor instance.
*
- * @param srcImage image buffer containing RGB or grayscale pixels to be
- * compressed
+ * @param srcImage image buffer containing RGB, grayscale, or CMYK pixels to
+ * be compressed
*
* @param x x offset (in pixels) of the region from which the JPEG image
* should be compressed, relative to the start of <code>srcImage</code>.
@@ -139,22 +139,69 @@
srcPixelFormat = pixelFormat;
srcX = x;
srcY = y;
+ srcIsYUV = false;
}
/**
* @deprecated Use
* {@link #setSourceImage(byte[], int, int, int, int, int, int)} instead.
*/
+ @Deprecated
public void setSourceImage(byte[] srcImage, int width, int pitch,
int height, int pixelFormat) throws Exception {
setSourceImage(srcImage, 0, 0, width, pitch, height, pixelFormat);
srcX = srcY = -1;
}
+  /**
+   * Associate an uncompressed YUV planar source image with this compressor
+   * instance.
+   *
+   * @param srcImage image buffer containing a YUV planar image to be
+   * compressed. The Y, U (Cb), and V (Cr) image planes should be stored
+   * sequentially in the buffer, and the size of each plane is determined by
+   * the specified width, height, and padding, as well as the level of
+   * chrominance subsampling (specified using {@link #setSubsamp}.) If the
+   * chrominance components are subsampled along the horizontal dimension, then
+   * the width of the luminance plane should be padded to the nearest multiple
+   * of 2 (same goes for the height of the luminance plane, if the chrominance
+   * components are subsampled along the vertical dimension.) This is
+   * irrespective of any additional padding specified in <code>pad</code>.
+   *
+   * @param width width (in pixels) of the source image
+   *
+   * @param pad the line padding used in the source image (must be a power of
+   * 2.) For instance, if each line in each plane of the YUV image is padded to
+   * the nearest multiple of 4 bytes, then <code>pad</code> should be set to 4.
+   *
+   * @param height height (in pixels) of the source image
+   */
+  public void setSourceImageYUV(byte[] srcImage, int width, int pad,
+                                int height) throws Exception {
+    if (handle == 0) init();
+    if (srcImage == null || width < 1 || pad < 1 ||
+        ((pad & (pad - 1)) != 0) || height < 1)  // pad: powers of 2 only
+      throw new Exception("Invalid argument in setSourceImageYUV()");
+    srcBuf = srcImage;
+    srcWidth = width;
+    srcYUVPad = pad;
+    srcHeight = height;
+    srcIsYUV = true;
+  }
/**
* Set the level of chrominance subsampling for subsequent compress/encode
- * operations.
+ * operations. When pixels are converted from RGB to YCbCr (see
+ * {@link TJ#CS_YCbCr}) or from CMYK to YCCK (see {@link TJ#CS_YCCK}) as part
+ * of the JPEG compression process, some of the Cb and Cr (chrominance)
+ * components can be discarded or averaged together to produce a smaller
+ * image with little perceptible loss of image clarity (the human eye is more
+ * sensitive to small changes in brightness than to small changes in color.)
+ * This is called "chrominance subsampling".
+ * <p>
+ * NOTE: When compressing a YUV planar image into a JPEG image, this method
+ * also specifies the level of chrominance subsampling used in the source
+ * image.
*
* @param newSubsamp the new level of chrominance subsampling (one of
* {@link TJ TJ.SAMP_*})
@@ -196,14 +243,19 @@
throw new Exception("JPEG Quality not set");
if (subsamp < 0)
throw new Exception("Subsampling level not set");
- if (srcX >= 0 && srcY >= 0)
- compressedSize = compress(srcBuf, srcX, srcY, srcWidth, srcPitch,
- srcHeight, srcPixelFormat, dstBuf, subsamp,
- jpegQuality, flags);
- else
- compressedSize = compress(srcBuf, srcWidth, srcPitch, srcHeight,
- srcPixelFormat, dstBuf, subsamp, jpegQuality,
- flags);
+ if (srcIsYUV)
+ compressedSize = compressFromYUV(srcBuf, srcWidth, srcYUVPad, srcHeight,
+ subsamp, dstBuf, jpegQuality, flags);
+ else {
+ if (srcX >= 0 && srcY >= 0)
+ compressedSize = compress(srcBuf, srcX, srcY, srcWidth, srcPitch,
+ srcHeight, srcPixelFormat, dstBuf, subsamp,
+ jpegQuality, flags);
+ else
+ compressedSize = compress(srcBuf, srcWidth, srcPitch, srcHeight,
+ srcPixelFormat, dstBuf, subsamp, jpegQuality,
+ flags);
+ }
}
/**
@@ -330,20 +382,34 @@
return buf;
}
+
+ /**
+ * Set the plane padding for subsequent YUV encode operations.
+ *
+ * @param pad the width of each line in each plane of the YUV image will be
+ * padded to the nearest multiple of this number of bytes (must be a
+ * power of 2.) The default padding is 4 bytes, which generates
+ * images suitable for direct video display.
+ */
+  public void setYUVPad(int pad) throws Exception {
+    if (pad < 1 || Integer.bitCount(pad) != 1)  // not a positive power of 2
+      throw new Exception("Invalid argument in setYUVPad()");
+    yuvPad = pad;
+  }
+
/**
* Encode the uncompressed source image associated with this compressor
* instance and output a YUV planar image to the given destination buffer.
- * This method uses the accelerated color conversion routines in
- * TurboJPEG's underlying codec to produce a planar YUV image that is
- * suitable for direct video display. Specifically, if the chrominance
- * components are subsampled along the horizontal dimension, then the width
- * of the luminance plane is padded to the nearest multiple of 2 in the
- * output image (same goes for the height of the luminance plane, if the
+ * This method uses the accelerated color conversion routines in TurboJPEG's
+ * underlying codec but does not execute any of the other steps in the JPEG
+ * compression process. The Y, U (Cb), and V (Cr) image planes are stored
+ * sequentially into the destination buffer, and the size of each plane is
+ * determined by the width and height of the source image, as well as the
+ * specified padding and level of chrominance subsampling. If the
+ * chrominance components are subsampled along the horizontal dimension, then
+ * the width of the luminance plane is padded to the nearest multiple of 2 in
+ * the output image (same goes for the height of the luminance plane, if the
* chrominance components are subsampled along the vertical dimension.)
- * Also, each line of each plane in the output image is padded to 4 bytes.
- * Although this will work with any subsampling option, it is really only
- * useful in combination with {@link TJ#SAMP_420}, which produces an image
- * compatible with the I420 (AKA "YUV420P") format.
* <p>
* NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
* convention of the digital video community, the TurboJPEG API uses "YUV" to
@@ -362,9 +428,9 @@
throw new Exception(NO_ASSOC_ERROR);
if (subsamp < 0)
throw new Exception("Subsampling level not set");
- encodeYUV(srcBuf, srcWidth, srcPitch, srcHeight,
- srcPixelFormat, dstBuf, subsamp, flags);
- compressedSize = TJ.bufSizeYUV(srcWidth, srcHeight, subsamp);
+ encodeYUV(srcBuf, srcWidth, srcPitch, srcHeight, srcPixelFormat, dstBuf,
+ yuvPad, subsamp, flags);
+ compressedSize = TJ.bufSizeYUV(srcWidth, yuvPad, srcHeight, subsamp);
}
/**
@@ -381,7 +447,7 @@
throw new Exception(NO_ASSOC_ERROR);
if (subsamp < 0)
throw new Exception("Subsampling level not set");
- byte[] buf = new byte[TJ.bufSizeYUV(srcWidth, srcHeight, subsamp)];
+ byte[] buf = new byte[TJ.bufSizeYUV(srcWidth, yuvPad, srcHeight, subsamp)];
encodeYUV(buf, flags);
return buf;
}
@@ -442,8 +508,8 @@
int stride = sm.getScanlineStride();
DataBufferInt db = (DataBufferInt)wr.getDataBuffer();
int[] buf = db.getData();
- encodeYUV(buf, width, stride, height, pixelFormat, dstBuf, subsamp,
- flags);
+ encodeYUV(buf, width, stride, height, pixelFormat, dstBuf, yuvPad,
+ subsamp, flags);
} else {
ComponentSampleModel sm =
(ComponentSampleModel)srcImage.getSampleModel();
@@ -453,10 +519,10 @@
int pitch = sm.getScanlineStride();
DataBufferByte db = (DataBufferByte)wr.getDataBuffer();
byte[] buf = db.getData();
- encodeYUV(buf, width, pitch, height, pixelFormat, dstBuf, subsamp,
- flags);
+ encodeYUV(buf, width, pitch, height, pixelFormat, dstBuf, yuvPad,
+ subsamp, flags);
}
- compressedSize = TJ.bufSizeYUV(width, height, subsamp);
+ compressedSize = TJ.bufSizeYUV(width, yuvPad, height, subsamp);
}
/**
@@ -476,7 +542,7 @@
throw new Exception("Subsampling level not set");
int width = srcImage.getWidth();
int height = srcImage.getHeight();
- byte[] buf = new byte[TJ.bufSizeYUV(width, height, subsamp)];
+ byte[] buf = new byte[TJ.bufSizeYUV(width, yuvPad, height, subsamp)];
encodeYUV(srcImage, buf, flags);
return buf;
}
@@ -529,13 +595,25 @@
int stride, int height, int pixelFormat, byte[] dstBuf, int jpegSubsamp,
int jpegQual, int flags) throws Exception;
+ private native int compressFromYUV(byte[] srcBuf, int width, int pad,
+ int height, int subsamp, byte[] dstBuf, int jpegQual, int flags)
+ throws Exception;
+
private native void encodeYUV(byte[] srcBuf, int width, int pitch,
int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
- throws Exception;
+ throws Exception; // deprecated
+
+ private native void encodeYUV(byte[] srcBuf, int width, int pitch,
+ int height, int pixelFormat, byte[] dstBuf, int pad, int subsamp,
+ int flags) throws Exception;
private native void encodeYUV(int[] srcBuf, int width, int stride,
int height, int pixelFormat, byte[] dstBuf, int subsamp, int flags)
- throws Exception;
+ throws Exception; // deprecated
+
+ private native void encodeYUV(int[] srcBuf, int width, int pitch,
+ int height, int pixelFormat, byte[] dstBuf, int pad, int subsamp,
+ int flags) throws Exception;
static {
TJLoader.load();
@@ -549,8 +627,11 @@
private int srcY = -1;
private int srcPitch = 0;
private int srcPixelFormat = -1;
+ private int srcYUVPad = -1;
+ private boolean srcIsYUV;
private int subsamp = -1;
private int jpegQuality = -1;
private int compressedSize = 0;
+ private int yuvPad = 4;
private ByteOrder byteOrder = null;
};
diff --git a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
index 5615b4e..bf78f2e 100644
--- a/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
+++ b/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java
@@ -39,7 +39,7 @@
/**
* A callback function that can be used to modify the DCT coefficients after
* they are losslessly transformed but before they are transcoded to a new
- * JPEG file. This allows for custom filters or other transformations to be
+ * JPEG image. This allows for custom filters or other transformations to be
* applied in the frequency domain.
*
* @param coeffBuffer a buffer containing transformed DCT coefficients.
diff --git a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
index 46d1d5f..d14a989 100644
--- a/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
+++ b/java/org/libjpegturbo/turbojpeg/TJDecompressor.java
@@ -117,7 +117,7 @@
/**
* Returns the level of chrominance subsampling used in the JPEG image
- * associated with this decompressor instance.
+ * associated with this decompressor instance. See {@link TJ TJ.SAMP_*}.
*
* @return the level of chrominance subsampling used in the JPEG image
* associated with this decompressor instance
@@ -131,6 +131,21 @@
}
/**
+ * Returns the colorspace used in the JPEG image associated with this
+ * decompressor instance. See {@link TJ TJ.CS_*}.
+ *
+ * @return the colorspace used in the JPEG image associated with this
+ * decompressor instance
+ */
+ public int getColorspace() throws Exception {
+ if (jpegColorspace < 0)
+ throw new Exception(NO_ASSOC_ERROR);
+ if (jpegColorspace >= TJ.NUMCS)
+ throw new Exception("JPEG header information is invalid");
+ return jpegColorspace;
+ }
+
+ /**
* Returns the JPEG image buffer associated with this decompressor instance.
*
* @return the JPEG image buffer associated with this decompressor instance
@@ -313,6 +328,7 @@
* @deprecated Use
* {@link #decompress(byte[], int, int, int, int, int, int, int)} instead.
*/
+ @Deprecated
public void decompress(byte[] dstBuf, int desiredWidth, int pitch,
int desiredHeight, int pixelFormat, int flags)
throws Exception {
@@ -377,39 +393,91 @@
* {@link TJ#bufSizeYUV} to determine the appropriate size for this buffer
* based on the image width, height, and level of chrominance subsampling.
*
+ * @param desiredWidth desired width (in pixels) of the YUV image. If the
+ * desired image dimensions are different than the dimensions of the JPEG
+ * image being decompressed, then TurboJPEG will use scaling in the JPEG
+ * decompressor to generate the largest possible image that will fit within
+ * the desired dimensions. Setting this to 0 is the same as setting it to
+ * the width of the JPEG image (in other words, the width will not be
+ * considered when determining the scaled image size.)
+ *
+ * @param pad the width of each line in each plane of the YUV image will be
+ * padded to the nearest multiple of this number of bytes (must be a power of
+ * 2.)
+ *
+ * @param desiredHeight desired height (in pixels) of the YUV image. If the
+ * desired image dimensions are different than the dimensions of the JPEG
+ * image being decompressed, then TurboJPEG will use scaling in the JPEG
+ * decompressor to generate the largest possible image that will fit within
+ * the desired dimensions. Setting this to 0 is the same as setting it to
+ * the height of the JPEG image (in other words, the height will not be
+ * considered when determining the scaled image size.)
+ *
* @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
*/
- public void decompressToYUV(byte[] dstBuf, int flags) throws Exception {
+ public void decompressToYUV(byte[] dstBuf, int desiredWidth, int pad,
+ int desiredHeight, int flags) throws Exception {
if (jpegBuf == null)
throw new Exception(NO_ASSOC_ERROR);
- if (dstBuf == null || flags < 0)
+ if (dstBuf == null || desiredWidth < 0 || pad < 1 ||
+ ((pad & (pad - 1)) != 0) || desiredHeight < 0 || flags < 0)
throw new Exception("Invalid argument in decompressToYUV()");
- decompressToYUV(jpegBuf, jpegBufSize, dstBuf, flags);
+ decompressToYUV(jpegBuf, jpegBufSize, dstBuf, desiredWidth, pad,
+ desiredHeight, flags);
}
+ /**
+ * @deprecated Use {@link #decompressToYUV(byte[], int, int, int, int)}
+ * instead.
+ */
+ @Deprecated
+ public void decompressToYUV(byte[] dstBuf, int flags) throws Exception {
+ decompressToYUV(dstBuf, 0, 4, 0, flags);
+ }
/**
* Decompress the JPEG source image associated with this decompressor
* instance and return a buffer containing a YUV planar image. See {@link
- * #decompressToYUV(byte[], int)} for more detail.
+ * #decompressToYUV(byte[], int, int, int, int)} for more detail.
+ *
+ * @param desiredWidth see
+ * {@link #decompressToYUV(byte[], int, int, int, int)} for description
+ *
+ * @param pad see {@link #decompressToYUV(byte[], int, int, int, int)} for
+ * description
+ *
+ * @param desiredHeight see {@link
+ * #decompressToYUV(byte[], int, int, int, int)} for description
*
* @param flags the bitwise OR of one or more of {@link TJ TJ.FLAG_*}
*
* @return a buffer containing a YUV planar image
*/
- public byte[] decompressToYUV(int flags) throws Exception {
+ public byte[] decompressToYUV(int desiredWidth, int pad, int desiredHeight,
+ int flags) throws Exception {
if (flags < 0)
throw new Exception("Invalid argument in decompressToYUV()");
if (jpegWidth < 1 || jpegHeight < 1 || jpegSubsamp < 0)
throw new Exception(NO_ASSOC_ERROR);
if (jpegSubsamp >= TJ.NUMSAMP)
throw new Exception("JPEG header information is invalid");
- byte[] buf = new byte[TJ.bufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp)];
- decompressToYUV(buf, flags);
+ int scaledWidth = getScaledWidth(desiredWidth, desiredHeight);
+ int scaledHeight = getScaledHeight(desiredWidth, desiredHeight);
+ byte[] buf = new byte[TJ.bufSizeYUV(scaledWidth, pad, scaledHeight,
+ jpegSubsamp)];
+ decompressToYUV(buf, desiredWidth, pad, desiredHeight, flags);
return buf;
}
/**
+ * @deprecated Use {@link #decompressToYUV(int, int, int, int)} instead.
+ */
+ @Deprecated
+ public byte[] decompressToYUV(int flags) throws Exception {
+ return decompressToYUV(0, 4, 0, flags);
+ }
+
+ /**
* Decompress the JPEG source image associated with this decompressor
* instance and output a decompressed image to the given destination buffer.
*
@@ -623,7 +691,10 @@
int flags) throws Exception;
private native void decompressToYUV(byte[] srcBuf, int size, byte[] dstBuf,
- int flags) throws Exception;
+ int flags) throws Exception; // deprecated
+
+ private native void decompressToYUV(byte[] srcBuf, int size, byte[] dstBuf,
+ int desiredWidth, int pad, int desiredHeight, int flags) throws Exception;
static {
TJLoader.load();
@@ -635,5 +706,6 @@
protected int jpegWidth = 0;
protected int jpegHeight = 0;
protected int jpegSubsamp = -1;
+ protected int jpegColorspace = -1;
private ByteOrder byteOrder = null;
};
diff --git a/java/org_libjpegturbo_turbojpeg_TJ.h b/java/org_libjpegturbo_turbojpeg_TJ.h
index d7b032a..b00a128 100644
--- a/java/org_libjpegturbo_turbojpeg_TJ.h
+++ b/java/org_libjpegturbo_turbojpeg_TJ.h
@@ -8,7 +8,7 @@
extern "C" {
#endif
#undef org_libjpegturbo_turbojpeg_TJ_NUMSAMP
-#define org_libjpegturbo_turbojpeg_TJ_NUMSAMP 5L
+#define org_libjpegturbo_turbojpeg_TJ_NUMSAMP 6L
#undef org_libjpegturbo_turbojpeg_TJ_SAMP_444
#define org_libjpegturbo_turbojpeg_TJ_SAMP_444 0L
#undef org_libjpegturbo_turbojpeg_TJ_SAMP_422
@@ -19,8 +19,10 @@
#define org_libjpegturbo_turbojpeg_TJ_SAMP_GRAY 3L
#undef org_libjpegturbo_turbojpeg_TJ_SAMP_440
#define org_libjpegturbo_turbojpeg_TJ_SAMP_440 4L
+#undef org_libjpegturbo_turbojpeg_TJ_SAMP_411
+#define org_libjpegturbo_turbojpeg_TJ_SAMP_411 5L
#undef org_libjpegturbo_turbojpeg_TJ_NUMPF
-#define org_libjpegturbo_turbojpeg_TJ_NUMPF 11L
+#define org_libjpegturbo_turbojpeg_TJ_NUMPF 12L
#undef org_libjpegturbo_turbojpeg_TJ_PF_RGB
#define org_libjpegturbo_turbojpeg_TJ_PF_RGB 0L
#undef org_libjpegturbo_turbojpeg_TJ_PF_BGR
@@ -43,16 +45,22 @@
#define org_libjpegturbo_turbojpeg_TJ_PF_ABGR 9L
#undef org_libjpegturbo_turbojpeg_TJ_PF_ARGB
#define org_libjpegturbo_turbojpeg_TJ_PF_ARGB 10L
+#undef org_libjpegturbo_turbojpeg_TJ_PF_CMYK
+#define org_libjpegturbo_turbojpeg_TJ_PF_CMYK 11L
+#undef org_libjpegturbo_turbojpeg_TJ_NUMCS
+#define org_libjpegturbo_turbojpeg_TJ_NUMCS 5L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_RGB
+#define org_libjpegturbo_turbojpeg_TJ_CS_RGB 0L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_YCbCr
+#define org_libjpegturbo_turbojpeg_TJ_CS_YCbCr 1L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_GRAY
+#define org_libjpegturbo_turbojpeg_TJ_CS_GRAY 2L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_CMYK
+#define org_libjpegturbo_turbojpeg_TJ_CS_CMYK 3L
+#undef org_libjpegturbo_turbojpeg_TJ_CS_YCCK
+#define org_libjpegturbo_turbojpeg_TJ_CS_YCCK 4L
#undef org_libjpegturbo_turbojpeg_TJ_FLAG_BOTTOMUP
#define org_libjpegturbo_turbojpeg_TJ_FLAG_BOTTOMUP 2L
-#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FORCEMMX
-#define org_libjpegturbo_turbojpeg_TJ_FLAG_FORCEMMX 8L
-#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE
-#define org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE 16L
-#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE2
-#define org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE2 32L
-#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE3
-#define org_libjpegturbo_turbojpeg_TJ_FLAG_FORCESSE3 128L
#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE
#define org_libjpegturbo_turbojpeg_TJ_FLAG_FASTUPSAMPLE 256L
#undef org_libjpegturbo_turbojpeg_TJ_FLAG_FASTDCT
@@ -70,9 +78,17 @@
/*
* Class: org_libjpegturbo_turbojpeg_TJ
* Method: bufSizeYUV
+ * Signature: (IIII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
+ (JNIEnv *, jclass, jint, jint, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJ
+ * Method: bufSizeYUV
* Signature: (III)I
*/
-JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III
(JNIEnv *, jclass, jint, jint, jint);
/*
diff --git a/java/org_libjpegturbo_turbojpeg_TJCompressor.h b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
index 2fc9136..50070ef 100644
--- a/java/org_libjpegturbo_turbojpeg_TJCompressor.h
+++ b/java/org_libjpegturbo_turbojpeg_TJCompressor.h
@@ -57,6 +57,14 @@
/*
* Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: compressFromYUV
+ * Signature: ([BIIII[BII)I
+ */
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII
+ (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
* Method: encodeYUV
* Signature: ([BIIII[BII)V
*/
@@ -66,11 +74,27 @@
/*
* Class: org_libjpegturbo_turbojpeg_TJCompressor
* Method: encodeYUV
+ * Signature: ([BIIII[BIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BIII
+ (JNIEnv *, jobject, jbyteArray, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: encodeYUV
* Signature: ([IIIII[BII)V
*/
JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
(JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jbyteArray, jint, jint);
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJCompressor
+ * Method: encodeYUV
+ * Signature: ([IIIII[BIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BIII
+ (JNIEnv *, jobject, jintArray, jint, jint, jint, jint, jbyteArray, jint, jint, jint);
+
#ifdef __cplusplus
}
#endif
diff --git a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
index f798a77..203f004 100644
--- a/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
+++ b/java/org_libjpegturbo_turbojpeg_TJDecompressor.h
@@ -68,9 +68,17 @@
* Method: decompressToYUV
* Signature: ([BI[BI)V
*/
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI
(JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint);
+/*
+ * Class: org_libjpegturbo_turbojpeg_TJDecompressor
+ * Method: decompressToYUV
+ * Signature: ([BI[BIIII)V
+ */
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII
+ (JNIEnv *, jobject, jbyteArray, jint, jbyteArray, jint, jint, jint, jint);
+
#ifdef __cplusplus
}
#endif
diff --git a/jcparam.c b/jcparam.c
index 2b9a740..f4e5eec 100644
--- a/jcparam.c
+++ b/jcparam.c
@@ -16,6 +16,7 @@
#define JPEG_INTERNALS
#include "jinclude.h"
#include "jpeglib.h"
+#include "jstdhuff.c"
/*
@@ -166,116 +167,6 @@
/*
- * Huffman table setup routines
- */
-
-LOCAL(void)
-add_huff_table (j_compress_ptr cinfo,
- JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
-/* Define a Huffman table */
-{
- int nsymbols, len;
-
- if (*htblptr == NULL)
- *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
-
- /* Copy the number-of-symbols-of-each-code-length counts */
- MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
-
- /* Validate the counts. We do this here mainly so we can copy the right
- * number of symbols from the val[] array, without risking marching off
- * the end of memory. jchuff.c will do a more thorough test later.
- */
- nsymbols = 0;
- for (len = 1; len <= 16; len++)
- nsymbols += bits[len];
- if (nsymbols < 1 || nsymbols > 256)
- ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
-
- MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
-
- /* Initialize sent_table FALSE so table will be written to JPEG file. */
- (*htblptr)->sent_table = FALSE;
-}
-
-
-LOCAL(void)
-std_huff_tables (j_compress_ptr cinfo)
-/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
-/* IMPORTANT: these are only valid for 8-bit data precision! */
-{
- static const UINT8 bits_dc_luminance[17] =
- { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
- static const UINT8 val_dc_luminance[] =
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
- static const UINT8 bits_dc_chrominance[17] =
- { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
- static const UINT8 val_dc_chrominance[] =
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
-
- static const UINT8 bits_ac_luminance[17] =
- { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
- static const UINT8 val_ac_luminance[] =
- { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
- 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
- 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
- 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
- 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
- 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
- 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
- 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
- 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
- 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
- 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
- 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
- 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
- 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
- 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
- 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
- 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
- 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
- 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
- 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
- 0xf9, 0xfa };
-
- static const UINT8 bits_ac_chrominance[17] =
- { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
- static const UINT8 val_ac_chrominance[] =
- { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
- 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
- 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
- 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
- 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
- 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
- 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
- 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
- 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
- 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
- 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
- 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
- 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
- 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
- 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
- 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
- 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
- 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
- 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
- 0xf9, 0xfa };
-
- add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
- bits_dc_luminance, val_dc_luminance);
- add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
- bits_ac_luminance, val_ac_luminance);
- add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
- bits_dc_chrominance, val_dc_chrominance);
- add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
- bits_ac_chrominance, val_ac_chrominance);
-}
-
-
-/*
* Default parameter setup for compression.
*
* Applications that don't choose to use this routine must do their
@@ -313,7 +204,7 @@
/* Set up two quantization tables using default quality of 75 */
jpeg_set_quality(cinfo, 75, TRUE);
/* Set up two Huffman tables */
- std_huff_tables(cinfo);
+ std_huff_tables((j_common_ptr) cinfo);
/* Initialize default arithmetic coding conditioning */
for (i = 0; i < NUM_ARITH_TBLS; i++) {
diff --git a/jddctmgr.c b/jddctmgr.c
index 0a5decb..88b4707 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -133,6 +133,11 @@
method = JDCT_ISLOW; /* jidctint uses islow-style table */
break;
case 6:
+#if defined(__mips__)
+ if (jsimd_can_idct_6x6())
+ method_ptr = jsimd_idct_6x6;
+ else
+#endif
method_ptr = jpeg_idct_6x6;
method = JDCT_ISLOW; /* jidctint uses islow-style table */
break;
@@ -188,6 +193,11 @@
method = JDCT_ISLOW; /* jidctint uses islow-style table */
break;
case 12:
+#if defined(__mips__)
+ if (jsimd_can_idct_12x12())
+ method_ptr = jsimd_idct_12x12;
+ else
+#endif
method_ptr = jpeg_idct_12x12;
method = JDCT_ISLOW; /* jidctint uses islow-style table */
break;
diff --git a/jdhuff.c b/jdhuff.c
index dba5f18..d21d399 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -21,6 +21,7 @@
#include "jpeglib.h"
#include "jdhuff.h" /* Declarations shared with jdphuff.c */
#include "jpegcomp.h"
+#include "jstdhuff.c"
/*
@@ -795,6 +796,12 @@
huff_entropy_ptr entropy;
int i;
+ /* Motion JPEG frames typically do not include the Huffman tables if they
+ are the default tables. Thus, if the tables are not set by the time
+ the Huffman decoder is initialized (usually within the body of
+ jpeg_start_decompress()), we set them to default values. */
+ std_huff_tables((j_common_ptr) cinfo);
+
entropy = (huff_entropy_ptr)
(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
SIZEOF(huff_entropy_decoder));
diff --git a/jsimd_none.c b/jsimd_none.c
index 9787902..882fc08 100644
--- a/jsimd_none.c
+++ b/jsimd_none.c
@@ -258,6 +258,18 @@
return 0;
}
+GLOBAL(int)
+jsimd_can_idct_6x6 (void)
+{
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_12x12 (void)
+{
+ return 0;
+}
+
GLOBAL(void)
jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JCOEFPTR coef_block, JSAMPARRAY output_buf,
@@ -272,6 +284,20 @@
{
}
+GLOBAL(void)
+jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+}
+
+GLOBAL(void)
+jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+}
+
GLOBAL(int)
jsimd_can_idct_islow (void)
{
diff --git a/jsimddct.h b/jsimddct.h
index a1c7440..9d2d945 100644
--- a/jsimddct.h
+++ b/jsimddct.h
@@ -68,6 +68,8 @@
EXTERN(int) jsimd_can_idct_2x2 JPP((void));
EXTERN(int) jsimd_can_idct_4x4 JPP((void));
+EXTERN(int) jsimd_can_idct_6x6 JPP((void));
+EXTERN(int) jsimd_can_idct_12x12 JPP((void));
EXTERN(void) jsimd_idct_2x2 JPP((j_decompress_ptr cinfo,
jpeg_component_info * compptr,
@@ -79,6 +81,16 @@
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
JDIMENSION output_col));
+EXTERN(void) jsimd_idct_6x6 JPP((j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col));
+EXTERN(void) jsimd_idct_12x12 JPP((j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col));
EXTERN(int) jsimd_can_idct_islow JPP((void));
EXTERN(int) jsimd_can_idct_ifast JPP((void));
diff --git a/jstdhuff.c b/jstdhuff.c
new file mode 100644
index 0000000..7fec6ca
--- /dev/null
+++ b/jstdhuff.c
@@ -0,0 +1,133 @@
+/*
+ * jstdhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to set the default Huffman tables, if they are
+ * not already set.
+ */
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_common_ptr cinfo,
+ JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Define a Huffman table, unless the slot already holds one (then no-op) */
+{
+ int nsymbols, len;
+
+ if (*htblptr == NULL)
+ *htblptr = jpeg_alloc_huff_table(cinfo);
+ else
+ return;
+
+ /* Copy the number-of-symbols-of-each-code-length counts */
+ MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+
+ /* Validate the counts. We do this here mainly so we can copy the right
+ * number of symbols from the val[] array, without risking marching off
+ * the end of memory. jchuff.c will do a more thorough test later.
+ */
+ nsymbols = 0;
+ for (len = 1; len <= 16; len++)
+ nsymbols += bits[len];
+ if (nsymbols < 1 || nsymbols > 256)
+ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+ MEMCOPY((*htblptr)->huffval, val, nsymbols * SIZEOF(UINT8));
+
+ /* Initialize sent_table FALSE so table will be written to JPEG file. */
+ (*htblptr)->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables (j_common_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+{
+ JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs;
+
+ static const UINT8 bits_dc_luminance[17] =
+ { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+ static const UINT8 val_dc_luminance[] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+ static const UINT8 bits_dc_chrominance[17] =
+ { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+ static const UINT8 val_dc_chrominance[] =
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+
+ static const UINT8 bits_ac_luminance[17] =
+ { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+ static const UINT8 val_ac_luminance[] =
+ { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+ 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+ 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+ 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+ 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+ 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+ 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+ 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+ 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+ 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+ 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+ 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+ 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+ 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+ 0xf9, 0xfa };
+
+ static const UINT8 bits_ac_chrominance[17] =
+ { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+ static const UINT8 val_ac_chrominance[] =
+ { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+ 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+ 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+ 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+ 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+ 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+ 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+ 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+ 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+ 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+ 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+ 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+ 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+ 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+ 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+ 0xf9, 0xfa };
+
+ if (cinfo->is_decompressor) {
+ dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs;
+ ac_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->ac_huff_tbl_ptrs;
+ } else {
+ dc_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->dc_huff_tbl_ptrs;
+ ac_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->ac_huff_tbl_ptrs;
+ }
+
+ add_huff_table(cinfo, &dc_huff_tbl_ptrs[0], bits_dc_luminance,
+ val_dc_luminance);
+ add_huff_table(cinfo, &ac_huff_tbl_ptrs[0], bits_ac_luminance,
+ val_ac_luminance);
+ add_huff_table(cinfo, &dc_huff_tbl_ptrs[1], bits_dc_chrominance,
+ val_dc_chrominance);
+ add_huff_table(cinfo, &ac_huff_tbl_ptrs[1], bits_ac_chrominance,
+ val_ac_chrominance);
+}
diff --git a/jversion.h b/jversion.h
index c37651b..032fef0 100644
--- a/jversion.h
+++ b/jversion.h
@@ -29,4 +29,6 @@
"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
"Copyright (C) 2009-2014 D. R. Commander\n" \
- "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)"
+ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
+ "Copyright (C) 2013 MIPS Technologies, Inc.\n" \
+ "Copyright (C) 2013 Linaro Limited"
diff --git a/release/libjpeg-turbo.nsi.in b/release/libjpeg-turbo.nsi.in
index 4f65303..0514565 100755
--- a/release/libjpeg-turbo.nsi.in
+++ b/release/libjpeg-turbo.nsi.in
@@ -1,7 +1,7 @@
!include x64.nsh
Name "@CMAKE_PROJECT_NAME@ SDK for @INST_PLATFORM@"
OutFile "@CMAKE_BINARY_DIR@\${BUILDDIR}@INST_NAME@.exe"
-InstallDir "@INST_DIR@"
+InstallDir @INST_DIR@
SetCompressor bzip2
@@ -111,15 +111,13 @@
!ifdef GCC
Delete $INSTDIR\bin\libjpeg-@DLL_VERSION@.dll
Delete $SYSDIR\libturbojpeg.dll
- Delete $INSTDIR\bin\libturbojpeg.dll
- Delete $INSTDIR\lib\libturbojpeg.dll.a
- Delete $INSTDIR\lib\libturbojpeg.a
- Delete $INSTDIR\lib\libjpeg.dll.a
- Delete $INSTDIR\lib\libjpeg.a
+ Delete $INSTDIR\lib\libturbojpeg.dll.a
+ Delete $INSTDIR\lib\libturbojpeg.a
+ Delete $INSTDIR\lib\libjpeg.dll.a
+ Delete $INSTDIR\lib\libjpeg.a
!else
Delete $INSTDIR\bin\jpeg@DLL_VERSION@.dll
Delete $SYSDIR\turbojpeg.dll
- Delete $INSTDIR\bin\turbojpeg.dll
Delete $INSTDIR\lib\jpeg.lib
Delete $INSTDIR\lib\jpeg-static.lib
Delete $INSTDIR\lib\turbojpeg.lib
@@ -134,11 +132,11 @@
Delete $INSTDIR\bin\tjbench.exe
Delete $INSTDIR\bin\rdjpgcom.exe
Delete $INSTDIR\bin\wrjpgcom.exe
- Delete $INSTDIR\include\jconfig.h
- Delete $INSTDIR\include\jerror.h
- Delete $INSTDIR\include\jmorecfg.h
- Delete $INSTDIR\include\jpeglib.h
- Delete $INSTDIR\include\turbojpeg.h
+ Delete $INSTDIR\include\jconfig.h
+ Delete $INSTDIR\include\jerror.h
+ Delete $INSTDIR\include\jmorecfg.h
+ Delete $INSTDIR\include\jpeglib.h
+ Delete $INSTDIR\include\turbojpeg.h
Delete $INSTDIR\uninstall_@VERSION@.exe
Delete $INSTDIR\doc\README
Delete $INSTDIR\doc\README-turbo.txt
diff --git a/release/libjpeg-turbo.spec.in b/release/libjpeg-turbo.spec.in
index 6c97814..616618d 100644
--- a/release/libjpeg-turbo.spec.in
+++ b/release/libjpeg-turbo.spec.in
@@ -124,7 +124,7 @@
%{_libdir}/libjpeg.so.@SO_MAJOR_VERSION@
%{_libdir}/libjpeg.so
%{_libdir}/libjpeg.a
-%{_libdir}/libturbojpeg.so.0.0.0
+%{_libdir}/libturbojpeg.so.0.1.0
%{_libdir}/libturbojpeg.so.0
%{_libdir}/libturbojpeg.so
%{_libdir}/libturbojpeg.a
diff --git a/simd/Makefile.am b/simd/Makefile.am
index a12ff6e..be2f0a5 100644
--- a/simd/Makefile.am
+++ b/simd/Makefile.am
@@ -58,6 +58,18 @@
endif
+if SIMD_ARM_64
+
+libsimd_la_SOURCES = jsimd_arm64.c jsimd_arm_neon_64.S
+
+endif
+
+if SIMD_MIPSEL
+
+libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S
+
+endif
+
AM_CPPFLAGS = -I$(top_srcdir)
.asm.lo:
diff --git a/simd/jsimd.h b/simd/jsimd.h
index 3d4751f..f75aa9b 100644
--- a/simd/jsimd.h
+++ b/simd/jsimd.h
@@ -3,7 +3,8 @@
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright 2011 D. R. Commander
- *
+ * Copyright (C) 2013, MIPS Technologies, Inc., California
+ *
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
* For conditions of distribution and use, see copyright notice in jsimdext.inc
@@ -18,6 +19,7 @@
#define JSIMD_SSE 0x04
#define JSIMD_SSE2 0x08
#define JSIMD_ARM_NEON 0x10
+#define JSIMD_MIPS_DSPR2 0x20
/* Short forms of external names for systems with brain-damaged linkers. */
@@ -386,6 +388,93 @@
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+
+EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows));
+
+EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
+ JPP((JDIMENSION img_width,
+ JSAMPIMAGE input_buf, JDIMENSION input_row,
+ JSAMPARRAY output_buf, int num_rows));
+
/* SIMD Downsample */
EXTERN(void) jsimd_h2v2_downsample_mmx
JPP((JDIMENSION image_width, int max_v_samp_factor,
@@ -405,6 +494,15 @@
JDIMENSION v_samp_factor, JDIMENSION width_blocks,
JSAMPARRAY input_data, JSAMPARRAY output_data));
+EXTERN(void) jsimd_h2v2_downsample_mips_dspr2
+ JPP((JDIMENSION image_width, int max_v_samp_factor,
+ JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+ JSAMPARRAY input_data, JSAMPARRAY output_data));
+EXTERN(void) jsimd_h2v1_downsample_mips_dspr2
+ JPP((JDIMENSION image_width, int max_v_samp_factor,
+ JDIMENSION v_samp_factor, JDIMENSION width_blocks,
+ JSAMPARRAY input_data, JSAMPARRAY output_data));
+
/* SIMD Upsample */
EXTERN(void) jsimd_h2v2_upsample_mmx
JPP((int max_v_samp_factor, JDIMENSION output_width,
@@ -526,6 +624,20 @@
JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+EXTERN(void) jsimd_h2v1_fancy_upsample_mips_dspr2
+ JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
+ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2
+ JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
+ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
+ JPP((int max_v_samp_factor, JDIMENSION output_width,
+ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+EXTERN(void) jsimd_h2v1_upsample_mips_dspr2
+ JPP((int max_v_samp_factor, JDIMENSION output_width,
+ JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
/* SIMD Sample Conversion */
EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
@@ -539,6 +651,10 @@
JDIMENSION start_col,
DCTELEM * workspace));
+EXTERN(void) jsimd_convsamp_mips_dspr2 JPP((JSAMPARRAY sample_data,
+ JDIMENSION start_col,
+ DCTELEM * workspace));
+
EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
JDIMENSION start_col,
FAST_FLOAT * workspace));
@@ -551,6 +667,10 @@
JDIMENSION start_col,
FAST_FLOAT * workspace));
+EXTERN(void) jsimd_convsamp_float_mips_dspr2 JPP((JSAMPARRAY sample_data,
+ JDIMENSION start_col,
+ FAST_FLOAT * workspace));
+
/* SIMD Forward DCT */
EXTERN(void) jsimd_fdct_islow_mmx JPP((DCTELEM * data));
EXTERN(void) jsimd_fdct_ifast_mmx JPP((DCTELEM * data));
@@ -562,6 +682,9 @@
EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data));
+EXTERN(void) jsimd_fdct_islow_mips_dspr2 JPP((DCTELEM * data));
+EXTERN(void) jsimd_fdct_ifast_mips_dspr2 JPP((DCTELEM * data));
+
EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));
extern const int jconst_fdct_float_sse[];
@@ -580,6 +703,10 @@
DCTELEM * divisors,
DCTELEM * workspace));
+EXTERN(void) jsimd_quantize_mips_dspr2 JPP((JCOEFPTR coef_block,
+ DCTELEM * divisors,
+ DCTELEM * workspace));
+
EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block,
FAST_FLOAT * divisors,
FAST_FLOAT * workspace));
@@ -592,6 +719,10 @@
FAST_FLOAT * divisors,
FAST_FLOAT * workspace));
+EXTERN(void) jsimd_quantize_float_mips_dspr2 JPP((JCOEFPTR coef_block,
+ FAST_FLOAT * divisors,
+ FAST_FLOAT * workspace));
+
/* SIMD Reduced Inverse DCT */
EXTERN(void) jsimd_idct_2x2_mmx JPP((void * dct_table,
JCOEFPTR coef_block,
@@ -621,6 +752,25 @@
JSAMPARRAY output_buf,
JDIMENSION output_col));
+EXTERN(void) jsimd_idct_2x2_mips_dspr2 JPP((void * dct_table,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col));
+EXTERN(void) jsimd_idct_4x4_mips_dspr2 JPP((void * dct_table,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col,
+ int * workspace));
+EXTERN(void) jsimd_idct_6x6_mips_dspr2 JPP((void * dct_table,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col));
+EXTERN(void) jsimd_idct_12x12_pass1_mips_dspr2 JPP((JCOEFPTR coef_block,
+ void * dct_table,
+ int * workspace));
+EXTERN(void) jsimd_idct_12x12_pass2_mips_dspr2 JPP((int * workspace,
+ int * output));
+
/* SIMD Inverse DCT */
EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table,
JCOEFPTR coef_block,
@@ -651,6 +801,15 @@
JSAMPARRAY output_buf,
JDIMENSION output_col));
+EXTERN(void) jsimd_idct_ifast_cols_mips_dspr2 JPP((JCOEF * inptr,
+ IFAST_MULT_TYPE * quantptr,
+ DCTELEM * wsptr,
+ const int * idct_coefs));
+EXTERN(void) jsimd_idct_ifast_rows_mips_dspr2 JPP((DCTELEM * wsptr,
+ JSAMPARRAY output_buf,
+ JDIMENSION output_col,
+ const int * idct_coefs));
+
EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table,
JCOEFPTR coef_block,
JSAMPARRAY output_buf,
diff --git a/simd/jsimd_arm.c b/simd/jsimd_arm.c
index bd717a4..c6fa4ca 100644
--- a/simd/jsimd_arm.c
+++ b/simd/jsimd_arm.c
@@ -2,7 +2,7 @@
* jsimd_arm.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009-2011 D. R. Commander
+ * Copyright 2009-2011, 2013 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -123,10 +123,10 @@
#endif
/* Force different settings through environment variables */
- env = getenv("JSIMD_FORCE_ARM_NEON");
+ env = getenv("JSIMD_FORCENEON");
if ((env != NULL) && (strcmp(env, "1") == 0))
simd_support &= JSIMD_ARM_NEON;
- env = getenv("JSIMD_FORCE_NO_SIMD");
+ env = getenv("JSIMD_FORCENONE");
if ((env != NULL) && (strcmp(env, "1") == 0))
simd_support = 0;
}
diff --git a/simd/jsimd_arm64.c b/simd/jsimd_arm64.c
new file mode 100644
index 0000000..d280d62
--- /dev/null
+++ b/simd/jsimd_arm64.c
@@ -0,0 +1,586 @@
+/*
+ * jsimd_arm64.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011, 2013-2014 D. R. Commander
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on
+ * ARM architecture.
+ *
+ * Based on the stubs from 'jsimd_none.c'
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+
+#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
+
+LOCAL(int)
+check_feature (char *buffer, char *feature)
+{
+  /* Returns 1 if 'buffer' is a /proc/cpuinfo "Features" line that lists
+   * 'feature' as a separate whitespace-delimited word, and 0 otherwise. */
+  char *start, *p, *end;
+  size_t len = strlen(feature);
+
+  if (len == 0 || strncmp(buffer, "Features", 8) != 0)
+    return 0;
+  start = buffer + 8;
+  /* <ctype.h> functions are undefined for negative char values: cast first */
+  while (isspace((unsigned char) *start))
+    start++;
+
+  /* The left boundary is always judged by the character in front of the
+   * match itself, so resuming the search one byte into a longer word (e.g.
+   * "xneon") can no longer make its tail pass for a separate word. */
+  for (p = strstr(start, feature); p != NULL; p = strstr(p + 1, feature)) {
+    end = p + len;
+    if ((p == start || isspace((unsigned char) p[-1])) &&
+        (*end == 0 || isspace((unsigned char) *end)))
+      return 1;
+  }
+
+  return 0;
+}
+
+LOCAL(int)
+parse_proc_cpuinfo (int bufsize)
+{
+  /* Rebuilds simd_support from the "Features" lines of /proc/cpuinfo.
+   * Returns 0 when the line buffer cannot be allocated or a line does not
+   * fit in 'bufsize' bytes (the caller retries with more), 1 otherwise. */
+  char *buffer = (char *)malloc(bufsize);
+  FILE *fd;
+  int ok = 1;
+
+  simd_support = 0;
+  if (!buffer)
+    return 0;
+
+  fd = fopen("/proc/cpuinfo", "r");
+  /* AArch64 kernels list Advanced SIMD as "asimd", 32-bit ones as "neon" */
+  while (fd && ok && fgets(buffer, bufsize, fd)) {
+    if (!strchr(buffer, '\n') && !feof(fd))
+      ok = 0;                   /* truncated line: buffer too small */
+    else if (check_feature(buffer, "neon") || check_feature(buffer, "asimd"))
+      simd_support |= JSIMD_ARM_NEON;
+  }
+  if (fd)
+    fclose(fd);
+  free(buffer);
+  return ok;
+}
+
+#endif
+
+/*
+ * Probe the available SIMD extensions once and cache the result in
+ * simd_support (~0 marks "not probed yet").
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+  char *env = NULL;
+#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
+  int bufsize = 1024; /* an initial guess for the line buffer size limit */
+#endif
+
+  if (simd_support != ~0U)
+    return;                     /* already probed */
+
+  simd_support = 0;
+
+#if defined(__ARM_NEON__)
+  simd_support |= JSIMD_ARM_NEON;
+#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+  /* We still have a chance to use NEON regardless of globally used
+   * -mcpu/-mfpu options passed to gcc by performing runtime detection via
+   * /proc/cpuinfo parsing on linux/android */
+  while (!parse_proc_cpuinfo(bufsize)) {
+    bufsize *= 2;               /* retry with a larger line buffer */
+    if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
+      break;
+  }
+#endif
+
+  /* Force different settings through environment variables */
+  env = getenv("JSIMD_FORCENEON");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support &= JSIMD_ARM_NEON;  /* AND: can only clear other bits */
+  env = getenv("JSIMD_FORCENONE");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support = 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  /* Reports whether the NEON YCC-to-RGB converters may be used: the build
+   * must use the sample and pixel sizes the code is optimised for, and
+   * init_simd() must have left the NEON bit set in simd_support. */
+  init_simd();
+
+  if (BITS_IN_JSAMPLE != 8 ||
+      sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  /* Build configuration matches; only the CPU capability is left */
+  return (simd_support & JSIMD_ARM_NEON) ? 1 : 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows)
+{
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+ JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+ JDIMENSION output_row, int num_rows)
+{
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+
+  /* Without NEON there is no routine to dispatch to: leave output as is */
+  if (!(simd_support & JSIMD_ARM_NEON))
+    return;
+
+  /* Pick the NEON routine matching the requested output pixel layout */
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    convert = jsimd_ycc_extrgbx_convert_neon;
+    break;
+  case JCS_EXT_BGR:
+    convert = jsimd_ycc_extbgr_convert_neon;
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    convert = jsimd_ycc_extbgrx_convert_neon;
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    convert = jsimd_ycc_extxbgr_convert_neon;
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    convert = jsimd_ycc_extxrgb_convert_neon;
+    break;
+  case JCS_EXT_RGB:
+  default:                      /* every other colour space: plain RGB */
+    convert = jsimd_ycc_extrgb_convert_neon;
+    break;
+  }
+
+  convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+ JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+ JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+ JSAMPIMAGE input_buf,
+ JDIMENSION in_row_group_ctr,
+ JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+ JSAMPIMAGE input_buf,
+ JDIMENSION in_row_group_ctr,
+ JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+ DCTELEM * workspace)
+{
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+ FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+ DCTELEM * workspace)
+{
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+ FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  /*
+   * Reports whether the NEON 2x2 reduced-size IDCT may be used.  The code
+   * is optimised for the constants and type sizes tested below only, so
+   * any other build configuration is refused outright.
+   */
+  init_simd();
+
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* Configuration matches: answer with what init_simd() left in place */
+  if (!(simd_support & JSIMD_ARM_NEON))
+    return 0;
+  return 1;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  /*
+   * Reports whether the NEON 4x4 reduced-size IDCT may be used.  The code
+   * is optimised for the constants and type sizes tested below only, so
+   * any other build configuration is refused outright.
+   */
+  init_simd();
+
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* Configuration matches: answer with what init_simd() left in place */
+  if (!(simd_support & JSIMD_ARM_NEON))
+    return 0;
+  return 1;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{ /* Hands the block to jsimd_idct_2x2_neon() if NEON is flagged; else no-op */
+ if ((simd_support & JSIMD_ARM_NEON))
+ jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{ /* Hands the block to jsimd_idct_4x4_neon() if NEON is flagged; else no-op */
+ if ((simd_support & JSIMD_ARM_NEON))
+ jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  /*
+   * Reports whether the NEON slow-integer IDCT may be used.  The code is
+   * optimised for the constants and type sizes tested below only, so any
+   * other build configuration is refused outright.
+   */
+  init_simd();
+
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  /* Configuration matches: answer with what init_simd() left in place */
+  if (!(simd_support & JSIMD_ARM_NEON))
+    return 0;
+  return 1;
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  /*
+   * Reports whether the NEON fast-integer IDCT may be used.  The code is
+   * optimised for the constants and type sizes tested below only, so any
+   * other build configuration is refused outright.
+   */
+  init_simd();
+
+  if (DCTSIZE != 8 ||
+      BITS_IN_JSAMPLE != 8 ||
+      sizeof(JCOEF) != 2 ||
+      sizeof(JDIMENSION) != 4 ||
+      sizeof(IFAST_MULT_TYPE) != 2 ||
+      IFAST_SCALE_BITS != 2)
+    return 0;
+
+  /* Configuration matches: answer with what init_simd() left in place */
+  if (!(simd_support & JSIMD_ARM_NEON))
+    return 0;
+
+  return 1;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+ init_simd();
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{ /* Hands the block to jsimd_idct_islow_neon() if NEON is flagged; else no-op */
+ if ((simd_support & JSIMD_ARM_NEON))
+ jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{ /* Hands the block to jsimd_idct_ifast_neon() if NEON is flagged; else no-op */
+ if ((simd_support & JSIMD_ARM_NEON))
+ jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+}
+
diff --git a/simd/jsimd_arm_neon_64.S b/simd/jsimd_arm_neon_64.S
new file mode 100644
index 0000000..2d9e95e
--- /dev/null
+++ b/simd/jsimd_arm_neon_64.S
@@ -0,0 +1,1832 @@
+/*
+ * ARMv8 NEON optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
+ * All rights reserved.
+ * Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+ * Copyright (C) 2013, Linaro Limited
+ * Author: Ragesh Radhakrishnan <ragesh.r@linaro.org>
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
+#endif
+
+.text
+.arch armv8-a+fp+simd
+
+
+#define RESPECT_STRICT_ALIGNMENT 1
+
+#define RTSM_SQSHRN_SIM_ISSUE
+
+
+/*****************************************************************************/
+
+/* Supplementary macro for setting function attributes */
+.macro asm_function fname
+#ifdef __APPLE__
+ .func _\fname
+ .globl _\fname
+_\fname:
+#else
+ .func \fname
+ .global \fname
+#ifdef __ELF__
+ .hidden \fname
+ .type \fname, %function
+#endif
+\fname:
+#endif
+.endm
+
+/* Transpose elements of single 128 bit registers */
+.macro transpose_single x0,x1,xi,xilen,literal
+ ins \xi\xilen[0], \x0\xilen[0]
+ ins \x1\xilen[0], \x0\xilen[1]
+ trn1 \x0\literal, \x0\literal, \x1\literal
+ trn2 \x1\literal, \xi\literal, \x1\literal
+.endm
+
+/* Transpose elements of 2 different registers */
+.macro transpose x0,x1,xi,xilen,literal
+ mov \xi\xilen, \x0\xilen
+ trn1 \x0\literal, \x0\literal, \x1\literal
+ trn2 \x1\literal, \xi\literal, \x1\literal
+.endm
+
+/* Transpose a block of 4x4 coefficients in four 64-bit registers */
+.macro transpose_4x4_32 x0,x0len x1,x1len x2,x2len x3,x3len,xi,xilen
+ mov \xi\xilen, \x0\xilen
+ trn1 \x0\x0len, \x0\x0len, \x2\x2len
+ trn2 \x2\x2len, \xi\x0len, \x2\x2len
+ mov \xi\xilen, \x1\xilen
+ trn1 \x1\x1len, \x1\x1len, \x3\x3len
+ trn2 \x3\x3len, \xi\x1len, \x3\x3len
+.endm
+
+.macro transpose_4x4_16 x0,x0len x1,x1len, x2,x2len, x3,x3len,xi,xilen
+ mov \xi\xilen, \x0\xilen
+ trn1 \x0\x0len, \x0\x0len, \x1\x1len
+ trn2 \x1\x2len, \xi\x0len, \x1\x2len
+ mov \xi\xilen, \x2\xilen
+ trn1 \x2\x2len, \x2\x2len, \x3\x3len
+ trn2 \x3\x2len, \xi\x1len, \x3\x3len
+.endm
+
+.macro transpose_4x4 x0, x1, x2, x3,x5
+ transpose_4x4_16 \x0,.4h, \x1,.4h, \x2,.4h,\x3,.4h,\x5,.16b
+ transpose_4x4_32 \x0,.2s, \x1,.2s, \x2,.2s,\x3,.2s,\x5,.16b
+.endm
+
+
+#define CENTERJSAMPLE 128
+
+/*****************************************************************************/
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * GLOBAL(void)
+ * jsimd_idct_islow_neon (void * dct_table, JCOEFPTR coef_block,
+ * JSAMPARRAY output_buf, JDIMENSION output_col)
+ */
+
+#define FIX_0_298631336 (2446)
+#define FIX_0_390180644 (3196)
+#define FIX_0_541196100 (4433)
+#define FIX_0_765366865 (6270)
+#define FIX_0_899976223 (7373)
+#define FIX_1_175875602 (9633)
+#define FIX_1_501321110 (12299)
+#define FIX_1_847759065 (15137)
+#define FIX_1_961570560 (16069)
+#define FIX_2_053119869 (16819)
+#define FIX_2_562915447 (20995)
+#define FIX_3_072711026 (25172)
+
+#define FIX_1_175875602_MINUS_1_961570560 (FIX_1_175875602 - FIX_1_961570560)
+#define FIX_1_175875602_MINUS_0_390180644 (FIX_1_175875602 - FIX_0_390180644)
+#define FIX_0_541196100_MINUS_1_847759065 (FIX_0_541196100 - FIX_1_847759065)
+#define FIX_3_072711026_MINUS_2_562915447 (FIX_3_072711026 - FIX_2_562915447)
+#define FIX_0_298631336_MINUS_0_899976223 (FIX_0_298631336 - FIX_0_899976223)
+#define FIX_1_501321110_MINUS_0_899976223 (FIX_1_501321110 - FIX_0_899976223)
+#define FIX_2_053119869_MINUS_2_562915447 (FIX_2_053119869 - FIX_2_562915447)
+#define FIX_0_541196100_PLUS_0_765366865 (FIX_0_541196100 + FIX_0_765366865)
+
+/*
+ * Reference SIMD-friendly 1-D ISLOW iDCT C implementation.
+ * Uses some ideas from the comments in 'simd/jiss2int-64.asm'
+ */
+#define REF_1D_IDCT(xrow0, xrow1, xrow2, xrow3, xrow4, xrow5, xrow6, xrow7) \
+{ \
+ DCTELEM row0, row1, row2, row3, row4, row5, row6, row7; \
+ INT32 q1, q2, q3, q4, q5, q6, q7; \
+ INT32 tmp11_plus_tmp2, tmp11_minus_tmp2; \
+ \
+ /* 1-D iDCT input data */ \
+ row0 = xrow0; \
+ row1 = xrow1; \
+ row2 = xrow2; \
+ row3 = xrow3; \
+ row4 = xrow4; \
+ row5 = xrow5; \
+ row6 = xrow6; \
+ row7 = xrow7; \
+ \
+ q5 = row7 + row3; \
+ q4 = row5 + row1; \
+ q6 = MULTIPLY(q5, FIX_1_175875602_MINUS_1_961570560) + \
+ MULTIPLY(q4, FIX_1_175875602); \
+ q7 = MULTIPLY(q5, FIX_1_175875602) + \
+ MULTIPLY(q4, FIX_1_175875602_MINUS_0_390180644); \
+ q2 = MULTIPLY(row2, FIX_0_541196100) + \
+ MULTIPLY(row6, FIX_0_541196100_MINUS_1_847759065); \
+ q4 = q6; \
+ q3 = ((INT32) row0 - (INT32) row4) << 13; \
+ q6 += MULTIPLY(row5, -FIX_2_562915447) + \
+ MULTIPLY(row3, FIX_3_072711026_MINUS_2_562915447); \
+ /* now we can use q1 (reloadable constants have been used up) */ \
+ q1 = q3 + q2; \
+ q4 += MULTIPLY(row7, FIX_0_298631336_MINUS_0_899976223) + \
+ MULTIPLY(row1, -FIX_0_899976223); \
+ q5 = q7; \
+ q1 = q1 + q6; \
+ q7 += MULTIPLY(row7, -FIX_0_899976223) + \
+ MULTIPLY(row1, FIX_1_501321110_MINUS_0_899976223); \
+ \
+ /* (tmp11 + tmp2) has been calculated (out_row1 before descale) */ \
+ tmp11_plus_tmp2 = q1; \
+ row1 = 0; \
+ \
+ q1 = q1 - q6; \
+ q5 += MULTIPLY(row5, FIX_2_053119869_MINUS_2_562915447) + \
+ MULTIPLY(row3, -FIX_2_562915447); \
+ q1 = q1 - q6; \
+ q6 = MULTIPLY(row2, FIX_0_541196100_PLUS_0_765366865) + \
+ MULTIPLY(row6, FIX_0_541196100); \
+ q3 = q3 - q2; \
+ \
+ /* (tmp11 - tmp2) has been calculated (out_row6 before descale) */ \
+ tmp11_minus_tmp2 = q1; \
+ \
+ q1 = ((INT32) row0 + (INT32) row4) << 13; \
+ q2 = q1 + q6; \
+ q1 = q1 - q6; \
+ \
+ /* pick up the results */ \
+ tmp0 = q4; \
+ tmp1 = q5; \
+ tmp2 = (tmp11_plus_tmp2 - tmp11_minus_tmp2) / 2; \
+ tmp3 = q7; \
+ tmp10 = q2; \
+ tmp11 = (tmp11_plus_tmp2 + tmp11_minus_tmp2) / 2; \
+ tmp12 = q3; \
+ tmp13 = q1; \
+}
+
+#define XFIX_0_899976223 v0.4h[0]
+#define XFIX_0_541196100 v0.4h[1]
+#define XFIX_2_562915447 v0.4h[2]
+#define XFIX_0_298631336_MINUS_0_899976223 v0.4h[3]
+#define XFIX_1_501321110_MINUS_0_899976223 v1.4h[0]
+#define XFIX_2_053119869_MINUS_2_562915447 v1.4h[1]
+#define XFIX_0_541196100_PLUS_0_765366865 v1.4h[2]
+#define XFIX_1_175875602 v1.4h[3]
+#define XFIX_1_175875602_MINUS_0_390180644 v2.4h[0]
+#define XFIX_0_541196100_MINUS_1_847759065 v2.4h[1]
+#define XFIX_3_072711026_MINUS_2_562915447 v2.4h[2]
+#define XFIX_1_175875602_MINUS_1_961570560 v2.4h[3]
+
+.balign 16
+jsimd_idct_islow_neon_consts:
+ .short FIX_0_899976223 /* d0[0] */
+ .short FIX_0_541196100 /* d0[1] */
+ .short FIX_2_562915447 /* d0[2] */
+ .short FIX_0_298631336_MINUS_0_899976223 /* d0[3] */
+ .short FIX_1_501321110_MINUS_0_899976223 /* d1[0] */
+ .short FIX_2_053119869_MINUS_2_562915447 /* d1[1] */
+ .short FIX_0_541196100_PLUS_0_765366865 /* d1[2] */
+ .short FIX_1_175875602 /* d1[3] */
+ /* reloadable constants */
+ .short FIX_1_175875602_MINUS_0_390180644 /* d2[0] */
+ .short FIX_0_541196100_MINUS_1_847759065 /* d2[1] */
+ .short FIX_3_072711026_MINUS_2_562915447 /* d2[2] */
+ .short FIX_1_175875602_MINUS_1_961570560 /* d2[3] */
+
+asm_function jsimd_idct_islow_neon
+
+ DCT_TABLE .req x0
+ COEF_BLOCK .req x1
+ OUTPUT_BUF .req x2
+ OUTPUT_COL .req x3
+ TMP1 .req x0
+ TMP2 .req x1
+ TMP3 .req x2
+ TMP4 .req x15
+
+ ROW0L .req v16
+ ROW0R .req v17
+ ROW1L .req v18
+ ROW1R .req v19
+ ROW2L .req v20
+ ROW2R .req v21
+ ROW3L .req v22
+ ROW3R .req v23
+ ROW4L .req v24
+ ROW4R .req v25
+ ROW5L .req v26
+ ROW5R .req v27
+ ROW6L .req v28
+ ROW6R .req v29
+ ROW7L .req v30
+ ROW7R .req v31
+
+ adr x15, jsimd_idct_islow_neon_consts
+ ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [COEF_BLOCK], 32
+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
+ ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [COEF_BLOCK], 32
+ mul v16.4h, v16.4h, v0.4h
+ mul v17.4h, v17.4h, v1.4h
+ ins v16.2d[1], v17.2d[0] /* 128 bit q8 */
+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
+ mul v18.4h, v18.4h, v2.4h
+ mul v19.4h, v19.4h, v3.4h
+ ins v18.2d[1], v19.2d[0] /* 128 bit q9 */
+ ld1 {v24.4h, v25.4h, v26.4h, v27.4h}, [COEF_BLOCK], 32
+ mul v20.4h, v20.4h, v4.4h
+ mul v21.4h, v21.4h, v5.4h
+ ins v20.2d[1], v21.2d[0] /* 128 bit q10 */
+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [DCT_TABLE], 32
+ mul v22.4h, v22.4h, v6.4h
+ mul v23.4h, v23.4h, v7.4h
+ ins v22.2d[1], v23.2d[0] /* 128 bit q11 */
+ ld1 {v28.4h, v29.4h, v30.4h, v31.4h}, [COEF_BLOCK], 32
+ mul v24.4h, v24.4h, v0.4h
+ mul v25.4h, v25.4h, v1.4h
+ ins v24.2d[1], v25.2d[0] /* 128 bit q12 */
+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [DCT_TABLE], 32
+ mul v28.4h, v28.4h, v4.4h
+ mul v29.4h, v29.4h, v5.4h
+ ins v28.2d[1], v29.2d[0] /* 128 bit q14 */
+ mul v26.4h, v26.4h, v2.4h
+ mul v27.4h, v27.4h, v3.4h
+ ins v26.2d[1], v27.2d[0] /* 128 bit q13 */
+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [x15] /* load constants */
+ add x15, x15, #16
+ mul v30.4h, v30.4h, v6.4h
+ mul v31.4h, v31.4h, v7.4h
+ ins v30.2d[1], v31.2d[0] /* 128 bit q15 */
+ sub sp, sp, #32
+ st1 {v8.4h-v11.4h}, [sp] /* save NEON registers */
+ sub sp, sp, #32
+ st1 {v12.4h-v15.4h}, [sp]
+ /* 1-D IDCT, pass 1, left 4x8 half */
+ add v4.4h, ROW7L.4h, ROW3L.4h
+ add v5.4h, ROW5L.4h, ROW1L.4h
+ smull v12.4s, v4.4h, XFIX_1_175875602_MINUS_1_961570560
+ smlal v12.4s, v5.4h, XFIX_1_175875602
+ smull v14.4s, v4.4h, XFIX_1_175875602
+ /* Check for the zero coefficients in the right 4x8 half */
+ /* push {x4, x5} */
+ stp x4, x5, [sp, -16]!
+ mov x5, #0
+ smlal v14.4s, v5.4h, XFIX_1_175875602_MINUS_0_390180644
+ ssubl v6.4s, ROW0L.4h, ROW4L.4h
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 1 * 8))]
+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
+ smlal v4.4s, ROW6L.4h, XFIX_0_541196100_MINUS_1_847759065
+ orr x0, x4, x5
+ mov v8.16b, v12.16b
+ smlsl v12.4s, ROW5L.4h, XFIX_2_562915447
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 2 * 8))]
+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+ shl v6.4s, v6.4s, #13
+ orr x0, x0, x4
+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
+ orr x0, x0 , x5
+ add v2.4s, v6.4s, v4.4s
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 3 * 8))]
+ mov v10.16b, v14.16b
+ add v2.4s, v2.4s, v12.4s
+ orr x0, x0, x4
+ smlsl v14.4s, ROW7L.4h, XFIX_0_899976223
+ orr x0, x0, x5
+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+ rshrn ROW1L.4h, v2.4s, #11
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 4 * 8))]
+ sub v2.4s, v2.4s, v12.4s
+ smlal v10.4s, ROW5L.4h, XFIX_2_053119869_MINUS_2_562915447
+ orr x0, x0, x4
+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
+ orr x0, x0, x5
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 5 * 8))]
+ smlal v12.4s, ROW6L.4h, XFIX_0_541196100
+ sub v6.4s, v6.4s, v4.4s
+ orr x0, x0, x4
+ rshrn ROW6L.4h, v2.4s, #11
+ orr x0, x0, x5
+ add v2.4s, v6.4s, v10.4s
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 6 * 8))]
+ sub v6.4s, v6.4s, v10.4s
+ saddl v10.4s, ROW0L.4h, ROW4L.4h
+ orr x0, x0, x4
+ rshrn ROW2L.4h, v2.4s, #11
+ orr x0, x0, x5
+ rshrn ROW5L.4h, v6.4s, #11
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 7 * 8))]
+ shl v10.4s, v10.4s, #13
+ smlal v8.4s, ROW7L.4h, XFIX_0_298631336_MINUS_0_899976223
+ orr x0, x0, x4
+ add v4.4s, v10.4s, v12.4s
+ orr x0, x0, x5
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s
+ ldr x4, [COEF_BLOCK, #(-96 + 2 * (4 + 0 * 8))]
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ orr x0, x4, x5
+ sub v6.4s, v2.4s, v8.4s
+ /* pop {x4, x5} */
+ ldp x4, x5, [sp], 16
+ rshrn ROW7L.4h, v4.4s, #11
+ rshrn ROW3L.4h, v10.4s, #11
+ rshrn ROW0L.4h, v12.4s, #11
+ rshrn ROW4L.4h, v6.4s, #11
+ cmp x0, #0 /* orrs instruction removed */
+
+ beq 3f /* Go to do some special handling for the sparse right 4x8 half */
+
+ /* 1-D IDCT, pass 1, right 4x8 half */
+ ld1 {v2.4h}, [x15] /* reload constants */
+ add v10.4h, ROW7R.4h, ROW3R.4h
+ add v8.4h, ROW5R.4h, ROW1R.4h
+ /* Transpose ROW6L <-> ROW7L (v3 available free register) */
+ transpose ROW6L, ROW7L, v3, .16b, .4h
+ smull v12.4s, v10.4h, XFIX_1_175875602_MINUS_1_961570560
+ smlal v12.4s, v8.4h, XFIX_1_175875602
+ /* Transpose ROW2L <-> ROW3L (v3 available free register) */
+ transpose ROW2L, ROW3L, v3, .16b, .4h
+ smull v14.4s, v10.4h, XFIX_1_175875602
+ smlal v14.4s, v8.4h, XFIX_1_175875602_MINUS_0_390180644
+ /* Transpose ROW0L <-> ROW1L (v3 available free register) */
+ transpose ROW0L, ROW1L, v3, .16b, .4h
+ ssubl v6.4s, ROW0R.4h, ROW4R.4h
+ smull v4.4s, ROW2R.4h, XFIX_0_541196100
+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
+ /* Transpose ROW4L <-> ROW5L (v3 available free register) */
+ transpose ROW4L, ROW5L, v3, .16b, .4h
+ mov v8.16b, v12.16b
+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447
+ smlal v12.4s, ROW3R.4h, XFIX_3_072711026_MINUS_2_562915447
+ /* Transpose ROW1L <-> ROW3L (v3 available free register) */
+ transpose ROW1L, ROW3L, v3, .16b, .2s
+ shl v6.4s, v6.4s, #13
+ smlsl v8.4s, ROW1R.4h, XFIX_0_899976223
+ /* Transpose ROW4L <-> ROW6L (v3 available free register) */
+ transpose ROW4L, ROW6L, v3, .16b, .2s
+ add v2.4s, v6.4s, v4.4s
+ mov v10.16b, v14.16b
+ add v2.4s, v2.4s, v12.4s
+ /* Transpose ROW0L <-> ROW2L (v3 available free register) */
+ transpose ROW0L, ROW2L, v3, .16b, .2s
+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223
+ smlal v14.4s, ROW1R.4h, XFIX_1_501321110_MINUS_0_899976223
+ rshrn ROW1R.4h, v2.4s, #11
+ /* Transpose ROW5L <-> ROW7L (v3 available free register) */
+ transpose ROW5L, ROW7L, v3, .16b, .2s
+ sub v2.4s, v2.4s, v12.4s
+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
+ smlsl v10.4s, ROW3R.4h, XFIX_2_562915447
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW2R.4h, XFIX_0_541196100_PLUS_0_765366865
+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100
+ sub v6.4s, v6.4s, v4.4s
+ rshrn ROW6R.4h, v2.4s, #11
+ add v2.4s, v6.4s, v10.4s
+ sub v6.4s, v6.4s, v10.4s
+ saddl v10.4s, ROW0R.4h, ROW4R.4h
+ rshrn ROW2R.4h, v2.4s, #11
+ rshrn ROW5R.4h, v6.4s, #11
+ shl v10.4s, v10.4s, #13
+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
+ add v4.4s, v10.4s, v12.4s
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s /* ROW0R before the narrowing shift */
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ sub v6.4s, v2.4s, v8.4s /* ROW4R; writing v12 here would clobber ROW0R */
+ rshrn ROW7R.4h, v4.4s, #11
+ rshrn ROW3R.4h, v10.4s, #11
+ rshrn ROW0R.4h, v12.4s, #11
+ rshrn ROW4R.4h, v6.4s, #11
+ /* Transpose right 4x8 half */
+ transpose ROW6R, ROW7R, v3, .16b, .4h
+ transpose ROW2R, ROW3R, v3, .16b, .4h
+ transpose ROW0R, ROW1R, v3, .16b, .4h
+ transpose ROW4R, ROW5R, v3, .16b, .4h
+ transpose ROW1R, ROW3R, v3, .16b, .2s
+ transpose ROW4R, ROW6R, v3, .16b, .2s
+ transpose ROW0R, ROW2R, v3, .16b, .2s
+ transpose ROW5R, ROW7R, v3, .16b, .2s
+
+1: /* 1-D IDCT, pass 2 (normal variant), left 4x8 half */
+ ld1 {v2.4h}, [x15] /* reload constants */
+ smull v12.4S, ROW1R.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
+ smlal v12.4s, ROW1L.4h, XFIX_1_175875602
+ smlal v12.4s, ROW3R.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
+ smull v14.4s, ROW3R.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
+ smlal v14.4s, ROW3L.4h, XFIX_1_175875602
+ smlal v14.4s, ROW1R.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
+ ssubl v6.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
+ smlal v4.4s, ROW2R.4h, XFIX_0_541196100_MINUS_1_847759065 /* ROW6L.4h <-> ROW2R.4h */
+ mov v8.16b, v12.16b
+ smlsl v12.4s, ROW1R.4h, XFIX_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+ shl v6.4s, v6.4s, #13
+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
+ add v2.4s, v6.4s, v4.4s
+ mov v10.16b, v14.16b
+ add v2.4s, v2.4s, v12.4s
+ smlsl v14.4s, ROW3R.4h, XFIX_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+ shrn ROW1L.4h, v2.4s, #16
+ sub v2.4s, v2.4s, v12.4s
+ smlal v10.4s, ROW1R.4h, XFIX_2_053119869_MINUS_2_562915447 /* ROW5L.4h <-> ROW1R.4h */
+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+ smlal v12.4s, ROW2R.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
+ sub v6.4s, v6.4s, v4.4s
+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
+ add v2.4s, v6.4s, v10.4s
+ sub v6.4s, v6.4s, v10.4s
+ saddl v10.4s, ROW0L.4h, ROW0R.4h /* ROW4L.4h <-> ROW0R.4h */
+ shrn ROW2L.4h, v2.4s, #16
+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
+ shl v10.4s, v10.4s, #13
+ smlal v8.4s, ROW3R.4h, XFIX_0_298631336_MINUS_0_899976223 /* ROW7L.4h <-> ROW3R.4h */
+ add v4.4s, v10.4s, v12.4s
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ sub v6.4s, v2.4s, v8.4s
+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
+ shrn ROW3L.4h, v10.4s, #16
+ shrn ROW0L.4h, v12.4s, #16
+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
+ /* 1-D IDCT, pass 2, right 4x8 half */
+ ld1 {v2.4h}, [x15] /* reload constants */
+ smull v12.4s, ROW5R.4h, XFIX_1_175875602
+ smlal v12.4s, ROW5L.4h, XFIX_1_175875602 /* ROW5L.4h <-> ROW1R.4h */
+ smlal v12.4s, ROW7R.4h, XFIX_1_175875602_MINUS_1_961570560
+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560 /* ROW7L.4h <-> ROW3R.4h */
+ smull v14.4s, ROW7R.4h, XFIX_1_175875602
+ smlal v14.4s, ROW7L.4h, XFIX_1_175875602 /* ROW7L.4h <-> ROW3R.4h */
+ smlal v14.4s, ROW5R.4h, XFIX_1_175875602_MINUS_0_390180644
+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644 /* ROW5L.4h <-> ROW1R.4h */
+ ssubl v6.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
+ smull v4.4s, ROW6L.4h, XFIX_0_541196100 /* ROW6L.4h <-> ROW2R.4h */
+ smlal v4.4s, ROW6R.4h, XFIX_0_541196100_MINUS_1_847759065
+ mov v8.16b, v12.16b
+ smlsl v12.4s, ROW5R.4h, XFIX_2_562915447
+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
+ shl v6.4s, v6.4s, #13
+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
+ add v2.4s, v6.4s, v4.4s
+ mov v10.16b, v14.16b
+ add v2.4s, v2.4s, v12.4s
+ smlsl v14.4s, ROW7R.4h, XFIX_0_899976223
+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223 /* ROW5L.4h <-> ROW1R.4h */
+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
+ sub v2.4s, v2.4s, v12.4s
+ smlal v10.4s, ROW5R.4h, XFIX_2_053119869_MINUS_2_562915447
+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447 /* ROW7L.4h <-> ROW3R.4h */
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865 /* ROW6L.4h <-> ROW2R.4h */
+ smlal v12.4s, ROW6R.4h, XFIX_0_541196100
+ sub v6.4s, v6.4s, v4.4s
+ shrn ROW6R.4h, v2.4s, #16
+ add v2.4s, v6.4s, v10.4s
+ sub v6.4s, v6.4s, v10.4s
+ saddl v10.4s, ROW4L.4h, ROW4R.4h /* ROW4L.4h <-> ROW0R.4h */
+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
+ shrn ROW5R.4h, v6.4s, #16
+ shl v10.4s, v10.4s, #13
+ smlal v8.4s, ROW7R.4h, XFIX_0_298631336_MINUS_0_899976223
+ add v4.4s, v10.4s, v12.4s
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ sub v6.4s, v2.4s, v8.4s
+ shrn ROW7R.4h, v4.4s, #16
+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
+ shrn ROW4R.4h, v6.4s, #16
+
+2: /* Descale to 8-bit and range limit */
+ ins v16.2d[1], v17.2d[0]
+ ins v18.2d[1], v19.2d[0]
+ ins v20.2d[1], v21.2d[0]
+ ins v22.2d[1], v23.2d[0]
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+ sqrshrn v16.8b, v16.8h, #2
+ sqrshrn2 v16.16b, v18.8h, #2
+ sqrshrn v18.8b, v20.8h, #2
+ sqrshrn2 v18.16b, v22.8h, #2
+#else
+ sqrshrn v16.4h, v16.4s, #2
+ sqrshrn2 v16.8h, v18.4s, #2
+ sqrshrn v18.4h, v20.4s, #2
+ sqrshrn2 v18.8h, v22.4s, #2
+#endif
+ /* vpop {v8.4h-d15.4h} */ /* restore NEON registers */
+
+ ld1 {v12.4h-v15.4h}, [sp], 32
+ ld1 {v8.4h-v11.4h}, [sp], 32
+ ins v24.2d[1], v25.2d[0]
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+
+ sqrshrn v20.8b, v24.8h, #2
+#else
+
+ sqrshrn v20.4h, v24.4s, #2
+#endif
+ /* Transpose the final 8-bit samples and do signed->unsigned conversion */
+ /* trn1 v16.8h, v16.8h, v18.8h */
+ transpose v16, v18, v3, .16b, .8h
+ ins v26.2d[1], v27.2d[0]
+ ins v28.2d[1], v29.2d[0]
+ ins v30.2d[1], v31.2d[0]
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+ sqrshrn2 v20.16b, v26.8h, #2
+ sqrshrn v22.8b, v28.8h, #2
+#else
+ sqrshrn2 v20.8h, v26.4s, #2
+ sqrshrn v22.4h, v28.4s, #2
+#endif
+ movi v0.16b, #(CENTERJSAMPLE)
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+ sqrshrn2 v22.16b, v30.8h, #2
+#else
+ sqrshrn2 v22.8h, v30.4s, #2
+#endif
+ transpose_single v16, v17, v3, .2d, .8b
+ transpose_single v18, v19, v3, .2d, .8b
+ add v16.8b, v16.8b, v0.8b
+ add v17.8b, v17.8b, v0.8b
+ add v18.8b, v18.8b, v0.8b
+ add v19.8b, v19.8b, v0.8b
+ transpose v20, v22, v3, .16b, .8h
+ /* Store results to the output buffer */
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ st1 {v16.8b}, [TMP1]
+ transpose_single v20, v21, v3, .2d, .8b
+ st1 {v17.8b}, [TMP2]
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ st1 {v18.8b}, [TMP1]
+ add v20.8b, v20.8b, v0.8b
+ add v21.8b, v21.8b, v0.8b
+ st1 {v19.8b}, [TMP2]
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ ldp TMP3, TMP4, [OUTPUT_BUF]
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ add TMP3, TMP3, OUTPUT_COL
+ add TMP4, TMP4, OUTPUT_COL
+ transpose_single v22, v23, v3, .2d, .8b
+ st1 {v20.8b}, [TMP1]
+ add v22.8b, v22.8b, v0.8b
+ add v23.8b, v23.8b, v0.8b
+ st1 {v21.8b}, [TMP2]
+ st1 {v22.8b}, [TMP3]
+ st1 {v23.8b}, [TMP4]
+ blr x30
+
+3: /* Left 4x8 half is done, right 4x8 half contains mostly zeros */
+
+ /* Transpose left 4x8 half */
+ transpose ROW6L, ROW7L, v3, .16b, .4h
+ transpose ROW2L, ROW3L, v3, .16b, .4h
+ transpose ROW0L, ROW1L, v3, .16b, .4h
+ transpose ROW4L, ROW5L, v3, .16b, .4h
+ shl ROW0R.4h, ROW0R.4h, #2 /* PASS1_BITS */
+ transpose ROW1L, ROW3L, v3, .16b, .2s
+ transpose ROW4L, ROW6L, v3, .16b, .2s
+ transpose ROW0L, ROW2L, v3, .16b, .2s
+ transpose ROW5L, ROW7L, v3, .16b, .2s
+ cmp x0, #0
+ beq 4f /* Right 4x8 half has all zeros, go to 'sparse' second pass */
+
+ /* Only row 0 is non-zero for the right 4x8 half */
+ dup ROW1R.4h, ROW0R.4h[1]
+ dup ROW2R.4h, ROW0R.4h[2]
+ dup ROW3R.4h, ROW0R.4h[3]
+ dup ROW4R.4h, ROW0R.4h[0]
+ dup ROW5R.4h, ROW0R.4h[1]
+ dup ROW6R.4h, ROW0R.4h[2]
+ dup ROW7R.4h, ROW0R.4h[3]
+ dup ROW0R.4h, ROW0R.4h[0]
+ b 1b /* Go to 'normal' second pass */
+
+4: /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), left 4x8 half */
+ ld1 {v2.4h}, [x15] /* reload constants */
+ smull v12.4s, ROW1L.4h, XFIX_1_175875602
+ smlal v12.4s, ROW3L.4h, XFIX_1_175875602_MINUS_1_961570560
+ smull v14.4s, ROW3L.4h, XFIX_1_175875602
+ smlal v14.4s, ROW1L.4h, XFIX_1_175875602_MINUS_0_390180644
+ smull v4.4s, ROW2L.4h, XFIX_0_541196100
+ sshll v6.4s, ROW0L.4h, #13
+ mov v8.16b, v12.16b
+ smlal v12.4s, ROW3L.4h, XFIX_3_072711026_MINUS_2_562915447
+ smlsl v8.4s, ROW1L.4h, XFIX_0_899976223
+ add v2.4s, v6.4s, v4.4s
+ mov v10.16b, v14.16b
+ smlal v14.4s, ROW1L.4h, XFIX_1_501321110_MINUS_0_899976223
+ add v2.4s, v2.4s, v12.4s
+ add v12.4s, v12.4s, v12.4s
+ smlsl v10.4s, ROW3L.4h, XFIX_2_562915447
+ shrn ROW1L.4h, v2.4s, #16
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW2L.4h, XFIX_0_541196100_PLUS_0_765366865
+ sub v6.4s, v6.4s, v4.4s
+ shrn ROW2R.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
+ add v2.4s, v6.4s, v10.4s
+ sub v6.4s, v6.4s, v10.4s
+ sshll v10.4s, ROW0L.4h, #13
+ shrn ROW2L.4h, v2.4s, #16
+ shrn ROW1R.4h, v6.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
+ add v4.4s, v10.4s, v12.4s
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ sub v6.4s, v2.4s, v8.4s
+ shrn ROW3R.4h, v4.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
+ shrn ROW3L.4h, v10.4s, #16
+ shrn ROW0L.4h, v12.4s, #16
+ shrn ROW0R.4h, v6.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
+ /* 1-D IDCT, pass 2 (sparse variant with zero rows 4-7), right 4x8 half */
+ ld1 {v2.4h}, [x15] /* reload constants */
+ smull v12.4s, ROW5L.4h, XFIX_1_175875602
+ smlal v12.4s, ROW7L.4h, XFIX_1_175875602_MINUS_1_961570560
+ smull v14.4s, ROW7L.4h, XFIX_1_175875602
+ smlal v14.4s, ROW5L.4h, XFIX_1_175875602_MINUS_0_390180644
+ smull v4.4s, ROW6L.4h, XFIX_0_541196100
+ sshll v6.4s, ROW4L.4h, #13
+ mov v8.16b, v12.16b
+ smlal v12.4s, ROW7L.4h, XFIX_3_072711026_MINUS_2_562915447
+ smlsl v8.4s, ROW5L.4h, XFIX_0_899976223
+ add v2.4s, v6.4s, v4.4s
+ mov v10.16b, v14.16b
+ smlal v14.4s, ROW5L.4h, XFIX_1_501321110_MINUS_0_899976223
+ add v2.4s, v2.4s, v12.4s
+ add v12.4s, v12.4s, v12.4s
+ smlsl v10.4s, ROW7L.4h, XFIX_2_562915447
+ shrn ROW5L.4h, v2.4s, #16 /* ROW5L.4h <-> ROW1R.4h */
+ sub v2.4s, v2.4s, v12.4s
+ smull v12.4s, ROW6L.4h, XFIX_0_541196100_PLUS_0_765366865
+ sub v6.4s, v6.4s, v4.4s
+ shrn ROW6R.4h, v2.4s, #16
+ add v2.4s, v6.4s, v10.4s
+ sub v6.4s, v6.4s, v10.4s
+ sshll v10.4s, ROW4L.4h, #13
+ shrn ROW6L.4h, v2.4s, #16 /* ROW6L.4h <-> ROW2R.4h */
+ shrn ROW5R.4h, v6.4s, #16
+ add v4.4s, v10.4s, v12.4s
+ sub v2.4s, v10.4s, v12.4s
+ add v12.4s, v4.4s, v14.4s
+ sub v4.4s, v4.4s, v14.4s
+ add v10.4s, v2.4s, v8.4s
+ sub v6.4s, v2.4s, v8.4s
+ shrn ROW7R.4h, v4.4s, #16
+ shrn ROW7L.4h, v10.4s, #16 /* ROW7L.4h <-> ROW3R.4h */
+ shrn ROW4L.4h, v12.4s, #16 /* ROW4L.4h <-> ROW0R.4h */
+ shrn ROW4R.4h, v6.4s, #16
+ b 2b /* Go to epilogue */
+
+ .unreq DCT_TABLE
+ .unreq COEF_BLOCK
+ .unreq OUTPUT_BUF
+ .unreq OUTPUT_COL
+ .unreq TMP1
+ .unreq TMP2
+ .unreq TMP3
+ .unreq TMP4
+
+ .unreq ROW0L
+ .unreq ROW0R
+ .unreq ROW1L
+ .unreq ROW1R
+ .unreq ROW2L
+ .unreq ROW2R
+ .unreq ROW3L
+ .unreq ROW3R
+ .unreq ROW4L
+ .unreq ROW4R
+ .unreq ROW5L
+ .unreq ROW5R
+ .unreq ROW6L
+ .unreq ROW6R
+ .unreq ROW7L
+ .unreq ROW7R
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_ifast_neon
+ *
+ * This function contains a fast, not so accurate integer implementation of
+ * the inverse DCT (Discrete Cosine Transform). It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_ifast'
+ * function from jidctfst.c
+ *
+ * Normally a 1-D AAN DCT needs 5 multiplications and 29 additions.
+ * But in the ARM NEON case some extra additions are required because the
+ * SQDMULH (VQDMULH on 32-bit ARM) instruction can't handle constants larger
+ * than 1. So expressions like "x * 1.082392200" have to be converted to
+ * "x * 0.082392200 + x", which introduces an extra addition. Overall, there
+ * are 6 extra additions per 1-D IDCT pass, totalling 5 SQDMULH and 35 ADD/SUB instructions.
+ */
+
+#define XFIX_1_082392200 v0.4h[0] /* lane 0 of the constants loaded into v0 */
+#define XFIX_1_414213562 v0.4h[1] /* lane 1 */
+#define XFIX_1_847759065 v0.4h[2] /* lane 2 */
+#define XFIX_2_613125930 v0.4h[3] /* lane 3 */
+/* Each entry keeps only the part of the factor above 1.0 (above 2.0 for the last one), scaled by 2^15 for SQDMULH; the code adds x (or 2x) back */
+.balign 16
+jsimd_idct_ifast_neon_consts:
+ .short (277 * 128 - 256 * 128) /* XFIX_1_082392200: 277/256 minus 1.0 */
+ .short (362 * 128 - 256 * 128) /* XFIX_1_414213562: 362/256 minus 1.0 */
+ .short (473 * 128 - 256 * 128) /* XFIX_1_847759065: 473/256 minus 1.0 */
+ .short (669 * 128 - 512 * 128) /* XFIX_2_613125930: 669/256 minus 2.0 */
+
+asm_function jsimd_idct_ifast_neon
+ /* Arguments: x0 = dequantization table, x1 = coefficient block, x2 = array of output row pointers, x3 = column offset added to each row pointer */
+ DCT_TABLE .req x0
+ COEF_BLOCK .req x1
+ OUTPUT_BUF .req x2
+ OUTPUT_COL .req x3
+ TMP1 .req x0
+ TMP2 .req x1
+ TMP3 .req x2
+ TMP4 .req x15
+
+ /* Save the callee-saved low 64 bits of v8-v15 (AAPCS64) before the coefficient loads overwrite them */
+ sub sp, sp, #32
+ st1 {v8.4h-v11.4h}, [sp]
+ sub sp, sp, #32
+ st1 {v12.4h-v15.4h}, [sp]
+ /* Load and dequantize coefficients into NEON registers
+ * with the following allocation:
+ * 0 1 2 3 | 4 5 6 7
+ * ---------+--------
+ * 0 | d16 | d17 ( v8.8h )
+ * 1 | d18 | d19 ( v9.8h )
+ * 2 | d20 | d21 ( v10.8h )
+ * 3 | d22 | d23 ( v11.8h )
+ * 4 | d24 | d25 ( v12.8h )
+ * 5 | d26 | d27 ( v13.8h )
+ * 6 | d28 | d29 ( v14.8h )
+ * 7 | d30 | d31 ( v15.8h )
+ */
+ adr x15, jsimd_idct_ifast_neon_consts
+ ld1 {v8.8h, v9.8h}, [COEF_BLOCK], 32
+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
+ ld1 {v10.8h, v11.8h}, [COEF_BLOCK], 32
+ mul v8.8h, v8.8h, v0.8h
+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32
+ mul v9.8h, v9.8h, v1.8h
+ ld1 {v12.8h, v13.8h}, [COEF_BLOCK], 32
+ mul v10.8h, v10.8h, v2.8h
+ ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
+ mul v11.8h, v11.8h, v3.8h
+ ld1 {v14.8h, v15.8h}, [COEF_BLOCK], 32
+ mul v12.8h, v12.8h, v0.8h
+ ld1 {v2.8h, v3.8h}, [DCT_TABLE], 32
+ mul v14.8h, v14.8h, v2.8h
+ mul v13.8h, v13.8h, v1.8h
+ ld1 {v0.4h}, [x15] /* load constants */
+ mul v15.8h, v15.8h, v3.8h
+
+ /* 1-D IDCT, pass 1 */
+ sub v2.8h, v10.8h, v14.8h
+ add v14.8h, v10.8h, v14.8h
+ sub v1.8h, v11.8h, v13.8h
+ add v13.8h, v11.8h, v13.8h
+ sub v5.8h, v9.8h, v15.8h
+ add v15.8h, v9.8h, v15.8h
+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562
+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930
+ add v3.8h, v1.8h, v1.8h /* 2 * v1: whole part of the 2.613125930 factor */
+ sub v1.8h, v5.8h, v1.8h
+ add v10.8h, v2.8h, v4.8h /* fractional product + v2 = v2 * 1.414213562 */
+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065
+ sub v2.8h, v15.8h, v13.8h
+ add v3.8h, v3.8h, v6.8h /* adds the fractional product for 2.613125930 */
+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562
+ add v1.8h, v1.8h, v4.8h
+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200
+ sub v10.8h, v10.8h, v14.8h
+ add v2.8h, v2.8h, v6.8h
+ sub v6.8h, v8.8h, v12.8h
+ add v12.8h, v8.8h, v12.8h
+ add v9.8h, v5.8h, v4.8h
+ add v5.8h, v6.8h, v10.8h
+ sub v10.8h, v6.8h, v10.8h
+ add v6.8h, v15.8h, v13.8h
+ add v8.8h, v12.8h, v14.8h
+ sub v3.8h, v6.8h, v3.8h
+ sub v12.8h, v12.8h, v14.8h
+ sub v3.8h, v3.8h, v1.8h
+ sub v1.8h, v9.8h, v1.8h
+ add v2.8h, v3.8h, v2.8h
+ sub v15.8h, v8.8h, v6.8h
+ add v1.8h, v1.8h, v2.8h
+ add v8.8h, v8.8h, v6.8h
+ add v14.8h, v5.8h, v3.8h
+ sub v9.8h, v5.8h, v3.8h
+ sub v13.8h, v10.8h, v2.8h
+ add v10.8h, v10.8h, v2.8h
+ /* Transpose q8-q9 */
+ mov v18.16b, v8.16b
+ trn1 v8.8h, v8.8h, v9.8h
+ trn2 v9.8h, v18.8h, v9.8h
+ sub v11.8h, v12.8h, v1.8h
+ /* Transpose q14-q15 */
+ mov v18.16b, v14.16b
+ trn1 v14.8h, v14.8h, v15.8h
+ trn2 v15.8h, v18.8h, v15.8h
+ add v12.8h, v12.8h, v1.8h
+ /* Transpose q10-q11 */
+ mov v18.16b, v10.16b
+ trn1 v10.8h, v10.8h, v11.8h
+ trn2 v11.8h, v18.8h, v11.8h
+ /* Transpose q12-q13 */
+ mov v18.16b, v12.16b
+ trn1 v12.8h, v12.8h, v13.8h
+ trn2 v13.8h, v18.8h, v13.8h
+ /* Transpose q9-q11 */
+ mov v18.16b, v9.16b
+ trn1 v9.4s, v9.4s, v11.4s
+ trn2 v11.4s, v18.4s, v11.4s
+ /* Transpose q12-q14 */
+ mov v18.16b, v12.16b
+ trn1 v12.4s, v12.4s, v14.4s
+ trn2 v14.4s, v18.4s, v14.4s
+ /* Transpose q8-q10 */
+ mov v18.16b, v8.16b
+ trn1 v8.4s, v8.4s, v10.4s
+ trn2 v10.4s, v18.4s, v10.4s
+ /* Transpose q13-q15 */
+ mov v18.16b, v13.16b
+ trn1 v13.4s, v13.4s, v15.4s
+ trn2 v15.4s, v18.4s, v15.4s
+ /* vswp v14.4h, v10-MSB.4h */
+ umov x10, v14.d[0] /* swap the low half of v14 with the high half of v10 via x10 */
+ ins v14.2d[0], v10.2d[1]
+ ins v10.2d[1], x10
+ /* vswp v13.4h, v9MSB.4h */
+
+ umov x10, v13.d[0]
+ ins v13.2d[0], v9.2d[1]
+ ins v9.2d[1], x10
+ /* 1-D IDCT, pass 2 */
+ sub v2.8h, v10.8h, v14.8h
+ /* vswp v15.4h, v11MSB.4h */
+ umov x10, v15.d[0]
+ ins v15.2d[0], v11.2d[1]
+ ins v11.2d[1], x10
+ add v14.8h, v10.8h, v14.8h
+ /* vswp v12.4h, v8-MSB.4h */
+ umov x10, v12.d[0]
+ ins v12.2d[0], v8.2d[1]
+ ins v8.2d[1], x10
+ sub v1.8h, v11.8h, v13.8h
+ add v13.8h, v11.8h, v13.8h
+ sub v5.8h, v9.8h, v15.8h
+ add v15.8h, v9.8h, v15.8h
+ sqdmulh v4.8h, v2.8h, XFIX_1_414213562
+ sqdmulh v6.8h, v1.8h, XFIX_2_613125930
+ add v3.8h, v1.8h, v1.8h
+ sub v1.8h, v5.8h, v1.8h
+ add v10.8h, v2.8h, v4.8h
+ sqdmulh v4.8h, v1.8h, XFIX_1_847759065
+ sub v2.8h, v15.8h, v13.8h
+ add v3.8h, v3.8h, v6.8h
+ sqdmulh v6.8h, v2.8h, XFIX_1_414213562
+ add v1.8h, v1.8h, v4.8h
+ sqdmulh v4.8h, v5.8h, XFIX_1_082392200
+ sub v10.8h, v10.8h, v14.8h
+ add v2.8h, v2.8h, v6.8h
+ sub v6.8h, v8.8h, v12.8h
+ add v12.8h, v8.8h, v12.8h
+ add v9.8h, v5.8h, v4.8h
+ add v5.8h, v6.8h, v10.8h
+ sub v10.8h, v6.8h, v10.8h
+ add v6.8h, v15.8h, v13.8h
+ add v8.8h, v12.8h, v14.8h
+ sub v3.8h, v6.8h, v3.8h
+ sub v12.8h, v12.8h, v14.8h
+ sub v3.8h, v3.8h, v1.8h
+ sub v1.8h, v9.8h, v1.8h
+ add v2.8h, v3.8h, v2.8h
+ sub v15.8h, v8.8h, v6.8h
+ add v1.8h, v1.8h, v2.8h
+ add v8.8h, v8.8h, v6.8h
+ add v14.8h, v5.8h, v3.8h
+ sub v9.8h, v5.8h, v3.8h
+ sub v13.8h, v10.8h, v2.8h
+ add v10.8h, v10.8h, v2.8h
+ sub v11.8h, v12.8h, v1.8h
+ add v12.8h, v12.8h, v1.8h
+ /* Descale to 8-bit and range limit */
+ movi v0.16b, #0x80
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+ sqshrn v8.8b, v8.8h, #5
+ sqshrn2 v8.16b, v9.8h, #5
+ sqshrn v9.8b, v10.8h, #5
+ sqshrn2 v9.16b, v11.8h, #5
+ sqshrn v10.8b, v12.8h, #5
+ sqshrn2 v10.16b, v13.8h, #5
+ sqshrn v11.8b, v14.8h, #5
+ sqshrn2 v11.16b, v15.8h, #5
+#else /* NOTE(review): this variant narrows 32-bit lanes to 16-bit, unlike the branch above - confirm which one production builds select */
+ sqshrn v8.4h, v8.4s, #5
+ sqshrn2 v8.8h, v9.4s, #5
+ sqshrn v9.4h, v10.4s, #5
+ sqshrn2 v9.8h, v11.4s, #5
+ sqshrn v10.4h, v12.4s, #5
+ sqshrn2 v10.8h, v13.4s, #5
+ sqshrn v11.4h, v14.4s, #5
+ sqshrn2 v11.8h, v15.4s, #5
+#endif
+ add v8.16b, v8.16b, v0.16b /* adding 0x80 to every byte: signed -> unsigned sample range */
+ add v9.16b, v9.16b, v0.16b
+ add v10.16b, v10.16b, v0.16b
+ add v11.16b, v11.16b, v0.16b
+ /* Transpose the final 8-bit samples */
+ /* Transpose q8-q9 */
+ mov v18.16b, v8.16b
+ trn1 v8.8h, v8.8h, v9.8h
+ trn2 v9.8h, v18.8h, v9.8h
+ /* Transpose q10-q11 */
+ mov v18.16b, v10.16b
+ trn1 v10.8h, v10.8h, v11.8h
+ trn2 v11.8h, v18.8h, v11.8h
+ /* Transpose q8-q10 */
+ mov v18.16b, v8.16b
+ trn1 v8.4s, v8.4s, v10.4s
+ trn2 v10.4s, v18.4s, v10.4s
+ /* Transpose q9-q11 */
+ mov v18.16b, v9.16b
+ trn1 v9.4s, v9.4s, v11.4s
+ trn2 v11.4s, v18.4s, v11.4s
+ /* make copy */
+ ins v17.2d[0], v8.2d[1]
+ /* Transpose d16-d17-msb */
+ mov v18.16b, v8.16b
+ trn1 v8.8b, v8.8b, v17.8b
+ trn2 v17.8b, v18.8b, v17.8b
+ /* make copy */
+ ins v19.2d[0], v9.2d[1]
+ mov v18.16b, v9.16b
+ trn1 v9.8b, v9.8b, v19.8b
+ trn2 v19.8b, v18.8b, v19.8b
+ /* Store results to the output buffer */
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ st1 {v8.8b}, [TMP1]
+ st1 {v17.8b}, [TMP2]
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ st1 {v9.8b}, [TMP1]
+ /* make copy */
+ ins v21.2d[0], v10.2d[1]
+ mov v18.16b, v10.16b
+ trn1 v10.8b, v10.8b, v21.8b
+ trn2 v21.8b, v18.8b, v21.8b
+ st1 {v19.8b}, [TMP2]
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ ldp TMP3, TMP4, [OUTPUT_BUF]
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ add TMP3, TMP3, OUTPUT_COL
+ add TMP4, TMP4, OUTPUT_COL
+ st1 {v10.8b}, [TMP1]
+ /* make copy */
+ ins v23.2d[0], v11.2d[1]
+ mov v18.16b, v11.16b
+ trn1 v11.8b, v11.8b, v23.8b
+ trn2 v23.8b, v18.8b, v23.8b
+ st1 {v21.8b}, [TMP2]
+ st1 {v11.8b}, [TMP3]
+ st1 {v23.8b}, [TMP4]
+ /* Restore the callee-saved low halves of v8-v15 now that all results are stored */
+ ld1 {v12.4h-v15.4h}, [sp], 32
+ ld1 {v8.4h-v11.4h}, [sp], 32
+ ret /* plain return through x30; blr would also overwrite the link register */
+
+ .unreq DCT_TABLE
+ .unreq COEF_BLOCK
+ .unreq OUTPUT_BUF
+ .unreq OUTPUT_COL
+ .unreq TMP1
+ .unreq TMP2
+ .unreq TMP3
+ .unreq TMP4
+.endfunc
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_4x4_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 4x4 pixels output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_4x4'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of 4x4 inverse-DCT, which
+ * requires far fewer arithmetic operations and hence should be faster.
+ * The primary purpose of this particular NEON optimized function is
+ * bit exact compatibility with jpeg-6b.
+ *
+ * TODO: slightly better instruction scheduling could be achieved by expanding
+ * the idct_helper/transpose_4x4 macros and reordering instructions,
+ * but readability would suffer somewhat.
+ */
+
+#define CONST_BITS 13 /* fraction bits of the FIX_* values below: FIX(x) = x * 2^13, rounded */
+
+#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
+#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
+#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
+#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
+#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
+#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
+#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
+#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
+#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
+#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
+#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
+#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
+#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
+#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
+/* Multiplier table for jsimd_idct_4x4_neon: loaded as v0.4h, v1.4h, v2.4h (a fourth register is loaded only as padding) */
+.balign 16
+jsimd_idct_4x4_neon_consts:
+ .short FIX_1_847759065 /* v0.4h[0] */
+ .short -FIX_0_765366865 /* v0.4h[1] */
+ .short -FIX_0_211164243 /* v0.4h[2] */
+ .short FIX_1_451774981 /* v0.4h[3] */
+ .short -FIX_2_172734803 /* v1.4h[0] */
+ .short FIX_1_061594337 /* v1.4h[1] */
+ .short -FIX_0_509795579 /* v1.4h[2] */
+ .short -FIX_0_601344887 /* v1.4h[3] */
+ .short FIX_0_899976223 /* v2.4h[0] */
+ .short FIX_2_562915447 /* v2.4h[1] */
+ .short 1 << (CONST_BITS+1) /* v2.4h[2]: multiplier applied to the x4 input */
+ .short 0 /* v2.4h[3]: unused filler */
+
+.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
+ smull v28.4s, \x4, v2.4h[2] /* v28 = x4 * v2[2] + x8 * v0[0] + x14 * v0[1] */
+ smlal v28.4s, \x8, v0.4h[0]
+ smlal v28.4s, \x14, v0.4h[1]
+ /* v26 = x16 * v1[2] + x12 * v1[3] + x10 * v2[0] + x6 * v2[1] */
+ smull v26.4s, \x16, v1.4h[2]
+ smlal v26.4s, \x12, v1.4h[3]
+ smlal v26.4s, \x10, v2.4h[0]
+ smlal v26.4s, \x6, v2.4h[1]
+ /* v30 = x4 * v2[2] - x8 * v0[0] - x14 * v0[1] */
+ smull v30.4s, \x4, v2.4h[2]
+ smlsl v30.4s, \x8, v0.4h[0]
+ smlsl v30.4s, \x14, v0.4h[1]
+ /* v24 = x16 * v0[2] + x12 * v0[3] + x10 * v1[0] + x6 * v1[1] */
+ smull v24.4s, \x16, v0.4h[2]
+ smlal v24.4s, \x12, v0.4h[3]
+ smlal v24.4s, \x10, v1.4h[0]
+ smlal v24.4s, \x6, v1.4h[1]
+ /* y26 = descale(v28 + v26), y29 = descale(v28 - v26) */
+ add v20.4s, v28.4s, v26.4s
+ sub v28.4s, v28.4s, v26.4s
+ /* A narrowing RSHRN to 16-bit lanes only encodes shifts up to 16, so larger shifts round in place and then narrow */
+.if \shift > 16
+ srshr v20.4s, v20.4s, #\shift
+ srshr v28.4s, v28.4s, #\shift
+ xtn \y26, v20.4s
+ xtn \y29, v28.4s
+.else
+ rshrn \y26, v20.4s, #\shift
+ rshrn \y29, v28.4s, #\shift
+.endif
+ /* y27 = descale(v30 + v24), y28 = descale(v30 - v24) */
+ add v20.4s, v30.4s, v24.4s
+ sub v30.4s, v30.4s, v24.4s
+ /* Same shift-range handling as above; v20, v24, v26, v28 and v30 are clobbered by this macro */
+.if \shift > 16
+ srshr v20.4s, v20.4s, #\shift
+ srshr v30.4s, v30.4s, #\shift
+ xtn \y27, v20.4s
+ xtn \y28, v30.4s
+.else
+ rshrn \y27, v20.4s, #\shift
+ rshrn \y28, v30.4s, #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_4x4_neon
+ /* Arguments: x0 = dequantization table, x1 = coefficient block, x2 = array of output row pointers, x3 = column offset added to each row pointer */
+ DCT_TABLE .req x0
+ COEF_BLOCK .req x1
+ OUTPUT_BUF .req x2
+ OUTPUT_COL .req x3
+ TMP1 .req x0
+ TMP2 .req x1
+ TMP3 .req x2
+ TMP4 .req x15
+
+ /* vpush {v8.4h-v15.4h} */
+ sub sp, sp, #32
+ st1 {v8.4h-v11.4h}, [sp] /* save NEON registers */
+ sub sp, sp, #32
+ st1 {v12.4h-v15.4h}, [sp]
+
+ /* Load constants (v3.4h is just used for padding) */
+ adr TMP4, jsimd_idct_4x4_neon_consts
+ ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
+
+ /* Load all COEF_BLOCK into NEON registers with the following allocation:
+ * 0 1 2 3 | 4 5 6 7
+ * ---------+--------
+ * 0 | v4.4h | v5.4h
+ * 1 | v6.4h | v7.4h
+ * 2 | v8.4h | v9.4h
+ * 3 | v10.4h | v11.4h
+ * 4 | - | -
+ * 5 | v12.4h | v13.4h
+ * 6 | v14.4h | v15.4h
+ * 7 | v16.4h | v17.4h
+ */
+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
+ ld1 {v8.4h, v9.4h, v10.4h, v11.4h}, [COEF_BLOCK], 32
+ add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 4, which is not used */
+ ld1 {v12.4h, v13.4h, v14.4h, v15.4h}, [COEF_BLOCK], 32
+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16
+ /* dequantize */
+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
+ mul v4.4h, v4.4h, v18.4h
+ mul v5.4h, v5.4h, v19.4h
+ ins v4.2d[1], v5.2d[0] /* 128 bit q4 */
+ ld1 {v22.4h, v23.4h, v24.4h, v25.4h}, [DCT_TABLE], 32
+ mul v6.4h, v6.4h, v20.4h
+ mul v7.4h, v7.4h, v21.4h
+ ins v6.2d[1], v7.2d[0] /* 128 bit q6 */
+ mul v8.4h, v8.4h, v22.4h
+ mul v9.4h, v9.4h, v23.4h
+ ins v8.2d[1], v9.2d[0] /* 128 bit q8 */
+ add DCT_TABLE, DCT_TABLE, #16 /* skip the row 4 quantizers */
+ ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [DCT_TABLE], 32
+ mul v10.4h, v10.4h, v24.4h
+ mul v11.4h, v11.4h, v25.4h
+ ins v10.2d[1], v11.2d[0] /* 128 bit q10 */
+ mul v12.4h, v12.4h, v26.4h
+ mul v13.4h, v13.4h, v27.4h
+ ins v12.2d[1], v13.2d[0] /* 128 bit q12 */
+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16 /* row 7 quantizers: left half in v30, right half in v31 */
+ mul v14.4h, v14.4h, v28.4h
+ mul v15.4h, v15.4h, v29.4h
+ ins v14.2d[1], v15.2d[0] /* 128 bit q14 */
+ mul v16.4h, v16.4h, v30.4h
+ mul v17.4h, v17.4h, v31.4h
+ ins v16.2d[1], v17.2d[0] /* 128 bit q16 */
+
+ /* Pass 1 */
+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v12.4h, v14.4h, v16.4h, 12, v4.4h, v6.4h, v8.4h, v10.4h
+ transpose_4x4 v4, v6, v8, v10, v3
+ ins v10.2d[1], v11.2d[0]
+ idct_helper v5.4h, v7.4h, v9.4h, v11.4h, v13.4h, v15.4h, v17.4h, 12, v5.4h, v7.4h, v9.4h, v11.4h
+ transpose_4x4 v5, v7, v9, v11, v3
+ ins v10.2d[1], v11.2d[0]
+ /* Pass 2 */
+ idct_helper v4.4h, v6.4h, v8.4h, v10.4h, v7.4h, v9.4h, v11.4h, 19, v26.4h, v27.4h, v28.4h, v29.4h
+ transpose_4x4 v26, v27, v28, v29, v3
+
+ /* Range limit */
+ movi v30.8h, #0x80
+ ins v26.2d[1], v27.2d[0]
+ ins v28.2d[1], v29.2d[0]
+ add v26.8h, v26.8h, v30.8h
+ add v28.8h, v28.8h, v30.8h
+ sqxtun v26.8b, v26.8h /* saturate to unsigned 8-bit */
+ sqxtun v27.8b, v28.8h
+
+ /* Store results to the output buffer */
+ ldp TMP1, TMP2, [OUTPUT_BUF], 16
+ ldp TMP3, TMP4, [OUTPUT_BUF]
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+ add TMP3, TMP3, OUTPUT_COL
+ add TMP4, TMP4, OUTPUT_COL
+
+#if defined(__ARMEL__) && !RESPECT_STRICT_ALIGNMENT
+ /* We can use far fewer instructions on little endian systems if the
+ * OS kernel is not configured to trap unaligned memory accesses.
+ * NOTE(review): __ARMEL__ is the 32-bit ARM macro; confirm it is defined for AArch64 builds */
+ st1 {v26.s}[0], [TMP1], 4
+ st1 {v27.s}[0], [TMP3], 4
+ st1 {v26.s}[1], [TMP2], 4
+ st1 {v27.s}[1], [TMP4], 4
+#else
+ st1 {v26.b}[0], [TMP1], 1
+ st1 {v27.b}[0], [TMP3], 1
+ st1 {v26.b}[1], [TMP1], 1
+ st1 {v27.b}[1], [TMP3], 1
+ st1 {v26.b}[2], [TMP1], 1
+ st1 {v27.b}[2], [TMP3], 1
+ st1 {v26.b}[3], [TMP1], 1
+ st1 {v27.b}[3], [TMP3], 1
+
+ st1 {v26.b}[4], [TMP2], 1
+ st1 {v27.b}[4], [TMP4], 1
+ st1 {v26.b}[5], [TMP2], 1
+ st1 {v27.b}[5], [TMP4], 1
+ st1 {v26.b}[6], [TMP2], 1
+ st1 {v27.b}[6], [TMP4], 1
+ st1 {v26.b}[7], [TMP2], 1
+ st1 {v27.b}[7], [TMP4], 1
+#endif
+
+ /* vpop {v8.4h-v15.4h} ;not available */
+ ld1 {v12.4h-v15.4h}, [sp], 32
+ ld1 {v8.4h-v11.4h}, [sp], 32
+
+ ret /* plain return through x30; blr would also overwrite the link register */
+
+ .unreq DCT_TABLE
+ .unreq COEF_BLOCK
+ .unreq OUTPUT_BUF
+ .unreq OUTPUT_COL
+ .unreq TMP1
+ .unreq TMP2
+ .unreq TMP3
+ .unreq TMP4
+.endfunc
+
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_idct_2x2_neon
+ *
+ * This function contains inverse-DCT code for getting reduced-size
+ * 2x2 pixels output from an 8x8 DCT block. It uses the same calculations
+ * and produces exactly the same output as IJG's original 'jpeg_idct_2x2'
+ * function from jpeg-6b (jidctred.c).
+ *
+ * NOTE: jpeg-8 has an improved implementation of 2x2 inverse-DCT, which
+ * requires far fewer arithmetic operations and hence should be faster.
+ * The primary purpose of this particular NEON optimized function is
+ * bit exact compatibility with jpeg-6b.
+ */
+
+.balign 8
+jsimd_idct_2x2_neon_consts:
+ .short -FIX_0_720959822 /* v0.4h[0] */
+ .short FIX_0_850430095 /* v0.4h[1] */
+ .short -FIX_1_272758580 /* v0.4h[2] */
+ .short FIX_3_624509785 /* v0.4h[3] */
+
+.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
+ sshll v28.4s, \x4, #15 /* v28 = x4 << 15 */
+ smull v26.4s, \x6, v0.4h[3] /* v26 = x6 * v0[3] + x10 * v0[2] + x12 * v0[1] + x16 * v0[0] */
+ smlal v26.4s, \x10, v0.4h[2]
+ smlal v26.4s, \x12, v0.4h[1]
+ smlal v26.4s, \x16, v0.4h[0]
+ /* y26 = descale(v28 + v26), y27 = descale(v28 - v26); v20, v26 and v28 are clobbered */
+ add v20.4s, v28.4s, v26.4s
+ sub v28.4s, v28.4s, v26.4s
+ /* A narrowing RSHRN to 16-bit lanes only encodes shifts up to 16, so larger shifts round in place and then narrow */
+.if \shift > 16
+ srshr v20.4s, v20.4s, #\shift
+ srshr v28.4s, v28.4s, #\shift
+ xtn \y26, v20.4s
+ xtn \y27, v28.4s
+.else
+ rshrn \y26, v20.4s, #\shift
+ rshrn \y27, v28.4s, #\shift
+.endif
+
+.endm
+
+asm_function jsimd_idct_2x2_neon
+ /* Arguments: x0 = dequantization table, x1 = coefficient block, x2 = array of output row pointers, x3 = column offset added to each row pointer */
+ DCT_TABLE .req x0
+ COEF_BLOCK .req x1
+ OUTPUT_BUF .req x2
+ OUTPUT_COL .req x3
+ TMP1 .req x0
+ TMP2 .req x15
+
+ /* vpush {v8.4h-v15.4h} ; not available */
+ sub sp, sp, #32
+ st1 {v8.4h-v11.4h}, [sp] /* save NEON registers */
+ sub sp, sp, #32
+ st1 {v12.4h-v15.4h}, [sp]
+
+ /* Load constants */
+ adr TMP2, jsimd_idct_2x2_neon_consts
+ ld1 {v0.4h}, [TMP2]
+
+ /* Load all COEF_BLOCK into NEON registers with the following allocation:
+ * 0 1 2 3 | 4 5 6 7
+ * ---------+--------
+ * 0 | v4.4h | v5.4h
+ * 1 | v6.4h | v7.4h
+ * 2 | - | -
+ * 3 | v10.4h | v11.4h
+ * 4 | - | -
+ * 5 | v12.4h | v13.4h
+ * 6 | - | -
+ * 7 | v16.4h | v17.4h
+ */
+ ld1 {v4.4h, v5.4h, v6.4h, v7.4h}, [COEF_BLOCK], 32
+ add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 2 */
+ ld1 {v10.4h, v11.4h}, [COEF_BLOCK], 16
+ add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 4 */
+ ld1 {v12.4h, v13.4h}, [COEF_BLOCK], 16
+ add COEF_BLOCK, COEF_BLOCK, #16 /* skip row 6 */
+ ld1 {v16.4h, v17.4h}, [COEF_BLOCK], 16
+ /* Dequantize */
+ ld1 {v18.4h, v19.4h, v20.4h, v21.4h}, [DCT_TABLE], 32
+ mul v4.8h, v4.8h, v18.8h
+ mul v5.8h, v5.8h, v19.8h /* right half of row 0 uses the right-half quantizers in v19 */
+ ins v4.2d[1], v5.2d[0]
+ mul v6.8h, v6.8h, v20.8h
+ mul v7.8h, v7.8h, v21.8h
+ ins v6.2d[1], v7.2d[0]
+ add DCT_TABLE, DCT_TABLE, #16
+ ld1 {v24.4h, v25.4h}, [DCT_TABLE], 16
+ mul v10.8h, v10.8h, v24.8h
+ mul v11.8h, v11.8h, v25.8h
+ ins v10.2d[1], v11.2d[0]
+ add DCT_TABLE, DCT_TABLE, #16
+ ld1 {v26.4h, v27.4h}, [DCT_TABLE], 16
+ mul v12.8h, v12.8h, v26.8h
+ mul v13.8h, v13.8h, v27.8h
+ ins v12.2d[1], v13.2d[0]
+ add DCT_TABLE, DCT_TABLE, #16
+ ld1 {v30.4h, v31.4h}, [DCT_TABLE], 16
+ mul v16.8h, v16.8h, v30.8h
+ mul v17.8h, v17.8h, v31.8h
+ ins v16.2d[1], v17.2d[0]
+
+ /* Pass 1 */
+#if 0
+ idct_helper v4.4h, v6.4h, v10.4h, v12.4h, v16.4h, 13, v4.4h, v6.4h
+ transpose_4x4 v4.4h, v6.4h, v8.4h, v10.4h
+ idct_helper v5.4h, v7.4h, v11.4h, v13.4h, v17.4h, 13, v5.4h, v7.4h
+ transpose_4x4 v5.4h, v7.4h, v9.4h, v11.4h
+#else
+ smull v26.4s, v6.4h, v0.4h[3] /* left half: same sums as idct_helper, interleaved with the right half */
+ smlal v26.4s, v10.4h, v0.4h[2]
+ smlal v26.4s, v12.4h, v0.4h[1]
+ smlal v26.4s, v16.4h, v0.4h[0]
+ smull v24.4s, v7.4h, v0.4h[3] /* right half */
+ smlal v24.4s, v11.4h, v0.4h[2]
+ smlal v24.4s, v13.4h, v0.4h[1]
+ smlal v24.4s, v17.4h, v0.4h[0]
+ sshll v28.4s, v4.4h, #15
+ sshll v30.4s, v5.4h, #15
+ add v20.4s, v28.4s, v26.4s
+ sub v28.4s, v28.4s, v26.4s
+ rshrn v4.4h, v20.4s, #13
+ rshrn v6.4h, v28.4s, #13
+ add v20.4s, v30.4s, v24.4s
+ sub v28.4s, v30.4s, v24.4s
+ rshrn v5.4h, v20.4s, #13
+ rshrn v7.4h, v28.4s, #13
+ transpose v4, v6, v3, .16b, .8h /* NOTE(review): v5/v7 (right-half results) are not operands of these transposes - confirm pass 2 gets the intended data in v7/v11 */
+ transpose v6, v10, v3, .16b, .4s
+#endif
+
+ /* Pass 2 */
+ idct_helper v4.4h, v6.4h, v10.4h, v7.4h, v11.4h, 20, v26.4h, v27.4h
+
+ /* Range limit */
+ movi v30.8h, #0x80
+ ins v26.2d[1], v27.2d[0]
+ add v26.8h, v26.8h, v30.8h
+ sqxtun v30.8b, v26.8h
+ ins v26.2d[0], v30.2d[0]
+ sqxtun v27.8b, v26.8h
+
+ /* Store results to the output buffer */
+ ldp TMP1, TMP2, [OUTPUT_BUF]
+ add TMP1, TMP1, OUTPUT_COL
+ add TMP2, TMP2, OUTPUT_COL
+
+ st1 {v26.b}[0], [TMP1], 1
+ st1 {v27.b}[4], [TMP1], 1
+ st1 {v26.b}[1], [TMP2], 1
+ st1 {v27.b}[5], [TMP2], 1
+
+ /* vpop {v8.4h-v15.4h} ;not available */
+
+ ld1 {v12.4h-v15.4h}, [sp], 32
+ ld1 {v8.4h-v11.4h}, [sp], 32
+
+ ret /* plain return through x30; blr would also overwrite the link register */
+
+ .unreq DCT_TABLE
+ .unreq COEF_BLOCK
+ .unreq OUTPUT_BUF
+ .unreq OUTPUT_COL
+ .unreq TMP1
+ .unreq TMP2
+.endfunc
+
+.purgem idct_helper
+
+
+/*****************************************************************************/
+
+/*
+ * jsimd_ycc_extrgb_convert_neon
+ * jsimd_ycc_extbgr_convert_neon
+ * jsimd_ycc_extrgbx_convert_neon
+ * jsimd_ycc_extbgrx_convert_neon
+ * jsimd_ycc_extxbgr_convert_neon
+ * jsimd_ycc_extxrgb_convert_neon
+ *
+ * Colorspace conversion YCbCr -> RGB
+ */
+
+
+.macro do_load size
+ .if \size == 8 /* full group: 8 bytes each from U, V and Y into v4, v5 and v0, then prefetch 64 bytes ahead */
+ ld1 {v4.8b}, [U], 8
+ ld1 {v5.8b}, [V], 8
+ ld1 {v0.8b}, [Y], 8
+ prfm PLDL1KEEP, [U, #64]
+ prfm PLDL1KEEP, [V, #64]
+ prfm PLDL1KEEP, [Y, #64]
+ .elseif \size == 4 /* partial group: lanes 0-3, one byte per load, every load advances its pointer by 1 */
+ ld1 {v4.b}[0], [U], 1
+ ld1 {v4.b}[1], [U], 1
+ ld1 {v4.b}[2], [U], 1
+ ld1 {v4.b}[3], [U], 1
+ ld1 {v5.b}[0], [V], 1
+ ld1 {v5.b}[1], [V], 1
+ ld1 {v5.b}[2], [V], 1
+ ld1 {v5.b}[3], [V], 1
+ ld1 {v0.b}[0], [Y], 1
+ ld1 {v0.b}[1], [Y], 1
+ ld1 {v0.b}[2], [Y], 1
+ ld1 {v0.b}[3], [Y], 1
+ .elseif \size == 2 /* partial group: lanes 4-5 */
+ ld1 {v4.b}[4], [U], 1
+ ld1 {v4.b}[5], [U], 1
+ ld1 {v5.b}[4], [V], 1
+ ld1 {v5.b}[5], [V], 1
+ ld1 {v0.b}[4], [Y], 1
+ ld1 {v0.b}[5], [Y], 1
+ .elseif \size == 1 /* partial group: lane 6 */
+ ld1 {v4.b}[6], [U], 1
+ ld1 {v5.b}[6], [V], 1
+ ld1 {v0.b}[6], [Y], 1
+ .else
+ .error unsupported macroblock size
+ .endif
+.endm
+
+.macro do_store bpp, size  /* store "size" pixels of "bpp" bits from v10..v13 to RGB */
+    .if \bpp == 24  /* 3 bytes per pixel: interleave v10, v11, v12 */
+        .if \size == 8
+            st3         {v10.8b, v11.8b, v12.8b}, [RGB], 24
+        .elseif \size == 4  /* lanes 0-3, the lanes filled by do_load 4 */
+            st3         {v10.b, v11.b, v12.b}[0], [RGB], 3
+            st3         {v10.b, v11.b, v12.b}[1], [RGB], 3
+            st3         {v10.b, v11.b, v12.b}[2], [RGB], 3
+            st3         {v10.b, v11.b, v12.b}[3], [RGB], 3
+        .elseif \size == 2  /* lanes 4-5, the lanes filled by do_load 2 */
+            st3         {v10.b, v11.b, v12.b}[4], [RGB], 3
+            st3         {v10.b, v11.b, v12.b}[5], [RGB], 3
+        .elseif \size == 1  /* lane 6, the lane filled by do_load 1 */
+            st3         {v10.b, v11.b, v12.b}[6], [RGB], 3
+        .else
+            .error unsupported macroblock size
+        .endif
+    .elseif \bpp == 32  /* 4 bytes per pixel: interleave v10, v11, v12, v13 */
+        .if \size == 8
+            st4         {v10.8b, v11.8b, v12.8b, v13.8b}, [RGB], 32
+        .elseif \size == 4
+            st4         {v10.b, v11.b, v12.b, v13.b}[0], [RGB], 4
+            st4         {v10.b, v11.b, v12.b, v13.b}[1], [RGB], 4
+            st4         {v10.b, v11.b, v12.b, v13.b}[2], [RGB], 4
+            st4         {v10.b, v11.b, v12.b, v13.b}[3], [RGB], 4
+        .elseif \size == 2
+            st4         {v10.b, v11.b, v12.b, v13.b}[4], [RGB], 4
+            st4         {v10.b, v11.b, v12.b, v13.b}[5], [RGB], 4
+        .elseif \size == 1
+            st4         {v10.b, v11.b, v12.b, v13.b}[6], [RGB], 4
+        .else
+            .error unsupported macroblock size
+        .endif
+    .else
+        .error unsupported bpp
+    .endif
+.endm
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize, defsize
+#else
+.macro generate_jsimd_ycc_rgb_convert_neon colorid, bpp, r_offs, rsize, g_offs, gsize, b_offs, bsize
+#endif
+/*
+ * 2 stage pipelined YCbCr->RGB conversion
+ */
+
+.macro do_yuv_to_rgb_stage1 /* widen U/V, remove the bias, start the 32-bit products */
+ uaddw v6.8h, v2.8h, v4.8b /* v6 = u + v2; v2 is loaded with -128 in every lane */
+ uaddw v8.8h, v2.8h, v5.8b /* v8 = v + v2, i.e. v - 128 */
+ smull v20.4s, v6.4h, v1.4h[1] /* green, low half: (u - 128) * -11277 */
+ smlal v20.4s, v8.4h, v1.4h[2] /* ... plus (v - 128) * -23401 */
+ smull2 v22.4s, v6.8h, v1.4h[1] /* green, high half: (u - 128) * -11277 */
+ smlal2 v22.4s, v8.8h, v1.4h[2] /* ... plus (v - 128) * -23401 */
+ smull v24.4s, v8.4h, v1.4h[0] /* red, low half: (v - 128) * 22971 */
+ smull2 v26.4s, v8.8h, v1.4h[0] /* red, high half: (v - 128) * 22971 */
+ smull v28.4s, v6.4h, v1.4h[3] /* blue, low half: (u - 128) * 29033 */
+ smull2 v30.4s, v6.8h, v1.4h[3] /* blue, high half: (u - 128) * 29033 */
+.endm
+
+.macro do_yuv_to_rgb_stage2 /* finish the pixels whose products stage1 computed */
+ rshrn v20.4h, v20.4s, #15 /* green terms: rounding shift right by 15, narrow to 16 bit */
+ rshrn2 v20.8h, v22.4s, #15
+ rshrn v24.4h, v24.4s, #14 /* red terms: rounding shift right by 14, narrow to 16 bit */
+ rshrn2 v24.8h, v26.4s, #14
+ rshrn v28.4h, v28.4s, #14 /* blue terms: rounding shift right by 14, narrow to 16 bit */
+ rshrn2 v28.8h, v30.4s, #14
+ uaddw v20.8h, v20.8h, v0.8b /* add the Y bytes held in v0 to each channel */
+ uaddw v24.8h, v24.8h, v0.8b
+ uaddw v28.8h, v28.8h, v0.8b
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+ sqxtun v1\g_offs\defsize, v20.8h /* saturate to unsigned 8 bit; destination is v1<offset> */
+ sqxtun v1\r_offs\defsize, v24.8h
+ sqxtun v1\b_offs\defsize, v28.8h
+
+#else
+ sqxtun v1\g_offs\gsize, v20.4s /* NOTE(review): this branch narrows the .4s view of */
+ sqxtun v1\r_offs\rsize, v24.4s /* registers that were written as .8h just above - */
+ sqxtun v1\b_offs\bsize, v28.4s /* confirm this operand form is what is intended */
+#endif
+.endm
+
+.macro do_yuv_to_rgb_stage2_store_load_stage1  /* pipelined body: finish 8 pixels, start the next 8 */
+    ld1             {v4.8b}, [U], 8                 /* next 8 U samples */
+    rshrn           v20.4h, v20.4s, #15             /* stage2 of the previous group starts here */
+    rshrn2          v20.8h, v22.4s, #15
+    rshrn           v24.4h, v24.4s, #14
+    rshrn2          v24.8h, v26.4s, #14
+    rshrn           v28.4h, v28.4s, #14
+    ld1             {v5.8b}, [V], 8                 /* next 8 V samples */
+    rshrn2          v28.8h, v30.4s, #14
+    uaddw           v20.8h, v20.8h, v0.8b           /* uses the previous Y before v0 is reloaded */
+    uaddw           v24.8h, v24.8h, v0.8b
+    uaddw           v28.8h, v28.8h, v0.8b
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+    sqxtun          v1\g_offs\defsize, v20.8h
+#else
+    sqxtun          v1\g_offs\gsize, v20.4s
+#endif
+    ld1             {v0.8b}, [Y], 8                 /* next 8 Y samples */
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+    sqxtun          v1\r_offs\defsize, v24.8h
+#else
+    sqxtun          v1\r_offs\rsize, v24.4s
+#endif
+    prfm            PLDL1KEEP, [U, #64]
+    prfm            PLDL1KEEP, [V, #64]
+    prfm            PLDL1KEEP, [Y, #64]
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+    sqxtun          v1\b_offs\defsize, v28.8h
+#else
+    sqxtun          v1\b_offs\bsize, v28.4s         /* blue takes its own size, as in stage2 */
+#endif
+    uaddw           v6.8h, v2.8h, v4.8b             /* v6 = u - 128 (v2 holds -128) */
+    uaddw           v8.8h, v2.8h, v5.8b             /* v8 = v - 128 */
+    do_store        \bpp, 8                         /* write out the finished group */
+    smull           v20.4s, v6.4h, v1.4h[1]         /* multiply by -11277 */
+    smlal           v20.4s, v8.4h, v1.4h[2]         /* multiply by -23401 */
+    smull2          v22.4s, v6.8h, v1.4h[1]         /* multiply by -11277 */
+    smlal2          v22.4s, v8.8h, v1.4h[2]         /* multiply by -23401 */
+    smull           v24.4s, v8.4h, v1.4h[0]         /* multiply by 22971 */
+    smull2          v26.4s, v8.8h, v1.4h[0]         /* multiply by 22971 */
+    smull           v28.4s, v6.4h, v1.4h[3]         /* multiply by 29033 */
+    smull2          v30.4s, v6.8h, v1.4h[3]         /* multiply by 29033 */
+.endm
+
+.macro do_yuv_to_rgb /* non-pipelined conversion of the pixels currently loaded */
+ do_yuv_to_rgb_stage1 /* bias removal and 32-bit products */
+ do_yuv_to_rgb_stage2 /* descale, add Y, saturate into the output registers */
+.endm
+
+/* Apple gas crashes on adrl, work around that by using adr.
+ * But this requires a copy of these constants for each function.
+ */
+
+.balign 16
+jsimd_ycc_\colorid\()_neon_consts:
+ .short 0, 0, 0, 0 /* padding; loaded into v0 together with the next row */
+ .short 22971, -11277, -23401, 29033 /* v1: ~1.402*2^14, ~-0.34414*2^15, ~-0.71414*2^15, ~1.772*2^14 */
+ .short -128, -128, -128, -128 /* v2, low half: bias added to the U and V samples */
+ .short -128, -128, -128, -128 /* v2, high half: same bias */
+
+asm_function jsimd_ycc_\colorid\()_convert_neon
+    OUTPUT_WIDTH    .req x0             /* arg 0: pixels per output row */
+    INPUT_BUF       .req x1             /* arg 1: array of three plane pointers */
+    INPUT_ROW       .req x2             /* arg 2: index of the first input row */
+    OUTPUT_BUF      .req x3             /* arg 3: array of output row pointers */
+    NUM_ROWS        .req x4             /* arg 4: number of rows to convert */
+
+    INPUT_BUF0      .req x5             /* row pointers of the Y plane */
+    INPUT_BUF1      .req x6             /* row pointers of the U plane */
+    INPUT_BUF2      .req INPUT_BUF      /* row pointers of the V plane (reuses x1) */
+
+    RGB             .req x7
+    Y               .req x8
+    U               .req x9
+    V               .req x10
+    N               .req x15
+
+    /* Load constants: v0 = padding, v1 = coefficients, v2 = -128 bias */
+    adr             x15, jsimd_ycc_\colorid\()_neon_consts
+    ld1             {v0.4h, v1.4h}, [x15], 16
+    ld1             {v2.8h}, [x15]
+
+    /* Save x4-x10 and x30 (push order matters for the epilogue), fetch planes */
+    stp             x4, x5, [sp, -16]!
+    stp             x6, x7, [sp, -16]!
+    stp             x8, x9, [sp, -16]!
+    stp             x10, x30, [sp, -16]!
+    ldr             INPUT_BUF0, [INPUT_BUF]
+    ldr             INPUT_BUF1, [INPUT_BUF, 8]
+    ldr             INPUT_BUF2, [INPUT_BUF, 16]
+    .unreq          INPUT_BUF
+
+    /* Save the low 64 bits of the NEON registers v8-v15 */
+    sub             sp, sp, #32
+    st1             {v8.4h-v11.4h}, [sp]
+    sub             sp, sp, #32
+    st1             {v12.4h-v15.4h}, [sp]
+
+    /* Preset the padding byte to 0xFF.  The 32bpp store emits v10..v13 and
+     * the unused slot is v10 (xbgr/xrgb) or v13 (rgbx/bgrx); v11 and v12
+     * always receive a colour channel, so they need no preset. */
+    movi            v10.16b, #255
+    movi            v13.16b, #255
+
+    /* Outer loop over scanlines */
+    cmp             NUM_ROWS, #1
+    blt             9f
+0:
+    lsl             x16, INPUT_ROW, #3  /* byte offset of the row pointer */
+    ldr             Y, [INPUT_BUF0, x16]
+    ldr             U, [INPUT_BUF1, x16]
+    mov             N, OUTPUT_WIDTH
+    ldr             V, [INPUT_BUF2, x16]
+    add             INPUT_ROW, INPUT_ROW, #1
+    ldr             RGB, [OUTPUT_BUF], #8
+
+    /* Inner loop over pixels */
+    subs            N, N, #8
+    blt             3f                  /* fewer than 8 pixels: tail only */
+    do_load         8
+    do_yuv_to_rgb_stage1
+    subs            N, N, #8
+    blt             2f
+1:
+    do_yuv_to_rgb_stage2_store_load_stage1
+    subs            N, N, #8
+    bge             1b
+2:
+    do_yuv_to_rgb_stage2
+    do_store        \bpp, 8
+    tst             N, #7
+    beq             8f                  /* width was a multiple of 8 */
+3:
+    tst             N, #4               /* tail: load 4, 2 and 1 pixels as needed */
+    beq             3f
+    do_load         4
+3:
+    tst             N, #2
+    beq             4f
+    do_load         2
+4:
+    tst             N, #1
+    beq             5f
+    do_load         1
+5:
+    do_yuv_to_rgb
+    tst             N, #4               /* tail: store the same groups back */
+    beq             6f
+    do_store        \bpp, 4
+6:
+    tst             N, #2
+    beq             7f
+    do_store        \bpp, 2
+7:
+    tst             N, #1
+    beq             8f
+    do_store        \bpp, 1
+8:
+    subs            NUM_ROWS, NUM_ROWS, #1
+    bgt             0b
+9:
+    /* Restore all registers and return */
+    /* NEON registers first: reverse order of the two st1 saves */
+    ld1             {v12.4h-v15.4h}, [sp], #32
+    ld1             {v8.4h-v11.4h}, [sp], #32
+    /* pop {x4, x5, x6, x7, x8, x9, x10, x30}: exact mirror of the stp sequence */
+    ldp             x10, x30, [sp], #16
+    ldp             x8, x9, [sp], #16
+    ldp             x6, x7, [sp], #16
+    ldp             x4, x5, [sp], #16
+    br              x30
+    .unreq          OUTPUT_WIDTH
+    .unreq          INPUT_ROW
+    .unreq          OUTPUT_BUF
+    .unreq          NUM_ROWS
+    .unreq          INPUT_BUF0
+    .unreq          INPUT_BUF1
+    .unreq          INPUT_BUF2
+    .unreq          RGB
+    .unreq          Y
+    .unreq          U
+    .unreq          V
+    .unreq          N
+.endfunc
+
+.purgem do_yuv_to_rgb
+.purgem do_yuv_to_rgb_stage1
+.purgem do_yuv_to_rgb_stage2
+.purgem do_yuv_to_rgb_stage2_store_load_stage1
+.endm
+
+/* RTSM simulator fix integer saturation works on 8b boundry add a new parameter
+ * as a workaround for the simulator fix
+ */
+#ifdef RTSM_SQSHRN_SIM_ISSUE
+/*--------------------------------- id ----- bpp R rsize G gsize B bsize defsize */
+generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h, .8b
+generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b
+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h, .8b
+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h, .8b
+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h, .8b
+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h, .8b
+#else
+/*--------------------------------- id ----- bpp R rsize G gsize B bsize */
+generate_jsimd_ycc_rgb_convert_neon extrgb, 24, 0, .4h, 1, .4h, 2, .4h
+generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h
+generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, .4h, 1, .4h, 2, .4h
+generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, .4h, 1, .4h, 0, .4h
+generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, .4h, 2, .4h, 1, .4h
+generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, .4h, 2, .4h, 3, .4h
+#endif
+
+.purgem do_load
+.purgem do_store
diff --git a/simd/jsimd_i386.c b/simd/jsimd_i386.c
index e96f5b8..b731edb 100644
--- a/simd/jsimd_i386.c
+++ b/simd/jsimd_i386.c
@@ -2,7 +2,7 @@
* jsimd_i386.c
*
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009-2011 D. R. Commander
+ * Copyright 2009-2011, 2013 D. R. Commander
*
* Based on the x86 SIMD extension for IJG JPEG library,
* Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -59,6 +59,9 @@
env = getenv("JSIMD_FORCESSE2");
if ((env != NULL) && (strcmp(env, "1") == 0))
simd_support &= JSIMD_SSE2;
+ env = getenv("JSIMD_FORCENONE");
+ if ((env != NULL) && (strcmp(env, "1") == 0))
+ simd_support = 0;
}
GLOBAL(int)
diff --git a/simd/jsimd_mips.c b/simd/jsimd_mips.c
new file mode 100644
index 0000000..5b4f893
--- /dev/null
+++ b/simd/jsimd_mips.c
@@ -0,0 +1,857 @@
+/*
+ * jsimd_mips.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011 D. R. Commander
+ * Copyright (C) 2013, MIPS Technologies, Inc., California
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on
+ * MIPS architecture.
+ *
+ * Based on the stubs from 'jsimd_none.c'
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+#if defined(__linux__)
+
+/* Scan /proc/cpuinfo for search_string.  Clears simd_support first; on a
+ * match sets the JSIMD_MIPS_DSPR2 bit and returns 1, otherwise returns 0. */
+LOCAL(int)
+parse_proc_cpuinfo(const char* search_string)
+{
+  char line[256];
+  int found = 0;
+  FILE* cpuinfo;
+
+  simd_support = 0;
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+  if (cpuinfo == NULL)
+    return 0;   /* file could not be opened: report "not found" */
+
+  while (!found && fgets(line, sizeof(line), cpuinfo) != NULL)
+    found = (strstr(line, search_string) != NULL);
+  fclose(cpuinfo);
+  if (found)
+    simd_support |= JSIMD_MIPS_DSPR2;
+  return found;
+}
+#endif
+
+/*
+ * Determine once which SIMD extensions are usable and cache the result in
+ * simd_support (~0 means "not probed yet", afterwards a mask of JSIMD_* bits).
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+ if (simd_support != ~0U)
+ return; /* already probed by an earlier call */
+
+ simd_support = 0;
+
+#if defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
+ simd_support |= JSIMD_MIPS_DSPR2;
+#elif defined(__linux__)
+ /* Not compiled with DSP rev 2 enabled: fall back to runtime detection on
+ * Linux by looking for a "MIPS 74K" line in /proc/cpuinfo.  The helper
+ * sets the JSIMD_MIPS_DSPR2 bit itself when the string is found. */
+ if (!parse_proc_cpuinfo("MIPS 74K"))
+ return;
+#endif
+}
+static const int mips_idct_ifast_coefs[4] = { /* each word repeats one 16-bit constant in both halves */
+ 0x45404540, /* FIX( 1.082392200 / 2) = 17734 = 0x4546; stored as 0x4540 */
+ 0x5A805A80, /* FIX( 1.414213562 / 2) = 23170 = 0x5A82; stored as 0x5A80 */
+ 0x76407640, /* FIX( 1.847759065 / 2) = 30274 = 0x7642; stored as 0x7640 */
+ 0xAC60AC60 /* FIX(-2.613125930 / 4) = -21407 = 0xAC61; stored as 0xAC60 */
+}; /* NOTE(review): stored low bits differ from the FIX() values quoted - confirm intended */
+
+/* Returns 1 when the DSPr2 RGB->YCbCr converter can be used, 0 otherwise. */
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  init_simd();
+
+  /* The SIMD routine is written for 8-bit samples, a 4-byte JDIMENSION and
+   * 3- or 4-byte RGB pixels only. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+  return 0;
+}
+
+/* Returns 1 when the DSPr2 RGB->grayscale converter can be used, 0 otherwise. */
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  /* The SIMD routine is written for 8-bit samples, a 4-byte JDIMENSION and
+   * 3- or 4-byte RGB pixels only. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+  return 0;
+}
+
+/* Returns 1 when the DSPr2 YCbCr->RGB converter can be used, 0 otherwise. */
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  init_simd();
+
+  /* The SIMD routine is written for 8-bit samples, a 4-byte JDIMENSION and
+   * 3- or 4-byte RGB pixels only. */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+  if (RGB_PIXELSIZE != 3 && RGB_PIXELSIZE != 4)
+    return 0;
+
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  /* Converter matching the input pixel layout.  Plain RGB is the starting
+   * choice and is kept for JCS_EXT_RGB and any colorspace not listed below. */
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_extrgb_ycc_convert_mips_dspr2;
+
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_extrgbx_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_extbgr_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_extbgrx_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_extxbgr_ycc_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_extxrgb_ycc_convert_mips_dspr2;
+      break;
+    default:
+      break;
+  }
+
+  /* The call is skipped entirely when the DSPr2 bit is not set. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  /* Converter matching the input pixel layout.  Plain RGB is the starting
+   * choice and is kept for JCS_EXT_RGB and any colorspace not listed below. */
+  void (*convert)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int) =
+    jsimd_extrgb_gray_convert_mips_dspr2;
+
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_extrgbx_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_extbgr_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_extbgrx_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_extxbgr_gray_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_extxrgb_gray_convert_mips_dspr2;
+      break;
+    default:
+      break;
+  }
+
+  /* The call is skipped entirely when the DSPr2 bit is not set. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  convert(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  /* Converter matching the output pixel layout.  Plain RGB is the starting
+   * choice and is kept for JCS_EXT_RGB and any colorspace not listed below. */
+  void (*convert)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int) =
+    jsimd_ycc_extrgb_convert_mips_dspr2;
+
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      convert = jsimd_ycc_extrgbx_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGR:
+      convert = jsimd_ycc_extbgr_convert_mips_dspr2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      convert = jsimd_ycc_extbgrx_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      convert = jsimd_ycc_extxbgr_convert_mips_dspr2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      convert = jsimd_ycc_extxrgb_convert_mips_dspr2;
+      break;
+    default:
+      break;
+  }
+
+  /* The call is skipped entirely when the DSPr2 bit is not set. */
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  convert(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+ JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v2_downsample_mips_dspr2(cinfo->image_width,
+ cinfo->max_v_samp_factor, compptr->v_samp_factor,
+ compptr->width_in_blocks, input_data, output_data);
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+ JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v1_downsample_mips_dspr2(cinfo->image_width,
+ cinfo->max_v_samp_factor, compptr->v_samp_factor,
+ compptr->width_in_blocks, input_data, output_data);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+ cinfo->output_width, input_data, output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+ cinfo->output_width, input_data, output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v2_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+ compptr->downsampled_width, input_data, output_data_ptr);
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+ jpeg_component_info * compptr,
+ JSAMPARRAY input_data,
+ JSAMPARRAY * output_data_ptr)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_h2v1_fancy_upsample_mips_dspr2(cinfo->max_v_samp_factor,
+ compptr->downsampled_width, input_data, output_data_ptr);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+ JSAMPIMAGE input_buf,
+ JDIMENSION in_row_group_ctr,
+ JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+ JSAMPIMAGE input_buf,
+ JDIMENSION in_row_group_ctr,
+ JSAMPARRAY output_buf)
+{
+}
+
+/* Returns 1 when the DSPr2 integer sample-conversion routine can be used,
+ * 0 otherwise. */
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  init_simd();
+
+  /* The SIMD routine is written for 8x8 blocks of 8-bit samples ... */
+  if (DCTSIZE != 8 || BITS_IN_JSAMPLE != 8)
+    return 0;
+
+  /* ... with a 4-byte JDIMENSION and a 2-byte DCTELEM. */
+  if (sizeof(JDIMENSION) != 4 || sizeof(DCTELEM) != 2)
+    return 0;
+
+  /* Finally, the DSPr2 bit itself has to be set. */
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+ DCTELEM * workspace)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_convsamp_mips_dspr2(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+ FAST_FLOAT * workspace)
+{
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ jsimd_convsamp_float_mips_dspr2(sample_data, start_col, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(DCTELEM) != 2)
+ return 0;
+
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(DCTELEM) != 2)
+ return 0;
+
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_fdct_islow_mips_dspr2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_fdct_ifast_mips_dspr2(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (sizeof(DCTELEM) != 2)
+ return 0;
+
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+ DCTELEM * workspace)
+{
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ jsimd_quantize_mips_dspr2(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+ FAST_FLOAT * workspace)
+{
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ jsimd_quantize_float_mips_dspr2(coef_block, divisors, workspace);
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_6x6 (void)
+{
+ init_simd();
+
+ /* The code is optimised for these values only */
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_12x12 (void)
+{
+ init_simd();
+
+ if (BITS_IN_JSAMPLE != 8)
+ return 0;
+ if (DCTSIZE != 8)
+ return 0;
+ if (sizeof(JCOEF) != 2)
+ return 0;
+ if (sizeof(JDIMENSION) != 4)
+ return 0;
+ if (sizeof(ISLOW_MULT_TYPE) != 2)
+ return 0;
+
+ if (simd_support & JSIMD_MIPS_DSPR2)
+ return 1;
+
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ jsimd_idct_2x2_mips_dspr2(compptr->dct_table, coef_block,
+ output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  /* Scratch area handed to the DSPr2 routine; buffers data between passes. */
+  int workspace[DCTSIZE*4];
+
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    jsimd_idct_4x4_mips_dspr2(compptr->dct_table, coef_block,
+                              output_buf, output_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+ if ((simd_support & JSIMD_MIPS_DSPR2))
+ jsimd_idct_6x6_mips_dspr2(compptr->dct_table, coef_block,
+ output_buf, output_col);
+}
+
+/* 12x12 inverse DCT through the two DSPr2 pass routines.  Does nothing when
+ * the DSPr2 bit is not set in simd_support.  cinfo is not used here.
+ */
+GLOBAL(void)
+jsimd_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block,
+                  JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int workspace[96];   /* data handed from pass 1 to pass 2 */
+  int output[12];      /* address of each output row at output_col */
+  int row;
+
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  /* The pass-2 routine receives the twelve destination addresses as an int
+   * array, so every row pointer is cast to int. */
+  for (row = 0; row < 12; row++)
+    output[row] = (int)(output_buf[row] + output_col);
+
+  /* Pass 1 reads the coefficients and the component's dct_table and fills
+   * the workspace; pass 2 turns the workspace into output samples. */
+  jsimd_idct_12x12_pass1_mips_dspr2(coef_block,
+                                    compptr->dct_table, workspace);
+  jsimd_idct_12x12_pass2_mips_dspr2(workspace, output);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+ return 0;
+}
+
+/* Returns 1 when the DSPr2 fast integer IDCT can be used, 0 otherwise. */
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The SIMD routine is written for 8x8 blocks of 2-byte coefficients ... */
+  if (DCTSIZE != 8 || sizeof(JCOEF) != 2)
+    return 0;
+
+  /* ... 8-bit samples and a 4-byte JDIMENSION ... */
+  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
+    return 0;
+
+  /* ... and 2-byte multipliers with IFAST_SCALE_BITS equal to 2. */
+  if (sizeof(IFAST_MULT_TYPE) != 2 || IFAST_SCALE_BITS != 2)
+    return 0;
+
+  /* Finally, the DSPr2 bit itself has to be set. */
+  if (simd_support & JSIMD_MIPS_DSPR2)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+ return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+}
+
+/* Fast integer inverse DCT through the DSPr2 column and row routines.  Does
+ * nothing when the DSPr2 bit is not set in simd_support.  cinfo is unused.
+ */
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                  JDIMENSION output_col)
+{
+  /* Buffers data between the two passes. */
+  DCTELEM workspace[DCTSIZE2];
+
+  if (!(simd_support & JSIMD_MIPS_DSPR2))
+    return;
+
+  /* Pass 1: process columns from the input block, using the component's
+   * dct_table as multipliers, and store the results into the work array. */
+  jsimd_idct_ifast_cols_mips_dspr2(coef_block,
+                                   (IFAST_MULT_TYPE *) compptr->dct_table,
+                                   workspace, mips_idct_ifast_coefs);
+
+  /* Pass 2: process rows from the work array, store into the output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling. */
+  jsimd_idct_ifast_rows_mips_dspr2(workspace, output_buf,
+                                   output_col, mips_idct_ifast_coefs);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
+ JDIMENSION output_col)
+{
+}
diff --git a/simd/jsimd_mips_dspr2.S b/simd/jsimd_mips_dspr2.S
new file mode 100644
index 0000000..198c349
--- /dev/null
+++ b/simd/jsimd_mips_dspr2.S
@@ -0,0 +1,3324 @@
+/*
+ * MIPS DSPr2 optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2013, MIPS Technologies, Inc., California.
+ * All rights reserved.
+ * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
+ * Darko Laus (darko.laus@imgtec.com)
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#include "jsimd_mips_dspr2_asm.h"
+
+/*****************************************************************************/
+/*
+ * jsimd_extrgb_ycc_convert_mips_dspr2
+ * jsimd_extbgr_ycc_convert_mips_dspr2
+ * jsimd_extrgbx_ycc_convert_mips_dspr2
+ * jsimd_extbgrx_ycc_convert_mips_dspr2
+ * jsimd_extxbgr_ycc_convert_mips_dspr2
+ * jsimd_extxrgb_ycc_convert_mips_dspr2
+ *
+ * Colorspace conversion RGB -> YCbCr
+ */
+
+.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
+
+.macro DO_RGB_TO_YCC r, \
+ g, \
+ b, \
+ inptr
+ lbu \r, \r_offs(\inptr) // load the R byte of the current pixel
+ lbu \g, \g_offs(\inptr) // load the G byte
+ lbu \b, \b_offs(\inptr) // load the B byte
+ addiu \inptr, \pixel_size // step to the next pixel
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2)
+/*
+ * a0 - cinfo->image_width
+ * a1 - input_buf
+ * a2 - output_buf
+ * a3 - output_row
+ * 16(sp) - num_rows
+ */
+
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw t7, 48(sp) // t7 = num_rows
+ li s0, 0x4c8b // FIX(0.29900)
+ li s1, 0x9646 // FIX(0.58700)
+ li s2, 0x1d2f // FIX(0.11400)
+ li s3, 0xffffd4cd // -FIX(0.16874)
+ li s4, 0xffffab33 // -FIX(0.33126)
+ li s5, 0x8000 // FIX(0.50000)
+ li s6, 0xffff94d1 // -FIX(0.41869)
+ li s7, 0xffffeb2f // -FIX(0.08131)
+ li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1
+
+0:
+ addiu t7, -1 // --num_rows
+ lw t6, 0(a1) // t6 = input_buf[0]
+ lw t0, 0(a2) // t0 = output_buf[0]
+ lw t1, 4(a2) // t1 = output_buf[1]
+ lw t2, 8(a2) // t2 = output_buf[2]
+ sll t3, a3, 2 // t3 = output_row << 2 (index for the lwx loads below)
+ lwx t0, t3(t0) // t0 = output_buf[0][output_row]
+ lwx t1, t3(t1) // t1 = output_buf[1][output_row]
+ lwx t2, t3(t2) // t2 = output_buf[2][output_row]
+
+ addu t9, t2, a0 // t9 = end address
+ addiu a3, 1 // ++output_row
+
+1:
+ DO_RGB_TO_YCC t3, t4, t5, t6
+
+ mtlo s5, $ac0 // ac0 starts at 0x8000 (s5)
+ mtlo t8, $ac1 // ac1 starts at t8
+ mtlo t8, $ac2 // ac2 starts at t8
+ maddu $ac0, s2, t5 // ac0 += FIX(0.11400) * b
+ maddu $ac1, s5, t5 // ac1 += FIX(0.50000) * b
+ maddu $ac2, s5, t3 // ac2 += FIX(0.50000) * r
+ maddu $ac0, s0, t3 // ac0 += FIX(0.29900) * r
+ maddu $ac1, s3, t3 // ac1 += -FIX(0.16874) * r
+ maddu $ac2, s6, t4 // ac2 += -FIX(0.41869) * g
+ maddu $ac0, s1, t4 // ac0 += FIX(0.58700) * g
+ maddu $ac1, s4, t4 // ac1 += -FIX(0.33126) * g
+ maddu $ac2, s7, t5 // ac2 += -FIX(0.08131) * b
+ extr.w t3, $ac0, 16 // t3 = ac0 >> 16 (Y)
+ extr.w t4, $ac1, 16 // t4 = ac1 >> 16 (Cb)
+ extr.w t5, $ac2, 16 // t5 = ac2 >> 16 (Cr)
+ sb t3, 0(t0) // store into the output_buf[0] row
+ sb t4, 0(t1) // store into the output_buf[1] row
+ sb t5, 0(t2) // store into the output_buf[2] row
+ addiu t0, 1
+ addiu t2, 1
+ bne t2, t9, 1b // next pixel until the output_buf[2] pointer reaches t9
+ addiu t1, 1
+ bgtz t7, 0b // next row while num_rows > 0
+ addiu a1, 4 // advance input_buf to the next row pointer
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+END(jsimd_\colorid\()_ycc_convert_mips_dspr2)
+
+.purgem DO_RGB_TO_YCC
+
+.endm
+
+/*------------------------------------------id -- pix R G B */
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
+GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
+
+/*****************************************************************************/
+/*
+ * jsimd_ycc_extrgb_convert_mips_dspr2
+ * jsimd_ycc_extbgr_convert_mips_dspr2
+ * jsimd_ycc_extrgbx_convert_mips_dspr2
+ * jsimd_ycc_extbgrx_convert_mips_dspr2
+ * jsimd_ycc_extxbgr_convert_mips_dspr2
+ * jsimd_ycc_extxrgb_convert_mips_dspr2
+ *
+ * Colorspace conversion YCbCr -> RGB
+ */
+
+.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs
+
+.macro STORE_YCC_TO_RGB scratch0 \
+ scratch1 \
+ scratch2 \
+ outptr
+ sb \scratch0, \r_offs(\outptr) // store red
+ sb \scratch1, \g_offs(\outptr) // store green
+ sb \scratch2, \b_offs(\outptr) // store blue
+.if (\pixel_size == 4)
+ li t0, 0xFF // clobbers t0; the call site passes t0 as scratch2, which is already stored above
+ sb t0, \a_offs(\outptr) // fourth byte of a 4-byte pixel is set to 0xFF
+.endif
+ addiu \outptr, \pixel_size // step to the next output pixel
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2)
+/*
+ * a0 - cinfo->image_width
+ * a1 - input_buf
+ * a2 - input_row
+ * a3 - output_buf
+ * 16(sp) - num_rows
+ */
+
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw s1, 48(sp) // s1 = num_rows
+ li t3, 0x8000 // rounding term added to the green sum before the sra by 16
+ li t4, 0x166e9 // FIX(1.40200)
+ li t5, 0x1c5a2 // FIX(1.77200)
+ li t6, 0xffff492e // -FIX(0.71414)
+ li t7, 0xffffa7e6 // -FIX(0.34414)
+ repl.ph t8, 128 // t8 = 128 in both halfwords
+
+0:
+ lw s0, 0(a3) // s0 = output_buf[0] (outptr)
+ lw t0, 0(a1) // t0 = input_buf[0]
+ lw t1, 4(a1) // t1 = input_buf[1]
+ lw t2, 8(a1) // t2 = input_buf[2]
+ sll s5, a2, 2 // s5 = input_row << 2 (index for the lwx loads below)
+ addiu s1, -1 // --num_rows
+ lwx s2, s5(t0) // s2 = input_buf[0][input_row] (y row)
+ lwx s3, s5(t1) // s3 = input_buf[1][input_row] (cb row)
+ lwx s4, s5(t2) // s4 = input_buf[2][input_row] (cr row)
+ addu t9, s2, a0 // t9 = end address of the y row
+ addiu a2, 1 // ++input_row
+
+1:
+ lbu s7, 0(s4) // cr
+ lbu s6, 0(s3) // cb
+ lbu s5, 0(s2) // y
+ addiu s2, 1
+ addiu s4, 1
+ addiu s7, -128 // cr - 128
+ addiu s6, -128 // cb - 128
+ mul t2, t7, s6 // t2 = -FIX(0.34414) * (cb - 128)
+ mul t0, t6, s7 // Crgtab[cr]
+ sll s7, 15
+ mulq_rs.w t1, t4, s7 // Crrtab[cr]
+ sll s6, 15
+ addu t2, t3 // Cbgtab[cb]
+ addu t2, t0 // t2 = combined cb and cr contribution to green
+
+ mulq_rs.w t0, t5, s6 // Cbbtab[cb]
+ sra t2, 16
+ addu t1, s5 // add y to the red term
+ addu t2, s5 // add y
+ ins t2, t1, 16, 16 // t2 = red (upper halfword) | green (lower halfword)
+ subu.ph t2, t2, t8
+ addu t0, s5 // add y to the blue term
+ shll_s.ph t2, t2, 8
+ subu t0, 128
+ shra.ph t2, t2, 8
+ shll_s.w t0, t0, 24
+ addu.ph t2, t2, t8 // clip & store
+ sra t0, t0, 24
+ sra t1, t2, 16 // t1 = red taken from the upper halfword
+ addiu t0, 128
+
+ STORE_YCC_TO_RGB t1, t2, t0, s0
+
+ bne s2, t9, 1b // next pixel until the y pointer reaches t9
+ addiu s3, 1
+ bgtz s1, 0b // next row while num_rows > 0
+ addiu a3, 4 // advance output_buf to the next row pointer
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+END(jsimd_ycc_\colorid\()_convert_mips_dspr2)
+
+.purgem STORE_YCC_TO_RGB
+
+.endm
+
+/*------------------------------------------id -- pix R G B A */
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
+GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
+
+/*****************************************************************************/
+/*
+ * jsimd_extrgb_gray_convert_mips_dspr2
+ * jsimd_extbgr_gray_convert_mips_dspr2
+ * jsimd_extrgbx_gray_convert_mips_dspr2
+ * jsimd_extbgrx_gray_convert_mips_dspr2
+ * jsimd_extxbgr_gray_convert_mips_dspr2
+ * jsimd_extxrgb_gray_convert_mips_dspr2
+ *
+ * Colorspace conversion RGB -> GRAY
+ */
+
+.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
+
+.macro DO_RGB_TO_GRAY r, \
+ g, \
+ b, \
+ inptr
+ lbu \r, \r_offs(\inptr) // load the R byte of the current pixel
+ lbu \g, \g_offs(\inptr) // load the G byte
+ lbu \b, \b_offs(\inptr) // load the B byte
+ addiu \inptr, \pixel_size // step to the next pixel
+.endm
+
+LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2)
+/*
+ * a0 - cinfo->image_width
+ * a1 - input_buf
+ * a2 - output_buf
+ * a3 - output_row
+ * 16(sp) - num_rows
+ */
+
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ li s0, 0x4c8b // s0 = FIX(0.29900)
+ li s1, 0x9646 // s1 = FIX(0.58700)
+ li s2, 0x1d2f // s2 = FIX(0.11400)
+ li s7, 0x8000 // s7 = FIX(0.50000)
+ lw s6, 48(sp) // s6 = num_rows
+ andi t7, a0, 3 // t7 = image_width & 3 (pixels left over after the 4-pixel loop)
+
+0:
+ addiu s6, -1 // --num_rows
+ lw t0, 0(a1) // t0 = input_buf[0] (inptr)
+ lw t1, 0(a2) // t1 = output_buf[0]
+ sll t3, a3, 2 // t3 = output_row << 2
+ lwx t1, t3(t1) // t1 = output_buf[0][output_row] (outptr)
+ addiu a3, 1 // ++output_row
+ addu t9, t1, a0 // t9 = outptr end address
+ subu t8, t9, t7 // t8 = end address of the 4-pixel loop
+ beq t1, t8, 2f // fewer than 4 pixels in the row: skip the 4-pixel loop
+ nop
+
+1:
+ DO_RGB_TO_GRAY t3, t4, t5, t0
+ DO_RGB_TO_GRAY s3, s4, s5, t0
+
+ mtlo s7, $ac0 // ac0 starts at FIX(0.50000)
+ maddu $ac0, s2, t5 // ac0 += FIX(0.11400) * b
+ maddu $ac0, s1, t4 // ac0 += FIX(0.58700) * g
+ maddu $ac0, s0, t3 // ac0 += FIX(0.29900) * r
+ mtlo s7, $ac1 // second pixel uses ac1 with the same weights
+ maddu $ac1, s2, s5
+ maddu $ac1, s1, s4
+ maddu $ac1, s0, s3
+ extr.w t6, $ac0, 16 // t6 = gray value of pixel 0
+
+ DO_RGB_TO_GRAY t3, t4, t5, t0
+ DO_RGB_TO_GRAY s3, s4, s5, t0
+
+ mtlo s7, $ac0
+ maddu $ac0, s2, t5
+ maddu $ac0, s1, t4
+ extr.w t2, $ac1, 16 // t2 = gray value of pixel 1
+ maddu $ac0, s0, t3
+ mtlo s7, $ac1
+ maddu $ac1, s2, s5
+ maddu $ac1, s1, s4
+ maddu $ac1, s0, s3
+ extr.w t5, $ac0, 16 // t5 = gray value of pixel 2
+ sb t6, 0(t1)
+ sb t2, 1(t1)
+ extr.w t3, $ac1, 16 // t3 = gray value of pixel 3
+ addiu t1, 4
+ sb t5, -2(t1) // pixel 2 (offset is relative to the already advanced outptr)
+ sb t3, -1(t1) // pixel 3
+ bne t1, t8, 1b
+ nop
+
+2:
+ beqz t7, 4f // no leftover pixels
+ nop
+
+3:
+ DO_RGB_TO_GRAY t3, t4, t5, t0
+
+ mtlo s7, $ac0
+ maddu $ac0, s2, t5
+ maddu $ac0, s1, t4
+ maddu $ac0, s0, t3
+ extr.w t6, $ac0, 16
+ sb t6, 0(t1)
+ addiu t1, 1
+ bne t1, t9, 3b // one pixel per iteration until outptr reaches t9
+ nop
+
+4:
+ bgtz s6, 0b // next row while num_rows > 0
+ addiu a1, 4 // advance input_buf to the next row pointer
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+END(jsimd_\colorid\()_gray_convert_mips_dspr2)
+
+.purgem DO_RGB_TO_GRAY
+
+.endm
+
+/*------------------------------------------id -- pix R G B */
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
+GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
+/*****************************************************************************/
+/*
+ * jsimd_h2v2_fancy_upsample_mips_dspr2
+ *
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ */
+LEAF_MIPS_DSPR2(jsimd_h2v2_fancy_upsample_mips_dspr2)
+/*
+ * a0 - cinfo->max_v_samp_factor
+ * a1 - downsampled_width
+ * a2 - input_data
+ * a3 - output_data_ptr
+ */
+
+ SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
+
+ li s4, 0 // s4 = byte offset into the output row-pointer array
+ lw s2, 0(a3) // s2 = *output_data_ptr
+0:
+ li t9, 2 // t9 = output rows still to produce for this input row
+ lw s1, -4(a2) // s1 = inptr1 (input_data[-1] for the first output row)
+
+1:
+ lw s0, 0(a2) // s0 = inptr0
+ lwx s3, s4(s2) // s3 = outptr for the current output row
+ addiu s5, a1, -2 // s5 = downsampled_width - 2
+ srl t4, s5, 1
+ sll t4, t4, 1 // t4 = s5 rounded down to an even count
+ lbu t0, 0(s0)
+ lbu t1, 1(s0)
+ lbu t2, 0(s1)
+ lbu t3, 1(s1)
+ addiu s0, 2
+ addiu s1, 2
+ addu t8, s0, t4 // t8 = end address
+ andi s5, s5, 1 // s5 = residual
+ sll t4, t0, 1
+ sll t6, t1, 1
+ addu t0, t0, t4 // t0 = (*inptr0++) * 3
+ addu t1, t1, t6 // t1 = (*inptr0++) * 3
+ addu t7, t0, t2 // t7 = thiscolsum
+ addu t6, t1, t3 // t6 = nextcolsum
+ sll t0, t7, 2 // t0 = thiscolsum * 4
+ subu t1, t0, t7 // t1 = thiscolsum * 3
+ shra_r.w t0, t0, 4 // t0 = (thiscolsum * 4 + 8) >> 4
+ addiu t1, 7
+ addu t1, t1, t6
+ srl t1, t1, 4 // t1 = (thiscolsum * 3 + nextcolsum + 7) >> 4
+ sb t0, 0(s3)
+ sb t1, 1(s3)
+ addiu s3, 2
+2:
+ lh t0, 0(s0) // t0 = A3|A2
+ lh t2, 0(s1) // t2 = B3|B2
+ addiu s0, 2
+ addiu s1, 2
+ preceu.ph.qbr t0, t0 // t0 = 0|A3|0|A2
+ preceu.ph.qbr t2, t2 // t2 = 0|B3|0|B2
+ shll.ph t1, t0, 1
+ sll t3, t6, 1
+ addu.ph t0, t1, t0 // t0 = A3*3|A2*3
+ addu t3, t3, t6 // t3 = this * 3
+ addu.ph t0, t0, t2 // t0 = next2|next1
+ addu t1, t3, t7
+ andi t7, t0, 0xFFFF // t7 = next1
+ sll t2, t7, 1
+ addu t2, t7, t2 // t2 = next1*3
+ addu t4, t2, t6
+ srl t6, t0, 16 // t6 = next2
+ shra_r.w t1, t1, 4 // t1 = (this*3 + last + 8) >> 4
+ addu t0, t3, t7
+ addiu t0, 7
+ srl t0, t0, 4 // t0 = (this*3 + next1 + 7) >> 4
+ shra_r.w t4, t4, 4 // t4 = (next1*3 + this + 8) >> 4
+ addu t2, t2, t6
+ addiu t2, 7
+ srl t2, t2, 4 // t2 = (next1*3 + next2 + 7) >> 4
+ sb t1, 0(s3)
+ sb t0, 1(s3)
+ sb t4, 2(s3)
+ sb t2, 3(s3)
+ bne t8, s0, 2b // NOTE(review): tested only after the body, so t8 == s0 on entry (downsampled_width 2 or 3) looks unsafe - confirm caller guarantees
+ addiu s3, 4
+ beqz s5, 4f // no residual column
+ addu t8, s0, s5 // t8 = end address of the residual loop
+3:
+ lbu t0, 0(s0)
+ lbu t2, 0(s1)
+ addiu s0, 1
+ addiu s1, 1
+ sll t3, t6, 1
+ sll t1, t0, 1
+ addu t1, t0, t1 // t1 = inptr0 * 3
+ addu t3, t3, t6 // t3 = thiscolsum * 3
+ addu t5, t1, t2 // t5 = nextcolsum
+ addu t1, t3, t7
+ shra_r.w t1, t1, 4
+ addu t0, t3, t5
+ addiu t0, 7
+ srl t0, t0, 4
+ sb t1, 0(s3)
+ sb t0, 1(s3)
+ addiu s3, 2
+ move t7, t6
+ bne t8, s0, 3b
+ move t6, t5
+4:
+ sll t0, t6, 2 // t0 = thiscolsum * 4
+ subu t1, t0, t6 // t1 = thiscolsum * 3
+ addu t1, t1, t7
+ addiu s4, 4 // next entry of the output row-pointer array
+ shra_r.w t1, t1, 4
+ addiu t0, 7
+ srl t0, t0, 4
+ sb t1, 0(s3)
+ sb t0, 1(s3)
+ addiu t9, -1
+ addiu s3, 2
+ bnez t9, 1b // second output row for the same inptr0
+ lw s1, 4(a2) // s1 = inptr1 for the second output row (input_data[+1])
+ srl t0, s4, 2 // t0 = output rows written so far
+ subu t0, a0, t0
+ bgtz t0, 0b // loop while fewer than max_v_samp_factor rows are written
+ addiu a2, 4 // next input row
+
+ RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
+
+ j ra
+ nop
+END(jsimd_h2v2_fancy_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_fancy_upsample_mips_dspr2)
+/*
+ * a0 - cinfo->max_v_samp_factor
+ * a1 - downsampled_width
+ * a2 - input_data
+ * a3 - output_data_ptr
+ */
+
+ SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+ .set at
+
+ beqz a0, 3f // nothing to do when max_v_samp_factor == 0
+ sll t0, a0, 2 // t0 = max_v_samp_factor * 4
+ lw s1, 0(a3) // s1 = *output_data_ptr (output row pointers)
+ addu s0, s1, t0 // s0 = end of the output row-pointer array
+ li s3, 0x10001 // s3 = 1 in each halfword, added before the shrl.ph by 2 below
+0:
+ addiu t8, a1, -2 // t8 = downsampled_width - 2
+ srl t9, t8, 2 // t9 = number of 4-sample groups
+ lw t7, 0(a2) // t7 = inptr
+ lw s2, 0(s1) // s2 = outptr
+ lbu t0, 0(t7) // t0 = inptr[0]
+ lbu t1, 1(t7) // t1 = inptr[1]
+ sll t2, t0, 1
+ addu t2, t2, t0 // t2 = invalue*3
+ addu t2, t2, t1
+ shra_r.w t2, t2, 2 // t2 = (invalue*3 + inptr[1] + 2) >> 2
+ sb t0, 0(s2) // first output sample is inptr[0] unchanged
+ sb t2, 1(s2)
+ beqz t9, 11f // fewer than 4 samples remain: skip the vector loop
+ addiu s2, 2
+1:
+ ulw t0, 0(t7) // t0 = |P3|P2|P1|P0|
+ ulw t1, 1(t7) // t1 = |P4|P3|P2|P1|
+ ulh t2, 4(t7) // t2 = |0|0|P5|P4|
+ preceu.ph.qbl t3, t0 // t3 = |0|P3|0|P2|
+ preceu.ph.qbr t0, t0 // t0 = |0|P1|0|P0|
+ preceu.ph.qbr t2, t2 // t2 = |0|P5|0|P4|
+ preceu.ph.qbl t4, t1 // t4 = |0|P4|0|P3|
+ preceu.ph.qbr t1, t1 // t1 = |0|P2|0|P1|
+ shll.ph t5, t4, 1
+ shll.ph t6, t1, 1
+ addu.ph t5, t5, t4 // t5 = |P4*3|P3*3|
+ addu.ph t6, t6, t1 // t6 = |P2*3|P1*3|
+ addu.ph t4, t3, s3
+ addu.ph t0, t0, s3
+ addu.ph t4, t4, t5
+ addu.ph t0, t0, t6
+ shrl.ph t4, t4, 2 // t4 = |0|P3|0|P2|
+ shrl.ph t0, t0, 2 // t0 = |0|P1|0|P0|
+ addu.ph t2, t2, t5
+ addu.ph t3, t3, t6
+ shra_r.ph t2, t2, 2 // t2 = |0|P5|0|P4|
+ shra_r.ph t3, t3, 2 // t3 = |0|P3|0|P2|
+ shll.ph t2, t2, 8
+ shll.ph t3, t3, 8
+ or t2, t4, t2 // interleave the two result sets byte-wise
+ or t3, t3, t0
+ addiu t9, -1
+ usw t3, 0(s2)
+ usw t2, 4(s2)
+ addiu s2, 8 // 8 output bytes per 4 input samples
+ bgtz t9, 1b
+ addiu t7, 4
+11:
+ andi t8, 3 // t8 = samples left after the 4-sample groups
+ beqz t8, 3f
+ addiu t7, 1
+2:
+ lbu t0, 0(t7)
+ addiu t7, 1
+ sll t1, t0, 1
+ addu t2, t0, t1 // t2 = invalue * 3
+ lbu t3, -2(t7) // t3 = previous sample
+ lbu t4, 0(t7) // t4 = next sample
+ addiu t3, 1
+ addiu t4, 2
+ addu t3, t3, t2
+ addu t4, t4, t2
+ srl t3, 2 // t3 = (invalue*3 + previous + 1) >> 2
+ srl t4, 2 // t4 = (invalue*3 + next + 2) >> 2
+ sb t3, 0(s2)
+ sb t4, 1(s2)
+ addiu t8, -1
+ bgtz t8, 2b
+ addiu s2, 2
+
+ lbu t0, 0(t7)
+ lbu t2, -1(t7)
+ sll t1, t0, 1
+ addu t1, t1, t0 // t1 = invalue * 3
+ addu t1, t1, t2
+ addiu t1, 1
+ srl t1, t1, 2
+ sb t1, 0(s2)
+ sb t0, 1(s2) // last output sample is the last input sample unchanged
+ addiu s1, 4 // next output row pointer
+ bne s1, s0, 0b
+ addiu a2, 4 // next input row pointer
+3:
+ RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+ j ra
+ nop
+END(jsimd_h2v1_fancy_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_downsample_mips_dspr2)
+/*
+ * a0 - cinfo->image_width
+ * a1 - cinfo->max_v_samp_factor
+ * a2 - compptr->v_samp_factor
+ * a3 - compptr->width_in_blocks
+ * 16(sp) - input_data
+ * 20(sp) - output_data
+ */
+ .set at
+
+ SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4
+
+ beqz a2, 7f // nothing to do when v_samp_factor == 0
+ lw s1, 44(sp) // s1 = output_data
+ lw s0, 40(sp) // s0 = input_data
+ srl s2, a0, 2 // s2 = image_width >> 2
+ andi t9, a0, 2
+ srl t7, t9, 1
+ addu s2, t7, s2
+ sll t0, a3, 3 // t0 = width_in_blocks*DCT
+ srl t7, t0, 1
+ subu s2, t7, s2 // s2 = iteration count for the halfword fill loop at label 4
+0:
+ andi t6, a0, 1 // t6 = temp_index
+ addiu t6, -1
+ lw t4, 0(s1) // t4 = outptr
+ lw t5, 0(s0) // t5 = inptr0
+ li s3, 0 // s3 = bias
+ srl t7, a0, 1 // t7 = image_width1
+ srl s4, t7, 2 // s4 = number of 4-output groups
+ andi t8, t7, 3 // t8 = leftover outputs after the 4-output groups
+1:
+ ulhu t0, 0(t5)
+ ulhu t1, 2(t5)
+ ulhu t2, 4(t5)
+ ulhu t3, 6(t5)
+ raddu.w.qb t0, t0 // sum of the two bytes of the pair
+ raddu.w.qb t1, t1
+ raddu.w.qb t2, t2
+ raddu.w.qb t3, t3
+ shra.ph t0, t0, 1 // even outputs: sum >> 1
+ shra_r.ph t1, t1, 1 // odd outputs: (sum + 1) >> 1
+ shra.ph t2, t2, 1
+ shra_r.ph t3, t3, 1
+ sb t0, 0(t4)
+ sb t1, 1(t4)
+ sb t2, 2(t4)
+ sb t3, 3(t4)
+ addiu s4, -1
+ addiu t4, 4
+ bgtz s4, 1b
+ addiu t5, 8
+ beqz t8, 3f
+ addu s4, t4, t8 // s4 = end address of the leftover loop
+2:
+ ulhu t0, 0(t5)
+ raddu.w.qb t0, t0
+ addqh.w t0, t0, s3 // t0 = (sum + bias) >> 1
+ xori s3, s3, 1 // alternate the bias between 0 and 1
+ sb t0, 0(t4)
+ addiu t4, 1
+ bne t4, s4, 2b
+ addiu t5, 2
+3:
+ lbux t1, t6(t5) // t1 = byte at inptr0 + temp_index
+ sll t1, 1
+ addqh.w t2, t1, s3 // t2 = pixval1
+ xori s3, s3, 1
+ addqh.w t3, t1, s3 // t3 = pixval2
+ blez s2, 5f
+ append t3, t2, 8 // t3 = halfword built from pixval2 and pixval1 for the ush below
+ addu t5, t4, s2 // t5 = loop_end2; NOTE(review): t5 is not read again before label 0 reloads it
+4:
+ ush t3, 0(t4)
+ addiu s2, -1 // NOTE(review): s2 is set once before label 0, so later rows appear to skip this loop - confirm
+ bgtz s2, 4b
+ addiu t4, 2
+5:
+ beqz t9, 6f
+ nop
+ sb t2, 0(t4) // one extra pixval1 byte when image_width & 2 is set
+6:
+ addiu s1, 4 // next output row pointer
+ addiu a2, -1
+ bnez a2, 0b
+ addiu s0, 4 // next input row pointer
+7:
+ RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4
+
+ j ra
+ nop
+END(jsimd_h2v1_downsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2)
+
+/*
+ * a0 - cinfo->image_width
+ * a1 - cinfo->max_v_samp_factor
+ * a2 - compptr->v_samp_factor
+ * a3 - compptr->width_in_blocks
+ * 16(sp) - input_data
+ * 20(sp) - output_data
+ */
+ .set at
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ beqz a2, 8f // nothing to do when v_samp_factor == 0
+ lw s1, 52(sp) // s1 = output_data
+ lw s0, 48(sp) // s0 = input_data
+
+ andi t6, a0, 1 // t6 = temp_index
+ addiu t6, -1
+ srl t7, a0, 1 // t7 = image_width1
+ srl s4, t7, 2 // s4 = number of 4-output groups
+ andi t8, t7, 3 // t8 = leftover outputs after the 4-output groups
+ andi t9, a0, 2
+ srl s2, a0, 2
+ srl t7, t9, 1
+ addu s2, t7, s2
+ sll t0, a3, 3 // t0 = width_in_blocks*DCT
+ srl t7, t0, 1
+ subu s2, t7, s2 // s2 = iteration count for the halfword fill loop at label 5
+0:
+ lw t4, 0(s1) // t4 = outptr
+ lw t5, 0(s0) // t5 = inptr0
+ lw s7, 4(s0) // s7 = inptr1
+ li s6, 1 // s6 = bias
+2:
+ ulw t0, 0(t5) // t0 = |P3|P2|P1|P0|
+ ulw t1, 0(s7) // t1 = |Q3|Q2|Q1|Q0|
+ ulw t2, 4(t5)
+ ulw t3, 4(s7)
+ precrq.ph.w t7, t0, t1 // t7 = |P3|P2|Q3|Q2|
+ ins t0, t1, 16, 16 // t0 = |Q1|Q0|P1|P0|
+ raddu.w.qb t1, t7 // t1 = P3 + P2 + Q3 + Q2
+ raddu.w.qb t0, t0 // t0 = Q1 + Q0 + P1 + P0
+ shra_r.w t1, t1, 2 // odd outputs: (sum + 2) >> 2
+ addiu t0, 1
+ srl t0, 2 // even outputs: (sum + 1) >> 2
+ precrq.ph.w t7, t2, t3
+ ins t2, t3, 16, 16
+ raddu.w.qb t7, t7
+ raddu.w.qb t2, t2
+ shra_r.w t7, t7, 2
+ addiu t2, 1
+ srl t2, 2
+ sb t0, 0(t4)
+ sb t1, 1(t4)
+ sb t2, 2(t4)
+ sb t7, 3(t4)
+ addiu t4, 4
+ addiu t5, 8
+ addiu s4, s4, -1 // NOTE(review): s4, t8 and s2 are set once before label 0 but consumed per row - confirm v_samp_factor > 1 is handled
+ bgtz s4, 2b
+ addiu s7, 8
+ beqz t8, 4f
+ addu t8, t4, t8 // t8 = end address of the leftover loop (overwrites the leftover count)
+3:
+ ulhu t0, 0(t5)
+ ulhu t1, 0(s7)
+ ins t0, t1, 16, 16
+ raddu.w.qb t0, t0
+ addu t0, t0, s6
+ srl t0, 2 // t0 = (sum + bias) >> 2
+ xori s6, s6, 3 // alternate the bias between 1 and 2
+ sb t0, 0(t4)
+ addiu t5, 2
+ addiu t4, 1
+ bne t8, t4, 3b
+ addiu s7, 2
+4:
+ lbux t1, t6(t5) // t1 = byte at inptr0 + temp_index
+ sll t1, 1
+ lbux t0, t6(s7) // t0 = byte at inptr1 + temp_index
+ sll t0, 1
+ addu t1, t1, t0
+ addu t3, t1, s6
+ srl t0, t3, 2 // t0 = pixval1
+ xori s6, s6, 3
+ addu t2, t1, s6
+ srl t1, t2, 2 // t1 = pixval2
+ blez s2, 6f
+ append t1, t0, 8 // t1 = halfword built from pixval2 and pixval1 for the ush below
+5:
+ ush t1, 0(t4)
+ addiu s2, -1
+ bgtz s2, 5b
+ addiu t4, 2
+6:
+ beqz t9, 7f
+ nop
+ sb t0, 0(t4) // one extra pixval1 byte when image_width & 2 is set
+7:
+ addiu s1, 4 // next output row pointer
+ addiu a2, -1
+ bnez a2, 0b
+ addiu s0, 8 // skip two input row pointers
+8:
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+END(jsimd_h2v2_downsample_mips_dspr2)
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
+/*
+ * a0 - cinfo->max_v_samp_factor
+ * a1 - cinfo->output_width
+ * a2 - input_data
+ * a3 - output_data_ptr
+ */
+ lw t7, 0(a3) // t7 = output_data
+ andi t8, a1, 0xf // t8 = residual
+ sll t0, a0, 2 // t0 = max_v_samp_factor * 4
+ blez a0, 4f // no rows to produce
+ addu t9, t7, t0 // t9 = output_data end address
+0:
+ lw t5, 0(t7) // t5 = outptr
+ lw t6, 0(a2) // t6 = inptr
+ addu t3, t5, a1 // t3 = outptr + output_width (end address)
+ subu t3, t8 // t3 = end address - residual
+ beq t5, t3, 2f // output_width < 16: only the byte loop runs
+ move t4, t8 // t4 = residual counter for the byte loop
+1:
+ ulw t0, 0(t6) // t0 = |P3|P2|P1|P0|
+ ulw t2, 4(t6) // t2 = |P7|P6|P5|P4|
+ srl t1, t0, 16 // t1 = |X|X|P3|P2|
+ ins t0, t0, 16, 16 // t0 = |P1|P0|P1|P0|
+ ins t1, t1, 16, 16 // t1 = |P3|P2|P3|P2|
+ ins t0, t0, 8, 16 // t0 = |P1|P1|P0|P0|
+ ins t1, t1, 8, 16 // t1 = |P3|P3|P2|P2|
+ usw t0, 0(t5)
+ usw t1, 4(t5)
+ srl t0, t2, 16 // t0 = |X|X|P7|P6|
+ ins t2, t2, 16, 16 // t2 = |P5|P4|P5|P4|
+ ins t0, t0, 16, 16 // t0 = |P7|P6|P7|P6|
+ ins t2, t2, 8, 16 // t2 = |P5|P5|P4|P4|
+ ins t0, t0, 8, 16 // t0 = |P7|P7|P6|P6|
+ usw t2, 8(t5)
+ usw t0, 12(t5)
+ addiu t5, 16 // 16 output bytes per 8 input bytes
+ bne t5, t3, 1b
+ addiu t6, 8
+ beqz t8, 3f // no residual bytes
+ move t4, t8
+2:
+ lbu t1, 0(t6)
+ sb t1, 0(t5) // each input byte is written twice
+ sb t1, 1(t5)
+ addiu t4, -2 // two output bytes consumed from the residual
+ addiu t6, 1
+ bgtz t4, 2b
+ addiu t5, 2
+3:
+ addiu t7, 4 // next output row pointer
+ bne t9, t7, 0b
+ addiu a2, 4 // next input row pointer
+4:
+ j ra
+ nop
+END(jsimd_h2v1_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2)
+/*
+ * a0 - cinfo->max_v_samp_factor
+ * a1 - cinfo->output_width
+ * a2 - input_data
+ * a3 - output_data_ptr
+ */
+ lw t7, 0(a3) // t7 = output_data (output row pointers)
+ blez a0, 7f // no rows to produce
+ andi t9, a1, 0xf // t9 = residual
+0:
+ lw t6, 0(a2) // t6 = inptr
+ lw t5, 0(t7) // t5 = outptr
+ addu t8, t5, a1 // t8 = outptr end address
+ subu t8, t9 // t8 = end address - residual
+ beq t5, t8, 2f // output_width < 16: only the byte loop runs
+ move t4, t9 // t4 = residual counter for the byte loop
+1:
+ ulw t0, 0(t6) // t0 = 4 input bytes
+ srl t1, t0, 16
+ ins t0, t0, 16, 16
+ ins t0, t0, 8, 16 // t0 = low two input bytes, each doubled
+ ins t1, t1, 16, 16
+ ins t1, t1, 8, 16 // t1 = high two input bytes, each doubled
+ ulw t2, 4(t6) // t2 = next 4 input bytes
+ usw t0, 0(t5)
+ usw t1, 4(t5)
+ srl t3, t2, 16
+ ins t2, t2, 16, 16
+ ins t2, t2, 8, 16
+ ins t3, t3, 16, 16
+ ins t3, t3, 8, 16
+ usw t2, 8(t5)
+ usw t3, 12(t5)
+ addiu t5, 16 // 16 output bytes per 8 input bytes
+ bne t5, t8, 1b
+ addiu t6, 8
+ beqz t9, 3f // no residual bytes
+ move t4, t9
+2:
+ lbu t0, 0(t6)
+ sb t0, 0(t5) // each input byte is written twice
+ sb t0, 1(t5)
+ addiu t4, -2 // two output bytes consumed from the residual
+ addiu t6, 1
+ bgtz t4, 2b
+ addiu t5, 2
+3:
+ ulw t6, 0(t7) // t6 = outptr
+ ulw t5, 4(t7) // t5 = outptr[1]
+ addu t4, t6, a1 // t4 = new end address
+ subu t8, t4, t9 // t8 = end address of the 16-byte copy loop
+ beq t6, t8, 5f // output_width < 16: skip the 16-byte loop (it tests only after the body and would overrun)
+ nop
+4:
+ ulw t0, 0(t6) // copy the first output row into the second, 16 bytes at a time
+ ulw t1, 4(t6)
+ ulw t2, 8(t6)
+ usw t0, 0(t5)
+ ulw t0, 12(t6)
+ usw t1, 4(t5)
+ usw t2, 8(t5)
+ usw t0, 12(t5)
+ addiu t6, 16
+ bne t6, t8, 4b
+ addiu t5, 16
+ beqz t9, 6f // no residual bytes to copy
+ nop
+5:
+ lbu t0, 0(t6) // copy the residual bytes one at a time
+ sb t0, 0(t5)
+ addiu t6, 1
+ bne t6, t4, 5b
+ addiu t5, 1
+6:
+ addiu t7, 8 // skip two output row pointers
+ addiu a0, -2 // two output rows produced per input row
+ bgtz a0, 0b
+ addiu a2, 4 // next input row pointer
+7:
+ j ra
+ nop
+END(jsimd_h2v2_upsample_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_ifast_cols_mips_dspr2)
+/*
+ * a0 - inptr
+ * a1 - quantptr
+ * a2 - wsptr
+ * a3 - mips_idct_ifast_coefs
+ */
+
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ addiu t9, a0, 16 // end address
+ or AT, a3, zero // AT = mips_idct_ifast_coefs
+
+0:
+ lw s0, 0(a1) // quantptr[DCTSIZE*0]
+ lw t0, 0(a0) // inptr[DCTSIZE*0]
+ lw t1, 16(a0) // inptr[DCTSIZE*1]
+ muleq_s.w.phl v0, t0, s0 // tmp0 ...
+ lw t2, 32(a0) // inptr[DCTSIZE*2]
+ lw t3, 48(a0) // inptr[DCTSIZE*3]
+ lw t4, 64(a0) // inptr[DCTSIZE*4]
+ lw t5, 80(a0) // inptr[DCTSIZE*5]
+ muleq_s.w.phr t0, t0, s0 // ... tmp0 ...
+ lw t6, 96(a0) // inptr[DCTSIZE*6]
+ lw t7, 112(a0) // inptr[DCTSIZE*7]
+ or s4, t1, t2 // s4 != 0 if rows 1-2 hold a nonzero word
+ or s5, t3, t4 // s5 != 0 if rows 3-4 hold a nonzero word
+ bnez s4, 1f // any nonzero AC word: take the full path
+ ins t0, v0, 16, 16 // ... tmp0
+ bnez s5, 1f
+ or s6, t5, t6
+ or s6, s6, t7 // s6 != 0 if rows 5-7 hold a nonzero word
+ bnez s6, 1f
+ sw t0, 0(a2) // wsptr[DCTSIZE*0]
+ sw t0, 16(a2) // wsptr[DCTSIZE*1]
+ sw t0, 32(a2) // wsptr[DCTSIZE*2]
+ sw t0, 48(a2) // wsptr[DCTSIZE*3]
+ sw t0, 64(a2) // wsptr[DCTSIZE*4]
+ sw t0, 80(a2) // wsptr[DCTSIZE*5]
+ sw t0, 96(a2) // wsptr[DCTSIZE*6]
+ sw t0, 112(a2) // wsptr[DCTSIZE*7]
+ addiu a0, a0, 4 // next 32-bit word of the input row
+ b 2f
+ addiu a1, a1, 4 // matching word of the quantization table
+
+1:
+ lw s1, 32(a1) // quantptr[DCTSIZE*2]
+ lw s2, 64(a1) // quantptr[DCTSIZE*4]
+ muleq_s.w.phl v0, t2, s1 // tmp1 ...
+ muleq_s.w.phr t2, t2, s1 // ... tmp1 ...
+ lw s0, 16(a1) // quantptr[DCTSIZE*1]
+ lw s1, 48(a1) // quantptr[DCTSIZE*3]
+ lw s3, 96(a1) // quantptr[DCTSIZE*6]
+ muleq_s.w.phl v1, t4, s2 // tmp2 ...
+ muleq_s.w.phr t4, t4, s2 // ... tmp2 ...
+ lw s2, 80(a1) // quantptr[DCTSIZE*5]
+ lw t8, 4(AT) // FIX(1.414213562)
+ ins t2, v0, 16, 16 // ... tmp1
+ muleq_s.w.phl v0, t6, s3 // tmp3 ...
+ muleq_s.w.phr t6, t6, s3 // ... tmp3 ...
+ ins t4, v1, 16, 16 // ... tmp2
+ addq.ph s4, t0, t4 // tmp10
+ subq.ph s5, t0, t4 // tmp11
+ ins t6, v0, 16, 16 // ... tmp3
+ subq.ph s6, t2, t6 // tmp12 ...
+ addq.ph s7, t2, t6 // tmp13
+ mulq_s.ph s6, s6, t8 // ... tmp12 ...
+ addq.ph t0, s4, s7 // tmp0
+ subq.ph t6, s4, s7 // tmp3
+ muleq_s.w.phl v0, t1, s0 // tmp4 ...
+ muleq_s.w.phr t1, t1, s0 // ... tmp4 ...
+ shll_s.ph s6, s6, 1 // x2
+ lw s3, 112(a1) // quantptr[DCTSIZE*7]
+ subq.ph s6, s6, s7 // ... tmp12
+ muleq_s.w.phl v1, t7, s3 // tmp7 ...
+ muleq_s.w.phr t7, t7, s3 // ... tmp7 ...
+ ins t1, v0, 16, 16 // ... tmp4
+ addq.ph t2, s5, s6 // tmp1
+ subq.ph t4, s5, s6 // tmp2
+ muleq_s.w.phl v0, t5, s2 // tmp6 ...
+ muleq_s.w.phr t5, t5, s2 // ... tmp6 ...
+ ins t7, v1, 16, 16 // ... tmp7
+ addq.ph s5, t1, t7 // z11
+ subq.ph s6, t1, t7 // z12
+ muleq_s.w.phl v1, t3, s1 // tmp5 ...
+ muleq_s.w.phr t3, t3, s1 // ... tmp5 ...
+ ins t5, v0, 16, 16 // ... tmp6
+ ins t3, v1, 16, 16 // ... tmp5
+ addq.ph s7, t5, t3 // z13
+ subq.ph v0, t5, t3 // z10
+ addq.ph t7, s5, s7 // tmp7
+ subq.ph s5, s5, s7 // tmp11 ...
+ addq.ph v1, v0, s6 // z5 ...
+ mulq_s.ph s5, s5, t8 // ... tmp11
+ lw t8, 8(AT) // FIX(1.847759065)
+ lw s4, 0(AT) // FIX(1.082392200)
+ addq.ph s0, t0, t7 // tmp0 + tmp7
+ subq.ph s1, t0, t7 // tmp0 - tmp7
+ mulq_s.ph v1, v1, t8 // ... z5
+ shll_s.ph s5, s5, 1 // x2
+ lw t8, 12(AT) // FIX(-2.613125930)
+ sw s0, 0(a2) // wsptr[DCTSIZE*0]
+ shll_s.ph v0, v0, 1 // x4
+ mulq_s.ph v0, v0, t8 // tmp12 ...
+ mulq_s.ph s4, s6, s4 // tmp10 ...
+ shll_s.ph v1, v1, 1 // x2
+ addiu a0, a0, 4 // next 32-bit word of the input row
+ addiu a1, a1, 4 // matching word of the quantization table
+ sw s1, 112(a2) // wsptr[DCTSIZE*7]
+ shll_s.ph s6, v0, 1 // x4
+ shll_s.ph s4, s4, 1 // x2
+ addq.ph s6, s6, v1 // ... tmp12
+ subq.ph t5, s6, t7 // tmp6
+ subq.ph s4, s4, v1 // ... tmp10
+ subq.ph t3, s5, t5 // tmp5
+ addq.ph s2, t2, t5 // tmp1 + tmp6
+ addq.ph t1, s4, t3 // tmp4
+ subq.ph s3, t2, t5 // tmp1 - tmp6
+ sw s2, 16(a2) // wsptr[DCTSIZE*1]
+ sw s3, 96(a2) // wsptr[DCTSIZE*6]
+ addq.ph v0, t4, t3 // tmp2 + tmp5
+ subq.ph v1, t4, t3 // tmp2 - tmp5
+ sw v0, 32(a2) // wsptr[DCTSIZE*2]
+ sw v1, 80(a2) // wsptr[DCTSIZE*5]
+ addq.ph v0, t6, t1 // tmp3 + tmp4
+ subq.ph v1, t6, t1 // tmp3 - tmp4
+ sw v0, 64(a2) // wsptr[DCTSIZE*4]
+ sw v1, 48(a2) // wsptr[DCTSIZE*3]
+
+2:
+ bne a0, t9, 0b // four iterations: a0 advances 4 bytes each time up to inptr + 16
+ addiu a2, a2, 4 // next word of the workspace row
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+
+END(jsimd_idct_ifast_cols_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_ifast_rows_mips_dspr2)
+/*
+ * a0 - wsptr
+ * a1 - output_buf
+ * a2 - output_col
+ * a3 - mips_idct_ifast_coefs
+ */
+
+ SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
+
+ addiu t9, a0, 128 // end address
+ lui s8, 0x8080
+ ori s8, s8, 0x8080 // s8 = 0x80808080, added to every output byte below
+
+0:
+ lw AT, 36(sp) // restore $a3 (mips_idct_ifast_coefs)
+ lw t0, 0(a0) // wsptr[DCTSIZE*0+0/1] b a
+ lw s0, 16(a0) // wsptr[DCTSIZE*1+0/1] B A
+ lw t2, 4(a0) // wsptr[DCTSIZE*0+2/3] d c
+ lw s2, 20(a0) // wsptr[DCTSIZE*1+2/3] D C
+ lw t4, 8(a0) // wsptr[DCTSIZE*0+4/5] f e
+ lw s4, 24(a0) // wsptr[DCTSIZE*1+4/5] F E
+ lw t6, 12(a0) // wsptr[DCTSIZE*0+6/7] h g
+ lw s6, 28(a0) // wsptr[DCTSIZE*1+6/7] H G
+ precrq.ph.w t1, s0, t0 // B b
+ ins t0, s0, 16, 16 // A a
+ bnez t1, 1f // column 1 nonzero in either row: full path
+ or s0, t2, s2
+ bnez s0, 1f // columns 2-3 nonzero: full path
+ or s0, t4, s4
+ bnez s0, 1f // columns 4-5 nonzero: full path
+ or s0, t6, s6
+ bnez s0, 1f // columns 6-7 nonzero: full path
+ shll_s.ph s0, t0, 2 // A a
+ lw a3, 0(a1) // a3 = output_buf[0] (a3 is reused as a row pointer from here on)
+ lw AT, 4(a1) // AT = output_buf[1]
+ precrq.ph.w t0, s0, s0 // A A
+ ins s0, s0, 16, 16 // a a
+ addu a3, a3, a2 // + output_col
+ addu AT, AT, a2 // + output_col
+ precrq.qb.ph t0, t0, t0 // A A A A
+ precrq.qb.ph s0, s0, s0 // a a a a
+ addu.qb s0, s0, s8
+ addu.qb t0, t0, s8
+ sw s0, 0(a3) // all 8 outputs of the first row get the same value
+ sw s0, 4(a3)
+ sw t0, 0(AT) // all 8 outputs of the second row get the same value
+ sw t0, 4(AT)
+ addiu a0, a0, 32 // two workspace rows per iteration
+ bne a0, t9, 0b
+ addiu a1, a1, 8 // two output row pointers per iteration
+ b 2f
+ nop
+
+1:
+ precrq.ph.w t3, s2, t2 // D d
+ ins t2, s2, 16, 16 // C c
+ precrq.ph.w t5, s4, t4 // F f
+ ins t4, s4, 16, 16 // E e
+ precrq.ph.w t7, s6, t6 // H h
+ ins t6, s6, 16, 16 // G g
+ lw t8, 4(AT) // FIX(1.414213562)
+ addq.ph s4, t0, t4 // tmp10
+ subq.ph s5, t0, t4 // tmp11
+ subq.ph s6, t2, t6 // tmp12 ...
+ addq.ph s7, t2, t6 // tmp13
+ mulq_s.ph s6, s6, t8 // ... tmp12 ...
+ addq.ph t0, s4, s7 // tmp0
+ subq.ph t6, s4, s7 // tmp3
+ shll_s.ph s6, s6, 1 // x2
+ subq.ph s6, s6, s7 // ... tmp12
+ addq.ph t2, s5, s6 // tmp1
+ subq.ph t4, s5, s6 // tmp2
+ addq.ph s5, t1, t7 // z11
+ subq.ph s6, t1, t7 // z12
+ addq.ph s7, t5, t3 // z13
+ subq.ph v0, t5, t3 // z10
+ addq.ph t7, s5, s7 // tmp7
+ subq.ph s5, s5, s7 // tmp11 ...
+ addq.ph v1, v0, s6 // z5 ...
+ mulq_s.ph s5, s5, t8 // ... tmp11
+ lw t8, 8(AT) // FIX(1.847759065)
+ lw s4, 0(AT) // FIX(1.082392200)
+ addq.ph s0, t0, t7 // tmp0 + tmp7
+ subq.ph s7, t0, t7 // tmp0 - tmp7
+ mulq_s.ph v1, v1, t8 // ... z5
+ lw a3, 0(a1) // a3 = output_buf[0]
+ lw t8, 12(AT) // FIX(-2.613125930)
+ shll_s.ph s5, s5, 1 // x2
+ addu a3, a3, a2 // + output_col
+ shll_s.ph v0, v0, 1 // x4
+ mulq_s.ph v0, v0, t8 // tmp12 ...
+ mulq_s.ph s4, s6, s4 // tmp10 ...
+ shll_s.ph v1, v1, 1 // x2
+ addiu a0, a0, 32 // two workspace rows per iteration
+ addiu a1, a1, 8 // two output row pointers per iteration
+ shll_s.ph s6, v0, 1 // x4
+ shll_s.ph s4, s4, 1 // x2
+ addq.ph s6, s6, v1 // ... tmp12
+ shll_s.ph s0, s0, 2
+ subq.ph t5, s6, t7 // tmp6
+ subq.ph s4, s4, v1 // ... tmp10
+ subq.ph t3, s5, t5 // tmp5
+ shll_s.ph s7, s7, 2
+ addq.ph t1, s4, t3 // tmp4
+ addq.ph s1, t2, t5 // tmp1 + tmp6
+ subq.ph s6, t2, t5 // tmp1 - tmp6
+ addq.ph s2, t4, t3 // tmp2 + tmp5
+ subq.ph s5, t4, t3 // tmp2 - tmp5
+ addq.ph s4, t6, t1 // tmp3 + tmp4
+ subq.ph s3, t6, t1 // tmp3 - tmp4
+ shll_s.ph s1, s1, 2
+ shll_s.ph s2, s2, 2
+ shll_s.ph s3, s3, 2
+ shll_s.ph s4, s4, 2
+ shll_s.ph s5, s5, 2
+ shll_s.ph s6, s6, 2
+ precrq.ph.w t0, s1, s0 // B A
+ ins s0, s1, 16, 16 // b a
+ precrq.ph.w t2, s3, s2 // D C
+ ins s2, s3, 16, 16 // d c
+ precrq.ph.w t4, s5, s4 // F E
+ ins s4, s5, 16, 16 // f e
+ precrq.ph.w t6, s7, s6 // H G
+ ins s6, s7, 16, 16 // h g
+ precrq.qb.ph t0, t2, t0 // D C B A
+ precrq.qb.ph s0, s2, s0 // d c b a
+ precrq.qb.ph t4, t6, t4 // H G F E
+ precrq.qb.ph s4, s6, s4 // h g f e
+ addu.qb s0, s0, s8
+ addu.qb s4, s4, s8
+ sw s0, 0(a3) // outptr[0/1/2/3] d c b a
+ sw s4, 4(a3) // outptr[4/5/6/7] h g f e
+ lw a3, -4(a1) // a3 = second row pointer of this pair (a1 was already advanced by 8)
+ addu.qb t0, t0, s8
+ addu a3, a3, a2 // + output_col
+ addu.qb t4, t4, s8
+ sw t0, 0(a3) // outptr[0/1/2/3] D C B A
+ bne a0, t9, 0b
+ sw t4, 4(a3) // outptr[4/5/6/7] H G F E
+
+2:
+
+ RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8, a3
+
+ j ra
+ nop
+
+END(jsimd_idct_ifast_rows_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_fdct_islow_mips_dspr2)
+/*
+ * a0 - data
+ */
+
+ SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lui t0, 6437
+ ori t0, 2260
+ lui t1, 9633
+ ori t1, 11363
+ lui t2, 0xd39e
+ ori t2, 0xe6dc
+ lui t3, 0xf72d
+ ori t3, 9633
+ lui t4, 2261
+ ori t4, 9633
+ lui t5, 0xd39e
+ ori t5, 6437
+ lui t6, 9633
+ ori t6, 0xd39d
+ lui t7, 0xe6dc
+ ori t7, 2260
+ lui t8, 4433
+ ori t8, 10703
+ lui t9, 0xd630
+ ori t9, 4433
+ li s8, 8
+ move a1, a0
+1:
+ lw s0, 0(a1) // tmp0 = 1|0
+ lw s1, 4(a1) // tmp1 = 3|2
+ lw s2, 8(a1) // tmp2 = 5|4
+ lw s3, 12(a1) // tmp3 = 7|6
+ packrl.ph s1, s1, s1 // tmp1 = 2|3
+ packrl.ph s3, s3, s3 // tmp3 = 6|7
+ subq.ph s7, s1, s2 // tmp7 = 2-5|3-4 = t5|t4
+ subq.ph s5, s0, s3 // tmp5 = 1-6|0-7 = t6|t7
+ mult $0, $0 // ac0 = 0
+ dpa.w.ph $ac0, s7, t0 // ac0 += t5* 6437 + t4* 2260
+ dpa.w.ph $ac0, s5, t1 // ac0 += t6* 9633 + t7* 11363
+ mult $ac1, $0, $0 // ac1 = 0
+ dpa.w.ph $ac1, s7, t2 // ac1 += t5*-11362 + t4* -6436
+ dpa.w.ph $ac1, s5, t3 // ac1 += t6* -2259 + t7* 9633
+ mult $ac2, $0, $0 // ac2 = 0
+ dpa.w.ph $ac2, s7, t4 // ac2 += t5* 2261 + t4* 9633
+ dpa.w.ph $ac2, s5, t5 // ac2 += t6*-11362 + t7* 6437
+ mult $ac3, $0, $0 // ac3 = 0
+ dpa.w.ph $ac3, s7, t6 // ac3 += t5* 9633 + t4*-11363
+ dpa.w.ph $ac3, s5, t7 // ac3 += t6* -6436 + t7* 2260
+ addq.ph s6, s1, s2 // tmp6 = 2+5|3+4 = t2|t3
+ addq.ph s4, s0, s3 // tmp4 = 1+6|0+7 = t1|t0
+ extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11
+ extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11
+ extr_r.w s2, $ac2, 11 // tmp2 = (ac2 + 1024) >> 11
+ extr_r.w s3, $ac3, 11 // tmp3 = (ac3 + 1024) >> 11
+ addq.ph s5, s4, s6 // tmp5 = t1+t2|t0+t3 = t11|t10
+ subq.ph s7, s4, s6 // tmp7 = t1-t2|t0-t3 = t12|t13
+ sh s0, 2(a1)
+ sh s1, 6(a1)
+ sh s2, 10(a1)
+ sh s3, 14(a1)
+ mult $0, $0 // ac0 = 0
+ dpa.w.ph $ac0, s7, t8 // ac0 += t12* 4433 + t13* 10703
+ mult $ac1, $0, $0 // ac1 = 0
+ dpa.w.ph $ac1, s7, t9 // ac1 += t12*-10704 + t13* 4433
+ sra s4, s5, 16 // tmp4 = t11
+ addiu a1, a1, 16
+ addiu s8, s8, -1
+ extr_r.w s0, $ac0, 11 // tmp0 = (ac0 + 1024) >> 11
+ extr_r.w s1, $ac1, 11 // tmp1 = (ac1 + 1024) >> 11
+ addu s2, s5, s4 // tmp2 = t10 + t11
+ subu s3, s5, s4 // tmp3 = t10 - t11
+ sll s2, s2, 2 // tmp2 = (t10 + t11) << 2
+ sll s3, s3, 2 // tmp3 = (t10 - t11) << 2
+ sh s2, -16(a1)
+ sh s3, -8(a1)
+ sh s0, -12(a1)
+ bgtz s8, 1b
+ sh s1, -4(a1)
+ li t0, 2260
+ li t1, 11363
+ li t2, 9633
+ li t3, 6436
+ li t4, 6437
+ li t5, 2261
+ li t6, 11362
+ li t7, 2259
+ li t8, 4433
+ li t9, 10703
+ li a1, 10704
+ li s8, 8
+
+2:
+ lh a2, 0(a0) // 0
+ lh a3, 16(a0) // 8
+ lh v0, 32(a0) // 16
+ lh v1, 48(a0) // 24
+ lh s4, 64(a0) // 32
+ lh s5, 80(a0) // 40
+ lh s6, 96(a0) // 48
+ lh s7, 112(a0) // 56
+ addu s2, v0, s5 // tmp2 = 16 + 40
+ subu s5, v0, s5 // tmp5 = 16 - 40
+ addu s3, v1, s4 // tmp3 = 24 + 32
+ subu s4, v1, s4 // tmp4 = 24 - 32
+ addu s0, a2, s7 // tmp0 = 0 + 56
+ subu s7, a2, s7 // tmp7 = 0 - 56
+ addu s1, a3, s6 // tmp1 = 8 + 48
+ subu s6, a3, s6 // tmp6 = 8 - 48
+ addu a2, s0, s3 // tmp10 = tmp0 + tmp3
+ subu v1, s0, s3 // tmp13 = tmp0 - tmp3
+ addu a3, s1, s2 // tmp11 = tmp1 + tmp2
+ subu v0, s1, s2 // tmp12 = tmp1 - tmp2
+ mult s7, t1 // ac0 = tmp7 * c1
+ madd s4, t0 // ac0 += tmp4 * c0
+ madd s5, t4 // ac0 += tmp5 * c4
+ madd s6, t2 // ac0 += tmp6 * c2
+ mult $ac1, s7, t2 // ac1 = tmp7 * c2
+ msub $ac1, s4, t3 // ac1 -= tmp4 * c3
+ msub $ac1, s5, t6 // ac1 -= tmp5 * c6
+ msub $ac1, s6, t7 // ac1 -= tmp6 * c7
+ mult $ac2, s7, t4 // ac2 = tmp7 * c4
+ madd $ac2, s4, t2 // ac2 += tmp4 * c2
+ madd $ac2, s5, t5 // ac2 += tmp5 * c5
+ msub $ac2, s6, t6 // ac2 -= tmp6 * c6
+ mult $ac3, s7, t0 // ac3 = tmp7 * c0
+ msub $ac3, s4, t1 // ac3 -= tmp4 * c1
+ madd $ac3, s5, t2 // ac3 += tmp5 * c2
+ msub $ac3, s6, t3 // ac3 -= tmp6 * c3
+ extr_r.w s0, $ac0, 15 // tmp0 = (ac0 + 16384) >> 15
+ extr_r.w s1, $ac1, 15 // tmp1 = (ac1 + 16384) >> 15
+ extr_r.w s2, $ac2, 15 // tmp2 = (ac2 + 16384) >> 15
+ extr_r.w s3, $ac3, 15 // tmp3 = (ac3 + 16384) >> 15
+ addiu s8, s8, -1
+ addu s4, a2, a3 // tmp4 = tmp10 + tmp11
+ subu s5, a2, a3 // tmp5 = tmp10 - tmp11
+ sh s0, 16(a0)
+ sh s1, 48(a0)
+ sh s2, 80(a0)
+ sh s3, 112(a0)
+ mult v0, t8 // ac0 = tmp12 * c8
+ madd v1, t9 // ac0 += tmp13 * c9
+ mult $ac1, v1, t8 // ac1 = tmp13 * c8
+ msub $ac1, v0, a1 // ac1 -= tmp12 * c10
+ addiu a0, a0, 2
+ extr_r.w s6, $ac0, 15 // tmp6 = (ac0 + 16384) >> 15
+ extr_r.w s7, $ac1, 15 // tmp7 = (ac1 + 16384) >> 15
+ shra_r.w s4, s4, 2 // tmp4 = (tmp4 + 2) >> 2
+ shra_r.w s5, s5, 2 // tmp5 = (tmp5 + 2) >> 2
+ sh s4, -2(a0)
+ sh s5, 62(a0)
+ sh s6, 30(a0)
+ bgtz s8, 2b
+ sh s7, 94(a0)
+
+ RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ jr ra
+ nop
+
+END(jsimd_fdct_islow_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_fdct_ifast_mips_dspr2)
+/* In-place forward DCT of the 64 halfwords at a0: loop 0 handles 8 rows, loop 1 handles 8 columns.
+ * a0 - data
+ */
+ .set at
+ SAVE_REGS_ON_STACK 8, s0, s1
+ li a1, 0x014e014e // FIX_1_306562965 (334 << 16)|(334 & 0xffff)
+ li a2, 0x008b008b // FIX_0_541196100 (139 << 16)|(139 & 0xffff)
+ li a3, 0x00620062 // FIX_0_382683433 (98 << 16) |(98 & 0xffff)
+ li s1, 0x00b500b5 // FIX_0_707106781 (181 << 16)|(181 & 0xffff)
+
+ move v0, a0 // v0 = current row pointer
+ addiu v1, v0, 128 // end address
+
+0:
+ lw t0, 0(v0) // tmp0 = 1|0
+ lw t1, 4(v0) // tmp1 = 3|2
+ lw t2, 8(v0) // tmp2 = 5|4
+ lw t3, 12(v0) // tmp3 = 7|6
+ packrl.ph t1, t1, t1 // tmp1 = 2|3
+ packrl.ph t3, t3, t3 // tmp3 = 6|7
+ subq.ph t7, t1, t2 // tmp7 = 2-5|3-4 = t5|t4
+ subq.ph t5, t0, t3 // tmp5 = 1-6|0-7 = t6|t7
+ addq.ph t6, t1, t2 // tmp6 = 2+5|3+4 = t2|t3
+ addq.ph t4, t0, t3 // tmp4 = 1+6|0+7 = t1|t0
+ addq.ph t8, t4, t6 // t8 = t1+t2|t0+t3 = t11|t10
+ subq.ph t9, t4, t6 // t9 = t1-t2|t0-t3 = t12|t13
+ sra t4, t8, 16 // tmp4 = t11
+ mult $0, $0 // ac0 = 0
+ dpa.w.ph $ac0, t9, s1 // ac0 += t12*181 + t13*181
+ mult $ac1, $0, $0 // ac1 = 0
+ dpa.w.ph $ac1, t7, a3 // ac1 += t4*98 + t5*98
+ dpsx.w.ph $ac1, t5, a3 // ac1 -= t6*98 + t7*98 (dpsx subtracts the cross dot product)
+ mult $ac2, $0, $0 // ac2 = 0
+ dpa.w.ph $ac2, t7, a2 // ac2 += t4*139 + t5*139
+ mult $ac3, $0, $0 // ac3 = 0
+ dpa.w.ph $ac3, t5, a1 // ac3 += t6*334 + t7*334
+ precrq.ph.w t0, t5, t7 // t0 = t6|t5 (upper halves of registers t5 and t7)
+ addq.ph t2, t8, t4 // tmp2 = t10 + t11
+ subq.ph t3, t8, t4 // tmp3 = t10 - t11
+ extr.w t4, $ac0, 8 // t4 = z1 = ac0 >> 8
+ mult $0, $0 // ac0 = 0
+ dpa.w.ph $ac0, t0, s1 // ac0 += t5*181 + t6*181
+ extr.w t0, $ac1, 8 // t0 = z5
+ extr.w t1, $ac2, 8 // t1 = MULTIPLY(tmp10, 139)
+ extr.w t7, $ac3, 8 // t7 = MULTIPLY(tmp12, 334)
+ extr.w t8, $ac0, 8 // t8 = z3 = MULTIPLY(tmp11, 181)
+ add t6, t1, t0 // t6 = z2
+ add t7, t7, t0 // t7 = z4
+ subq.ph t0, t5, t8 // t0 = z13 = tmp7 - z3
+ addq.ph t8, t5, t8 // t8 = z11 = tmp7 + z3
+ addq.ph t1, t0, t6 // t1 = z13 + z2
+ subq.ph t6, t0, t6 // t6 = z13 - z2
+ addq.ph t0, t8, t7 // t0 = z11 + z4
+ subq.ph t7, t8, t7 // t7 = z11 - z4
+ addq.ph t5, t4, t9 // t5 = tmp13 + z1 (low half)
+ subq.ph t4, t9, t4 // t4 = tmp13 - z1 (low half)
+ sh t2, 0(v0) // row element 0
+ sh t5, 4(v0) // row element 2
+ sh t3, 8(v0) // row element 4
+ sh t4, 12(v0) // row element 6
+ sh t1, 10(v0) // row element 5
+ sh t6, 6(v0) // row element 3
+ sh t0, 2(v0) // row element 1
+ sh t7, 14(v0) // row element 7
+ addiu v0, 16 // next row (8 halfwords)
+ bne v1, v0, 0b
+ nop
+ move v0, a0 // v0 = current column pointer
+ addiu v1, v0, 16 // end address: 8 columns of 2 bytes
+
+1:
+ lh t0, 0(v0) // 0
+ lh t1, 16(v0) // 8
+ lh t2, 32(v0) // 16
+ lh t3, 48(v0) // 24
+ lh t4, 64(v0) // 32
+ lh t5, 80(v0) // 40
+ lh t6, 96(v0) // 48
+ lh t7, 112(v0) // 56
+ add t8, t0, t7 // t8 = tmp0
+ sub t7, t0, t7 // t7 = tmp7
+ add t0, t1, t6 // t0 = tmp1
+ sub t1, t1, t6 // t1 = tmp6
+ add t6, t2, t5 // t6 = tmp2
+ sub t5, t2, t5 // t5 = tmp5
+ add t2, t3, t4 // t2 = tmp3
+ sub t3, t3, t4 // t3 = tmp4
+ add t4, t8, t2 // t4 = tmp10 = tmp0 + tmp3
+ sub t8, t8, t2 // t8 = tmp13 = tmp0 - tmp3
+ sub s0, t0, t6 // s0 = tmp12 = tmp1 - tmp2
+ ins t8, s0, 16, 16 // t8 = tmp12|tmp13
+ add t2, t0, t6 // t2 = tmp11 = tmp1 + tmp2
+ mult $0, $0 // ac0 = 0
+ dpa.w.ph $ac0, t8, s1 // ac0 += t12*181 + t13*181
+ add s0, t4, t2 // s0 = tmp10+tmp11
+ sub t4, t4, t2 // t4 = tmp10-tmp11
+ sh s0, 0(v0)
+ sh t4, 64(v0)
+ extr.w t2, $ac0, 8 // z1 = MULTIPLY(tmp12+tmp13,FIX_0_707106781)
+ addq.ph t4, t8, t2 // t4 = tmp13 + z1 (low half)
+ subq.ph t8, t8, t2 // t8 = tmp13 - z1 (low half)
+ sh t4, 32(v0)
+ sh t8, 96(v0)
+ add t3, t3, t5 // t3 = tmp10 = tmp4 + tmp5
+ add t0, t5, t1 // t0 = tmp11 = tmp5 + tmp6
+ add t1, t1, t7 // t1 = tmp12 = tmp6 + tmp7
+ andi t4, a1, 0xffff // t4 = 334 (low half of a1)
+ mul s0, t1, t4
+ sra s0, s0, 8 // s0 = z4 = MULTIPLY(tmp12, FIX_1_306562965)
+ ins t1, t3, 16, 16 // t1 = tmp10|tmp12
+ mult $0, $0 // ac0 = 0
+ mulsa.w.ph $ac0, t1, a3 // ac0 += t10*98 - t12*98
+ extr.w t8, $ac0, 8 // z5 = MULTIPLY(tmp10-tmp12,FIX_0_382683433)
+ add t2, t7, t8 // t2 = tmp7 + z5
+ sub t7, t7, t8 // t7 = tmp7 - z5
+ andi t4, a2, 0xffff // t4 = 139 (low half of a2)
+ mul t8, t3, t4
+ sra t8, t8, 8 // t8 = z2 = MULTIPLY(tmp10, FIX_0_541196100)
+ andi t4, s1, 0xffff // t4 = 181 (low half of s1)
+ mul t6, t0, t4
+ sra t6, t6, 8 // t6 = z3 = MULTIPLY(tmp11, FIX_0_707106781)
+ add t0, t6, t8 // t0 = z3 + z2
+ sub t1, t6, t8 // t1 = z3 - z2
+ add t3, t6, s0 // t3 = z3 + z4
+ sub t4, t6, s0 // t4 = z3 - z4
+ sub t5, t2, t1 // t5 = dataptr[5]
+ sub t6, t7, t0 // t6 = dataptr[3]
+ add t3, t2, t3 // t3 = dataptr[1]
+ add t4, t7, t4 // t4 = dataptr[7]
+ sh t5, 80(v0)
+ sh t6, 48(v0)
+ sh t3, 16(v0)
+ sh t4, 112(v0)
+ addiu v0, 2 // next column
+ bne v0, v1, 1b
+ nop
+
+ RESTORE_REGS_FROM_STACK 8, s0, s1
+
+ j ra
+ nop
+END(jsimd_fdct_ifast_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_quantize_mips_dspr2)
+/* Quantizes 64 halfwords from workspace into coef_block, two per loop pass (last pair after the loop).
+ * a0 - coef_block
+ * a1 - divisors
+ * a2 - workspace
+ */
+
+ .set at
+
+ SAVE_REGS_ON_STACK 16, s0, s1, s2
+
+ addiu v0, a2, 124 // v0 = workspace + 124: address of the last pair
+ lh t0, 0(a2) // t0 = first input value
+ lh t1, 0(a1) // t1 = its multiplier (divisors + 0) - presumably a reciprocal; confirm
+ lh t2, 128(a1) // t2 = its additive term (divisors + 128) - presumably a correction; confirm
+ sra t3, t0, 15 // t3 = -1 if t0 is negative, else 0
+ sll t3, t3, 1
+ addiu t3, t3, 1 // t3 = sign of t0: -1 or +1
+ mul t0, t0, t3 // t0 = |t0|
+ lh t4, 384(a1) // t4 = its shift amount (divisors + 384)
+ lh t5, 130(a1) // t5..t8: the same inputs for the second value of the pair
+ lh t6, 2(a2)
+ lh t7, 2(a1)
+ lh t8, 386(a1)
+
+1:
+ andi t1, 0xffff // treat the multiplier as unsigned 16-bit
+ add t9, t0, t2 // t9 = |value| + additive term
+ andi t9, 0xffff // keep the low 16 bits
+ mul v1, t9, t1 // v1 = product
+ sra s0, t6, 15
+ sll s0, s0, 1
+ addiu s0, s0, 1 // s0 = sign of the second value
+ addiu t9, t4, 16 // shift count = t4 + 16
+ srav v1, v1, t9 // v1 >>= shift count
+ mul v1, v1, t3 // reapply the sign of the first value
+ mul t6, t6, s0 // t6 = |second value|
+ andi t7, 0xffff
+ addiu a2, a2, 4 // advance workspace by two halfwords
+ addiu a1, a1, 4 // advance divisors by two halfwords
+ add s1, t6, t5
+ andi s1, 0xffff
+ sh v1, 0(a0) // store the first result
+
+ mul s2, s1, t7
+ addiu s1, t8, 16
+ srav s2, s2, s1
+ mul s2,s2, s0 // reapply the sign of the second value
+ lh t0, 0(a2) // preload the next pair (pointers already advanced)
+ lh t1, 0(a1)
+ sra t3, t0, 15
+ sll t3, t3, 1
+ addiu t3, t3, 1
+ mul t0, t0, t3
+ lh t2, 128(a1)
+ lh t4, 384(a1)
+ lh t5, 130(a1)
+ lh t8, 386(a1)
+ lh t6, 2(a2)
+ lh t7, 2(a1)
+ sh s2, 2(a0) // store the second result
+ lh t0, 0(a2) // NOTE(review): this load and the four instructions after it repeat the
+ sra t3, t0, 15 // t0/t3 computation done a few lines above with the same a2;
+ sll t3, t3, 1 // looks redundant - confirm before removing
+ addiu t3, t3, 1
+ mul t0, t0,t3
+ bne a2, v0, 1b
+ addiu a0, a0, 4 // branch delay slot: advance coef_block by two halfwords
+
+ andi t1, 0xffff // last pair: same computation as the loop body, without the preload
+ add t9, t0, t2
+ andi t9, 0xffff
+ mul v1, t9, t1
+ sra s0, t6, 15
+ sll s0, s0, 1
+ addiu s0, s0, 1
+ addiu t9, t4, 16
+ srav v1, v1, t9
+ mul v1, v1, t3
+ mul t6, t6, s0
+ andi t7, 0xffff
+ sh v1, 0(a0)
+ add s1, t6, t5
+ andi s1, 0xffff
+ mul s2, s1, t7
+ addiu s1, t8, 16
+ addiu a2, a2, 4 // NOTE(review): a2 and a1 are not read again in this routine
+ addiu a1, a1, 4
+ srav s2, s2, s1
+ mul s2, s2, s0
+ sh s2, 2(a0)
+
+ RESTORE_REGS_FROM_STACK 16, s0, s1, s2
+
+ j ra
+ nop
+
+END(jsimd_quantize_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_quantize_float_mips_dspr2)
+/* For 64 floats, stores trunc(workspace[i] * divisors[i] + 16384.5) - 16384 as a halfword in coef_block.
+ * a0 - coef_block
+ * a1 - divisors
+ * a2 - workspace
+ */
+
+ .set at
+
+ li t1, 0x46800100 // IEEE-754 single-precision bit pattern of 16384.5
+ mtc1 t1, f0 // f0 = 16384.5
+ li t0, 63 // loop counter: decremented by 8 per pass, so 8 passes of 8 values
+0:
+ lwc1 f1, 0(a2) // values 0..3 of this group from workspace, factors from divisors
+ lwc1 f5, 0(a1)
+ lwc1 f2, 4(a2)
+ lwc1 f6, 4(a1)
+ lwc1 f3, 8(a2)
+ lwc1 f7, 8(a1)
+ lwc1 f4, 12(a2)
+ lwc1 f8, 12(a1)
+ madd.s f1, f0, f1, f5 // f1 = f1 * f5 + 16384.5
+ madd.s f2, f0, f2, f6
+ madd.s f3, f0, f3, f7
+ madd.s f4, f0, f4, f8
+ lwc1 f5, 16(a1) // preload factors for values 4..7
+ lwc1 f6, 20(a1)
+ trunc.w.s f1, f1 // convert to integer, truncating toward zero
+ trunc.w.s f2, f2
+ trunc.w.s f3, f3
+ trunc.w.s f4, f4
+ lwc1 f7, 24(a1)
+ lwc1 f8, 28(a1)
+ mfc1 t1, f1
+ mfc1 t2, f2
+ mfc1 t3, f3
+ mfc1 t4, f4
+ lwc1 f1, 16(a2) // values 4..7 of this group
+ lwc1 f2, 20(a2)
+ lwc1 f3, 24(a2)
+ lwc1 f4, 28(a2)
+ madd.s f1, f0, f1, f5
+ madd.s f2, f0, f2, f6
+ madd.s f3, f0, f3, f7
+ madd.s f4, f0, f4, f8
+ addiu t1, t1, -16384 // remove the 16384 bias; the extra 0.5 made the truncation round
+ addiu t2, t2, -16384
+ addiu t3, t3, -16384
+ addiu t4, t4, -16384
+ trunc.w.s f1, f1
+ trunc.w.s f2, f2
+ trunc.w.s f3, f3
+ trunc.w.s f4, f4
+ sh t1, 0(a0) // store results 0..3
+ sh t2, 2(a0)
+ sh t3, 4(a0)
+ sh t4, 6(a0)
+ mfc1 t1, f1
+ mfc1 t2, f2
+ mfc1 t3, f3
+ mfc1 t4, f4
+ addiu t0, t0, -8
+ addiu a2, a2, 32 // advance workspace by 8 floats
+ addiu a1, a1, 32 // advance divisors by 8 floats
+ addiu t1, t1, -16384
+ addiu t2, t2, -16384
+ addiu t3, t3, -16384
+ addiu t4, t4, -16384
+ sh t1, 8(a0) // store results 4..7
+ sh t2, 10(a0)
+ sh t3, 12(a0)
+ sh t4, 14(a0)
+ bgez t0, 0b
+ addiu a0, a0, 16 // branch delay slot: advance coef_block by 8 halfwords
+
+ j ra
+ nop
+
+END(jsimd_quantize_float_mips_dspr2)
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_2x2_mips_dspr2)
+/* Reduced inverse DCT: dequantizes columns 0, 1, 3, 5, 7 into stack scratch, then writes 2 rows of 2 bytes.
+ * a0 - compptr->dct_table
+ * a1 - coef_block
+ * a2 - output_buf
+ * a3 - output_col
+ */
+ .set at
+
+ SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, s5
+
+ addiu sp, sp, -40 // 40 bytes of scratch: two rows of five words
+ move v0, sp // v0 = scratch base
+ addiu s2, zero, 29692 // pass-2 multipliers; same values as the halves of s0/s1
+ addiu s3, zero, -10426
+ addiu s4, zero, 6967
+ addiu s5, zero, -5906
+ lh t0, 0(a1) // t0 = inptr[DCTSIZE*0]
+ lh t5, 0(a0) // t5 = quantptr[DCTSIZE*0]
+ lh t1, 48(a1) // t1 = inptr[DCTSIZE*3]
+ lh t6, 48(a0) // t6 = quantptr[DCTSIZE*3]
+ mul t4, t5, t0 // t4 = dequantized row 0
+ lh t0, 16(a1) // t0 = inptr[DCTSIZE*1]
+ lh t5, 16(a0) // t5 = quantptr[DCTSIZE*1]
+ mul t6, t6, t1 // t6 = dequantized row 3
+ mul t5, t5, t0 // t5 = dequantized row 1
+ lh t2, 80(a1) // t2 = inptr[DCTSIZE*5]
+ lh t7, 80(a0) // t7 = quantptr[DCTSIZE*5]
+ lh t3, 112(a1) // t3 = inptr[DCTSIZE*7]
+ lh t8, 112(a0) // t8 = quantptr[DCTSIZE*7]
+ mul t7, t7, t2 // t7 = dequantized row 5
+ mult zero, zero // ac0 = 0
+ mul t8, t8, t3 // t8 = dequantized row 7. NOTE(review): MIPS32 documents HI/LO as UNPREDICTABLE after mul; this relies on ac0 staying 0 - confirm on target cores
+ li s0, 0x73FCD746 // s0 = (29692 << 16) | (-10426 & 0xffff)
+ li s1, 0x1B37E8EE // s1 = (6967 << 16) | (-5906 & 0xffff)
+ ins t6, t5, 16, 16 // t6 = t5|t6
+ sll t4, t4, 15 // row 0 term << 15
+ dpa.w.ph $ac0, t6, s0 // ac0 += row1*29692 + row3*(-10426)
+ lh t1, 2(a1) // column 1 begins: inptr[1]
+ lh t6, 2(a0) // quantptr[1]
+ ins t8, t7, 16, 16 // t8 = t7|t8
+ dpa.w.ph $ac0, t8, s1 // ac0 += row5*6967 + row7*(-5906)
+ mflo t0, $ac0 // t0 = odd-row sum for column 0
+ mul t5, t6, t1 // column 1: dequantized row 0
+ lh t1, 18(a1)
+ lh t6, 18(a0)
+ lh t2, 50(a1)
+ lh t7, 50(a0)
+ mul t6, t6, t1 // column 1: dequantized row 1
+ subu t8, t4, t0 // column 0: row-0 term - odd sum
+ mul t7, t7, t2 // column 1: dequantized row 3
+ addu t0, t4, t0 // column 0: row-0 term + odd sum
+ shra_r.w t0, t0, 13 // rounded >> 13
+ lh t1, 82(a1)
+ lh t2, 82(a0)
+ lh t3, 114(a1)
+ lh t4, 114(a0)
+ shra_r.w t8, t8, 13
+ mul t1, t1, t2 // column 1: dequantized row 5
+ mul t3, t3, t4 // column 1: dequantized row 7
+ sw t0, 0(v0) // scratch row 0, slot 0 (column 0)
+ sw t8, 20(v0) // scratch row 1, slot 0
+ sll t4, t5, 15
+ ins t7, t6, 16, 16 // t7 = row1|row3
+ mult zero, zero // ac0 = 0
+ dpa.w.ph $ac0, t7, s0
+ ins t3, t1, 16, 16 // t3 = row5|row7
+ lh t1, 6(a1) // column 3 begins
+ lh t6, 6(a0)
+ dpa.w.ph $ac0, t3, s1
+ mflo t0, $ac0 // t0 = odd-row sum for column 1
+ mul t5, t6, t1
+ lh t1, 22(a1)
+ lh t6, 22(a0)
+ lh t2, 54(a1)
+ lh t7, 54(a0)
+ mul t6, t6, t1
+ subu t8, t4, t0
+ mul t7, t7, t2
+ addu t0, t4, t0
+ shra_r.w t0, t0, 13
+ lh t1, 86(a1)
+ lh t2, 86(a0)
+ lh t3, 118(a1)
+ lh t4, 118(a0)
+ shra_r.w t8, t8, 13
+ mul t1, t1, t2
+ mul t3, t3, t4
+ sw t0, 4(v0) // scratch row 0, slot 1 (column 1)
+ sw t8, 24(v0) // scratch row 1, slot 1
+ sll t4, t5, 15
+ ins t7, t6, 16, 16
+ mult zero, zero // ac0 = 0
+ dpa.w.ph $ac0, t7, s0
+ ins t3, t1, 16, 16
+ lh t1, 10(a1) // column 5 begins
+ lh t6, 10(a0)
+ dpa.w.ph $ac0, t3, s1
+ mflo t0, $ac0 // t0 = odd-row sum for column 3
+ mul t5, t6, t1
+ lh t1, 26(a1)
+ lh t6, 26(a0)
+ lh t2, 58(a1)
+ lh t7, 58(a0)
+ mul t6, t6, t1
+ subu t8, t4, t0
+ mul t7, t7, t2
+ addu t0, t4, t0
+ shra_r.w t0, t0, 13
+ lh t1, 90(a1)
+ lh t2, 90(a0)
+ lh t3, 122(a1)
+ lh t4, 122(a0)
+ shra_r.w t8, t8, 13
+ mul t1, t1, t2
+ mul t3, t3, t4
+ sw t0, 8(v0) // scratch row 0, slot 2 (column 3)
+ sw t8, 28(v0) // scratch row 1, slot 2
+ sll t4, t5, 15
+ ins t7, t6, 16, 16
+ mult zero, zero // ac0 = 0
+ dpa.w.ph $ac0, t7, s0
+ ins t3, t1, 16, 16
+ lh t1, 14(a1) // column 7 begins
+ lh t6, 14(a0)
+ dpa.w.ph $ac0, t3, s1
+ mflo t0, $ac0 // t0 = odd-row sum for column 5
+ mul t5, t6, t1
+ lh t1, 30(a1)
+ lh t6, 30(a0)
+ lh t2, 62(a1)
+ lh t7, 62(a0)
+ mul t6, t6, t1
+ subu t8, t4, t0
+ mul t7, t7, t2
+ addu t0, t4, t0
+ shra_r.w t0, t0, 13
+ lh t1, 94(a1)
+ lh t2, 94(a0)
+ lh t3, 126(a1)
+ lh t4, 126(a0)
+ shra_r.w t8, t8, 13
+ mul t1, t1, t2
+ mul t3, t3, t4
+ sw t0, 12(v0) // scratch row 0, slot 3 (column 5)
+ sw t8, 32(v0) // scratch row 1, slot 3
+ sll t4, t5, 15
+ ins t7, t6, 16, 16
+ mult zero, zero // ac0 = 0
+ dpa.w.ph $ac0, t7, s0
+ ins t3, t1, 16, 16
+ dpa.w.ph $ac0, t3, s1
+ mflo t0, $ac0 // t0 = odd-row sum for column 7
+ lw t9, 0(a2) // t9 = output_buf[0]
+ lw t3, 0(v0) // pass 2, row 0: slot 0
+ lw t7, 4(v0) // slot 1
+ lw t1, 8(v0) // slot 2
+ addu t9, t9, a3 // outptr = output_buf[0] + output_col
+ sll t3, t3, 15
+ subu t8, t4, t0
+ addu t0, t4, t0
+ shra_r.w t0, t0, 13
+ shra_r.w t8, t8, 13
+ sw t0, 16(v0) // scratch row 0, slot 4 (column 7)
+ sw t8, 36(v0) // scratch row 1, slot 4
+ lw t5, 12(v0) // slot 3
+ lw t6, 16(v0) // slot 4 (just stored)
+ mult t7, s2 // ac0 = slot1 * 29692
+ madd t1, s3 // ac0 += slot2 * -10426
+ madd t5, s4 // ac0 += slot3 * 6967
+ madd t6, s5 // ac0 += slot4 * -5906
+ lw t5, 24(v0) // row 1: slot 1
+ lw t7, 28(v0) // row 1: slot 2
+ mflo t0, $ac0 // t0 = weighted sum for row 0
+ lw t8, 32(v0) // row 1: slot 3
+ lw t2, 36(v0) // row 1: slot 4
+ mult $ac1, t5, s2 // ac1 = same weighted sum for row 1
+ madd $ac1, t7, s3
+ madd $ac1, t8, s4
+ madd $ac1, t2, s5
+ addu t1, t3, t0 // row 0, output 0
+ subu t6, t3, t0 // row 0, output 1
+ shra_r.w t1, t1, 20 // rounded >> 20
+ shra_r.w t6, t6, 20
+ mflo t4, $ac1 // t4 = weighted sum for row 1
+ shll_s.w t1, t1, 24 // saturating << 24 then >> 24: clamp to [-128, 127]
+ shll_s.w t6, t6, 24
+ sra t1, t1, 24
+ sra t6, t6, 24
+ addiu t1, t1, 128 // shift into [0, 255]
+ addiu t6, t6, 128
+ lw t0, 20(v0) // row 1: slot 0
+ sb t1, 0(t9)
+ sb t6, 1(t9)
+ sll t0, t0, 15
+ lw t9, 4(a2) // t9 = output_buf[1]
+ addu t1, t0, t4 // row 1, output 0
+ subu t6, t0, t4 // row 1, output 1
+ addu t9, t9, a3 // outptr = output_buf[1] + output_col
+ shra_r.w t1, t1, 20
+ shra_r.w t6, t6, 20
+ shll_s.w t1, t1, 24
+ shll_s.w t6, t6, 24
+ sra t1, t1, 24
+ sra t6, t6, 24
+ addiu t1, t1, 128
+ addiu t6, t6, 128
+ sb t1, 0(t9)
+ sb t6, 1(t9)
+ addiu sp, sp, 40 // release scratch
+
+ RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, s5
+
+ j ra
+ nop
+
+END(jsimd_idct_2x2_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_4x4_mips_dspr2)
+/* Reduced inverse DCT: pass 1 fills workspace from columns 0-3 and 5-7, pass 2 writes 4 rows of 4 bytes.
+ * a0 - compptr->dct_table
+ * a1 - coef_block
+ * a2 - output_buf
+ * a3 - output_col
+ * 16(sp) - workspace[DCTSIZE*4]; // buffers data between passes
+ */
+
+ .set at
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw v1, 48(sp) // v1 = workspace (caller's 16(sp), now 32 bytes further up)
+ move t0, a1 // t0 = inptr
+ move t1, v1 // t1 = wsptr
+ li t9, 4 // first loop: 4 columns
+ li s0, 0x2e75f93e // packed halfword multipliers for the odd part
+ li s1, 0x21f9ba79
+ li s2, 0xecc2efb0
+ li s3, 0x52031ccd
+
+0:
+ lh s6, 32(t0) // inptr[DCTSIZE*2]
+ lh t6, 32(a0) // quantptr[DCTSIZE*2]
+ lh s7, 96(t0) // inptr[DCTSIZE*6]
+ lh t7, 96(a0) // quantptr[DCTSIZE*6]
+ mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2])
+ lh s4, 0(t0) // inptr[DCTSIZE*0]
+ mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6])
+ lh s5, 0(a0) // quantptr[0]
+ li s6, 15137
+ li s7, 6270
+ mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0])
+ mul t6, s6, t6 // MULTIPLY(z2, FIX_1_847759065)
+ lh t5, 112(t0) // inptr[DCTSIZE*7]
+ mul t7, s7, t7 // MULTIPLY(z3, FIX_0_765366865)
+ lh s4, 112(a0) // quantptr[DCTSIZE*7]
+ lh v0, 80(t0) // inptr[DCTSIZE*5]
+ lh s5, 80(a0) // quantptr[DCTSIZE*5]
+ lh s6, 48(a0) // quantptr[DCTSIZE*3]
+ sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1)
+ lh s7, 16(a0) // quantptr[DCTSIZE*1]
+ lh t8, 16(t0) // inptr[DCTSIZE*1]
+ subu t6, t6, t7 // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865)
+ lh t7, 48(t0) // inptr[DCTSIZE*3]
+ mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7])
+ mul v0, s5, v0 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5])
+ mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3])
+ mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1])
+ addu t3, t2, t6 // tmp10 = tmp0 + tmp2
+ subu t4, t2, t6 // tmp12 = tmp0 - tmp2
+ mult $ac0, zero, zero // ac0 = 0
+ mult $ac1, zero, zero // ac1 = 0
+ ins t5, v0, 16, 16 // t5 = z2|z1
+ ins t7, t8, 16, 16 // t7 = z4|z3
+ addiu t9, t9, -1
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ mflo s4, $ac0 // s4 = temp1
+ mflo s5, $ac1 // s5 = temp2
+ addiu a0, a0, 2 // next column of quantptr
+ addiu t1, t1, 4 // next column of wsptr (offsets below are relative to the advanced pointer)
+ addiu t0, t0, 2 // next column of inptr
+ addu t6, t4, s4
+ subu t5, t4, s4
+ addu s6, t3, s5
+ subu s7, t3, s5
+ shra_r.w t6, t6, 12 // DESCALE(tmp12 + temp1, 12)
+ shra_r.w t5, t5, 12 // DESCALE(tmp12 - temp1, 12)
+ shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12)
+ shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12)
+ sw t6, 28(t1) // workspace row 1
+ sw t5, 60(t1) // workspace row 2
+ sw s6, -4(t1) // workspace row 0
+ bgtz t9, 0b
+ sw s7, 92(t1) // workspace row 3 (branch delay slot)
+ // second loop: three passes for columns 5-7 (the +2 offsets skip column 4)
+ li t9, 3
+1:
+ lh s6, 34(t0) // inptr[DCTSIZE*2]
+ lh t6, 34(a0) // quantptr[DCTSIZE*2]
+ lh s7, 98(t0) // inptr[DCTSIZE*6]
+ lh t7, 98(a0) // quantptr[DCTSIZE*6]
+ mul t6, s6, t6 // z2 = (inptr[DCTSIZE*2] * quantptr[DCTSIZE*2])
+ lh s4, 2(t0) // inptr[DCTSIZE*0]
+ mul t7, s7, t7 // z3 = (inptr[DCTSIZE*6] * quantptr[DCTSIZE*6])
+ lh s5, 2(a0) // quantptr[DCTSIZE*0]
+ li s6, 15137
+ li s7, 6270
+ mul t2, s4, s5 // tmp0 = (inptr[0] * quantptr[0])
+ mul v0, s6, t6 // MULTIPLY(z2, FIX_1_847759065)
+ lh t5, 114(t0) // inptr[DCTSIZE*7]
+ mul t7, s7, t7 // MULTIPLY(z3, FIX_0_765366865)
+ lh s4, 114(a0) // quantptr[DCTSIZE*7]
+ lh s5, 82(a0) // quantptr[DCTSIZE*5]
+ lh t6, 82(t0) // inptr[DCTSIZE*5]
+ sll t2, t2, 14 // tmp0 <<= (CONST_BITS+1)
+ lh s6, 50(a0) // quantptr[DCTSIZE*3]
+ lh t8, 18(t0) // inptr[DCTSIZE*1]
+ subu v0, v0, t7 // tmp2 = MULTIPLY(z2, FIX_1_847759065) - MULTIPLY(z3, FIX_0_765366865)
+ lh t7, 50(t0) // inptr[DCTSIZE*3]
+ lh s7, 18(a0) // quantptr[DCTSIZE*1]
+ mul t5, s4, t5 // z1 = (inptr[DCTSIZE*7] * quantptr[DCTSIZE*7])
+ mul t6, s5, t6 // z2 = (inptr[DCTSIZE*5] * quantptr[DCTSIZE*5])
+ mul t7, s6, t7 // z3 = (inptr[DCTSIZE*3] * quantptr[DCTSIZE*3])
+ mul t8, s7, t8 // z4 = (inptr[DCTSIZE*1] * quantptr[DCTSIZE*1])
+ addu t3, t2, v0 // tmp10 = tmp0 + tmp2
+ subu t4, t2, v0 // tmp12 = tmp0 - tmp2
+ mult $ac0, zero, zero // ac0 = 0
+ mult $ac1, zero, zero // ac1 = 0
+ ins t5, t6, 16, 16 // t5 = z2|z1
+ ins t7, t8, 16, 16 // t7 = z4|z3
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ mflo t5, $ac0 // t5 = temp1
+ mflo t6, $ac1 // t6 = temp2
+ addiu t9, t9, -1
+ addiu t0, t0, 2
+ addiu a0, a0, 2
+ addiu t1, t1, 4
+ addu s5, t4, t5
+ subu s4, t4, t5
+ addu s6, t3, t6
+ subu s7, t3, t6
+ shra_r.w s5, s5, 12 // DESCALE(tmp12 + temp1, 12)
+ shra_r.w s4, s4, 12 // DESCALE(tmp12 - temp1, 12)
+ shra_r.w s6, s6, 12 // DESCALE(tmp10 + temp2, 12)
+ shra_r.w s7, s7, 12 // DESCALE(tmp10 - temp2, 12)
+ sw s5, 32(t1) // workspace row 1
+ sw s4, 64(t1) // workspace row 2
+ sw s6, 0(t1) // workspace row 0
+ bgtz t9, 1b
+ sw s7, 96(t1) // workspace row 3 (branch delay slot)
+ move t1, v1 // pass 2: t1 = start of workspace; first output row
+ li s4, 15137
+ lw s6, 8(t1) // wsptr[2]
+ li s5, 6270
+ lw s7, 24(t1) // wsptr[6]
+ mul s4, s4, s6 // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+ lw t2, 0(t1) // wsptr[0]
+ mul s5, s5, s7 // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+ lh t5, 28(t1) // wsptr[7]. NOTE(review): lh on a word written with sw reads its low half only on little-endian - confirm target
+ lh t6, 20(t1) // wsptr[5]
+ lh t7, 12(t1) // wsptr[3]
+ lh t8, 4(t1) // wsptr[1]
+ ins t5, t6, 16, 16
+ ins t7, t8, 16, 16
+ mult $ac0, zero, zero
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ mult $ac1, zero, zero
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ sll t2, t2, 14 // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+ mflo s6, $ac0
+ // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+ subu s4, s4, s5
+ addu t3, t2, s4 // tmp10 = tmp0 + tmp2
+ mflo s7, $ac1
+ subu t4, t2, s4 // tmp12 = tmp0 - tmp2
+ addu t7, t4, s6
+ subu t8, t4, s6
+ addu t5, t3, s7
+ subu t6, t3, s7
+ shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19)
+ shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19)
+ shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19)
+ shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19)
+ sll s4, t9, 2 // NOTE(review): s4 is overwritten by the next "li s4" before any read - appears dead
+ lw v0, 0(a2) // output_buf[ctr]
+ shll_s.w t5, t5, 24 // saturating << 24 then >> 24: clamp to [-128, 127]
+ shll_s.w t6, t6, 24
+ shll_s.w t7, t7, 24
+ shll_s.w t8, t8, 24
+ sra t5, t5, 24
+ sra t6, t6, 24
+ sra t7, t7, 24
+ sra t8, t8, 24
+ addu v0, v0, a3 // outptr = output_buf[ctr] + output_col
+ addiu t5, t5, 128
+ addiu t6, t6, 128
+ addiu t7, t7, 128
+ addiu t8, t8, 128
+ sb t5, 0(v0)
+ sb t7, 1(v0)
+ sb t8, 2(v0)
+ sb t6, 3(v0)
+ // second output row (workspace offset 32, output_buf[1])
+ li s4, 15137
+ lw s6, 40(t1) // wsptr[2]
+ li s5, 6270
+ lw s7, 56(t1) // wsptr[6]
+ mul s4, s4, s6 // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+ lw t2, 32(t1) // wsptr[0]
+ mul s5, s5, s7 // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+ lh t5, 60(t1) // wsptr[7]
+ lh t6, 52(t1) // wsptr[5]
+ lh t7, 44(t1) // wsptr[3]
+ lh t8, 36(t1) // wsptr[1]
+ ins t5, t6, 16, 16
+ ins t7, t8, 16, 16
+ mult $ac0, zero, zero
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ mult $ac1, zero, zero
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ sll t2, t2, 14 // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+ mflo s6, $ac0
+ // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+ subu s4, s4, s5
+ addu t3, t2, s4 // tmp10 = tmp0 + tmp2
+ mflo s7, $ac1
+ subu t4, t2, s4 // tmp12 = tmp0 - tmp2
+ addu t7, t4, s6
+ subu t8, t4, s6
+ addu t5, t3, s7
+ subu t6, t3, s7
+ shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19)
+ shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19)
+ shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19)
+ shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19)
+ sll s4, t9, 2 // NOTE(review): appears dead, as above
+ lw v0, 4(a2) // output_buf[ctr]
+ shll_s.w t5, t5, 24
+ shll_s.w t6, t6, 24
+ shll_s.w t7, t7, 24
+ shll_s.w t8, t8, 24
+ sra t5, t5, 24
+ sra t6, t6, 24
+ sra t7, t7, 24
+ sra t8, t8, 24
+ addu v0, v0, a3 // outptr = output_buf[ctr] + output_col
+ addiu t5, t5, 128
+ addiu t6, t6, 128
+ addiu t7, t7, 128
+ addiu t8, t8, 128
+ sb t5, 0(v0)
+ sb t7, 1(v0)
+ sb t8, 2(v0)
+ sb t6, 3(v0)
+ // third output row (workspace offset 64, output_buf[2])
+ li s4, 15137
+ lw s6, 72(t1) // wsptr[2]
+ li s5, 6270
+ lw s7, 88(t1) // wsptr[6]
+ mul s4, s4, s6 // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+ lw t2, 64(t1) // wsptr[0]
+ mul s5, s5, s7 // MULTIPLY((INT32) wsptr[6], - FIX_0_765366865)
+ lh t5, 92(t1) // wsptr[7]
+ lh t6, 84(t1) // wsptr[5]
+ lh t7, 76(t1) // wsptr[3]
+ lh t8, 68(t1) // wsptr[1]
+ ins t5, t6, 16, 16
+ ins t7, t8, 16, 16
+ mult $ac0, zero, zero
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ mult $ac1, zero, zero
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ sll t2, t2, 14 // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+ mflo s6, $ac0
+ // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+ subu s4, s4, s5
+ addu t3, t2, s4 // tmp10 = tmp0 + tmp2
+ mflo s7, $ac1
+ subu t4, t2, s4 // tmp12 = tmp0 - tmp2
+ addu t7, t4, s6
+ subu t8, t4, s6
+ addu t5, t3, s7
+ subu t6, t3, s7
+ shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19)
+ shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19)
+ shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19)
+ shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19)
+ sll s4, t9, 2 // NOTE(review): appears dead, as above
+ lw v0, 8(a2) // output_buf[ctr]
+ shll_s.w t5, t5, 24
+ shll_s.w t6, t6, 24
+ shll_s.w t7, t7, 24
+ shll_s.w t8, t8, 24
+ sra t5, t5, 24
+ sra t6, t6, 24
+ sra t7, t7, 24
+ sra t8, t8, 24
+ addu v0, v0, a3 // outptr = output_buf[ctr] + output_col
+ addiu t5, t5, 128
+ addiu t6, t6, 128
+ addiu t7, t7, 128
+ addiu t8, t8, 128
+ sb t5, 0(v0)
+ sb t7, 1(v0)
+ sb t8, 2(v0)
+ sb t6, 3(v0)
+ li s4, 15137 // fourth output row (workspace offset 96, output_buf[3])
+ lw s6, 104(t1) // wsptr[2]
+ li s5, 6270
+ lw s7, 120(t1) // wsptr[6]
+ mul s4, s4, s6 // MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+ lw t2, 96(t1) // wsptr[0]
+ mul s5, s5, s7 // MULTIPLY((INT32) wsptr[6], -FIX_0_765366865)
+ lh t5, 124(t1) // wsptr[7]
+ lh t6, 116(t1) // wsptr[5]
+ lh t7, 108(t1) // wsptr[3]
+ lh t8, 100(t1) // wsptr[1]
+ ins t5, t6, 16, 16
+ ins t7, t8, 16, 16
+ mult $ac0, zero, zero
+ dpa.w.ph $ac0, t5, s0
+ dpa.w.ph $ac0, t7, s1
+ mult $ac1, zero, zero
+ dpa.w.ph $ac1, t5, s2
+ dpa.w.ph $ac1, t7, s3
+ sll t2, t2, 14 // tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1)
+ mflo s6, $ac0
+ // tmp2 = MULTIPLY(wsptr[2], FIX_1_847759065) + MULTIPLY(wsptr[6], -FIX_0_765366865)
+ subu s4, s4, s5
+ addu t3, t2, s4 // tmp10 = tmp0 + tmp2
+ mflo s7, $ac1
+ subu t4, t2, s4 // tmp12 = tmp0 - tmp2
+ addu t7, t4, s6
+ subu t8, t4, s6
+ addu t5, t3, s7
+ subu t6, t3, s7
+ shra_r.w t5, t5, 19 // DESCALE(tmp10 + temp2, 19)
+ shra_r.w t6, t6, 19 // DESCALE(tmp10 - temp2, 19)
+ shra_r.w t7, t7, 19 // DESCALE(tmp12 + temp1, 19)
+ shra_r.w t8, t8, 19 // DESCALE(tmp12 - temp1, 19)
+ sll s4, t9, 2 // NOTE(review): s4 is not read again in this routine - appears dead
+ lw v0, 12(a2) // output_buf[ctr]
+ shll_s.w t5, t5, 24
+ shll_s.w t6, t6, 24
+ shll_s.w t7, t7, 24
+ shll_s.w t8, t8, 24
+ sra t5, t5, 24
+ sra t6, t6, 24
+ sra t7, t7, 24
+ sra t8, t8, 24
+ addu v0, v0, a3 // outptr = output_buf[ctr] + output_col
+ addiu t5, t5, 128
+ addiu t6, t6, 128
+ addiu t7, t7, 128
+ addiu t8, t8, 128
+ sb t5, 0(v0)
+ sb t7, 1(v0)
+ sb t8, 2(v0)
+ sb t6, 3(v0)
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+END(jsimd_idct_4x4_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_6x6_mips_dspr2)
+/* Inverse DCT to 6x6: pass 1 fills a 6x6 word array on the stack, pass 2 writes 6 rows of 6 bytes.
+ * a0 - compptr->dct_table
+ * a1 - coef_block
+ * a2 - output_buf
+ * a3 - output_col
+ */
+ .set at
+
+ SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ addiu sp, sp, -144 // work array: 36 words
+ move v0, sp // v0 = work array column pointer
+ addiu v1, v0, 24 // end address: 6 columns of 4 bytes
+ addiu t9, zero, 5793
+ addiu s0, zero, 10033
+ addiu s1, zero, 2998
+
+1:
+ lh s2, 0(a0) // q0 = quantptr[ 0]
+ lh s3, 32(a0) // q1 = quantptr[16]
+ lh s4, 64(a0) // q2 = quantptr[32]
+ lh t2, 64(a1) // tmp2 = inptr[32]
+ lh t1, 32(a1) // tmp1 = inptr[16]
+ lh t0, 0(a1) // tmp0 = inptr[ 0]
+ mul t2, t2, s4 // tmp2 = tmp2 * q2
+ mul t1, t1, s3 // tmp1 = tmp1 * q1
+ mul t0, t0, s2 // tmp0 = tmp0 * q0
+ lh t6, 16(a1) // z1 = inptr[ 8]
+ lh t8, 80(a1) // z3 = inptr[40]
+ lh t7, 48(a1) // z2 = inptr[24]
+ lh s2, 16(a0) // q0 = quantptr[ 8]
+ lh s4, 80(a0) // q2 = quantptr[40]
+ lh s3, 48(a0) // q1 = quantptr[24]
+ mul t2, t2, t9 // tmp2 = tmp2 * 5793
+ mul t1, t1, s0 // tmp1 = tmp1 * 10033
+ sll t0, t0, 13 // tmp0 = tmp0 << 13
+ mul t6, t6, s2 // z1 = z1 * q0
+ mul t8, t8, s4 // z3 = z3 * q2
+ mul t7, t7, s3 // z2 = z2 * q1
+ addu t3, t0, t2 // tmp10 = tmp0 + tmp2
+ sll t2, t2, 1 // tmp2 = tmp2 << 1
+ subu t4, t0, t2 // tmp11 = tmp0 - tmp2;
+ subu t5, t3, t1 // tmp12 = tmp10 - tmp1
+ addu t3, t3, t1 // tmp10 = tmp10 + tmp1
+ addu t1, t6, t8 // tmp1 = z1 + z3
+ mul t1, t1, s1 // tmp1 = tmp1 * 2998
+ shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11
+ subu t2, t6, t8 // tmp2 = z1 - z3
+ subu t2, t2, t7 // tmp2 = tmp2 - z2
+ sll t2, t2, 2 // tmp2 = tmp2 << 2
+ addu t0, t6, t7 // tmp0 = z1 + z2
+ sll t0, t0, 13 // tmp0 = tmp0 << 13
+ subu s2, t8, t7 // q0 = z3 - z2
+ sll s2, s2, 13 // q0 = q0 << 13
+ addu t0, t0, t1 // tmp0 = tmp0 + tmp1
+ addu t1, s2, t1 // tmp1 = q0 + tmp1
+ addu s2, t4, t2 // q0 = tmp11 + tmp2
+ subu s3, t4, t2 // q1 = tmp11 - tmp2
+ addu t6, t3, t0 // z1 = tmp10 + tmp0
+ subu t7, t3, t0 // z2 = tmp10 - tmp0
+ addu t4, t5, t1 // tmp11 = tmp12 + tmp1
+ subu t5, t5, t1 // tmp12 = tmp12 - tmp1
+ shra_r.w t6, t6, 11 // z1 = (z1 + 1024) >> 11
+ shra_r.w t7, t7, 11 // z2 = (z2 + 1024) >> 11
+ shra_r.w t4, t4, 11 // tmp11 = (tmp11 + 1024) >> 11
+ shra_r.w t5, t5, 11 // tmp12 = (tmp12 + 1024) >> 11
+ sw s2, 24(v0) // work row 1
+ sw s3, 96(v0) // work row 4
+ sw t6, 0(v0) // work row 0
+ sw t7, 120(v0) // work row 5
+ sw t4, 48(v0) // work row 2
+ sw t5, 72(v0) // work row 3
+ addiu v0, v0, 4 // next work column
+ addiu a1, a1, 2 // next input column
+ bne v0, v1, 1b
+ addiu a0, a0, 2 // branch delay slot: next quantptr column
+
+ /* Pass 2: process 6 rows from work array, store into output array. */
+ move v0, sp // v0 = work array row pointer
+ addiu v1, v0, 144 // end address: 6 rows of 24 bytes
+
+2:
+ lw t0, 0(v0) // wsptr[0]
+ lw t2, 16(v0) // wsptr[4]
+ lw s5, 0(a2) // s5 = current output_buf entry
+ addiu t0, t0, 16 // rounding term, added before the << 13
+ sll t0, t0, 13 // tmp0 = (wsptr[0] + 16) << 13
+ mul t3, t2, t9 // t3 = wsptr[4] * 5793
+ lw t6, 4(v0) // z1 = wsptr[1]
+ lw t8, 20(v0) // z3 = wsptr[5]
+ lw t7, 12(v0) // z2 = wsptr[3]
+ addu s5, s5, a3 // outptr = output_buf entry + output_col
+ addu s6, t6, t8 // z1 + z3
+ mul s6, s6, s1 // s6 = (z1 + z3) * 2998
+ addu t1, t0, t3 // t1 = tmp0 + t3
+ subu t4, t0, t3
+ subu t4, t4, t3 // t4 = tmp0 - 2 * t3
+ lw t3, 8(v0) // wsptr[2]
+ mul t0, t3, s0 // t0 = wsptr[2] * 10033
+ addu s7, t6, t7 // z1 + z2
+ sll s7, s7, 13
+ addu s7, s6, s7 // s7 = s6 + ((z1 + z2) << 13)
+ subu t2, t8, t7 // z3 - z2
+ sll t2, t2, 13
+ addu t2, s6, t2 // t2 = s6 + ((z3 - z2) << 13)
+ subu s6, t6, t7
+ subu s6, s6, t8
+ sll s6, s6, 13 // s6 = (z1 - z2 - z3) << 13
+ addu t3, t1, t0 // even part + / - wsptr[2] term
+ subu t5, t1, t0
+ addu t6, t3, s7 // output 0
+ subu t3, t3, s7 // output 5
+ addu t7, t4, s6 // output 1
+ subu t4, t4, s6 // output 4
+ addu t8, t5, t2 // output 2
+ subu t5, t5, t2 // output 3
+ shll_s.w t6, t6, 6 // saturating << 6, then >> 24 below: net >> 18 clamped to [-128, 127]
+ shll_s.w t3, t3, 6
+ shll_s.w t7, t7, 6
+ shll_s.w t4, t4, 6
+ shll_s.w t8, t8, 6
+ shll_s.w t5, t5, 6
+ sra t6, t6, 24
+ addiu t6, t6, 128 // shift into [0, 255]
+ sra t3, t3, 24
+ addiu t3, t3, 128
+ sb t6, 0(s5)
+ sra t7, t7, 24
+ addiu t7, t7, 128
+ sb t3, 5(s5)
+ sra t4, t4, 24
+ addiu t4, t4, 128
+ sb t7, 1(s5)
+ sra t8, t8, 24
+ addiu t8, t8, 128
+ sb t4, 4(s5)
+ addiu v0, v0, 24 // next work row
+ sra t5, t5, 24
+ addiu t5, t5, 128
+ sb t8, 2(s5)
+ addiu a2, a2, 4 // next output_buf entry
+ bne v0, v1, 2b
+ sb t5, 3(s5) // branch delay slot
+
+ addiu sp, sp, 144 // release work array
+
+ RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
+
+ j ra
+ nop
+
+END(jsimd_idct_6x6_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass1_mips_dspr2)
+/* Pass 1 of the 12x12 inverse DCT: for each of 8 input columns, writes 12 words to workspace (row stride 32 bytes).
+ * a0 - compptr->dct_table
+ * a1 - coef_block
+ * a2 - workspace
+ */
+
+ SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+ li a3, 8 // column counter
+
+1:
+ // odd part
+ lh t0, 48(a1) // inptr[DCTSIZE*3]
+ lh t1, 48(a0) // quantptr[DCTSIZE*3]
+ lh t2, 16(a1) // inptr[DCTSIZE*1]
+ lh t3, 16(a0) // quantptr[DCTSIZE*1]
+ lh t4, 80(a1) // inptr[DCTSIZE*5]
+ lh t5, 80(a0) // quantptr[DCTSIZE*5]
+ lh t6, 112(a1) // inptr[DCTSIZE*7]
+ lh t7, 112(a0) // quantptr[DCTSIZE*7]
+ mul t0, t0, t1 // z2
+ mul t1, t2, t3 // z1
+ mul t2, t4, t5 // z3
+ mul t3, t6, t7 // z4
+ li t4, 10703 // FIX(1.306562965)
+ li t5, 4433 // FIX_0_541196100
+ li t6, 7053 // FIX(0.860918669)
+ mul t4, t0,t4 // tmp11
+ mul t5, t0,t5 // -tmp14
+ addu t7, t1,t2 // tmp10
+ addu t8, t7,t3 // tmp10 + z4
+ mul t6, t6, t8 // tmp15
+ li t8, 2139 // FIX(0.261052384)
+ mul t8, t7, t8 // MULTIPLY(tmp10, FIX(0.261052384))
+ li t7, 2295 // FIX(0.280143716)
+ mul t7, t1, t7 // MULTIPLY(z1, FIX(0.280143716))
+ addu t9, t2, t3 // z3 + z4
+ li s0, 8565 // FIX(1.045510580)
+ mul t9, t9, s0 // -tmp13
+ li s0, 12112 // FIX(1.478575242)
+ mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242))
+ li s1, 12998 // FIX(1.586706681)
+ mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681))
+ li s2, 5540 // FIX(0.676326758)
+ mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758))
+ li s3, 16244 // FIX(1.982889723)
+ mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723))
+ subu t1, t1, t3 // z1-=z4
+ subu t0, t0, t2 // z2-=z3
+ addu t2, t0, t1 // z1+z2
+ li t3, 4433 // FIX_0_541196100
+ mul t2, t2, t3 // z3
+ li t3, 6270 // FIX_0_765366865
+ mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865)
+ li t3, 15137 // FIX_1_847759065
+ mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065)
+ addu t8, t6, t8 // tmp12
+ addu t3, t8, t4 // tmp12 + tmp11
+ addu t3, t3, t7 // tmp10
+ subu t8, t8, t9 // tmp12 + tmp13
+ addu s0, t5, s0
+ subu t8, t8, s0 // tmp12
+ subu t9, t6, t9
+ subu s1, s1, t4
+ addu t9, t9, s1 // tmp13
+ subu t6, t6, t5
+ subu t6, t6, s2
+ subu t6, t6, s3 // tmp15
+ // even part start
+ lh t4, 64(a1)
+ lh t5, 64(a0)
+ lh t7, 32(a1)
+ lh s0, 32(a0)
+ lh s1, 0(a1)
+ lh s2, 0(a0)
+ lh s3, 96(a1)
+ lh v0, 96(a0)
+ mul t4, t4, t5 // DEQUANTIZE(inptr[DCTSIZE*4],quantptr[DCTSIZE*4])
+ mul t5, t7, s0 // DEQUANTIZE(inptr[DCTSIZE*2],quantptr[DCTSIZE*2])
+ mul t7, s1, s2 // DEQUANTIZE(inptr[DCTSIZE*0],quantptr[DCTSIZE*0])
+ mul s0, s3, v0 // DEQUANTIZE(inptr[DCTSIZE*6],quantptr[DCTSIZE*6])
+ // odd part end
+ addu t1, t2, t1 // tmp11
+ subu t0, t2, t0 // tmp14
+ // update counter and pointers
+ addiu a3, a3, -1
+ addiu a0, a0, 2
+ addiu a1, a1, 2
+ // even part rest
+ li s1, 10033
+ li s2, 11190
+ mul t4, t4, s1 // z4 = row-4 value * 10033
+ mul s1, t5, s2 // z4 = row-2 value * 11190
+ sll t5, t5, 13 // z1
+ sll t7, t7, 13
+ addiu t7, t7, 1024 // z3, with rounding term for the final >> 11
+ sll s0, s0, 13 // z2
+ addu s2, t7, t4 // tmp10
+ subu t4, t7, t4 // tmp11
+ subu s3, t5, s0 // tmp12
+ addu t2, t7, s3 // tmp21
+ subu s3, t7, s3 // tmp24
+ addu t7, s1, s0 // tmp12
+ addu v0, s2, t7 // tmp20
+ subu s2, s2, t7 // tmp25
+ subu s1, s1, t5 // z4 - z1
+ subu s1, s1, s0 // tmp12
+ addu s0, t4, s1 // tmp22
+ subu t4, t4, s1 // tmp23
+ // final output stage
+ addu t5, v0, t3 // tmp20 + tmp10
+ subu v0, v0, t3 // tmp20 - tmp10
+ addu t3, t2, t1 // tmp21 + tmp11
+ subu t2, t2, t1 // tmp21 - tmp11
+ addu t1, s0, t8 // tmp22 + tmp12
+ subu s0, s0, t8 // tmp22 - tmp12
+ addu t8, t4, t9 // tmp23 + tmp13
+ subu t4, t4, t9 // tmp23 - tmp13
+ addu t9, s3, t0 // tmp24 + tmp14
+ subu s3, s3, t0 // tmp24 - tmp14
+ addu t0, s2, t6 // tmp25 + tmp15
+ subu s2, s2, t6 // tmp25 - tmp15
+ sra t5, t5, 11
+ sra t3, t3, 11
+ sra t1, t1, 11
+ sra t8, t8, 11
+ sra t9, t9, 11
+ sra t0, t0, 11
+ sra s2, s2, 11
+ sra s3, s3, 11
+ sra t4, t4, 11
+ sra s0, s0, 11
+ sra t2, t2, 11
+ sra v0, v0, 11
+ sw t5, 0(a2) // workspace row 0
+ sw t3, 32(a2) // row 1
+ sw t1, 64(a2) // row 2
+ sw t8, 96(a2) // row 3
+ sw t9, 128(a2) // row 4
+ sw t0, 160(a2) // row 5
+ sw s2, 192(a2) // row 6
+ sw s3, 224(a2) // row 7
+ sw t4, 256(a2) // row 8
+ sw s0, 288(a2) // row 9
+ sw t2, 320(a2) // row 10
+ sw v0, 352(a2) // row 11
+ bgtz a3, 1b
+ addiu a2, a2, 4 // branch delay slot: next workspace column
+
+ RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+ j ra
+ nop
+
+END(jsimd_idct_12x12_pass1_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_idct_12x12_pass2_mips_dspr2)
+/* Second pass of the 12x12 IDCT: 12 loop iterations, each turning 8 input words into 12 output bytes.
+ * a0 - workspace: 8 words are read per iteration (offsets 0..28), then a0 advances by 32 bytes
+ * a1 - output: array of destination pointers; one is loaded per iteration, then a1 advances by 4
+ */
+
+ SAVE_REGS_ON_STACK 16, s0, s1, s2, s3
+
+ li a3, 12 // loop counter (12 iterations)
+
+1:
+ // Odd part
+ lw t0, 12(a0)
+ lw t1, 4(a0)
+ lw t2, 20(a0)
+ lw t3, 28(a0)
+ li t4, 10703 // FIX(1.306562965)
+ li t5, 4433 // FIX_0_541196100
+ mul t4, t0, t4 // tmp11
+ mul t5, t0, t5 // -tmp14
+ addu t6, t1, t2 // tmp10
+ li t7, 2139 // FIX(0.261052384)
+ mul t7, t6, t7 // MULTIPLY(tmp10, FIX(0.261052384))
+ addu t6, t6, t3 // tmp10 + z4
+ li t8, 7053 // FIX(0.860918669)
+ mul t6, t6, t8 // tmp15
+ li t8, 2295 // FIX(0.280143716)
+ mul t8, t1, t8 // MULTIPLY(z1, FIX(0.280143716))
+ addu t9, t2, t3 // z3 + z4
+ li s0, 8565 // FIX(1.045510580)
+ mul t9, t9, s0 // -tmp13
+ li s0, 12112 // FIX(1.478575242)
+ mul s0, t2, s0 // MULTIPLY(z3, FIX(1.478575242))
+ li s1, 12998 // FIX(1.586706681)
+ mul s1, t3, s1 // MULTIPLY(z4, FIX(1.586706681))
+ li s2, 5540 // FIX(0.676326758)
+ mul s2, t1, s2 // MULTIPLY(z1, FIX(0.676326758))
+ li s3, 16244 // FIX(1.982889723)
+ mul s3, t3, s3 // MULTIPLY(z4, FIX(1.982889723))
+ subu t1, t1, t3 // z1 -= z4
+ subu t0, t0, t2 // z2 -= z3
+ addu t2, t1, t0 // z1 + z2
+ li t3, 4433 // FIX_0_541196100
+ mul t2, t2, t3 // z3
+ li t3, 6270 // FIX_0_765366865
+ mul t1, t1, t3 // MULTIPLY(z1, FIX_0_765366865)
+ li t3, 15137 // FIX_1_847759065
+ mul t0, t0, t3 // MULTIPLY(z2, FIX_1_847759065)
+ addu t3, t6, t7 // tmp12
+ addu t7, t3, t4
+ addu t7, t7, t8 // tmp10
+ subu t3, t3, t9
+ subu t3, t3, t5
+ subu t3, t3, s0 // tmp12
+ subu t9, t6, t9
+ subu t9, t9, t4
+ addu t9, t9, s1 // tmp13
+ subu t6, t6, t5
+ subu t6, t6, s2
+ subu t6, t6, s3 // tmp15
+ addu t1, t2, t1 // tmp11
+ subu t0, t2, t0 // tmp14
+ // even part
+ lw t2, 16(a0) // z4
+ lw t4, 8(a0) // z1
+ lw t5, 0(a0) // z3
+ lw t8, 24(a0) // z2
+ li s0, 10033 // FIX(1.224744871)
+ li s1, 11190 // FIX(1.366025404)
+ mul t2, t2, s0 // z4
+ mul s0, t4, s1 // z4
+ addiu t5, t5, 0x10 // rounding term: 1 << 17 after the sll by 13, half of the net >> 18 (sll 4, shll_s.w 2, srl 24) below
+ sll t5, t5, 13 // z3
+ sll t4, t4, 13 // z1
+ sll t8, t8, 13 // z2
+ subu s1, t4, t8 // tmp12
+ addu s2, t5, t2 // tmp10
+ subu t2, t5, t2 // tmp11
+ addu s3, t5, s1 // tmp21
+ subu s1, t5, s1 // tmp24
+ addu t5, s0, t8 // tmp12
+ addu v0, s2, t5 // tmp20
+ subu t5, s2, t5 // tmp25
+ subu t4, s0, t4
+ subu t4, t4, t8 // tmp12
+ addu t8, t2, t4 // tmp22
+ subu t2, t2, t4 // tmp23
+ // increment counter and pointers
+ addiu a3, a3, -1
+ addiu a0, a0, 32
+ // Final stage
+ addu t4, v0, t7
+ subu v0, v0, t7
+ addu t7, s3, t1
+ subu s3, s3, t1
+ addu t1, t8, t3
+ subu t8, t8, t3
+ addu t3, t2, t9
+ subu t2, t2, t9
+ addu t9, s1, t0
+ subu s1, s1, t0
+ addu t0, t5, t6
+ subu t5, t5, t6
+ sll t4, t4, 4 // all 12 results: plain shift left by 4 first ...
+ sll t7, t7, 4
+ sll t1, t1, 4
+ sll t3, t3, 4
+ sll t9, t9, 4
+ sll t0, t0, 4
+ sll t5, t5, 4
+ sll s1, s1, 4
+ sll t2, t2, 4
+ sll t8, t8, 4
+ sll s3, s3, 4
+ sll v0, v0, 4
+ shll_s.w t4, t4, 2 // ... then a saturating shift by 2, so overflow clamps instead of wrapping
+ shll_s.w t7, t7, 2
+ shll_s.w t1, t1, 2
+ shll_s.w t3, t3, 2
+ shll_s.w t9, t9, 2
+ shll_s.w t0, t0, 2
+ shll_s.w t5, t5, 2
+ shll_s.w s1, s1, 2
+ shll_s.w t2, t2, 2
+ shll_s.w t8, t8, 2
+ shll_s.w s3, s3, 2
+ shll_s.w v0, v0, 2
+ srl t4, t4, 24 // keep only the top 8 bits of each result
+ srl t7, t7, 24
+ srl t1, t1, 24
+ srl t3, t3, 24
+ srl t9, t9, 24
+ srl t0, t0, 24
+ srl t5, t5, 24
+ srl s1, s1, 24
+ srl t2, t2, 24
+ srl t8, t8, 24
+ srl s3, s3, 24
+ srl v0, v0, 24
+ lw t6, 0(a1) // t6 = destination pointer for the 12 bytes of this iteration
+ addiu t4, t4, 0x80 // add 0x80; sb below stores only the low 8 bits
+ addiu t7, t7, 0x80
+ addiu t1, t1, 0x80
+ addiu t3, t3, 0x80
+ addiu t9, t9, 0x80
+ addiu t0, t0, 0x80
+ addiu t5, t5, 0x80
+ addiu s1, s1, 0x80
+ addiu t2, t2, 0x80
+ addiu t8, t8, 0x80
+ addiu s3, s3, 0x80
+ addiu v0, v0, 0x80
+ sb t4, 0(t6)
+ sb t7, 1(t6)
+ sb t1, 2(t6)
+ sb t3, 3(t6)
+ sb t9, 4(t6)
+ sb t0, 5(t6)
+ sb t5, 6(t6)
+ sb s1, 7(t6)
+ sb t2, 8(t6)
+ sb t8, 9(t6)
+ sb s3, 10(t6)
+ sb v0, 11(t6)
+ bgtz a3, 1b
+ addiu a1, a1, 4 // branch delay slot: step to the next destination pointer
+
+ RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3
+
+ jr ra
+ nop
+
+END(jsimd_idct_12x12_pass2_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_convsamp_mips_dspr2)
+/* Loads 8 bytes from each of 8 rows, widens them to halfwords and subtracts 128 from each.
+ * a0 - sample_data: array of 8 row pointers (read at offsets 0..28)
+ * a1 - start_col: byte offset added to every row pointer
+ * a2 - workspace: receives 8 rows of 8 halfwords (128 bytes, offsets 0..124)
+ */
+
+ lw t0, 0(a0)
+ li t7, 0xff80ff80 // two halfwords of 0xff80, i.e. -128 as a 16-bit value
+ addu t0, t0, a1
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ preceu.ph.qbr t3, t1 // zero-extend the two right-most bytes to halfwords
+ preceu.ph.qbl t4, t1 // zero-extend the two left-most bytes to halfwords
+ lw t0, 4(a0) // next row pointer is fetched early: its loads overlap this row's stores
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7 // add 0xff80 to each halfword (subtract 128)
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 0(a2)
+ usw t4, 4(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 8(a2)
+ usw t6, 12(a2)
+
+ lw t0, 8(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 16(a2)
+ usw t4, 20(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 24(a2)
+ usw t6, 28(a2)
+
+ lw t0, 12(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 32(a2)
+ usw t4, 36(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 40(a2)
+ usw t6, 44(a2)
+
+ lw t0, 16(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 48(a2)
+ usw t4, 52(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 56(a2)
+ usw t6, 60(a2)
+
+ lw t0, 20(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 64(a2)
+ usw t4, 68(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 72(a2)
+ usw t6, 76(a2)
+
+ lw t0, 24(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 80(a2)
+ usw t4, 84(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 88(a2)
+ usw t6, 92(a2)
+
+ lw t0, 28(a0)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu t0, t0, a1
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ ulw t1, 0(t0)
+ ulw t2, 4(t0)
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 96(a2)
+ usw t4, 100(a2)
+ preceu.ph.qbr t3, t1
+ preceu.ph.qbl t4, t1
+ usw t5, 104(a2)
+ usw t6, 108(a2)
+ preceu.ph.qbr t5, t2
+ preceu.ph.qbl t6, t2
+ addu.ph t3, t3, t7
+ addu.ph t4, t4, t7
+ addu.ph t5, t5, t7
+ addu.ph t6, t6, t7
+ usw t3, 112(a2)
+ usw t4, 116(a2)
+ usw t5, 120(a2)
+ usw t6, 124(a2)
+
+ jr ra // register return: canonical "jr" rather than the "j <reg>" assembler alias
+ nop
+
+END(jsimd_convsamp_mips_dspr2)
+
+/*****************************************************************************/
+LEAF_MIPS_DSPR2(jsimd_convsamp_float_mips_dspr2)
+/* Loads 8 bytes from each of 8 rows, subtracts 128 and stores them as single-precision floats.
+ * a0 - sample_data: array of 8 row pointers (read at offsets 0..28)
+ * a1 - start_col: byte offset added to every row pointer
+ * a2 - workspace: receives 8 rows of 8 floats (256 bytes, offsets 0..252)
+ */
+
+ .set at
+
+ lw t0, 0(a0)
+ addu t0, t0, a1
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128 // subtract 128 from each unsigned sample
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1 // move the integers into FP registers f1-f8
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1 // convert 32-bit integers to single-precision floats
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 4(a0) // next row pointer is fetched while this row is being stored
+ swc1 f1, 0(a2)
+ swc1 f2, 4(a2)
+ swc1 f3, 8(a2)
+ addu t0, t0, a1
+ swc1 f4, 12(a2)
+ swc1 f5, 16(a2)
+ swc1 f6, 20(a2)
+ swc1 f7, 24(a2)
+ swc1 f8, 28(a2)
+ // row 1
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 8(a0)
+ swc1 f1, 32(a2)
+ swc1 f2, 36(a2)
+ swc1 f3, 40(a2)
+ addu t0, t0, a1
+ swc1 f4, 44(a2)
+ swc1 f5, 48(a2)
+ swc1 f6, 52(a2)
+ swc1 f7, 56(a2)
+ swc1 f8, 60(a2)
+ // row 2
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 12(a0)
+ swc1 f1, 64(a2)
+ swc1 f2, 68(a2)
+ swc1 f3, 72(a2)
+ addu t0, t0, a1
+ swc1 f4, 76(a2)
+ swc1 f5, 80(a2)
+ swc1 f6, 84(a2)
+ swc1 f7, 88(a2)
+ swc1 f8, 92(a2)
+ // row 3
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 16(a0)
+ swc1 f1, 96(a2)
+ swc1 f2, 100(a2)
+ swc1 f3, 104(a2)
+ addu t0, t0, a1
+ swc1 f4, 108(a2)
+ swc1 f5, 112(a2)
+ swc1 f6, 116(a2)
+ swc1 f7, 120(a2)
+ swc1 f8, 124(a2)
+ // row 4
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 20(a0)
+ swc1 f1, 128(a2)
+ swc1 f2, 132(a2)
+ swc1 f3, 136(a2)
+ addu t0, t0, a1
+ swc1 f4, 140(a2)
+ swc1 f5, 144(a2)
+ swc1 f6, 148(a2)
+ swc1 f7, 152(a2)
+ swc1 f8, 156(a2)
+ // row 5
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 24(a0)
+ swc1 f1, 160(a2)
+ swc1 f2, 164(a2)
+ swc1 f3, 168(a2)
+ addu t0, t0, a1
+ swc1 f4, 172(a2)
+ swc1 f5, 176(a2)
+ swc1 f6, 180(a2)
+ swc1 f7, 184(a2)
+ swc1 f8, 188(a2)
+ // row 6
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ lw t0, 28(a0)
+ swc1 f1, 192(a2)
+ swc1 f2, 196(a2)
+ swc1 f3, 200(a2)
+ addu t0, t0, a1
+ swc1 f4, 204(a2)
+ swc1 f5, 208(a2)
+ swc1 f6, 212(a2)
+ swc1 f7, 216(a2)
+ swc1 f8, 220(a2)
+ // row 7
+ lbu t1, 0(t0)
+ lbu t2, 1(t0)
+ lbu t3, 2(t0)
+ lbu t4, 3(t0)
+ lbu t5, 4(t0)
+ lbu t6, 5(t0)
+ lbu t7, 6(t0)
+ lbu t8, 7(t0)
+ addiu t1, t1, -128
+ addiu t2, t2, -128
+ addiu t3, t3, -128
+ addiu t4, t4, -128
+ addiu t5, t5, -128
+ addiu t6, t6, -128
+ addiu t7, t7, -128
+ addiu t8, t8, -128
+ mtc1 t1, f1
+ mtc1 t2, f2
+ mtc1 t3, f3
+ mtc1 t4, f4
+ mtc1 t5, f5
+ mtc1 t6, f6
+ mtc1 t7, f7
+ mtc1 t8, f8
+ cvt.s.w f1, f1
+ cvt.s.w f2, f2
+ cvt.s.w f3, f3
+ cvt.s.w f4, f4
+ cvt.s.w f5, f5
+ cvt.s.w f6, f6
+ cvt.s.w f7, f7
+ cvt.s.w f8, f8
+ swc1 f1, 224(a2)
+ swc1 f2, 228(a2)
+ swc1 f3, 232(a2)
+ swc1 f4, 236(a2)
+ swc1 f5, 240(a2)
+ swc1 f6, 244(a2)
+ swc1 f7, 248(a2)
+ swc1 f8, 252(a2)
+
+ jr ra // register return: canonical "jr" rather than the "j <reg>" assembler alias
+ nop
+
+END(jsimd_convsamp_float_mips_dspr2)
+
+/*****************************************************************************/
+
diff --git a/simd/jsimd_mips_dspr2_asm.h b/simd/jsimd_mips_dspr2_asm.h
new file mode 100644
index 0000000..50ec31b
--- /dev/null
+++ b/simd/jsimd_mips_dspr2_asm.h
@@ -0,0 +1,285 @@
+/*
+ * MIPS DSPr2 optimizations for libjpeg-turbo
+ *
+ * Copyright (C) 2013, MIPS Technologies, Inc., California.
+ * All rights reserved.
+ * Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
+ * Darko Laus (darko.laus@imgtec.com)
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+/* Symbolic names for the general-purpose registers $0-$31. */
+#define zero $0
+#define AT $1
+#define v0 $2
+#define v1 $3
+#define a0 $4
+#define a1 $5
+#define a2 $6
+#define a3 $7
+#define t0 $8
+#define t1 $9
+#define t2 $10
+#define t3 $11
+#define t4 $12
+#define t5 $13
+#define t6 $14
+#define t7 $15
+#define s0 $16
+#define s1 $17
+#define s2 $18
+#define s3 $19
+#define s4 $20
+#define s5 $21
+#define s6 $22
+#define s7 $23
+#define t8 $24
+#define t9 $25
+#define k0 $26
+#define k1 $27
+#define gp $28
+#define sp $29
+#define fp $30
+#define s8 $30
+#define ra $31
+/* Symbolic names for the floating-point registers $f0-$f31. */
+#define f0 $f0
+#define f1 $f1
+#define f2 $f2
+#define f3 $f3
+#define f4 $f4
+#define f5 $f5
+#define f6 $f6
+#define f7 $f7
+#define f8 $f8
+#define f9 $f9
+#define f10 $f10
+#define f11 $f11
+#define f12 $f12
+#define f13 $f13
+#define f14 $f14
+#define f15 $f15
+#define f16 $f16
+#define f17 $f17
+#define f18 $f18
+#define f19 $f19
+#define f20 $f20
+#define f21 $f21
+#define f22 $f22
+#define f23 $f23
+#define f24 $f24
+#define f25 $f25
+#define f26 $f26
+#define f27 $f27
+#define f28 $f28
+#define f29 $f29
+#define f30 $f30
+#define f31 $f31
+
+/* LEAF_MIPS32R2 - declare leaf routine for MIPS32r2: emits the global, aligned,
+ * function-typed label `symbol`, then pushes the assembler options and selects
+ * arch=mips32r2, noreorder and noat (END pops the options again). */
+#define LEAF_MIPS32R2(symbol) \
+ .globl symbol; \
+ .align 2; \
+ .type symbol, @function; \
+ .ent symbol, 0; \
+symbol: .frame sp, 0, ra; \
+ .set push; \
+ .set arch=mips32r2; \
+ .set noreorder; \
+ .set noat;
+
+/* LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2: expands to
+ * LEAF_MIPS32R2(symbol) followed by ".set dspr2", which turns on the
+ * assembler's DSPr2 setting for the body of the routine. */
+#define LEAF_MIPS_DSPR2(symbol) \
+LEAF_MIPS32R2(symbol) \
+ .set dspr2;
+
+/* END - mark end of function: pops the assembler options pushed by the
+ * LEAF_* macro (.set pop), closes the .ent block (.end) and records the
+ * symbol size (.size function, .-function). */
+#define END(function) \
+ .set pop; \
+ .end function; \
+ .size function,.-function
+
+/*
+ * Checks that stack_offset is big enough for storing/restoring regs_num
+ * registers to/from the stack, and raises an assembly error otherwise.
+ * Storing the registers takes regs_num*4 bytes, but the check only requires
+ * stack_offset >= regs_num*4 - 16: the MIPS ABI allows usage of the first 16
+ * bytes of the stack frame (reserved for the input arguments of the function,
+ * which are already held in a0-a3), so that space is counted as available.
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves a set of registers on the stack. At most 14 registers can be
+ * saved (a0-a3, v0-v1 and s0-s7); r1 is mandatory, r2-r14 are optional.
+ * stack_offset is the number of bytes subtracted from the stack pointer (sp)
+ * before the registers are stored at 0(sp), 4(sp), ... It must be a
+ * non-negative multiple of 4 and big enough, as described by the
+ * CHECK_STACK_OFFSET macro. This macro is intended to be used in
+ * combination with the RESTORE_REGS_FROM_STACK macro. Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, -\stack_offset
+ .endif
+ sw \r1, 0(sp)
+ .if \r2 != 0
+ sw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ sw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ sw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ sw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ sw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ sw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ sw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ sw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ sw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ sw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ sw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ sw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ sw \r14, 52(sp)
+ .endif
+.endm
+
+/*
+ * Restores a set of registers from the stack. At most 14 registers can be
+ * restored (a0-a3, v0-v1 and s0-s7); r1 is mandatory, r2-r14 are optional.
+ * stack_offset is the number of bytes added to the stack pointer (sp) after
+ * the registers have been loaded from 0(sp), 4(sp), ... (it must be a
+ * non-negative multiple of 4, and big enough as described by CHECK_STACK_OFFSET).
+ * Intended to be used in combination with the SAVE_REGS_ON_STACK macro.
+ * Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ lw \r1, 0(sp)
+ .if \r2 != 0
+ lw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ lw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ lw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ lw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ lw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ lw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ lw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ lw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ lw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ lw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ lw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ lw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ lw \r14, 52(sp)
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, \stack_offset
+ .endif
+.endm
+
+
diff --git a/tjbench.c b/tjbench.c
index 1f88e13..7a7bc92 100644
--- a/tjbench.c
+++ b/tjbench.c
@@ -29,6 +29,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
#include <math.h>
#include <errno.h>
#include <cdjpeg.h>
@@ -44,9 +45,9 @@
#define _throwtj(m) _throw(m, tjGetErrorStr())
#define _throwbmp(m) _throw(m, bmpgeterr())
-enum {YUVENCODE=1, YUVDECODE};
-int flags=TJFLAG_NOREALLOC, decomponly=0, yuv=0, quiet=0, dotile=0,
- pf=TJPF_BGR;
+enum {YUVENCODE=1, YUVDECODE, YUVCOMPRESS};
+int flags=TJFLAG_NOREALLOC, componly=0, decomponly=0, yuv=0, quiet=0, dotile=0,
+ pf=TJPF_BGR, yuvpad=1;
char *ext="ppm";
const char *pixFormatStr[TJ_NUMPF]=
{
@@ -54,15 +55,31 @@
};
const char *subNameLong[TJ_NUMSAMP]=
{
- "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
+ "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
};
-const char *subName[NUMSUBOPT]={"444", "422", "420", "GRAY", "440"};
+const char *csName[TJ_NUMCS]= /* display names, indexed by colorspace value (csName[cs]) */
+{
+ "RGB", "YCbCr", "GRAY", "CMYK", "YCCK"
+};
+const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"};
tjscalingfactor *scalingfactors=NULL, sf={1, 1}; int nsf=0;
int xformop=TJXOP_NONE, xformopt=0;
int (*customFilter)(short *, tjregion, tjregion, int, int, tjtransform *);
double benchtime=5.0;
+char *formatName(int subsamp, int cs, char *buf)
+{
+	/* Only YCCK needs buf (written with an 80-byte limit); other names are static */
+	if(cs==TJCS_YCCK)
+	{
+		snprintf(buf, 80, "%s %s", csName[cs], subNameLong[subsamp]);
+		return buf;
+	}
+	return (char *)((cs==TJCS_YCbCr)? subNameLong[subsamp]:csName[cs]);
+}
+
+
char *sigfig(double val, int figs, char *buf, int len)
{
char format[80];
@@ -94,9 +111,9 @@
int row, col, i, dstbufalloc=0, retval=0;
double start, elapsed;
int ps=tjPixelSize[pf];
- int yuvsize=tjBufSizeYUV(w, h, subsamp), bufsize;
- int scaledw=(yuv==YUVDECODE)? w : TJSCALED(w, sf);
- int scaledh=(yuv==YUVDECODE)? h : TJSCALED(h, sf);
+ int scaledw=TJSCALED(w, sf);
+ int scaledh=TJSCALED(h, sf);
+ int yuvsize=tjBufSizeYUV2(scaledw, yuvpad, scaledh, subsamp), bufsize;
int pitch=scaledw*ps;
int ntilesw=(w+tilew-1)/tilew, ntilesh=(h+tileh-1)/tileh;
unsigned char *dstptr, *dstptr2;
@@ -124,8 +141,9 @@
/* Execute once to preload cache */
if(yuv==YUVDECODE)
{
- if(tjDecompressToYUV(handle, jpegbuf[0], jpegsize[0], dstbuf, flags)==-1)
- _throwtj("executing tjDecompressToYUV()");
+ if(tjDecompressToYUV2(handle, jpegbuf[0], jpegsize[0], dstbuf, scaledw,
+ yuvpad, scaledh, flags)==-1)
+ _throwtj("executing tjDecompressToYUV2()");
}
else if(tjDecompress2(handle, jpegbuf[0], jpegsize[0], dstbuf, scaledw,
pitch, scaledh, pf, flags)==-1)
@@ -137,8 +155,9 @@
int tile=0;
if(yuv==YUVDECODE)
{
- if(tjDecompressToYUV(handle, jpegbuf[0], jpegsize[0], dstbuf, flags)==-1)
- _throwtj("executing tjDecompressToYUV()");
+ if(tjDecompressToYUV2(handle, jpegbuf[0], jpegsize[0], dstbuf, scaledw,
+ yuvpad, scaledh, flags)==-1)
+ _throwtj("executing tjDecompressToYUV2()");
}
else for(row=0, dstptr=dstbuf; row<ntilesh; row++, dstptr+=pitch*tileh)
{
@@ -167,10 +186,19 @@
printf(" Dest. throughput: %f Megapixels/sec\n",
(double)(w*h)/1000000.*(double)i/elapsed);
}
+ if(sf.num!=1 || sf.denom!=1)
+ snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom);
+ else if(tilew!=w || tileh!=h)
+ snprintf(sizestr, 20, "%dx%d", tilew, tileh);
+ else snprintf(sizestr, 20, "full");
+ if(decomponly)
+ snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext);
+ else
+ snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp],
+ qualstr, sizestr, ext);
+
if(yuv==YUVDECODE)
{
- snprintf(tempstr, 1024, "%s_%s%s.yuv", filename, subName[subsamp],
- qualstr);
if((file=fopen(tempstr, "wb"))==NULL)
_throwunix("opening YUV image for output");
if(fwrite(dstbuf, yuvsize, 1, file)!=1)
@@ -179,16 +207,6 @@
}
else
{
- if(sf.num!=1 || sf.denom!=1)
- snprintf(sizestr, 20, "%d_%d", sf.num, sf.denom);
- else if(tilew!=w || tileh!=h)
- snprintf(sizestr, 20, "%dx%d", tilew, tileh);
- else snprintf(sizestr, 20, "full");
- if(decomponly)
- snprintf(tempstr, 1024, "%s_%s.%s", filename, sizestr, ext);
- else
- snprintf(tempstr, 1024, "%s_%s%s_%s.%s", filename, subName[subsamp],
- qualstr, sizestr, ext);
if(savebmp(tempstr, dstbuf, scaledw, scaledh, pf,
(flags&TJFLAG_BOTTOMUP)!=0)==-1)
_throwbmp("saving bitmap");
@@ -323,7 +341,9 @@
double start, elapsed;
int totaljpegsize=0, row, col, i, tilew=w, tileh=h, retval=0;
unsigned long *jpegsize=NULL;
- int ps=tjPixelSize[pf], ntilesw=1, ntilesh=1, pitch=w*ps;
+ int ps=(yuv==YUVCOMPRESS)? 3:tjPixelSize[pf];
+ int ntilesw=1, ntilesh=1, pitch=w*ps;
+ const char *pfStr=(yuv==YUVCOMPRESS)? "YUV":pixFormatStr[pf];
if(yuv==YUVENCODE) {dotestyuv(srcbuf, w, h, subsamp, filename); return;}
@@ -331,7 +351,7 @@
_throwunix("allocating temporary image buffer");
if(!quiet)
- printf(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pixFormatStr[pf],
+ printf(">>>>> %s (%s) <--> JPEG %s Q%d <<<<<\n", pfStr,
(flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down", subNameLong[subsamp],
jpegqual);
@@ -359,17 +379,27 @@
/* Compression test */
if(quiet==1)
- printf("%s\t%s\t%s\t%d\t", pixFormatStr[pf],
- (flags&TJFLAG_BOTTOMUP)? "BU":"TD", subNameLong[subsamp], jpegqual);
- for(i=0; i<h; i++)
- memcpy(&tmpbuf[pitch*i], &srcbuf[w*ps*i], w*ps);
+ printf("%s\t%s\t%s\t%d\t", pfStr, (flags&TJFLAG_BOTTOMUP)? "BU":"TD",
+ subNameLong[subsamp], jpegqual);
+ if(yuv!=YUVCOMPRESS)
+ for(i=0; i<h; i++)
+ memcpy(&tmpbuf[pitch*i], &srcbuf[w*ps*i], w*ps);
if((handle=tjInitCompress())==NULL)
_throwtj("executing tjInitCompress()");
/* Execute once to preload cache */
- if(tjCompress2(handle, srcbuf, tilew, pitch, tileh, pf, &jpegbuf[0],
- &jpegsize[0], subsamp, jpegqual, flags)==-1)
- _throwtj("executing tjCompress2()");
+ if(yuv==YUVCOMPRESS)
+ {
+ if(tjCompressFromYUV(handle, srcbuf, tilew, yuvpad, tileh, subsamp,
+ &jpegbuf[0], &jpegsize[0], jpegqual, flags)==-1)
+ _throwtj("executing tjCompressFromYUV()");
+ }
+ else
+ {
+ if(tjCompress2(handle, srcbuf, tilew, pitch, tileh, pf, &jpegbuf[0],
+ &jpegsize[0], subsamp, jpegqual, flags)==-1)
+ _throwtj("executing tjCompress2()");
+ }
/* Benchmark */
for(i=0, start=gettime(); (elapsed=gettime()-start)<benchtime; i++)
@@ -383,9 +413,18 @@
{
int width=min(tilew, w-col*tilew);
int height=min(tileh, h-row*tileh);
- if(tjCompress2(handle, srcptr2, width, pitch, height, pf,
- &jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1)
- _throwtj("executing tjCompress()2");
+ if(yuv==YUVCOMPRESS)
+ {
+ if(tjCompressFromYUV(handle, srcptr2, width, yuvpad, height,
+ subsamp, &jpegbuf[tile], &jpegsize[tile], jpegqual, flags)==-1)
+ _throwtj("executing tjCompressFromYUV()");
+ }
+ else
+ {
+ if(tjCompress2(handle, srcptr2, width, pitch, height, pf,
+ &jpegbuf[tile], &jpegsize[tile], subsamp, jpegqual, flags)==-1)
+ _throwtj("executing tjCompress2()");
+ }
totaljpegsize+=jpegsize[tile];
}
}
@@ -429,9 +468,12 @@
}
/* Decompression test */
- if(decomptest(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual,
- filename, tilew, tileh)==-1)
- goto bailout;
+ if(!componly)
+ {
+ if(decomptest(srcbuf, jpegbuf, jpegsize, tmpbuf, w, h, subsamp, jpegqual,
+ filename, tilew, tileh)==-1)
+ goto bailout;
+ }
for(i=0; i<ntilesw*ntilesh; i++)
{
@@ -466,7 +508,7 @@
unsigned char **jpegbuf=NULL, *srcbuf=NULL;
unsigned long *jpegsize=NULL, srcsize, totaljpegsize;
tjtransform *t=NULL;
- int w=0, h=0, subsamp=-1, _w, _h, _tilew, _tileh,
+ int w=0, h=0, subsamp=-1, cs=-1, _w, _h, _tilew, _tileh,
_ntilesw, _ntilesh, _subsamp;
char *temp=NULL, tempstr[80], tempstr2[80];
int row, col, i, tilew, tileh, ntilesw=1, ntilesh=1, retval=0;
@@ -490,20 +532,25 @@
if((handle=tjInitTransform())==NULL)
_throwtj("executing tjInitTransform()");
- if(tjDecompressHeader2(handle, srcbuf, srcsize, &w, &h, &subsamp)==-1)
- _throwtj("executing tjDecompressHeader2()");
+ if(tjDecompressHeader3(handle, srcbuf, srcsize, &w, &h, &subsamp, &cs)==-1)
+ _throwtj("executing tjDecompressHeader3()");
if(quiet==1)
{
printf("All performance values in Mpixels/sec\n\n");
- printf("Bitmap\tBitmap\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
+ printf("Bitmap\tBitmap\tJPEG\tJPEG\t%s %s \tXform\tComp\tDecomp\n",
dotile? "Tile ":"Image", dotile? "Tile ":"Image");
- printf("Format\tOrder\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n\n");
+ printf("Format\tOrder\tCS\tSubsamp\tWidth Height\tPerf \tRatio\tPerf\n\n");
}
else if(!quiet)
{
- printf(">>>>> JPEG %s --> %s (%s) <<<<<\n", subNameLong[subsamp],
- pixFormatStr[pf], (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down");
+ if(yuv==YUVDECODE)
+ printf(">>>>> JPEG %s --> YUV <<<<<\n",
+ formatName(subsamp, cs, tempstr));
+ else
+ printf(">>>>> JPEG %s --> %s (%s) <<<<<\n",
+ formatName(subsamp, cs, tempstr), pixFormatStr[pf],
+ (flags&TJFLAG_BOTTOMUP)? "Bottom-up":"Top-down");
}
for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2)
@@ -539,8 +586,8 @@
}
else if(quiet==1)
{
- printf("%s\t%s\t%s\t", pixFormatStr[pf],
- (flags&TJFLAG_BOTTOMUP)? "BU":"TD", subNameLong[subsamp]);
+ printf("%s\t%s\t%s\t%s\t", pixFormatStr[pf],
+ (flags&TJFLAG_BOTTOMUP)? "BU":"TD", csName[cs], subNameLong[subsamp]);
printf("%-4d %-4d\t", tilew, tileh);
}
@@ -667,7 +714,7 @@
{
int i;
printf("USAGE: %s\n", progname);
- printf(" <Inputfile (BMP|PPM)> <Quality> [options]\n\n");
+ printf(" <Inputfile (BMP|PPM|YUV)> <Quality> [options]\n\n");
printf(" %s\n", progname);
printf(" <Inputfile (JPG)> [options]\n\n");
printf("Options:\n\n");
@@ -676,8 +723,6 @@
printf("-bottomup = Test bottom-up compression/decompression\n");
printf("-tile = Test performance of the codec when the image is encoded as separate\n");
printf(" tiles of varying sizes.\n");
- printf("-forcemmx, -forcesse, -forcesse2, -forcesse3 =\n");
- printf(" Force MMX, SSE, SSE2, or SSE3 code paths in the underlying codec\n");
printf("-rgb, -bgr, -rgbx, -bgrx, -xbgr, -xrgb =\n");
printf(" Test the specified color conversion path in the codec (default: BGR)\n");
printf("-fastupsample = Use the fastest chrominance upsampling algorithm available in\n");
@@ -686,10 +731,21 @@
printf(" codec\n");
printf("-accuratedct = Use the most accurate DCT/IDCT algorithms available in the\n");
printf(" underlying codec\n");
- printf("-440 = Test 4:4:0 chrominance subsampling instead of 4:2:2\n");
+ printf("-subsamp <s> = if compressing a JPEG image from a YUV planar source image,\n");
+ printf(" this specifies the level of chrominance subsampling used in the source\n");
+ printf(" image. Otherwise, this specifies the level of chrominance subsampling\n");
+ printf(" to use in the JPEG destination image. <s> = 444, 422, 440, 420, 411,\n");
+ printf(" or GRAY\n");
printf("-quiet = Output results in tabular rather than verbose format\n");
printf("-yuvencode = Encode RGB input as planar YUV rather than compressing as JPEG\n");
printf("-yuvdecode = Decode JPEG image to planar YUV rather than RGB\n");
+ printf("-yuvsize WxH = if compressing a JPEG image from a YUV planar source image, this\n");
+ printf(" specifies the width and height of the source image.\n");
+ printf("-yuvpad <p> = if compressing a JPEG image from a YUV planar source image, this\n");
+ printf(" specifies the number of bytes to which each row of each plane in the\n");
+ printf(" source image is padded. If decompressing a JPEG image to a YUV planar\n");
+ printf(" destination image, this specifies the row padding for each plane of the\n");
+ printf(" destination image. (default=1)\n");
printf("-scale M/N = scale down the width/height of the decompressed JPEG image by a\n");
printf(" factor of M/N (M/N = ");
for(i=0; i<nsf; i++)
@@ -709,7 +765,8 @@
printf(" decompression (these options are mutually exclusive)\n");
printf("-grayscale = Perform lossless grayscale conversion prior to decompression\n");
printf(" test (can be combined with the other transforms above)\n");
- printf("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n\n");
+ printf("-benchtime <t> = Run each benchmark for at least <t> seconds (default = 5.0)\n");
+ printf("-componly = Stop after running compression tests. Do not test decompression.\n\n");
printf("NOTE: If the quality is specified as a range (e.g. 90-100), a separate\n");
printf("test will be performed for all quality values in the range.\n\n");
exit(1);
@@ -718,9 +775,9 @@
int main(int argc, char *argv[])
{
- unsigned char *srcbuf=NULL; int w, h, i, j;
+ unsigned char *srcbuf=NULL; int w=0, h=0, i, j;
int minqual=-1, maxqual=-1; char *temp;
- int minarg=2, retval=0, do440=0;
+ int minarg=2, retval=0, subsamp=-1;
if((scalingfactors=tjGetScalingFactors(&nsf))==NULL || nsf==0)
_throwtj("executing tjGetScalingFactors()");
@@ -732,6 +789,7 @@
{
if(!strcasecmp(temp, ".bmp")) ext="bmp";
if(!strcasecmp(temp, ".jpg") || !strcasecmp(temp, ".jpeg")) decomponly=1;
+ if(!strcasecmp(temp, ".yuv")) yuv=YUVCOMPRESS;
}
printf("\n");
@@ -776,26 +834,6 @@
{
dotile=1; xformopt|=TJXOPT_CROP;
}
- if(!strcasecmp(argv[i], "-forcesse3"))
- {
- printf("Forcing SSE3 code\n\n");
- flags|=TJFLAG_FORCESSE3;
- }
- if(!strcasecmp(argv[i], "-forcesse2"))
- {
- printf("Forcing SSE2 code\n\n");
- flags|=TJFLAG_FORCESSE2;
- }
- if(!strcasecmp(argv[i], "-forcesse"))
- {
- printf("Forcing SSE code\n\n");
- flags|=TJFLAG_FORCESSE;
- }
- if(!strcasecmp(argv[i], "-forcemmx"))
- {
- printf("Forcing MMX code\n\n");
- flags|=TJFLAG_FORCEMMX;
- }
if(!strcasecmp(argv[i], "-fastupsample"))
{
printf("Using fast upsampling code\n\n");
@@ -811,7 +849,6 @@
printf("Using most accurate DCT/IDCT algorithm\n\n");
flags|=TJFLAG_ACCURATEDCT;
}
- if(!strcmp(argv[i], "-440")) do440=1;
if(!strcasecmp(argv[i], "-rgb")) pf=TJPF_RGB;
if(!strcasecmp(argv[i], "-rgbx")) pf=TJPF_RGBX;
if(!strcasecmp(argv[i], "-bgr")) pf=TJPF_BGR;
@@ -858,9 +895,44 @@
if(!strcmp(argv[i], "-?")) usage(argv[0]);
if(!strcasecmp(argv[i], "-alloc")) flags&=(~TJFLAG_NOREALLOC);
if(!strcasecmp(argv[i], "-bmp")) ext="bmp";
+ if(!strcasecmp(argv[i], "-yuvsize") && i<argc-1)
+ {
+ int temp1=0, temp2=0;
+ if(sscanf(argv[++i], "%dx%d", &temp1, &temp2)==2 && temp1>=1
+ && temp2>=1)
+ {
+ w=temp1; h=temp2;
+ }
+ else usage(argv[0]);
+ }
+ if(!strcasecmp(argv[i], "-yuvpad") && i<argc-1)
+ {
+ int temp=atoi(argv[++i]);
+ if(temp>=1) yuvpad=temp;
+ }
+ if(!strcasecmp(argv[i], "-subsamp") && i<argc-1)
+ {
+ i++;
+ if(toupper(argv[i][0])=='G') subsamp=TJSAMP_GRAY;
+ else
+ {
+ int temp=atoi(argv[i]);
+ switch(temp)
+ {
+ case 444: subsamp=TJSAMP_444; break;
+ case 422: subsamp=TJSAMP_422; break;
+ case 440: subsamp=TJSAMP_440; break;
+ case 420: subsamp=TJSAMP_420; break;
+ case 411: subsamp=TJSAMP_411; break;
+ }
+ }
+ }
+ if(!strcasecmp(argv[i], "-componly")) componly=1;
}
}
+ if(yuv) ext="yuv";
+
if((sf.num!=1 || sf.denom!=1) && dotile)
{
printf("Disabling tiled compression/decompression tests, because those tests do not\n");
@@ -871,14 +943,38 @@
if(yuv && dotile)
{
printf("Disabling tiled compression/decompression tests, because those tests do not\n");
- printf("work when YUV encoding or decoding is enabled.\n\n");
+ printf("work when YUV encoding, compression, or decoding is enabled.\n\n");
dotile=0;
}
if(!decomponly)
{
- if(loadbmp(argv[1], &srcbuf, &w, &h, pf, (flags&TJFLAG_BOTTOMUP)!=0)==-1)
- _throwbmp("loading bitmap");
+ if(yuv==YUVCOMPRESS)
+ {
+ FILE *file=NULL; unsigned long srcsize;
+ if(w<1 || h<1 || subsamp<0 || subsamp>=TJ_NUMSAMP)
+ _throw("opening YUV image file",
+ "YUV image size and/or subsampling not specified");
+ if((file=fopen(argv[1], "rb"))==NULL)
+ _throwunix("opening YUV image file");
+ if(fseek(file, 0, SEEK_END)<0 ||
+ (srcsize=ftell(file))==(unsigned long)-1)
+ _throwunix("determining YUV image file size");
+ if(srcsize!=tjBufSizeYUV2(w, yuvpad, h, subsamp))
+ _throw("opening YUV image file", "YUV image file is the wrong size");
+ if((srcbuf=(unsigned char *)malloc(srcsize))==NULL)
+ _throwunix("allocating memory for YUV image");
+ if(fseek(file, 0, SEEK_SET)<0)
+ _throwunix("setting YUV image file position");
+ if(fread(srcbuf, srcsize, 1, file)<1)
+ _throwunix("reading YUV data");
+ fclose(file); file=NULL;
+ }
+ else
+ {
+ if(loadbmp(argv[1], &srcbuf, &w, &h, pf, (flags&TJFLAG_BOTTOMUP)!=0)==-1)
+ _throwbmp("loading bitmap");
+ }
temp=strrchr(argv[1], '.');
if(temp!=NULL) *temp='\0';
}
@@ -897,18 +993,27 @@
printf("\n");
goto bailout;
}
- for(i=maxqual; i>=minqual; i--)
- dotest(srcbuf, w, h, TJ_GRAYSCALE, i, argv[1]);
- printf("\n");
- for(i=maxqual; i>=minqual; i--)
- dotest(srcbuf, w, h, TJ_420, i, argv[1]);
- printf("\n");
- for(i=maxqual; i>=minqual; i--)
- dotest(srcbuf, w, h, do440? TJSAMP_440:TJ_422, i, argv[1]);
- printf("\n");
- for(i=maxqual; i>=minqual; i--)
- dotest(srcbuf, w, h, TJ_444, i, argv[1]);
- printf("\n");
+ if(yuv==YUVCOMPRESS || (subsamp>=0 && subsamp<TJ_NUMSAMP))
+ {
+ for(i=maxqual; i>=minqual; i--)
+ dotest(srcbuf, w, h, subsamp, i, argv[1]);
+ printf("\n");
+ }
+ else
+ {
+ for(i=maxqual; i>=minqual; i--)
+ dotest(srcbuf, w, h, TJSAMP_GRAY, i, argv[1]);
+ printf("\n");
+ for(i=maxqual; i>=minqual; i--)
+ dotest(srcbuf, w, h, TJSAMP_420, i, argv[1]);
+ printf("\n");
+ for(i=maxqual; i>=minqual; i--)
+ dotest(srcbuf, w, h, TJSAMP_422, i, argv[1]);
+ printf("\n");
+ for(i=maxqual; i>=minqual; i--)
+ dotest(srcbuf, w, h, TJSAMP_444, i, argv[1]);
+ printf("\n");
+ }
bailout:
if(srcbuf) free(srcbuf);
diff --git a/tjunittest.c b/tjunittest.c
index 3bb194d..d2d3e98 100644
--- a/tjunittest.c
+++ b/tjunittest.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C)2009-2012, 2014 D. R. Commander. All Rights Reserved.
+ * Copyright (C)2009-2014 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,8 @@
printf("\nUSAGE: %s [options]\n", progName);
printf("Options:\n");
printf("-yuv = test YUV encoding/decoding support\n");
+ printf("-noyuvpad = do not pad each line of each Y, U, and V plane to the nearest\n");
+ printf(" 4-byte boundary\n");
printf("-alloc = test automatic buffer allocation\n");
exit(1);
}
@@ -59,25 +61,26 @@
const char *subNameLong[TJ_NUMSAMP]=
{
- "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0"
+ "4:4:4", "4:2:2", "4:2:0", "GRAY", "4:4:0", "4:1:1"
};
-const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440"};
+const char *subName[TJ_NUMSAMP]={"444", "422", "420", "GRAY", "440", "411"};
const char *pixFormatStr[TJ_NUMPF]=
{
"RGB", "BGR", "RGBX", "BGRX", "XBGR", "XRGB", "Grayscale",
- "RGBA", "BGRA", "ABGR", "ARGB"
+ "RGBA", "BGRA", "ABGR", "ARGB", "CMYK"
};
-const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0};
+const int alphaOffset[TJ_NUMPF] = {-1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1};
const int _3byteFormats[]={TJPF_RGB, TJPF_BGR};
-const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB};
+const int _4byteFormats[]={TJPF_RGBX, TJPF_BGRX, TJPF_XBGR, TJPF_XRGB,
+ TJPF_CMYK};
const int _onlyGray[]={TJPF_GRAY};
const int _onlyRGB[]={TJPF_RGB};
enum {YUVENCODE=1, YUVDECODE};
-int yuv=0, alloc=0;
+int doyuv=0, alloc=0, pad=4;
int exitStatus=0;
#define bailout() {exitStatus=-1; goto bailout;}
@@ -91,9 +94,9 @@
int ps=tjPixelSize[pf];
int index, row, col, halfway=16;
- memset(buf, 0, w*h*ps);
if(pf==TJPF_GRAY)
{
+ memset(buf, 0, w*h*ps);
for(row=0; row<h; row++)
{
for(col=0; col<w; col++)
@@ -105,8 +108,30 @@
}
}
}
+ else if(pf==TJPF_CMYK)
+ {
+ memset(buf, 255, w*h*ps);
+ for(row=0; row<h; row++)
+ {
+ for(col=0; col<w; col++)
+ {
+ if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+ else index=row*w+col;
+ if(((row/8)+(col/8))%2==0)
+ {
+ if(row>=halfway) buf[index*ps+3]=0;
+ }
+ else
+ {
+ buf[index*ps+2]=0;
+ if(row<halfway) buf[index*ps+1]=0;
+ }
+ }
+ }
+ }
else
{
+ memset(buf, 0, w*h*ps);
for(row=0; row<h; row++)
{
for(col=0; col<w; col++)
@@ -165,6 +190,36 @@
int halfway=16*sf.num/sf.denom;
int blocksize=8*sf.num/sf.denom;
+ if(pf==TJPF_CMYK)
+ {
+ for(row=0; row<h; row++)
+ {
+ for(col=0; col<w; col++)
+ {
+ unsigned char c, m, y, k;
+ if(flags&TJFLAG_BOTTOMUP) index=(h-row-1)*w+col;
+ else index=row*w+col;
+ c=buf[index*ps];
+ m=buf[index*ps+1];
+ y=buf[index*ps+2];
+ k=buf[index*ps+3];
+ if(((row/blocksize)+(col/blocksize))%2==0)
+ {
+ checkval255(c); checkval255(m); checkval255(y);
+ if(row<halfway) checkval255(k)
+ else checkval0(k)
+ }
+ else
+ {
+ checkval255(c); checkval0(y); checkval255(k);
+ if(row<halfway) checkval0(m)
+ else checkval255(m)
+ }
+ }
+ }
+ return 1;
+ }
+
for(row=0; row<h; row++)
{
for(col=0; col<w; col++)
@@ -223,8 +278,13 @@
{
for(col=0; col<w; col++)
{
- printf("%.3d/%.3d/%.3d ", buf[(row*w+col)*ps+roffset],
- buf[(row*w+col)*ps+goffset], buf[(row*w+col)*ps+boffset]);
+ if(pf==TJPF_CMYK)
+ printf("%.3d/%.3d/%.3d/%.3d ", buf[(row*w+col)*ps],
+ buf[(row*w+col)*ps+1], buf[(row*w+col)*ps+2],
+ buf[(row*w+col)*ps+3]);
+ else
+ printf("%.3d/%.3d/%.3d ", buf[(row*w+col)*ps+roffset],
+ buf[(row*w+col)*ps+goffset], buf[(row*w+col)*ps+boffset]);
}
printf("\n");
}
@@ -235,22 +295,24 @@
#define PAD(v, p) ((v+(p)-1)&(~((p)-1)))
-int checkBufYUV(unsigned char *buf, int w, int h, int subsamp)
+int checkBufYUV(unsigned char *buf, int w, int h, int subsamp,
+ tjscalingfactor sf)
{
int row, col;
int hsf=tjMCUWidth[subsamp]/8, vsf=tjMCUHeight[subsamp]/8;
int pw=PAD(w, hsf), ph=PAD(h, vsf);
int cw=pw/hsf, ch=ph/vsf;
- int ypitch=PAD(pw, 4), uvpitch=PAD(cw, 4);
+ int ypitch=PAD(pw, pad), uvpitch=PAD(cw, pad);
int retval=1;
- int halfway=16;
+ int halfway=16*sf.num/sf.denom;
+ int blocksize=8*sf.num/sf.denom;
for(row=0; row<ph; row++)
{
for(col=0; col<pw; col++)
{
unsigned char y=buf[ypitch*row+col];
- if(((row/8)+(col/8))%2==0)
+ if(((row/blocksize)+(col/blocksize))%2==0)
{
if(row<halfway) checkval255(y) else checkval0(y);
}
@@ -262,14 +324,14 @@
}
if(subsamp!=TJSAMP_GRAY)
{
- halfway=16/vsf;
+ int halfway=16/vsf*sf.num/sf.denom;
for(row=0; row<ch; row++)
{
for(col=0; col<cw; col++)
{
unsigned char u=buf[ypitch*ph + (uvpitch*row+col)],
v=buf[ypitch*ph + uvpitch*ch + (uvpitch*row+col)];
- if(((row*vsf/8)+(col*hsf/8))%2==0)
+ if(((row*vsf/blocksize)+(col*hsf/blocksize))%2==0)
{
checkval(u, 128); checkval(v, 128);
}
@@ -335,57 +397,60 @@
unsigned long *dstSize, int w, int h, int pf, char *basename,
int subsamp, int jpegQual, int flags)
{
- char tempStr[1024]; unsigned char *srcBuf=NULL;
- double t;
-
- if(yuv==YUVENCODE)
- printf("%s %s -> %s YUV ... ", pixFormatStr[pf],
- (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ", subNameLong[subsamp]);
- else
- printf("%s %s -> %s Q%d ... ", pixFormatStr[pf],
- (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ", subNameLong[subsamp],
- jpegQual);
+ char tempStr[1024]; unsigned char *srcBuf=NULL, *yuvBuf=NULL;
+ const char *pfStr=pixFormatStr[pf];
+ const char *buStrLong=(flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ";
+ const char *buStr=(flags&TJFLAG_BOTTOMUP)? "BU":"TD";
if((srcBuf=(unsigned char *)malloc(w*h*tjPixelSize[pf]))==NULL)
_throw("Memory allocation failure");
initBuf(srcBuf, w, h, pf, flags);
+
if(*dstBuf && *dstSize>0) memset(*dstBuf, 0, *dstSize);
- t=gettime();
- if(yuv==YUVENCODE)
+
+ if(!alloc) flags|=TJFLAG_NOREALLOC;
+ if(doyuv)
{
- _tj(tjEncodeYUV2(handle, srcBuf, w, 0, h, pf, *dstBuf, subsamp, flags));
+ unsigned long yuvSize=tjBufSizeYUV2(w, pad, h, subsamp);
+ tjscalingfactor sf={1, 1};
+ tjhandle handle2=tjInitCompress();
+ if(!handle2) _throwtj();
+
+ if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL)
+ _throw("Memory allocation failure");
+ memset(yuvBuf, 0, yuvSize);
+
+ printf("%s %s -> YUV %s ... ", pfStr, buStrLong, subNameLong[subsamp]);
+ _tj(tjEncodeYUV3(handle2, srcBuf, w, 0, h, pf, yuvBuf, pad, subsamp,
+ flags));
+ tjDestroy(handle2);
+ snprintf(tempStr, 1024, "%s_enc_%s_%s_%s.yuv", basename, pfStr, buStr,
+ subName[subsamp]);
+ writeJPEG(yuvBuf, yuvSize, tempStr);
+ if(checkBufYUV(yuvBuf, w, h, subsamp, sf)) printf("Passed.\n");
+ else printf("FAILED!\n");
+
+ printf("YUV %s %s -> JPEG Q%d ... ", subNameLong[subsamp], buStrLong,
+ jpegQual);
+ _tj(tjCompressFromYUV(handle, yuvBuf, w, pad, h, subsamp, dstBuf,
+ dstSize, jpegQual, flags));
}
else
{
- if(!alloc)
- {
- flags|=TJFLAG_NOREALLOC;
- *dstSize=(yuv==YUVENCODE? tjBufSizeYUV(w, h, subsamp)
- : tjBufSize(w, h, subsamp));
- }
+ printf("%s %s -> %s Q%d ... ", pfStr, buStrLong, subNameLong[subsamp],
+ jpegQual);
_tj(tjCompress2(handle, srcBuf, w, 0, h, pf, dstBuf, dstSize, subsamp,
jpegQual, flags));
}
- t=gettime()-t;
- if(yuv==YUVENCODE)
- snprintf(tempStr, 1024, "%s_enc_%s_%s_%s.yuv", basename, pixFormatStr[pf],
- (flags&TJFLAG_BOTTOMUP)? "BU":"TD", subName[subsamp]);
- else
- snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename,
- pixFormatStr[pf], (flags&TJFLAG_BOTTOMUP)? "BU":"TD", subName[subsamp],
- jpegQual);
+ snprintf(tempStr, 1024, "%s_enc_%s_%s_%s_Q%d.jpg", basename, pfStr, buStr,
+ subName[subsamp], jpegQual);
writeJPEG(*dstBuf, *dstSize, tempStr);
- if(yuv==YUVENCODE)
- {
- if(checkBufYUV(*dstBuf, w, h, subsamp)) printf("Passed.");
- else printf("FAILED!");
- }
- else printf("Done.");
- printf(" %f ms\n Result in %s\n", t*1000., tempStr);
+ printf("Done.\n Result in %s\n", tempStr);
bailout:
+ if(yuvBuf) free(yuvBuf);
if(srcBuf) free(srcBuf);
}
@@ -394,16 +459,49 @@
unsigned long jpegSize, int w, int h, int pf, char *basename, int subsamp,
int flags, tjscalingfactor sf)
{
- unsigned char *dstBuf=NULL;
- int _hdrw=0, _hdrh=0, _hdrsubsamp=-1; double t;
+ unsigned char *dstBuf=NULL, *yuvBuf=NULL;
+ int _hdrw=0, _hdrh=0, _hdrsubsamp=-1;
int scaledWidth=TJSCALED(w, sf);
int scaledHeight=TJSCALED(h, sf);
unsigned long dstSize=0;
- if(yuv==YUVENCODE) return;
+ _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh,
+ &_hdrsubsamp));
+ if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp)
+ _throw("Incorrect JPEG header");
- if(yuv==YUVDECODE)
- printf("JPEG -> YUV %s ... ", subNameLong[subsamp]);
+ dstSize=scaledWidth*scaledHeight*tjPixelSize[pf];
+ if((dstBuf=(unsigned char *)malloc(dstSize))==NULL)
+ _throw("Memory allocation failure");
+ memset(dstBuf, 0, dstSize);
+
+ if(doyuv)
+ {
+ unsigned long yuvSize=tjBufSizeYUV2(scaledWidth, pad, scaledHeight,
+ subsamp);
+ tjhandle handle2=tjInitDecompress();
+ if(!handle2) _throwtj();
+
+ if((yuvBuf=(unsigned char *)malloc(yuvSize))==NULL)
+ _throw("Memory allocation failure");
+ memset(yuvBuf, 0, yuvSize);
+
+ printf("JPEG -> YUV %s ", subNameLong[subsamp]);
+ if(sf.num!=1 || sf.denom!=1)
+ printf("%d/%d ... ", sf.num, sf.denom);
+ else printf("... ");
+ _tj(tjDecompressToYUV2(handle, jpegBuf, jpegSize, yuvBuf, scaledWidth,
+ pad, scaledHeight, flags));
+ if(checkBufYUV(yuvBuf, scaledWidth, scaledHeight, subsamp, sf))
+ printf("Passed.\n");
+ else printf("FAILED!\n");
+
+ printf("YUV %s -> %s %s ... ", subNameLong[subsamp], pixFormatStr[pf],
+ (flags&TJFLAG_BOTTOMUP)? "Bottom-Up":"Top-Down ");
+ _tj(tjDecodeYUV(handle2, yuvBuf, pad, subsamp, dstBuf, scaledWidth, 0,
+ scaledHeight, pf, flags));
+ tjDestroy(handle2);
+ }
else
{
printf("JPEG -> %s %s ", pixFormatStr[pf],
@@ -411,45 +509,17 @@
if(sf.num!=1 || sf.denom!=1)
printf("%d/%d ... ", sf.num, sf.denom);
else printf("... ");
- }
-
- _tj(tjDecompressHeader2(handle, jpegBuf, jpegSize, &_hdrw, &_hdrh,
- &_hdrsubsamp));
- if(_hdrw!=w || _hdrh!=h || _hdrsubsamp!=subsamp)
- _throw("Incorrect JPEG header");
-
- if(yuv==YUVDECODE) dstSize=tjBufSizeYUV(w, h, subsamp);
- else dstSize=scaledWidth*scaledHeight*tjPixelSize[pf];
- if((dstBuf=(unsigned char *)malloc(dstSize))==NULL)
- _throw("Memory allocation failure");
- memset(dstBuf, 0, dstSize);
-
- t=gettime();
- if(yuv==YUVDECODE)
- {
- _tj(tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags));
- }
- else
- {
_tj(tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, scaledWidth, 0,
scaledHeight, pf, flags));
}
- t=gettime()-t;
- if(yuv==YUVDECODE)
- {
- if(checkBufYUV(dstBuf, w, h, subsamp)) printf("Passed.");
- else printf("FAILED!");
- }
- else
- {
- if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags))
- printf("Passed.");
- else printf("FAILED!");
- }
- printf(" %f ms\n", t*1000.);
+ if(checkBuf(dstBuf, scaledWidth, scaledHeight, pf, subsamp, sf, flags))
+ printf("Passed.");
+ else printf("FAILED!");
+ printf("\n");
bailout:
+ if(yuvBuf) free(yuvBuf);
if(dstBuf) free(dstBuf);
}
@@ -459,18 +529,19 @@
int flags)
{
int i, n=0;
- tjscalingfactor *sf=tjGetScalingFactors(&n), sf1={1, 1};
+ tjscalingfactor *sf=tjGetScalingFactors(&n);
if(!sf || !n) _throwtj();
- if((subsamp==TJSAMP_444 || subsamp==TJSAMP_GRAY) && !yuv)
+ for(i=0; i<n; i++)
{
- for(i=0; i<n; i++)
+ if(subsamp==TJSAMP_444 || subsamp==TJSAMP_GRAY ||
+ (subsamp==TJSAMP_411 && sf[i].num==1 &&
+ (sf[i].denom==2 || sf[i].denom==1)) ||
+ (subsamp!=TJSAMP_411 && sf[i].num==1 &&
+ (sf[i].denom==4 || sf[i].denom==2 || sf[i].denom==1)))
_decompTest(handle, jpegBuf, jpegSize, w, h, pf, basename, subsamp,
flags, sf[i]);
}
- else
- _decompTest(handle, jpegBuf, jpegSize, w, h, pf, basename, subsamp, flags,
- sf1);
bailout:
return;
@@ -485,12 +556,10 @@
unsigned long size=0; int pfi, pf, i;
if(!alloc)
- {
- size=(yuv==YUVENCODE? tjBufSizeYUV(w, h, subsamp)
- : tjBufSize(w, h, subsamp));
+ size=tjBufSize(w, h, subsamp);
+ if(size!=0)
if((dstBuf=(unsigned char *)tjAlloc(size))==NULL)
_throw("Memory allocation failure.");
- }
if((chandle=tjInitCompress())==NULL || (dhandle=tjInitDecompress())==NULL)
_throwtj();
@@ -500,13 +569,10 @@
for(i=0; i<2; i++)
{
int flags=0;
- if(subsamp==TJSAMP_422 || subsamp==TJSAMP_420 || subsamp==TJSAMP_440)
+ if(subsamp==TJSAMP_422 || subsamp==TJSAMP_420 || subsamp==TJSAMP_440 ||
+ subsamp==TJSAMP_411)
flags|=TJFLAG_FASTUPSAMPLE;
- if(i==1)
- {
- if(yuv==YUVDECODE) goto bailout;
- else flags|=TJFLAG_BOTTOMUP;
- }
+ if(i==1) flags|=TJFLAG_BOTTOMUP;
pf=formats[pfi];
compTest(chandle, &dstBuf, &size, w, h, pf, basename, subsamp, 100,
flags);
@@ -551,9 +617,9 @@
if(h%100==0) printf("%.4d x %.4d\b\b\b\b\b\b\b\b\b\b\b", w, h);
if((srcBuf=(unsigned char *)malloc(w*h*4))==NULL)
_throw("Memory allocation failure");
- if(!alloc || yuv==YUVENCODE)
+ if(!alloc || doyuv)
{
- if(yuv==YUVENCODE) dstSize=tjBufSizeYUV(w, h, subsamp);
+ if(doyuv) dstSize=tjBufSizeYUV2(w, pad, h, subsamp);
else dstSize=tjBufSize(w, h, subsamp);
if((dstBuf=(unsigned char *)tjAlloc(dstSize))==NULL)
_throw("Memory allocation failure");
@@ -565,10 +631,10 @@
else srcBuf[i]=255;
}
- if(yuv==YUVENCODE)
+ if(doyuv)
{
- _tj(tjEncodeYUV2(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, subsamp,
- 0));
+ _tj(tjEncodeYUV3(handle, srcBuf, w, 0, h, TJPF_BGRX, dstBuf, pad,
+ subsamp, 0));
}
else
{
@@ -580,9 +646,9 @@
if((srcBuf=(unsigned char *)malloc(h*w*4))==NULL)
_throw("Memory allocation failure");
- if(!alloc || yuv==YUVENCODE)
+ if(!alloc || doyuv)
{
- if(yuv==YUVENCODE) dstSize=tjBufSizeYUV(h, w, subsamp);
+ if(doyuv) dstSize=tjBufSizeYUV2(h, pad, w, subsamp);
else dstSize=tjBufSize(h, w, subsamp);
if((dstBuf=(unsigned char *)tjAlloc(dstSize))==NULL)
_throw("Memory allocation failure");
@@ -594,10 +660,10 @@
else srcBuf[i]=255;
}
- if(yuv==YUVENCODE)
+ if(doyuv)
{
- _tj(tjEncodeYUV2(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, subsamp,
- 0));
+ _tj(tjEncodeYUV3(handle, srcBuf, h, 0, w, TJPF_BGRX, dstBuf, pad,
+ subsamp, 0));
}
else
{
@@ -620,7 +686,7 @@
int main(int argc, char *argv[])
{
- int doyuv=0, i;
+ int i, num4bf=5;
#ifdef _WIN32
srand((unsigned int)time(NULL));
#endif
@@ -629,41 +695,38 @@
for(i=1; i<argc; i++)
{
if(!strcasecmp(argv[i], "-yuv")) doyuv=1;
+ if(!strcasecmp(argv[i], "-noyuvpad")) pad=1;
if(!strcasecmp(argv[i], "-alloc")) alloc=1;
if(!strncasecmp(argv[i], "-h", 2) || !strcasecmp(argv[i], "-?"))
usage(argv[0]);
}
}
if(alloc) printf("Testing automatic buffer allocation\n");
- if(doyuv) {yuv=YUVENCODE; alloc=0;}
+ if(doyuv) num4bf=4;
doTest(35, 39, _3byteFormats, 2, TJSAMP_444, "test");
- doTest(39, 41, _4byteFormats, 4, TJSAMP_444, "test");
+ doTest(39, 41, _4byteFormats, num4bf, TJSAMP_444, "test");
doTest(41, 35, _3byteFormats, 2, TJSAMP_422, "test");
- doTest(35, 39, _4byteFormats, 4, TJSAMP_422, "test");
+ doTest(35, 39, _4byteFormats, num4bf, TJSAMP_422, "test");
doTest(39, 41, _3byteFormats, 2, TJSAMP_420, "test");
- doTest(41, 35, _4byteFormats, 4, TJSAMP_420, "test");
+ doTest(41, 35, _4byteFormats, num4bf, TJSAMP_420, "test");
doTest(35, 39, _3byteFormats, 2, TJSAMP_440, "test");
- doTest(39, 41, _4byteFormats, 4, TJSAMP_440, "test");
- doTest(35, 39, _onlyGray, 1, TJSAMP_GRAY, "test");
- doTest(39, 41, _3byteFormats, 2, TJSAMP_GRAY, "test");
- doTest(41, 35, _4byteFormats, 4, TJSAMP_GRAY, "test");
+ doTest(39, 41, _4byteFormats, num4bf, TJSAMP_440, "test");
+ doTest(41, 35, _3byteFormats, 2, TJSAMP_411, "test");
+ doTest(35, 39, _4byteFormats, num4bf, TJSAMP_411, "test");
+ doTest(39, 41, _onlyGray, 1, TJSAMP_GRAY, "test");
+ doTest(41, 35, _3byteFormats, 2, TJSAMP_GRAY, "test");
+ doTest(35, 39, _4byteFormats, 4, TJSAMP_GRAY, "test");
bufSizeTest();
if(doyuv)
{
printf("\n--------------------\n\n");
- yuv=YUVDECODE;
doTest(48, 48, _onlyRGB, 1, TJSAMP_444, "test_yuv0");
- doTest(35, 39, _onlyRGB, 1, TJSAMP_444, "test_yuv1");
doTest(48, 48, _onlyRGB, 1, TJSAMP_422, "test_yuv0");
- doTest(39, 41, _onlyRGB, 1, TJSAMP_422, "test_yuv1");
doTest(48, 48, _onlyRGB, 1, TJSAMP_420, "test_yuv0");
- doTest(41, 35, _onlyRGB, 1, TJSAMP_420, "test_yuv1");
doTest(48, 48, _onlyRGB, 1, TJSAMP_440, "test_yuv0");
- doTest(35, 39, _onlyRGB, 1, TJSAMP_440, "test_yuv1");
+ doTest(48, 48, _onlyRGB, 1, TJSAMP_411, "test_yuv0");
doTest(48, 48, _onlyRGB, 1, TJSAMP_GRAY, "test_yuv0");
- doTest(35, 39, _onlyRGB, 1, TJSAMP_GRAY, "test_yuv1");
doTest(48, 48, _onlyGray, 1, TJSAMP_GRAY, "test_yuv0");
- doTest(39, 41, _onlyGray, 1, TJSAMP_GRAY, "test_yuv1");
}
return exitStatus;
diff --git a/turbojpeg-jni.c b/turbojpeg-jni.c
index 634bedf..efe5590 100644
--- a/turbojpeg-jni.c
+++ b/turbojpeg-jni.c
@@ -67,16 +67,23 @@
return retval;
}
-JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV
- (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp)
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII
+ (JNIEnv *env, jclass cls, jint width, jint pad, jint height, jint subsamp)
{
- jint retval=(jint)tjBufSizeYUV(width, height, subsamp);
+ jint retval=(jint)tjBufSizeYUV2(width, pad, height, subsamp);
if(retval==-1) _throw(tjGetErrorStr());
bailout:
return retval;
}
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III
+ (JNIEnv *env, jclass cls, jint width, jint height, jint subsamp)
+{
+ return Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII(env, cls, width,
+ 4, height, subsamp);
+}
+
JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_init
(JNIEnv *env, jobject obj)
{
@@ -207,12 +214,48 @@
flags);
}
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
- (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
- jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+JNIEXPORT jint JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII
+ (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pad, jint height,
+ jint subsamp, jbyteArray dst, jint jpegQual, jint flags)
{
tjhandle handle=0;
+ unsigned long jpegSize=0;
jsize arraySize=0;
+ unsigned char *srcBuf=NULL, *jpegBuf=NULL;
+
+ gethandle();
+
+ arraySize=tjBufSizeYUV2(width, pad, height, subsamp);
+ if((*env)->GetArrayLength(env, src)<arraySize)
+ _throw("Source buffer is not large enough");
+ jpegSize=tjBufSize(width, height, subsamp);
+ if((*env)->GetArrayLength(env, dst)<(jsize)jpegSize)
+ _throw("Destination buffer is not large enough");
+
+ bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
+ bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
+
+ if(tjCompressFromYUV(handle, srcBuf, width, pad, height, subsamp, &jpegBuf,
+ &jpegSize, jpegQual, flags|TJFLAG_NOREALLOC)==-1)
+ {
+ (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+ (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+ jpegBuf=srcBuf=NULL;
+ _throw(tjGetErrorStr());
+ }
+
+ bailout:
+ if(jpegBuf) (*env)->ReleasePrimitiveArrayCritical(env, dst, jpegBuf, 0);
+ if(srcBuf) (*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
+ return (jint)jpegSize;
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BIII
+ (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
+ jint height, jint pf, jbyteArray dst, jint pad, jint subsamp, jint flags)
+{
+ tjhandle handle=0;
+ jsize arraySize=0, yuvSize;
unsigned char *srcBuf=NULL, *dstBuf=NULL;
gethandle();
@@ -226,15 +269,17 @@
arraySize=(pitch==0)? width*tjPixelSize[pf]*height:pitch*height;
if((*env)->GetArrayLength(env, src)<arraySize)
_throw("Source buffer is not large enough");
- if((*env)->GetArrayLength(env, dst)
- <(jsize)tjBufSizeYUV(width, height, subsamp))
+ yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+ if(yuvSize==(unsigned long)-1)
+ _throw(tjGetErrorStr());
+ if((*env)->GetArrayLength(env, dst)<yuvSize)
_throw("Destination buffer is not large enough");
bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
- if(tjEncodeYUV2(handle, srcBuf, width, pitch, height, pf, dstBuf, subsamp,
- flags)==-1)
+ if(tjEncodeYUV3(handle, srcBuf, width, pitch, height, pf, dstBuf, pad,
+ subsamp, flags)==-1)
{
(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
@@ -248,12 +293,20 @@
return;
}
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
- (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride,
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BII
+ (JNIEnv *env, jobject obj, jbyteArray src, jint width, jint pitch,
jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
{
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BIII(
+ env, obj, src, width, pitch, height, pf, dst, 4, subsamp, flags);
+}
+
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BIII
+ (JNIEnv *env, jobject obj, jintArray src, jint width, jint stride,
+ jint height, jint pf, jbyteArray dst, jint pad, jint subsamp, jint flags)
+{
tjhandle handle=0;
- jsize arraySize=0;
+ jsize arraySize=0, yuvSize;
unsigned char *srcBuf=NULL, *dstBuf=NULL;
gethandle();
@@ -269,15 +322,17 @@
arraySize=(stride==0)? width*height:stride*height;
if((*env)->GetArrayLength(env, src)<arraySize)
_throw("Source buffer is not large enough");
- if((*env)->GetArrayLength(env, dst)
- <(jsize)tjBufSizeYUV(width, height, subsamp))
+ yuvSize=(jsize)tjBufSizeYUV2(width, pad, height, subsamp);
+ if(yuvSize==(unsigned long)-1)
+ _throw(tjGetErrorStr());
+ if((*env)->GetArrayLength(env, dst)<yuvSize)
_throw("Destination buffer is not large enough");
bailif0(srcBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
- if(tjEncodeYUV2(handle, srcBuf, width, stride*sizeof(jint), height, pf,
- dstBuf, subsamp, flags)==-1)
+ if(tjEncodeYUV3(handle, srcBuf, width, stride*sizeof(jint), height, pf,
+ dstBuf, pad, subsamp, flags)==-1)
{
(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
(*env)->ReleasePrimitiveArrayCritical(env, src, srcBuf, 0);
@@ -291,6 +346,14 @@
return;
}
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BII
+ (JNIEnv *env, jobject obj, jintArray src, jint width, jint pitch,
+ jint height, jint pf, jbyteArray dst, jint subsamp, jint flags)
+{
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BIII(
+ env, obj, src, width, pitch, height, pf, dst, 4, subsamp, flags);
+}
+
JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJCompressor_destroy
(JNIEnv *env, jobject obj)
{
@@ -355,7 +418,7 @@
{
tjhandle handle=0;
unsigned char *jpegBuf=NULL;
- int width=0, height=0, jpegSubsamp=-1;
+ int width=0, height=0, jpegSubsamp=-1, jpegColorspace=-1;
gethandle();
@@ -364,8 +427,8 @@
bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
- if(tjDecompressHeader2(handle, jpegBuf, (unsigned long)jpegSize,
- &width, &height, &jpegSubsamp)==-1)
+ if(tjDecompressHeader3(handle, jpegBuf, (unsigned long)jpegSize,
+ &width, &height, &jpegSubsamp, &jpegColorspace)==-1)
{
(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
_throw(tjGetErrorStr());
@@ -374,6 +437,8 @@
bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegSubsamp", "I"));
(*env)->SetIntField(env, obj, _fid, jpegSubsamp);
+ bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegColorspace", "I"));
+ (*env)->SetIntField(env, obj, _fid, jpegColorspace);
bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegWidth", "I"));
(*env)->SetIntField(env, obj, _fid, width);
bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I"));
@@ -484,13 +549,14 @@
}
-JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII
(JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
- jint flags)
+ jint desiredWidth, jint pad, jint desiredHeight, jint flags)
{
tjhandle handle=0;
unsigned char *jpegBuf=NULL, *dstBuf=NULL;
int jpegSubsamp=-1, jpegWidth=0, jpegHeight=0;
+ jsize yuvSize;
gethandle();
@@ -502,15 +568,18 @@
jpegWidth=(int)(*env)->GetIntField(env, obj, _fid);
bailif0(_fid=(*env)->GetFieldID(env, _cls, "jpegHeight", "I"));
jpegHeight=(int)(*env)->GetIntField(env, obj, _fid);
- if((*env)->GetArrayLength(env, dst)
- <(jsize)tjBufSizeYUV(jpegWidth, jpegHeight, jpegSubsamp))
- _throw("Destination buffer is not large enough");
+ yuvSize=(jsize)tjBufSizeYUV2(desiredWidth==0? jpegWidth:desiredWidth,
+ pad, desiredHeight==0? jpegHeight:desiredHeight, jpegSubsamp);
+ if(yuvSize==(unsigned long)-1)
+ _throw(tjGetErrorStr());
+ if((*env)->GetArrayLength(env, dst)<yuvSize)
+ _throw("Destination buffer is not large enough");
bailif0(jpegBuf=(*env)->GetPrimitiveArrayCritical(env, src, 0));
bailif0(dstBuf=(*env)->GetPrimitiveArrayCritical(env, dst, 0));
- if(tjDecompressToYUV(handle, jpegBuf, (unsigned long)jpegSize, dstBuf,
- flags)==-1)
+ if(tjDecompressToYUV2(handle, jpegBuf, (unsigned long)jpegSize, dstBuf,
+ desiredWidth, pad, desiredHeight, flags)==-1)
{
(*env)->ReleasePrimitiveArrayCritical(env, dst, dstBuf, 0);
(*env)->ReleasePrimitiveArrayCritical(env, src, jpegBuf, 0);
@@ -524,6 +593,14 @@
return;
}
+JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI
+ (JNIEnv *env, jobject obj, jbyteArray src, jint jpegSize, jbyteArray dst,
+ jint flags)
+{
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII(
+ env, obj, src, jpegSize, dst, 0, 4, 0, flags);
+}
+
JNIEXPORT void JNICALL Java_org_libjpegturbo_turbojpeg_TJTransformer_init
(JNIEnv *env, jobject obj)
{
diff --git a/turbojpeg-mapfile b/turbojpeg-mapfile
index bd1ac71..7d174ca 100755
--- a/turbojpeg-mapfile
+++ b/turbojpeg-mapfile
@@ -36,3 +36,14 @@
tjInitTransform;
tjTransform;
} TURBOJPEG_1.1;
+
+TURBOJPEG_1.4
+{
+ global:
+ tjBufSizeYUV2;
+ tjCompressFromYUV;
+ tjDecodeYUV;
+ tjDecompressHeader3;
+ tjDecompressToYUV2;
+ tjEncodeYUV3;
+} TURBOJPEG_1.2;
diff --git a/turbojpeg-mapfile.jni b/turbojpeg-mapfile.jni
index ca39c9e..a1be1fd 100755
--- a/turbojpeg-mapfile.jni
+++ b/turbojpeg-mapfile.jni
@@ -36,7 +36,7 @@
tjInitTransform;
tjTransform;
Java_org_libjpegturbo_turbojpeg_TJ_bufSize;
- Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV;
+ Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__III;
Java_org_libjpegturbo_turbojpeg_TJ_getScalingFactors;
Java_org_libjpegturbo_turbojpeg_TJCompressor_init;
Java_org_libjpegturbo_turbojpeg_TJCompressor_compress___3BIIII_3BIII;
@@ -48,7 +48,7 @@
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressHeader;
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIII;
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIII;
- Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BI;
Java_org_libjpegturbo_turbojpeg_TJDecompressor_destroy;
Java_org_libjpegturbo_turbojpeg_TJTransformer_init;
Java_org_libjpegturbo_turbojpeg_TJTransformer_transform;
@@ -62,3 +62,19 @@
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3BIIIIIII;
Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompress___3BI_3IIIIIIII;
} TURBOJPEG_1.2;
+
+TURBOJPEG_1.4
+{
+ global:
+ tjBufSizeYUV2;
+ tjCompressFromYUV;
+ tjDecodeYUV;
+ tjDecompressHeader3;
+ tjDecompressToYUV2;
+ tjEncodeYUV3;
+ Java_org_libjpegturbo_turbojpeg_TJ_bufSizeYUV__IIII;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_compressFromYUV___3BIIII_3BII;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3BIIII_3BIII;
+ Java_org_libjpegturbo_turbojpeg_TJCompressor_encodeYUV___3IIIII_3BIII;
+ Java_org_libjpegturbo_turbojpeg_TJDecompressor_decompressToYUV___3BI_3BIIII;
+} TURBOJPEG_1.3;
diff --git a/turbojpeg.c b/turbojpeg.c
index 9117273..5aee2e8 100644
--- a/turbojpeg.c
+++ b/turbojpeg.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C)2009-2012, 2014 D. R. Commander. All Rights Reserved.
+ * Copyright (C)2009-2014 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -39,12 +39,14 @@
#include "./turbojpeg.h"
#include "./tjutil.h"
#include "transupp.h"
+#include "./jpegcomp.h"
extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **,
unsigned long *, boolean);
extern void jpeg_mem_src_tj(j_decompress_ptr, unsigned char *, unsigned long);
#define PAD(v, p) ((v+(p)-1)&(~((p)-1)))
+/* True if x has at most one bit set (so it is also true for x==0; callers
+   that need a real power of two must separately reject x<1). */
+#define isPow2(x) (((x)&((x)-1))==0)
/* Error handling (based on example in example.c) */
@@ -85,7 +87,7 @@
int init;
} tjinstance;
-static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3};
+static const int pixelsize[TJ_NUMSAMP]={3, 3, 3, 1, 3, 3};
static const JXFORM_CODE xformtypes[TJ_NUMXOP]=
{
@@ -185,6 +187,8 @@
cinfo->in_color_space=JCS_RGB; pixelFormat=TJPF_RGB;
break;
#endif
+ case TJPF_CMYK:
+ cinfo->in_color_space=JCS_CMYK; break;
}
cinfo->input_components=tjPixelSize[pixelFormat];
@@ -197,15 +201,20 @@
}
if(subsamp==TJSAMP_GRAY)
jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
- else
- jpeg_set_colorspace(cinfo, JCS_YCbCr);
+ else if(pixelFormat==TJPF_CMYK)
+ jpeg_set_colorspace(cinfo, JCS_YCCK);
+ else jpeg_set_colorspace(cinfo, JCS_YCbCr);
cinfo->comp_info[0].h_samp_factor=tjMCUWidth[subsamp]/8;
cinfo->comp_info[1].h_samp_factor=1;
cinfo->comp_info[2].h_samp_factor=1;
+ if(cinfo->num_components>3)
+ cinfo->comp_info[3].h_samp_factor=tjMCUWidth[subsamp]/8;
cinfo->comp_info[0].v_samp_factor=tjMCUHeight[subsamp]/8;
cinfo->comp_info[1].v_samp_factor=1;
cinfo->comp_info[2].v_samp_factor=1;
+ if(cinfo->num_components>3)
+ cinfo->comp_info[3].v_samp_factor=tjMCUHeight[subsamp]/8;
return retval;
}
@@ -255,6 +264,8 @@
case TJPF_ABGR:
dinfo->out_color_space=JCS_RGB; break;
#endif
+ case TJPF_CMYK:
+ dinfo->out_color_space=JCS_CMYK; break;
default:
_throw("Unsupported pixel format");
}
@@ -271,7 +282,10 @@
int retval=-1, i, k;
for(i=0; i<NUMSUBOPT; i++)
{
- if(dinfo->num_components==pixelsize[i])
+ if(dinfo->num_components==pixelsize[i]
+ || ((dinfo->jpeg_color_space==JCS_YCCK
+ || dinfo->jpeg_color_space==JCS_CMYK)
+ && pixelsize[i]==3 && dinfo->num_components==4))
{
if(dinfo->comp_info[0].h_samp_factor==tjMCUWidth[i]/8
&& dinfo->comp_info[0].v_samp_factor==tjMCUHeight[i]/8)
@@ -279,8 +293,13 @@
int match=0;
for(k=1; k<dinfo->num_components; k++)
{
- if(dinfo->comp_info[k].h_samp_factor==1
- && dinfo->comp_info[k].v_samp_factor==1)
+ int href=1, vref=1;
+ if(dinfo->jpeg_color_space==JCS_YCCK && k==3)
+ {
+ href=tjMCUWidth[i]/8; vref=tjMCUHeight[i]/8;
+ }
+ if(dinfo->comp_info[k].h_samp_factor==href
+ && dinfo->comp_info[k].v_samp_factor==vref)
match++;
}
if(match==dinfo->num_components-1)
@@ -547,22 +566,28 @@
}
-DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+/*
+ * Return the number of bytes needed to hold a planar YUV image of the given
+ * dimensions and chrominance subsampling when every row of every plane is
+ * padded to a multiple of `pad` bytes.
+ *
+ * width, height: image dimensions in pixels (must be >= 1).
+ * pad: row alignment in bytes (must be >= 1 and a power of 2).
+ * subsamp: subsampling index, 0 <= subsamp < NUMSUBOPT.
+ *
+ * Invalid arguments go through _throw() to `bailout`; callers in this patch
+ * test the result against (unsigned long)-1 to detect that case.
+ */
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height,
int subsamp)
{
unsigned long retval=0;
int pw, ph, cw, ch;
- if(width<1 || height<1 || subsamp<0 || subsamp>=NUMSUBOPT)
- _throw("tjBufSizeYUV(): Invalid argument");
+ if(width<1 || height<1 || pad<1 || !isPow2(pad) || subsamp<0
+ || subsamp>=NUMSUBOPT)
+ _throw("tjBufSizeYUV2(): Invalid argument");
pw=PAD(width, tjMCUWidth[subsamp]/8);
ph=PAD(height, tjMCUHeight[subsamp]/8);
cw=pw*8/tjMCUWidth[subsamp]; ch=ph*8/tjMCUHeight[subsamp];
- retval=PAD(pw, 4)*ph + (subsamp==TJSAMP_GRAY? 0:PAD(cw, 4)*ch*2);
+ /* Widen to unsigned long before multiplying so that the plane sizes of
+    large images are not computed (and overflowed) in signed int. */
+ retval=(unsigned long)PAD(pw, pad)*ph
+ + (subsamp==TJSAMP_GRAY? 0:(unsigned long)PAD(cw, pad)*ch*2);
bailout:
return retval;
}
+/* Legacy entry point: identical to tjBufSizeYUV2() with the row padding
+   fixed at 4 bytes. */
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+ int subsamp)
+{
+ return tjBufSizeYUV2(width, 4, height, subsamp);
+}
DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
int subsamp)
@@ -669,9 +694,9 @@
}
-DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf,
+DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle, unsigned char *srcBuf,
int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf,
- int subsamp, int flags)
+ int pad, int subsamp, int flags)
{
int i, retval=0; JSAMPROW *row_pointer=NULL;
JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
@@ -694,12 +719,12 @@
}
if((this->init&COMPRESS)==0)
- _throw("tjEncodeYUV2(): Instance has not been initialized for compression");
+ _throw("tjEncodeYUV3(): Instance has not been initialized for compression");
if(srcBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
- || pixelFormat>=TJ_NUMPF || dstBuf==NULL || subsamp<0
- || subsamp>=NUMSUBOPT)
- _throw("tjEncodeYUV2(): Invalid argument");
+ || pixelFormat>=TJ_NUMPF || dstBuf==NULL || pad<0 || !isPow2(pad)
+ || subsamp<0 || subsamp>=NUMSUBOPT)
+ _throw("tjEncodeYUV3(): Invalid argument");
if(setjmp(this->jerr.setjmp_buffer))
{
@@ -708,13 +733,16 @@
goto bailout;
}
+ if(pixelFormat==TJPF_CMYK)
+ _throw("tjEncodeYUV3(): Cannot generate YUV images from CMYK pixels");
+
if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
#ifndef JCS_EXTENSIONS
if(pixelFormat!=TJPF_GRAY)
{
rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
- if(!rgbBuf) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!rgbBuf) _throw("tjEncodeYUV3(): Memory allocation failure");
srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
pitch=width*RGB_PIXELSIZE;
}
@@ -727,7 +755,7 @@
else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
- yuvsize=tjBufSizeYUV(width, height, subsamp);
+ yuvsize=tjBufSizeYUV2(width, pad, height, subsamp);
if(setCompDefaults(cinfo, pixelFormat, subsamp, -1, flags)==-1) return -1;
/* Execute only the parts of jpeg_start_compress() that we need. If we
@@ -746,7 +774,7 @@
ph=PAD(height, cinfo->max_v_samp_factor);
if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph))==NULL)
- _throw("tjEncodeYUV2(): Memory allocation failure");
+ _throw("tjEncodeYUV3(): Memory allocation failure");
for(i=0; i<height; i++)
{
if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&srcBuf[(height-i-1)*pitch];
@@ -761,9 +789,9 @@
_tmpbuf[i]=(JSAMPLE *)malloc(
PAD((compptr->width_in_blocks*cinfo->max_h_samp_factor*DCTSIZE)
/compptr->h_samp_factor, 16) * cinfo->max_v_samp_factor + 16);
- if(!_tmpbuf[i]) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!_tmpbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*cinfo->max_v_samp_factor);
- if(!tmpbuf[i]) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!tmpbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
for(row=0; row<cinfo->max_v_samp_factor; row++)
{
unsigned char *_tmpbuf_aligned=
@@ -774,9 +802,9 @@
}
_tmpbuf2[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
* compptr->v_samp_factor + 16);
- if(!_tmpbuf2[i]) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!_tmpbuf2[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
tmpbuf2[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
- if(!tmpbuf2[i]) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!tmpbuf2[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
for(row=0; row<compptr->v_samp_factor; row++)
{
unsigned char *_tmpbuf2_aligned=
@@ -787,15 +815,15 @@
cw[i]=pw*compptr->h_samp_factor/cinfo->max_h_samp_factor;
ch[i]=ph*compptr->v_samp_factor/cinfo->max_v_samp_factor;
outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]);
- if(!outbuf[i]) _throw("tjEncodeYUV2(): Memory allocation failure");
+ if(!outbuf[i]) _throw("tjEncodeYUV3(): Memory allocation failure");
for(row=0; row<ch[i]; row++)
{
outbuf[i][row]=ptr;
- ptr+=PAD(cw[i], 4);
+ ptr+=PAD(cw[i], pad);
}
}
if(yuvsize!=(unsigned long)(ptr-dstBuf))
- _throw("tjEncodeYUV2(): Generated image is not the correct size");
+ _throw("tjEncodeYUV3(): Generated image is not the correct size");
for(row=0; row<ph; row+=cinfo->max_v_samp_factor)
{
@@ -827,6 +855,14 @@
return retval;
}
+/* Legacy entry point: forwards every argument to tjEncodeYUV3() and passes
+   4 as the row padding, returning that function's result unchanged. */
+DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf,
+ int width, int pitch, int height, int pixelFormat, unsigned char *dstBuf,
+ int subsamp, int flags)
+{
+ return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
+ dstBuf, 4, subsamp, flags);
+}
+
DLLEXPORT int DLLCALL tjEncodeYUV(tjhandle handle, unsigned char *srcBuf,
int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
int subsamp, int flags)
@@ -836,6 +872,134 @@
}
+/*
+ * Compress a planar YUV image into a JPEG image using libjpeg's raw-data
+ * interface (raw_data_in=TRUE / jpeg_write_raw_data()).
+ *
+ * handle: instance initialized for compression.
+ * srcBuf: the component planes stored back to back; each row of each
+ *   plane occupies PAD(component width, pad) bytes.
+ * width, height: image dimensions in pixels (must be > 0).
+ * pad: row alignment of the source planes; must be >= 1 and a power
+ *   of 2, because PAD() is a bit-mask rounding and
+ *   tjBufSizeYUV2() enforces the same constraint.
+ * subsamp: subsampling index, 0 <= subsamp < NUMSUBOPT.
+ * jpegBuf, jpegSize: destination buffer pointer and size, handed to
+ *   jpeg_mem_dest_tj(). With TJFLAG_NOREALLOC, *jpegSize is
+ *   first set to tjBufSize(width, height, subsamp) and
+ *   alloc=0 is passed.
+ * jpegQual: 0..100.
+ * flags: TJFLAG_* bits.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, unsigned char *srcBuf,
+  int width, int pad, int height, int subsamp, unsigned char **jpegBuf,
+  unsigned long *jpegSize, int jpegQual, int flags)
+{
+  int i, row, retval=0, alloc=1;  JSAMPROW *inbuf[MAX_COMPONENTS];
+  int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+    tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
+  JSAMPLE *_tmpbuf=NULL, *ptr=srcBuf;  JSAMPROW *tmpbuf[MAX_COMPONENTS];
+
+  getinstance(handle)
+
+  /* NULL everything up front so that `bailout` can free unconditionally. */
+  for(i=0; i<MAX_COMPONENTS; i++)
+  {
+    tmpbuf[i]=NULL;  inbuf[i]=NULL;
+  }
+
+  if((this->init&COMPRESS)==0)
+    _throw("tjCompressFromYUV(): Instance has not been initialized for compression");
+
+  /* pad feeds PAD(), which only rounds correctly for powers of 2. */
+  if(srcBuf==NULL || width<=0 || pad<1 || !isPow2(pad) || height<=0
+    || subsamp<0 || subsamp>=NUMSUBOPT || jpegBuf==NULL || jpegSize==NULL
+    || jpegQual<0 || jpegQual>100)
+    _throw("tjCompressFromYUV(): Invalid argument");
+
+  if(setjmp(this->jerr.setjmp_buffer))
+  {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval=-1;
+    goto bailout;
+  }
+
+  cinfo->image_width=width;
+  cinfo->image_height=height;
+
+  if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+  else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+  else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+  if(flags&TJFLAG_NOREALLOC)
+  {
+    alloc=0;  *jpegSize=tjBufSize(width, height, subsamp);
+  }
+  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+  /* Leave through the common cleanup path rather than a bare return, the
+     same way tjDecodeYUV() handles a failed setup call. */
+  if(setCompDefaults(cinfo, TJPF_RGB, subsamp, jpegQual, flags)==-1)
+  {
+    retval=-1;  goto bailout;
+  }
+  cinfo->raw_data_in=TRUE;
+
+  jpeg_start_compress(cinfo, TRUE);
+  for(i=0; i<cinfo->num_components; i++)
+  {
+    jpeg_component_info *compptr=&cinfo->comp_info[i];
+    int ih;
+    /* iw/ih: plane size rounded up to whole DCT blocks;
+       cw/ch: plane size as stored in srcBuf. */
+    iw[i]=compptr->width_in_blocks*DCTSIZE;
+    ih=compptr->height_in_blocks*DCTSIZE;
+    cw[i]=PAD(cinfo->image_width, cinfo->max_h_samp_factor)
+      *compptr->h_samp_factor/cinfo->max_h_samp_factor;
+    ch[i]=PAD(cinfo->image_height, cinfo->max_v_samp_factor)
+      *compptr->v_samp_factor/cinfo->max_v_samp_factor;
+    /* If the stored plane does not fill whole blocks, rows must be staged
+       through a padded temporary buffer. */
+    if(iw[i]!=cw[i] || ih!=ch[i]) usetmpbuf=1;
+    th[i]=compptr->v_samp_factor*DCTSIZE;
+    tmpbufsize+=iw[i]*th[i];
+    if((inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]))==NULL)
+      _throw("tjCompressFromYUV(): Memory allocation failure");
+    for(row=0; row<ch[i]; row++)
+    {
+      inbuf[i][row]=ptr;
+      ptr+=PAD(cw[i], pad);
+    }
+  }
+  if(usetmpbuf)
+  {
+    if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
+      _throw("tjCompressFromYUV(): Memory allocation failure");
+    ptr=_tmpbuf;
+    for(i=0; i<cinfo->num_components; i++)
+    {
+      if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
+        _throw("tjCompressFromYUV(): Memory allocation failure");
+      for(row=0; row<th[i]; row++)
+      {
+        tmpbuf[i][row]=ptr;
+        ptr+=iw[i];
+      }
+    }
+  }
+
+  /* Feed one row of MCUs per iteration. */
+  for(row=0; row<(int)cinfo->image_height;
+    row+=cinfo->max_v_samp_factor*DCTSIZE)
+  {
+    JSAMPARRAY yuvptr[MAX_COMPONENTS];
+    int crow[MAX_COMPONENTS];
+    for(i=0; i<cinfo->num_components; i++)
+    {
+      jpeg_component_info *compptr=&cinfo->comp_info[i];
+      crow[i]=row*compptr->v_samp_factor/cinfo->max_v_samp_factor;
+      if(usetmpbuf)
+      {
+        int j, k;
+        for(j=0; j<min(th[i], ch[i]-crow[i]); j++)
+        {
+          memcpy(tmpbuf[i][j], inbuf[i][crow[i]+j], cw[i]);
+          /* Duplicate last sample in row to fill out MCU */
+          for(k=cw[i]; k<iw[i]; k++) tmpbuf[i][j][k]=tmpbuf[i][j][cw[i]-1];
+        }
+        /* Duplicate last row to fill out MCU */
+        for(j=ch[i]-crow[i]; j<th[i]; j++)
+          memcpy(tmpbuf[i][j], tmpbuf[i][ch[i]-crow[i]-1], iw[i]);
+        yuvptr[i]=tmpbuf[i];
+      }
+      else
+        yuvptr[i]=&inbuf[i][crow[i]];
+    }
+    jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor*DCTSIZE);
+  }
+  jpeg_finish_compress(cinfo);
+
+  bailout:
+  if(cinfo->global_state>CSTATE_START) jpeg_abort_compress(cinfo);
+  for(i=0; i<MAX_COMPONENTS; i++)
+  {
+    if(tmpbuf[i]) free(tmpbuf[i]);
+    if(inbuf[i]) free(inbuf[i]);
+  }
+  if(_tmpbuf) free(_tmpbuf);
+  return retval;
+}
+
+
/* Decompressor */
static tjhandle _tjInitDecompress(tjinstance *this)
@@ -875,19 +1039,19 @@
}
-DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
- int *jpegSubsamp)
+ int *jpegSubsamp, int *jpegColorspace)
{
int retval=0;
getinstance(handle);
if((this->init&DECOMPRESS)==0)
- _throw("tjDecompressHeader2(): Instance has not been initialized for decompression");
+ _throw("tjDecompressHeader3(): Instance has not been initialized for decompression");
if(jpegBuf==NULL || jpegSize<=0 || width==NULL || height==NULL
- || jpegSubsamp==NULL)
- _throw("tjDecompressHeader2(): Invalid argument");
+ || jpegSubsamp==NULL || jpegColorspace==NULL)
+ _throw("tjDecompressHeader3(): Invalid argument");
if(setjmp(this->jerr.setjmp_buffer))
{
@@ -901,18 +1065,38 @@
*width=dinfo->image_width;
*height=dinfo->image_height;
*jpegSubsamp=getSubsamp(dinfo);
+ switch(dinfo->jpeg_color_space)
+ {
+ case JCS_GRAYSCALE: *jpegColorspace=TJCS_GRAY; break;
+ case JCS_RGB: *jpegColorspace=TJCS_RGB; break;
+ case JCS_YCbCr: *jpegColorspace=TJCS_YCbCr; break;
+ case JCS_CMYK: *jpegColorspace=TJCS_CMYK; break;
+ case JCS_YCCK: *jpegColorspace=TJCS_YCCK; break;
+ default: *jpegColorspace=-1; break;
+ }
jpeg_abort_decompress(dinfo);
if(*jpegSubsamp<0)
- _throw("tjDecompressHeader2(): Could not determine subsampling type for JPEG image");
+ _throw("tjDecompressHeader3(): Could not determine subsampling type for JPEG image");
+ if(*jpegColorspace<0)
+ _throw("tjDecompressHeader3(): Could not determine colorspace of JPEG image");
if(*width<1 || *height<1)
- _throw("tjDecompressHeader2(): Invalid data returned in header");
+ _throw("tjDecompressHeader3(): Invalid data returned in header");
bailout:
return retval;
}
+/* Legacy entry point: calls tjDecompressHeader3() and discards the
+   colorspace it reports (a local is passed only because that function
+   rejects a NULL jpegColorspace pointer). The return value is passed
+   through unchanged, so a colorspace that tjDecompressHeader3() cannot
+   determine is reported as a failure here as well. */
+DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+ int *jpegSubsamp)
+{
+ int jpegColorspace;
+ return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height,
+ jpegSubsamp, &jpegColorspace);
+}
+
DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height)
{
@@ -1049,14 +1233,224 @@
}
-DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
- unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
- int flags)
+/*
+ * Fill in the decompressor fields that would normally come from a JPEG
+ * header, so that a decompress object can be used on raw YUV planes.
+ *
+ * subsamp selects the layout: TJSAMP_GRAY gives one grayscale component,
+ * anything else gives three YCbCr components whose first component carries
+ * the sampling factors derived from tjMCUWidth/tjMCUHeight and whose other
+ * components use 1x1.
+ *
+ * pixelFormat and flags are accepted but not read by this function.
+ * Always returns 0.
+ */
+static int setDecodeDefaults(struct jpeg_decompress_struct *dinfo,
+ int pixelFormat, int subsamp, int flags)
{
- int i, row, retval=0; JSAMPROW *outbuf[MAX_COMPONENTS];
+ int i;
+
+ /* No scaling. */
+ dinfo->scale_num=dinfo->scale_denom=1;
+
+ if(subsamp==TJSAMP_GRAY)
+ {
+ dinfo->num_components=dinfo->comps_in_scan=1;
+ dinfo->jpeg_color_space=JCS_GRAYSCALE;
+ }
+ else
+ {
+ dinfo->num_components=dinfo->comps_in_scan=3;
+ dinfo->jpeg_color_space=JCS_YCbCr;
+ }
+
+ /* The component array comes from the decompressor's own JPOOL_IMAGE pool
+    via alloc_small, so it is not freed explicitly in this function. */
+ dinfo->comp_info=(jpeg_component_info *)
+ (*dinfo->mem->alloc_small)((j_common_ptr)dinfo, JPOOL_IMAGE,
+ dinfo->num_components*SIZEOF(jpeg_component_info));
+
+ for(i=0; i<dinfo->num_components; i++)
+ {
+ jpeg_component_info *compptr=&dinfo->comp_info[i];
+ /* Only component 0 carries the subsampling factors. */
+ compptr->h_samp_factor=(i==0)? tjMCUWidth[subsamp]/8:1;
+ compptr->v_samp_factor=(i==0)? tjMCUHeight[subsamp]/8:1;
+ compptr->component_index=i;
+ compptr->component_id=i+1;
+ /* Table slot 0 for component 0, slot 1 for every other component. */
+ compptr->quant_tbl_no=compptr->dc_tbl_no=compptr->ac_tbl_no=
+ (i==0)? 0:1;
+ dinfo->cur_comp_info[i]=compptr;
+ }
+ dinfo->data_precision=8;
+ /* Make sure the two quantization table slots referenced above exist. */
+ for(i=0; i<2; i++)
+ {
+ if(dinfo->quant_tbl_ptrs[i]==NULL)
+ dinfo->quant_tbl_ptrs[i]=jpeg_alloc_quant_table((j_common_ptr)dinfo);
+ }
+
+ return 0;
+}
+
+
+/* Stand-in for the marker reader's read_markers method: reports that the
+   start of scan has been reached without reading anything. tjDecodeYUV()
+   installs it around jpeg_read_header() because it has no JPEG header to
+   parse. File-local, so it is static rather than an exported symbol. */
+static int my_read_markers(j_decompress_ptr dinfo)
+{
+  return JPEG_REACHED_SOS;
+}
+
+/* Stand-in for the marker reader's reset_marker_reader method: does nothing.
+   Installed by tjDecodeYUV() together with my_read_markers(). */
+static void my_reset_marker_reader(j_decompress_ptr dinfo)
+{
+}
+
+/*
+ * Convert a planar YUV image into packed pixels by driving only the
+ * upsampling stage of the decompressor (dinfo->upsample->upsample); no JPEG
+ * data is parsed. jpeg_read_header() is called with stub marker-reader
+ * methods after setDecodeDefaults() has filled in the header fields.
+ *
+ * handle: instance initialized for decompression.
+ * srcBuf: the component planes stored back to back; each row of each
+ *   plane occupies PAD(component width, pad) bytes. The total
+ *   must equal tjBufSizeYUV2(width, pad, height, subsamp).
+ * pad: row alignment of the source planes; must be >= 1 and a
+ *   power of 2 (the same constraint tjBufSizeYUV2() enforces).
+ * subsamp: subsampling index, 0 <= subsamp < NUMSUBOPT.
+ * dstBuf: destination pixel buffer.
+ * width, height: image dimensions in pixels (must be > 0).
+ * pitch: bytes per destination row; 0 means
+ *   width*tjPixelSize[pixelFormat].
+ * pixelFormat: destination pixel format; TJPF_CMYK is rejected.
+ * flags: TJFLAG_* bits (TJFLAG_BOTTOMUP reverses the row order).
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, unsigned char *srcBuf,
+  int pad, int subsamp, unsigned char *dstBuf, int width, int pitch,
+  int height, int pixelFormat, int flags)
+{
+  int i, retval=0;  JSAMPROW *row_pointer=NULL;
+  JSAMPLE *_tmpbuf[MAX_COMPONENTS];
+  JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
+  int row, pw, ph, cw[MAX_COMPONENTS], ch[MAX_COMPONENTS];
+  JSAMPLE *ptr=srcBuf;
+  unsigned long yuvsize=0;
+  jpeg_component_info *compptr;
+  #ifndef JCS_EXTENSIONS
+  unsigned char *rgbBuf=NULL;
+  #endif
+  JMETHOD(int, old_read_markers, (j_decompress_ptr));
+  JMETHOD(void, old_reset_marker_reader, (j_decompress_ptr));
+
+  getinstance(handle);
+
+  /* NULL everything up front so that `bailout` can free unconditionally. */
+  for(i=0; i<MAX_COMPONENTS; i++)
+  {
+    tmpbuf[i]=NULL;  _tmpbuf[i]=NULL;  inbuf[i]=NULL;
+  }
+
+  if((this->init&DECOMPRESS)==0)
+    _throw("tjDecodeYUV(): Instance has not been initialized for decompression");
+
+  /* pad==0 satisfies isPow2() but makes PAD() round every row to 0 bytes,
+     so it is rejected here instead of failing the size check later. */
+  if(srcBuf==NULL || pad<1 || !isPow2(pad) || subsamp<0 || subsamp>=NUMSUBOPT
+    || dstBuf==NULL || width<=0 || pitch<0 || height<=0 || pixelFormat<0
+    || pixelFormat>=TJ_NUMPF)
+    _throw("tjDecodeYUV(): Invalid argument");
+
+  if(setjmp(this->jerr.setjmp_buffer))
+  {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval=-1;
+    goto bailout;
+  }
+
+  if(pixelFormat==TJPF_CMYK)
+    _throw("tjDecodeYUV(): Cannot decode YUV images into CMYK pixels.");
+
+  if(pitch==0) pitch=width*tjPixelSize[pixelFormat];
+
+  #ifndef JCS_EXTENSIONS
+  /* NOTE(review): this branch runs toRGB() on srcBuf (the YUV input) and
+     then repoints srcBuf and pitch at the temporary buffer, while `ptr` was
+     already initialized from the original srcBuf and dstBuf is still written
+     directly below. It looks copied from the encoder path; a decoder would
+     presumably need to upsample into rgbBuf and convert to dstBuf afterwards.
+     Left unchanged here -- confirm and fix where the conversion helpers are
+     visible. */
+  if(pixelFormat!=TJPF_GRAY)
+  {
+    rgbBuf=(unsigned char *)malloc(width*height*RGB_PIXELSIZE);
+    if(!rgbBuf) _throw("tjDecodeYUV(): Memory allocation failure");
+    srcBuf=toRGB(srcBuf, width, pitch, height, pixelFormat, rgbBuf);
+    pitch=width*RGB_PIXELSIZE;
+  }
+  #endif
+
+  dinfo->image_width=width;
+  dinfo->image_height=height;
+
+  if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
+  else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
+  else if(flags&TJFLAG_FORCESSE2) putenv("JSIMD_FORCESSE2=1");
+
+  yuvsize=tjBufSizeYUV2(width, pad, height, subsamp);
+  if(setDecodeDefaults(dinfo, pixelFormat, subsamp, flags)==-1)
+  {
+    retval=-1;  goto bailout;
+  }
+  /* Temporarily swap in stub marker-reader methods so jpeg_read_header()
+     does not try to parse a JPEG stream, then restore the real ones. */
+  old_read_markers=dinfo->marker->read_markers;
+  dinfo->marker->read_markers=my_read_markers;
+  old_reset_marker_reader=dinfo->marker->reset_marker_reader;
+  dinfo->marker->reset_marker_reader=my_reset_marker_reader;
+  jpeg_read_header(dinfo, TRUE);
+  dinfo->marker->read_markers=old_read_markers;
+  dinfo->marker->reset_marker_reader=old_reset_marker_reader;
+
+  if(setDecompDefaults(dinfo, pixelFormat, flags)==-1)
+  {
+    retval=-1;  goto bailout;
+  }
+  dinfo->do_fancy_upsampling=FALSE;
+  jinit_master_decompress(dinfo);
+  (*dinfo->upsample->start_pass)(dinfo);
+
+  pw=PAD(width, dinfo->max_h_samp_factor);
+  ph=PAD(height, dinfo->max_v_samp_factor);
+
+  if(pitch==0) pitch=dinfo->output_width*tjPixelSize[pixelFormat];
+
+  if((row_pointer=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ph))==NULL)
+    _throw("tjDecodeYUV(): Memory allocation failure");
+  for(i=0; i<height; i++)
+  {
+    if(flags&TJFLAG_BOTTOMUP) row_pointer[i]=&dstBuf[(height-i-1)*pitch];
+    else row_pointer[i]=&dstBuf[i*pitch];
+  }
+  /* Rows past the image height (padding up to ph) alias the last real row. */
+  if(height<ph)
+    for(i=height; i<ph; i++) row_pointer[i]=row_pointer[height-1];
+
+  for(i=0; i<dinfo->num_components; i++)
+  {
+    compptr=&dinfo->comp_info[i];
+    /* Staging rows for one iMCU row of this component; the extra 16 bytes
+       leave room to round the base pointer up to a 16-byte boundary. */
+    _tmpbuf[i]=(JSAMPLE *)malloc(PAD(compptr->width_in_blocks*DCTSIZE, 16)
+      * compptr->v_samp_factor + 16);
+    if(!_tmpbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+    tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*compptr->v_samp_factor);
+    if(!tmpbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+    for(row=0; row<compptr->v_samp_factor; row++)
+    {
+      unsigned char *_tmpbuf_aligned=
+        (unsigned char *)PAD((size_t)_tmpbuf[i], 16);
+      tmpbuf[i][row]=&_tmpbuf_aligned[
+        PAD(compptr->width_in_blocks*DCTSIZE, 16) * row];
+    }
+    cw[i]=pw*compptr->h_samp_factor/dinfo->max_h_samp_factor;
+    ch[i]=ph*compptr->v_samp_factor/dinfo->max_v_samp_factor;
+    inbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]);
+    if(!inbuf[i]) _throw("tjDecodeYUV(): Memory allocation failure");
+    for(row=0; row<ch[i]; row++)
+    {
+      inbuf[i][row]=ptr;
+      ptr+=PAD(cw[i], pad);
+    }
+  }
+
+  /* Cross-check the plane walk above against tjBufSizeYUV2(). */
+  if(yuvsize!=(unsigned long)(ptr-srcBuf))
+    _throw("tjDecodeYUV(): YUV image is not the correct size");
+
+  for(row=0; row<ph; row+=dinfo->max_v_samp_factor)
+  {
+    JDIMENSION inrow=0, outrow=0;
+    for(i=0, compptr=dinfo->comp_info; i<dinfo->num_components; i++, compptr++)
+      jcopy_sample_rows(inbuf[i],
+        row*compptr->v_samp_factor/dinfo->max_v_samp_factor, tmpbuf[i], 0,
+        compptr->v_samp_factor, cw[i]);
+    (dinfo->upsample->upsample)(dinfo, tmpbuf, &inrow,
+      dinfo->max_v_samp_factor, &row_pointer[row], &outrow,
+      dinfo->max_v_samp_factor);
+  }
+  jpeg_abort_decompress(dinfo);
+
+  bailout:
+  if(dinfo->global_state>DSTATE_START) jpeg_abort_decompress(dinfo);
+  #ifndef JCS_EXTENSIONS
+  if(rgbBuf) free(rgbBuf);
+  #endif
+  if(row_pointer) free(row_pointer);
+  for(i=0; i<MAX_COMPONENTS; i++)
+  {
+    if(tmpbuf[i]!=NULL) free(tmpbuf[i]);
+    if(_tmpbuf[i]!=NULL) free(_tmpbuf[i]);
+    if(inbuf[i]!=NULL) free(inbuf[i]);
+  }
+  return retval;
+}
+
+
+DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle,
+ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+ int width, int pad, int height, int flags)
+{
+ int i, sfi, row, retval=0; JSAMPROW *outbuf[MAX_COMPONENTS];
+ int jpegwidth, jpegheight, jpegSubsamp, scaledw, scaledh;
int cw[MAX_COMPONENTS], ch[MAX_COMPONENTS], iw[MAX_COMPONENTS],
tmpbufsize=0, usetmpbuf=0, th[MAX_COMPONENTS];
JSAMPLE *_tmpbuf=NULL, *ptr=dstBuf; JSAMPROW *tmpbuf[MAX_COMPONENTS];
+ int dctsize;
getinstance(handle);
@@ -1066,10 +1460,11 @@
}
if((this->init&DECOMPRESS)==0)
- _throw("tjDecompressToYUV(): Instance has not been initialized for decompression");
+ _throw("tjDecompressToYUV2(): Instance has not been initialized for decompression");
- if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL)
- _throw("tjDecompressToYUV(): Invalid argument");
+ if(jpegBuf==NULL || jpegSize<=0 || dstBuf==NULL || width<0 || pad<1
+ || !isPow2(pad) || height<0)
+ _throw("tjDecompressToYUV2(): Invalid argument");
if(flags&TJFLAG_FORCEMMX) putenv("JSIMD_FORCEMMX=1");
else if(flags&TJFLAG_FORCESSE) putenv("JSIMD_FORCESSE=1");
@@ -1084,37 +1479,63 @@
jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
jpeg_read_header(dinfo, TRUE);
+ jpegSubsamp=getSubsamp(dinfo);
+ if(jpegSubsamp<0)
+ _throw("tjDecompressToYUV2(): Could not determine subsampling type for JPEG image");
+
+ jpegwidth=dinfo->image_width; jpegheight=dinfo->image_height;
+ if(width==0) width=jpegwidth;
+ if(height==0) height=jpegheight;
+ for(i=0; i<NUMSF; i++)
+ {
+ scaledw=TJSCALED(jpegwidth, sf[i]);
+ scaledh=TJSCALED(jpegheight, sf[i]);
+ if(scaledw<=width && scaledh<=height)
+ break;
+ }
+ if(scaledw>width || scaledh>height)
+ _throw("tjDecompressToYUV2(): Could not scale down to desired image dimensions");
+ if(dinfo->num_components>3)
+ _throw("tjDecompressToYUV2(): JPEG image must have 3 or fewer components");
+
+ width=scaledw; height=scaledh;
+ dinfo->scale_num=sf[i].num;
+ dinfo->scale_denom=sf[i].denom;
+ sfi=i;
+ jpeg_calc_output_dimensions(dinfo);
+
+ dctsize=DCTSIZE*sf[sfi].num/sf[sfi].denom;
for(i=0; i<dinfo->num_components; i++)
{
jpeg_component_info *compptr=&dinfo->comp_info[i];
int ih;
- iw[i]=compptr->width_in_blocks*DCTSIZE;
- ih=compptr->height_in_blocks*DCTSIZE;
- cw[i]=PAD(dinfo->image_width, dinfo->max_h_samp_factor)
+ iw[i]=compptr->width_in_blocks*dctsize;
+ ih=compptr->height_in_blocks*dctsize;
+ cw[i]=PAD(dinfo->output_width, dinfo->max_h_samp_factor)
*compptr->h_samp_factor/dinfo->max_h_samp_factor;
- ch[i]=PAD(dinfo->image_height, dinfo->max_v_samp_factor)
+ ch[i]=PAD(dinfo->output_height, dinfo->max_v_samp_factor)
*compptr->v_samp_factor/dinfo->max_v_samp_factor;
if(iw[i]!=cw[i] || ih!=ch[i]) usetmpbuf=1;
- th[i]=compptr->v_samp_factor*DCTSIZE;
+ th[i]=compptr->v_samp_factor*dctsize;
tmpbufsize+=iw[i]*th[i];
if((outbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*ch[i]))==NULL)
- _throw("tjDecompressToYUV(): Memory allocation failure");
+ _throw("tjDecompressToYUV2(): Memory allocation failure");
for(row=0; row<ch[i]; row++)
{
outbuf[i][row]=ptr;
- ptr+=PAD(cw[i], 4);
+ ptr+=PAD(cw[i], pad);
}
}
if(usetmpbuf)
{
if((_tmpbuf=(JSAMPLE *)malloc(sizeof(JSAMPLE)*tmpbufsize))==NULL)
- _throw("tjDecompressToYUV(): Memory allocation failure");
+ _throw("tjDecompressToYUV2(): Memory allocation failure");
ptr=_tmpbuf;
for(i=0; i<dinfo->num_components; i++)
{
if((tmpbuf[i]=(JSAMPROW *)malloc(sizeof(JSAMPROW)*th[i]))==NULL)
- _throw("tjDecompressToYUV(): Memory allocation failure");
+ _throw("tjDecompressToYUV2(): Memory allocation failure");
for(row=0; row<th[i]; row++)
{
tmpbuf[i][row]=ptr;
@@ -1129,18 +1550,37 @@
jpeg_start_decompress(dinfo);
for(row=0; row<(int)dinfo->output_height;
- row+=dinfo->max_v_samp_factor*DCTSIZE)
+ row+=dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size)
{
JSAMPARRAY yuvptr[MAX_COMPONENTS];
int crow[MAX_COMPONENTS];
for(i=0; i<dinfo->num_components; i++)
{
jpeg_component_info *compptr=&dinfo->comp_info[i];
+ if(jpegSubsamp==TJ_420)
+ {
+ /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
+ to be clever and use the IDCT to perform upsampling on the U and V
+ planes. For instance, if the output image is to be scaled by 1/2
+ relative to the JPEG image, then the scaling factor and upsampling
+ effectively cancel each other, so a normal 8x8 IDCT can be used.
+ However, this is not desirable when using the decompress-to-YUV
+ functionality in TurboJPEG, since we want to output the U and V
+ planes in their subsampled form. Thus, we have to override some
+ internal libjpeg parameters to force it to use the "scaled" IDCT
+ functions on the U and V planes. */
+ compptr->_DCT_scaled_size=dctsize;
+ compptr->MCU_sample_width=tjMCUWidth[jpegSubsamp]*
+ sf[sfi].num/sf[sfi].denom*
+ compptr->v_samp_factor/dinfo->max_v_samp_factor;
+ dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
+ }
crow[i]=row*compptr->v_samp_factor/dinfo->max_v_samp_factor;
if(usetmpbuf) yuvptr[i]=tmpbuf[i];
else yuvptr[i]=&outbuf[i][crow[i]];
}
- jpeg_read_raw_data(dinfo, yuvptr, dinfo->max_v_samp_factor*DCTSIZE);
+ jpeg_read_raw_data(dinfo, yuvptr,
+ dinfo->max_v_samp_factor*dinfo->_min_DCT_scaled_size);
if(usetmpbuf)
{
int j;
@@ -1166,6 +1606,13 @@
return retval;
}
+/* Legacy entry point: forwards to tjDecompressToYUV2() with width=0 and
+   height=0 (which that function replaces with the JPEG image's own
+   dimensions) and a row padding of 4 bytes. */
+DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
+ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+ int flags)
+{
+ return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
+}
+
/* Transformer */
diff --git a/turbojpeg.h b/turbojpeg.h
index c778556..a082fa3 100644
--- a/turbojpeg.h
+++ b/turbojpeg.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C)2009-2013 D. R. Commander. All Rights Reserved.
+ * Copyright (C)2009-2014 D. R. Commander. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -49,16 +49,16 @@
/**
* The number of chrominance subsampling options
*/
-#define TJ_NUMSAMP 5
+#define TJ_NUMSAMP 6
/**
* Chrominance subsampling options.
- * When an image is converted from the RGB to the YCbCr colorspace as part of
- * the JPEG compression process, some of the Cb and Cr (chrominance) components
- * can be discarded or averaged together to produce a smaller image with little
- * perceptible loss of image clarity (the human eye is more sensitive to small
- * changes in brightness than small changes in color.) This is called
- * "chrominance subsampling".
+ * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK
+ * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of
+ * the Cb and Cr (chrominance) components can be discarded or averaged together
+ * to produce a smaller image with little perceptible loss of image clarity
+ * (the human eye is more sensitive to small changes in brightness than to
+ * small changes in color.) This is called "chrominance subsampling".
* <p>
* NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
* convention of the digital video community, the TurboJPEG API uses "YUV" to
@@ -91,7 +91,18 @@
* chrominance component for every 1x2 block of pixels in the source image.
* Note that 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
*/
- TJSAMP_440
+ TJSAMP_440,
+ /**
+ * 4:1:1 chrominance subsampling. The JPEG or YUV image will contain one
+ * chrominance component for every 4x1 block of pixels in the source image.
+ * JPEG images compressed with 4:1:1 subsampling will be almost exactly the
+ * same size as those compressed with 4:2:0 subsampling, and in the
+ * aggregate, both subsampling methods produce approximately the same
+ * perceptual quality. However, 4:1:1 is better able to reproduce sharp
+ * horizontal features. Note that 4:1:1 subsampling is not fully accelerated
+ * in libjpeg-turbo.
+ */
+ TJSAMP_411
};
/**
@@ -100,9 +111,10 @@
* - 8x8 for no subsampling or grayscale
* - 16x8 for 4:2:2
* - 8x16 for 4:4:0
- * - 16x16 for 4:2:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
*/
-static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8};
+static const int tjMCUWidth[TJ_NUMSAMP] = {8, 16, 16, 8, 8, 32};
/**
* MCU block height (in pixels) for a given level of chrominance subsampling.
@@ -110,15 +122,16 @@
* - 8x8 for no subsampling or grayscale
* - 16x8 for 4:2:2
* - 8x16 for 4:4:0
- * - 16x16 for 4:2:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
*/
-static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16};
+static const int tjMCUHeight[TJ_NUMSAMP] = {8, 8, 16, 8, 16, 8};
/**
* The number of pixel formats
*/
-#define TJ_NUMPF 11
+#define TJ_NUMPF 12
/**
* Pixel formats
@@ -193,16 +206,33 @@
* decompressing, the X component is guaranteed to be 0xFF, which can be
* interpreted as an opaque alpha channel.
*/
- TJPF_ARGB
+ TJPF_ARGB,
+ /**
+ * CMYK pixel format. Unlike RGB, which is an additive color model used
+ * primarily for display, CMYK (Cyan/Magenta/Yellow/Key) is a subtractive
+ * color model used primarily for printing. In the CMYK color model, the
+ * value of each color component typically corresponds to an amount of cyan,
+ * magenta, yellow, or black ink that is applied to a white background. In
+ * order to convert between CMYK and RGB, it is necessary to use a color
+ * management system (CMS.) A CMS will attempt to map colors within the
+ * printer's gamut to perceptually similar colors in the display's gamut and
+ * vice versa, but the mapping is typically not 1:1 or reversible, nor can it
+ * be defined with a simple formula. Thus, such a conversion is out of scope
+ * for a codec library. However, the TurboJPEG API allows for compressing
+ * CMYK pixels into a YCCK JPEG image (see #TJCS_YCCK) and decompressing YCCK
+ * JPEG images into CMYK pixels.
+ */
+ TJPF_CMYK
};
+
/**
* Red offset (in bytes) for a given pixel format. This specifies the number
* of bytes that the red component is offset from the start of the pixel. For
* instance, if a pixel of format TJ_BGRX is stored in <tt>char pixel[]</tt>,
* then the red component will be <tt>pixel[tjRedOffset[TJ_BGRX]]</tt>.
*/
-static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1};
+static const int tjRedOffset[TJ_NUMPF] = {0, 2, 0, 2, 3, 1, 0, 0, 2, 3, 1, -1};
/**
* Green offset (in bytes) for a given pixel format. This specifies the number
* of bytes that the green component is offset from the start of the pixel.
@@ -210,19 +240,81 @@
* <tt>char pixel[]</tt>, then the green component will be
* <tt>pixel[tjGreenOffset[TJ_BGRX]]</tt>.
*/
-static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2};
+static const int tjGreenOffset[TJ_NUMPF] = {1, 1, 1, 1, 2, 2, 0, 1, 1, 2, 2, -1};
/**
* Blue offset (in bytes) for a given pixel format. This specifies the number
* of bytes that the Blue component is offset from the start of the pixel. For
* instance, if a pixel of format TJ_BGRX is stored in <tt>char pixel[]</tt>,
* then the blue component will be <tt>pixel[tjBlueOffset[TJ_BGRX]]</tt>.
*/
-static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3};
+static const int tjBlueOffset[TJ_NUMPF] = {2, 0, 2, 0, 1, 3, 0, 2, 0, 1, 3, -1};
/**
* Pixel size (in bytes) for a given pixel format.
*/
-static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4};
+static const int tjPixelSize[TJ_NUMPF] = {3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4};
+
+
+/**
+ * The number of JPEG colorspaces
+ */
+#define TJ_NUMCS 5
+
+/**
+ * JPEG colorspaces
+ */
+enum TJCS
+{
+ /**
+ * RGB colorspace. When compressing the JPEG image, the R, G, and B
+ * components in the source image are reordered into image planes, but no
+ * colorspace conversion or subsampling is performed. RGB JPEG images can be
+ * decompressed to any of the extended RGB pixel formats or grayscale, but
+ * they cannot be decompressed to YUV images.
+ */
+ TJCS_RGB=0,
+ /**
+ * YCbCr colorspace. YCbCr is not an absolute colorspace but rather a
+ * mathematical transformation of RGB designed solely for storage and
+ * transmission. YCbCr images must be converted to RGB before they can
+ * actually be displayed. In the YCbCr colorspace, the Y (luminance)
+ * component represents the black & white portion of the original image, and
+ * the Cb and Cr (chrominance) components represent the color portion of the
+ * original image. Originally, the analog equivalent of this transformation
+ * allowed the same signal to drive both black & white and color televisions,
+ * but JPEG images use YCbCr primarily because it allows the color data to be
+ * optionally subsampled for the purposes of reducing bandwidth or disk
+ * space. YCbCr is the most common JPEG colorspace, and YCbCr JPEG images
+ * can be compressed from and decompressed to any of the extended RGB pixel
+ * formats or grayscale, or they can be decompressed to YUV planar images.
+ */
+ TJCS_YCbCr,
+ /**
+ * Grayscale colorspace. The JPEG image retains only the luminance data (Y
+ * component), and any color data from the source image is discarded.
+ * Grayscale JPEG images can be compressed from and decompressed to any of
+ * the extended RGB pixel formats or grayscale, or they can be decompressed
+ * to YUV planar images.
+ */
+ TJCS_GRAY,
+ /**
+ * CMYK colorspace. When compressing the JPEG image, the C, M, Y, and K
+ * components in the source image are reordered into image planes, but no
+ * colorspace conversion or subsampling is performed. CMYK JPEG images can
+ * only be decompressed to CMYK pixels.
+ */
+ TJCS_CMYK,
+ /**
+ * YCCK colorspace. YCCK (AKA "YCbCrK") is not an absolute colorspace but
+ * rather a mathematical transformation of CMYK designed solely for storage
+ * and transmission. It is to CMYK as YCbCr is to RGB. CMYK pixels can be
+ * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+ * components in the YCCK pixels can be subsampled without incurring major
+ * perceptual loss. YCCK JPEG images can only be compressed from and
+ * decompressed to CMYK pixels.
+ */
+ TJCS_YCCK
+};
/**
@@ -231,26 +323,6 @@
*/
#define TJFLAG_BOTTOMUP 2
/**
- * Turn off CPU auto-detection and force TurboJPEG to use MMX code (if the
- * underlying codec supports it.)
- */
-#define TJFLAG_FORCEMMX 8
-/**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE code (if the
- * underlying codec supports it.)
- */
-#define TJFLAG_FORCESSE 16
-/**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE2 code (if the
- * underlying codec supports it.)
- */
-#define TJFLAG_FORCESSE2 32
-/**
- * Turn off CPU auto-detection and force TurboJPEG to use SSE3 code (if the
- * underlying codec supports it.)
- */
-#define TJFLAG_FORCESSE3 128
-/**
* When decompressing an image that was compressed using chrominance
* subsampling, use the fastest chrominance upsampling algorithm available in
* the underlying codec. The default is to use smooth upsampling, which
@@ -445,8 +517,8 @@
/**
* A callback function that can be used to modify the DCT coefficients
* after they are losslessly transformed but before they are transcoded to a
- * new JPEG file. This allows for custom filters or other transformations to
- * be applied in the frequency domain.
+ * new JPEG image. This allows for custom filters or other transformations
+ * to be applied in the frequency domain.
*
* @param coeffs pointer to an array of transformed DCT coefficients. (NOTE:
* this pointer is not guaranteed to be valid once the callback
@@ -512,11 +584,11 @@
/**
- * Compress an RGB or grayscale image into a JPEG image.
+ * Compress an RGB, grayscale, or CMYK image into a JPEG image.
*
* @param handle a handle to a TurboJPEG compressor or transformer instance
- * @param srcBuf pointer to an image buffer containing RGB or grayscale pixels
- * to be compressed
+ * @param srcBuf pointer to an image buffer containing RGB, grayscale, or
+ * CMYK pixels to be compressed
* @param width width (in pixels) of the source image
* @param pitch bytes per line of the source image. Normally, this should be
* <tt>width * #tjPixelSize[pixelFormat]</tt> if the image is unpadded,
@@ -564,6 +636,60 @@
/**
+ * Compress a YUV planar image into a JPEG image.
+ *
+ * @param handle a handle to a TurboJPEG compressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing a YUV planar image
+ * to be compressed. The Y, U (Cb), and V (Cr) image planes should be
+ * stored sequentially in the buffer, and the size of each plane
+ * is determined by the specified width, height, padding, and level of
+ * chrominance subsampling. If the chrominance components are
+ * subsampled along the horizontal dimension, then the width of the
+ * luminance plane should be padded to the nearest multiple of 2 (same
+ * goes for the height of the luminance plane, if the chrominance
+ * components are subsampled along the vertical dimension.) This is
+ * irrespective of any additional padding specified in the <tt>pad</tt>
+ * parameter.
+ * @param width width (in pixels) of the source image
+ * @param pad the line padding used in the source image. For instance, if each
+ * line in each plane of the YUV image is padded to the nearest multiple
+ * of 4 bytes, then <tt>pad</tt> should be set to 4.
+ * @param height height (in pixels) of the source image
+ * @param subsamp the level of chrominance subsampling used in the source
+ * image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param jpegBuf address of a pointer to an image buffer that will receive the
+ * JPEG image. TurboJPEG has the ability to reallocate the JPEG buffer
+ * to accommodate the size of the JPEG image. Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using
+ * #tjAlloc() and let TurboJPEG grow the buffer as needed,
+ * -# set <tt>*jpegBuf</tt> to NULL to tell TurboJPEG to allocate the
+ * buffer for you, or
+ * -# pre-allocate the buffer to a "worst case" size determined by
+ * calling #tjBufSize(). This should ensure that the buffer never has
+ * to be re-allocated (setting #TJFLAG_NOREALLOC guarantees this.)
+ * .
+ * If you choose option 1, <tt>*jpegSize</tt> should be set to the
+ * size of your pre-allocated buffer. In any case, unless you have
+ * set #TJFLAG_NOREALLOC, you should always check <tt>*jpegBuf</tt> upon
+ * return from this function, as it may have changed.
+ * @param jpegSize pointer to an unsigned long variable that holds the size of
+ * the JPEG image buffer. If <tt>*jpegBuf</tt> points to a
+ * pre-allocated buffer, then <tt>*jpegSize</tt> should be set to the
+ * size of the buffer. Upon return, <tt>*jpegSize</tt> will contain the
+ * size of the JPEG image (in bytes.)
+ * @param jpegQual the image quality of the generated JPEG image (1 = worst,
+ * 100 = best)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ * "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjCompressFromYUV(tjhandle handle, unsigned char *srcBuf,
+ int width, int pad, int height, int subsamp, unsigned char **jpegBuf,
+ unsigned long *jpegSize, int jpegQual, int flags);
+
+
+/**
* The maximum size of the buffer (in bytes) required to hold a JPEG image with
* the given parameters. The number of bytes returned by this function is
* larger than the size of the uncompressed source image. The reason for this
@@ -592,6 +718,8 @@
* the given parameters.
*
* @param width width of the image (in pixels)
+ * @param pad the width of each line in each plane of the image is padded to
+ * the nearest multiple of this number of bytes (must be a power of 2.)
* @param height height of the image (in pixels)
* @param subsamp level of chrominance subsampling in the image (see
* @ref TJSAMP "Chrominance subsampling options".)
@@ -599,22 +727,22 @@
* @return the size of the buffer (in bytes) required to hold the image, or
* -1 if the arguments are out of bounds.
*/
-DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV2(int width, int pad, int height,
int subsamp);
/**
* Encode an RGB or grayscale image into a YUV planar image. This function
- * uses the accelerated color conversion routines in TurboJPEG's underlying
- * codec to produce a planar YUV image that is suitable for X Video.
- * Specifically, if the chrominance components are subsampled along the
- * horizontal dimension, then the width of the luminance plane is padded to the
- * nearest multiple of 2 in the output image (same goes for the height of the
- * luminance plane, if the chrominance components are subsampled along the
- * vertical dimension.) Also, each line of each plane in the output image is
- * padded to 4 bytes. Although this will work with any subsampling option, it
- * is really only useful in combination with TJ_420, which produces an image
- * compatible with the I420 (AKA "YUV420P") format.
+ * uses the accelerated color conversion routines in the underlying
+ * codec but does not execute any of the other steps in the JPEG compression
+ * process. The Y, U (Cb), and V (Cr) image planes are stored sequentially
+ * into the destination buffer, and the size of each plane is determined by the
+ * width and height of the source image, as well as the specified padding and
+ * level of chrominance subsampling. If the chrominance components are
+ * subsampled along the horizontal dimension, then the width of the luminance
+ * plane is padded to the nearest multiple of 2 in the output image (same goes
+ * for the height of the luminance plane, if the chrominance components are
+ * subsampled along the vertical dimension.)
* <p>
* NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
* convention of the digital video community, the TurboJPEG API uses "YUV" to
@@ -635,20 +763,26 @@
* @param pixelFormat pixel format of the source image (see @ref TJPF
* "Pixel formats".)
* @param dstBuf pointer to an image buffer that will receive the YUV image.
- * Use #tjBufSizeYUV() to determine the appropriate size for this buffer
- * based on the image width, height, and level of chrominance
- * subsampling.
+ * Use #tjBufSizeYUV2() to determine the appropriate size for this
+ * buffer based on the image width, height, padding, and level of
+ * chrominance subsampling.
+ * @param pad the width of each line in each plane of the YUV image will be
+ * padded to the nearest multiple of this number of bytes (must be a
+ * power of 2.) To generate images suitable for X Video, <tt>pad</tt>
+ * should be set to 4.
* @param subsamp the level of chrominance subsampling to be used when
* generating the YUV image (see @ref TJSAMP
- * "Chrominance subsampling options".)
+ * "Chrominance subsampling options".) To generate images suitable for
+ * X Video, <tt>subsamp</tt> should be set to @ref TJSAMP_420. This
+ * produces an image compatible with the I420 (AKA "YUV420P") format.
* @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
* "flags".
*
* @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
*/
-DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle,
+DLLEXPORT int DLLCALL tjEncodeYUV3(tjhandle handle,
unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat,
- unsigned char *dstBuf, int subsamp, int flags);
+ unsigned char *dstBuf, int pad, int subsamp, int flags);
/**
@@ -673,12 +807,15 @@
* @param jpegSubsamp pointer to an integer variable that will receive the
* level of chrominance subsampling used when compressing the JPEG image
* (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param jpegColorspace pointer to an integer variable that will receive one
+ * of the JPEG colorspace constants, indicating the colorspace of the
+ * JPEG image (see @ref TJCS "JPEG colorspaces".)
*
* @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
*/
-DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+DLLEXPORT int DLLCALL tjDecompressHeader3(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
- int *jpegSubsamp);
+ int *jpegSubsamp, int *jpegColorspace);
/**
@@ -695,7 +832,7 @@
/**
- * Decompress a JPEG image to an RGB or grayscale image.
+ * Decompress a JPEG image to an RGB, grayscale, or CMYK image.
*
* @param handle a handle to a TurboJPEG decompressor or transformer instance
* @param jpegBuf pointer to a buffer containing the JPEG image to decompress
@@ -722,8 +859,8 @@
* calling #TJSCALED() with the JPEG image width and one of the scaling
* factors returned by #tjGetScalingFactors().) You can also be clever
* and use the pitch parameter to skip lines, etc. Setting this
- * parameter to 0 is the equivalent of setting it to <tt>scaledWidth
- * * #tjPixelSize[pixelFormat]</tt>.
+ * parameter to 0 is the equivalent of setting it to
+ * <tt>scaledWidth * #tjPixelSize[pixelFormat]</tt>.
* @param height desired height (in pixels) of the destination image. If this
* is different than the height of the JPEG image being decompressed,
* then TurboJPEG will use scaling in the JPEG decompressor to generate
@@ -745,11 +882,11 @@
/**
* Decompress a JPEG image to a YUV planar image. This function performs JPEG
* decompression but leaves out the color conversion step, so a planar YUV
- * image is generated instead of an RGB image. The padding of the planes in
- * this image is the same as in the images generated by #tjEncodeYUV2(). Note
- * that, if the width or height of the image is not an even multiple of the MCU
- * block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate buffer
- * copy will be performed within TurboJPEG.
+ * image is generated instead of an RGB image. The structure of the planes in
+ * this image is the same as in the images generated by #tjEncodeYUV3(). Note
+ * that, if the width or height of the JPEG image is not an even multiple of
+ * the MCU block size (see #tjMCUWidth and #tjMCUHeight), then an intermediate
+ * buffer copy will be performed within TurboJPEG.
* <p>
* NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
* convention of the digital video community, the TurboJPEG API uses "YUV" to
@@ -759,16 +896,87 @@
* @param jpegBuf pointer to a buffer containing the JPEG image to decompress
* @param jpegSize size of the JPEG image (in bytes)
* @param dstBuf pointer to an image buffer that will receive the YUV image.
- * Use #tjBufSizeYUV() to determine the appropriate size for this buffer
- * based on the image width, height, and level of subsampling.
+ * Use #tjBufSizeYUV2() to determine the appropriate size for this
+ * buffer based on the image width, height, padding, and level of
+ * subsampling.
+ * @param width desired width (in pixels) of the YUV image. If this is
+ * different than the width of the JPEG image being decompressed, then
+ * TurboJPEG will use scaling in the JPEG decompressor to generate the
+ * largest possible image that will fit within the desired width. If
+ * <tt>width</tt> is set to 0, then only the height will be considered
+ * when determining the scaled image size.
+ * @param pad the width of each line in each plane of the YUV image will be
+ * padded to the nearest multiple of this number of bytes (must be a
+ * power of 2.) To generate images suitable for X Video, <tt>pad</tt>
+ * should be set to 4.
+ * @param height desired height (in pixels) of the YUV image. If this is
+ * different than the height of the JPEG image being decompressed, then
+ * TurboJPEG will use scaling in the JPEG decompressor to generate the
+ * largest possible image that will fit within the desired height. If
+ * <tt>height</tt> is set to 0, then only the width will be considered
+ * when determining the scaled image size.
* @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
* "flags".
*
* @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
*/
-DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
+DLLEXPORT int DLLCALL tjDecompressToYUV2(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
- int flags);
+ int width, int pad, int height, int flags);
+
+
+/**
+ * Decode a YUV planar image into an RGB or grayscale image. This function
+ * uses the accelerated color conversion routines in the underlying
+ * codec but does not execute any of the other steps in the JPEG decompression
+ * process. The Y, U (Cb), and V (Cr) image planes should be stored
+ * sequentially in the source buffer, and the size of each plane is determined
+ * by the width and height of the source image, as well as the specified
+ * padding and level of chrominance subsampling. If the chrominance components
+ * are subsampled along the horizontal dimension, then the width of the
+ * luminance plane should be padded to the nearest multiple of 2 in the input
+ * image (same goes for the height of the luminance plane, if the chrominance
+ * components are subsampled along the vertical dimension.)
+ * <p>
+ * NOTE: Technically, the JPEG format uses the YCbCr colorspace, but per the
+ * convention of the digital video community, the TurboJPEG API uses "YUV" to
+ * refer to an image format consisting of Y, Cb, and Cr image planes.
+ *
+ * @param handle a handle to a TurboJPEG decompressor or transformer instance
+ * @param srcBuf pointer to an image buffer containing a YUV planar image to be
+ * decoded. The size of this buffer should match the value returned
+ * by #tjBufSizeYUV2() for the given image width, height, padding, and
+ * level of chrominance subsampling.
+ * @param pad Use this parameter to specify that the width of each line in each
+ * plane of the YUV source image is padded to the nearest multiple of
+ * this number of bytes (must be a power of 2.)
+ * @param subsamp the level of chrominance subsampling used in the YUV source
+ * image (see @ref TJSAMP "Chrominance subsampling options".)
+ * @param dstBuf pointer to an image buffer that will receive the decoded
+ * image. This buffer should normally be <tt>pitch * height</tt>
+ * bytes in size, but the <tt>dstBuf</tt> pointer can also be used to
+ * decode into a specific region of a larger buffer.
+ * @param width width (in pixels) of the source and destination images
+ * @param pitch bytes per line of the destination image. Normally, this should
+ * be <tt>width * #tjPixelSize[pixelFormat]</tt> if the destination
+ * image is unpadded, or <tt>#TJPAD(width *
+ * #tjPixelSize[pixelFormat])</tt> if each line of the destination
+ * image should be padded to the nearest 32-bit boundary, as is the case
+ * for Windows bitmaps. You can also be clever and use the pitch
+ * parameter to skip lines, etc. Setting this parameter to 0 is the
+ * equivalent of setting it to <tt>width *
+ * #tjPixelSize[pixelFormat]</tt>.
+ * @param height height (in pixels) of the source and destination images
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ * "Pixel formats".)
+ * @param flags the bitwise OR of one or more of the @ref TJFLAG_BOTTOMUP
+ * "flags".
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tjGetErrorStr().)
+ */
+DLLEXPORT int DLLCALL tjDecodeYUV(tjhandle handle, unsigned char *srcBuf,
+ int pad, int subsamp, unsigned char *dstBuf, int width, int pitch,
+ int height, int pixelFormat, int flags);
/**
@@ -882,6 +1090,13 @@
DLLEXPORT char* DLLCALL tjGetErrorStr(void);
+/* Deprecated functions and macros */
+#define TJFLAG_FORCEMMX 8
+#define TJFLAG_FORCESSE 16
+#define TJFLAG_FORCESSE2 32
+#define TJFLAG_FORCESSE3 128
+
+
/* Backward compatibility functions and macros (nothing to see here) */
#define NUMSUBOPT TJ_NUMSAMP
#define TJ_444 TJSAMP_444
@@ -905,6 +1120,9 @@
DLLEXPORT unsigned long DLLCALL TJBUFSIZEYUV(int width, int height,
int jpegSubsamp);
+DLLEXPORT unsigned long DLLCALL tjBufSizeYUV(int width, int height,
+ int subsamp);
+
DLLEXPORT int DLLCALL tjCompress(tjhandle handle, unsigned char *srcBuf,
int width, int pitch, int height, int pixelSize, unsigned char *dstBuf,
unsigned long *compressedSize, int jpegSubsamp, int jpegQual, int flags);
@@ -913,13 +1131,25 @@
unsigned char *srcBuf, int width, int pitch, int height, int pixelSize,
unsigned char *dstBuf, int subsamp, int flags);
+DLLEXPORT int DLLCALL tjEncodeYUV2(tjhandle handle,
+ unsigned char *srcBuf, int width, int pitch, int height, int pixelFormat,
+ unsigned char *dstBuf, int subsamp, int flags);
+
DLLEXPORT int DLLCALL tjDecompressHeader(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height);
+DLLEXPORT int DLLCALL tjDecompressHeader2(tjhandle handle,
+ unsigned char *jpegBuf, unsigned long jpegSize, int *width, int *height,
+ int *jpegSubsamp);
+
DLLEXPORT int DLLCALL tjDecompress(tjhandle handle,
unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
int width, int pitch, int height, int pixelSize, int flags);
+DLLEXPORT int DLLCALL tjDecompressToYUV(tjhandle handle,
+ unsigned char *jpegBuf, unsigned long jpegSize, unsigned char *dstBuf,
+ int flags);
+
/**
* @}