Add SSE-optimized IDCT in JPEG decoding for the Intel Atom platform.
Add an SSE-optimized IDCT. It makes the album2D workload about 18%
faster than with jidctfst.c. The main implementation is in
"jidctintelsse.c".
Change-Id: I6dc6f367158172b468e85e7eae249027eb1766cf
diff --git a/jddctmgr.c b/jddctmgr.c
index 74a96db..97b516a 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -50,6 +50,13 @@
#endif
+#ifdef ANDROID_INTELSSE2_IDCT
+extern short __attribute__((aligned(16))) quantptrSSE[DCTSIZE2];
+extern void jpeg_idct_intelsse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+ JCOEFPTR coef_block,
+ JSAMPARRAY output_buf, JDIMENSION output_col);
+#endif
+
/*
* The decompressor input side (jdinput.c) saves away the appropriate
* quantization table for each component at the start of the first scan
@@ -151,6 +158,13 @@
method = JDCT_IFAST;
break;
#else /* ANDROID_ARMV6_IDCT */
+#ifdef ANDROID_INTELSSE2_IDCT
+ case JDCT_ISLOW:
+ case JDCT_IFAST:
+ method_ptr = jpeg_idct_intelsse;
+ method = JDCT_ISLOW; /* Use quant table of ISLOW.*/
+ break;
+#else
#ifdef DCT_ISLOW_SUPPORTED
case JDCT_ISLOW:
method_ptr = jpeg_idct_islow;
@@ -163,6 +177,7 @@
method = JDCT_IFAST;
break;
#endif
+#endif /* ANDROID_INTELSSE2_IDCT*/
#endif /* ANDROID_ARMV6_IDCT */
#ifdef DCT_FLOAT_SUPPORTED
case JDCT_FLOAT: