Add SSE-optimized IDCT for JPEG decoding on the Intel Atom platform.

Add an SSE-optimized IDCT. It makes the album2D workload about 18%
faster than with jidctfst.c. The main implementation is in
"jidctintelsse.c".

Change-Id: I6dc6f367158172b468e85e7eae249027eb1766cf
diff --git a/jddctmgr.c b/jddctmgr.c
index 74a96db..97b516a 100644
--- a/jddctmgr.c
+++ b/jddctmgr.c
@@ -50,6 +50,13 @@
 
 #endif
 
+#ifdef ANDROID_INTELSSE2_IDCT
+extern short __attribute__((aligned(16))) quantptrSSE[DCTSIZE2];
+extern void jpeg_idct_intelsse (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		JCOEFPTR coef_block,
+		JSAMPARRAY output_buf, JDIMENSION output_col);
+#endif
+
 /*
  * The decompressor input side (jdinput.c) saves away the appropriate
  * quantization table for each component at the start of the first scan
@@ -151,6 +158,13 @@
 	method = JDCT_IFAST;
 	break;
 #else /* ANDROID_ARMV6_IDCT */
+#ifdef ANDROID_INTELSSE2_IDCT
+      case JDCT_ISLOW:
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_intelsse;
+	method = JDCT_ISLOW; /* Use quant table of ISLOW.*/
+	break;
+#else
 #ifdef DCT_ISLOW_SUPPORTED
       case JDCT_ISLOW:
 	method_ptr = jpeg_idct_islow;
@@ -163,6 +177,7 @@
 	method = JDCT_IFAST;
 	break;
 #endif
+#endif /* ANDROID_INTELSSE2_IDCT*/
 #endif /* ANDROID_ARMV6_IDCT */
 #ifdef DCT_FLOAT_SUPPORTED
       case JDCT_FLOAT: