Revert of Improved x86 SSE build and run-time checks. (https://codereview.chromium.org/272503006/)

Reason for revert:
Windows builders breaking.  :(

Original issue's description:
> Improved x86 SSE build and run-time checks.
>
> Replaces the current build/run-time checks for SSE level in
> opts_check_x86.cpp with a simpler and more future-proof version.
> Also adds SSE versions 4.1 and 4.2 to the config file.
>
> Author: henrik.smiding@intel.com
>
> Signed-off-by: Henrik Smiding <henrik.smiding@intel.com>
>
> Committed: http://code.google.com/p/skia/source/detail?r=14644

R=reed@google.com, tomhudson@google.com, djsollen@google.com, joakim.landberg@intel.com, henrik.smiding@intel.com
TBR=djsollen@google.com, henrik.smiding@intel.com, joakim.landberg@intel.com, reed@google.com, tomhudson@google.com
NOTREECHECKS=true
NOTRY=true

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/277593004

git-svn-id: http://skia.googlecode.com/svn/trunk@14646 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/include/core/SkPreConfig.h b/include/core/SkPreConfig.h
index a15f8ec..9aefc2d 100644
--- a/include/core/SkPreConfig.h
+++ b/include/core/SkPreConfig.h
@@ -123,18 +123,12 @@
 #define SK_CPU_SSE_LEVEL_SSE2     20
 #define SK_CPU_SSE_LEVEL_SSE3     30
 #define SK_CPU_SSE_LEVEL_SSSE3    31
-#define SK_CPU_SSE_LEVEL_SSE41    41
-#define SK_CPU_SSE_LEVEL_SSE42    42
 
 // Are we in GCC?
 #ifndef SK_CPU_SSE_LEVEL
     // These checks must be done in descending order to ensure we set the highest
     // available SSE level.
-    #if defined(__SSE4_2__)
-        #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSE42
-    #elif defined(__SSE4_1__)
-        #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSE41
-    #elif defined(__SSSE3__)
+    #if defined(__SSSE3__)
         #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSSE3
     #elif defined(__SSE3__)
         #define SK_CPU_SSE_LEVEL    SK_CPU_SSE_LEVEL_SSE3
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index d614293..0b0debb 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -25,11 +25,10 @@
 #include <intrin.h>
 #endif
 
-/* This file must *not* be compiled with -msse or any other optional SIMD
-   extension, otherwise gcc may generate SIMD instructions even for scalar ops
-   (and thus give an invalid instruction on Pentium3 on the code below).
-   For example, only files named *_SSE2.cpp in this directory should be
-   compiled with -msse2 or higher. */
+/* This file must *not* be compiled with -msse or -msse2, otherwise
+   gcc may generate sse2 even for scalar ops (and thus give an invalid
+   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
+   in this directory should be compiled with -msse2. */
 
 
 /* Function to get the CPU SSE-level in runtime, for different compilers. */
@@ -49,7 +48,8 @@
     }
 #endif
 }
-#elif defined(__x86_64__)
+#else
+#if defined(__x86_64__)
 static inline void getcpuid(int info_type, int info[4]) {
     asm volatile (
         "cpuid \n\t"
@@ -70,47 +70,56 @@
     );
 }
 #endif
+#endif
 
 ////////////////////////////////////////////////////////////////////////////////
 
-/* Fetch the SIMD level directly from the CPU, at run-time.
- * Only checks the levels needed by the optimizations in this file.
- */
-static int get_SIMD_level() {
-    int cpu_info[4] = { 0 };
+#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+/* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
+static inline bool hasSSE2() {
+    return true;
+}
+#else
 
+static inline bool hasSSE2() {
+    int cpu_info[4] = { 0 };
     getcpuid(1, cpu_info);
-    if ((cpu_info[2] & (1<<20)) != 0) {
-        return SK_CPU_SSE_LEVEL_SSE42;
-    } else if ((cpu_info[2] & (1<<9)) != 0) {
-        return SK_CPU_SSE_LEVEL_SSSE3;
-    } else if ((cpu_info[3] & (1<<26)) != 0) {
-        return SK_CPU_SSE_LEVEL_SSE2;
-    } else {
-        return 0;
-    }
+    return (cpu_info[3] & (1<<26)) != 0;
+}
+#endif
+
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+/* If we know SSSE3 is supported at compile time, don't even bother checking. */
+static inline bool hasSSSE3() {
+    return true;
+}
+#elif defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
+/* For the Android framework we should always know at compile time if the device
+ * we are building for supports SSSE3.  The one exception to this rule is on the
+ * emulator where we are compiled without the -msse3 option (so we have no SSSE3
+ * procs) but can be run on a host machine that supports SSSE3 instructions. So
+ * for that particular case we disable our SSSE3 options.
+ */
+static inline bool hasSSSE3() {
+    return false;
+}
+#else
+
+static inline bool hasSSSE3() {
+    int cpu_info[4] = { 0 };
+    getcpuid(1, cpu_info);
+    return (cpu_info[2] & 0x200) != 0;
+}
+#endif
+
+static bool cachedHasSSE2() {
+    static bool gHasSSE2 = hasSSE2();
+    return gHasSSE2;
 }
 
-/* Verify that the requested SIMD level exists in the build.
- * If not, check if the platform supports it.
- */
-static inline bool supports_simd(int minLevel) {
-    if (minLevel <= SK_CPU_SSE_LEVEL) {
-        return true;
-    } else {
-#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
-        /* For the Android framework we should always know at compile time if the device
-         * we are building for supports SSSE3.  The one exception to this rule is on the
-         * emulator where we are compiled without the -mssse3 option (so we have no
-         * SSSE3 procs) but can be run on a host machine that supports SSSE3
-         * instructions. So for that particular case we disable our SSSE3 options.
-         */
-        return false;
-#else
-        static int gSIMDLevel = get_SIMD_level();
-        return (minLevel <= gSIMDLevel);
-#endif
-    }
+static bool cachedHasSSSE3() {
+    static bool gHasSSSE3 = hasSSSE3();
+    return gHasSSSE3;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -118,7 +127,7 @@
 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
 
 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         procs->fExtraHorizontalReads = 3;
         procs->fConvolveVertically = &convolveVertically_SSE2;
         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
@@ -131,29 +140,29 @@
 
 void SkBitmapProcState::platformProcs() {
     /* Every optimization in the function requires at least SSE2 */
-    if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (!cachedHasSSE2()) {
         return;
     }
 
     /* Check fSampleProc32 */
     if (fSampleProc32 == S32_opaque_D32_filter_DX) {
-        if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+        if (cachedHasSSSE3()) {
             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
         } else {
             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
         }
     } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
-        if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+        if (cachedHasSSSE3()) {
             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
         }
     } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
-        if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+        if (cachedHasSSSE3()) {
             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
         } else {
             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
         }
     } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
-        if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+        if (cachedHasSSSE3()) {
             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
         }
     }
@@ -196,7 +205,7 @@
 };
 
 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return platform_16_procs[flags];
     } else {
         return NULL;
@@ -211,7 +220,7 @@
 };
 
 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return platform_32_procs[flags];
     } else {
         return NULL;
@@ -219,7 +228,7 @@
 }
 
 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return Color32_SSE2;
     } else {
         return NULL;
@@ -230,7 +239,7 @@
 
 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
 /* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return ColorRect32_SSE2;
     } else {
         return NULL;
@@ -249,7 +258,7 @@
     }
 
     ColorProc proc = NULL;
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         switch (dstConfig) {
             case SkBitmap::kARGB_8888_Config:
                 // The SSE2 version is not (yet) faster for black, so we check
@@ -266,7 +275,7 @@
 }
 
 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         if (isOpaque) {
             return SkBlitLCD16OpaqueRow_SSE2;
         } else {
@@ -287,7 +296,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 SkMemset16Proc SkMemset16GetPlatformProc() {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return sk_memset16_SSE2;
     } else {
         return NULL;
@@ -295,7 +304,7 @@
 }
 
 SkMemset32Proc SkMemset32GetPlatformProc() {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return sk_memset32_SSE2;
     } else {
         return NULL;
@@ -305,7 +314,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
-    if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (!cachedHasSSE2()) {
         return NULL;
     }
     switch (type) {
@@ -331,7 +340,7 @@
 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
     return false;
 #else
-    if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (!cachedHasSSE2()) {
         return false;
     }
     return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
@@ -356,7 +365,7 @@
 
 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
                                                SkXfermode::Mode mode) {
-    if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+    if (cachedHasSSE2()) {
         return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
     } else {
         return SkPlatformXfermodeFactory_impl(rec, mode);