Add SSE4 version of BlurImage optimizations.
Adds an SSE4.1 version of the existing BlurImage optimizations.
The blur_image_filter_* benchmarks show a 10-50% performance
improvement on Linux/Ubuntu Core i7.
Signed-off-by: Henrik Smiding <henrik.smiding@intel.com>
Committed: https://skia.googlesource.com/skia/+/2830632ce93c97ed7647b13348365ea92e4ea665
R=mtklein@google.com, reed@chromium.org
Author: henrik.smiding@intel.com
Review URL: https://codereview.chromium.org/366593004
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 603458b..5bab17a 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -15,6 +15,7 @@
#include "SkBlitRow_opts_SSE2.h"
#include "SkBlitRow_opts_SSE4.h"
#include "SkBlurImage_opts_SSE2.h"
+#include "SkBlurImage_opts_SSE4.h"
#include "SkMorphology_opts.h"
#include "SkMorphology_opts_SSE2.h"
#include "SkRTConf.h"
@@ -358,10 +359,13 @@
#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
return false;
#else
- if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
- return false;
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
+ return SkBoxBlurGetPlatformProcs_SSE4(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
}
- return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
+ else if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
+ return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
+ }
+ return false;
#endif
}