Roll SwiftShader from 3f37fd8e4fbe to 011bba68c819 (3 revisions) am: 249275b3f8 am: 0bec5a52b4 am: 4efc731668 am: 973665e24e am: 1575104152

Original change: https://android-review.googlesource.com/c/platform/external/swiftshader/+/2061115

Change-Id: Idce90409f2bd2abc20eb6bded7701d579f83ddf7
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/src/Device/Blitter.cpp b/src/Device/Blitter.cpp
index 871f43d..1c18a23 100644
--- a/src/Device/Blitter.cpp
+++ b/src/Device/Blitter.cpp
@@ -30,17 +30,15 @@
 #	include <emmintrin.h>
 #endif
 
-namespace {
-rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
+namespace sw {
+
+static rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
 {
 	return (rr::Int(ints.x) << shifts[0]) |
 	       (rr::Int(ints.y) << shifts[1]) |
 	       (rr::Int(ints.z) << shifts[2]) |
 	       (rr::Int(ints.w) << shifts[3]);
 }
-}  // namespace
-
-namespace sw {
 
 Blitter::Blitter()
     : blitMutex()
@@ -1450,7 +1448,7 @@
 	{
 		value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) :  // Unapply scale
 		             Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w);   // Apply unscale
-		value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
+		value.xyz = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : linearToSRGB(value);
 		value *= Float4(scale.x, scale.y, scale.z, scale.w);  // Apply scale
 	}
 	else if(unscale != scale)
@@ -1484,30 +1482,6 @@
 	return z * sliceB + y * pitchB + x * bytes;
 }
 
-Float4 Blitter::LinearToSRGB(const Float4 &c)
-{
-	Float4 lc = Min(c, 0.0031308f) * 12.92f;
-	Float4 ec = Float4(1.055f) * Pow<Mediump>(c, (1.0f / 2.4f)) - 0.055f;  // TODO(b/149574741): Use a custom approximation.
-
-	Float4 s = c;
-	s.xyz = Max(lc, ec);
-
-	return s;
-}
-
-Float4 Blitter::sRGBtoLinear(const Float4 &c)
-{
-	Float4 lc = c * (1.0f / 12.92f);
-	Float4 ec = Pow<Mediump>((c + 0.055f) * (1.0f / 1.055f), 2.4f);  // TODO(b/149574741): Use a custom approximation.
-
-	Int4 linear = CmpLT(c, 0.04045f);
-
-	Float4 s = c;
-	s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec)));  // TODO: IfThenElse()
-
-	return s;
-}
-
 Float4 Blitter::sample(Pointer<Byte> &source, Float &x, Float &y, Float &z,
                        Int &sWidth, Int &sHeight, Int &sDepth,
                        Int &sSliceB, Int &sPitchB, const State &state)
diff --git a/src/Device/Blitter.hpp b/src/Device/Blitter.hpp
index 65a9cf6..c7a9108 100644
--- a/src/Device/Blitter.hpp
+++ b/src/Device/Blitter.hpp
@@ -169,8 +169,6 @@
 	static void ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled = false);
 	static Int ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes);
 	static Int ComputeOffset(Int &x, Int &y, Int &z, Int &sliceB, Int &pitchB, int bytes);
-	static Float4 LinearToSRGB(const Float4 &color);
-	static Float4 sRGBtoLinear(const Float4 &color);
 
 	using BlitFunction = FunctionT<void(const BlitData *)>;
 	using BlitRoutineType = BlitFunction::RoutineType;
diff --git a/src/Pipeline/ShaderCore.cpp b/src/Pipeline/ShaderCore.cpp
index a3bc666..507767c 100644
--- a/src/Pipeline/ShaderCore.cpp
+++ b/src/Pipeline/ShaderCore.cpp
@@ -699,6 +699,23 @@
 	return (UInt(truncBits.x) >> 20) | (UInt(truncBits.y) >> 9) | (UInt(truncBits.z) << 1);
 }
 
+Float4 linearToSRGB(const Float4 &c)
+{
+	Float4 lc = Min(c, 0.0031308f) * 12.92f;
+	Float4 ec = MulAdd(1.055f, Pow<Mediump>(c, (1.0f / 2.4f)), -0.055f);  // TODO(b/149574741): Use a custom approximation.
+
+	return Max(lc, ec);
+}
+
+Float4 sRGBtoLinear(const Float4 &c)
+{
+	Float4 lc = c * (1.0f / 12.92f);
+	Float4 ec = Pow<Mediump>(MulAdd(c, 1.0f / 1.055f, 0.055f / 1.055f), 2.4f);  // TODO(b/149574741): Use a custom approximation.
+
+	Int4 linear = CmpLT(c, 0.04045f);
+	return As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec)));  // TODO: IfThenElse()
+}
+
 RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools)
 {
 	return SignMask(bools) != 0;
diff --git a/src/Pipeline/ShaderCore.hpp b/src/Pipeline/ShaderCore.hpp
index c9bc3e1..68d9da4 100644
--- a/src/Pipeline/ShaderCore.hpp
+++ b/src/Pipeline/ShaderCore.hpp
@@ -241,6 +241,8 @@
 sw::SIMD::UInt floatToHalfBits(sw::SIMD::UInt floatBits, bool storeInUpperBits);
 Float4 r11g11b10Unpack(UInt r11g11b10bits);
 UInt r11g11b10Pack(const Float4 &value);
+Float4 linearToSRGB(const Float4 &c);
+Float4 sRGBtoLinear(const Float4 &c);
 
 RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools);
 RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools);
diff --git a/src/Pipeline/SpirvShaderImage.cpp b/src/Pipeline/SpirvShaderImage.cpp
index 57f96e6..7adf2c7 100644
--- a/src/Pipeline/SpirvShaderImage.cpp
+++ b/src/Pipeline/SpirvShaderImage.cpp
@@ -21,11 +21,9 @@
 
 #include <spirv/unified1/spirv.hpp>
 
-namespace {
+namespace sw {
 
-using namespace sw;
-
-vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
+static vk::Format SpirvFormatToVulkanFormat(spv::ImageFormat format)
 {
 	switch(format)
 	{
@@ -76,20 +74,6 @@
 	}
 }
 
-SIMD::Float sRGBtoLinear(SIMD::Float c)
-{
-	SIMD::Float lc = c * (1.0f / 12.92f);
-	SIMD::Float ec = Pow<Mediump>((c + 0.055f) * (1.0f / 1.055f), 2.4f);  // TODO(b/149574741): Use a custom approximation.
-
-	SIMD::Int linear = CmpLT(c, 0.04045f);
-
-	return rr::As<SIMD::Float>((linear & rr::As<SIMD::Int>(lc)) | (~linear & rr::As<SIMD::Int>(ec)));  // TODO: IfThenElse()
-}
-
-}  // anonymous namespace
-
-namespace sw {
-
 SpirvShader::ImageInstruction::ImageInstruction(InsnIterator insn, const SpirvShader &spirv)
     : ImageInstructionSignature(parseVariantAndMethod(insn))
     , position(insn.distanceFrom(spirv.begin()))
@@ -909,9 +893,9 @@
 		break;
 	case VK_FORMAT_R8G8B8A8_SRGB:
 	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
-		dst.move(0, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
-		dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
-		dst.move(2, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(0, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(2, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
 		break;
 	case VK_FORMAT_B8G8R8A8_UNORM:
@@ -921,9 +905,9 @@
 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
 		break;
 	case VK_FORMAT_B8G8R8A8_SRGB:
-		dst.move(0, ::sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
-		dst.move(1, ::sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
-		dst.move(2, ::sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(0, sRGBtoLinear(SIMD::Float((packed[0] >> 16) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(1, sRGBtoLinear(SIMD::Float((packed[0] >> 8) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
+		dst.move(2, sRGBtoLinear(SIMD::Float(packed[0] & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF)));
 		dst.move(3, SIMD::Float((packed[0] >> 24) & SIMD::Int(0xFF)) * SIMD::Float(1.0f / 0xFF));
 		break;
 	case VK_FORMAT_R8G8B8A8_UINT:
@@ -1243,6 +1227,12 @@
 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
 		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);
 		break;
+	case VK_FORMAT_B8G8R8A8_SRGB:
+		packed[0] = (SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[2])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) |
+		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[1])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 8) |
+		            ((SIMD::UInt(Round(Min(Max(linearToSRGB(As<SIMD::Float>(texel[0])), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 16) |
+		            ((SIMD::UInt(Round(Min(Max(As<SIMD::Float>(texel[3]), SIMD::Float(0.0f)), SIMD::Float(1.0f)) * SIMD::Float(255.0f)))) << 24);  // Alpha is stored linearly; only R, G and B are sRGB-encoded.
+		break;
 	case VK_FORMAT_R8G8B8A8_SNORM:
 	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
 		packed[0] = (SIMD::Int(Round(Min(Max(As<SIMD::Float>(texel[0]), SIMD::Float(-1.0f)), SIMD::Float(1.0f)) * SIMD::Float(127.0f))) &
diff --git a/src/Vulkan/VkPhysicalDevice.cpp b/src/Vulkan/VkPhysicalDevice.cpp
index 1881234..5369175 100644
--- a/src/Vulkan/VkPhysicalDevice.cpp
+++ b/src/Vulkan/VkPhysicalDevice.cpp
@@ -1779,7 +1779,7 @@
 	case VK_FORMAT_B8G8R8A8_SRGB:
 		pFormatProperties->optimalTilingFeatures |=
 		    VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT |
-		    VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT_KHR;
+		    VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
 		pFormatProperties->bufferFeatures |=
 		    VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
 		break;