VK_EXT_shader_stencil_export support
VK_EXT_shader_stencil_export is a simple extension to implement in
SwiftShader. Added PixelRoutine::stencilReplaceRef(), which can be
trivially modified to output Byte4 instead of Byte8, should the
stencil code be refactored.
Verified that these tests still pass with SwANGLE:
dEQP-GLES3.functional.fbo.blit.depth_stencil.depth32f_stencil8_basic
dEQP-GLES3.functional.fbo.blit.depth_stencil.depth32f_stencil8_stencil_only
dEQP-GLES3.functional.fbo.blit.depth_stencil.depth24_stencil8_basic
dEQP-GLES3.functional.fbo.blit.depth_stencil.depth24_stencil8_stencil_only
Change-Id: Ia5829489ccba75ee7c41e365ca2d3d586c987c2d
Tests: dEQP-VK.pipeline.shader_stencil_export.op_replace
Bug: b/148175198
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41469
Presubmit-Ready: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
Tested-by: Alexis Hétu <sugoi@google.com>
diff --git a/src/Pipeline/PixelRoutine.cpp b/src/Pipeline/PixelRoutine.cpp
index 8543a5e..26ed5b5 100644
--- a/src/Pipeline/PixelRoutine.cpp
+++ b/src/Pipeline/PixelRoutine.cpp
@@ -750,6 +750,29 @@
}
}
+Byte8 PixelRoutine::stencilReplaceRef(bool isBack)
+{
+ if(spirvShader)
+ {
+ auto it = spirvShader->outputBuiltins.find(spv::BuiltInFragStencilRefEXT);
+ if(it != spirvShader->outputBuiltins.end())
+ {
+ UInt4 sRef = As<UInt4>(routine.getVariable(it->second.Id)[it->second.FirstComponent]) & UInt4(0xff);
+ // Pack the low byte of each of the four sRef lanes into sRef.x (x | y << 8 | z << 16 | w << 24).
+ // TODO (b/148295813): This could be a single pshufb instruction; optimize it by adding
+ // either an rr::Shuffle() variant that does this explicitly or a Byte4(Int4) constructor.
+ sRef.x = rr::UInt(sRef.x) | (rr::UInt(sRef.y) << 8) | (rr::UInt(sRef.z) << 16) | (rr::UInt(sRef.w) << 24);
+
+ UInt2 sRefDuplicated;
+ sRefDuplicated = Insert(sRefDuplicated, sRef.x, 0);
+ sRefDuplicated = Insert(sRefDuplicated, sRef.x, 1);
+ return As<Byte8>(sRefDuplicated);
+ }
+ }
+
+ return *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
+}
+
void PixelRoutine::stencilOperation(Byte8 &output, const Byte8 &bufferValue, VkStencilOp operation, bool isBack)
{
switch(operation)
@@ -761,7 +784,7 @@
output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
break;
case VK_STENCIL_OP_REPLACE:
- output = *Pointer<Byte8>(data + OFFSET(DrawData, stencil[isBack].referenceQ));
+ output = stencilReplaceRef(isBack);
break;
case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
diff --git a/src/Pipeline/PixelRoutine.hpp b/src/Pipeline/PixelRoutine.hpp
index 34722ad..72d4253 100644
--- a/src/Pipeline/PixelRoutine.hpp
+++ b/src/Pipeline/PixelRoutine.hpp
@@ -65,6 +65,7 @@
private:
Float4 interpolateCentroid(const Float4 &x, const Float4 &y, const Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective);
+ Byte8 stencilReplaceRef(bool isBack);
void stencilTest(const Pointer<Byte> &sBuffer, int q, const Int &x, Int &sMask, const Int &cMask);
void stencilTest(Byte8 &value, VkCompareOp stencilCompareMode, bool isBack);
void stencilOperation(Byte8 &newValue, const Byte8 &bufferValue, const PixelProcessor::States::StencilOpState &ops, bool isBack, const Int &zMask, const Int &sMask);
diff --git a/src/Pipeline/SpirvShader.cpp b/src/Pipeline/SpirvShader.cpp
index 5026373..4e50f2d 100644
--- a/src/Pipeline/SpirvShader.cpp
+++ b/src/Pipeline/SpirvShader.cpp
@@ -381,6 +381,7 @@
case spv::CapabilityGroupNonUniformShuffleRelative: capabilities.GroupNonUniformShuffleRelative = true; break;
case spv::CapabilityDeviceGroup: capabilities.DeviceGroup = true; break;
case spv::CapabilityMultiView: capabilities.MultiView = true; break;
+ case spv::CapabilityStencilExportEXT: capabilities.StencilExportEXT = true; break;
default:
UNSUPPORTED("Unsupported capability %u", insn.word(1));
}
@@ -719,6 +720,7 @@
if(!strcmp(ext, "SPV_KHR_variable_pointers")) break;
if(!strcmp(ext, "SPV_KHR_device_group")) break;
if(!strcmp(ext, "SPV_KHR_multiview")) break;
+ if(!strcmp(ext, "SPV_EXT_shader_stencil_export")) break;
UNSUPPORTED("SPIR-V Extension: %s", ext);
break;
}
diff --git a/src/Pipeline/SpirvShader.hpp b/src/Pipeline/SpirvShader.hpp
index 406fe97..e9876c7 100644
--- a/src/Pipeline/SpirvShader.hpp
+++ b/src/Pipeline/SpirvShader.hpp
@@ -544,6 +544,7 @@
bool GroupNonUniformArithmetic : 1;
bool DeviceGroup : 1;
bool MultiView : 1;
+ bool StencilExportEXT : 1;
};
Capabilities const &getUsedCapabilities() const
diff --git a/src/Shader/SamplerCore.cpp b/src/Shader/SamplerCore.cpp
index 5690cfa..810874b 100644
--- a/src/Shader/SamplerCore.cpp
+++ b/src/Shader/SamplerCore.cpp
@@ -1915,7 +1915,7 @@
Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+ c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
switch(state.textureFormat)
{
@@ -2067,7 +2067,7 @@
Int c1 = Int(buffer[0][index[1]]);
Int c2 = Int(buffer[0][index[2]]);
Int c3 = Int(buffer[0][index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+ c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
@@ -2075,14 +2075,14 @@
c1 = Int(buffer[1][index[1]]);
c2 = Int(buffer[1][index[2]]);
c3 = Int(buffer[1][index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+ c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
c0 = Int(buffer[2][index[0]]);
c1 = Int(buffer[2][index[1]]);
c2 = Int(buffer[2][index[2]]);
c3 = Int(buffer[2][index[3]]);
- c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
+ c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); // TODO (b/148295813) : Optimize with pshufb
UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
const UShort4 yY = UShort4(iround(Yy * 0x4000));
diff --git a/src/Vulkan/libVulkan.cpp b/src/Vulkan/libVulkan.cpp
index 92a123a..937f482 100644
--- a/src/Vulkan/libVulkan.cpp
+++ b/src/Vulkan/libVulkan.cpp
@@ -321,6 +321,8 @@
{ VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME, VK_EXT_QUEUE_FAMILY_FOREIGN_SPEC_VERSION },
// The following extension is only used to add support for Bresenham lines
{ VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, VK_EXT_LINE_RASTERIZATION_SPEC_VERSION },
+ // The following extension is used by ANGLE to emulate blitting the stencil buffer
+ { VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION },
#ifndef __ANDROID__
// We fully support the KHR_swapchain v70 additions, so just track the spec version.
{ VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_SWAPCHAIN_SPEC_VERSION },