Revert "Revert "New approach to GrProcessor uniforms.""
This reverts commit ae59426ea6e9b351d9d52f2a9c12d05023351994.
Bug: skia:12182
Change-Id: I591a0a89ffad1a3d5d867dd247ceeec71b6041a4
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/449516
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/GrUniformDataManager.cpp b/src/gpu/GrUniformDataManager.cpp
index 69842d0..859ff1d 100644
--- a/src/gpu/GrUniformDataManager.cpp
+++ b/src/gpu/GrUniformDataManager.cpp
@@ -7,18 +7,295 @@
#include "src/gpu/GrUniformDataManager.h"
+#include "src/gpu/GrProgramInfo.h"
#include "src/gpu/GrShaderVar.h"
// ensure that these types are the sizes the uniform data is expecting
static_assert(sizeof(int32_t) == 4);
static_assert(sizeof(float) == 4);
-GrUniformDataManager::GrUniformDataManager(uint32_t uniformCount, uint32_t uniformSize)
- : fUniformSize(uniformSize)
- , fUniformsDirty(false) {
+//////////////////////////////////////////////////////////////////////////////
+
+GrUniformDataManager::UniformManager::UniformManager(ProgramUniforms uniforms, Layout layout)
+ : fUniforms(std::move(uniforms)), fLayout(layout) {}
+
+template <typename BaseType> static constexpr size_t tight_vec_size(int vecLength) {
+ return sizeof(BaseType) * vecLength;
+}
+
+/**
+ * From Section 7.6.2.2 "Standard Uniform Block Layout":
+ * 1. If the member is a scalar consuming N basic machine units, the base alignment is N.
+ * 2. If the member is a two- or four-component vector with components consuming N basic machine
+ * units, the base alignment is 2N or 4N, respectively.
+ * 3. If the member is a three-component vector with components consuming N
+ * basic machine units, the base alignment is 4N.
+ * 4. If the member is an array of scalars or vectors, the base alignment and array
+ * stride are set to match the base alignment of a single array element, according
+ * to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
+ * array may have padding at the end; the base offset of the member following
+ * the array is rounded up to the next multiple of the base alignment.
+ * 5. If the member is a column-major matrix with C columns and R rows, the
+ * matrix is stored identically to an array of C column vectors with R components each,
+ * according to rule (4).
+ * 6. If the member is an array of S column-major matrices with C columns and
+ * R rows, the matrix is stored identically to a row of S × C column vectors
+ * with R components each, according to rule (4).
+ * 7. If the member is a row-major matrix with C columns and R rows, the matrix
+ * is stored identically to an array of R row vectors with C components each,
+ * according to rule (4).
+ * 8. If the member is an array of S row-major matrices with C columns and R
+ * rows, the matrix is stored identically to a row of S × R row vectors with C
+ * components each, according to rule (4).
+ * 9. If the member is a structure, the base alignment of the structure is N, where
+ * N is the largest base alignment value of any of its members, and rounded
+ * up to the base alignment of a vec4. The individual members of this substructure are then
+ * assigned offsets by applying this set of rules recursively,
+ * where the base offset of the first member of the sub-structure is equal to the
+ * aligned offset of the structure. The structure may have padding at the end;
+ * the base offset of the member following the sub-structure is rounded up to
+ * the next multiple of the base alignment of the structure.
+ * 10. If the member is an array of S structures, the S elements of the array are laid
+ * out in order, according to rule (9).
+ */
+template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
+struct Rules140 {
+ /**
+ * For an array of scalars or vectors this returns the stride between array elements. For
+ * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
+ * that for single (non-array) scalars or vectors we don't require a stride.
+ */
+ static constexpr size_t Stride(int count) {
+ SkASSERT(count >= 1 || count == GrShaderVar::kNonArray);
+ static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
+ static_assert(Cols >= 1 && Cols <= 4);
+ if (Cols != 1) {
+ // This is a matrix or array of matrices. We return the stride between columns.
+ SkASSERT(RowsOrVecLength > 1);
+ return Rules140<BaseType, RowsOrVecLength>::Stride(1);
+ }
+ if (count == 0) {
+ // Stride doesn't matter for a non-array.
+ return 0;
+ }
+
+ // Rule 4.
+
+ // Alignment of vec4 by Rule 2.
+ constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
+ // Get alignment of a single vector of BaseType by Rule 1, 2, or 3
+ int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength;
+ size_t kElementAlignment = tight_vec_size<BaseType>(n);
+ // Round kElementAlignment up to a multiple of kVec4Alignment.
+ size_t m = (kElementAlignment + kVec4Alignment - 1)/kVec4Alignment;
+ return m*kVec4Alignment;
+ }
+};
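+// For example (illustrative, derived from the rules above): with a 4-byte BaseType such as float,
+// kVec4Alignment is 16, so an array of float2 gets a 16-byte stride (its tight 8-byte size rounded
+// up to vec4 alignment) and an array of float3 also gets 16 (padded out to float4). A non-array
+// scalar or vector reports a stride of 0 since no stride is needed.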
+
+/**
+ * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
+ * identically to uniform and shader storage blocks using the std140 layout, except that the base
+ * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
+ * not rounded up to a multiple of the base alignment of a vec4.
+ */
+template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
+struct Rules430 {
+ static constexpr size_t Stride(int count) {
+ SkASSERT(count >= 1 || count == GrShaderVar::kNonArray);
+ static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
+ static_assert(Cols >= 1 && Cols <= 4);
+
+ if (Cols != 1) {
+ // This is a matrix or array of matrices. We return the stride between columns.
+ SkASSERT(RowsOrVecLength > 1);
+ return Rules430<BaseType, RowsOrVecLength>::Stride(1);
+ }
+ if (count == 0) {
+ // Stride doesn't matter for a non-array.
+ return 0;
+ }
+ // Rule 4, without rounding up to a multiple of the alignment of a vec4.
+ return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
+ }
+};
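+// For example (illustrative): under std430 an array of float2 keeps its tight 8-byte stride and a
+// scalar float array keeps a 4-byte stride, while an array of float3 is still padded to the
+// 16-byte stride of float4.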
+
+// The strides used here were derived from the rules we've imposed on ourselves in
+// GrMtlPipelineStateDataManager. Everything is tightly packed except 3-component types, which
+// have the stride of their 4-component equivalents.
+template <typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
+struct RulesMetal {
+ static constexpr size_t Stride(int count) {
+ SkASSERT(count >= 1 || count == GrShaderVar::kNonArray);
+ static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
+ static_assert(Cols >= 1 && Cols <= 4);
+ if (Cols != 1) {
+ // This is a matrix or array of matrices. We return the stride between columns.
+ SkASSERT(RowsOrVecLength > 1);
+ return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
+ }
+ if (count == 0) {
+ // Stride doesn't matter for a non-array.
+ return 0;
+ }
+ return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
+ }
+};
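+// For example (illustrative): these rules give the same strides as std430 here; a float3 or half3
+// element occupies 16 bytes (its 4-component size), while float2 stays at 8 bytes.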
+
+template <template <typename BaseType, int RowsOrVecLength, int Cols> class Rules>
+class Writer {
+private:
+ using CType = GrProcessor::Uniform::CType;
+
+ template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
+ static void Write(void* dst, int n, const BaseType v[]) {
+ if (dst) {
+ size_t stride = Rules<BaseType, RowsOrVecLength, Cols>::Stride(n);
+ n = (n == GrShaderVar::kNonArray) ? 1 : n;
+ n *= Cols;
+ if (stride == RowsOrVecLength*sizeof(BaseType)) {
+ std::memcpy(dst, v, n*stride);
+ } else {
+ for (int i = 0; i < n; ++i) {
+ std::memcpy(dst, v, RowsOrVecLength*sizeof(BaseType));
+ v += RowsOrVecLength;
+ dst = SkTAddOffset<void>(dst, stride);
+ }
+ }
+ }
+ }
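+ // Illustrative example: writing a float3[2] array under std140 uses a 16-byte stride, which
+ // differs from the 12-byte tight size, so the per-element loop above copies 12 bytes and then
+ // skips 4 bytes of padding for each element. A float4 array (stride equals tight size) takes
+ // the single-memcpy path instead.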
+
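+ // SkMatrix stores its nine values row-major, but the shader expects column-major float3x3 data,
+ // so each matrix is transposed into 'mt' below and then written column by column.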
+ static void WriteSkMatrices(void* d, int n, const SkMatrix m[]) {
+ size_t offset = 0;
+ for (int i = 0; i < std::max(n, 1); ++i) {
+ float mt[] = {
+ m[i].get(SkMatrix::kMScaleX),
+ m[i].get(SkMatrix::kMSkewY),
+ m[i].get(SkMatrix::kMPersp0),
+ m[i].get(SkMatrix::kMSkewX),
+ m[i].get(SkMatrix::kMScaleY),
+ m[i].get(SkMatrix::kMPersp1),
+ m[i].get(SkMatrix::kMTransX),
+ m[i].get(SkMatrix::kMTransY),
+ m[i].get(SkMatrix::kMPersp2),
+ };
+ Write<float, 3, 3>(SkTAddOffset<void>(d, offset), 1, mt);
+ // Stride() gives the stride of each column, so multiply by 3 to get the matrix stride.
+ offset += 3*Rules<float, 3, 3>::Stride(1);
+ }
+ }
+
+public:
+ static void WriteUniform(GrSLType type, CType ctype, void* d, int n, const void* v) {
+ SkASSERT(d);
+ SkASSERT(n >= 1 || n == GrShaderVar::kNonArray);
+ switch (type) {
+ case kInt_GrSLType:
+ return Write<int32_t>(d, n, static_cast<const int32_t*>(v));
+
+ case kInt2_GrSLType:
+ return Write<int32_t, 2>(d, n, static_cast<const int32_t*>(v));
+
+ case kInt3_GrSLType:
+ return Write<int32_t, 3>(d, n, static_cast<const int32_t*>(v));
+
+ case kInt4_GrSLType:
+ return Write<int32_t, 4>(d, n, static_cast<const int32_t*>(v));
+
+ case kHalf_GrSLType:
+ case kFloat_GrSLType:
+ return Write<float>(d, n, static_cast<const float*>(v));
+
+ case kHalf2_GrSLType:
+ case kFloat2_GrSLType:
+ return Write<float, 2>(d, n, static_cast<const float*>(v));
+
+ case kHalf3_GrSLType:
+ case kFloat3_GrSLType:
+ return Write<float, 3>(d, n, static_cast<const float*>(v));
+
+ case kHalf4_GrSLType:
+ case kFloat4_GrSLType:
+ return Write<float, 4>(d, n, static_cast<const float*>(v));
+
+ case kHalf2x2_GrSLType:
+ case kFloat2x2_GrSLType:
+ return Write<float, 2, 2>(d, n, static_cast<const float*>(v));
+
+ case kHalf3x3_GrSLType:
+ case kFloat3x3_GrSLType: {
+ switch (ctype) {
+ case CType::kDefault:
+ return Write<float, 3, 3>(d, n, static_cast<const float*>(v));
+ case CType::kSkMatrix:
+ return WriteSkMatrices(d, n, static_cast<const SkMatrix*>(v));
+ }
+ SkUNREACHABLE;
+ }
+
+ case kHalf4x4_GrSLType:
+ case kFloat4x4_GrSLType:
+ return Write<float, 4, 4>(d, n, static_cast<const float*>(v));
+
+ default:
+ SK_ABORT("Unexpect uniform type");
+ }
+ }
+};
+
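+// Writes each processor's uniform values into 'buffer' at the offsets recorded in fUniforms,
+// using the stride rules selected by fLayout. Returns true if any uniform data was written.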
+bool GrUniformDataManager::UniformManager::writeUniforms(const GrProgramInfo& info, void* buffer) {
+ decltype(&Writer<Rules140>::WriteUniform) write;
+ switch (fLayout) {
+ case Layout::kStd140:
+ write = Writer<Rules140>::WriteUniform;
+ break;
+ case Layout::kStd430:
+ write = Writer<Rules430>::WriteUniform;
+ break;
+ case Layout::kMetal:
+ write = Writer<RulesMetal>::WriteUniform;
+ break;
+ }
+
+ bool wrote = false;
+ auto set = [&, processorIndex = 0](const GrProcessor& p) mutable {
+ SkASSERT(buffer);
+ const ProcessorUniforms& uniforms = fUniforms[processorIndex];
+ for (const NewUniform& u : uniforms) {
+ if (u.type != kVoid_GrSLType) {
+ SkASSERT(u.count >= 0);
+ static_assert(GrShaderVar::kNonArray == 0);
+ void* d = SkTAddOffset<void>(buffer, u.offset);
+ size_t index = u.indexInProcessor;
+ const void* v = p.uniformData(index);
+ write(u.type, p.uniforms()[index].ctype(), d, u.count, v);
+ wrote = true;
+ }
+ }
+ ++processorIndex;
+ };
+
+ info.visitProcessors(set);
+ return wrote;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+GrUniformDataManager::GrUniformDataManager(ProgramUniforms uniforms,
+ Layout layout,
+ uint32_t uniformCount,
+ uint32_t uniformSize)
+ : fUniformSize(uniformSize)
+ , fUniformsDirty(false)
+ , fUniformManager(std::move(uniforms), layout) {
fUniformData.reset(uniformSize);
fUniforms.push_back_n(uniformCount);
- // subclasses fill in the uniforms in their constructor
+ // subclasses fill in the legacy uniforms in their constructor
+}
+
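+// Writes the per-processor uniform values for 'info' into fUniformData and marks the uniform
+// data dirty if anything was written.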
+void GrUniformDataManager::setUniforms(const GrProgramInfo& info) {
+ if (fUniformManager.writeUniforms(info, fUniformData.get())) {
+ this->markDirty();
+ }
}
void* GrUniformDataManager::getBufferPtrAndMarkDirty(const Uniform& uni) const {