Reland "Try to avoid vertex colors in Texture/FillRect ops when possible."
This is a reland of e0b989e5e3ab64f3585fe1bf0228e964dad0678c
Original change's description:
> Try to avoid vertex colors in Texture/FillRect ops when possible.
>
> Avoids unnecessary fragment shader color multiplication.
>
> Change-Id: I353d3ca91824ce20c9e9af1c5c84ab9953ddd8ab
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/201004
> Commit-Queue: Brian Salomon <bsalomon@google.com>
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Change-Id: I22125cb7058f528cb368ff30c3c26e3d55056e66
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/201222
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/src/gpu/ops/GrFillRectOp.cpp b/src/gpu/ops/GrFillRectOp.cpp
index d9a11c4..94f90b3 100644
--- a/src/gpu/ops/GrFillRectOp.cpp
+++ b/src/gpu/ops/GrFillRectOp.cpp
@@ -79,7 +79,7 @@
const GrPerspQuad& localQuad, GrQuadType localQuadType)
: INHERITED(ClassID())
, fHelper(args, aaType, stencil)
- , fWideColor(!SkPMColor4fFitsInBytes(paintColor)) {
+ , fColorType(GrQuadPerEdgeAA::MinColorType(paintColor)) {
// The color stored with the quad is the clear color if a scissor-clear is decided upon
// when executing the op.
fDeviceQuads.push_back(deviceQuad, deviceQuadType, { paintColor, edgeFlags });
@@ -178,10 +178,9 @@
using Domain = GrQuadPerEdgeAA::Domain;
static constexpr SkRect kEmptyDomain = SkRect::MakeEmpty();
- VertexSpec vertexSpec(fDeviceQuads.quadType(),
- fWideColor ? ColorType::kHalf : ColorType::kByte,
- fLocalQuads.quadType(), fHelper.usesLocalCoords(), Domain::kNo,
- fHelper.aaType(), fHelper.compatibleWithAlphaAsCoverage());
+ VertexSpec vertexSpec(fDeviceQuads.quadType(), fColorType, fLocalQuads.quadType(),
+ fHelper.usesLocalCoords(), Domain::kNo, fHelper.aaType(),
+ fHelper.compatibleWithAlphaAsCoverage());
// Make sure that if the op thought it was a solid color, the vertex spec does not use
// local coords.
SkASSERT(!fHelper.isTrivial() || !fHelper.usesLocalCoords());
@@ -258,7 +257,7 @@
// If the processor sets are compatible, the two ops are always compatible; it just needs to
// adjust the state of the op to be the more general quad and aa types of the two ops and
// then concatenate the per-quad data.
- fWideColor |= that->fWideColor;
+ fColorType = SkTMax(fColorType, that->fColorType);
// The helper stores the aa type, but isCompatible(with true arg) allows the two ops' aa
// types to be none and coverage, in which case this op's aa type must be lifted to coverage
@@ -297,8 +296,8 @@
}
// clear compatible won't need to be updated, since device quad type and paint is the same,
- // but this quad has a new color, so maybe update wide color
- fWideColor |= !SkPMColor4fFitsInBytes(color);
+ // but this quad has a new color, so maybe update color type
+ fColorType = SkTMax(fColorType, GrQuadPerEdgeAA::MinColorType(color));
// Update the bounds and add the quad to this op's storage
SkRect newBounds = this->bounds();
@@ -328,7 +327,7 @@
// No metadata attached to the local quads; this list is empty when local coords are not needed.
GrQuadList fLocalQuads;
- unsigned fWideColor: 1;
+ ColorType fColorType;
typedef GrMeshDrawOp INHERITED;
};
diff --git a/src/gpu/ops/GrQuadPerEdgeAA.cpp b/src/gpu/ops/GrQuadPerEdgeAA.cpp
index 4845b55..cdc49b3 100644
--- a/src/gpu/ops/GrQuadPerEdgeAA.cpp
+++ b/src/gpu/ops/GrQuadPerEdgeAA.cpp
@@ -624,9 +624,7 @@
// Writes four vertices in triangle strip order, including the additional data for local
// coordinates, domain, color, and coverage as needed to satisfy the vertex spec.
static void write_quad(GrVertexWriter* vb, const GrQuadPerEdgeAA::VertexSpec& spec,
- CoverageMode mode, Sk4f coverage,
- SkPMColor4f color4f, bool wideColor,
- const SkRect& domain,
+ CoverageMode mode, Sk4f coverage, SkPMColor4f color4f, const SkRect& domain,
const Vertices& quad) {
static constexpr auto If = GrVertexWriter::If<float>;
@@ -639,8 +637,9 @@
// save color
if (spec.hasVertexColors()) {
+ bool wide = spec.colorType() == GrQuadPerEdgeAA::ColorType::kHalf;
vb->write(GrVertexColor(
- color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wideColor));
+ color4f * (mode == CoverageMode::kWithColor ? coverage[i] : 1.f), wide));
}
// save local position
@@ -684,12 +683,21 @@
namespace GrQuadPerEdgeAA {
+ColorType MinColorType(SkPMColor4f color) {
+ if (color == SK_PMColor4fWHITE) {
+ return ColorType::kNone;
+ } else if (color.fitsInBytes()) {
+ return ColorType::kByte;
+ } else {
+ return ColorType::kHalf;
+ }
+}
+
////////////////// Tessellate Implementation
void* Tessellate(void* vertices, const VertexSpec& spec, const GrPerspQuad& deviceQuad,
const SkPMColor4f& color4f, const GrPerspQuad& localQuad, const SkRect& domain,
GrQuadAAFlags aaFlags) {
- bool wideColor = GrQuadPerEdgeAA::ColorType::kHalf == spec.colorType();
CoverageMode mode = get_mode_for_spec(spec);
// Load position data into Sk4fs (always x, y, and load w to avoid branching down the road)
@@ -732,12 +740,12 @@
// applied a mirror, etc. The current 2D case is already adequately fast.
// Write two quads for inner and outer: inner uses maxCoverage, outer uses zero coverage.
- write_quad(&vb, spec, mode, maxCoverage, color4f, wideColor, domain, inner);
- write_quad(&vb, spec, mode, 0.f, color4f, wideColor, domain, outer);
+ write_quad(&vb, spec, mode, maxCoverage, color4f, domain, inner);
+ write_quad(&vb, spec, mode, 0.f, color4f, domain, outer);
} else {
// No outsetting needed, just write a single quad with full coverage
SkASSERT(mode == CoverageMode::kNone);
- write_quad(&vb, spec, mode, 1.f, color4f, wideColor, domain, outer);
+ write_quad(&vb, spec, mode, 1.f, color4f, domain, outer);
}
return vb.fPtr;
diff --git a/src/gpu/ops/GrQuadPerEdgeAA.h b/src/gpu/ops/GrQuadPerEdgeAA.h
index 232a10d..a491f26 100644
--- a/src/gpu/ops/GrQuadPerEdgeAA.h
+++ b/src/gpu/ops/GrQuadPerEdgeAA.h
@@ -26,6 +26,9 @@
enum class ColorType { kNone, kByte, kHalf, kLast = kHalf };
static const int kColorTypeCount = static_cast<int>(ColorType::kLast) + 1;
+ // Gets the minimum ColorType that can represent a color.
+ ColorType MinColorType(SkPMColor4f);
+
// Specifies the vertex configuration for an op that renders per-edge AA quads. The vertex
// order (when enabled) is device position, color, local position, domain, aa edge equations.
// This order matches the constructor argument order of VertexSpec and is the order that
diff --git a/src/gpu/ops/GrTextureOp.cpp b/src/gpu/ops/GrTextureOp.cpp
index 9db287a..5b4232c 100644
--- a/src/gpu/ops/GrTextureOp.cpp
+++ b/src/gpu/ops/GrTextureOp.cpp
@@ -291,7 +291,7 @@
auto bounds = dstQuad.bounds(dstQuadType);
this->setBounds(bounds, HasAABloat(aaType == GrAAType::kCoverage), IsZeroArea::kNo);
fDomain = static_cast<unsigned>(domain);
- fWideColor = !SkPMColor4fFitsInBytes(color);
+ fColorType = static_cast<unsigned>(GrQuadPerEdgeAA::MinColorType(color));
fCanSkipAllocatorGather =
static_cast<unsigned>(fProxies[0].fProxy->canSkipResourceAllocator());
}
@@ -311,7 +311,7 @@
// identical, unless an entry provides a dstClip or additional transform that changes it.
// The quad list will automatically adapt to that.
fQuads.reserve(cnt, GrQuadTypeForTransformedRect(viewMatrix));
-
+ bool allOpaque = true;
for (unsigned p = 0; p < fProxyCnt; ++p) {
fProxies[p].fProxy = SkRef(set[p].fProxy.get());
fProxies[p].fQuadCnt = 1;
@@ -351,6 +351,7 @@
set[p].fDstRect);
}
float alpha = SkTPin(set[p].fAlpha, 0.f, 1.f);
+ allOpaque &= (1.f == alpha);
SkPMColor4f color{alpha, alpha, alpha, alpha};
int srcQuadIndex = -1;
if (set[p].fDstClipQuad) {
@@ -371,7 +372,7 @@
}
this->setBounds(bounds, HasAABloat(this->aaType() == GrAAType::kCoverage), IsZeroArea::kNo);
fDomain = static_cast<unsigned>(false);
- fWideColor = static_cast<unsigned>(false);
+ fColorType = static_cast<unsigned>(allOpaque ? ColorType::kNone : ColorType::kByte);
}
void tess(void* v, const VertexSpec& spec, const GrTextureProxy* proxy, int start,
@@ -408,7 +409,7 @@
GrQuadType quadType = GrQuadType::kRect;
GrQuadType srcQuadType = GrQuadType::kRect;
Domain domain = Domain::kNo;
- bool wideColor = false;
+ ColorType colorType = ColorType::kNone;
int numProxies = 0;
int numTotalQuads = 0;
auto textureType = fProxies[0].fProxy->textureType();
@@ -426,7 +427,7 @@
if (op.fDomain) {
domain = Domain::kYes;
}
- wideColor |= op.fWideColor;
+ colorType = SkTMax(colorType, static_cast<ColorType>(op.fColorType));
numProxies += op.fProxyCnt;
for (unsigned p = 0; p < op.fProxyCnt; ++p) {
numTotalQuads += op.fProxies[p].fQuadCnt;
@@ -443,8 +444,7 @@
}
}
- VertexSpec vertexSpec(quadType, wideColor ? ColorType::kHalf : ColorType::kByte,
- srcQuadType, /* hasLocal */ true, domain, aaType,
+ VertexSpec vertexSpec(quadType, colorType, srcQuadType, /* hasLocal */ true, domain, aaType,
/* alpha as coverage */ true);
GrSamplerState samplerState = GrSamplerState(GrSamplerState::WrapMode::kClamp,
@@ -562,7 +562,7 @@
}
fDomain |= that->fDomain;
- fWideColor |= that->fWideColor;
+ fColorType = SkTMax(fColorType, that->fColorType);
if (upgradeToCoverageAAOnMerge) {
fAAType = static_cast<unsigned>(GrAAType::kCoverage);
}
@@ -644,11 +644,12 @@
unsigned fFilter : 2;
unsigned fAAType : 2;
unsigned fDomain : 1;
- unsigned fWideColor : 1;
+ unsigned fColorType : 2;
+ GR_STATIC_ASSERT(GrQuadPerEdgeAA::kColorTypeCount <= 4);
// Used to track whether fProxy is ref'ed or has a pending IO after finalize() is called.
unsigned fFinalized : 1;
unsigned fCanSkipAllocatorGather : 1;
- unsigned fProxyCnt : 32 - 8;
+ unsigned fProxyCnt : 32 - 9;
Proxy fProxies[1];
static_assert(kGrQuadTypeCount <= 4, "GrQuadType does not fit in 2 bits");