Add Short4 vertex attributes, and benchmark them as 4.12 colors

This is likely our best widely portable option for encoding wide gamut
colors that doesn't sacrifice too much performance. The benchmark uses
them as we're likely to do: 4.12 fixed point, in the destination color
space. We're using SINT vertex attributes for simplicity, so the encode
and decode are simple multiply/divide by 4096.

Bug: skia:
Change-Id: I9b544f3e187b775d81f83dc9dd44611570ad33c2
Reviewed-on: https://skia-review.googlesource.com/155001
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/bench/VertexColorSpaceBench.cpp b/bench/VertexColorSpaceBench.cpp
index 7f57a91..1251a9f 100644
--- a/bench/VertexColorSpaceBench.cpp
+++ b/bench/VertexColorSpaceBench.cpp
@@ -31,6 +31,7 @@
     kFloat_Mode,     // Transform colors on CPU, use float4 attributes.
     kHalf_Mode,      // Transform colors on CPU, use half4 attributes.
     kShader_Mode,    // Use ubyte4 attributes, transform colors on GPU (vertex shader).
+    kShort_Mode,     // Transform on CPU, use short4 (4.12) attributes with a bit of shader math.
 };
 
 class GP : public GrGeometryProcessor {
@@ -51,6 +52,9 @@
             case kHalf_Mode:
                 fInColor = {"inColor", kHalf4_GrVertexAttribType, kHalf4_GrSLType};
                 break;
+            case kShort_Mode:
+                fInColor = {"inColor", kShort4_GrVertexAttribType, kShort4_GrSLType};
+                break;
         }
         this->setVertexAttributeCnt(2);
     }
@@ -80,6 +84,8 @@
                     vertBuilder->appendColorGamutXform(&xformedColor, "color", &fColorSpaceHelper);
                     vertBuilder->codeAppendf("color = %s;", xformedColor.c_str());
                     vertBuilder->codeAppend("color = half4(color.rgb * color.a, color.a);");
+                } else if (kShort_Mode == gp.fMode) {
+                    vertBuilder->codeAppend("color = color * (1 / 4096.0);");
                 }
 
                 vertBuilder->codeAppendf("%s = color;", varying.vsOut());
@@ -139,7 +145,7 @@
             : INHERITED(ClassID())
             , fMode(mode)
             , fColor4f(color4f) {
-        SkASSERT(kFloat_Mode == fMode || kHalf_Mode == mode);
+        SkASSERT(kFloat_Mode == fMode || kHalf_Mode == mode || kShort_Mode == mode);
         this->setBounds(SkRect::MakeWH(100.f, 100.f), HasAABloat::kNo, IsZeroArea::kNo);
     }
 
@@ -171,6 +177,7 @@
                 vertexStride += sizeof(GrColor4f);
                 break;
             case kHalf_Mode:
+            case kShort_Mode:
                 vertexStride += sizeof(uint64_t);
                 break;
             default:
@@ -208,7 +215,7 @@
             };
             SkASSERT(sizeof(V) == vertexStride);
             uint64_t color;
-            Sk4h halfColor = SkFloatToHalf_finite_ftz(Sk4f::Load(&fColor));
+            Sk4h halfColor = SkFloatToHalf_finite_ftz(Sk4f::Load(&fColor4f));
             color = (uint64_t)halfColor[0] << 48 |
                     (uint64_t)halfColor[1] << 32 |
                     (uint64_t)halfColor[2] << 16 |
@@ -220,6 +227,26 @@
                 v[i + 1].fPos.set(dx * i, 100.0f);
                 v[i + 1].fColor = color;
             }
+        } else if (kShort_Mode == fMode) {
+            struct ShortColor { int16_t fRGBA[4]; };
+            struct V {
+                SkPoint fPos;
+                ShortColor fColor;
+            };
+            SkASSERT(sizeof(V) == vertexStride);
+            Sk4i c = Sk4f_round(Sk4f::Load(&fColor4f) * 4096.0f);
+            c = Sk4i::Max(-32768, Sk4i::Min(c, 32767));
+            ShortColor color;
+            for (int i = 0; i < 4; ++i) {
+                color.fRGBA[i] = c[i];
+            }
+            V* v = (V*)verts;
+            for (int i = 0; i < kVertexCount; i += 2) {
+                v[i + 0].fPos.set(dx * i, 0.0f);
+                v[i + 0].fColor = color;
+                v[i + 1].fPos.set(dx * i, 100.0f);
+                v[i + 1].fColor = color;
+            }
         } else {
             struct V {
                 SkPoint fPos;
@@ -299,6 +326,7 @@
                         op = pool->allocate<Op>(SkColorToUnpremulGrColor(c), xform);
                         break;
                     case kHalf_Mode:
+                    case kShort_Mode:
                     case kFloat_Mode: {
                         GrColor4f c4f = GrColor4f::FromGrColor(SkColorToUnpremulGrColor(c));
                         c4f = xform->apply(c4f);
@@ -322,4 +350,5 @@
 DEF_BENCH(return new VertexColorSpaceBench(kBaseline_Mode, "baseline"));
 DEF_BENCH(return new VertexColorSpaceBench(kFloat_Mode,    "float"));
 DEF_BENCH(return new VertexColorSpaceBench(kHalf_Mode,     "half"));
+DEF_BENCH(return new VertexColorSpaceBench(kShort_Mode,    "short"));
 DEF_BENCH(return new VertexColorSpaceBench(kShader_Mode,   "shader"));
diff --git a/include/private/GrTypesPriv.h b/include/private/GrTypesPriv.h
index 740c1b9..dea649a 100644
--- a/include/private/GrTypesPriv.h
+++ b/include/private/GrTypesPriv.h
@@ -711,6 +711,8 @@
                                      // 255 -> 1.0f.
 
     kShort2_GrVertexAttribType,       // vector of 2 16-bit shorts.
+    kShort4_GrVertexAttribType,       // vector of 4 16-bit shorts.
+
     kUShort2_GrVertexAttribType,      // vector of 2 unsigned shorts. 0 -> 0, 65535 -> 65535.
     kUShort2_norm_GrVertexAttribType, // vector of 2 unsigned shorts. 0 -> 0.0f, 65535 -> 1.0f.
 
diff --git a/src/gpu/GrPrimitiveProcessor.h b/src/gpu/GrPrimitiveProcessor.h
index 8a903f3..d51a075 100644
--- a/src/gpu/GrPrimitiveProcessor.h
+++ b/src/gpu/GrPrimitiveProcessor.h
@@ -293,6 +293,8 @@
             return 4 * sizeof(char);
         case kShort2_GrVertexAttribType:
             return 2 * sizeof(int16_t);
+        case kShort4_GrVertexAttribType:
+            return 4 * sizeof(int16_t);
         case kUShort2_GrVertexAttribType: // fall through
         case kUShort2_norm_GrVertexAttribType:
             return 2 * sizeof(uint16_t);
diff --git a/src/gpu/gl/GrGLVertexArray.cpp b/src/gpu/gl/GrGLVertexArray.cpp
index 2f65ed5..1c8bcb3 100644
--- a/src/gpu/gl/GrGLVertexArray.cpp
+++ b/src/gpu/gl/GrGLVertexArray.cpp
@@ -63,6 +63,8 @@
             return {true, 4, GR_GL_UNSIGNED_BYTE};
         case kShort2_GrVertexAttribType:
             return {false, 2, GR_GL_SHORT};
+        case kShort4_GrVertexAttribType:
+            return {false, 4, GR_GL_SHORT};
         case kUShort2_GrVertexAttribType:
             return {false, 2, GR_GL_UNSIGNED_SHORT};
         case kUShort2_norm_GrVertexAttribType:
@@ -123,6 +125,8 @@
             return false;
         case kShort2_GrVertexAttribType:
             return true;
+        case kShort4_GrVertexAttribType:
+            return true;
         case kUShort2_GrVertexAttribType:
             return shaderCaps->integerSupport(); // FIXME: caller should handle this.
         case kUShort2_norm_GrVertexAttribType:
diff --git a/src/gpu/mtl/GrMtlPipelineStateBuilder.mm b/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
index 5f2c1ca..9339d97 100644
--- a/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
+++ b/src/gpu/mtl/GrMtlPipelineStateBuilder.mm
@@ -129,6 +129,8 @@
             return MTLVertexFormatUChar4Normalized;
         case kShort2_GrVertexAttribType:
             return MTLVertexFormatShort2;
+        case kShort4_GrVertexAttribType:
+            return MTLVertexFormatShort4;
         case kUShort2_GrVertexAttribType:
             return MTLVertexFormatUShort2;
         case kUShort2_norm_GrVertexAttribType:
diff --git a/src/gpu/vk/GrVkPipeline.cpp b/src/gpu/vk/GrVkPipeline.cpp
index c07f082..7ea898d 100644
--- a/src/gpu/vk/GrVkPipeline.cpp
+++ b/src/gpu/vk/GrVkPipeline.cpp
@@ -60,6 +60,8 @@
             return VK_FORMAT_R8G8B8A8_UNORM;
         case kShort2_GrVertexAttribType:
             return VK_FORMAT_R16G16_SINT;
+        case kShort4_GrVertexAttribType:
+            return VK_FORMAT_R16G16B16A16_SINT;
         case kUShort2_GrVertexAttribType:
             return VK_FORMAT_R16G16_UINT;
         case kUShort2_norm_GrVertexAttribType: