remove SkVM uniform8/16

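Uniforms in practice are always pointers or 32-bit ints or floats, so
uniform8 and uniform16 are essentially dead code. The change to
SkVMBlitter.cpp is the only interesting one: blitAntiH() now scales
each run's 8-bit coverage to a float on the CPU and passes that as a
uniform, replacing the UniformA8 coverage mode with UniformF. I think
this makes more sense than before: the program needs float coverage
in the end, so we might as well feed it one directly.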

Change-Id: I7f1e77731cf10ccc35595012a6df4f9e54a0dad8
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/338631
Commit-Queue: Mike Reed <reed@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Reed <reed@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index 514c372..1bffe85 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -242,8 +242,6 @@
             case Op::gather16: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}); break;
             case Op::gather32: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}, V{x}); break;
 
-            case Op::uniform8:  write(o, V{id}, "=", op, Arg{immy}, Hex{immz}); break;
-            case Op::uniform16: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}); break;
             case Op::uniform32: write(o, V{id}, "=", op, Arg{immy}, Hex{immz}); break;
 
             case Op::splat: write(o, V{id}, "=", op, Splat{immy}); break;
@@ -356,8 +354,6 @@
                 case Op::gather16: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
                 case Op::gather32: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}, R{x}); break;
 
-                case Op::uniform8:  write(o, R{d}, "=", op, Arg{immy}, Hex{immz}); break;
-                case Op::uniform16: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}); break;
                 case Op::uniform32: write(o, R{d}, "=", op, Arg{immy}, Hex{immz}); break;
 
                 case Op::splat:     write(o, R{d}, "=", op, Splat{immy}); break;
@@ -664,12 +660,6 @@
         return {this, push(Op::gather32, index.id,NA,NA, ptr.ix,offset)};
     }
 
-    I32 Builder::uniform8(Arg ptr, int offset) {
-        return {this, push(Op::uniform8, NA,NA,NA, ptr.ix, offset)};
-    }
-    I32 Builder::uniform16(Arg ptr, int offset) {
-        return {this, push(Op::uniform16, NA,NA,NA, ptr.ix, offset)};
-    }
     I32 Builder::uniform32(Arg ptr, int offset) {
         return {this, push(Op::uniform32, NA,NA,NA, ptr.ix, offset)};
     }
@@ -2551,14 +2541,11 @@
 
                 case Op::splat: vals[i] = llvm::ConstantInt::get(I32, immy); break;
 
-                case Op::uniform8:  t = i8 ; goto uniform;
-                case Op::uniform16: t = i16; goto uniform;
-                case Op::uniform32: t = i32; goto uniform;
-                uniform: {
+                case Op::uniform32: {
                     llvm::Value* ptr = b->CreateBitCast(b->CreateConstInBoundsGEP1_32(nullptr,
                                                                                       args[immy],
                                                                                       immz),
-                                                        t->getPointerTo());
+                                                        i32->getPointerTo());
                     llvm::Value* val = b->CreateZExt(b->CreateAlignedLoad(ptr, 1), i32);
                     vals[i] = I32->isVectorTy() ? b->CreateVectorSplat(K, val)
                                                 : val;
@@ -3575,16 +3562,6 @@
                 }
                 break;
 
-                case Op::uniform8: a->movzbq(GP0, A::Mem{arg[immy], immz});
-                                   a->vmovd((A::Xmm)dst(), GP0);
-                                   a->vbroadcastss(dst(), dst());
-                                   break;
-
-                case Op::uniform16: a->movzwq(GP0, A::Mem{arg[immy], immz});
-                                    a->vmovd((A::Xmm)dst(), GP0);
-                                    a->vbroadcastss(dst(), dst());
-                                    break;
-
                 case Op::uniform32: a->vbroadcastss(dst(), A::Mem{arg[immy], immz});
                                     break;
 
@@ -3778,17 +3755,6 @@
                                  else        { a->ldrq(dst(), arg[immy]); }
                                                break;
 
-                case Op::uniform8: a->add(GP0, arg[immy], immz);
-                                   a->ld1r16b(dst(), GP0);
-                                   a->uxtlb2h(dst(), dst());
-                                   a->uxtlh2s(dst(), dst());
-                                   break;
-
-                case Op::uniform16: a->add(GP0, arg[immy], immz);
-                                    a->ld1r8h(dst(), GP0);
-                                    a->uxtlh2s(dst(), dst());
-                                    break;
-
                 case Op::uniform32: a->add(GP0, arg[immy], immz);
                                     a->ld1r4s(dst(), GP0);
                                     break;
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index b864252..8158bba 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -423,7 +423,7 @@
         M(index)                                                     \
         M(load8)    M(load16)    M(load32)  M(load64) M(load128)     \
         M(gather8)  M(gather16)  M(gather32)                         \
-        M(uniform8) M(uniform16) M(uniform32)                        \
+                                 M(uniform32)                        \
         M(splat)                                                     \
         M(add_f32) M(add_i32)                                        \
         M(sub_f32) M(sub_i32)                                        \
@@ -631,9 +631,7 @@
         I32 load64 (Arg ptr, int lane);  // Load 32-bit lane 0-1 of  64-bit value.
         I32 load128(Arg ptr, int lane);  // Load 32-bit lane 0-3 of 128-bit value.
 
-        // Load u8,u16,i32 uniform with byte-count offset.
-        I32 uniform8 (Arg ptr, int offset);
-        I32 uniform16(Arg ptr, int offset);
+        // Load i32/f32 uniform with byte-count offset.
         I32 uniform32(Arg ptr, int offset);
         F32 uniformF (Arg ptr, int offset) { return this->bit_cast(this->uniform32(ptr,offset)); }
 
@@ -649,8 +647,6 @@
         }
 
         // Convenience methods for working with skvm::Uniform(s).
-        I32 uniform8 (Uniform u)            { return this->uniform8 (u.ptr, u.offset); }
-        I32 uniform16(Uniform u)            { return this->uniform16(u.ptr, u.offset); }
         I32 uniform32(Uniform u)            { return this->uniform32(u.ptr, u.offset); }
         F32 uniformF (Uniform u)            { return this->uniformF (u.ptr, u.offset); }
         I32 gather8  (Uniform u, I32 index) { return this->gather8  (u.ptr, u.offset, index); }
diff --git a/src/core/SkVMBlitter.cpp b/src/core/SkVMBlitter.cpp
index ffb3cf9..db3128f 100644
--- a/src/core/SkVMBlitter.cpp
+++ b/src/core/SkVMBlitter.cpp
@@ -33,7 +33,7 @@
     static_assert(SkIsAlign4(sizeof(BlitterUniforms)), "");
     static constexpr int kBlitterUniformsCount = sizeof(BlitterUniforms) / 4;
 
-    enum class Coverage { Full, UniformA8, MaskA8, MaskLCD16, Mask3D };
+    enum class Coverage { Full, UniformF, MaskA8, MaskLCD16, Mask3D };
 
     struct Params {
         sk_sp<SkShader>         shader;
@@ -198,7 +198,7 @@
         //    - Mask3D:    mul varying, add varying, 8-bit coverage varying
         //    - MaskA8:    8-bit coverage varying
         //    - MaskLCD16: 565 coverage varying
-        //    - UniformA8: 8-bit coverage uniform
+        //    - UniformF:  float coverage uniform
 
         skvm::Coord device = device_coord(p, uniforms);
         skvm::Color paint = p->uniformColor(params.paint, uniforms);
@@ -259,8 +259,8 @@
                 cov.r = cov.g = cov.b = cov.a = p->splat(1.0f);
                 break;
 
-            case Coverage::UniformA8:
-                cov.r = cov.g = cov.b = cov.a = from_unorm(8, p->uniform8(p->uniform(), 0));
+            case Coverage::UniformF:
+                cov.r = cov.g = cov.b = cov.a = p->uniformF(p->uniform(), 0);
                 break;
 
             case Coverage::Mask3D:
@@ -581,7 +581,7 @@
                     }
                 };
                 cache_program(std::move(fBlitH),         Coverage::Full);
-                cache_program(std::move(fBlitAntiH),     Coverage::UniformA8);
+                cache_program(std::move(fBlitAntiH),     Coverage::UniformF);
                 cache_program(std::move(fBlitMaskA8),    Coverage::MaskA8);
                 cache_program(std::move(fBlitMask3D),    Coverage::Mask3D);
                 cache_program(std::move(fBlitMaskLCD16), Coverage::MaskLCD16);
@@ -675,14 +675,15 @@
 
         void blitAntiH(int x, int y, const SkAlpha cov[], const int16_t runs[]) override {
             if (fBlitAntiH.empty()) {
-                fBlitAntiH = this->buildProgram(Coverage::UniformA8);
+                fBlitAntiH = this->buildProgram(Coverage::UniformF);
             }
             for (int16_t run = *runs; run > 0; run = *runs) {
                 this->updateUniforms(x+run, y);
+                const float covF = *cov * (1/255.0f);
                 if (const void* sprite = this->isSprite(x,y)) {
-                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), sprite, cov);
+                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), sprite, &covF);
                 } else {
-                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), cov);
+                    fBlitAntiH.eval(run, fUniforms.buf.data(), fDevice.addr(x,y), &covF);
                 }
                 x    += run;
                 runs += run;
diff --git a/src/opts/SkVM_opts.h b/src/opts/SkVM_opts.h
index da39ae0..6d4b332 100644
--- a/src/opts/SkVM_opts.h
+++ b/src/opts/SkVM_opts.h
@@ -210,14 +210,8 @@
                         r[d].i32 = n - I32::Load(iota);
                     } break;
 
-                    CASE(Op::uniform8):
-                        r[d].i32 = *(const uint8_t* )( (const char*)args[immy] + immz );
-                        break;
-                    CASE(Op::uniform16):
-                        r[d].i32 = *(const uint16_t*)( (const char*)args[immy] + immz );
-                        break;
                     CASE(Op::uniform32):
-                        r[d].i32 = *(const int*     )( (const char*)args[immy] + immz );
+                        r[d].i32 = *(const int*)( (const char*)args[immy] + immz );
                         break;
 
                     CASE(Op::splat): r[d].i32 = immy; break;
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index a11a499..8b888ac 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -854,10 +854,10 @@
         const size_t kPtr = sizeof(const int*);
 
         x = b.add(x, b.uniform32(uniforms, kPtr+0));
-        x = b.mul(x, b.uniform8 (uniforms, kPtr+4));
-        x = b.sub(x, b.uniform16(uniforms, kPtr+6));
+        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
+        x = b.sub(x, b.uniform32(uniforms, kPtr+8));
 
-        skvm::I32 limit = b.uniform32(uniforms, kPtr+8);
+        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
         x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
         x = b.select(b.gt(x, limit     ), limit     , x);
 
@@ -889,8 +889,8 @@
         struct {
             const uint8_t* img;
             int      add   = 5;
-            uint8_t  mul   = 3;
-            uint16_t sub   = 18;
+            int      mul   = 3;
+            int      sub   = 18;
             int      limit = M-1;
         } uniforms{img};