Add native f32 min/max ops to SkVM
No diffs.
Change-Id: Ia0b35c2787e27d74763f21b81072affa6caf1e5a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/253720
Commit-Queue: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Reed <reed@google.com>
Auto-Submit: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Reed <reed@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index a51b4f0..62b395a 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -138,6 +138,8 @@
case Op::sub_f32: write(o, V{id}, "= sub_f32", V{x}, V{y} ); break;
case Op::mul_f32: write(o, V{id}, "= mul_f32", V{x}, V{y} ); break;
case Op::div_f32: write(o, V{id}, "= div_f32", V{x}, V{y} ); break;
+ case Op::min_f32: write(o, V{id}, "= min_f32", V{x}, V{y} ); break;
+ case Op::max_f32: write(o, V{id}, "= max_f32", V{x}, V{y} ); break;
case Op::mad_f32: write(o, V{id}, "= mad_f32", V{x}, V{y}, V{z}); break;
case Op:: eq_f32: write(o, V{id}, "= eq_f32", V{x}, V{y}); break;
@@ -243,6 +245,8 @@
case Op::sub_f32: write(o, R{d}, "= sub_f32", R{x}, R{y} ); break;
case Op::mul_f32: write(o, R{d}, "= mul_f32", R{x}, R{y} ); break;
case Op::div_f32: write(o, R{d}, "= div_f32", R{x}, R{y} ); break;
+ case Op::min_f32: write(o, R{d}, "= min_f32", R{x}, R{y} ); break;
+ case Op::max_f32: write(o, R{d}, "= max_f32", R{x}, R{y} ); break;
case Op::mad_f32: write(o, R{d}, "= mad_f32", R{x}, R{y}, R{z}); break;
case Op:: eq_f32: write(o, R{d}, "= eq_f32", R{x}, R{y}); break;
@@ -497,6 +501,8 @@
F32 Builder::sub(F32 x, F32 y ) { return {this->push(Op::sub_f32, x.id, y.id)}; }
F32 Builder::mul(F32 x, F32 y ) { return {this->push(Op::mul_f32, x.id, y.id)}; }
F32 Builder::div(F32 x, F32 y ) { return {this->push(Op::div_f32, x.id, y.id)}; }
+ F32 Builder::min(F32 x, F32 y ) { return {this->push(Op::min_f32, x.id, y.id)}; }
+ F32 Builder::max(F32 x, F32 y ) { return {this->push(Op::max_f32, x.id, y.id)}; }
F32 Builder::mad(F32 x, F32 y, F32 z) {
if (this->isZero(z.id)) {
return this->mul(x,y);
@@ -755,6 +761,8 @@
void Assembler::vsubps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5c, dst,x,y); }
void Assembler::vmulps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x59, dst,x,y); }
void Assembler::vdivps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5e, dst,x,y); }
+ void Assembler::vminps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5d, dst,x,y); }
+ void Assembler::vmaxps(Ymm dst, Ymm x, Ymm y) { this->op(0,0x0f,0x5f, dst,x,y); }
void Assembler::vfmadd132ps(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0x98, dst,x,y); }
void Assembler::vfmadd213ps(Ymm dst, Ymm x, Ymm y) { this->op(0x66,0x380f,0xa8, dst,x,y); }
@@ -1063,6 +1071,8 @@
void Assembler::fsub4s(V d, V n, V m) { this->op(0b0'1'0'01110'1'0'1, m, 0b11010'1, n, d); }
void Assembler::fmul4s(V d, V n, V m) { this->op(0b0'1'1'01110'0'0'1, m, 0b11011'1, n, d); }
void Assembler::fdiv4s(V d, V n, V m) { this->op(0b0'1'1'01110'0'0'1, m, 0b11111'1, n, d); }
+ void Assembler::fmin4s(V d, V n, V m) { this->op(0b0'1'0'01110'1'0'1, m, 0b11110'1, n, d); }
+ void Assembler::fmax4s(V d, V n, V m) { this->op(0b0'1'0'01110'0'0'1, m, 0b11110'1, n, d); }
void Assembler::fcmeq4s(V d, V n, V m) { this->op(0b0'1'0'01110'0'0'1, m, 0b1110'0'1, n, d); }
void Assembler::fcmgt4s(V d, V n, V m) { this->op(0b0'1'1'01110'1'0'1, m, 0b1110'0'1, n, d); }
@@ -1364,6 +1374,8 @@
CASE(Op::sub_f32): r(d).f32 = r(x).f32 - r(y).f32; break;
CASE(Op::mul_f32): r(d).f32 = r(x).f32 * r(y).f32; break;
CASE(Op::div_f32): r(d).f32 = r(x).f32 / r(y).f32; break;
+ CASE(Op::min_f32): r(d).f32 = min(r(x).f32, r(y).f32); break;
+ CASE(Op::max_f32): r(d).f32 = max(r(x).f32, r(y).f32); break;
CASE(Op::mad_f32): r(d).f32 = r(x).f32 * r(y).f32 + r(z).f32; break;
@@ -1872,6 +1884,8 @@
case Op::sub_f32: a->vsubps(dst(), r[x], r[y]); break;
case Op::mul_f32: a->vmulps(dst(), r[x], r[y]); break;
case Op::div_f32: a->vdivps(dst(), r[x], r[y]); break;
+ case Op::min_f32: a->vminps(dst(), r[x], r[y]); break;
+ case Op::max_f32: a->vmaxps(dst(), r[x], r[y]); break;
case Op::mad_f32:
if (avail & (1<<r[x])) { set_dst(r[x]); a->vfmadd132ps(r[x], r[z], r[y]); }
@@ -1956,6 +1970,8 @@
case Op::sub_f32: a->fsub4s(dst(), r[x], r[y]); break;
case Op::mul_f32: a->fmul4s(dst(), r[x], r[y]); break;
case Op::div_f32: a->fdiv4s(dst(), r[x], r[y]); break;
+ case Op::min_f32: a->fmin4s(dst(), r[x], r[y]); break;
+ case Op::max_f32: a->fmax4s(dst(), r[x], r[y]); break;
case Op::mad_f32: // fmla4s is z += x*y
if (avail & (1<<r[z])) { set_dst(r[z]); a->fmla4s( r[z], r[x], r[y]); }
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index 24f95ca..ddda739 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -69,7 +69,7 @@
DstEqXOpY vpand, vpor, vpxor, vpandn,
vpaddd, vpsubd, vpmulld,
vpsubw, vpmullw,
- vaddps, vsubps, vmulps, vdivps,
+ vaddps, vsubps, vmulps, vdivps, vminps, vmaxps,
vfmadd132ps, vfmadd213ps, vfmadd231ps,
vpackusdw, vpackuswb,
vpcmpeqd, vpcmpgtd;
@@ -144,7 +144,7 @@
add4s, sub4s, mul4s,
cmeq4s, cmgt4s,
sub8h, mul8h,
- fadd4s, fsub4s, fmul4s, fdiv4s,
+ fadd4s, fsub4s, fmul4s, fdiv4s, fmin4s, fmax4s,
fcmeq4s, fcmgt4s, fcmge4s,
tbl;
@@ -262,6 +262,8 @@
sub_f32, sub_i32, sub_i16x2,
mul_f32, mul_i32, mul_i16x2,
div_f32,
+ min_f32,
+ max_f32,
mad_f32,
shl_i32, shl_i16x2,
shr_i32, shr_i16x2,
@@ -368,6 +370,8 @@
F32 sub(F32 x, F32 y);
F32 mul(F32 x, F32 y);
F32 div(F32 x, F32 y);
+ F32 min(F32 x, F32 y);
+ F32 max(F32 x, F32 y);
F32 mad(F32 x, F32 y, F32 z); // x*y+z, often an FMA
I32 eq (F32 x, F32 y);
@@ -457,10 +461,6 @@
uint32_t hash() const;
- // TODO: native min/max ops
- skvm::F32 min(skvm::F32 x, skvm::F32 y) { return select(lt(x,y), x,y); }
- skvm::F32 max(skvm::F32 x, skvm::F32 y) { return select(gt(x,y), x,y); }
-
private:
struct InstructionHash {
size_t operator()(const Instruction& inst) const;
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index 86a2ae5..f298021 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -829,6 +829,14 @@
});
test_asm(r, [&](A& a) {
+ a.vminps(A::ymm0, A::ymm1, A::ymm2);
+ a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
+ },{
+ 0xc5,0xf4,0x5d,0xc2,
+ 0xc5,0xf4,0x5f,0xc2,
+ });
+
+ test_asm(r, [&](A& a) {
a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
},{
0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
@@ -1063,6 +1071,8 @@
a.fsub4s(A::v4, A::v3, A::v1);
a.fmul4s(A::v4, A::v3, A::v1);
a.fdiv4s(A::v4, A::v3, A::v1);
+ a.fmin4s(A::v4, A::v3, A::v1);
+ a.fmax4s(A::v4, A::v3, A::v1);
a.fmla4s(A::v4, A::v3, A::v1);
@@ -1091,6 +1101,8 @@
0x64,0xd4,0xa1,0x4e,
0x64,0xdc,0x21,0x6e,
0x64,0xfc,0x21,0x6e,
+ 0x64,0xf4,0xa1,0x4e,
+ 0x64,0xf4,0x21,0x4e,
0x64,0xcc,0x21,0x4e,