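Emulate vector shift-by-immediate intrinsics.

When emulateIntrinsics is set, operator<< and operator>> with an immediate
shift count on Short4, UShort4, Short8, UShort8, Int2, UInt2, Int4 and UInt4
shift each element separately (Extract, scalar shift, Insert) instead of
emitting a single vector shift. Add the Extract/Insert overloads this needs
for UShort4, Short8, UShort8, UInt2 and UInt4, and make two Int4 conversion
constructors use As<Int4>(...) >> N rather than calling Nucleus::createAShr
directly, so they go through the same operator.
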
Bug: b/37496338
Change-Id: I01e4aaf49e86d9c2f11647a0e84bf1ee388a90cf
Reviewed-on: https://swiftshader-review.googlesource.com/10931
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 1a0ac08..3ed9d50 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3249,12 +3249,38 @@
RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
{
- return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
{
- return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short4 result;
+ result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
@@ -3622,14 +3648,50 @@
return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UShort> Extract(RValue<UShort4> val, int i)
+ {
+ return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+ }
+
+ RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
+ {
+ return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
{
- return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
{
- return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort4 result;
+ result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
@@ -3776,14 +3838,58 @@
return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
}
+ RValue<Short> Extract(RValue<Short8> val, int i)
+ {
+ return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
+ }
+
+ RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
+ {
+ return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
{
- return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short8 result;
+ result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
{
- return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Short8 result;
+ result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
@@ -3866,14 +3972,58 @@
return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
}
+ RValue<UShort> Extract(RValue<UShort8> val, int i)
+ {
+ return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+ }
+
+ RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
+ {
+ return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
{
- return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort8 result;
+ result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
{
- return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UShort8 result;
+ result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+ result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
+ result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
+ result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
+ result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
@@ -4788,12 +4938,34 @@
RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
{
- return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int2 result;
+ result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
{
- return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int2 result;
+ result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
@@ -4975,14 +5147,46 @@
return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UInt> Extract(RValue<UInt2> val, int i)
+ {
+ return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
+ }
+
+ RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
+ {
+ return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
+ }
+
RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
{
- return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt2 result;
+ result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
{
- return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt2 result;
+ result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
@@ -5078,18 +5282,15 @@
Value *x = Nucleus::createBitCast(cast.value, Int::getType());
Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
- Value *e;
int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
Value *b = Nucleus::createBitCast(a, Byte16::getType());
Value *c = Nucleus::createShuffleVector(b, b, swizzle);
int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *d = Nucleus::createBitCast(c, Short8::getType());
- e = Nucleus::createShuffleVector(d, d, swizzle2);
+ Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
- Value *f = Nucleus::createBitCast(e, Int4::getType());
- Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
- storeValue(g);
+ *this = As<Int4>(e) >> 24;
}
Int4::Int4(RValue<Float4> cast)
@@ -5103,9 +5304,8 @@
{
int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
- Value *d = Nucleus::createBitCast(c, Int4::getType());
- Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
- storeValue(e);
+
+ *this = As<Int4>(c) >> 16;
}
Int4::Int4(RValue<UShort4> cast)
@@ -5269,12 +5469,38 @@
RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
{
- return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int4 result;
+ result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
{
- return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ Int4 result;
+ result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
@@ -5620,14 +5846,50 @@
return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
}
+ RValue<UInt> Extract(RValue<UInt4> x, int i)
+ {
+ return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
+ }
+
+ RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
+ {
+ return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
+ }
+
RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
{
- return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt4 result;
+ result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
{
- return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ if(emulateIntrinsics)
+ {
+ UInt4 result;
+ result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+ result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+ result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
+ result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
+
+ return result;
+ }
+ else
+ {
+ return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+ }
}
RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)