Emulate vector shift-by-immediate intrinsics.

Bug b/37496338

Change-Id: I01e4aaf49e86d9c2f11647a0e84bf1ee388a90cf
Reviewed-on: https://swiftshader-review.googlesource.com/10931
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Reactor/SubzeroReactor.cpp b/src/Reactor/SubzeroReactor.cpp
index 1a0ac08..3ed9d50 100644
--- a/src/Reactor/SubzeroReactor.cpp
+++ b/src/Reactor/SubzeroReactor.cpp
@@ -3249,12 +3249,38 @@
 
 	RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
 	{
-		return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Short4 result;
+			result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Short4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
 	{
-		return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Short4 result;
+			result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Short4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
@@ -3622,14 +3648,50 @@
 		return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
 	}
 
+	RValue<UShort> Extract(RValue<UShort4> val, int i)
+	{
+		return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+	}
+
+	RValue<UShort4> Insert(RValue<UShort4> val, RValue<UShort> element, int i)
+	{
+		return RValue<UShort4>(Nucleus::createInsertElement(val.value, element.value, i));
+	}
+
 	RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
 	{
-		return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UShort4 result;
+			result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UShort4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
 	{
-		return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UShort4 result;
+			result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UShort4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
@@ -3776,14 +3838,58 @@
 		return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
 	}
 
+	RValue<Short> Extract(RValue<Short8> val, int i)
+	{
+		return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
+	}
+
+	RValue<Short8> Insert(RValue<Short8> val, RValue<Short> element, int i)
+	{
+		return RValue<Short8>(Nucleus::createInsertElement(val.value, element.value, i));
+	}
+
 	RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
 	{
-		return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Short8 result;
+			result = Insert(result, Extract(lhs, 0) << Short(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << Short(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << Short(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << Short(rhs), 3);
+			result = Insert(result, Extract(lhs, 4) << Short(rhs), 4);
+			result = Insert(result, Extract(lhs, 5) << Short(rhs), 5);
+			result = Insert(result, Extract(lhs, 6) << Short(rhs), 6);
+			result = Insert(result, Extract(lhs, 7) << Short(rhs), 7);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Short8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
 	{
-		return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Short8 result;
+			result = Insert(result, Extract(lhs, 0) >> Short(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> Short(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> Short(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> Short(rhs), 3);
+			result = Insert(result, Extract(lhs, 4) >> Short(rhs), 4);
+			result = Insert(result, Extract(lhs, 5) >> Short(rhs), 5);
+			result = Insert(result, Extract(lhs, 6) >> Short(rhs), 6);
+			result = Insert(result, Extract(lhs, 7) >> Short(rhs), 7);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Short8>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
@@ -3866,14 +3972,58 @@
 		return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
 	}
 
+	RValue<UShort> Extract(RValue<UShort8> val, int i)
+	{
+		return RValue<UShort>(Nucleus::createExtractElement(val.value, UShort::getType(), i));
+	}
+
+	RValue<UShort8> Insert(RValue<UShort8> val, RValue<UShort> element, int i)
+	{
+		return RValue<UShort8>(Nucleus::createInsertElement(val.value, element.value, i));
+	}
+
 	RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
 	{
-		return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UShort8 result;
+			result = Insert(result, Extract(lhs, 0) << UShort(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << UShort(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << UShort(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << UShort(rhs), 3);
+			result = Insert(result, Extract(lhs, 4) << UShort(rhs), 4);
+			result = Insert(result, Extract(lhs, 5) << UShort(rhs), 5);
+			result = Insert(result, Extract(lhs, 6) << UShort(rhs), 6);
+			result = Insert(result, Extract(lhs, 7) << UShort(rhs), 7);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UShort8>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
 	{
-		return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UShort8 result;
+			result = Insert(result, Extract(lhs, 0) >> UShort(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> UShort(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> UShort(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> UShort(rhs), 3);
+			result = Insert(result, Extract(lhs, 4) >> UShort(rhs), 4);
+			result = Insert(result, Extract(lhs, 5) >> UShort(rhs), 5);
+			result = Insert(result, Extract(lhs, 6) >> UShort(rhs), 6);
+			result = Insert(result, Extract(lhs, 7) >> UShort(rhs), 7);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UShort8>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
@@ -4788,12 +4938,34 @@
 
 	RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
 	{
-		return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Int2 result;
+			result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Int2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
 	{
-		return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Int2 result;
+			result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Int2>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
@@ -4975,14 +5147,46 @@
 		return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
 	}
 
+	RValue<UInt> Extract(RValue<UInt2> val, int i)
+	{
+		return RValue<UInt>(Nucleus::createExtractElement(val.value, UInt::getType(), i));
+	}
+
+	RValue<UInt2> Insert(RValue<UInt2> val, RValue<UInt> element, int i)
+	{
+		return RValue<UInt2>(Nucleus::createInsertElement(val.value, element.value, i));
+	}
+
 	RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
 	{
-		return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UInt2 result;
+			result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UInt2>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
 	{
-		return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UInt2 result;
+			result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UInt2>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
@@ -5078,18 +5282,15 @@
 		Value *x = Nucleus::createBitCast(cast.value, Int::getType());
 		Value *a = Nucleus::createInsertElement(loadValue(), x, 0);
 
-		Value *e;
 		int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
 		Value *b = Nucleus::createBitCast(a, Byte16::getType());
 		Value *c = Nucleus::createShuffleVector(b, b, swizzle);
 
 		int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
 		Value *d = Nucleus::createBitCast(c, Short8::getType());
-		e = Nucleus::createShuffleVector(d, d, swizzle2);
+		Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
 
-		Value *f = Nucleus::createBitCast(e, Int4::getType());
-		Value *g = Nucleus::createAShr(f, V(::context->getConstantInt32(24)));
-		storeValue(g);
+		*this = As<Int4>(e) >> 24;
 	}
 
 	Int4::Int4(RValue<Float4> cast)
@@ -5103,9 +5304,8 @@
 	{
 		int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
 		Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
-		Value *d = Nucleus::createBitCast(c, Int4::getType());
-		Value *e = Nucleus::createAShr(d, V(::context->getConstantInt32(16)));
-		storeValue(e);
+
+		*this = As<Int4>(c) >> 16;
 	}
 
 	Int4::Int4(RValue<UShort4> cast)
@@ -5269,12 +5469,38 @@
 
 	RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
 	{
-		return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Int4 result;
+			result = Insert(result, Extract(lhs, 0) << Int(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << Int(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << Int(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << Int(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Int4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
 	{
-		return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			Int4 result;
+			result = Insert(result, Extract(lhs, 0) >> Int(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> Int(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> Int(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> Int(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<Int4>(Nucleus::createAShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
@@ -5620,14 +5846,50 @@
 		return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
 	}
 
+	RValue<UInt> Extract(RValue<UInt4> x, int i)
+	{
+		return RValue<UInt>(Nucleus::createExtractElement(x.value, UInt::getType(), i));
+	}
+
+	RValue<UInt4> Insert(RValue<UInt4> x, RValue<UInt> element, int i)
+	{
+		return RValue<UInt4>(Nucleus::createInsertElement(x.value, element.value, i));
+	}
+
 	RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
 	{
-		return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UInt4 result;
+			result = Insert(result, Extract(lhs, 0) << UInt(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) << UInt(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) << UInt(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) << UInt(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UInt4>(Nucleus::createShl(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
 	{
-		return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		if(emulateIntrinsics)
+		{
+			UInt4 result;
+			result = Insert(result, Extract(lhs, 0) >> UInt(rhs), 0);
+			result = Insert(result, Extract(lhs, 1) >> UInt(rhs), 1);
+			result = Insert(result, Extract(lhs, 2) >> UInt(rhs), 2);
+			result = Insert(result, Extract(lhs, 3) >> UInt(rhs), 3);
+
+			return result;
+		}
+		else
+		{
+			return RValue<UInt4>(Nucleus::createLShr(lhs.value, V(::context->getConstantInt32(rhs))));
+		}
 	}
 
 	RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)