[SelectionDAG] fix bug in translating funnel shift with non-power-of-2 type

The bug is visible in the constant-folded x86 tests. We can't use the
negated shift amount when the type is not power-of-2:
https://rise4fun.com/Alive/US1r

...so in that case, use the regular lowering that includes a select
to guard against a shift-by-bitwidth. This path is improved by only
calculating the modulo shift amount once now.

Also, improve the rotate (with power-of-2 size) lowering to use
a negate rather than subtract from bitwidth. This improves the
codegen whether we have a rotate instruction or not (although
we can still see that we're not matching to a legal rotate in
all cases).

llvm-svn: 338592

commit: 8aac22e06a196541961cd6d6cc46ffd4f39b60c3 [log] [tgz]
author: Sanjay Patel <spatel@rotateright.com> Wed Aug 01 17:17:08 2018 +0000
committer: Sanjay Patel <spatel@rotateright.com> Wed Aug 01 17:17:08 2018 +0000
tree: ce54e012d6bc3a7d3f1e697f5ab4cdd9fdb69bbd
parent: 0bb8d83c89321c0bfe955c5e2b2069a2870912d4 [diff] [blame]
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index d93c936..35f46e7 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll

@@ -40,7 +40,7 @@
 define i16 @rotl_i16(i16 %x, i16 %z) {
 ; CHECK-LABEL: rotl_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 5, 4, 16
+; CHECK-NEXT:    neg 5, 4
 ; CHECK-NEXT:    clrlwi 6, 3, 16
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 28, 31
 ; CHECK-NEXT:    clrlwi 5, 5, 28
@@ -75,13 +75,11 @@
 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
 ; CHECK-LABEL: rotl_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI5_0@toc@l
-; CHECK-NEXT:    lvx 4, 0, 3
-; CHECK-NEXT:    vsubuwm 4, 4, 3
-; CHECK-NEXT:    vslw 3, 2, 3
-; CHECK-NEXT:    vsrw 2, 2, 4
-; CHECK-NEXT:    xxlor 34, 35, 34
+; CHECK-NEXT:    xxlxor 36, 36, 36
+; CHECK-NEXT:    vslw 5, 2, 3
+; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    vsrw 2, 2, 3
+; CHECK-NEXT:    xxlor 34, 37, 34
 ; CHECK-NEXT:    blr
   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
   ret <4 x i32> %f
@@ -131,7 +129,7 @@
 define i16 @rotr_i16(i16 %x, i16 %z) {
 ; CHECK-LABEL: rotr_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 5, 4, 16
+; CHECK-NEXT:    neg 5, 4
 ; CHECK-NEXT:    clrlwi 6, 3, 16
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 28, 31
 ; CHECK-NEXT:    clrlwi 5, 5, 28
@@ -146,7 +144,7 @@
 define i32 @rotr_i32(i32 %x, i32 %z) {
 ; CHECK-LABEL: rotr_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 32
+; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    clrlwi 4, 4, 27
 ; CHECK-NEXT:    rlwnm 3, 3, 4, 0, 31
 ; CHECK-NEXT:    blr
@@ -157,7 +155,7 @@
 define i64 @rotr_i64(i64 %x, i64 %z) {
 ; CHECK-LABEL: rotr_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 64
+; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 26, 31
 ; CHECK-NEXT:    rotld 3, 3, 4
 ; CHECK-NEXT:    blr
@@ -170,13 +168,11 @@
 define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
 ; CHECK-LABEL: rotr_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI12_0@toc@l
-; CHECK-NEXT:    lvx 4, 0, 3
-; CHECK-NEXT:    vsubuwm 4, 4, 3
-; CHECK-NEXT:    vsrw 3, 2, 3
-; CHECK-NEXT:    vslw 2, 2, 4
-; CHECK-NEXT:    xxlor 34, 34, 35
+; CHECK-NEXT:    xxlxor 36, 36, 36
+; CHECK-NEXT:    vsrw 5, 2, 3
+; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    vslw 2, 2, 3
+; CHECK-NEXT:    xxlor 34, 34, 37
 ; CHECK-NEXT:    blr
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
   ret <4 x i32> %f
commit	8aac22e06a196541961cd6d6cc46ffd4f39b60c3	[log] [tgz]
author	Sanjay Patel <spatel@rotateright.com>	Wed Aug 01 17:17:08 2018 +0000
committer	Sanjay Patel <spatel@rotateright.com>	Wed Aug 01 17:17:08 2018 +0000
tree	ce54e012d6bc3a7d3f1e697f5ab4cdd9fdb69bbd
parent	0bb8d83c89321c0bfe955c5e2b2069a2870912d4 [diff] [blame]