[SelectionDAG] fix bug in translating funnel shift with non-power-of-2 type
The bug is visible in the constant-folded x86 tests. We can't use the
negated shift amount when the type's bit width is not a power of 2:
https://rise4fun.com/Alive/US1r
...so in that case, use the regular lowering, which includes a select
to guard against a shift-by-bitwidth. That path is also improved: the
modulo shift amount is now calculated only once.
Also, improve the rotate lowering (for power-of-2 bit widths) to use
a negate rather than a subtract from the bit width. This improves the
codegen whether or not we have a rotate instruction (although we can
still see that we're not matching to a legal rotate in all cases).
llvm-svn: 338592
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index d93c936..35f46e7 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -40,7 +40,7 @@
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotl_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 5, 4, 16
+; CHECK-NEXT: neg 5, 4
; CHECK-NEXT: clrlwi 6, 3, 16
; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: clrlwi 5, 5, 28
@@ -75,13 +75,11 @@
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotl_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI5_0@toc@l
-; CHECK-NEXT: lvx 4, 0, 3
-; CHECK-NEXT: vsubuwm 4, 4, 3
-; CHECK-NEXT: vslw 3, 2, 3
-; CHECK-NEXT: vsrw 2, 2, 4
-; CHECK-NEXT: xxlor 34, 35, 34
+; CHECK-NEXT: xxlxor 36, 36, 36
+; CHECK-NEXT: vslw 5, 2, 3
+; CHECK-NEXT: vsubuwm 3, 4, 3
+; CHECK-NEXT: vsrw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 37, 34
; CHECK-NEXT: blr
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
ret <4 x i32> %f
@@ -131,7 +129,7 @@
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-LABEL: rotr_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 5, 4, 16
+; CHECK-NEXT: neg 5, 4
; CHECK-NEXT: clrlwi 6, 3, 16
; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: clrlwi 5, 5, 28
@@ -146,7 +144,7 @@
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 4, 4, 32
+; CHECK-NEXT: neg 4, 4
; CHECK-NEXT: clrlwi 4, 4, 27
; CHECK-NEXT: rlwnm 3, 3, 4, 0, 31
; CHECK-NEXT: blr
@@ -157,7 +155,7 @@
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK-LABEL: rotr_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: subfic 4, 4, 64
+; CHECK-NEXT: neg 4, 4
; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: rotld 3, 3, 4
; CHECK-NEXT: blr
@@ -170,13 +168,11 @@
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK-LABEL: rotr_v4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addis 3, 2, .LCPI12_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI12_0@toc@l
-; CHECK-NEXT: lvx 4, 0, 3
-; CHECK-NEXT: vsubuwm 4, 4, 3
-; CHECK-NEXT: vsrw 3, 2, 3
-; CHECK-NEXT: vslw 2, 2, 4
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: xxlxor 36, 36, 36
+; CHECK-NEXT: vsrw 5, 2, 3
+; CHECK-NEXT: vsubuwm 3, 4, 3
+; CHECK-NEXT: vslw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 37
; CHECK-NEXT: blr
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
ret <4 x i32> %f