[SelectionDAG] fix bug in translating funnel shift with non-power-of-2 type

The bug is visible in the constant-folded x86 tests. We can't use the
negated shift amount when the type's bit width is not a power of 2:
https://rise4fun.com/Alive/US1r

...so in that case, use the regular lowering that includes a select
to guard against a shift-by-bitwidth. This path now calculates the
modulo shift amount only once.

Also, improve the lowering of rotates (with power-of-2 bit width) to
use a negate rather than a subtract from the bit width. This improves
the codegen whether or not we have a rotate instruction (although we
can still see that we're not matching to a legal rotate in all
cases).

llvm-svn: 338592
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index d93c936..35f46e7 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -40,7 +40,7 @@
 define i16 @rotl_i16(i16 %x, i16 %z) {
 ; CHECK-LABEL: rotl_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 5, 4, 16
+; CHECK-NEXT:    neg 5, 4
 ; CHECK-NEXT:    clrlwi 6, 3, 16
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 28, 31
 ; CHECK-NEXT:    clrlwi 5, 5, 28
@@ -75,13 +75,11 @@
 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
 ; CHECK-LABEL: rotl_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI5_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI5_0@toc@l
-; CHECK-NEXT:    lvx 4, 0, 3
-; CHECK-NEXT:    vsubuwm 4, 4, 3
-; CHECK-NEXT:    vslw 3, 2, 3
-; CHECK-NEXT:    vsrw 2, 2, 4
-; CHECK-NEXT:    xxlor 34, 35, 34
+; CHECK-NEXT:    xxlxor 36, 36, 36
+; CHECK-NEXT:    vslw 5, 2, 3
+; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    vsrw 2, 2, 3
+; CHECK-NEXT:    xxlor 34, 37, 34
 ; CHECK-NEXT:    blr
   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
   ret <4 x i32> %f
@@ -131,7 +129,7 @@
 define i16 @rotr_i16(i16 %x, i16 %z) {
 ; CHECK-LABEL: rotr_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 5, 4, 16
+; CHECK-NEXT:    neg 5, 4
 ; CHECK-NEXT:    clrlwi 6, 3, 16
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 28, 31
 ; CHECK-NEXT:    clrlwi 5, 5, 28
@@ -146,7 +144,7 @@
 define i32 @rotr_i32(i32 %x, i32 %z) {
 ; CHECK-LABEL: rotr_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 32
+; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    clrlwi 4, 4, 27
 ; CHECK-NEXT:    rlwnm 3, 3, 4, 0, 31
 ; CHECK-NEXT:    blr
@@ -157,7 +155,7 @@
 define i64 @rotr_i64(i64 %x, i64 %z) {
 ; CHECK-LABEL: rotr_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 4, 4, 64
+; CHECK-NEXT:    neg 4, 4
 ; CHECK-NEXT:    rlwinm 4, 4, 0, 26, 31
 ; CHECK-NEXT:    rotld 3, 3, 4
 ; CHECK-NEXT:    blr
@@ -170,13 +168,11 @@
 define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
 ; CHECK-LABEL: rotr_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
-; CHECK-NEXT:    addi 3, 3, .LCPI12_0@toc@l
-; CHECK-NEXT:    lvx 4, 0, 3
-; CHECK-NEXT:    vsubuwm 4, 4, 3
-; CHECK-NEXT:    vsrw 3, 2, 3
-; CHECK-NEXT:    vslw 2, 2, 4
-; CHECK-NEXT:    xxlor 34, 34, 35
+; CHECK-NEXT:    xxlxor 36, 36, 36
+; CHECK-NEXT:    vsrw 5, 2, 3
+; CHECK-NEXT:    vsubuwm 3, 4, 3
+; CHECK-NEXT:    vslw 2, 2, 3
+; CHECK-NEXT:    xxlor 34, 34, 37
 ; CHECK-NEXT:    blr
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
   ret <4 x i32> %f
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 9acc1ac..05d105b 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -18,9 +18,8 @@
 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: fshl_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 6, 5, 32
 ; CHECK-NEXT:    andi. 5, 5, 31
-; CHECK-NEXT:    clrlwi 6, 6, 27
+; CHECK-NEXT:    subfic 6, 5, 32
 ; CHECK-NEXT:    slw 5, 3, 5
 ; CHECK-NEXT:    srw 4, 4, 6
 ; CHECK-NEXT:    or 4, 5, 4
@@ -36,24 +35,19 @@
 ; CHECK-LABEL: fshl_i37:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lis 6, -8857
-; CHECK-NEXT:    subfic 7, 5, 37
 ; CHECK-NEXT:    clrldi 5, 5, 27
 ; CHECK-NEXT:    clrldi 4, 4, 27
 ; CHECK-NEXT:    ori 6, 6, 51366
-; CHECK-NEXT:    clrldi 7, 7, 27
 ; CHECK-NEXT:    sldi 6, 6, 32
 ; CHECK-NEXT:    oris 6, 6, 3542
 ; CHECK-NEXT:    ori 6, 6, 31883
-; CHECK-NEXT:    mulhdu 8, 7, 6
 ; CHECK-NEXT:    mulhdu 6, 5, 6
-; CHECK-NEXT:    rldicl 8, 8, 59, 5
 ; CHECK-NEXT:    rldicl 6, 6, 59, 5
-; CHECK-NEXT:    mulli 8, 8, 37
 ; CHECK-NEXT:    mulli 6, 6, 37
-; CHECK-NEXT:    sub 7, 7, 8
 ; CHECK-NEXT:    subf. 5, 6, 5
-; CHECK-NEXT:    srd 4, 4, 7
+; CHECK-NEXT:    subfic 6, 5, 37
 ; CHECK-NEXT:    sld 5, 3, 5
+; CHECK-NEXT:    srd 4, 4, 6
 ; CHECK-NEXT:    or 4, 5, 4
 ; CHECK-NEXT:    isel 3, 3, 4, 2
 ; CHECK-NEXT:    blr
@@ -130,9 +124,8 @@
 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: fshr_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    subfic 6, 5, 32
 ; CHECK-NEXT:    andi. 5, 5, 31
-; CHECK-NEXT:    clrlwi 6, 6, 27
+; CHECK-NEXT:    subfic 6, 5, 32
 ; CHECK-NEXT:    srw 5, 4, 5
 ; CHECK-NEXT:    slw 3, 3, 6
 ; CHECK-NEXT:    or 3, 3, 5
@@ -148,24 +141,19 @@
 ; CHECK-LABEL: fshr_i37:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lis 6, -8857
-; CHECK-NEXT:    subfic 7, 5, 37
 ; CHECK-NEXT:    clrldi 5, 5, 27
-; CHECK-NEXT:    clrldi 9, 4, 27
 ; CHECK-NEXT:    ori 6, 6, 51366
-; CHECK-NEXT:    clrldi 7, 7, 27
 ; CHECK-NEXT:    sldi 6, 6, 32
 ; CHECK-NEXT:    oris 6, 6, 3542
 ; CHECK-NEXT:    ori 6, 6, 31883
-; CHECK-NEXT:    mulhdu 8, 5, 6
-; CHECK-NEXT:    mulhdu 6, 7, 6
-; CHECK-NEXT:    rldicl 8, 8, 59, 5
+; CHECK-NEXT:    mulhdu 6, 5, 6
 ; CHECK-NEXT:    rldicl 6, 6, 59, 5
-; CHECK-NEXT:    mulli 8, 8, 37
 ; CHECK-NEXT:    mulli 6, 6, 37
-; CHECK-NEXT:    subf. 5, 8, 5
-; CHECK-NEXT:    sub 6, 7, 6
-; CHECK-NEXT:    srd 5, 9, 5
-; CHECK-NEXT:    sld 3, 3, 6
+; CHECK-NEXT:    subf. 5, 6, 5
+; CHECK-NEXT:    clrldi 6, 4, 27
+; CHECK-NEXT:    subfic 7, 5, 37
+; CHECK-NEXT:    srd 5, 6, 5
+; CHECK-NEXT:    sld 3, 3, 7
 ; CHECK-NEXT:    or 3, 3, 5
 ; CHECK-NEXT:    isel 3, 4, 3, 2
 ; CHECK-NEXT:    blr