[X86] Allow folding of stack reloads when loading a subreg of the spilled reg

We did not support subregs in InlineSpiller::foldMemoryOperand() because targets
may not deal with them correctly.

This adds a target hook that lets the spiller know whether a target can handle
subregs, and enables it for X86 for the case of stack-slot reloads.
This fixes PR30832.
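
For illustration, a minimal sketch of what the hook amounts to. The class names
mirror the LLVM headers, but the hook name and its exact placement are whatever
D26521 defines; treat the snippet as an assumption, not the patch itself:

    // Sketch only: a boolean query on TargetInstrInfo that the generic spiller
    // consults before folding a reload into an instruction that uses a subreg.
    // The hook name isSubregFoldable() is assumed here for illustration.
    class TargetInstrInfo {
    public:
      /// Return true if this target's foldMemoryOperand() copes with operands
      /// that carry a subregister index. Defaults to false so targets that were
      /// never audited for subregs keep their previous behavior.
      virtual bool isSubregFoldable() const { return false; }
      virtual ~TargetInstrInfo() = default;
    };

    // X86 opts in, so a stack-slot reload feeding a subreg use (e.g. %al taken
    // out of a spilled 32-bit value) can be folded into the consuming
    // instruction.
    class X86InstrInfo : public TargetInstrInfo {
    public:
      bool isSubregFoldable() const override { return true; }
    };

With the hook returning true, the spiller no longer bails out on subreg operands
and instead asks the target to fold the stack-slot load directly, which is what
turns the reload-plus-subb and reload-plus-vpermilpd pairs in the tests below
into single folded-reload instructions.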

Differential Revision: https://reviews.llvm.org/D26521

llvm-svn: 287792
diff --git a/llvm/test/CodeGen/X86/partial-fold32.ll b/llvm/test/CodeGen/X86/partial-fold32.ll
index ba3f73b..7fc1ed3 100644
--- a/llvm/test/CodeGen/X86/partial-fold32.ll
+++ b/llvm/test/CodeGen/X86/partial-fold32.ll
@@ -3,8 +3,7 @@
 define fastcc i8 @fold32to8(i32 %add, i8 %spill) {
 ; CHECK-LABEL: fold32to8:
 ; CHECK:    movl %ecx, (%esp) # 4-byte Spill
-; CHECK:    movl (%esp), %eax # 4-byte Reload
-; CHECK:    subb %al, %dl
+; CHECK:    subb (%esp), %dl  # 1-byte Folded Reload
 entry:
   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
   %trunc = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/X86/partial-fold64.ll b/llvm/test/CodeGen/X86/partial-fold64.ll
index b9ea7d6..15c9d19 100644
--- a/llvm/test/CodeGen/X86/partial-fold64.ll
+++ b/llvm/test/CodeGen/X86/partial-fold64.ll
@@ -3,8 +3,7 @@
 define i32 @fold64to32(i64 %add, i32 %spill) {
 ; CHECK-LABEL: fold64to32:
 ; CHECK:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK:    subl %eax, %esi
+; CHECK:    subl -{{[0-9]+}}(%rsp), %esi # 4-byte Folded Reload
 entry:
   tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
   %trunc = trunc i64 %add to i32
@@ -15,8 +14,7 @@
 define i8 @fold64to8(i64 %add, i8 %spill) {
 ; CHECK-LABEL: fold64to8:
 ; CHECK:    movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK:    movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK:    subb %al, %sil
+; CHECK:    subb -{{[0-9]+}}(%rsp), %sil # 1-byte Folded Reload
 entry:
   tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
   %trunc = trunc i64 %add to i8
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 0453dc1..7852294 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4788,9 +4788,8 @@
 ; AVX1-NEXT:    orl %ebx, %r14d
 ; AVX1-NEXT:    shlq $32, %r14
 ; AVX1-NEXT:    orq %r15, %r14
-; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = mem[1,0]
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movw %ax, %bx
 ; AVX1-NEXT:    shll $16, %ebx
@@ -4856,9 +4855,8 @@
 ; AVX2-NEXT:    orl %ebx, %r14d
 ; AVX2-NEXT:    shlq $32, %r14
 ; AVX2-NEXT:    orq %r15, %r14
-; AVX2-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT:    # xmm0 = mem[1,0]
 ; AVX2-NEXT:    callq __truncdfhf2
 ; AVX2-NEXT:    movw %ax, %bx
 ; AVX2-NEXT:    shll $16, %ebx
@@ -5585,9 +5583,8 @@
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = mem[1,0]
 ; AVX1-NEXT:    callq __truncdfhf2
 ; AVX1-NEXT:    movl %eax, %r12d
 ; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
@@ -5654,9 +5651,8 @@
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    callq __truncdfhf2
 ; AVX2-NEXT:    movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX2-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT:    # xmm0 = mem[1,0]
 ; AVX2-NEXT:    callq __truncdfhf2
 ; AVX2-NEXT:    movl %eax, %r12d
 ; AVX2-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload