[X86] Handle COPYs of physregs better (regalloc hints)

Enable enableMultipleCopyHints() on X86.

Original Patch by @jonpa:

While enabling the mischeduler for SystemZ, it was discovered that for some reason a test needed one extra seemingly needless COPY (test/CodeGen/SystemZ/call-03.ll). The handling for that resulted in this patch, which improves the register coalescing by providing not just one copy hint, but a sorted list of copy hints. On SystemZ, this gives ~12500 fewer register moves on SPEC, as well as marginally less spilling.

Instead of improving just the SystemZ backend, the improvement has been implemented in common-code (calculateSpillWeightAndHint()). This causes a lot of test failures, but since this should be a general improvement I hope that the involved targets will help and review the test updates.

Differential Revision: https://reviews.llvm.org/D38128

llvm-svn: 342578
diff --git a/llvm/test/CodeGen/X86/avoid-sfb.ll b/llvm/test/CodeGen/X86/avoid-sfb.ll
index 5319700..9d6c6c9 100644
--- a/llvm/test/CodeGen/X86/avoid-sfb.ll
+++ b/llvm/test/CodeGen/X86/avoid-sfb.ll
@@ -727,28 +727,29 @@
 define void @test_stack(%struct.S6* noalias nocapture sret %agg.result, %struct.S6* byval nocapture readnone align 8 %s1, %struct.S6* byval nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_stack:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-NEXT:    movups %xmm0, (%rdi)
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT:    movq %rax, 16(%rdi)
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movl %eax, 24(%rdi)
-; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-NEXT:    movl %eax, 28(%rdi)
-; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
-; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movq %rcx, 16(%rdi)
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    movl %ecx, 24(%rdi)
+; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    movl %ecx, 28(%rdi)
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %esi
 ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    retq
 ;
 ; DISABLED-LABEL: test_stack:
 ; DISABLED:       # %bb.0: # %entry
+; DISABLED-NEXT:    movq %rdi, %rax
 ; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; DISABLED-NEXT:    movups %xmm0, (%rdi)
@@ -758,51 +759,50 @@
 ; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
 ; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
-; DISABLED-NEXT:    movq %rdi, %rax
 ; DISABLED-NEXT:    retq
 ;
 ; CHECK-AVX2-LABEL: test_stack:
 ; CHECK-AVX2:       # %bb.0: # %entry
+; CHECK-AVX2-NEXT:    movq %rdi, %rax
 ; CHECK-AVX2-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
-; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, 28(%rdi)
+; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX2-NEXT:    movq %rcx, 16(%rdi)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, 24(%rdi)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, 28(%rdi)
 ; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX2-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX2-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX2-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX2-NEXT:    movq %rdi, %rax
+; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX2-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
 ; CHECK-AVX2-NEXT:    retq
 ;
 ; CHECK-AVX512-LABEL: test_stack:
 ; CHECK-AVX512:       # %bb.0: # %entry
+; CHECK-AVX512-NEXT:    movq %rdi, %rax
 ; CHECK-AVX512-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
-; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, 28(%rdi)
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT:    movq %rcx, 16(%rdi)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, 24(%rdi)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, 28(%rdi)
 ; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-AVX512-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; CHECK-AVX512-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; CHECK-AVX512-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
-; CHECK-AVX512-NEXT:    movq %rdi, %rax
+; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; CHECK-AVX512-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
+; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
 ; CHECK-AVX512-NEXT:    retq
 entry:
   %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*