LiveRegMatrix: Fix some subreg interference checks
Surprisingly, one of the three interference checks in LiveRegMatrix was
using the main live range instead of the apropriate subregister range
resulting in unnecessarily conservative results.
llvm-svn: 296722
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index 7431f14..db3c88a 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -191,10 +191,10 @@
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@@ -203,9 +203,9 @@
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: v_add_u16_e32
-; VI: v_add_u16_e32
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI: v_add_u16_e32
+; VI: v_add_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
index 4ae15e8..3fe6f87 100644
--- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -5,20 +5,19 @@
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
-; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
-; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
-; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
+; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
+; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
-; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; SI: s_branch
-; VI: buffer_store_short
-; VI: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]]
+; GCN: buffer_store_short
+; GCN: s_endpgm
; GCN: two{{$}}
-; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
+; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[B_F16]]
; GCN: s_endpgm
define void @br_cc_f16(
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 41f31de..acd4f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1195,9 +1195,8 @@
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
-; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]]
; GCN: buffer_store_dword [[RESULT]]
-; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 079a441..e4e634d 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -417,10 +417,10 @@
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
+; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
-; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
+; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
+; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -444,10 +444,10 @@
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
+; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
-; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
-; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
+; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
+; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 96686cf..b282b51 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -188,10 +188,10 @@
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
+; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
-; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@@ -199,10 +199,10 @@
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: v_subrev_u16_e32
-; VI: v_subrev_u16_e32
-; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
+; VI-DAG: v_subrev_u16_e32
+; VI-DAG: v_subrev_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir
new file mode 100644
index 0000000..24d06a5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir
@@ -0,0 +1,24 @@
+# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
+---
+# We should not detect any interference between v0/v1 here and only allocate
+# sgpr0-sgpr3.
+#
+# CHECK-LABEL: func0
+# CHECK: S_NOP 0, implicit-def %sgpr0
+# CHECK: S_NOP 0, implicit-def %sgpr3
+# CHECK: S_NOP 0, implicit-def %sgpr1
+# CHECK: S_NOP 0, implicit-def %sgpr2
+# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3
+# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2
+name: func0
+body: |
+ bb.0:
+ S_NOP 0, implicit-def undef %0.sub0 : sreg_128
+ S_NOP 0, implicit-def %0.sub3
+ S_NOP 0, implicit-def undef %1.sub1 : sreg_128
+ S_NOP 0, implicit-def %1.sub2
+
+
+ S_NOP 0, implicit %0.sub0, implicit %0.sub3
+ S_NOP 0, implicit %1.sub1, implicit %1.sub2
+...