AMDGPU/SI: Enable lanemask tracking in misched Summary: This results in higher register usage, but should make it easier for the compiler to hide latency. This pass is a prerequisite for some more scheduler improvements, and I think the increase register usage with this patch is acceptable, because when combined with the scheduler improvements, the total register usage will decrease. shader-db stats: 2382 shaders in 478 tests Totals: SGPRS: 48672 -> 49088 (0.85 %) VGPRS: 34148 -> 34847 (2.05 %) Code Size: 1285816 -> 1289128 (0.26 %) bytes LDS: 28 -> 28 (0.00 %) blocks Scratch: 492544 -> 573440 (16.42 %) bytes per wave Max Waves: 6856 -> 6846 (-0.15 %) Wait states: 0 -> 0 (0.00 %) Depends on D18451 Reviewers: nhaehnle, arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18452 llvm-svn: 264876

commit: 0bc954e3bc474383f87ab9e55ab1aa5ae996f9c0 [log] [tgz]
author: Tom Stellard <thomas.stellard@amd.com> Wed Mar 30 16:35:09 2016 +0000
committer: Tom Stellard <thomas.stellard@amd.com> Wed Mar 30 16:35:09 2016 +0000
tree: d428795eaee9170ce8fc563a9634e2568806147e
parent: f76123386a7867ff5fa63a55841668ac098e201e [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index e92a4fd..be0670d 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll

@@ -28,10 +28,10 @@
 }
 
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
 ; GCN: buffer_store_dword [[RESULT]]
@@ -42,13 +42,13 @@
 }
 
 ; GCN-LABEL: {{^}}test_use_s_v_s:
-; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
-
 ; GCN: buffer_load_dword [[VA0:v[0-9]+]]
 ; GCN-NOT: v_mov_b32
 ; GCN: buffer_load_dword [[VA1:v[0-9]+]]
 
+; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+
 ; GCN-NOT: v_mov_b32
 ; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
 ; GCN-NOT: v_mov_b32
@@ -68,10 +68,10 @@
 }
 
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
 ; GCN: buffer_store_dword [[RESULT]]
@@ -82,10 +82,10 @@
 }
 
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
-; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; SI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
 ; GCN: buffer_store_dword [[RESULT]]
commit	0bc954e3bc474383f87ab9e55ab1aa5ae996f9c0	[log] [tgz]
author	Tom Stellard <thomas.stellard@amd.com>	Wed Mar 30 16:35:09 2016 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	Wed Mar 30 16:35:09 2016 +0000
tree	d428795eaee9170ce8fc563a9634e2568806147e
parent	f76123386a7867ff5fa63a55841668ac098e201e [diff] [blame]