AMDGPU: Introduce TokenFactor for ABI register copies in call sequence

The call was missing chain dependencies on the pre-call copies. I
don't think this was causing any real issues however.

llvm-svn: 360906
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index b606de1..c4b2561 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -78,6 +78,19 @@
 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
 
+; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
+; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
+; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
+
+; GCN-NOT: s_add_u32 s32, s32, 0x800
+
+
+; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
+; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
+; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
+; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
+
 ; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
@@ -88,19 +101,6 @@
 ; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
 ; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
 
-; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
-; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
-; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
-; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
-
-; GCN-NOT: s_add_u32 s32, s32, 0x800
-
-; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
-; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
-; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
-; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
-
-
 ; GCN: s_swappc_b64
 ; GCN-NOT: v_readlane_b32 s32
 ; GCN: v_readlane_b32
@@ -272,16 +272,6 @@
 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
 
-; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
-; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
-; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
-; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
-
-; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
-; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
-; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
-; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
-
 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
@@ -294,7 +284,15 @@
 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
 
+; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
+; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
+; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
+; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
 
+; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
+; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
+; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
+; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
 
 ; GCN: s_swappc_b64
 ; GCN-NOT: v_readlane_b32 s32
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 2afe4c8..4d05fcc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -805,14 +805,14 @@
 }
 
 ; GCN-LABEL: {{^}}stack_12xv3i32:
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
-; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16
 ; GCN: v_mov_b32_e32 v31, 11
 ; GCN: s_getpc
 define void @stack_12xv3i32() #0 {
@@ -834,14 +834,14 @@
 }
 
 ; GCN-LABEL: {{^}}stack_12xv3f32:
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
-; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
+; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16
 ; GCN: v_mov_b32_e32 v31, 0x41300000
 ; GCN: s_getpc
 define void @stack_12xv3f32() #0 {
@@ -863,22 +863,24 @@
 }
 
 ; GCN-LABEL: {{^}}stack_8xv5i32:
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
-; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24
-; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
-; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20
-; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
-; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16
-; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
-; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12
-; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
-; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8
+
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
 ; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
+; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
+; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
+; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16
+; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
+; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32
+
 ; GCN: v_mov_b32_e32 v31, 7
 ; GCN: s_getpc
 define void @stack_8xv5i32() #0 {
@@ -896,22 +898,23 @@
 }
 
 ; GCN-LABEL: {{^}}stack_8xv5f32:
-; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
-; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32
-; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28
-; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24
-; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
-; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20
-; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
-; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16
-; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
-; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12
-; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
-; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
 ; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4
+; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
+; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8
+; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
+; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12
+; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
+; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16
+; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
+; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20
+; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24
+; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28
+; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
+; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32
+
 ; GCN: v_mov_b32_e32 v31, 0x40e00000
 ; GCN: s_getpc
 define void @stack_8xv5f32() #0 {