DAGCombiner: Replace store of FP constant after attempting store merges. If storing multiple FP constants, some subset of the stores would be replaced with integers due to visit order, so MergeConsecutiveStores would only partially merge these. llvm-svn: 248169

commit: b774834429430885d62835a9a60d58308dd72a88 [log] [tgz]
author: Matt Arsenault <Matthew.Arsenault@amd.com> Mon Sep 21 15:59:46 2015 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> Mon Sep 21 15:59:46 2015 +0000
tree: a6acbc0a9ad62d0ec719a78c1a2a5daac1a7f227
parent: a30ddb652419290b65f4ce8dafdd170c8098ddd7 [diff]
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5c41102..24627bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -11485,16 +11485,6 @@
   if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
     return Chain;
 
-  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
-  //
-  // Make sure to do this only after attempting to merge stores in order to
-  //  avoid changing the types of some subset of stores due to visit order,
-  //  preventing their merging.
-  if (isa<ConstantFPSDNode>(Value)) {
-    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
-      return NewSt;
-  }
-
   // Try to infer better alignment information than the store already has.
   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
@@ -11618,6 +11608,16 @@
       return SDValue(N, 0);
   }
 
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  //
+  // Make sure to do this only after attempting to merge stores in order to
+  //  avoid changing the types of some subset of stores due to visit order,
+  //  preventing their merging.
+  if (isa<ConstantFPSDNode>(Value)) {
+    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+      return NewSt;
+  }
+
   return ReduceLoadOpStoreWidth(N);
 }
 

diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index 87148ae..62d372a 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll

@@ -121,10 +121,7 @@
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dwordx2 v
+; GCN: buffer_store_dwordx4
 define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
   %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -137,17 +134,9 @@
   ret void
 }
 
-; First store is out of order. Because of order of combines, the
-; consecutive store fails because only some of the stores have been
-; replaced with integer constant stores, and then won't merge because
-; the types are different.
-
+; First store is out of order.
 ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+; GCN: buffer_store_dwordx4
 define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
   %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -160,6 +149,29 @@
   ret void
 }
 
+; FIXME: Should be able to merge this
+; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
+; XGCN: buffer_store_dwordx4
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: s_endpgm
+define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
+  %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+  %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+  %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+  %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+  %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)*
+
+  store i32 11, i32 addrspace(1)* %out.gep.1.bc
+  store float 2.0, float addrspace(1)* %out.gep.2
+  store i32 17, i32 addrspace(1)* %out.gep.3.bc
+  store float 8.0, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
 ; SI-DAG: buffer_store_dwordx2
 ; SI-DAG: buffer_store_dword

diff --git a/llvm/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll b/llvm/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
new file mode 100644
index 0000000..db92f20
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll

@@ -0,0 +1,28 @@
+; RUN: llc -march=ppc64 -mtriple=ppc64-apple-darwin < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}_merge_8_float_zero_stores:
+; CHECK: li [[ZEROREG:r[0-9]+]], 0
+; CHECK-DAG: std [[ZEROREG]], 0([[PTR:r[0-9]+]])
+; CHECK-DAG: std [[ZEROREG]], 8([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 16([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 24([[PTR]])
+; CHECK: blr
+define void @merge_8_float_zero_stores(float* %ptr) {
+  %idx0 = getelementptr float, float* %ptr, i64 0
+  %idx1 = getelementptr float, float* %ptr, i64 1
+  %idx2 = getelementptr float, float* %ptr, i64 2
+  %idx3 = getelementptr float, float* %ptr, i64 3
+  %idx4 = getelementptr float, float* %ptr, i64 4
+  %idx5 = getelementptr float, float* %ptr, i64 5
+  %idx6 = getelementptr float, float* %ptr, i64 6
+  %idx7 = getelementptr float, float* %ptr, i64 7
+  store float 0.0, float* %idx0, align 4
+  store float 0.0, float* %idx1, align 4
+  store float 0.0, float* %idx2, align 4
+  store float 0.0, float* %idx3, align 4
+  store float 0.0, float* %idx4, align 4
+  store float 0.0, float* %idx5, align 4
+  store float 0.0, float* %idx6, align 4
+  store float 0.0, float* %idx7, align 4
+  ret void
+}

diff --git a/llvm/test/CodeGen/X86/vector-merge-store-fp-constants.ll b/llvm/test/CodeGen/X86/vector-merge-store-fp-constants.ll
new file mode 100644
index 0000000..a6fb32d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-merge-store-fp-constants.ll

@@ -0,0 +1,35 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=DEFAULTCPU -check-prefix=ALL %s
+; RUN: llc -march=x86-64 -mcpu=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=X8664CPU -check-prefix=ALL %s
+
+
+; ALL-LABEL: {{^}}merge_8_float_zero_stores:
+
+; DEFAULTCPU-DAG: movq $0, ([[PTR:%[a-z]+]])
+; DEFAULTCPU-DAG: movq $0, 8([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 16([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 24([[PTR]])
+
+; X8664CPU: xorps [[ZEROREG:%xmm[0-9]+]], [[ZEROREG]]
+; X8664CPU-DAG: movups [[ZEROREG]], ([[PTR:%[a-z]+]])
+; X8664CPU-DAG: movups [[ZEROREG]], 16([[PTR:%[a-z]+]])
+
+; ALL: retq
+define void @merge_8_float_zero_stores(float* %ptr) {
+  %idx0 = getelementptr float, float* %ptr, i64 0
+  %idx1 = getelementptr float, float* %ptr, i64 1
+  %idx2 = getelementptr float, float* %ptr, i64 2
+  %idx3 = getelementptr float, float* %ptr, i64 3
+  %idx4 = getelementptr float, float* %ptr, i64 4
+  %idx5 = getelementptr float, float* %ptr, i64 5
+  %idx6 = getelementptr float, float* %ptr, i64 6
+  %idx7 = getelementptr float, float* %ptr, i64 7
+  store float 0.0, float* %idx0, align 4
+  store float 0.0, float* %idx1, align 4
+  store float 0.0, float* %idx2, align 4
+  store float 0.0, float* %idx3, align 4
+  store float 0.0, float* %idx4, align 4
+  store float 0.0, float* %idx5, align 4
+  store float 0.0, float* %idx6, align 4
+  store float 0.0, float* %idx7, align 4
+  ret void
+}
commit	b774834429430885d62835a9a60d58308dd72a88	[log] [tgz]
author	Matt Arsenault <Matthew.Arsenault@amd.com>	Mon Sep 21 15:59:46 2015 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	Mon Sep 21 15:59:46 2015 +0000
tree	a6acbc0a9ad62d0ec719a78c1a2a5daac1a7f227
parent	a30ddb652419290b65f4ce8dafdd170c8098ddd7 [diff]