Add TargetLowering::prepareVolatileOrAtomicLoad

One unusual feature of z/Architecture is that a CPU may reuse the
result of a previous load indefinitely for subsequent loads, even if
a cache-coherent store to that location is performed by another CPU.
A special serializing instruction must be used to force a load to be
reattempted.

Since volatile loads are not supposed to be omitted in this way,
we should insert a serializing instruction before each such load.
The same goes for atomic loads.

The patch implements this at the IR->DAG boundary, in a similar way
to atomic fences.  It is a no-op for targets other than SystemZ.

llvm-svn: 196906
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-01.ll b/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
index a5bc883..f3acd60 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
 define i8 @f1(i8 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i8 *%src seq_cst, align 1
   ret i8 %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-02.ll b/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
index 2c9bbdb..d9bec60 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; The CS-based sequence is probably far too conservative.
 define i16 @f1(i16 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i16 *%src seq_cst, align 2
   ret i16 %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-03.ll b/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
index 1fb41f5..7e5eb92 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -2,12 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; Using CS is probably too conservative.
-define i32 @f1(i32 %dummy, i32 *%src) {
+define i32 @f1(i32 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lhi %r2, 0
-; CHECK: cs %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i32 *%src seq_cst, align 4
   ret i32 %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-04.ll b/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
index 92cac40..c7a9a98 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -2,12 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that loads are handled.
-; Using CSG is probably too conservative.
-define i64 @f1(i64 %dummy, i64 *%src) {
+define i64 @f1(i64 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lghi %r2, 0
-; CHECK: csg %r2, %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
+; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
   %val = load atomic i64 *%src seq_cst, align 8
   ret i64 %val
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-01.ll b/llvm/test/CodeGen/SystemZ/atomic-store-01.ll
index 53ed24f..952e1a9 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-01.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-01.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
 define void @f1(i8 %val, i8 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: stc %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i8 %val, i8 *%src seq_cst, align 1
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-02.ll b/llvm/test/CodeGen/SystemZ/atomic-store-02.ll
index 42d6695..c9576e5 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-02.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-02.ll
@@ -2,11 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; The CS-based sequence is probably far too conservative.
 define void @f1(i16 %val, i16 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: cs
+; CHECK: sth %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i16 %val, i16 *%src seq_cst, align 2
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-03.ll b/llvm/test/CodeGen/SystemZ/atomic-store-03.ll
index 846c86f..459cb6a 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-03.ll
@@ -2,14 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
 define void @f1(i32 %val, i32 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: l %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: cs %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: st %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i32 %val, i32 *%src seq_cst, align 4
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/atomic-store-04.ll b/llvm/test/CodeGen/SystemZ/atomic-store-04.ll
index 24615b1..7f2406e 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-store-04.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-store-04.ll
@@ -2,14 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-; This is just a placeholder to make sure that stores are handled.
-; Using CS is probably too conservative.
 define void @f1(i64 %val, i64 *%src) {
 ; CHECK-LABEL: f1:
-; CHECK: lg %r0, 0(%r3)
-; CHECK: [[LABEL:\.[^:]*]]:
-; CHECK: csg %r0, %r2, 0(%r3)
-; CHECK: jl [[LABEL]]
+; CHECK: stg %r2, 0(%r3)
+; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: br %r14
   store atomic i64 %val, i64 *%src seq_cst, align 8
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-01.ll b/llvm/test/CodeGen/SystemZ/cond-store-01.ll
index d55ea21..62e9796 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-01.ll
@@ -347,11 +347,10 @@
 define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lb {{%r[0-9]+}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
-; CHECK: stc {{%r[0-9]+}},
+; CHECK: stc {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load atomic i8 *%ptr unordered, align 1
@@ -367,7 +366,7 @@
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lb %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i8 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-02.ll b/llvm/test/CodeGen/SystemZ/cond-store-02.ll
index 91bc486..4fbcdab 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-02.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-02.ll
@@ -347,11 +347,10 @@
 define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f20:
-; CHECK: cs {{%r[0-9]+}},
-; CHECK: jl
+; CHECK: lh {{%r[0-9]+}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
-; CHECK: sth {{%r[0-9]+}},
+; CHECK: sth {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load atomic i16 *%ptr unordered, align 2
@@ -367,7 +366,7 @@
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lh %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-9]+}},
+; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i16 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-03.ll b/llvm/test/CodeGen/SystemZ/cond-store-03.ll
index d4fd48d..4b22555 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-03.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-03.ll
@@ -272,7 +272,7 @@
 define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CS.
 ; CHECK-LABEL: f16:
-; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: l {{%r[0-5]}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
 ; CHECK: st {{%r[0-5]}}, 0(%r2)
@@ -291,7 +291,7 @@
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: l %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i32 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-04.ll b/llvm/test/CodeGen/SystemZ/cond-store-04.ll
index fc565c4..346b51a 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-04.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-04.ll
@@ -164,7 +164,7 @@
 define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
 ; FIXME: should use a normal load instead of CSG.
 ; CHECK-LABEL: f10:
-; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2)
+; CHECK: lg {{%r[0-5]}}, 0(%r2)
 ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]]
 ; CHECK: [[LABEL]]:
 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
@@ -183,7 +183,7 @@
 ; CHECK: jhe [[LABEL:[^ ]*]]
 ; CHECK: lg %r3, 0(%r2)
 ; CHECK: [[LABEL]]:
-; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2)
+; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
   %orig = load i64 *%ptr