Add TargetLowering::prepareVolatileOrAtomicLoad

One unusual feature of the z architecture is that the result of a
previous load can be reused indefinitely for subsequent loads, even if
a cache-coherent store to that location is performed by another CPU.
A special serializing instruction must be used if you want to force
a load to be reattempted.

Since volatile loads are not supposed to be omitted in this way,
we should insert a serializing instruction before each such load.
The same goes for atomic loads.

The patch implements this at the IR->DAG boundary, in a similar way
to atomic fences.  It is a no-op for targets other than SystemZ.

llvm-svn: 196905
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py
index 552c9ca..edb631d 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py
@@ -79,7 +79,7 @@
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
     print '  %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
-    print '  %%bcur%d = load volatile i32 *%%bstop%d' % (i, i)
+    print '  %%bcur%d = load i32 *%%bstop%d' % (i, i)
     print '  %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
     print ''
@@ -95,7 +95,7 @@
 
 for i in xrange(branch_blocks):
     print '  %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25)
-    print '  %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i32 *%%astop%d' % (i, i)
     print '  %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
     print ''
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py
index 0b21ced..743e12d 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py
@@ -72,7 +72,7 @@
     print 'b%d:' % i
     print '  store volatile i8 %d, i8 *%%base' % value
     print '  %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
-    print '  %%acur%d = load volatile i32 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i32 *%%astop%d' % (i, i)
     print '  %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
     print '  br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next)
 
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py
index 75cdf24..5c9a93b 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py
@@ -79,7 +79,7 @@
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
     print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
-    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bcur%d = load i8 *%%bstop%d' % (i, i)
     print '  %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
     print '  %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -96,7 +96,7 @@
 
 for i in xrange(branch_blocks):
     print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
-    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i8 *%%astop%d' % (i, i)
     print '  %%aext%d = sext i8 %%acur%d to i32' % (i, i)
     print '  %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py
index 3ae3ae9..2c9090f 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py
@@ -83,7 +83,7 @@
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
     print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
-    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bcur%d = load i8 *%%bstop%d' % (i, i)
     print '  %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
     print '  %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -100,7 +100,7 @@
 
 for i in xrange(branch_blocks):
     print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
-    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i8 *%%astop%d' % (i, i)
     print '  %%aext%d = sext i8 %%acur%d to i64' % (i, i)
     print '  %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py
index 6928b8f..52f4a96 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py
@@ -82,7 +82,7 @@
 for i in xrange(branch_blocks):
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
-    print '  %%bcur%d = load volatile i8 *%%stop' % i
+    print '  %%bcur%d = load i8 *%%stop' % i
     print '  %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
     print '  %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -98,7 +98,7 @@
     print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
 
 for i in xrange(branch_blocks):
-    print '  %%acur%d = load volatile i8 *%%stop' % i
+    print '  %%acur%d = load i8 *%%stop' % i
     print '  %%aext%d = sext i8 %%acur%d to i32' % (i, i)
     print '  %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py
index aabc72f..c34ebac 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py
@@ -82,7 +82,7 @@
 for i in xrange(branch_blocks):
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
-    print '  %%bcur%d = load volatile i8 *%%stop' % i
+    print '  %%bcur%d = load i8 *%%stop' % i
     print '  %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
     print '  %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -98,7 +98,7 @@
     print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
 
 for i in xrange(branch_blocks):
-    print '  %%acur%d = load volatile i8 *%%stop' % i
+    print '  %%acur%d = load i8 *%%stop' % i
     print '  %%aext%d = sext i8 %%acur%d to i64' % (i, i)
     print '  %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py
index b3fd813..bc712cb 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py
@@ -79,7 +79,7 @@
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
     print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
-    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bcur%d = load i8 *%%bstop%d' % (i, i)
     print '  %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
     print '  %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -96,7 +96,7 @@
 
 for i in xrange(branch_blocks):
     print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
-    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i8 *%%astop%d' % (i, i)
     print '  %%aext%d = sext i8 %%acur%d to i32' % (i, i)
     print '  %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py
index 3aeea3e..8c483c3 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py
@@ -83,7 +83,7 @@
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
     print '  %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
-    print '  %%bcur%d = load volatile i8 *%%bstop%d' % (i, i)
+    print '  %%bcur%d = load i8 *%%bstop%d' % (i, i)
     print '  %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
     print '  %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -100,7 +100,7 @@
 
 for i in xrange(branch_blocks):
     print '  %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
-    print '  %%acur%d = load volatile i8 *%%astop%d' % (i, i)
+    print '  %%acur%d = load i8 *%%astop%d' % (i, i)
     print '  %%aext%d = sext i8 %%acur%d to i64' % (i, i)
     print '  %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py
index 034902c..0546103 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py
@@ -98,8 +98,8 @@
 for i in xrange(branch_blocks):
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
-    print '  %%bcur%da = load volatile i32 *%%stopa' % i
-    print '  %%bcur%db = load volatile i32 *%%stopb' % i
+    print '  %%bcur%da = load i32 *%%stopa' % i
+    print '  %%bcur%db = load i32 *%%stopb' % i
     print '  %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i)
     print '  %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -115,8 +115,8 @@
     print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
 
 for i in xrange(branch_blocks):
-    print '  %%acur%da = load volatile i32 *%%stopa' % i
-    print '  %%acur%db = load volatile i32 *%%stopb' % i
+    print '  %%acur%da = load i32 *%%stopa' % i
+    print '  %%acur%db = load i32 *%%stopb' % i
     print '  %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i)
     print '  %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py b/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py
index 007d477..626c899 100644
--- a/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py
+++ b/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py
@@ -98,8 +98,8 @@
 for i in xrange(branch_blocks):
     next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
     print 'before%d:' % i
-    print '  %%bcur%da = load volatile i64 *%%stopa' % i
-    print '  %%bcur%db = load volatile i64 *%%stopb' % i
+    print '  %%bcur%da = load i64 *%%stopa' % i
+    print '  %%bcur%db = load i64 *%%stopb' % i
     print '  %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i)
     print '  %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50)
     print '  br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -115,8 +115,8 @@
     print '  store volatile i8 %d, i8 *%%ptr%d' % (value, i)
 
 for i in xrange(branch_blocks):
-    print '  %%acur%da = load volatile i64 *%%stopa' % i
-    print '  %%acur%db = load volatile i64 *%%stopb' % i
+    print '  %%acur%da = load i64 *%%stopa' % i
+    print '  %%acur%db = load i64 *%%stopb' % i
     print '  %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i)
     print '  %%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100)
     print '  br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/llvm/test/CodeGen/SystemZ/frame-13.ll b/llvm/test/CodeGen/SystemZ/frame-13.ll
index 393850f..58dee1d 100644
--- a/llvm/test/CodeGen/SystemZ/frame-13.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-13.ll
@@ -243,8 +243,8 @@
 
 ; And again with maximum register pressure.  The only spill slots that the
 ; NOFP case needs are the emergency ones, so the offsets are the same as for f2.
-; However, the FP case uses %r11 as the frame pointer and must therefore
-; spill a second register.  This leads to an extra displacement of 8.
+; The FP case needs to spill an extra register and is too dependent on
+; register allocation heuristics for a stable test.
 define void @f11(i32 *%vptr) {
 ; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: stmg %r6, %r15,
@@ -254,15 +254,6 @@
 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
-;
-; CHECK-FP-LABEL: f11:
-; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
-; CHECK-FP: lay [[REGISTER]], 4096(%r11)
-; CHECK-FP: mvhi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
-; CHECK-FP: lmg %r6, %r15,
-; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i3 = load volatile i32 *%vptr
diff --git a/llvm/test/CodeGen/SystemZ/frame-14.ll b/llvm/test/CodeGen/SystemZ/frame-14.ll
index 3b48179..24169cf 100644
--- a/llvm/test/CodeGen/SystemZ/frame-14.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-14.ll
@@ -266,8 +266,8 @@
 
 ; And again with maximum register pressure.  The only spill slots that the
 ; NOFP case needs are the emergency ones, so the offsets are the same as for f4.
-; However, the FP case uses %r11 as the frame pointer and must therefore
-; spill a second register.  This leads to an extra displacement of 8.
+; The FP case needs to spill an extra register and is too dependent on
+; register allocation heuristics for a stable test.
 define void @f11(i32 *%vptr) {
 ; CHECK-NOFP-LABEL: f11:
 ; CHECK-NOFP: stmg %r6, %r15,
@@ -278,16 +278,6 @@
 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
-;
-; CHECK-FP-LABEL: f11:
-; CHECK-FP: stmg %r6, %r15,
-; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11)
-; CHECK-FP: llilh [[REGISTER]], 8
-; CHECK-FP: agr [[REGISTER]], %r11
-; CHECK-FP: mvi 8([[REGISTER]]), 42
-; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
-; CHECK-FP: lmg %r6, %r15,
-; CHECK-FP: br %r14
   %i0 = load volatile i32 *%vptr
   %i1 = load volatile i32 *%vptr
   %i3 = load volatile i32 *%vptr
diff --git a/llvm/test/CodeGen/SystemZ/serialize-01.ll b/llvm/test/CodeGen/SystemZ/serialize-01.ll
new file mode 100644
index 0000000..7801fac
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/serialize-01.ll
@@ -0,0 +1,21 @@
+; Test serialization instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-FULL
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-FAST
+
+; Check that volatile loads produce a serialization.
+define i32 @f1(i32 *%src) {
+; CHECK-FULL-LABEL: f1:
+; CHECK-FULL: bcr 15, %r0
+; CHECK-FULL: l %r2, 0(%r2)
+; CHECK-FULL: br %r14
+;
+; CHECK-FAST-LABEL: f1:
+; CHECK-FAST: bcr 14, %r0
+; CHECK-FAST: l %r2, 0(%r2)
+; CHECK-FAST: br %r14
+  %val = load volatile i32 *%src
+  ret i32 %val
+}
diff --git a/llvm/test/CodeGen/SystemZ/spill-01.ll b/llvm/test/CodeGen/SystemZ/spill-01.ll
index ca64a88..c1f780c 100644
--- a/llvm/test/CodeGen/SystemZ/spill-01.ll
+++ b/llvm/test/CodeGen/SystemZ/spill-01.ll
@@ -400,6 +400,7 @@
 ; CHECK: stgrl [[REG]], h8
 ; CHECK: br %r14
 entry:
+  %val8 = load volatile i64 *@h8
   %val0 = load volatile i64 *@h0
   %val1 = load volatile i64 *@h1
   %val2 = load volatile i64 *@h2
@@ -408,7 +409,6 @@
   %val5 = load volatile i64 *@h5
   %val6 = load volatile i64 *@h6
   %val7 = load volatile i64 *@h7
-  %val8 = load volatile i64 *@h8
   %val9 = load volatile i64 *@h9
 
   call void @foo()