[SystemZ] Improve use of conditional instructions
This patch moves formation of LOC-type instructions from (late)
IfConversion to the early if-conversion pass, and in some cases
additionally creates them directly from select instructions
during DAG instruction selection.
To make early if-conversion work, the patch implements the
canInsertSelect / insertSelect callbacks. It also implements
the commuteInstructionImpl and FoldImmediate callbacks to
enable generation of the full range of LOC instructions.
Finally, the patch adds support for all instructions of the
load-store-on-condition-2 facility, which allows using LOC
instructions also for high registers.
Due to the use of the GRX32 register class to enable high registers,
we now also have to handle the cases where there is still no single
hardware instruction (conditional move from a low register to a high
register or vice versa). These are converted back to a branch sequence
after register allocation. Since the expandPostRAPseudo callback is not
allowed to create new basic blocks, this requires a simple new pass,
modelled after the ARM/AArch64 ExpandPseudos pass.
Overall, this patch causes significantly more LOC-type instructions
to be used, and results in a measurable performance improvement.
llvm-svn: 288028
diff --git a/llvm/test/CodeGen/SystemZ/cond-li.ll b/llvm/test/CodeGen/SystemZ/cond-li.ll
deleted file mode 100644
index a3e2f3f..0000000
--- a/llvm/test/CodeGen/SystemZ/cond-li.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test LOCHI/LOCGHI
-;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-
-; CHECK-LABEL: bar1:
-; CHECK: lhi [[REG:%r[0-5]]], 42
-; CHECK: chi %r2, 0
-; CHECK: lochie [[REG]], 0
-define signext i32 @bar1(i32 signext %x) {
- %cmp = icmp ne i32 %x, 0
- %.x = select i1 %cmp, i32 42, i32 0
- ret i32 %.x
-}
-
-; CHECK-LABEL: bar2:
-; CHECK: ltgr [[REG:%r[0-5]]], %r2
-; CHECK: lghi %r2, 42
-; CHECK: locghie %r2, 0
-define signext i64 @bar2(i64 signext %x) {
- %cmp = icmp ne i64 %x, 0
- %.x = select i1 %cmp, i64 42, i64 0
- ret i64 %.x
-}
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-01.ll b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
index d10551f..c7ec410 100644
--- a/llvm/test/CodeGen/SystemZ/cond-load-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
@@ -2,6 +2,10 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
declare i32 @foo(i32 *)
; Test the simple case.
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-03.ll b/llvm/test/CodeGen/SystemZ/cond-load-03.ll
new file mode 100644
index 0000000..4cce92e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-load-03.ll
@@ -0,0 +1,159 @@
+; Test LOCFH. See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; ...and again with the operands swapped.
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %other, i32 %easy
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the high end of the aligned LOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the next word up. Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the low end of the LOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: locfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: br %r14
+ %ptr = alloca i32
+ call void @foo(i32 *%ptr)
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Try a case when an index is involved.
+define void @f8(i32 %limit, i64 %base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhhe [[REG]], 0({{%r[1-5]}})
+; CHECK: br %r14
+ %easy = call i32 asm "stepa $0", "=h"()
+ %add = add i64 %base, %index
+ %ptr = inttoptr i64 %add to i32 *
+ %cond = icmp ult i32 %limit, 42
+ %other = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %easy, i32 %other
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Test that conditionally-executed loads do not use LOC, since it is allowed
+; to trap even when the condition is false.
+define void @f9(i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: loc
+; CHECK: lfh
+; CHECK: br %r14
+entry:
+ %easy = call i32 asm "stepa $0", "=h"()
+ %cmp = icmp ule i32 %easy, %limit
+ br i1 %cmp, label %load, label %exit
+
+load:
+ %other = load i32, i32 *%ptr
+ br label %exit
+
+exit:
+ %res = phi i32 [ %easy, %entry ], [ %other, %load ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-01.ll b/llvm/test/CodeGen/SystemZ/cond-move-01.ll
index 088dee0..0be81c3 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-move-01.ll
@@ -1,6 +1,10 @@
; Test LOCR and LOCGR.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -verify-machineinstrs | FileCheck %s
+;
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {
@@ -46,3 +50,76 @@
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
+
+; Check that we also get LOCR as a result of early if-conversion.
+define i32 @f5(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 41
+; CHECK: locrh %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f6(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrh %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %a, %if.then ], [ %b, %entry ]
+ ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f7(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: clfi %r4, 41
+; CHECK: locrle %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f8(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrle %r2, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %b, %if.then ], [ %a, %entry ]
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-02.ll b/llvm/test/CodeGen/SystemZ/cond-move-02.ll
new file mode 100644
index 0000000..2e2bacd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-02.ll
@@ -0,0 +1,138 @@
+; Test LOCHI and LOCGHI.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+
+define i32 @f1(i32 %x) {
+; CHECK-LABEL: f1:
+; CHECK: lhi [[REG:%r[0-5]]], 0
+; CHECK: chi %r2, 0
+; CHECK: lochilh [[REG]], 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 42, i32 0
+ ret i32 %res
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 42, i32 %y
+ ret i32 %res
+}
+
+define i32 @f3(i32 %x, i32 %y) {
+; CHECK-LABEL: f3:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i32 %x, 0
+ %res = select i1 %cond, i32 %y, i32 42
+ ret i32 %res
+}
+
+define i64 @f4(i64 %x) {
+; CHECK-LABEL: f4:
+; CHECK: lghi [[REG:%r[0-5]]], 0
+; CHECK: cghi %r2, 0
+; CHECK: locghilh [[REG]], 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 42, i64 0
+ ret i64 %res
+}
+
+define i64 @f5(i64 %x, i64 %y) {
+; CHECK-LABEL: f5:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 42, i64 %y
+ ret i64 %res
+}
+
+define i64 @f6(i64 %x, i64 %y) {
+; CHECK-LABEL: f6:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+ %cond = icmp ne i64 %x, 0
+ %res = select i1 %cond, i64 %y, i64 42
+ ret i64 %res
+}
+
+; Check that we also get LOCHI as a result of early if-conversion.
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i32 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %y, %if.then ], [ 42, %entry ]
+ ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f8(i64 %x, i64 %y) {
+; CHECK-LABEL: f8:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i64 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %y, %if.then ], [ 42, %entry ]
+ ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f9(i32 %x, i32 %y) {
+; CHECK-LABEL: f9:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i32 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ 42, %if.then ], [ %y, %entry ]
+ ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f10(i64 %x, i64 %y) {
+; CHECK-LABEL: f10:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+entry:
+ %cond = icmp ne i64 %x, 0
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ 42, %if.then ], [ %y, %entry ]
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-03.ll b/llvm/test/CodeGen/SystemZ/cond-move-03.ll
new file mode 100644
index 0000000..a9bf1c8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-03.ll
@@ -0,0 +1,213 @@
+; Test LOCFHR and LOCHHI.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+define void @f1(i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhrl [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f2(i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=r"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+define void @f3(i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=r"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f4(i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=r"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "r"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+define void @f5(i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+ %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=r"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "r"(i32 %res)
+ call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ ret void
+}
+
+; Check that we also get LOCFHR as a result of early if-conversion.
+define void @f6(i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrle [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrh [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ ret void
+}
+
+define void @f8(i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], 32767
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 32767, i32 0
+ call void asm sideeffect "stepa $0", "h"(i32 %res)
+ ret void
+}
+
+define void @f9(i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], -32768
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 -32768, i32 0
+ call void asm sideeffect "stepa $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that we also get LOCHHI as a result of early if-conversion.
+define void @f10(i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhile [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ 123, %if.then ], [ %a, %entry ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f11(i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhih [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ 123, %entry ]
+ call void asm sideeffect "stepb $0", "h"(i32 %res)
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-07.ll b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
index 35b1303..79b4f87 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-07.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
@@ -2,6 +2,10 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
declare void @foo(i32 *)
; Test the simple case, with the loaded value first.
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-09.ll b/llvm/test/CodeGen/SystemZ/cond-store-09.ll
new file mode 100644
index 0000000..bf7a8b8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-store-09.ll
@@ -0,0 +1,142 @@
+; Test STOCFHs that are presented as selects.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %alt, i32 %orig
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the high end of the aligned STOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word up. Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the low end of the STOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+ %alt = call i32 asm "stepa $0", "=h"()
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: stocfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %ptr = alloca i32
+ call void @foo(i32 *%ptr)
+ %alt = call i32 asm "stepa $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %orig = load i32, i32 *%ptr
+ %res = select i1 %cond, i32 %orig, i32 %alt
+ store i32 %res, i32 *%ptr
+ call void @foo(i32 *%ptr)
+ ret void
+}
+
+; Test that conditionally-executed stores do not use STOC, since STOC
+; is allowed to trap even when the condition is false.
+define void @f8(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: stoc
+; CHECK: stfh
+; CHECK: br %r14
+entry:
+ %val = call i32 asm "stepa $0", "=h"()
+ %cmp = icmp ule i32 %a, %b
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 %val, i32 *%dest
+ br label %exit
+
+exit:
+ ret void
+}