[SystemZ] Improve use of conditional instructions

This patch moves formation of LOC-type instructions from (late)
IfConversion to the early if-conversion pass, and in some cases
additionally creates them directly from select instructions
during DAG instruction selection.

To make early if-conversion work, the patch implements the
canInsertSelect / insertSelect callbacks.  It also implements
the commuteInstructionImpl and FoldImmediate callbacks to
enable generation of the full range of LOC instructions.

Finally, the patch adds support for all instructions of the
load-store-on-condition-2 facility, which allows using LOC
instructions also for high registers.

Due to the use of the GRX32 register class to enable high registers,
we now also have to handle the cases where there are still no single
hardware instructions (conditional move from a low register to a high
register or vice versa).  These are converted back to a branch sequence
after register allocation.  Since the expandRAPseudos callback is not
allowed to create new basic blocks, this requires a simple new pass,
modelled after the ARM/AArch64 ExpandPseudos pass.

Overall, this patch causes significantly more LOC-type instructions
to be used, and results in a measurable performance improvement.

llvm-svn: 288028
diff --git a/llvm/test/CodeGen/SystemZ/cond-li.ll b/llvm/test/CodeGen/SystemZ/cond-li.ll
deleted file mode 100644
index a3e2f3f..0000000
--- a/llvm/test/CodeGen/SystemZ/cond-li.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test LOCHI/LOCGHI
-;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-
-; CHECK-LABEL: bar1:
-; CHECK: lhi [[REG:%r[0-5]]], 42
-; CHECK: chi %r2, 0
-; CHECK: lochie [[REG]], 0
-define signext i32 @bar1(i32 signext %x) {
-  %cmp = icmp ne i32 %x, 0
-  %.x = select i1 %cmp, i32 42, i32 0
-  ret i32 %.x
-}
-
-; CHECK-LABEL: bar2:
-; CHECK: ltgr [[REG:%r[0-5]]], %r2
-; CHECK: lghi %r2, 42
-; CHECK: locghie %r2, 0
-define signext i64 @bar2(i64 signext %x) {
-  %cmp = icmp ne i64 %x, 0
-  %.x = select i1 %cmp, i64 42, i64 0
-  ret i64 %.x
-}
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-01.ll b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
index d10551f..c7ec410 100644
--- a/llvm/test/CodeGen/SystemZ/cond-load-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
@@ -2,6 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
 
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
 declare i32 @foo(i32 *)
 
 ; Test the simple case.
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-03.ll b/llvm/test/CodeGen/SystemZ/cond-load-03.ll
new file mode 100644
index 0000000..4cce92e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-load-03.ll
@@ -0,0 +1,159 @@
+; Test LOCFH.  See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; ...and again with the operands swapped.
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %other, i32 %easy
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Check the high end of the aligned LOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 131071
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Check the next word up.  Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 131072
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Check the low end of the LOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; The next three instructions may be matched in any order before the LOCFH.
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: locfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %alt, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: locfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: br %r14
+  %ptr = alloca i32
+  call void @foo(i32 *%ptr)
+  %easy = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Try a case when an index is involved.
+define void @f8(i32 %limit, i64 %base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhhe [[REG]], 0({{%r[1-5]}})
+; CHECK: br %r14
+  %easy = call i32 asm "stepa $0", "=h"()
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %cond = icmp ult i32 %limit, 42
+  %other = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %easy, i32 %other
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Test that conditionally-executed loads do not use LOC, since it is allowed
+; to trap even when the condition is false.
+define void @f9(i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: loc
+; CHECK: lfh
+; CHECK: br %r14
+entry:
+  %easy = call i32 asm "stepa $0", "=h"()
+  %cmp = icmp ule i32 %easy, %limit
+  br i1 %cmp, label %load, label %exit
+
+load:
+  %other = load i32, i32 *%ptr
+  br label %exit
+
+exit:
+  %res = phi i32 [ %easy, %entry ], [ %other, %load ]
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-01.ll b/llvm/test/CodeGen/SystemZ/cond-move-01.ll
index 088dee0..0be81c3 100644
--- a/llvm/test/CodeGen/SystemZ/cond-move-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-move-01.ll
@@ -1,6 +1,10 @@
 ; Test LOCR and LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -verify-machineinstrs | FileCheck %s
+;
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
 
 ; Test LOCR.
 define i32 @f1(i32 %a, i32 %b, i32 %limit) {
@@ -46,3 +50,76 @@
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
 }
+
+; Check that we also get LOCR as a result of early if-conversion.
+define i32 @f5(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r4, 41
+; CHECK: locrh %r2, %r3
+; CHECK: br %r14
+entry:
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+  ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f6(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f6:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrh %r2, %r3
+; CHECK: br %r14
+entry:
+  %cond = icmp ult i64 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i64 [ %a, %if.then ], [ %b, %entry ]
+  ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f7(i32 %a, i32 %b, i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: clfi %r4, 41
+; CHECK: locrle %r2, %r3
+; CHECK: br %r14
+entry:
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+  ret i32 %res
+}
+
+; ... and likewise for LOCGR.
+define i64 @f8(i64 %a, i64 %b, i64 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clgfi %r4, 41
+; CHECK: locgrle %r2, %r3
+; CHECK: br %r14
+entry:
+  %cond = icmp ult i64 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i64 [ %b, %if.then ], [ %a, %entry ]
+  ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-02.ll b/llvm/test/CodeGen/SystemZ/cond-move-02.ll
new file mode 100644
index 0000000..2e2bacd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-02.ll
@@ -0,0 +1,138 @@
+; Test LOCHI and LOCGHI.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+
+define i32 @f1(i32 %x) {
+; CHECK-LABEL: f1:
+; CHECK: lhi [[REG:%r[0-5]]], 0
+; CHECK: chi %r2, 0
+; CHECK: lochilh [[REG]], 42
+; CHECK: br %r14
+  %cond = icmp ne i32 %x, 0
+  %res = select i1 %cond, i32 42, i32 0
+  ret i32 %res
+}
+
+define i32 @f2(i32 %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+  %cond = icmp ne i32 %x, 0
+  %res = select i1 %cond, i32 42, i32 %y
+  ret i32 %res
+}
+
+define i32 @f3(i32 %x, i32 %y) {
+; CHECK-LABEL: f3:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+  %cond = icmp ne i32 %x, 0
+  %res = select i1 %cond, i32 %y, i32 42
+  ret i32 %res
+}
+
+define i64 @f4(i64 %x) {
+; CHECK-LABEL: f4:
+; CHECK: lghi [[REG:%r[0-5]]], 0
+; CHECK: cghi %r2, 0
+; CHECK: locghilh [[REG]], 42
+; CHECK: br %r14
+  %cond = icmp ne i64 %x, 0
+  %res = select i1 %cond, i64 42, i64 0
+  ret i64 %res
+}
+
+define i64 @f5(i64 %x, i64 %y) {
+; CHECK-LABEL: f5:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+  %cond = icmp ne i64 %x, 0
+  %res = select i1 %cond, i64 42, i64 %y
+  ret i64 %res
+}
+
+define i64 @f6(i64 %x, i64 %y) {
+; CHECK-LABEL: f6:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+  %cond = icmp ne i64 %x, 0
+  %res = select i1 %cond, i64 %y, i64 42
+  ret i64 %res
+}
+
+; Check that we also get LOCHI as a result of early if-conversion.
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: chi %r2, 0
+; CHECK: lochie %r3, 42
+; CHECK: br %r14
+entry:
+  %cond = icmp ne i32 %x, 0
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %y, %if.then ], [ 42, %entry ]
+  ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f8(i64 %x, i64 %y) {
+; CHECK-LABEL: f8:
+; CHECK: cghi %r2, 0
+; CHECK: locghie %r3, 42
+; CHECK: br %r14
+entry:
+  %cond = icmp ne i64 %x, 0
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i64 [ %y, %if.then ], [ 42, %entry ]
+  ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f9(i32 %x, i32 %y) {
+; CHECK-LABEL: f9:
+; CHECK: chi %r2, 0
+; CHECK: lochilh %r3, 42
+; CHECK: br %r14
+entry:
+  %cond = icmp ne i32 %x, 0
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ 42, %if.then ], [ %y, %entry ]
+  ret i32 %res
+}
+
+; ... and the same for LOCGHI.
+define i64 @f10(i64 %x, i64 %y) {
+; CHECK-LABEL: f10:
+; CHECK: cghi %r2, 0
+; CHECK: locghilh %r3, 42
+; CHECK: br %r14
+entry:
+  %cond = icmp ne i64 %x, 0
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i64 [ 42, %if.then ], [ %y, %entry ]
+  ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/cond-move-03.ll b/llvm/test/CodeGen/SystemZ/cond-move-03.ll
new file mode 100644
index 0000000..a9bf1c8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-move-03.ll
@@ -0,0 +1,213 @@
+; Test LOCFHR and LOCHHI.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   -no-integrated-as | FileCheck %s
+
+define void @f1(i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: locfhrl [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %b = call i32 asm sideeffect "stepb $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  call void asm sideeffect "stepc $0", "h"(i32 %res)
+  ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f2(i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+  %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %b = call i32 asm sideeffect "stepb $0", "=r"()
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  call void asm sideeffect "stepc $0", "h"(i32 %res)
+  call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+  ret void
+}
+
+define void @f3(i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+  %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+  %a = call i32 asm sideeffect "stepa $0", "=r"()
+  %b = call i32 asm sideeffect "stepb $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  call void asm sideeffect "stepc $0", "h"(i32 %res)
+  call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+  ret void
+}
+
+; FIXME: We should commute the LOCRMux to save one move.
+define void @f4(i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+  %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+  %a = call i32 asm sideeffect "stepa $0", "=r"()
+  %b = call i32 asm sideeffect "stepb $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  call void asm sideeffect "stepc $0", "r"(i32 %res)
+  call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+  ret void
+}
+
+define void @f5(i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG2:%r[0-5]]]
+; CHECK-DAG: stepb [[REG1:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42,
+; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: stepc [[REG1]]
+; CHECK: br %r14
+  %dummy = call i32 asm sideeffect "dummy $0", "=h"()
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %b = call i32 asm sideeffect "stepb $0", "=r"()
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 %a, i32 %b
+  call void asm sideeffect "stepc $0", "r"(i32 %res)
+  call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+  ret void
+}
+
+; Check that we also get LOCFHR as a result of early if-conversion.
+define void @f6(i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrle [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %b = call i32 asm sideeffect "stepb $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+  call void asm sideeffect "stepc $0", "h"(i32 %res)
+  ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: locfhrh [[REG2]], [[REG1]]
+; CHECK: stepc [[REG2]]
+; CHECK: br %r14
+entry:
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %b = call i32 asm sideeffect "stepb $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+  call void asm sideeffect "stepc $0", "h"(i32 %res)
+  ret void
+}
+
+define void @f8(i32 %limit) {
+; CHECK-LABEL: f8:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], 32767
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 32767, i32 0
+  call void asm sideeffect "stepa $0", "h"(i32 %res)
+  ret void
+}
+
+define void @f9(i32 %limit) {
+; CHECK-LABEL: f9:
+; CHECK: clfi %r2, 42
+; CHECK: lochhil [[REG:%r[0-5]]], -32768
+; CHECK: stepa [[REG]]
+; CHECK: br %r14
+  %cond = icmp ult i32 %limit, 42
+  %res = select i1 %cond, i32 -32768, i32 0
+  call void asm sideeffect "stepa $0", "h"(i32 %res)
+  ret void
+}
+
+; Check that we also get LOCHHI as a result of early if-conversion.
+define void @f10(i32 %limit) {
+; CHECK-LABEL: f10:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhile [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ 123, %if.then ], [ %a, %entry ]
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f11(i32 %limit) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: lochhih [[REG]], 123
+; CHECK: stepb [[REG]]
+; CHECK: br %r14
+entry:
+  %a = call i32 asm sideeffect "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  br i1 %cond, label %if.then, label %return
+
+if.then:
+  br label %return
+
+return:
+  %res = phi i32 [ %a, %if.then ], [ 123, %entry ]
+  call void asm sideeffect "stepb $0", "h"(i32 %res)
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-07.ll b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
index 35b1303..79b4f87 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-07.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
@@ -2,6 +2,10 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
 
+; Run the test again to make sure it still works the same even
+; in the presence of the load-store-on-condition-2 facility.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
 declare void @foo(i32 *)
 
 ; Test the simple case, with the loaded value first.
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-09.ll b/llvm/test/CodeGen/SystemZ/cond-store-09.ll
new file mode 100644
index 0000000..bf7a8b8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/cond-store-09.ll
@@ -0,0 +1,142 @@
+; Test STOCFHs that are presented as selects.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   -no-integrated-as | FileCheck %s
+
+declare void @foo(i32 *)
+
+; Test the simple case, with the loaded value first.
+define void @f1(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; ...and with the loaded value second
+define void @f2(i32 *%ptr, i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhl [[REG]], 0(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %alt, i32 %orig
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STOC range.
+define void @f3(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 524284(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 131071
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word up.  Other sequences besides this one would be OK.
+define void @f4(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, 524288
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 131072
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the STOC range.
+define void @f5(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], -524288(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 -131072
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, with the same comments as f4.
+define void @f6(i32 *%base, i32 %limit) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: stepa [[REG:%r[0-5]]]
+; CHECK-DAG: agfi %r2, -524292
+; CHECK-DAG: clfi %r3, 42
+; CHECK: stocfhhe [[REG]], 0(%r2)
+; CHECK: br %r14
+  %alt = call i32 asm "stepa $0", "=h"()
+  %ptr = getelementptr i32, i32 *%base, i64 -131073
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  ret void
+}
+
+; Try a frame index base.
+define void @f7(i32 %limit) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: stepa [[REG:%r[0-5]]]
+; CHECK: stocfhhe [[REG]], {{[0-9]+}}(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+  %ptr = alloca i32
+  call void @foo(i32 *%ptr)
+  %alt = call i32 asm "stepa $0", "=h"()
+  %cond = icmp ult i32 %limit, 42
+  %orig = load i32, i32 *%ptr
+  %res = select i1 %cond, i32 %orig, i32 %alt
+  store i32 %res, i32 *%ptr
+  call void @foo(i32 *%ptr)
+  ret void
+}
+
+; Test that conditionally-executed stores do not use STOC, since STOC
+; is allowed to trap even when the condition is false.
+define void @f8(i32 %a, i32 %b, i32 *%dest) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: stoc
+; CHECK: stfh
+; CHECK: br %r14
+entry:
+  %val = call i32 asm "stepa $0", "=h"()
+  %cmp = icmp ule i32 %a, %b
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 %val, i32 *%dest
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt b/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt
index b9d66572..5a98386 100644
--- a/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt
+++ b/llvm/test/MC/Disassembler/SystemZ/insns-z13.txt
@@ -4414,3 +4414,196 @@
 
 #CHECK: locghi %r11, 32512, 15
 0xec 0xbf 0x7f 0x00 0x00 0x46
+
+#CHECK: lochhi %r11, 42, 0
+0xec 0xb0 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhio %r11, 42
+0xec 0xb1 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhih %r11, 42
+0xec 0xb2 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhinle %r11, 42
+0xec 0xb3 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhil %r11, -1
+0xec 0xb4 0xff 0xff 0x00 0x4e
+
+#CHECK: lochhinhe %r11, 42
+0xec 0xb5 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhilh %r11, -1
+0xec 0xb6 0xff 0xff 0x00 0x4e
+
+#CHECK: lochhine %r11, 0
+0xec 0xb7 0x00 0x00 0x00 0x4e
+
+#CHECK: lochhie %r11, 0
+0xec 0xb8 0x00 0x00 0x00 0x4e
+
+#CHECK: lochhinlh %r11, 42
+0xec 0xb9 0x00 0x2a 0x00 0x4e
+
+#CHECK: lochhihe %r11, 255
+0xec 0xba 0x00 0xff 0x00 0x4e
+
+#CHECK: lochhinl %r11, 255
+0xec 0xbb 0x00 0xff 0x00 0x4e
+
+#CHECK: lochhile %r11, 32767
+0xec 0xbc 0x7f 0xff 0x00 0x4e
+
+#CHECK: lochhinh %r11, 32767
+0xec 0xbd 0x7f 0xff 0x00 0x4e
+
+#CHECK: lochhino %r11, 32512
+0xec 0xbe 0x7f 0x00 0x00 0x4e
+
+#CHECK: lochhi %r11, 32512, 15
+0xec 0xbf 0x7f 0x00 0x00 0x4e
+
+# CHECK: locfh %r7, 6399(%r8), 0
+0xeb 0x70 0x88 0xff 0x01 0xe0
+
+# CHECK: locfho %r7, 6399(%r8)
+0xeb 0x71 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhh %r7, 6399(%r8)
+0xeb 0x72 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnle %r7, 6399(%r8)
+0xeb 0x73 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhl %r7, 6399(%r8)
+0xeb 0x74 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnhe %r7, 6399(%r8)
+0xeb 0x75 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhlh %r7, 6399(%r8)
+0xeb 0x76 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhne %r7, 6399(%r8)
+0xeb 0x77 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhe %r7, 6399(%r8)
+0xeb 0x78 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnlh %r7, 6399(%r8)
+0xeb 0x79 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhhe %r7, 6399(%r8)
+0xeb 0x7a 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnl %r7, 6399(%r8)
+0xeb 0x7b 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhle %r7, 6399(%r8)
+0xeb 0x7c 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhnh %r7, 6399(%r8)
+0xeb 0x7d 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhno %r7, 6399(%r8)
+0xeb 0x7e 0x88 0xff 0x01 0xe0
+
+# CHECK: locfh %r7, 6399(%r8), 15
+0xeb 0x7f 0x88 0xff 0x01 0xe0
+
+# CHECK: locfhr %r11, %r3, 0
+0xb9 0xe0 0x00 0xb3
+
+# CHECK: locfhro %r11, %r3
+0xb9 0xe0 0x10 0xb3
+
+# CHECK: locfhrh %r11, %r3
+0xb9 0xe0 0x20 0xb3
+
+# CHECK: locfhrnle %r11, %r3
+0xb9 0xe0 0x30 0xb3
+
+# CHECK: locfhrl %r11, %r3
+0xb9 0xe0 0x40 0xb3
+
+# CHECK: locfhrnhe %r11, %r3
+0xb9 0xe0 0x50 0xb3
+
+# CHECK: locfhrlh %r11, %r3
+0xb9 0xe0 0x60 0xb3
+
+# CHECK: locfhrne %r11, %r3
+0xb9 0xe0 0x70 0xb3
+
+# CHECK: locfhre %r11, %r3
+0xb9 0xe0 0x80 0xb3
+
+# CHECK: locfhrnlh %r11, %r3
+0xb9 0xe0 0x90 0xb3
+
+# CHECK: locfhrhe %r11, %r3
+0xb9 0xe0 0xa0 0xb3
+
+# CHECK: locfhrnl %r11, %r3
+0xb9 0xe0 0xb0 0xb3
+
+# CHECK: locfhrle %r11, %r3
+0xb9 0xe0 0xc0 0xb3
+
+# CHECK: locfhrnh %r11, %r3
+0xb9 0xe0 0xd0 0xb3
+
+# CHECK: locfhrno %r11, %r3
+0xb9 0xe0 0xe0 0xb3
+
+# CHECK: locfhr %r11, %r3, 15
+0xb9 0xe0 0xf0 0xb3
+
+# CHECK: stocfh %r1, 2(%r3), 0
+0xeb 0x10 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfho %r1, 2(%r3)
+0xeb 0x11 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhh %r1, 2(%r3)
+0xeb 0x12 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnle %r1, 2(%r3)
+0xeb 0x13 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhl %r1, 2(%r3)
+0xeb 0x14 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnhe %r1, 2(%r3)
+0xeb 0x15 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhlh %r1, 2(%r3)
+0xeb 0x16 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhne %r1, 2(%r3)
+0xeb 0x17 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhe %r1, 2(%r3)
+0xeb 0x18 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnlh %r1, 2(%r3)
+0xeb 0x19 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhhe %r1, 2(%r3)
+0xeb 0x1a 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnl %r1, 2(%r3)
+0xeb 0x1b 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhle %r1, 2(%r3)
+0xeb 0x1c 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhnh %r1, 2(%r3)
+0xeb 0x1d 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfhno %r1, 2(%r3)
+0xeb 0x1e 0x30 0x02 0x00 0xe1
+
+# CHECK: stocfh %r1, 2(%r3), 15
+0xeb 0x1f 0x30 0x02 0x00 0xe1
+
diff --git a/llvm/test/MC/SystemZ/insn-bad-z13.s b/llvm/test/MC/SystemZ/insn-bad-z13.s
index 87f1ce8..db2de11 100644
--- a/llvm/test/MC/SystemZ/insn-bad-z13.s
+++ b/llvm/test/MC/SystemZ/insn-bad-z13.s
@@ -1960,3 +1960,56 @@
         locghie	%f0, 0
         locghie	0, %r0        
         
+#CHECK: error: invalid operand
+#CHECK: lochhie	%r0, 66000
+#CHECK: error: invalid operand
+#CHECK: lochhie	%f0, 0
+#CHECK: error: invalid operand
+#CHECK: lochhie	0, %r0
+
+        lochhie	%r0, 66000
+        lochhie	%f0, 0
+        lochhie	0, %r0
+
+#CHECK: error: invalid operand
+#CHECK: locfh	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: locfh	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: locfh	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: locfh	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: locfh	%r0,0(%r1,%r2),1
+
+	locfh	%r0,0,-1
+	locfh	%r0,0,16
+	locfh	%r0,-524289,1
+	locfh	%r0,524288,1
+	locfh	%r0,0(%r1,%r2),1
+
+#CHECK: error: invalid operand
+#CHECK: locfhr	%r0,%r0,-1
+#CHECK: error: invalid operand
+#CHECK: locfhr	%r0,%r0,16
+
+	locfhr	%r0,%r0,-1
+	locfhr	%r0,%r0,16
+
+#CHECK: error: invalid operand
+#CHECK: stocfh	%r0,0,-1
+#CHECK: error: invalid operand
+#CHECK: stocfh	%r0,0,16
+#CHECK: error: invalid operand
+#CHECK: stocfh	%r0,-524289,1
+#CHECK: error: invalid operand
+#CHECK: stocfh	%r0,524288,1
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stocfh	%r0,0(%r1,%r2),1
+
+	stocfh	%r0,0,-1
+	stocfh	%r0,0,16
+	stocfh	%r0,-524289,1
+	stocfh	%r0,524288,1
+	stocfh	%r0,0(%r1,%r2),1
+
diff --git a/llvm/test/MC/SystemZ/insn-good-z13.s b/llvm/test/MC/SystemZ/insn-good-z13.s
index 73f3075..4fd6a66 100644
--- a/llvm/test/MC/SystemZ/insn-good-z13.s
+++ b/llvm/test/MC/SystemZ/insn-good-z13.s
@@ -6892,3 +6892,206 @@
         locghinh %r11, 32767
         locghino %r11, 32512
         locghi %r11, 32512, 15
+
+#CHECK: lochhi  %r11, 42, 0    # encoding: [0xec,0xb0,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhio %r11, 42       # encoding: [0xec,0xb1,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhih %r11, 42       # encoding: [0xec,0xb2,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhinle %r11, 42     # encoding: [0xec,0xb3,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhil %r11, -1       # encoding: [0xec,0xb4,0xff,0xff,0x00,0x4e]
+#CHECK: lochhinhe %r11, 42     # encoding: [0xec,0xb5,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhilh %r11, -1      # encoding: [0xec,0xb6,0xff,0xff,0x00,0x4e]
+#CHECK: lochhine %r11, 0       # encoding: [0xec,0xb7,0x00,0x00,0x00,0x4e]
+#CHECK: lochhie %r11, 0        # encoding: [0xec,0xb8,0x00,0x00,0x00,0x4e]
+#CHECK: lochhinlh %r11, 42     # encoding: [0xec,0xb9,0x00,0x2a,0x00,0x4e]
+#CHECK: lochhihe %r11, 255     # encoding: [0xec,0xba,0x00,0xff,0x00,0x4e]
+#CHECK: lochhinl %r11, 255     # encoding: [0xec,0xbb,0x00,0xff,0x00,0x4e]
+#CHECK: lochhile %r11, 32767   # encoding: [0xec,0xbc,0x7f,0xff,0x00,0x4e]
+#CHECK: lochhinh %r11, 32767   # encoding: [0xec,0xbd,0x7f,0xff,0x00,0x4e]
+#CHECK: lochhino %r11, 32512   # encoding: [0xec,0xbe,0x7f,0x00,0x00,0x4e]
+#CHECK: lochhi %r11, 32512, 15 # encoding: [0xec,0xbf,0x7f,0x00,0x00,0x4e]
+
+        lochhi  %r11, 42, 0
+        lochhio %r11, 42
+        lochhih %r11, 42
+        lochhinle %r11, 42
+        lochhil %r11, -1
+        lochhinhe %r11, 42
+        lochhilh %r11, -1
+        lochhine %r11, 0
+        lochhie %r11, 0
+        lochhinlh %r11, 42
+        lochhihe %r11, 255
+        lochhinl %r11, 255
+        lochhile %r11, 32767
+        lochhinh %r11, 32767
+        lochhino %r11, 32512
+        lochhi %r11, 32512, 15
+
+#CHECK: locfh	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe0]
+#CHECK: locfh	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe0]
+#CHECK: locfh	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe0]
+#CHECK: locfh	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe0]
+#CHECK: locfh	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe0]
+#CHECK: locfh	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe0]
+#CHECK: locfh	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe0]
+#CHECK: locfh	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe0]
+
+	locfh	%r0,0,0
+	locfh	%r0,0,15
+	locfh	%r0,-524288,0
+	locfh	%r0,524287,0
+	locfh	%r0,0(%r1),0
+	locfh	%r0,0(%r15),0
+	locfh	%r15,0,0
+	locfh	%r1,4095(%r2),3
+
+#CHECK: locfho   %r1, 2(%r3)            # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe0]
+#CHECK: locfhh   %r1, 2(%r3)            # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
+#CHECK: locfhp   %r1, 2(%r3)            # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnle %r1, 2(%r3)            # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe0]
+#CHECK: locfhl   %r1, 2(%r3)            # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
+#CHECK: locfhm   %r1, 2(%r3)            # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnhe %r1, 2(%r3)            # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe0]
+#CHECK: locfhlh  %r1, 2(%r3)            # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe0]
+#CHECK: locfhne  %r1, 2(%r3)            # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnz  %r1, 2(%r3)            # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
+#CHECK: locfhe   %r1, 2(%r3)            # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
+#CHECK: locfhz   %r1, 2(%r3)            # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnlh %r1, 2(%r3)            # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe0]
+#CHECK: locfhhe  %r1, 2(%r3)            # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnl  %r1, 2(%r3)            # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnm  %r1, 2(%r3)            # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
+#CHECK: locfhle  %r1, 2(%r3)            # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnh  %r1, 2(%r3)            # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
+#CHECK: locfhnp  %r1, 2(%r3)            # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
+#CHECK: locfhno  %r1, 2(%r3)            # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe0]
+
+	locfho   %r1,2(%r3)
+	locfhh   %r1,2(%r3)
+	locfhp   %r1,2(%r3)
+	locfhnle %r1,2(%r3)
+	locfhl   %r1,2(%r3)
+	locfhm   %r1,2(%r3)
+	locfhnhe %r1,2(%r3)
+	locfhlh  %r1,2(%r3)
+	locfhne  %r1,2(%r3)
+	locfhnz  %r1,2(%r3)
+	locfhe   %r1,2(%r3)
+	locfhz   %r1,2(%r3)
+	locfhnlh %r1,2(%r3)
+	locfhhe  %r1,2(%r3)
+	locfhnl  %r1,2(%r3)
+	locfhnm  %r1,2(%r3)
+	locfhle  %r1,2(%r3)
+	locfhnh  %r1,2(%r3)
+	locfhnp  %r1,2(%r3)
+	locfhno  %r1,2(%r3)
+
+#CHECK: locfhr	%r1, %r2, 0             # encoding: [0xb9,0xe0,0x00,0x12]
+#CHECK: locfhr	%r1, %r2, 15            # encoding: [0xb9,0xe0,0xf0,0x12]
+
+	locfhr	%r1,%r2,0
+	locfhr	%r1,%r2,15
+
+#CHECK: locfhro   %r1, %r3              # encoding: [0xb9,0xe0,0x10,0x13]
+#CHECK: locfhrh   %r1, %r3              # encoding: [0xb9,0xe0,0x20,0x13]
+#CHECK: locfhrp   %r1, %r3              # encoding: [0xb9,0xe0,0x20,0x13]
+#CHECK: locfhrnle %r1, %r3              # encoding: [0xb9,0xe0,0x30,0x13]
+#CHECK: locfhrl   %r1, %r3              # encoding: [0xb9,0xe0,0x40,0x13]
+#CHECK: locfhrm   %r1, %r3              # encoding: [0xb9,0xe0,0x40,0x13]
+#CHECK: locfhrnhe %r1, %r3              # encoding: [0xb9,0xe0,0x50,0x13]
+#CHECK: locfhrlh  %r1, %r3              # encoding: [0xb9,0xe0,0x60,0x13]
+#CHECK: locfhrne  %r1, %r3              # encoding: [0xb9,0xe0,0x70,0x13]
+#CHECK: locfhrnz  %r1, %r3              # encoding: [0xb9,0xe0,0x70,0x13]
+#CHECK: locfhre   %r1, %r3              # encoding: [0xb9,0xe0,0x80,0x13]
+#CHECK: locfhrz   %r1, %r3              # encoding: [0xb9,0xe0,0x80,0x13]
+#CHECK: locfhrnlh %r1, %r3              # encoding: [0xb9,0xe0,0x90,0x13]
+#CHECK: locfhrhe  %r1, %r3              # encoding: [0xb9,0xe0,0xa0,0x13]
+#CHECK: locfhrnl  %r1, %r3              # encoding: [0xb9,0xe0,0xb0,0x13]
+#CHECK: locfhrnm  %r1, %r3              # encoding: [0xb9,0xe0,0xb0,0x13]
+#CHECK: locfhrle  %r1, %r3              # encoding: [0xb9,0xe0,0xc0,0x13]
+#CHECK: locfhrnh  %r1, %r3              # encoding: [0xb9,0xe0,0xd0,0x13]
+#CHECK: locfhrnp  %r1, %r3              # encoding: [0xb9,0xe0,0xd0,0x13]
+#CHECK: locfhrno  %r1, %r3              # encoding: [0xb9,0xe0,0xe0,0x13]
+
+	locfhro   %r1,%r3
+	locfhrh   %r1,%r3
+	locfhrp   %r1,%r3
+	locfhrnle %r1,%r3
+	locfhrl   %r1,%r3
+	locfhrm   %r1,%r3
+	locfhrnhe %r1,%r3
+	locfhrlh  %r1,%r3
+	locfhrne  %r1,%r3
+	locfhrnz  %r1,%r3
+	locfhre   %r1,%r3
+	locfhrz   %r1,%r3
+	locfhrnlh %r1,%r3
+	locfhrhe  %r1,%r3
+	locfhrnl  %r1,%r3
+	locfhrnm  %r1,%r3
+	locfhrle  %r1,%r3
+	locfhrnh  %r1,%r3
+	locfhrnp  %r1,%r3
+	locfhrno  %r1,%r3
+
+#CHECK: stocfh	%r0, 0, 0               # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh	%r0, 0, 15              # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh	%r0, -524288, 0         # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe1]
+#CHECK: stocfh	%r0, 524287, 0          # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe1]
+#CHECK: stocfh	%r0, 0(%r1), 0          # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe1]
+#CHECK: stocfh	%r0, 0(%r15), 0         # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe1]
+#CHECK: stocfh	%r15, 0, 0              # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe1]
+#CHECK: stocfh	%r1, 4095(%r2), 3       # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe1]
+
+	stocfh	%r0,0,0
+	stocfh	%r0,0,15
+	stocfh	%r0,-524288,0
+	stocfh	%r0,524287,0
+	stocfh	%r0,0(%r1),0
+	stocfh	%r0,0(%r15),0
+	stocfh	%r15,0,0
+	stocfh	%r1,4095(%r2),3
+
+#CHECK: stocfho   %r1, 2(%r3)           # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhh   %r1, 2(%r3)           # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhp   %r1, 2(%r3)           # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnle %r1, 2(%r3)           # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhl   %r1, 2(%r3)           # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhm   %r1, 2(%r3)           # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnhe %r1, 2(%r3)           # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhlh  %r1, 2(%r3)           # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhne  %r1, 2(%r3)           # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnz  %r1, 2(%r3)           # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhe   %r1, 2(%r3)           # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhz   %r1, 2(%r3)           # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnlh %r1, 2(%r3)           # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhhe  %r1, 2(%r3)           # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnl  %r1, 2(%r3)           # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnm  %r1, 2(%r3)           # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhle  %r1, 2(%r3)           # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnh  %r1, 2(%r3)           # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhnp  %r1, 2(%r3)           # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
+#CHECK: stocfhno  %r1, 2(%r3)           # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe1]
+
+	stocfho   %r1,2(%r3)
+	stocfhh   %r1,2(%r3)
+	stocfhp   %r1,2(%r3)
+	stocfhnle %r1,2(%r3)
+	stocfhl   %r1,2(%r3)
+	stocfhm   %r1,2(%r3)
+	stocfhnhe %r1,2(%r3)
+	stocfhlh  %r1,2(%r3)
+	stocfhne  %r1,2(%r3)
+	stocfhnz  %r1,2(%r3)
+	stocfhe   %r1,2(%r3)
+	stocfhz   %r1,2(%r3)
+	stocfhnlh %r1,2(%r3)
+	stocfhhe  %r1,2(%r3)
+	stocfhnl  %r1,2(%r3)
+	stocfhnm  %r1,2(%r3)
+	stocfhle  %r1,2(%r3)
+	stocfhnh  %r1,2(%r3)
+	stocfhnp  %r1,2(%r3)
+	stocfhno  %r1,2(%r3)
+