blob: a408a0a2b94def8aaa2756e46cc2a0344fa63756 [file] [log] [blame]
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SystemZTargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
Ulrich Weigand5f613df2013-05-06 16:15:19 +000014#include "SystemZISelLowering.h"
15#include "SystemZCallingConv.h"
16#include "SystemZConstantPoolValue.h"
17#include "SystemZMachineFunctionInfo.h"
18#include "SystemZTargetMachine.h"
19#include "llvm/CodeGen/CallingConvLower.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Ulrich Weigand57c85f52015-04-01 12:51:43 +000023#include "llvm/IR/Intrinsics.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000024#include "llvm/IR/IntrinsicInst.h"
Craig Topperd0af7e82017-04-28 05:31:46 +000025#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
Will Dietz981af002013-10-12 00:55:57 +000027#include <cctype>
28
Ulrich Weigand5f613df2013-05-06 16:15:19 +000029using namespace llvm;
30
Chandler Carruth84e68b22014-04-22 02:41:26 +000031#define DEBUG_TYPE "systemz-lower"
32
Richard Sandifordf722a8e302013-10-16 11:10:55 +000033namespace {
Richard Sandifordd420f732013-12-13 15:28:45 +000034// Represents information about a comparison.
35struct Comparison {
36 Comparison(SDValue Op0In, SDValue Op1In)
37 : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
38
39 // The operands to the comparison.
40 SDValue Op0, Op1;
41
42 // The opcode that should be used to compare Op0 and Op1.
43 unsigned Opcode;
44
45 // A SystemZICMP value. Only used for integer comparisons.
46 unsigned ICmpType;
47
48 // The mask of CC values that Opcode can produce.
49 unsigned CCValid;
50
51 // The mask of CC values for which the original condition is true.
52 unsigned CCMask;
53};
Richard Sandifordc2312692014-03-06 10:38:30 +000054} // end anonymous namespace
Richard Sandifordf722a8e302013-10-16 11:10:55 +000055
Ulrich Weigand5f613df2013-05-06 16:15:19 +000056// Classify VT as either 32 or 64 bit.
57static bool is32Bit(EVT VT) {
58 switch (VT.getSimpleVT().SimpleTy) {
59 case MVT::i32:
60 return true;
61 case MVT::i64:
62 return false;
63 default:
64 llvm_unreachable("Unsupported type");
65 }
66}
67
68// Return a version of MachineOperand that can be safely used before the
69// final use.
70static MachineOperand earlyUseOperand(MachineOperand Op) {
71 if (Op.isReg())
72 Op.setIsKill(false);
73 return Op;
74}
75
// Constructor: configures all SystemZ-specific lowering state — register
// classes, per-opcode legalization actions, target DAG combines, and
// memory-operation expansion thresholds.
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Pointer-sized integer type for address space 0.
  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
  } else {
    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  }
  if (Subtarget.hasVectorEnhancements1())
    addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
  else
    addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  // All 128-bit vector types share the single VR128 register class.
  if (Subtarget.hasVector()) {
    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  // Scalar booleans are 0/1; vector booleans are all-zeros/all-ones masks.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(4);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such. In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      // No 64-bit element multiply instruction.
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner. ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    // Scalar and vector min/max, for each supported FP format.
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNAN, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNAN, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNAN, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXNAN, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNAN, MVT::f128, Legal);
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FMA, MVT::f128, Legal);
  else
    setOperationAction(ISD::FMA, MVT::f128, Expand);

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::ROTL);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC". Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}
539
Mehdi Amini44ede332015-07-09 02:09:04 +0000540EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
541 LLVMContext &, EVT VT) const {
Richard Sandifordabc010b2013-11-06 12:16:02 +0000542 if (!VT.isVector())
543 return MVT::i32;
544 return VT.changeVectorElementTypeToInteger();
545}
546
547bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
Stephen Lin73de7bf2013-07-09 18:16:56 +0000548 VT = VT.getScalarType();
549
550 if (!VT.isSimple())
551 return false;
552
553 switch (VT.getSimpleVT().SimpleTy) {
554 case MVT::f32:
555 case MVT::f64:
556 return true;
557 case MVT::f128:
Ulrich Weigandf2968d52017-07-17 17:44:20 +0000558 return Subtarget.hasVectorEnhancements1();
Stephen Lin73de7bf2013-07-09 18:16:56 +0000559 default:
560 break;
561 }
562
563 return false;
564}
565
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000566bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
567 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
568 return Imm.isZero() || Imm.isNegZero();
569}
570
Ulrich Weigand1f6666a2015-03-31 12:52:27 +0000571bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
572 // We can use CGFI or CLGFI.
573 return isInt<32>(Imm) || isUInt<32>(Imm);
574}
575
576bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
577 // We can use ALGFI or SLGFI.
578 return isUInt<32>(Imm) || isUInt<32>(-Imm);
579}
580
Matt Arsenault6f2a5262014-07-27 17:46:40 +0000581bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
582 unsigned,
583 unsigned,
584 bool *Fast) const {
Richard Sandiford46af5a22013-05-30 09:45:42 +0000585 // Unaligned accesses should never be slower than the expanded version.
586 // We check specifically for aligned accesses in the few cases where
587 // they are required.
588 if (Fast)
589 *Fast = true;
590 return true;
591}
Matt Arsenaultbd7d80a2015-06-01 05:31:59 +0000592
namespace {
// Information about the addressing mode for a memory access.
//
// File-local helper type: wrapped in an anonymous namespace to give it
// internal linkage and avoid any ODR clash with same-named types in
// other translation units.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
} // end anonymous namespace
604
605// Return the desired addressing mode for a Load which has only one use (in
606// the same block) which is a Store.
607static AddressingMode getLoadStoreAddrMode(bool HasVector,
608 Type *Ty) {
609 // With vector support a Load->Store combination may be combined to either
610 // an MVC or vector operations and it seems to work best to allow the
611 // vector addressing mode.
612 if (HasVector)
613 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
614
615 // Otherwise only the MVC case is special.
616 bool MVC = Ty->isIntegerTy(8);
617 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
618}
619
620// Return the addressing mode which seems most desirable given an LLVM
621// Instruction pointer.
622static AddressingMode
623supportedAddressingMode(Instruction *I, bool HasVector) {
624 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
625 switch (II->getIntrinsicID()) {
626 default: break;
627 case Intrinsic::memset:
628 case Intrinsic::memmove:
629 case Intrinsic::memcpy:
630 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
631 }
632 }
633
634 if (isa<LoadInst>(I) && I->hasOneUse()) {
635 auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
636 if (SingleUser->getParent() == I->getParent()) {
637 if (isa<ICmpInst>(SingleUser)) {
638 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
Jonas Paulssoncc5fe732018-01-31 12:41:25 +0000639 if (C->getBitWidth() <= 64 &&
640 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
Jonas Paulsson024e3192017-07-21 11:59:37 +0000641 // Comparison of memory with 16 bit signed / unsigned immediate
642 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
643 } else if (isa<StoreInst>(SingleUser))
644 // Load->Store
645 return getLoadStoreAddrMode(HasVector, I->getType());
646 }
647 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
648 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
649 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
650 // Load->Store
651 return getLoadStoreAddrMode(HasVector, LoadI->getType());
652 }
653
654 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
655
656 // * Use LDE instead of LE/LEY for z13 to avoid partial register
657 // dependencies (LDE only supports small offsets).
658 // * Utilize the vector registers to hold floating point
659 // values (vector load / store instructions only support small
660 // offsets).
661
662 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
663 I->getOperand(0)->getType());
664 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
665 bool IsVectorAccess = MemAccessTy->isVectorTy();
666
667 // A store of an extracted vector element will be combined into a VSTE type
668 // instruction.
669 if (!IsVectorAccess && isa<StoreInst>(I)) {
670 Value *DataOp = I->getOperand(0);
671 if (isa<ExtractElementInst>(DataOp))
672 IsVectorAccess = true;
673 }
674
675 // A load which gets inserted into a vector element will be combined into a
676 // VLE type instruction.
677 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
678 User *LoadUser = *I->user_begin();
679 if (isa<InsertElementInst>(LoadUser))
680 IsVectorAccess = true;
681 }
682
683 if (IsFPAccess || IsVectorAccess)
684 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
685 }
686
687 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
688}
689
Mehdi Amini0cdec1e2015-07-09 02:09:40 +0000690bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
Jonas Paulsson6228aed2017-08-09 11:28:01 +0000691 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
Richard Sandiford791bea42013-07-31 12:58:26 +0000692 // Punt on globals for now, although they can be used in limited
693 // RELATIVE LONG cases.
694 if (AM.BaseGV)
695 return false;
696
697 // Require a 20-bit signed offset.
698 if (!isInt<20>(AM.BaseOffs))
699 return false;
700
Jonas Paulsson6228aed2017-08-09 11:28:01 +0000701 AddressingMode SupportedAM(true, true);
702 if (I != nullptr)
703 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
704
705 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
706 return false;
707
708 if (!SupportedAM.IndexReg)
Jonas Paulsson024e3192017-07-21 11:59:37 +0000709 // No indexing allowed.
710 return AM.Scale == 0;
711 else
712 // Indexing is OK but no scale factor can be applied.
713 return AM.Scale == 0 || AM.Scale == 1;
Richard Sandiford791bea42013-07-31 12:58:26 +0000714}
715
Richard Sandiford709bda62013-08-19 12:42:31 +0000716bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
717 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
718 return false;
719 unsigned FromBits = FromType->getPrimitiveSizeInBits();
720 unsigned ToBits = ToType->getPrimitiveSizeInBits();
721 return FromBits > ToBits;
722}
723
724bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
725 if (!FromVT.isInteger() || !ToVT.isInteger())
726 return false;
727 unsigned FromBits = FromVT.getSizeInBits();
728 unsigned ToBits = ToVT.getSizeInBits();
729 return FromBits > ToBits;
730}
731
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000732//===----------------------------------------------------------------------===//
733// Inline asm support
734//===----------------------------------------------------------------------===//
735
736TargetLowering::ConstraintType
Benjamin Kramer9bfb6272015-07-05 19:29:18 +0000737SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000738 if (Constraint.size() == 1) {
739 switch (Constraint[0]) {
740 case 'a': // Address register
741 case 'd': // Data register (equivalent to 'r')
742 case 'f': // Floating-point register
Richard Sandiford0755c932013-10-01 11:26:28 +0000743 case 'h': // High-part register
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000744 case 'r': // General-purpose register
Ulrich Weigand18f69302018-03-02 20:36:34 +0000745 case 'v': // Vector register
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000746 return C_RegisterClass;
747
748 case 'Q': // Memory with base and unsigned 12-bit displacement
749 case 'R': // Likewise, plus an index
750 case 'S': // Memory with base and signed 20-bit displacement
751 case 'T': // Likewise, plus an index
752 case 'm': // Equivalent to 'T'.
753 return C_Memory;
754
755 case 'I': // Unsigned 8-bit constant
756 case 'J': // Unsigned 12-bit constant
757 case 'K': // Signed 16-bit constant
758 case 'L': // Signed 20-bit displacement (on all targets we support)
759 case 'M': // 0x7fffffff
760 return C_Other;
761
762 default:
763 break;
764 }
765 }
766 return TargetLowering::getConstraintType(Constraint);
767}
768
769TargetLowering::ConstraintWeight SystemZTargetLowering::
770getSingleConstraintMatchWeight(AsmOperandInfo &info,
771 const char *constraint) const {
772 ConstraintWeight weight = CW_Invalid;
773 Value *CallOperandVal = info.CallOperandVal;
774 // If we don't have a value, we can't do a match,
775 // but allow it at the lowest weight.
Craig Topper062a2ba2014-04-25 05:30:21 +0000776 if (!CallOperandVal)
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000777 return CW_Default;
778 Type *type = CallOperandVal->getType();
779 // Look at the constraint type.
780 switch (*constraint) {
781 default:
782 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
783 break;
784
785 case 'a': // Address register
786 case 'd': // Data register (equivalent to 'r')
Richard Sandiford0755c932013-10-01 11:26:28 +0000787 case 'h': // High-part register
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000788 case 'r': // General-purpose register
789 if (CallOperandVal->getType()->isIntegerTy())
790 weight = CW_Register;
791 break;
792
793 case 'f': // Floating-point register
794 if (type->isFloatingPointTy())
795 weight = CW_Register;
796 break;
797
Ulrich Weigand18f69302018-03-02 20:36:34 +0000798 case 'v': // Vector register
799 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
800 Subtarget.hasVector())
801 weight = CW_Register;
802 break;
803
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000804 case 'I': // Unsigned 8-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000805 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000806 if (isUInt<8>(C->getZExtValue()))
807 weight = CW_Constant;
808 break;
809
810 case 'J': // Unsigned 12-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000811 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000812 if (isUInt<12>(C->getZExtValue()))
813 weight = CW_Constant;
814 break;
815
816 case 'K': // Signed 16-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000817 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000818 if (isInt<16>(C->getSExtValue()))
819 weight = CW_Constant;
820 break;
821
822 case 'L': // Signed 20-bit displacement (on all targets we support)
Richard Sandiford21f5d682014-03-06 11:22:58 +0000823 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000824 if (isInt<20>(C->getSExtValue()))
825 weight = CW_Constant;
826 break;
827
828 case 'M': // 0x7fffffff
Richard Sandiford21f5d682014-03-06 11:22:58 +0000829 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000830 if (C->getZExtValue() == 0x7fffffff)
831 weight = CW_Constant;
832 break;
833 }
834 return weight;
835}
836
Richard Sandifordb8204052013-07-12 09:08:12 +0000837// Parse a "{tNNN}" register constraint for which the register type "t"
838// has already been verified. MC is the class associated with "t" and
839// Map maps 0-based register numbers to LLVM register numbers.
840static std::pair<unsigned, const TargetRegisterClass *>
Benjamin Kramer9bfb6272015-07-05 19:29:18 +0000841parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000842 const unsigned *Map, unsigned Size) {
Richard Sandifordb8204052013-07-12 09:08:12 +0000843 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
844 if (isdigit(Constraint[2])) {
Benjamin Kramer9bfb6272015-07-05 19:29:18 +0000845 unsigned Index;
846 bool Failed =
847 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
Ulrich Weigand18f69302018-03-02 20:36:34 +0000848 if (!Failed && Index < Size && Map[Index])
Richard Sandifordb8204052013-07-12 09:08:12 +0000849 return std::make_pair(Map[Index], RC);
850 }
Craig Topper062a2ba2014-04-25 05:30:21 +0000851 return std::make_pair(0U, nullptr);
Richard Sandifordb8204052013-07-12 09:08:12 +0000852}
853
Eric Christopher11e4df72015-02-26 22:38:43 +0000854std::pair<unsigned, const TargetRegisterClass *>
855SystemZTargetLowering::getRegForInlineAsmConstraint(
Benjamin Kramer9bfb6272015-07-05 19:29:18 +0000856 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000857 if (Constraint.size() == 1) {
858 // GCC Constraint Letters
859 switch (Constraint[0]) {
860 default: break;
861 case 'd': // Data register (equivalent to 'r')
862 case 'r': // General-purpose register
863 if (VT == MVT::i64)
864 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
865 else if (VT == MVT::i128)
866 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
867 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
868
869 case 'a': // Address register
870 if (VT == MVT::i64)
871 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
872 else if (VT == MVT::i128)
873 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
874 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
875
Richard Sandiford0755c932013-10-01 11:26:28 +0000876 case 'h': // High-part register (an LLVM extension)
877 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
878
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000879 case 'f': // Floating-point register
880 if (VT == MVT::f64)
881 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
882 else if (VT == MVT::f128)
883 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
884 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
Ulrich Weigand18f69302018-03-02 20:36:34 +0000885
886 case 'v': // Vector register
887 if (Subtarget.hasVector()) {
888 if (VT == MVT::f32)
889 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
890 if (VT == MVT::f64)
891 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
892 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
893 }
894 break;
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000895 }
896 }
Benjamin Kramer9bfb6272015-07-05 19:29:18 +0000897 if (Constraint.size() > 0 && Constraint[0] == '{') {
Richard Sandifordb8204052013-07-12 09:08:12 +0000898 // We need to override the default register parsing for GPRs and FPRs
899 // because the interpretation depends on VT. The internal names of
900 // the registers are also different from the external names
901 // (F0D and F0S instead of F0, etc.).
902 if (Constraint[1] == 'r') {
903 if (VT == MVT::i32)
904 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000905 SystemZMC::GR32Regs, 16);
Richard Sandifordb8204052013-07-12 09:08:12 +0000906 if (VT == MVT::i128)
907 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000908 SystemZMC::GR128Regs, 16);
Richard Sandifordb8204052013-07-12 09:08:12 +0000909 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000910 SystemZMC::GR64Regs, 16);
Richard Sandifordb8204052013-07-12 09:08:12 +0000911 }
912 if (Constraint[1] == 'f') {
913 if (VT == MVT::f32)
914 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000915 SystemZMC::FP32Regs, 16);
Richard Sandifordb8204052013-07-12 09:08:12 +0000916 if (VT == MVT::f128)
917 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000918 SystemZMC::FP128Regs, 16);
Richard Sandifordb8204052013-07-12 09:08:12 +0000919 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
Ulrich Weigand18f69302018-03-02 20:36:34 +0000920 SystemZMC::FP64Regs, 16);
921 }
922 if (Constraint[1] == 'v') {
923 if (VT == MVT::f32)
924 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
925 SystemZMC::VR32Regs, 32);
926 if (VT == MVT::f64)
927 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
928 SystemZMC::VR64Regs, 32);
929 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
930 SystemZMC::VR128Regs, 32);
Richard Sandifordb8204052013-07-12 09:08:12 +0000931 }
932 }
Eric Christopher11e4df72015-02-26 22:38:43 +0000933 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000934}
935
936void SystemZTargetLowering::
937LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
938 std::vector<SDValue> &Ops,
939 SelectionDAG &DAG) const {
940 // Only support length 1 constraints for now.
941 if (Constraint.length() == 1) {
942 switch (Constraint[0]) {
943 case 'I': // Unsigned 8-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000944 if (auto *C = dyn_cast<ConstantSDNode>(Op))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000945 if (isUInt<8>(C->getZExtValue()))
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000946 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000947 Op.getValueType()));
948 return;
949
950 case 'J': // Unsigned 12-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000951 if (auto *C = dyn_cast<ConstantSDNode>(Op))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000952 if (isUInt<12>(C->getZExtValue()))
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000953 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000954 Op.getValueType()));
955 return;
956
957 case 'K': // Signed 16-bit constant
Richard Sandiford21f5d682014-03-06 11:22:58 +0000958 if (auto *C = dyn_cast<ConstantSDNode>(Op))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000959 if (isInt<16>(C->getSExtValue()))
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000960 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000961 Op.getValueType()));
962 return;
963
964 case 'L': // Signed 20-bit displacement (on all targets we support)
Richard Sandiford21f5d682014-03-06 11:22:58 +0000965 if (auto *C = dyn_cast<ConstantSDNode>(Op))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000966 if (isInt<20>(C->getSExtValue()))
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000967 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000968 Op.getValueType()));
969 return;
970
971 case 'M': // 0x7fffffff
Richard Sandiford21f5d682014-03-06 11:22:58 +0000972 if (auto *C = dyn_cast<ConstantSDNode>(Op))
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000973 if (C->getZExtValue() == 0x7fffffff)
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000974 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +0000975 Op.getValueType()));
976 return;
977 }
978 }
979 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
980}
981
982//===----------------------------------------------------------------------===//
983// Calling conventions
984//===----------------------------------------------------------------------===//
985
986#include "SystemZGenCallingConv.inc"
987
Ulrich Weigand5eb64112018-03-02 20:39:30 +0000988const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
989 CallingConv::ID) const {
990 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
991 SystemZ::R14D, 0 };
992 return ScratchRegs;
993}
994
Richard Sandiford709bda62013-08-19 12:42:31 +0000995bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
996 Type *ToType) const {
997 return isTruncateFree(FromType, ToType);
998}
999
Matt Arsenault31380752017-04-18 21:16:46 +00001000bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
Ulrich Weigand19d24d22015-11-13 13:00:27 +00001001 return CI->isTailCall();
Richard Sandiford709bda62013-08-19 12:42:31 +00001002}
1003
Ulrich Weigand5211f9f2015-05-05 19:30:05 +00001004// We do not yet support 128-bit single-element vector types. If the user
1005// attempts to use such types as function argument or return type, prefer
1006// to error out instead of emitting code violating the ABI.
1007static void VerifyVectorType(MVT VT, EVT ArgVT) {
1008 if (ArgVT.isVector() && !VT.isVector())
1009 report_fatal_error("Unsupported vector argument or return type");
1010}
1011
1012static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1013 for (unsigned i = 0; i < Ins.size(); ++i)
1014 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1015}
1016
1017static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1018 for (unsigned i = 0; i < Outs.size(); ++i)
1019 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1020}
1021
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001022// Value is a value that has been passed to us in the location described by VA
1023// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1024// any loads onto Chain.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001025static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001026 CCValAssign &VA, SDValue Chain,
1027 SDValue Value) {
1028 // If the argument has been promoted from a smaller type, insert an
1029 // assertion to capture this.
1030 if (VA.getLocInfo() == CCValAssign::SExt)
1031 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1032 DAG.getValueType(VA.getValVT()));
1033 else if (VA.getLocInfo() == CCValAssign::ZExt)
1034 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1035 DAG.getValueType(VA.getValVT()));
1036
1037 if (VA.isExtInLoc())
1038 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00001039 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1040 // If this is a short vector argument loaded from the stack,
1041 // extend from i64 to full vector size and then bitcast.
1042 assert(VA.getLocVT() == MVT::i64);
1043 assert(VA.getValVT().isVector());
Ahmed Bougacha128f8732016-04-26 21:15:30 +00001044 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00001045 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1046 } else
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001047 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1048 return Value;
1049}
1050
1051// Value is a value of type VA.getValVT() that we need to copy into
1052// the location described by VA. Return a copy of Value converted to
1053// VA.getValVT(). The caller is responsible for handling indirect values.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001054static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001055 CCValAssign &VA, SDValue Value) {
1056 switch (VA.getLocInfo()) {
1057 case CCValAssign::SExt:
1058 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1059 case CCValAssign::ZExt:
1060 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1061 case CCValAssign::AExt:
1062 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00001063 case CCValAssign::BCvt:
1064 // If this is a short vector argument to be stored to the stack,
1065 // bitcast to v2i64 and then extract first element.
1066 assert(VA.getLocVT() == MVT::i64);
1067 assert(VA.getValVT().isVector());
1068 Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1069 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1070 DAG.getConstant(0, DL, MVT::i32));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001071 case CCValAssign::Full:
1072 return Value;
1073 default:
1074 llvm_unreachable("Unhandled getLocInfo()");
1075 }
1076}
1077
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001078SDValue SystemZTargetLowering::LowerFormalArguments(
1079 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1080 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1081 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001082 MachineFunction &MF = DAG.getMachineFunction();
Matthias Braun941a7052016-07-28 18:40:00 +00001083 MachineFrameInfo &MFI = MF.getFrameInfo();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001084 MachineRegisterInfo &MRI = MF.getRegInfo();
1085 SystemZMachineFunctionInfo *FuncInfo =
Eric Christophera6734172015-01-31 00:06:45 +00001086 MF.getInfo<SystemZMachineFunctionInfo>();
1087 auto *TFL =
1088 static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001089 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001090
Ulrich Weigand5211f9f2015-05-05 19:30:05 +00001091 // Detect unsupported vector argument types.
1092 if (Subtarget.hasVector())
1093 VerifyVectorTypes(Ins);
1094
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001095 // Assign locations to all of the incoming arguments.
1096 SmallVector<CCValAssign, 16> ArgLocs;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00001097 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001098 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1099
1100 unsigned NumFixedGPRs = 0;
1101 unsigned NumFixedFPRs = 0;
1102 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1103 SDValue ArgValue;
1104 CCValAssign &VA = ArgLocs[I];
1105 EVT LocVT = VA.getLocVT();
1106 if (VA.isRegLoc()) {
1107 // Arguments passed in registers
1108 const TargetRegisterClass *RC;
1109 switch (LocVT.getSimpleVT().SimpleTy) {
1110 default:
1111 // Integers smaller than i64 should be promoted to i64.
1112 llvm_unreachable("Unexpected argument type");
1113 case MVT::i32:
1114 NumFixedGPRs += 1;
1115 RC = &SystemZ::GR32BitRegClass;
1116 break;
1117 case MVT::i64:
1118 NumFixedGPRs += 1;
1119 RC = &SystemZ::GR64BitRegClass;
1120 break;
1121 case MVT::f32:
1122 NumFixedFPRs += 1;
1123 RC = &SystemZ::FP32BitRegClass;
1124 break;
1125 case MVT::f64:
1126 NumFixedFPRs += 1;
1127 RC = &SystemZ::FP64BitRegClass;
1128 break;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00001129 case MVT::v16i8:
1130 case MVT::v8i16:
1131 case MVT::v4i32:
1132 case MVT::v2i64:
Ulrich Weigand80b3af72015-05-05 19:27:45 +00001133 case MVT::v4f32:
Ulrich Weigandcd808232015-05-05 19:26:48 +00001134 case MVT::v2f64:
Ulrich Weigandce4c1092015-05-05 19:25:42 +00001135 RC = &SystemZ::VR128BitRegClass;
1136 break;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001137 }
1138
1139 unsigned VReg = MRI.createVirtualRegister(RC);
1140 MRI.addLiveIn(VA.getLocReg(), VReg);
1141 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1142 } else {
1143 assert(VA.isMemLoc() && "Argument not register or memory");
1144
1145 // Create the frame index object for this incoming parameter.
Matthias Braun941a7052016-07-28 18:40:00 +00001146 int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1147 VA.getLocMemOffset(), true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001148
1149 // Create the SelectionDAG nodes corresponding to a load
1150 // from this parameter. Unpromoted ints and floats are
1151 // passed as right-justified 8-byte values.
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001152 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1153 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001154 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1155 DAG.getIntPtrConstant(4, DL));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001156 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
Justin Lebar9c375812016-07-15 18:27:10 +00001157 MachinePointerInfo::getFixedStack(MF, FI));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001158 }
1159
1160 // Convert the value of the argument register into the value that's
1161 // being passed.
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001162 if (VA.getLocInfo() == CCValAssign::Indirect) {
Justin Lebar9c375812016-07-15 18:27:10 +00001163 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1164 MachinePointerInfo()));
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001165 // If the original argument was split (e.g. i128), we need
1166 // to load all parts of it here (using the same address).
1167 unsigned ArgIndex = Ins[I].OrigArgIndex;
1168 assert (Ins[I].PartOffset == 0);
1169 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1170 CCValAssign &PartVA = ArgLocs[I + 1];
1171 unsigned PartOffset = Ins[I + 1].PartOffset;
1172 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1173 DAG.getIntPtrConstant(PartOffset, DL));
Justin Lebar9c375812016-07-15 18:27:10 +00001174 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1175 MachinePointerInfo()));
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001176 ++I;
1177 }
1178 } else
1179 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001180 }
1181
1182 if (IsVarArg) {
1183 // Save the number of non-varargs registers for later use by va_start, etc.
1184 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1185 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1186
1187 // Likewise the address (in the form of a frame index) of where the
1188 // first stack vararg would be. The 1-byte size here is arbitrary.
1189 int64_t StackSize = CCInfo.getNextStackOffset();
Matthias Braun941a7052016-07-28 18:40:00 +00001190 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001191
1192 // ...and a similar frame index for the caller-allocated save area
1193 // that will be used to store the incoming registers.
1194 int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
Matthias Braun941a7052016-07-28 18:40:00 +00001195 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001196 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1197
1198 // Store the FPR varargs in the reserved frame slots. (We store the
1199 // GPRs as part of the prologue.)
1200 if (NumFixedFPRs < SystemZ::NumArgFPRs) {
1201 SDValue MemOps[SystemZ::NumArgFPRs];
1202 for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
1203 unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
Matthias Braun941a7052016-07-28 18:40:00 +00001204 int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
Mehdi Amini44ede332015-07-09 02:09:04 +00001205 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001206 unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
1207 &SystemZ::FP64BitRegClass);
1208 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1209 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
Justin Lebar9c375812016-07-15 18:27:10 +00001210 MachinePointerInfo::getFixedStack(MF, FI));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001211 }
1212 // Join the stores, which are independent of one another.
1213 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001214 makeArrayRef(&MemOps[NumFixedFPRs],
1215 SystemZ::NumArgFPRs-NumFixedFPRs));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001216 }
1217 }
1218
1219 return Chain;
1220}
1221
Benjamin Kramerc6cc58e2014-10-04 16:55:56 +00001222static bool canUseSiblingCall(const CCState &ArgCCInfo,
Bryan Chan893110e2016-04-28 00:17:23 +00001223 SmallVectorImpl<CCValAssign> &ArgLocs,
1224 SmallVectorImpl<ISD::OutputArg> &Outs) {
Richard Sandiford709bda62013-08-19 12:42:31 +00001225 // Punt if there are any indirect or stack arguments, or if the call
Bryan Chan893110e2016-04-28 00:17:23 +00001226 // needs the callee-saved argument register R6, or if the call uses
1227 // the callee-saved register arguments SwiftSelf and SwiftError.
Richard Sandiford709bda62013-08-19 12:42:31 +00001228 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1229 CCValAssign &VA = ArgLocs[I];
1230 if (VA.getLocInfo() == CCValAssign::Indirect)
1231 return false;
1232 if (!VA.isRegLoc())
1233 return false;
1234 unsigned Reg = VA.getLocReg();
Richard Sandiford0755c932013-10-01 11:26:28 +00001235 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
Richard Sandiford709bda62013-08-19 12:42:31 +00001236 return false;
Bryan Chan893110e2016-04-28 00:17:23 +00001237 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1238 return false;
Richard Sandiford709bda62013-08-19 12:42:31 +00001239 }
1240 return true;
1241}
1242
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001243SDValue
1244SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1245 SmallVectorImpl<SDValue> &InVals) const {
1246 SelectionDAG &DAG = CLI.DAG;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001247 SDLoc &DL = CLI.DL;
Craig Topperb94011f2013-07-14 04:42:23 +00001248 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1249 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1250 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001251 SDValue Chain = CLI.Chain;
1252 SDValue Callee = CLI.Callee;
Richard Sandiford709bda62013-08-19 12:42:31 +00001253 bool &IsTailCall = CLI.IsTailCall;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001254 CallingConv::ID CallConv = CLI.CallConv;
1255 bool IsVarArg = CLI.IsVarArg;
1256 MachineFunction &MF = DAG.getMachineFunction();
Mehdi Amini44ede332015-07-09 02:09:04 +00001257 EVT PtrVT = getPointerTy(MF.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001258
Ulrich Weigand5211f9f2015-05-05 19:30:05 +00001259 // Detect unsupported vector argument and return types.
1260 if (Subtarget.hasVector()) {
1261 VerifyVectorTypes(Outs);
1262 VerifyVectorTypes(Ins);
1263 }
1264
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001265 // Analyze the operands of the call, assigning locations to each operand.
1266 SmallVector<CCValAssign, 16> ArgLocs;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00001267 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001268 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1269
Richard Sandiford709bda62013-08-19 12:42:31 +00001270 // We don't support GuaranteedTailCallOpt, only automatically-detected
1271 // sibling calls.
Bryan Chan893110e2016-04-28 00:17:23 +00001272 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
Richard Sandiford709bda62013-08-19 12:42:31 +00001273 IsTailCall = false;
1274
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001275 // Get a count of how many bytes are to be pushed on the stack.
1276 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1277
1278 // Mark the start of the call.
Richard Sandiford709bda62013-08-19 12:42:31 +00001279 if (!IsTailCall)
Serge Pavlovd526b132017-05-09 13:35:13 +00001280 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001281
1282 // Copy argument values to their designated locations.
1283 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1284 SmallVector<SDValue, 8> MemOpChains;
1285 SDValue StackPtr;
1286 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1287 CCValAssign &VA = ArgLocs[I];
1288 SDValue ArgValue = OutVals[I];
1289
1290 if (VA.getLocInfo() == CCValAssign::Indirect) {
1291 // Store the argument in a stack slot and pass its address.
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001292 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001293 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Justin Lebar9c375812016-07-15 18:27:10 +00001294 MemOpChains.push_back(
1295 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1296 MachinePointerInfo::getFixedStack(MF, FI)));
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001297 // If the original argument was split (e.g. i128), we need
1298 // to store all parts of it here (and pass just one address).
1299 unsigned ArgIndex = Outs[I].OrigArgIndex;
1300 assert (Outs[I].PartOffset == 0);
1301 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1302 SDValue PartValue = OutVals[I + 1];
1303 unsigned PartOffset = Outs[I + 1].PartOffset;
1304 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1305 DAG.getIntPtrConstant(PartOffset, DL));
Justin Lebar9c375812016-07-15 18:27:10 +00001306 MemOpChains.push_back(
1307 DAG.getStore(Chain, DL, PartValue, Address,
1308 MachinePointerInfo::getFixedStack(MF, FI)));
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001309 ++I;
1310 }
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001311 ArgValue = SpillSlot;
1312 } else
1313 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1314
1315 if (VA.isRegLoc())
1316 // Queue up the argument copies and emit them at the end.
1317 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1318 else {
1319 assert(VA.isMemLoc() && "Argument not register or memory");
1320
1321 // Work out the address of the stack slot. Unpromoted ints and
1322 // floats are passed as right-justified 8-byte values.
1323 if (!StackPtr.getNode())
1324 StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1325 unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
1326 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1327 Offset += 4;
1328 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001329 DAG.getIntPtrConstant(Offset, DL));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001330
1331 // Emit the store.
Justin Lebar9c375812016-07-15 18:27:10 +00001332 MemOpChains.push_back(
1333 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001334 }
1335 }
1336
1337 // Join the stores, which are independent of one another.
1338 if (!MemOpChains.empty())
Craig Topper48d114b2014-04-26 18:35:24 +00001339 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001340
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001341 // Accept direct calls by converting symbolic call addresses to the
Richard Sandiford709bda62013-08-19 12:42:31 +00001342 // associated Target* opcodes. Force %r1 to be used for indirect
1343 // tail calls.
1344 SDValue Glue;
Richard Sandiford21f5d682014-03-06 11:22:58 +00001345 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001346 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1347 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
Richard Sandiford21f5d682014-03-06 11:22:58 +00001348 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001349 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1350 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
Richard Sandiford709bda62013-08-19 12:42:31 +00001351 } else if (IsTailCall) {
1352 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1353 Glue = Chain.getValue(1);
1354 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1355 }
1356
1357 // Build a sequence of copy-to-reg nodes, chained and glued together.
1358 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1359 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1360 RegsToPass[I].second, Glue);
1361 Glue = Chain.getValue(1);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001362 }
1363
1364 // The first call operand is the chain and the second is the target address.
1365 SmallVector<SDValue, 8> Ops;
1366 Ops.push_back(Chain);
1367 Ops.push_back(Callee);
1368
1369 // Add argument registers to the end of the list so that they are
1370 // known live into the call.
1371 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1372 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1373 RegsToPass[I].second.getValueType()));
1374
Richard Sandiford02bb0ec2014-07-10 11:44:37 +00001375 // Add a register mask operand representing the call-preserved registers.
Eric Christophera6734172015-01-31 00:06:45 +00001376 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
Eric Christopher9deb75d2015-03-11 22:42:13 +00001377 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
Richard Sandiford02bb0ec2014-07-10 11:44:37 +00001378 assert(Mask && "Missing call preserved mask for calling convention");
1379 Ops.push_back(DAG.getRegisterMask(Mask));
1380
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001381 // Glue the call to the argument copies, if any.
1382 if (Glue.getNode())
1383 Ops.push_back(Glue);
1384
1385 // Emit the call.
1386 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Richard Sandiford709bda62013-08-19 12:42:31 +00001387 if (IsTailCall)
Craig Topper48d114b2014-04-26 18:35:24 +00001388 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1389 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001390 Glue = Chain.getValue(1);
1391
1392 // Mark the end of the call, which is glued to the call itself.
1393 Chain = DAG.getCALLSEQ_END(Chain,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001394 DAG.getConstant(NumBytes, DL, PtrVT, true),
1395 DAG.getConstant(0, DL, PtrVT, true),
Andrew Trickad6d08a2013-05-29 22:03:55 +00001396 Glue, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001397 Glue = Chain.getValue(1);
1398
1399 // Assign locations to each value returned by this call.
1400 SmallVector<CCValAssign, 16> RetLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001401 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001402 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1403
1404 // Copy all of the result registers out of their specified physreg.
1405 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1406 CCValAssign &VA = RetLocs[I];
1407
1408 // Copy the value out, gluing the copy to the end of the call sequence.
1409 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1410 VA.getLocVT(), Glue);
1411 Chain = RetValue.getValue(1);
1412 Glue = RetValue.getValue(2);
1413
1414 // Convert the value of the return register into the value that's
1415 // being returned.
1416 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1417 }
1418
1419 return Chain;
1420}
1421
Ulrich Weiganda887f062015-08-13 13:37:06 +00001422bool SystemZTargetLowering::
1423CanLowerReturn(CallingConv::ID CallConv,
1424 MachineFunction &MF, bool isVarArg,
1425 const SmallVectorImpl<ISD::OutputArg> &Outs,
1426 LLVMContext &Context) const {
1427 // Detect unsupported vector return types.
1428 if (Subtarget.hasVector())
1429 VerifyVectorTypes(Outs);
1430
Ulrich Weigandcfa1d2b2016-02-19 14:10:21 +00001431 // Special case that we cannot easily detect in RetCC_SystemZ since
1432 // i128 is not a legal type.
1433 for (auto &Out : Outs)
1434 if (Out.ArgVT == MVT::i128)
1435 return false;
1436
Ulrich Weiganda887f062015-08-13 13:37:06 +00001437 SmallVector<CCValAssign, 16> RetLocs;
1438 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1439 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1440}
1441
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001442SDValue
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001443SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1444 bool IsVarArg,
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001445 const SmallVectorImpl<ISD::OutputArg> &Outs,
1446 const SmallVectorImpl<SDValue> &OutVals,
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001447 const SDLoc &DL, SelectionDAG &DAG) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001448 MachineFunction &MF = DAG.getMachineFunction();
1449
Ulrich Weigand5211f9f2015-05-05 19:30:05 +00001450 // Detect unsupported vector return types.
1451 if (Subtarget.hasVector())
1452 VerifyVectorTypes(Outs);
1453
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001454 // Assign locations to each returned value.
1455 SmallVector<CCValAssign, 16> RetLocs;
Eric Christopherb5217502014-08-06 18:45:26 +00001456 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001457 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1458
1459 // Quick exit for void returns
1460 if (RetLocs.empty())
1461 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1462
1463 // Copy the result values into the output registers.
1464 SDValue Glue;
1465 SmallVector<SDValue, 4> RetOps;
1466 RetOps.push_back(Chain);
1467 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1468 CCValAssign &VA = RetLocs[I];
1469 SDValue RetValue = OutVals[I];
1470
1471 // Make the return register live on exit.
1472 assert(VA.isRegLoc() && "Can only return in registers!");
1473
1474 // Promote the value as required.
1475 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1476
1477 // Chain and glue the copies together.
1478 unsigned Reg = VA.getLocReg();
1479 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1480 Glue = Chain.getValue(1);
1481 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1482 }
1483
1484 // Update chain and glue.
1485 RetOps[0] = Chain;
1486 if (Glue.getNode())
1487 RetOps.push_back(Glue);
1488
Craig Topper48d114b2014-04-26 18:35:24 +00001489 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001490}
1491
Ulrich Weigand57c85f52015-04-01 12:51:43 +00001492// Return true if Op is an intrinsic node with chain that returns the CC value
1493// as its only (other) argument. Provide the associated SystemZISD opcode and
1494// the mask of valid CC values if so.
1495static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1496 unsigned &CCValid) {
1497 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1498 switch (Id) {
1499 case Intrinsic::s390_tbegin:
1500 Opcode = SystemZISD::TBEGIN;
1501 CCValid = SystemZ::CCMASK_TBEGIN;
1502 return true;
1503
1504 case Intrinsic::s390_tbegin_nofloat:
1505 Opcode = SystemZISD::TBEGIN_NOFLOAT;
1506 CCValid = SystemZ::CCMASK_TBEGIN;
1507 return true;
1508
1509 case Intrinsic::s390_tend:
1510 Opcode = SystemZISD::TEND;
1511 CCValid = SystemZ::CCMASK_TEND;
1512 return true;
1513
1514 default:
1515 return false;
1516 }
1517}
1518
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001519// Return true if Op is an intrinsic node without chain that returns the
1520// CC value as its final argument. Provide the associated SystemZISD
1521// opcode and the mask of valid CC values if so.
1522static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1523 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1524 switch (Id) {
1525 case Intrinsic::s390_vpkshs:
1526 case Intrinsic::s390_vpksfs:
1527 case Intrinsic::s390_vpksgs:
1528 Opcode = SystemZISD::PACKS_CC;
1529 CCValid = SystemZ::CCMASK_VCMP;
1530 return true;
1531
1532 case Intrinsic::s390_vpklshs:
1533 case Intrinsic::s390_vpklsfs:
1534 case Intrinsic::s390_vpklsgs:
1535 Opcode = SystemZISD::PACKLS_CC;
1536 CCValid = SystemZ::CCMASK_VCMP;
1537 return true;
1538
1539 case Intrinsic::s390_vceqbs:
1540 case Intrinsic::s390_vceqhs:
1541 case Intrinsic::s390_vceqfs:
1542 case Intrinsic::s390_vceqgs:
1543 Opcode = SystemZISD::VICMPES;
1544 CCValid = SystemZ::CCMASK_VCMP;
1545 return true;
1546
1547 case Intrinsic::s390_vchbs:
1548 case Intrinsic::s390_vchhs:
1549 case Intrinsic::s390_vchfs:
1550 case Intrinsic::s390_vchgs:
1551 Opcode = SystemZISD::VICMPHS;
1552 CCValid = SystemZ::CCMASK_VCMP;
1553 return true;
1554
1555 case Intrinsic::s390_vchlbs:
1556 case Intrinsic::s390_vchlhs:
1557 case Intrinsic::s390_vchlfs:
1558 case Intrinsic::s390_vchlgs:
1559 Opcode = SystemZISD::VICMPHLS;
1560 CCValid = SystemZ::CCMASK_VCMP;
1561 return true;
1562
1563 case Intrinsic::s390_vtm:
1564 Opcode = SystemZISD::VTM;
1565 CCValid = SystemZ::CCMASK_VCMP;
1566 return true;
1567
1568 case Intrinsic::s390_vfaebs:
1569 case Intrinsic::s390_vfaehs:
1570 case Intrinsic::s390_vfaefs:
1571 Opcode = SystemZISD::VFAE_CC;
1572 CCValid = SystemZ::CCMASK_ANY;
1573 return true;
1574
1575 case Intrinsic::s390_vfaezbs:
1576 case Intrinsic::s390_vfaezhs:
1577 case Intrinsic::s390_vfaezfs:
1578 Opcode = SystemZISD::VFAEZ_CC;
1579 CCValid = SystemZ::CCMASK_ANY;
1580 return true;
1581
1582 case Intrinsic::s390_vfeebs:
1583 case Intrinsic::s390_vfeehs:
1584 case Intrinsic::s390_vfeefs:
1585 Opcode = SystemZISD::VFEE_CC;
1586 CCValid = SystemZ::CCMASK_ANY;
1587 return true;
1588
1589 case Intrinsic::s390_vfeezbs:
1590 case Intrinsic::s390_vfeezhs:
1591 case Intrinsic::s390_vfeezfs:
1592 Opcode = SystemZISD::VFEEZ_CC;
1593 CCValid = SystemZ::CCMASK_ANY;
1594 return true;
1595
1596 case Intrinsic::s390_vfenebs:
1597 case Intrinsic::s390_vfenehs:
1598 case Intrinsic::s390_vfenefs:
1599 Opcode = SystemZISD::VFENE_CC;
1600 CCValid = SystemZ::CCMASK_ANY;
1601 return true;
1602
1603 case Intrinsic::s390_vfenezbs:
1604 case Intrinsic::s390_vfenezhs:
1605 case Intrinsic::s390_vfenezfs:
1606 Opcode = SystemZISD::VFENEZ_CC;
1607 CCValid = SystemZ::CCMASK_ANY;
1608 return true;
1609
1610 case Intrinsic::s390_vistrbs:
1611 case Intrinsic::s390_vistrhs:
1612 case Intrinsic::s390_vistrfs:
1613 Opcode = SystemZISD::VISTR_CC;
1614 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
1615 return true;
1616
1617 case Intrinsic::s390_vstrcbs:
1618 case Intrinsic::s390_vstrchs:
1619 case Intrinsic::s390_vstrcfs:
1620 Opcode = SystemZISD::VSTRC_CC;
1621 CCValid = SystemZ::CCMASK_ANY;
1622 return true;
1623
1624 case Intrinsic::s390_vstrczbs:
1625 case Intrinsic::s390_vstrczhs:
1626 case Intrinsic::s390_vstrczfs:
1627 Opcode = SystemZISD::VSTRCZ_CC;
1628 CCValid = SystemZ::CCMASK_ANY;
1629 return true;
1630
1631 case Intrinsic::s390_vfcedbs:
Ulrich Weigand33435c42017-07-17 17:42:48 +00001632 case Intrinsic::s390_vfcesbs:
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001633 Opcode = SystemZISD::VFCMPES;
1634 CCValid = SystemZ::CCMASK_VCMP;
1635 return true;
1636
1637 case Intrinsic::s390_vfchdbs:
Ulrich Weigand33435c42017-07-17 17:42:48 +00001638 case Intrinsic::s390_vfchsbs:
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001639 Opcode = SystemZISD::VFCMPHS;
1640 CCValid = SystemZ::CCMASK_VCMP;
1641 return true;
1642
1643 case Intrinsic::s390_vfchedbs:
Ulrich Weigand33435c42017-07-17 17:42:48 +00001644 case Intrinsic::s390_vfchesbs:
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001645 Opcode = SystemZISD::VFCMPHES;
1646 CCValid = SystemZ::CCMASK_VCMP;
1647 return true;
1648
1649 case Intrinsic::s390_vftcidb:
Ulrich Weigand33435c42017-07-17 17:42:48 +00001650 case Intrinsic::s390_vftcisb:
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001651 Opcode = SystemZISD::VFTCI;
1652 CCValid = SystemZ::CCMASK_VCMP;
1653 return true;
1654
Marcin Koscielnickicf7cc722016-07-10 14:41:22 +00001655 case Intrinsic::s390_tdc:
1656 Opcode = SystemZISD::TDC;
1657 CCValid = SystemZ::CCMASK_TDC;
1658 return true;
1659
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001660 default:
1661 return false;
1662 }
1663}
1664
Ulrich Weigand57c85f52015-04-01 12:51:43 +00001665// Emit an intrinsic with chain with a glued value instead of its CC result.
1666static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
1667 unsigned Opcode) {
1668 // Copy all operands except the intrinsic ID.
1669 unsigned NumOps = Op.getNumOperands();
1670 SmallVector<SDValue, 6> Ops;
1671 Ops.reserve(NumOps - 1);
1672 Ops.push_back(Op.getOperand(0));
1673 for (unsigned I = 2; I < NumOps; ++I)
1674 Ops.push_back(Op.getOperand(I));
1675
1676 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
1677 SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
1678 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1679 SDValue OldChain = SDValue(Op.getNode(), 1);
1680 SDValue NewChain = SDValue(Intr.getNode(), 0);
1681 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
1682 return Intr;
1683}
1684
Ulrich Weigandc1708b22015-05-05 19:31:09 +00001685// Emit an intrinsic with a glued value instead of its CC result.
1686static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
1687 unsigned Opcode) {
1688 // Copy all operands except the intrinsic ID.
1689 unsigned NumOps = Op.getNumOperands();
1690 SmallVector<SDValue, 6> Ops;
1691 Ops.reserve(NumOps - 1);
1692 for (unsigned I = 1; I < NumOps; ++I)
1693 Ops.push_back(Op.getOperand(I));
1694
1695 if (Op->getNumValues() == 1)
1696 return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
1697 assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
1698 SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
1699 return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
1700}
1701
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001702// CC is a comparison that will be implemented using an integer or
1703// floating-point comparison. Return the condition code mask for
1704// a branch on true. In the integer case, CCMASK_CMP_UO is set for
1705// unsigned comparisons and clear for signed ones. In the floating-point
1706// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
1707static unsigned CCMaskForCondCode(ISD::CondCode CC) {
1708#define CONV(X) \
1709 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
1710 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
1711 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
1712
1713 switch (CC) {
1714 default:
1715 llvm_unreachable("Invalid integer condition!");
1716
1717 CONV(EQ);
1718 CONV(NE);
1719 CONV(GT);
1720 CONV(GE);
1721 CONV(LT);
1722 CONV(LE);
1723
1724 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
1725 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
1726 }
1727#undef CONV
1728}
1729
Richard Sandifordd420f732013-12-13 15:28:45 +00001730// If C can be converted to a comparison against zero, adjust the operands
Richard Sandiforda0757082013-08-01 10:29:45 +00001731// as necessary.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001732static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
Richard Sandifordd420f732013-12-13 15:28:45 +00001733 if (C.ICmpType == SystemZICMP::UnsignedOnly)
Richard Sandiforda0757082013-08-01 10:29:45 +00001734 return;
1735
Richard Sandiford21f5d682014-03-06 11:22:58 +00001736 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
Richard Sandiforda0757082013-08-01 10:29:45 +00001737 if (!ConstOp1)
1738 return;
1739
1740 int64_t Value = ConstOp1->getSExtValue();
Richard Sandifordd420f732013-12-13 15:28:45 +00001741 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
1742 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
1743 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
1744 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
1745 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001746 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
Richard Sandiforda0757082013-08-01 10:29:45 +00001747 }
1748}
1749
Richard Sandifordd420f732013-12-13 15:28:45 +00001750// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
1751// adjust the operands as necessary.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001752static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
1753 Comparison &C) {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001754 // For us to make any changes, it must a comparison between a single-use
1755 // load and a constant.
Richard Sandifordd420f732013-12-13 15:28:45 +00001756 if (!C.Op0.hasOneUse() ||
1757 C.Op0.getOpcode() != ISD::LOAD ||
1758 C.Op1.getOpcode() != ISD::Constant)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001759 return;
1760
1761 // We must have an 8- or 16-bit load.
Richard Sandiford21f5d682014-03-06 11:22:58 +00001762 auto *Load = cast<LoadSDNode>(C.Op0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001763 unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
1764 if (NumBits != 8 && NumBits != 16)
1765 return;
1766
1767 // The load must be an extending one and the constant must be within the
1768 // range of the unextended value.
Richard Sandiford21f5d682014-03-06 11:22:58 +00001769 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
Richard Sandifordd420f732013-12-13 15:28:45 +00001770 uint64_t Value = ConstOp1->getZExtValue();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001771 uint64_t Mask = (1 << NumBits) - 1;
1772 if (Load->getExtensionType() == ISD::SEXTLOAD) {
Richard Sandifordd420f732013-12-13 15:28:45 +00001773 // Make sure that ConstOp1 is in range of C.Op0.
1774 int64_t SignedValue = ConstOp1->getSExtValue();
1775 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001776 return;
Richard Sandifordd420f732013-12-13 15:28:45 +00001777 if (C.ICmpType != SystemZICMP::SignedOnly) {
1778 // Unsigned comparison between two sign-extended values is equivalent
1779 // to unsigned comparison between two zero-extended values.
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001780 Value &= Mask;
Richard Sandifordd420f732013-12-13 15:28:45 +00001781 } else if (NumBits == 8) {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001782 // Try to treat the comparison as unsigned, so that we can use CLI.
1783 // Adjust CCMask and Value as necessary.
Richard Sandifordd420f732013-12-13 15:28:45 +00001784 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001785 // Test whether the high bit of the byte is set.
Richard Sandifordd420f732013-12-13 15:28:45 +00001786 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
1787 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001788 // Test whether the high bit of the byte is clear.
Richard Sandifordd420f732013-12-13 15:28:45 +00001789 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001790 else
1791 // No instruction exists for this combination.
1792 return;
Richard Sandifordd420f732013-12-13 15:28:45 +00001793 C.ICmpType = SystemZICMP::UnsignedOnly;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001794 }
1795 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
1796 if (Value > Mask)
1797 return;
Ulrich Weigand47f36492015-12-16 18:04:06 +00001798 // If the constant is in range, we can use any comparison.
1799 C.ICmpType = SystemZICMP::Any;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001800 } else
1801 return;
1802
1803 // Make sure that the first operand is an i32 of the right extension type.
Richard Sandifordd420f732013-12-13 15:28:45 +00001804 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
1805 ISD::SEXTLOAD :
1806 ISD::ZEXTLOAD);
1807 if (C.Op0.getValueType() != MVT::i32 ||
Jonas Paulssonb9a24672017-11-30 08:18:50 +00001808 Load->getExtensionType() != ExtType) {
Justin Lebar9c375812016-07-15 18:27:10 +00001809 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
1810 Load->getBasePtr(), Load->getPointerInfo(),
1811 Load->getMemoryVT(), Load->getAlignment(),
1812 Load->getMemOperand()->getFlags());
Jonas Paulssonb9a24672017-11-30 08:18:50 +00001813 // Update the chain uses.
1814 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
1815 }
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001816
1817 // Make sure that the second operand is an i32 with the right value.
Richard Sandifordd420f732013-12-13 15:28:45 +00001818 if (C.Op1.getValueType() != MVT::i32 ||
1819 Value != ConstOp1->getZExtValue())
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001820 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00001821}
1822
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001823// Return true if Op is either an unextended load, or a load suitable
1824// for integer register-memory comparisons of type ICmpType.
1825static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
Richard Sandiford21f5d682014-03-06 11:22:58 +00001826 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001827 if (Load) {
1828 // There are no instructions to compare a register with a memory byte.
1829 if (Load->getMemoryVT() == MVT::i8)
1830 return false;
1831 // Otherwise decide on extension type.
Richard Sandiford24e597b2013-08-23 11:27:19 +00001832 switch (Load->getExtensionType()) {
1833 case ISD::NON_EXTLOAD:
Richard Sandiford24e597b2013-08-23 11:27:19 +00001834 return true;
1835 case ISD::SEXTLOAD:
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001836 return ICmpType != SystemZICMP::UnsignedOnly;
Richard Sandiford24e597b2013-08-23 11:27:19 +00001837 case ISD::ZEXTLOAD:
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001838 return ICmpType != SystemZICMP::SignedOnly;
Richard Sandiford24e597b2013-08-23 11:27:19 +00001839 default:
1840 break;
1841 }
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001842 }
Richard Sandiford24e597b2013-08-23 11:27:19 +00001843 return false;
1844}
1845
Richard Sandifordd420f732013-12-13 15:28:45 +00001846// Return true if it is better to swap the operands of C.
1847static bool shouldSwapCmpOperands(const Comparison &C) {
Richard Sandiford24e597b2013-08-23 11:27:19 +00001848 // Leave f128 comparisons alone, since they have no memory forms.
Richard Sandifordd420f732013-12-13 15:28:45 +00001849 if (C.Op0.getValueType() == MVT::f128)
Richard Sandiford24e597b2013-08-23 11:27:19 +00001850 return false;
1851
1852 // Always keep a floating-point constant second, since comparisons with
1853 // zero can use LOAD TEST and comparisons with other constants make a
1854 // natural memory operand.
Richard Sandifordd420f732013-12-13 15:28:45 +00001855 if (isa<ConstantFPSDNode>(C.Op1))
Richard Sandiford24e597b2013-08-23 11:27:19 +00001856 return false;
1857
1858 // Never swap comparisons with zero since there are many ways to optimize
1859 // those later.
Richard Sandiford21f5d682014-03-06 11:22:58 +00001860 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
Richard Sandifordd420f732013-12-13 15:28:45 +00001861 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
Richard Sandiford24e597b2013-08-23 11:27:19 +00001862 return false;
1863
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001864 // Also keep natural memory operands second if the loaded value is
1865 // only used here. Several comparisons have memory forms.
Richard Sandifordd420f732013-12-13 15:28:45 +00001866 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001867 return false;
1868
Richard Sandiford24e597b2013-08-23 11:27:19 +00001869 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
1870 // In that case we generally prefer the memory to be second.
Richard Sandifordd420f732013-12-13 15:28:45 +00001871 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
Richard Sandiford24e597b2013-08-23 11:27:19 +00001872 // The only exceptions are when the second operand is a constant and
1873 // we can use things like CHHSI.
Richard Sandifordd420f732013-12-13 15:28:45 +00001874 if (!ConstOp1)
Richard Sandiford24e597b2013-08-23 11:27:19 +00001875 return true;
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001876 // The unsigned memory-immediate instructions can handle 16-bit
1877 // unsigned integers.
Richard Sandifordd420f732013-12-13 15:28:45 +00001878 if (C.ICmpType != SystemZICMP::SignedOnly &&
1879 isUInt<16>(ConstOp1->getZExtValue()))
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001880 return false;
1881 // The signed memory-immediate instructions can handle 16-bit
1882 // signed integers.
Richard Sandifordd420f732013-12-13 15:28:45 +00001883 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
1884 isInt<16>(ConstOp1->getSExtValue()))
Richard Sandiford5bc670b2013-09-06 11:51:39 +00001885 return false;
Richard Sandiford24e597b2013-08-23 11:27:19 +00001886 return true;
1887 }
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001888
1889 // Try to promote the use of CGFR and CLGFR.
Richard Sandifordd420f732013-12-13 15:28:45 +00001890 unsigned Opcode0 = C.Op0.getOpcode();
1891 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001892 return true;
Richard Sandifordd420f732013-12-13 15:28:45 +00001893 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001894 return true;
Richard Sandifordd420f732013-12-13 15:28:45 +00001895 if (C.ICmpType != SystemZICMP::SignedOnly &&
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001896 Opcode0 == ISD::AND &&
Richard Sandifordd420f732013-12-13 15:28:45 +00001897 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
1898 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
Richard Sandiford7b4118a2013-12-06 09:56:50 +00001899 return true;
1900
Richard Sandiford24e597b2013-08-23 11:27:19 +00001901 return false;
1902}
1903
Richard Sandiford73170f82013-12-11 11:45:08 +00001904// Return a version of comparison CC mask CCMask in which the LT and GT
1905// actions are swapped.
1906static unsigned reverseCCMask(unsigned CCMask) {
1907 return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
1908 (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
1909 (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
1910 (CCMask & SystemZ::CCMASK_CMP_UO));
1911}
1912
Richard Sandiford0847c452013-12-13 15:50:30 +00001913// Check whether C tests for equality between X and Y and whether X - Y
1914// or Y - X is also computed. In that case it's better to compare the
1915// result of the subtraction against zero.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001916static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
1917 Comparison &C) {
Richard Sandiford0847c452013-12-13 15:50:30 +00001918 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
1919 C.CCMask == SystemZ::CCMASK_CMP_NE) {
Richard Sandiford28c111e2014-03-06 11:00:15 +00001920 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
Richard Sandiford0847c452013-12-13 15:50:30 +00001921 SDNode *N = *I;
1922 if (N->getOpcode() == ISD::SUB &&
1923 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
1924 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
1925 C.Op0 = SDValue(N, 0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001926 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
Richard Sandiford0847c452013-12-13 15:50:30 +00001927 return;
1928 }
1929 }
1930 }
1931}
1932
Richard Sandifordd420f732013-12-13 15:28:45 +00001933// Check whether C compares a floating-point value with zero and if that
1934// floating-point value is also negated. In this case we can use the
1935// negation to set CC, so avoiding separate LOAD AND TEST and
1936// LOAD (NEGATIVE/COMPLEMENT) instructions.
1937static void adjustForFNeg(Comparison &C) {
Richard Sandiford21f5d682014-03-06 11:22:58 +00001938 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
Richard Sandiford73170f82013-12-11 11:45:08 +00001939 if (C1 && C1->isZero()) {
Richard Sandiford28c111e2014-03-06 11:00:15 +00001940 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
Richard Sandiford73170f82013-12-11 11:45:08 +00001941 SDNode *N = *I;
1942 if (N->getOpcode() == ISD::FNEG) {
Richard Sandifordd420f732013-12-13 15:28:45 +00001943 C.Op0 = SDValue(N, 0);
1944 C.CCMask = reverseCCMask(C.CCMask);
Richard Sandiford73170f82013-12-11 11:45:08 +00001945 return;
1946 }
1947 }
1948 }
1949}
1950
Richard Sandifordd420f732013-12-13 15:28:45 +00001951// Check whether C compares (shl X, 32) with 0 and whether X is
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001952// also sign-extended. In that case it is better to test the result
1953// of the sign extension using LTGFR.
1954//
1955// This case is important because InstCombine transforms a comparison
1956// with (sext (trunc X)) into a comparison with (shl X, 32).
Richard Sandifordd420f732013-12-13 15:28:45 +00001957static void adjustForLTGFR(Comparison &C) {
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001958 // Check for a comparison between (shl X, 32) and 0.
Richard Sandifordd420f732013-12-13 15:28:45 +00001959 if (C.Op0.getOpcode() == ISD::SHL &&
1960 C.Op0.getValueType() == MVT::i64 &&
1961 C.Op1.getOpcode() == ISD::Constant &&
1962 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
Richard Sandiford21f5d682014-03-06 11:22:58 +00001963 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001964 if (C1 && C1->getZExtValue() == 32) {
Richard Sandifordd420f732013-12-13 15:28:45 +00001965 SDValue ShlOp0 = C.Op0.getOperand(0);
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001966 // See whether X has any SIGN_EXTEND_INREG uses.
Richard Sandiford28c111e2014-03-06 11:00:15 +00001967 for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001968 SDNode *N = *I;
1969 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
1970 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
Richard Sandifordd420f732013-12-13 15:28:45 +00001971 C.Op0 = SDValue(N, 0);
Richard Sandifordbd2f0e92013-12-13 15:07:39 +00001972 return;
1973 }
1974 }
1975 }
1976 }
1977}
1978
Richard Sandiford83a0b6a2013-12-20 11:56:02 +00001979// If C compares the truncation of an extending load, try to compare
1980// the untruncated value instead. This exposes more opportunities to
1981// reuse CC.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00001982static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
1983 Comparison &C) {
Richard Sandiford83a0b6a2013-12-20 11:56:02 +00001984 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
1985 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
1986 C.Op1.getOpcode() == ISD::Constant &&
1987 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
Richard Sandiford21f5d682014-03-06 11:22:58 +00001988 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00001989 if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
Richard Sandiford83a0b6a2013-12-20 11:56:02 +00001990 unsigned Type = L->getExtensionType();
1991 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
1992 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
1993 C.Op0 = C.Op0.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001994 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
Richard Sandiford83a0b6a2013-12-20 11:56:02 +00001995 }
1996 }
1997 }
1998}
1999
Richard Sandiford030c1652013-09-13 09:09:50 +00002000// Return true if shift operation N has an in-range constant shift value.
2001// Store it in ShiftVal if so.
2002static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
Richard Sandiford21f5d682014-03-06 11:22:58 +00002003 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
Richard Sandiford030c1652013-09-13 09:09:50 +00002004 if (!Shift)
2005 return false;
2006
2007 uint64_t Amount = Shift->getZExtValue();
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00002008 if (Amount >= N.getValueSizeInBits())
Richard Sandiford030c1652013-09-13 09:09:50 +00002009 return false;
2010
2011 ShiftVal = Amount;
2012 return true;
2013}
2014
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands. If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  // High is the single highest set bit of Mask; Low is the single lowest.
  unsigned HighShift = 63 - countLeadingZeros(Mask);
  uint64_t High = uint64_t(1) << HighShift;
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is dropped.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  // (x & Mask) == 0 <=> all masked bits are zero.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  // Unsigned (x & Mask) < CmpVal with 0 < CmpVal <= Low can only fail
  // when some masked bit is set.
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  // (x & Mask) == Mask <=> all masked bits are one.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  // Comparisons against values around High depend only on the MSB of the mask.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well: "mixed with MSB 0" means only Low is set, "mixed with MSB 1"
  // means only High is set.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}
2110
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
                                   Comparison &C) {
  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible. We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
    // be masked off without changing the result.
    // (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.  Note that the getTestUnderMaskCond calls below are
  // deliberately embedded in the conditions: NewCCMask is assigned as part
  // of the short-circuit evaluation and is only valid when the branch is
  // taken.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    // Fold a left shift into the test by shifting mask and value right.
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    // Fold a right shift into the test by shifting mask and value left.
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  // Reuse the existing mask node if it still has the right value.
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}
2200
Ulrich Weigand31112892018-01-19 20:54:18 +00002201// See whether the comparison argument contains a redundant AND
2202// and remove it if so. This sometimes happens due to the generic
2203// BRCOND expansion.
2204static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2205 Comparison &C) {
2206 if (C.Op0.getOpcode() != ISD::AND)
2207 return;
2208 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2209 if (!Mask)
2210 return;
2211 KnownBits Known;
2212 DAG.computeKnownBits(C.Op0.getOperand(0), Known);
2213 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2214 return;
2215
2216 C.Op0 = C.Op0.getOperand(0);
2217}
2218
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002219// Return a Comparison that tests the condition-code result of intrinsic
2220// node Call against constant integer CC using comparison code Cond.
2221// Opcode is the opcode of the SystemZISD operation for the intrinsic
2222// and CCValid is the set of possible condition-code results.
2223static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2224 SDValue Call, unsigned CCValid, uint64_t CC,
2225 ISD::CondCode Cond) {
2226 Comparison C(Call, SDValue());
2227 C.Opcode = Opcode;
2228 C.CCValid = CCValid;
2229 if (Cond == ISD::SETEQ)
2230 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2231 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2232 else if (Cond == ISD::SETNE)
2233 // ...and the inverse of that.
2234 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2235 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2236 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2237 // always true for CC>3.
Justin Bognera6d38362015-06-23 15:38:24 +00002238 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002239 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2240 // ...and the inverse of that.
Justin Bognera6d38362015-06-23 15:38:24 +00002241 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002242 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2243 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2244 // always true for CC>3.
Justin Bognera6d38362015-06-23 15:38:24 +00002245 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002246 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2247 // ...and the inverse of that.
Justin Bognera6d38362015-06-23 15:38:24 +00002248 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002249 else
2250 llvm_unreachable("Unexpected integer comparison type");
2251 C.CCMask &= CCValid;
2252 return C;
2253}
2254
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL) {
  // A comparison of an intrinsic's CC result against a constant can be
  // lowered directly to a test of the CC value the intrinsic produces.
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison. Equality and inequality tests can
    // use either signed or unsigned comparisons. The choice also doesn't
    // matter if both sign bits are known to be clear. In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    // UO has no meaning for integer comparisons.
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    // Apply the integer-only comparison adjustments.  Their order matters:
    // each one may rewrite C.Op0/C.Op1 and later ones look at the result.
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  // Swapping the operands requires swapping the LT/GT parts of the mask.
  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  return C;
}
2309
2310// Emit the comparison instruction described by C.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002311static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002312 if (!C.Op1.getNode()) {
2313 SDValue Op;
2314 switch (C.Op0.getOpcode()) {
2315 case ISD::INTRINSIC_W_CHAIN:
2316 Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
2317 break;
Ulrich Weigandc1708b22015-05-05 19:31:09 +00002318 case ISD::INTRINSIC_WO_CHAIN:
2319 Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
2320 break;
Ulrich Weigand57c85f52015-04-01 12:51:43 +00002321 default:
2322 llvm_unreachable("Invalid comparison operands");
2323 }
2324 return SDValue(Op.getNode(), Op->getNumValues() - 1);
2325 }
Richard Sandifordd420f732013-12-13 15:28:45 +00002326 if (C.Opcode == SystemZISD::ICMP)
2327 return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002328 DAG.getConstant(C.ICmpType, DL, MVT::i32));
Richard Sandifordd420f732013-12-13 15:28:45 +00002329 if (C.Opcode == SystemZISD::TM) {
2330 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2331 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2332 return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002333 DAG.getConstant(RegisterOnly, DL, MVT::i32));
Richard Sandifordd420f732013-12-13 15:28:45 +00002334 }
2335 return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002336}
2337
Richard Sandiford7d86e472013-08-21 09:34:56 +00002338// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2339// 64 bits. Extend is the extension type to use. Store the high part
2340// in Hi and the low part in Lo.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002341static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2342 SDValue Op0, SDValue Op1, SDValue &Hi,
2343 SDValue &Lo) {
Richard Sandiford7d86e472013-08-21 09:34:56 +00002344 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2345 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2346 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002347 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2348 DAG.getConstant(32, DL, MVT::i64));
Richard Sandiford7d86e472013-08-21 09:34:56 +00002349 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2350 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2351}
2352
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002353// Lower a binary operation that produces two VT results, one in each
2354// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
Ulrich Weigand43579cf2017-07-05 13:17:31 +00002355// and Opcode performs the GR128 operation. Store the even register result
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002356// in Even and the odd register result in Odd.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002357static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
Ulrich Weigand43579cf2017-07-05 13:17:31 +00002358 unsigned Opcode, SDValue Op0, SDValue Op1,
2359 SDValue &Even, SDValue &Odd) {
2360 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002361 bool Is32Bit = is32Bit(VT);
Richard Sandifordd8163202013-09-13 09:12:44 +00002362 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2363 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002364}
2365
Richard Sandiford48ef6ab2013-12-06 09:53:09 +00002366// Return an i32 value that is 1 if the CC value produced by Glue is
2367// in the mask CCMask and 0 otherwise. CC is known to have a value
2368// in CCValid, so other values can be ignored.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002369static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue,
Richard Sandiford48ef6ab2013-12-06 09:53:09 +00002370 unsigned CCValid, unsigned CCMask) {
Ulrich Weigand849a59f2018-01-19 20:52:04 +00002371 SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
2372 DAG.getConstant(0, DL, MVT::i32),
2373 DAG.getConstant(CCValid, DL, MVT::i32),
2374 DAG.getConstant(CCMask, DL, MVT::i32), Glue };
2375 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
Richard Sandifordf722a8e302013-10-16 11:10:55 +00002376}
2377
Ulrich Weigandcd808232015-05-05 19:26:48 +00002378// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
2379// be done directly. IsFP is true if CC is for a floating-point rather than
2380// integer comparison.
2381static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002382 switch (CC) {
Ulrich Weigandcd808232015-05-05 19:26:48 +00002383 case ISD::SETOEQ:
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002384 case ISD::SETEQ:
Ulrich Weigandcd808232015-05-05 19:26:48 +00002385 return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002386
Ulrich Weigandcd808232015-05-05 19:26:48 +00002387 case ISD::SETOGE:
2388 case ISD::SETGE:
Saleem Abdulrasoolee33c492015-05-10 00:53:41 +00002389 return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
Ulrich Weigandcd808232015-05-05 19:26:48 +00002390
2391 case ISD::SETOGT:
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002392 case ISD::SETGT:
Ulrich Weigandcd808232015-05-05 19:26:48 +00002393 return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002394
2395 case ISD::SETUGT:
Saleem Abdulrasoolee33c492015-05-10 00:53:41 +00002396 return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002397
2398 default:
2399 return 0;
2400 }
2401}
2402
2403// Return the SystemZISD vector comparison operation for CC or its inverse,
2404// or 0 if neither can be done directly. Indicate in Invert whether the
Ulrich Weigandcd808232015-05-05 19:26:48 +00002405// result is for the inverse of CC. IsFP is true if CC is for a
2406// floating-point rather than integer comparison.
2407static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
2408 bool &Invert) {
2409 if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002410 Invert = false;
2411 return Opcode;
2412 }
2413
Ulrich Weigandcd808232015-05-05 19:26:48 +00002414 CC = ISD::getSetCCInverse(CC, !IsFP);
2415 if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002416 Invert = true;
2417 return Opcode;
2418 }
2419
2420 return 0;
2421}
2422
Ulrich Weigand80b3af72015-05-05 19:27:45 +00002423// Return a v2f64 that contains the extended form of elements Start and Start+1
2424// of v4f32 value Op.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002425static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
Ulrich Weigand80b3af72015-05-05 19:27:45 +00002426 SDValue Op) {
2427 int Mask[] = { Start, -1, Start + 1, -1 };
2428 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2429 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2430}
2431
2432// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2433// producing a result of type VT.
Ulrich Weigand33435c42017-07-17 17:42:48 +00002434SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2435 const SDLoc &DL, EVT VT,
2436 SDValue CmpOp0,
2437 SDValue CmpOp1) const {
2438 // There is no hardware support for v4f32 (unless we have the vector
2439 // enhancements facility 1), so extend the vector into two v2f64s
2440 // and compare those.
2441 if (CmpOp0.getValueType() == MVT::v4f32 &&
2442 !Subtarget.hasVectorEnhancements1()) {
Ulrich Weigand80b3af72015-05-05 19:27:45 +00002443 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
2444 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
2445 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
2446 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
2447 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2448 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2449 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2450 }
2451 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2452}
2453
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
  // Handle tests for order using (or (ogt y x) (oge x y)).
  // SETUO is lowered as the inverse of SETO via the fallthrough.
  case ISD::SETUO:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    break;
  }

  // Handle <> tests using (or (ogt y x) (ogt x y)).
  // SETUEQ is lowered as the inverse of SETONE via the fallthrough.
  case ISD::SETUEQ:
    Invert = true;
    LLVM_FALLTHROUGH;
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    break;
  }

  // Otherwise a single comparison is enough. It doesn't really
  // matter whether we try the inversion or the swap first, since
  // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
    else {
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
      else
        llvm_unreachable("Unhandled comparison");
    }
    break;
  }
  // If we computed the inverse comparison, flip all mask bits by XORing
  // with an all-ones vector.
  if (Invert) {
    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                               DAG.getConstant(65535, DL, MVT::i32));
    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  return Cmp;
}
2512
Richard Sandiford48ef6ab2013-12-06 09:53:09 +00002513SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2514 SelectionDAG &DAG) const {
2515 SDValue CmpOp0 = Op.getOperand(0);
2516 SDValue CmpOp1 = Op.getOperand(1);
2517 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2518 SDLoc DL(Op);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00002519 EVT VT = Op.getValueType();
2520 if (VT.isVector())
2521 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
Richard Sandiford48ef6ab2013-12-06 09:53:09 +00002522
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002523 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
Richard Sandifordd420f732013-12-13 15:28:45 +00002524 SDValue Glue = emitCmp(DAG, DL, C);
2525 return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
Richard Sandiford48ef6ab2013-12-06 09:53:09 +00002526}
2527
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002528SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002529 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2530 SDValue CmpOp0 = Op.getOperand(2);
2531 SDValue CmpOp1 = Op.getOperand(3);
2532 SDValue Dest = Op.getOperand(4);
Andrew Trickef9de2a2013-05-25 02:42:55 +00002533 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002534
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002535 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
Richard Sandifordd420f732013-12-13 15:28:45 +00002536 SDValue Glue = emitCmp(DAG, DL, C);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002537 return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002538 Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
2539 DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, Glue);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002540}
2541
Richard Sandiford57485472013-12-13 15:35:00 +00002542// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2543// allowing Pos and Neg to be wider than CmpOp.
2544static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2545 return (Neg.getOpcode() == ISD::SUB &&
2546 Neg.getOperand(0).getOpcode() == ISD::Constant &&
2547 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2548 Neg.getOperand(1) == Pos &&
2549 (Pos == CmpOp ||
2550 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2551 Pos.getOperand(0) == CmpOp)));
2552}
2553
2554// Return the absolute or negative absolute of Op; IsNegative decides which.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00002555static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
Richard Sandiford57485472013-12-13 15:35:00 +00002556 bool IsNegative) {
2557 Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
2558 if (IsNegative)
2559 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002560 DAG.getConstant(0, DL, Op.getValueType()), Op);
Richard Sandiford57485472013-12-13 15:35:00 +00002561 return Op;
2562}
2563
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002564SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2565 SelectionDAG &DAG) const {
2566 SDValue CmpOp0 = Op.getOperand(0);
2567 SDValue CmpOp1 = Op.getOperand(1);
2568 SDValue TrueOp = Op.getOperand(2);
2569 SDValue FalseOp = Op.getOperand(3);
2570 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002571 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002572
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002573 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
Richard Sandiford57485472013-12-13 15:35:00 +00002574
2575 // Check for absolute and negative-absolute selections, including those
2576 // where the comparison value is sign-extended (for LPGFR and LNGFR).
2577 // This check supplements the one in DAGCombiner.
2578 if (C.Opcode == SystemZISD::ICMP &&
2579 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
2580 C.CCMask != SystemZ::CCMASK_CMP_NE &&
2581 C.Op1.getOpcode() == ISD::Constant &&
2582 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2583 if (isAbsolute(C.Op0, TrueOp, FalseOp))
2584 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
2585 if (isAbsolute(C.Op0, FalseOp, TrueOp))
2586 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
2587 }
2588
Richard Sandifordd420f732013-12-13 15:28:45 +00002589 SDValue Glue = emitCmp(DAG, DL, C);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002590 SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
2591 DAG.getConstant(C.CCMask, DL, MVT::i32), Glue};
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002592
Ulrich Weigand849a59f2018-01-19 20:52:04 +00002593 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002594}
2595
2596SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
2597 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002598 SDLoc DL(Node);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002599 const GlobalValue *GV = Node->getGlobal();
2600 int64_t Offset = Node->getOffset();
Mehdi Amini44ede332015-07-09 02:09:04 +00002601 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Eric Christopher93bf97c2014-06-27 07:38:01 +00002602 CodeModel::Model CM = DAG.getTarget().getCodeModel();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002603
2604 SDValue Result;
Rafael Espindola3beef8d2016-06-27 23:15:57 +00002605 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
Richard Sandiford54b36912013-09-27 15:14:04 +00002606 // Assign anchors at 1<<12 byte boundaries.
2607 uint64_t Anchor = Offset & ~uint64_t(0xfff);
2608 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
2609 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2610
2611 // The offset can be folded into the address if it is aligned to a halfword.
2612 Offset -= Anchor;
2613 if (Offset != 0 && (Offset & 1) == 0) {
2614 SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
2615 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002616 Offset = 0;
2617 }
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002618 } else {
2619 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
2620 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2621 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
Justin Lebar9c375812016-07-15 18:27:10 +00002622 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002623 }
2624
2625 // If there was a non-zero offset that we didn't fold, create an explicit
2626 // addition for it.
2627 if (Offset != 0)
2628 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002629 DAG.getConstant(Offset, DL, PtrVT));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002630
2631 return Result;
2632}
2633
// Emit a call to __tls_get_offset (via the TLS_GDCALL / TLS_LDCALL
// pseudo given by Opcode) and return the offset it produces in %r2.
// GOTOffset is the GOT offset of the tls_index structure for the symbol.
// NOTE: the chain/glue sequencing below is deliberate — the register
// copies must be glued to the call in order.
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}
2680
Marcin Koscielnickif12609c2016-04-20 01:03:48 +00002681SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
2682 SelectionDAG &DAG) const {
Ulrich Weigandfffc7112016-11-08 20:15:26 +00002683 SDValue Chain = DAG.getEntryNode();
Mehdi Amini44ede332015-07-09 02:09:04 +00002684 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002685
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002686 // The high part of the thread pointer is in access register 0.
Ulrich Weigandfffc7112016-11-08 20:15:26 +00002687 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002688 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
2689
2690 // The low part of the thread pointer is in access register 1.
Ulrich Weigandfffc7112016-11-08 20:15:26 +00002691 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002692 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
2693
2694 // Merge them into a single 64-bit address.
2695 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002696 DAG.getConstant(32, DL, PtrVT));
Marcin Koscielnickif12609c2016-04-20 01:03:48 +00002697 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
2698}
2699
// Lower a thread-local GlobalAddress.  Computes the symbol's offset from
// the thread pointer according to the TLS model in effect and adds it to
// the thread pointer.  Emulated TLS is delegated to the target-independent
// path.
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
  case TLSModel::GeneralDynamic: {
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
    break;
  }

  case TLSModel::LocalDynamic: {
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset. Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
      DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // Add the per-symbol offset (DTPOFF) to the module base offset.
    CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

    SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
    DTPOffset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), DTPOffset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                        SystemZII::MO_INDNTPOFF);
    Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
    Offset =
        DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                    MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}
2788
2789SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
2790 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002791 SDLoc DL(Node);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002792 const BlockAddress *BA = Node->getBlockAddress();
2793 int64_t Offset = Node->getOffset();
Mehdi Amini44ede332015-07-09 02:09:04 +00002794 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002795
2796 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
2797 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2798 return Result;
2799}
2800
2801SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
2802 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002803 SDLoc DL(JT);
Mehdi Amini44ede332015-07-09 02:09:04 +00002804 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002805 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2806
2807 // Use LARL to load the address of the table.
2808 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2809}
2810
2811SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
2812 SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002813 SDLoc DL(CP);
Mehdi Amini44ede332015-07-09 02:09:04 +00002814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002815
2816 SDValue Result;
2817 if (CP->isMachineConstantPoolEntry())
2818 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00002819 CP->getAlignment());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002820 else
2821 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00002822 CP->getAlignment(), CP->getOffset());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002823
2824 // Use LARL to load the address of the constant pool entry.
2825 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
2826}
2827
Ulrich Weigandf557d082016-04-04 12:44:55 +00002828SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
2829 SelectionDAG &DAG) const {
2830 MachineFunction &MF = DAG.getMachineFunction();
Matthias Braun941a7052016-07-28 18:40:00 +00002831 MachineFrameInfo &MFI = MF.getFrameInfo();
2832 MFI.setFrameAddressIsTaken(true);
Ulrich Weigandf557d082016-04-04 12:44:55 +00002833
2834 SDLoc DL(Op);
2835 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2836 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2837
2838 // If the back chain frame index has not been allocated yet, do so.
2839 SystemZMachineFunctionInfo *FI = MF.getInfo<SystemZMachineFunctionInfo>();
2840 int BackChainIdx = FI->getFramePointerSaveIndex();
2841 if (!BackChainIdx) {
2842 // By definition, the frame address is the address of the back chain.
Matthias Braun941a7052016-07-28 18:40:00 +00002843 BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
Ulrich Weigandf557d082016-04-04 12:44:55 +00002844 FI->setFramePointerSaveIndex(BackChainIdx);
2845 }
2846 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
2847
2848 // FIXME The frontend should detect this case.
2849 if (Depth > 0) {
2850 report_fatal_error("Unsupported stack frame traversal count");
2851 }
2852
2853 return BackChain;
2854}
2855
2856SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
2857 SelectionDAG &DAG) const {
2858 MachineFunction &MF = DAG.getMachineFunction();
Matthias Braun941a7052016-07-28 18:40:00 +00002859 MachineFrameInfo &MFI = MF.getFrameInfo();
2860 MFI.setReturnAddressIsTaken(true);
Ulrich Weigandf557d082016-04-04 12:44:55 +00002861
2862 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
2863 return SDValue();
2864
2865 SDLoc DL(Op);
2866 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2867 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2868
2869 // FIXME The frontend should detect this case.
2870 if (Depth > 0) {
2871 report_fatal_error("Unsupported stack frame traversal count");
2872 }
2873
2874 // Return R14D, which has the return address. Mark it an implicit live-in.
2875 unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
2876 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
2877}
2878
// Lower an ISD::BITCAST between i32 and f32.  f32 values live in the
// high 32 bits of a 64-bit FP register, so the conversion goes through
// a 64-bit intermediate: either a subregister insert/extract (when
// high-word facilities are available) or an explicit shift.
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly. This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      // Insert the i32 directly into the high word of an undefined i64.
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      // No high-word support: widen and shift the value into the high word.
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    // Bitcast the i64 to f64 and take the f32 subregister.
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    // Reverse direction: place the f32 into the f64 subregister,
    // bitcast to i64, then recover the high word.
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}
2928
2929SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
2930 SelectionDAG &DAG) const {
2931 MachineFunction &MF = DAG.getMachineFunction();
2932 SystemZMachineFunctionInfo *FuncInfo =
2933 MF.getInfo<SystemZMachineFunctionInfo>();
Mehdi Amini44ede332015-07-09 02:09:04 +00002934 EVT PtrVT = getPointerTy(DAG.getDataLayout());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002935
2936 SDValue Chain = Op.getOperand(0);
2937 SDValue Addr = Op.getOperand(1);
2938 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002939 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002940
2941 // The initial values of each field.
2942 const unsigned NumFields = 4;
2943 SDValue Fields[NumFields] = {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002944 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
2945 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002946 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
2947 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
2948 };
2949
2950 // Store each field into its respective slot.
2951 SDValue MemOps[NumFields];
2952 unsigned Offset = 0;
2953 for (unsigned I = 0; I < NumFields; ++I) {
2954 SDValue FieldAddr = Addr;
2955 if (Offset != 0)
2956 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002957 DAG.getIntPtrConstant(Offset, DL));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002958 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
Justin Lebar9c375812016-07-15 18:27:10 +00002959 MachinePointerInfo(SV, Offset));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002960 Offset += 8;
2961 }
Craig Topper48d114b2014-04-26 18:35:24 +00002962 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002963}
2964
2965SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
2966 SelectionDAG &DAG) const {
2967 SDValue Chain = Op.getOperand(0);
2968 SDValue DstPtr = Op.getOperand(1);
2969 SDValue SrcPtr = Op.getOperand(2);
2970 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
2971 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
Andrew Trickef9de2a2013-05-25 02:42:55 +00002972 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002973
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002974 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002975 /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
Krzysztof Parzyszeka46c36b2015-04-13 17:16:45 +00002976 /*isTailCall*/false,
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002977 MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
2978}
2979
2980SDValue SystemZTargetLowering::
2981lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
Jonas Paulssonf12b9252015-11-28 11:02:32 +00002982 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00002983 MachineFunction &MF = DAG.getMachineFunction();
Matthias Braunf1caa282017-12-15 22:22:58 +00002984 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
2985 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
Jonas Paulssonf12b9252015-11-28 11:02:32 +00002986
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002987 SDValue Chain = Op.getOperand(0);
2988 SDValue Size = Op.getOperand(1);
Jonas Paulssonf12b9252015-11-28 11:02:32 +00002989 SDValue Align = Op.getOperand(2);
Andrew Trickef9de2a2013-05-25 02:42:55 +00002990 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00002991
Jonas Paulssonf12b9252015-11-28 11:02:32 +00002992 // If user has set the no alignment function attribute, ignore
2993 // alloca alignments.
2994 uint64_t AlignVal = (RealignOpt ?
2995 dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
2996
2997 uint64_t StackAlign = TFI->getStackAlignment();
2998 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
2999 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3000
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003001 unsigned SPReg = getStackPointerRegisterToSaveRestore();
Jonas Paulssonf12b9252015-11-28 11:02:32 +00003002 SDValue NeededSpace = Size;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003003
3004 // Get a reference to the stack pointer.
3005 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3006
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003007 // If we need a backchain, save it now.
3008 SDValue Backchain;
3009 if (StoreBackchain)
Justin Lebar9c375812016-07-15 18:27:10 +00003010 Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003011
Jonas Paulssonf12b9252015-11-28 11:02:32 +00003012 // Add extra space for alignment if needed.
3013 if (ExtraAlignSpace)
3014 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
Elliot Colpbc2cfc22016-07-06 18:13:11 +00003015 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
Jonas Paulssonf12b9252015-11-28 11:02:32 +00003016
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003017 // Get the new stack pointer value.
Jonas Paulssonf12b9252015-11-28 11:02:32 +00003018 SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003019
3020 // Copy the new stack pointer back.
3021 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3022
3023 // The allocated data lives above the 160 bytes allocated for the standard
3024 // frame, plus any outgoing stack arguments. We don't know how much that
3025 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3026 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3027 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3028
Jonas Paulssonf12b9252015-11-28 11:02:32 +00003029 // Dynamically realign if needed.
3030 if (RequiredAlign > StackAlign) {
3031 Result =
3032 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3033 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3034 Result =
3035 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3036 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3037 }
3038
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003039 if (StoreBackchain)
Justin Lebar9c375812016-07-15 18:27:10 +00003040 Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003041
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003042 SDValue Ops[2] = { Result, Chain };
Craig Topper64941d92014-04-27 19:20:57 +00003043 return DAG.getMergeValues(Ops, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003044}
3045
Marcin Koscielnicki9de88d92016-05-04 23:31:26 +00003046SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3047 SDValue Op, SelectionDAG &DAG) const {
3048 SDLoc DL(Op);
3049
3050 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3051}
3052
// Lower ISD::SMUL_LOHI (full signed multiply producing both halves).
// Uses a plain 64-bit multiply for 32-bit types, the native signed
// instruction when available, and otherwise derives the signed result
// from the unsigned multiply via a sign-correction identity.
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register. ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    // lh/rh are the sign bits broadcast across the whole value (arithmetic
    // shift right by 63), i.e. 0 or -1.
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register. ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    // Apply the sign correction to the high half only (the low half of the
    // unsigned product already equals the low half of the signed product).
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
3100
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003101SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3102 SelectionDAG &DAG) const {
3103 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00003104 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003105 SDValue Ops[2];
Richard Sandiford7d86e472013-08-21 09:34:56 +00003106 if (is32Bit(VT))
3107 // Just do a normal 64-bit multiplication and extract the results.
3108 // We define this so that it can be used for constant division.
3109 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3110 Op.getOperand(1), Ops[1], Ops[0]);
3111 else
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003112 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3113 // the high result in the even register. ISD::UMUL_LOHI is defined to
3114 // return the low half first, so the results are in reverse order.
3115 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
Richard Sandiford7d86e472013-08-21 09:34:56 +00003116 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
Craig Topper64941d92014-04-27 19:20:57 +00003117 return DAG.getMergeValues(Ops, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003118}
3119
3120SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3121 SelectionDAG &DAG) const {
3122 SDValue Op0 = Op.getOperand(0);
3123 SDValue Op1 = Op.getOperand(1);
3124 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00003125 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003126
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003127 // We use DSGF for 32-bit division. This means the first operand must
3128 // always be 64-bit, and the second operand should be 32-bit whenever
3129 // that is possible, to improve performance.
3130 if (is32Bit(VT))
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003131 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003132 else if (DAG.ComputeNumSignBits(Op1) > 32)
Richard Sandiforde6e78852013-07-02 15:40:22 +00003133 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003134
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003135 // DSG(F) returns the remainder in the even register and the
3136 // quotient in the odd register.
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003137 SDValue Ops[2];
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003138 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
Craig Topper64941d92014-04-27 19:20:57 +00003139 return DAG.getMergeValues(Ops, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003140}
3141
3142SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3143 SelectionDAG &DAG) const {
3144 EVT VT = Op.getValueType();
Andrew Trickef9de2a2013-05-25 02:42:55 +00003145 SDLoc DL(Op);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003146
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003147 // DL(G) returns the remainder in the even register and the
3148 // quotient in the odd register.
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003149 SDValue Ops[2];
Ulrich Weigand43579cf2017-07-05 13:17:31 +00003150 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3151 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
Craig Topper64941d92014-04-27 19:20:57 +00003152 return DAG.getMergeValues(Ops, DL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003153}
3154
// Lower an i64 ISD::OR where known-bits analysis shows that one operand
// contributes only the high 32 bits and the other only the low 32 bits.
// Such an OR can be emitted as an "insert low word" subreg operation on
// top of the high operand.  Returns Op unchanged when the pattern does
// not apply or a plain OR/IILH/IILF would be better.
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  KnownBits Known[2];
  DAG.computeKnownBits(Ops[0], Known[0]);
  DAG.computeKnownBits(Ops[1], Known[1]);

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  // High/Low index into Ops[] below.
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    // Neither operand fits the pattern; keep the generic lowering.
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    // The AND is redundant for the insertion if every bit it could clear
    // in the high word is already known zero.
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
3210
// Lower ISD::CTPOP for scalar and vector types.  Both cases build on the
// POPCNT instruction, which counts the number of set bits in each byte.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    // Widen the per-byte counts to the requested element size.
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      // Combine byte pairs: shift one byte left, add, then shift the
      // per-element total back down into the low byte.
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      // Sum the byte counts within each word using VSUM against a
      // zero vector.
      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                                DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      // Two VSUM steps: bytes into words, then words into doublewords.
      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
                                DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known;
  DAG.computeKnownBits(Op, Known);
  unsigned NumSignificantBits = (~Known.Zero).getActiveBits();
  if (NumSignificantBits == 0)
    // Every bit is known zero, so the population count is 0.
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  // Round the significant width up to a power of two, capped at the
  // full type width.
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      // Keep the accumulation confined to the significant low part.
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
3285
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00003286SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3287 SelectionDAG &DAG) const {
3288 SDLoc DL(Op);
3289 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3290 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
Konstantin Zhuravlyovbb80d3e2017-07-11 22:23:00 +00003291 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00003292 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3293
3294 // The only fence that needs an instruction is a sequentially-consistent
3295 // cross-thread fence.
JF Bastien800f87a2016-04-06 21:19:33 +00003296 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
Konstantin Zhuravlyovbb80d3e2017-07-11 22:23:00 +00003297 FenceSSID == SyncScope::System) {
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00003298 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
JF Bastien800f87a2016-04-06 21:19:33 +00003299 Op.getOperand(0)),
3300 0);
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00003301 }
3302
3303 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3304 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3305}
3306
Ulrich Weigand02f1c022017-08-04 18:53:35 +00003307// Op is an atomic load. Lower it into a normal volatile load.
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003308SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3309 SelectionDAG &DAG) const {
Richard Sandiford21f5d682014-03-06 11:22:58 +00003310 auto *Node = cast<AtomicSDNode>(Op.getNode());
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003311 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
Ulrich Weigand02f1c022017-08-04 18:53:35 +00003312 Node->getChain(), Node->getBasePtr(),
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003313 Node->getMemoryVT(), Node->getMemOperand());
3314}
3315
Ulrich Weigand02f1c022017-08-04 18:53:35 +00003316// Op is an atomic store. Lower it into a normal volatile store.
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003317SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3318 SelectionDAG &DAG) const {
Richard Sandiford21f5d682014-03-06 11:22:58 +00003319 auto *Node = cast<AtomicSDNode>(Op.getNode());
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003320 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3321 Node->getBasePtr(), Node->getMemoryVT(),
3322 Node->getMemOperand());
Ulrich Weigand02f1c022017-08-04 18:53:35 +00003323 // We have to enforce sequential consistency by performing a
3324 // serialization operation after the store.
3325 if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3326 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3327 MVT::Other, Chain), 0);
3328 return Chain;
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00003329}
3330
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode: the
// subword field is operated on inside an aligned 32-bit word, using the
// rotation amounts computed below.  32-bit operations are returned as-is.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  // Get the address of the containing word (clear the low two address
  // bits by ANDing with -4).
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.  The shift by 3 turns
  // the byte offset within the word into a bit offset.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  // Return both the narrowed result value and the output chain.
  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
3403
// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions (of the negated operand), which can use the
// LAA(G) instruction or immediate-form adds.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    // NegSrc2 stays null unless one of the profitable forms applies.
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, DL, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand());

    // Use the node as-is.
    return Op;
  }

  // 8- and 16-bit subtractions go through the subword expansion.
  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}
3440
// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.  32- and 64-bit
// widths map directly onto ATOMIC_CMP_SWAP; subword widths are expanded
// into a fullword ATOMIC_CMP_SWAPW loop.  In both cases the "success"
// boolean is recovered from CC and all three results of Op are rewired.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    // Success <=> CC says the compare matched (CS "equal" mask).
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    // Rewire Op's (value, success, chain) results to the new node.
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word (clear the low two address
  // bits by ANDing with -4).
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.  The shift by 3 turns
  // the byte offset within the word into a bit offset.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, DL, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  // Success <=> the integer comparison inside the loop reported equal.
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  // Rewire Op's (value, success, chain) results to the new node.
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1));
  return SDValue();
}
3504
3505SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
3506 SelectionDAG &DAG) const {
3507 MachineFunction &MF = DAG.getMachineFunction();
3508 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
Andrew Trickef9de2a2013-05-25 02:42:55 +00003509 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003510 SystemZ::R15D, Op.getValueType());
3511}
3512
3513SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
3514 SelectionDAG &DAG) const {
3515 MachineFunction &MF = DAG.getMachineFunction();
3516 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
Matthias Braunf1caa282017-12-15 22:22:58 +00003517 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003518
3519 SDValue Chain = Op.getOperand(0);
3520 SDValue NewSP = Op.getOperand(1);
3521 SDValue Backchain;
3522 SDLoc DL(Op);
3523
3524 if (StoreBackchain) {
3525 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
Justin Lebar9c375812016-07-15 18:27:10 +00003526 Backchain = DAG.getLoad(MVT::i64, DL, Chain, OldSP, MachinePointerInfo());
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003527 }
3528
3529 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
3530
3531 if (StoreBackchain)
Justin Lebar9c375812016-07-15 18:27:10 +00003532 Chain = DAG.getStore(Chain, DL, Backchain, NewSP, MachinePointerInfo());
Marcin Koscielnickiad1482c2016-05-05 00:37:30 +00003533
3534 return Chain;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00003535}
3536
Richard Sandiford03481332013-08-23 11:36:42 +00003537SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
3538 SelectionDAG &DAG) const {
3539 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3540 if (!IsData)
3541 // Just preserve the chain.
3542 return Op.getOperand(0);
3543
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003544 SDLoc DL(Op);
Richard Sandiford03481332013-08-23 11:36:42 +00003545 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3546 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
Richard Sandiford21f5d682014-03-06 11:22:58 +00003547 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
Richard Sandiford03481332013-08-23 11:36:42 +00003548 SDValue Ops[] = {
3549 Op.getOperand(0),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003550 DAG.getConstant(Code, DL, MVT::i32),
Richard Sandiford03481332013-08-23 11:36:42 +00003551 Op.getOperand(1)
3552 };
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003553 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Craig Topper206fcd42014-04-26 19:29:41 +00003554 Node->getVTList(), Ops,
Richard Sandiford03481332013-08-23 11:36:42 +00003555 Node->getMemoryVT(), Node->getMemOperand());
3556}
3557
Ulrich Weigand57c85f52015-04-01 12:51:43 +00003558// Return an i32 that contains the value of CC immediately after After,
3559// whose final operand must be MVT::Glue.
3560static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003561 SDLoc DL(After);
Ulrich Weigand57c85f52015-04-01 12:51:43 +00003562 SDValue Glue = SDValue(After, After->getNumValues() - 1);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003563 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
3564 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
3565 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
Ulrich Weigand57c85f52015-04-01 12:51:43 +00003566}
3567
3568SDValue
3569SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3570 SelectionDAG &DAG) const {
3571 unsigned Opcode, CCValid;
3572 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
3573 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
3574 SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
3575 SDValue CC = getCCResult(DAG, Glued.getNode());
3576 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
3577 return SDValue();
3578 }
3579
3580 return SDValue();
3581}
3582
// Lower an INTRINSIC_WO_CHAIN node.  CC-producing intrinsics are emitted
// with glue and their CC value is extracted via IPM; the remaining cases
// map individual intrinsics onto the corresponding SystemZISD nodes.
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, Glued.getNode());
    if (Op->getNumValues() == 1)
      // The intrinsic only returns CC.
      return CC;
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
                       CC);
  }

  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  // Doubleword and byte-level permutes.
  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  // vuph* -> unpack high half (UNPACK_HIGH).
  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  // vuplh* -> logical unpack of the high half (UNPACKL_HIGH).
  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  // vupl* -> unpack low half (UNPACK_LOW).
  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  // vupll* -> logical unpack of the low half (UNPACKL_LOW).
  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  // All vsum* variants share the VSUM node; the types disambiguate.
  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  }

  // Not an intrinsic we custom-lower.
  return SDValue();
}
3646
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  // SystemZISD opcode that implements this permutation.
  unsigned Opcode;
  // Constant third operand, or the result element size (see above).
  unsigned Operand;
  // The VPERM-style byte-selection vector the operation implements.
  unsigned char Bytes[SystemZ::VectorBytes];
};
}
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003658
// The instruction-implementable permute patterns that matchPermute tries,
// in order.  The comment before each entry names the instruction form.
static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
3700
// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  int First = OpNos[0];
  int Second = OpNos[1];
  // With no constraint on either pattern operand there is nothing to pick.
  if (First < 0 && Second < 0)
    return false;
  // An unconstrained slot duplicates whatever the other slot requires.
  OpNo0 = First < 0 ? unsigned(Second) : unsigned(First);
  OpNo1 = Second < 0 ? unsigned(First) : unsigned(Second);
  return true;
}
3721
3722// Bytes is a VPERM-like permute vector, except that -1 is used for
3723// undefined bytes. Return true if the VPERM can be implemented using P.
3724// When returning true set OpNo0 to the VPERM operand that should be
3725// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
3726//
3727// For example, if swapping the VPERM operands allows P to match, OpNo0
3728// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
3729// operand, but rewriting it to use two duplicated operands allows it to
3730// match P, then OpNo0 and OpNo1 will be the same.
3731static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
3732 unsigned &OpNo0, unsigned &OpNo1) {
3733 int OpNos[] = { -1, -1 };
3734 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
3735 int Elt = Bytes[I];
3736 if (Elt >= 0) {
3737 // Make sure that the two permute vectors use the same suboperand
3738 // byte number. Only the operand numbers (the high bits) are
3739 // allowed to differ.
3740 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
3741 return false;
3742 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
3743 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
3744 // Make sure that the operand mappings are consistent with previous
3745 // elements.
3746 if (OpNos[ModelOpNo] == 1 - RealOpNo)
3747 return false;
3748 OpNos[ModelOpNo] = RealOpNo;
3749 }
3750 }
3751 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
3752}
3753
3754// As above, but search for a matching permute.
3755static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
3756 unsigned &OpNo0, unsigned &OpNo1) {
3757 for (auto &P : PermuteForms)
3758 if (matchPermute(Bytes, P, OpNo0, OpNo1))
3759 return &P;
3760 return nullptr;
3761}
3762
3763// Bytes is a VPERM-like permute vector, except that -1 is used for
3764// undefined bytes. This permute is an operand of an outer permute.
3765// See whether redistributing the -1 bytes gives a shuffle that can be
3766// implemented using P. If so, set Transform to a VPERM-like permute vector
3767// that, when applied to the result of P, gives the original permute in Bytes.
3768static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3769 const Permute &P,
3770 SmallVectorImpl<int> &Transform) {
3771 unsigned To = 0;
3772 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
3773 int Elt = Bytes[From];
3774 if (Elt < 0)
3775 // Byte number From of the result is undefined.
3776 Transform[From] = -1;
3777 else {
3778 while (P.Bytes[To] != Elt) {
3779 To += 1;
3780 if (To == SystemZ::VectorBytes)
3781 return false;
3782 }
3783 Transform[From] = To;
3784 }
3785 }
3786 return true;
3787}
3788
3789// As above, but search for a matching permute.
3790static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
3791 SmallVectorImpl<int> &Transform) {
3792 for (auto &P : PermuteForms)
3793 if (matchDoublePermute(Bytes, P, Transform))
3794 return &P;
3795 return nullptr;
3796}
3797
3798// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
3799// as if it had type vNi8.
3800static void getVPermMask(ShuffleVectorSDNode *VSN,
3801 SmallVectorImpl<int> &Bytes) {
3802 EVT VT = VSN->getValueType(0);
3803 unsigned NumElements = VT.getVectorNumElements();
3804 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3805 Bytes.resize(NumElements * BytesPerElement, -1);
3806 for (unsigned I = 0; I < NumElements; ++I) {
3807 int Index = VSN->getMaskElt(I);
3808 if (Index >= 0)
3809 for (unsigned J = 0; J < BytesPerElement; ++J)
3810 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
3811 }
3812}
3813
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
// the result come from a contiguous sequence of bytes from one input.
// Set Base to the selector for the first byte if so.  If every byte in
// the range is undefined, Base is left as -1 and the function returns true.
static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
                            unsigned BytesPerElement, int &Base) {
  Base = -1;
  for (unsigned I = 0; I < BytesPerElement; ++I) {
    if (Bytes[Start + I] >= 0) {
      unsigned Elem = Bytes[Start + I];
      if (Base < 0) {
        // First defined byte: infer the selector for byte 0 of the element.
        Base = Elem - I;
        // Make sure the bytes would come from one input operand.
        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
          return false;
      } else if (unsigned(Base) != Elem - I)
        // Subsequent defined bytes must continue the same contiguous run.
        return false;
    }
  }
  return true;
}
3835
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes.  Return true if it can be performed using VSLDI.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  // The single byte shift implied by all defined elements, or -1 while
  // no defined element has been seen.  If no byte at all is defined,
  // chooseShuffleOpNos fails below, so the bogus StartIndex is never used.
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      // NB: since I is unsigned, the subtraction is performed in unsigned
      // arithmetic and wraps modulo 2^32 when Index < I; because 2^32 is
      // a multiple of SystemZ::VectorBytes, the remainder is still the
      // correctly wrapped shift amount in [0, SystemZ::VectorBytes).
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      // The pattern operand this byte is modeled as coming from, and the
      // shuffle operand it really comes from.
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      // All defined bytes must agree on a single shift amount.
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
3866
3867// Create a node that performs P on operands Op0 and Op1, casting the
3868// operands to the appropriate type. The type of the result is determined by P.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00003869static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003870 const Permute &P, SDValue Op0, SDValue Op1) {
3871 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
3872 // elements of a PACK are twice as wide as the outputs.
3873 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
3874 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
3875 P.Operand);
3876 // Cast both operands to the appropriate type.
3877 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
3878 SystemZ::VectorBytes / InBytes);
3879 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
3880 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
3881 SDValue Op;
3882 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
3883 SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
3884 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
3885 } else if (P.Opcode == SystemZISD::PACK) {
3886 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
3887 SystemZ::VectorBytes / P.Operand);
3888 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
3889 } else {
3890 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
3891 }
3892 return Op;
3893}
3894
3895// Bytes is a VPERM-like permute vector, except that -1 is used for
3896// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
3897// VSLDI or VPERM.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00003898static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
3899 SDValue *Ops,
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003900 const SmallVectorImpl<int> &Bytes) {
3901 for (unsigned I = 0; I < 2; ++I)
3902 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
3903
3904 // First see whether VSLDI can be used.
3905 unsigned StartIndex, OpNo0, OpNo1;
3906 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
3907 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
3908 Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
3909
3910 // Fall back on VPERM. Construct an SDNode for the permute vector.
3911 SDValue IndexNodes[SystemZ::VectorBytes];
3912 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
3913 if (Bytes[I] >= 0)
3914 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
3915 else
3916 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
Ahmed Bougacha128f8732016-04-26 21:15:30 +00003917 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003918 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
3919}
3920
3921namespace {
3922// Describes a general N-operand vector shuffle.
3923struct GeneralShuffle {
3924 GeneralShuffle(EVT vt) : VT(vt) {}
3925 void addUndef();
Jonas Paulsson463e2a62017-01-24 05:43:03 +00003926 bool add(SDValue, unsigned);
Benjamin Kramerbdc49562016-06-12 15:39:02 +00003927 SDValue getNode(SelectionDAG &, const SDLoc &);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003928
3929 // The operands of the shuffle.
3930 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
3931
3932 // Index I is -1 if byte I of the result is undefined. Otherwise the
3933 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
3934 // Bytes[I] / SystemZ::VectorBytes.
3935 SmallVector<int, SystemZ::VectorBytes> Bytes;
3936
3937 // The type of the shuffle result.
3938 EVT VT;
3939};
Alexander Kornienkof00654e2015-06-23 09:49:53 +00003940}
Ulrich Weigandce4c1092015-05-05 19:25:42 +00003941
3942// Add an extra undefined element to the shuffle.
3943void GeneralShuffle::addUndef() {
3944 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
3945 for (unsigned I = 0; I < BytesPerElement; ++I)
3946 Bytes.push_back(-1);
3947}
3948
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result.  Aborts and returns false if the source vector elements
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements.  Per
// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  // Byte offset of the least significant BytesPerElement bytes of source
  // element Elem, taken modulo the 16-byte vector size.
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      // Bitcasts do not move bytes, so just look at the operand.
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        // The shuffle leaves these bytes undefined.
        addUndef();
        return true;
      }
      // Continue the search in the selected shuffle operand.
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);

  return true;
}
4012
// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges.  It should rarely be a loss in other cases.
  //
  // Each pass combines Ops[I] and Ops[I + Stride] into Ops[I] and doubles
  // Stride, halving the number of live operands each time.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewBytes to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs.  Put the second operand in Ops[1] and
  // renumber the Bytes entries that referred to it.
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
4091
Ulrich Weigandcd808232015-05-05 19:26:48 +00004092// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4093static bool isScalarToVector(SDValue Op) {
4094 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
Sanjay Patel75068522016-03-14 18:09:43 +00004095 if (!Op.getOperand(I).isUndef())
Ulrich Weigandcd808232015-05-05 19:26:48 +00004096 return false;
4097 return true;
4098}
4099
4100// Return a vector of type VT that contains Value in the first element.
4101// The other elements don't matter.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00004102static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
Ulrich Weigandcd808232015-05-05 19:26:48 +00004103 SDValue Value) {
4104 // If we have a constant, replicate it to all elements and let the
4105 // BUILD_VECTOR lowering take care of it.
4106 if (Value.getOpcode() == ISD::Constant ||
4107 Value.getOpcode() == ISD::ConstantFP) {
4108 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
Ahmed Bougacha128f8732016-04-26 21:15:30 +00004109 return DAG.getBuildVector(VT, DL, Ops);
Ulrich Weigandcd808232015-05-05 19:26:48 +00004110 }
Sanjay Patel57195842016-03-14 17:28:46 +00004111 if (Value.isUndef())
Ulrich Weigandcd808232015-05-05 19:26:48 +00004112 return DAG.getUNDEF(VT);
4113 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4114}
4115
4116// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4117// element 1. Used for cases in which replication is cheap.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00004118static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
Ulrich Weigandcd808232015-05-05 19:26:48 +00004119 SDValue Op0, SDValue Op1) {
Sanjay Patel57195842016-03-14 17:28:46 +00004120 if (Op0.isUndef()) {
4121 if (Op1.isUndef())
Ulrich Weigandcd808232015-05-05 19:26:48 +00004122 return DAG.getUNDEF(VT);
4123 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4124 }
Sanjay Patel57195842016-03-14 17:28:46 +00004125 if (Op1.isUndef())
Ulrich Weigandcd808232015-05-05 19:26:48 +00004126 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4127 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4128 buildScalarToVector(DAG, DL, VT, Op0),
4129 buildScalarToVector(DAG, DL, VT, Op1));
4130}
4131
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004132// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4133// vector for them.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00004134static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004135 SDValue Op1) {
Sanjay Patel57195842016-03-14 17:28:46 +00004136 if (Op0.isUndef() && Op1.isUndef())
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004137 return DAG.getUNDEF(MVT::v2i64);
4138 // If one of the two inputs is undefined then replicate the other one,
4139 // in order to avoid using another register unnecessarily.
Sanjay Patel57195842016-03-14 17:28:46 +00004140 if (Op0.isUndef())
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004141 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
Sanjay Patel57195842016-03-14 17:28:46 +00004142 else if (Op1.isUndef())
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004143 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4144 else {
4145 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4146 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4147 }
4148 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4149}
4150
4151// Try to represent constant BUILD_VECTOR node BVN using a
4152// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
4153// on success.
4154static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
4155 EVT ElemVT = BVN->getValueType(0).getVectorElementType();
4156 unsigned BytesPerElement = ElemVT.getStoreSize();
4157 for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
4158 SDValue Op = BVN->getOperand(I);
Sanjay Patel75068522016-03-14 18:09:43 +00004159 if (!Op.isUndef()) {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004160 uint64_t Value;
4161 if (Op.getOpcode() == ISD::Constant)
4162 Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
4163 else if (Op.getOpcode() == ISD::ConstantFP)
4164 Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
4165 .getZExtValue());
4166 else
4167 return false;
4168 for (unsigned J = 0; J < BytesPerElement; ++J) {
4169 uint64_t Byte = (Value >> (J * 8)) & 0xff;
4170 if (Byte == 0xff)
Aaron Ballman2a3aa1f242015-05-11 12:45:53 +00004171 Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004172 else if (Byte != 0)
4173 return false;
4174 }
4175 }
4176 }
4177 return true;
4178}
4179
// Try to load a vector constant in which BitsPerElement-bit value Value
// is replicated to fill the vector.  VT is the type of the resulting
// constant, which may have elements of a different size from BitsPerElement.
// Return the SDValue of the constant on success, otherwise return
// an empty value.
static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
                                       const SystemZInstrInfo *TII,
                                       const SDLoc &DL, EVT VT, uint64_t Value,
                                       unsigned BitsPerElement) {
  // Signed 16-bit values can be replicated using VREPI.
  // Mark the constants as opaque or DAGCombiner will convert back to
  // BUILD_VECTOR.
  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
  if (isInt<16>(SignedValue)) {
    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
                                 SystemZ::VectorBits / BitsPerElement);
    SDValue Op = DAG.getNode(
        SystemZISD::REPLICATE, DL, VecVT,
        DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
    // Bitcast from the replicated element type to the requested type.
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }
  // See whether rotating the constant left some N places gives a value that
  // is one less than a power of 2 (i.e. all zeros followed by all ones).
  // If so we can use VGM.
  unsigned Start, End;
  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
    // isRxSBGMask returns the bit numbers for a full 64-bit value,
    // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
    // bit numbers for a BitsPerElement value, so that 0 denotes
    // 1 << (BitsPerElement-1).
    Start -= 64 - BitsPerElement;
    End -= 64 - BitsPerElement;
    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
                                 SystemZ::VectorBits / BitsPerElement);
    SDValue Op = DAG.getNode(
        SystemZISD::ROTATE_MASK, DL, VecVT,
        DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
        DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }
  // Neither VREPI nor VGM can produce this replicated constant.
  return SDValue();
}
4222
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.  Returns
// an empty SDValue when the transformation does not apply.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    if (Op.getOpcode() == ISD::TRUNCATE)
      // We only need the least significant part; see GeneralShuffle::add.
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (!GS.add(Op.getOperand(0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      // Route this element through the placeholder BUILD_VECTOR operand
      // (null SDValue); it will occupy slot ResidueOps.size() there.
      if (!GS.add(SDValue(), ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(BVN->getOperand(I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
    // Replace the single placeholder (null) operand with the BUILD_VECTOR.
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}
4275
// Combine GPR scalar values Elems into a vector of type VT.
static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                           SmallVectorImpl<SDValue> &Elems) {
  // See whether there is a single replicated value.  Count the defined
  // elements while we are at it.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
  //      \      /      \      /     VMRHF
  //       <ABxx>        <CDxx>
  //           \            /        VMRHG
  //             <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load.
    unsigned LoadElIdx = UINT_MAX;
    for (unsigned I = 0; I < NumElements; ++I)
      if (Elems[I].getOpcode() == ISD::LOAD &&
          cast<LoadSDNode>(Elems[I])->isUnindexed()) {
        LoadElIdx = I;
        break;
      }
    if (LoadElIdx != UINT_MAX) {
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
      Done[LoadElIdx] = true;
    } else {
      // Try to use VLVGP.  It inserts into the elements that end the two
      // doubleword halves (I1 and I2).
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef())
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}
4415
4416SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
4417 SelectionDAG &DAG) const {
4418 const SystemZInstrInfo *TII =
4419 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
4420 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
4421 SDLoc DL(Op);
4422 EVT VT = Op.getValueType();
4423
4424 if (BVN->isConstant()) {
4425 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
4426 // preferred way of creating all-zero and all-one vectors so give it
4427 // priority over other methods below.
4428 uint64_t Mask = 0;
4429 if (tryBuildVectorByteMask(BVN, Mask)) {
Jonas Paulsson13896072018-03-17 08:32:12 +00004430 SDValue Op = DAG.getNode(
4431 SystemZISD::BYTE_MASK, DL, MVT::v16i8,
4432 DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004433 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4434 }
4435
4436 // Try using some form of replication.
4437 APInt SplatBits, SplatUndef;
4438 unsigned SplatBitSize;
4439 bool HasAnyUndefs;
4440 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4441 8, true) &&
4442 SplatBitSize <= 64) {
4443 // First try assuming that any undefined bits above the highest set bit
4444 // and below the lowest set bit are 1s. This increases the likelihood of
4445 // being able to use a sign-extended element value in VECTOR REPLICATE
4446 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
4447 uint64_t SplatBitsZ = SplatBits.getZExtValue();
4448 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
4449 uint64_t Lower = (SplatUndefZ
4450 & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
4451 uint64_t Upper = (SplatUndefZ
4452 & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
4453 uint64_t Value = SplatBitsZ | Upper | Lower;
4454 SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
4455 SplatBitSize);
4456 if (Op.getNode())
4457 return Op;
4458
4459 // Now try assuming that any undefined bits between the first and
4460 // last defined set bits are set. This increases the chances of
4461 // using a non-wraparound mask.
4462 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
4463 Value = SplatBitsZ | Middle;
4464 Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
4465 if (Op.getNode())
4466 return Op;
4467 }
4468
4469 // Fall back to loading it from memory.
4470 return SDValue();
4471 }
4472
4473 // See if we should use shuffles to construct the vector from other vectors.
Ahmed Bougachaf8dfb472016-02-09 22:54:12 +00004474 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004475 return Res;
4476
Ulrich Weigandcd808232015-05-05 19:26:48 +00004477 // Detect SCALAR_TO_VECTOR conversions.
4478 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
4479 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
4480
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004481 // Otherwise use buildVector to build the vector up from GPRs.
4482 unsigned NumElements = Op.getNumOperands();
4483 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
4484 for (unsigned I = 0; I < NumElements; ++I)
4485 Ops[I] = Op.getOperand(I);
4486 return buildVector(DAG, DL, VT, Ops);
4487}
4488
4489SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
4490 SelectionDAG &DAG) const {
4491 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
4492 SDLoc DL(Op);
4493 EVT VT = Op.getValueType();
4494 unsigned NumElements = VT.getVectorNumElements();
4495
4496 if (VSN->isSplat()) {
4497 SDValue Op0 = Op.getOperand(0);
4498 unsigned Index = VSN->getSplatIndex();
4499 assert(Index < VT.getVectorNumElements() &&
4500 "Splat index should be defined and in first operand");
4501 // See whether the value we're splatting is directly available as a scalar.
4502 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4503 Op0.getOpcode() == ISD::BUILD_VECTOR)
4504 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
4505 // Otherwise keep it as a vector-to-vector operation.
4506 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
4507 DAG.getConstant(Index, DL, MVT::i32));
4508 }
4509
4510 GeneralShuffle GS(VT);
4511 for (unsigned I = 0; I < NumElements; ++I) {
4512 int Elt = VSN->getMaskElt(I);
4513 if (Elt < 0)
4514 GS.addUndef();
Jonas Paulsson463e2a62017-01-24 05:43:03 +00004515 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
4516 unsigned(Elt) % NumElements))
4517 return SDValue();
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004518 }
4519 return GS.getNode(DAG, SDLoc(VSN));
4520}
4521
4522SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
4523 SelectionDAG &DAG) const {
4524 SDLoc DL(Op);
4525 // Just insert the scalar into element 0 of an undefined vector.
4526 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4527 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
4528 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
4529}
4530
Ulrich Weigandcd808232015-05-05 19:26:48 +00004531SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
4532 SelectionDAG &DAG) const {
4533 // Handle insertions of floating-point values.
4534 SDLoc DL(Op);
4535 SDValue Op0 = Op.getOperand(0);
4536 SDValue Op1 = Op.getOperand(1);
4537 SDValue Op2 = Op.getOperand(2);
4538 EVT VT = Op.getValueType();
4539
Ulrich Weigand80b3af72015-05-05 19:27:45 +00004540 // Insertions into constant indices of a v2f64 can be done using VPDI.
4541 // However, if the inserted value is a bitcast or a constant then it's
4542 // better to use GPRs, as below.
4543 if (VT == MVT::v2f64 &&
4544 Op1.getOpcode() != ISD::BITCAST &&
Ulrich Weigandcd808232015-05-05 19:26:48 +00004545 Op1.getOpcode() != ISD::ConstantFP &&
4546 Op2.getOpcode() == ISD::Constant) {
4547 uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
4548 unsigned Mask = VT.getVectorNumElements() - 1;
4549 if (Index <= Mask)
4550 return Op;
4551 }
4552
4553 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
Sanjay Patel1ed771f2016-09-14 16:37:15 +00004554 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
Ulrich Weigandcd808232015-05-05 19:26:48 +00004555 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
4556 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
4557 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
4558 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
4559 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4560}
4561
4562SDValue
4563SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
4564 SelectionDAG &DAG) const {
4565 // Handle extractions of floating-point values.
4566 SDLoc DL(Op);
4567 SDValue Op0 = Op.getOperand(0);
4568 SDValue Op1 = Op.getOperand(1);
4569 EVT VT = Op.getValueType();
4570 EVT VecVT = Op0.getValueType();
4571
4572 // Extractions of constant indices can be done directly.
4573 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
4574 uint64_t Index = CIndexN->getZExtValue();
4575 unsigned Mask = VecVT.getVectorNumElements() - 1;
4576 if (Index <= Mask)
4577 return Op;
4578 }
4579
4580 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
4581 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
4582 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
4583 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
4584 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
4585 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
4586}
4587
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00004588SDValue
4589SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00004590 unsigned UnpackHigh) const {
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00004591 SDValue PackedOp = Op.getOperand(0);
4592 EVT OutVT = Op.getValueType();
4593 EVT InVT = PackedOp.getValueType();
Sanjay Patel1ed771f2016-09-14 16:37:15 +00004594 unsigned ToBits = OutVT.getScalarSizeInBits();
4595 unsigned FromBits = InVT.getScalarSizeInBits();
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00004596 do {
4597 FromBits *= 2;
4598 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
4599 SystemZ::VectorBits / FromBits);
4600 PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
4601 } while (FromBits != ToBits);
4602 return PackedOp;
4603}
4604
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004605SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
4606 unsigned ByScalar) const {
4607 // Look for cases where a vector shift can use the *_BY_SCALAR form.
4608 SDValue Op0 = Op.getOperand(0);
4609 SDValue Op1 = Op.getOperand(1);
4610 SDLoc DL(Op);
4611 EVT VT = Op.getValueType();
Sanjay Patel1ed771f2016-09-14 16:37:15 +00004612 unsigned ElemBitSize = VT.getScalarSizeInBits();
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004613
4614 // See whether the shift vector is a splat represented as BUILD_VECTOR.
4615 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
4616 APInt SplatBits, SplatUndef;
4617 unsigned SplatBitSize;
4618 bool HasAnyUndefs;
4619 // Check for constant splats. Use ElemBitSize as the minimum element
4620 // width and reject splats that need wider elements.
4621 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
4622 ElemBitSize, true) &&
4623 SplatBitSize == ElemBitSize) {
4624 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
4625 DL, MVT::i32);
4626 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4627 }
4628 // Check for variable splats.
4629 BitVector UndefElements;
4630 SDValue Splat = BVN->getSplatValue(&UndefElements);
4631 if (Splat) {
4632 // Since i32 is the smallest legal type, we either need a no-op
4633 // or a truncation.
4634 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
4635 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4636 }
4637 }
4638
4639 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
4640 // and the shift amount is directly available in a GPR.
4641 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
4642 if (VSN->isSplat()) {
4643 SDValue VSNOp0 = VSN->getOperand(0);
4644 unsigned Index = VSN->getSplatIndex();
4645 assert(Index < VT.getVectorNumElements() &&
4646 "Splat index should be defined and in first operand");
4647 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
4648 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
4649 // Since i32 is the smallest legal type, we either need a no-op
4650 // or a truncation.
4651 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
4652 VSNOp0.getOperand(Index));
4653 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
4654 }
4655 }
4656 }
4657
4658 // Otherwise just treat the current form as legal.
4659 return Op;
4660}
4661
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004662SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
4663 SelectionDAG &DAG) const {
4664 switch (Op.getOpcode()) {
Ulrich Weigandf557d082016-04-04 12:44:55 +00004665 case ISD::FRAMEADDR:
4666 return lowerFRAMEADDR(Op, DAG);
4667 case ISD::RETURNADDR:
4668 return lowerRETURNADDR(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004669 case ISD::BR_CC:
4670 return lowerBR_CC(Op, DAG);
4671 case ISD::SELECT_CC:
4672 return lowerSELECT_CC(Op, DAG);
Richard Sandifordf722a8e302013-10-16 11:10:55 +00004673 case ISD::SETCC:
4674 return lowerSETCC(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004675 case ISD::GlobalAddress:
4676 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
4677 case ISD::GlobalTLSAddress:
4678 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
4679 case ISD::BlockAddress:
4680 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
4681 case ISD::JumpTable:
4682 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
4683 case ISD::ConstantPool:
4684 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
4685 case ISD::BITCAST:
4686 return lowerBITCAST(Op, DAG);
4687 case ISD::VASTART:
4688 return lowerVASTART(Op, DAG);
4689 case ISD::VACOPY:
4690 return lowerVACOPY(Op, DAG);
4691 case ISD::DYNAMIC_STACKALLOC:
4692 return lowerDYNAMIC_STACKALLOC(Op, DAG);
Marcin Koscielnicki9de88d92016-05-04 23:31:26 +00004693 case ISD::GET_DYNAMIC_AREA_OFFSET:
4694 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
Richard Sandiford7d86e472013-08-21 09:34:56 +00004695 case ISD::SMUL_LOHI:
4696 return lowerSMUL_LOHI(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004697 case ISD::UMUL_LOHI:
4698 return lowerUMUL_LOHI(Op, DAG);
4699 case ISD::SDIVREM:
4700 return lowerSDIVREM(Op, DAG);
4701 case ISD::UDIVREM:
4702 return lowerUDIVREM(Op, DAG);
4703 case ISD::OR:
4704 return lowerOR(Op, DAG);
Ulrich Weigandb4012182015-03-31 12:56:33 +00004705 case ISD::CTPOP:
4706 return lowerCTPOP(Op, DAG);
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00004707 case ISD::ATOMIC_FENCE:
4708 return lowerATOMIC_FENCE(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004709 case ISD::ATOMIC_SWAP:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004710 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
4711 case ISD::ATOMIC_STORE:
4712 return lowerATOMIC_STORE(Op, DAG);
4713 case ISD::ATOMIC_LOAD:
4714 return lowerATOMIC_LOAD(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004715 case ISD::ATOMIC_LOAD_ADD:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004716 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004717 case ISD::ATOMIC_LOAD_SUB:
Richard Sandiford41350a52013-12-24 15:18:04 +00004718 return lowerATOMIC_LOAD_SUB(Op, DAG);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004719 case ISD::ATOMIC_LOAD_AND:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004720 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004721 case ISD::ATOMIC_LOAD_OR:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004722 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004723 case ISD::ATOMIC_LOAD_XOR:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004724 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004725 case ISD::ATOMIC_LOAD_NAND:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004726 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004727 case ISD::ATOMIC_LOAD_MIN:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004728 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004729 case ISD::ATOMIC_LOAD_MAX:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004730 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004731 case ISD::ATOMIC_LOAD_UMIN:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004732 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004733 case ISD::ATOMIC_LOAD_UMAX:
Richard Sandifordbef3d7a2013-12-10 10:49:34 +00004734 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
Ulrich Weigand0f1de042017-09-28 16:22:54 +00004735 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004736 return lowerATOMIC_CMP_SWAP(Op, DAG);
4737 case ISD::STACKSAVE:
4738 return lowerSTACKSAVE(Op, DAG);
4739 case ISD::STACKRESTORE:
4740 return lowerSTACKRESTORE(Op, DAG);
Richard Sandiford03481332013-08-23 11:36:42 +00004741 case ISD::PREFETCH:
4742 return lowerPREFETCH(Op, DAG);
Ulrich Weigand57c85f52015-04-01 12:51:43 +00004743 case ISD::INTRINSIC_W_CHAIN:
4744 return lowerINTRINSIC_W_CHAIN(Op, DAG);
Ulrich Weigandc1708b22015-05-05 19:31:09 +00004745 case ISD::INTRINSIC_WO_CHAIN:
4746 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004747 case ISD::BUILD_VECTOR:
4748 return lowerBUILD_VECTOR(Op, DAG);
4749 case ISD::VECTOR_SHUFFLE:
4750 return lowerVECTOR_SHUFFLE(Op, DAG);
4751 case ISD::SCALAR_TO_VECTOR:
4752 return lowerSCALAR_TO_VECTOR(Op, DAG);
Ulrich Weigandcd808232015-05-05 19:26:48 +00004753 case ISD::INSERT_VECTOR_ELT:
4754 return lowerINSERT_VECTOR_ELT(Op, DAG);
4755 case ISD::EXTRACT_VECTOR_ELT:
4756 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00004757 case ISD::SIGN_EXTEND_VECTOR_INREG:
4758 return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
4759 case ISD::ZERO_EXTEND_VECTOR_INREG:
4760 return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004761 case ISD::SHL:
4762 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
4763 case ISD::SRL:
4764 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
4765 case ISD::SRA:
4766 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004767 default:
4768 llvm_unreachable("Unexpected node to lower");
4769 }
4770}
4771
Ulrich Weiganda11f63a2017-08-04 18:57:58 +00004772// Lower operations with invalid operand or result types (currently used
4773// only for 128-bit integer types).
4774
4775static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
4776 SDLoc DL(In);
4777 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
4778 DAG.getIntPtrConstant(0, DL));
4779 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
4780 DAG.getIntPtrConstant(1, DL));
4781 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
4782 MVT::Untyped, Hi, Lo);
4783 return SDValue(Pair, 0);
4784}
4785
4786static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
4787 SDLoc DL(In);
4788 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
4789 DL, MVT::i64, In);
4790 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
4791 DL, MVT::i64, In);
4792 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
4793}
4794
// Lower nodes whose operand or result types are invalid for the target
// (currently used only for 128-bit integer atomics), pushing the
// replacement values for each result of N onto Results.
void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    // 128-bit atomic load: perform it on a GR128 pair and split the
    // result back into an i128.
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    // 128-bit atomic store: the stored value (operand 2) is converted to
    // a GR128 pair and passed before the address (operand 1).
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { N->getOperand(0),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    // 128-bit compare-and-swap: results are the loaded value (i128), the
    // success flag derived from CC, and the chain.
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // CC == CCMASK_CS_EQ means the exchange succeeded.
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(2),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(1));
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}
4850
4851void
4852SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
4853 SmallVectorImpl<SDValue> &Results,
4854 SelectionDAG &DAG) const {
4855 return LowerOperationWrapper(N, Results, DAG);
4856}
4857
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004858const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
4859#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
Matthias Braund04893f2015-05-07 21:33:59 +00004860 switch ((SystemZISD::NodeType)Opcode) {
4861 case SystemZISD::FIRST_NUMBER: break;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004862 OPCODE(RET_FLAG);
4863 OPCODE(CALL);
Richard Sandiford709bda62013-08-19 12:42:31 +00004864 OPCODE(SIBCALL);
Ulrich Weigand1c6f07d2015-05-04 17:39:40 +00004865 OPCODE(TLS_GDCALL);
4866 OPCODE(TLS_LDCALL);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004867 OPCODE(PCREL_WRAPPER);
Richard Sandiford54b36912013-09-27 15:14:04 +00004868 OPCODE(PCREL_OFFSET);
Richard Sandiford57485472013-12-13 15:35:00 +00004869 OPCODE(IABS);
Richard Sandiford5bc670b2013-09-06 11:51:39 +00004870 OPCODE(ICMP);
4871 OPCODE(FCMP);
Richard Sandiford35b9be22013-08-28 10:31:43 +00004872 OPCODE(TM);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004873 OPCODE(BR_CCMASK);
4874 OPCODE(SELECT_CCMASK);
4875 OPCODE(ADJDYNALLOC);
Ulrich Weigand1c6f07d2015-05-04 17:39:40 +00004876 OPCODE(POPCNT);
Ulrich Weigand2b3482f2017-07-17 17:41:11 +00004877 OPCODE(SMUL_LOHI);
Ulrich Weigand43579cf2017-07-05 13:17:31 +00004878 OPCODE(UMUL_LOHI);
4879 OPCODE(SDIVREM);
4880 OPCODE(UDIVREM);
Richard Sandifordd131ff82013-07-08 09:35:23 +00004881 OPCODE(MVC);
Richard Sandiford5e318f02013-08-27 09:54:29 +00004882 OPCODE(MVC_LOOP);
Richard Sandiford178273a2013-09-05 10:36:45 +00004883 OPCODE(NC);
4884 OPCODE(NC_LOOP);
4885 OPCODE(OC);
4886 OPCODE(OC_LOOP);
4887 OPCODE(XC);
4888 OPCODE(XC_LOOP);
Richard Sandiford761703a2013-08-12 10:17:33 +00004889 OPCODE(CLC);
Richard Sandiford5e318f02013-08-27 09:54:29 +00004890 OPCODE(CLC_LOOP);
Richard Sandifordbb83a502013-08-16 11:29:37 +00004891 OPCODE(STPCPY);
Ulrich Weigand1c6f07d2015-05-04 17:39:40 +00004892 OPCODE(STRCMP);
Richard Sandiford0dec06a2013-08-16 11:41:43 +00004893 OPCODE(SEARCH_STRING);
Richard Sandiford564681c2013-08-12 10:28:10 +00004894 OPCODE(IPM);
Ulrich Weiganda9ac6d62016-04-04 12:45:44 +00004895 OPCODE(MEMBARRIER);
Ulrich Weigand57c85f52015-04-01 12:51:43 +00004896 OPCODE(TBEGIN);
4897 OPCODE(TBEGIN_NOFLOAT);
4898 OPCODE(TEND);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004899 OPCODE(BYTE_MASK);
4900 OPCODE(ROTATE_MASK);
4901 OPCODE(REPLICATE);
4902 OPCODE(JOIN_DWORDS);
4903 OPCODE(SPLAT);
4904 OPCODE(MERGE_HIGH);
4905 OPCODE(MERGE_LOW);
4906 OPCODE(SHL_DOUBLE);
4907 OPCODE(PERMUTE_DWORDS);
4908 OPCODE(PERMUTE);
4909 OPCODE(PACK);
Ulrich Weigandc1708b22015-05-05 19:31:09 +00004910 OPCODE(PACKS_CC);
4911 OPCODE(PACKLS_CC);
Ulrich Weigandcd2a1b52015-05-05 19:29:21 +00004912 OPCODE(UNPACK_HIGH);
4913 OPCODE(UNPACKL_HIGH);
4914 OPCODE(UNPACK_LOW);
4915 OPCODE(UNPACKL_LOW);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004916 OPCODE(VSHL_BY_SCALAR);
4917 OPCODE(VSRL_BY_SCALAR);
4918 OPCODE(VSRA_BY_SCALAR);
4919 OPCODE(VSUM);
4920 OPCODE(VICMPE);
4921 OPCODE(VICMPH);
4922 OPCODE(VICMPHL);
Ulrich Weigandc1708b22015-05-05 19:31:09 +00004923 OPCODE(VICMPES);
4924 OPCODE(VICMPHS);
4925 OPCODE(VICMPHLS);
Ulrich Weigandcd808232015-05-05 19:26:48 +00004926 OPCODE(VFCMPE);
4927 OPCODE(VFCMPH);
4928 OPCODE(VFCMPHE);
Ulrich Weigandc1708b22015-05-05 19:31:09 +00004929 OPCODE(VFCMPES);
4930 OPCODE(VFCMPHS);
4931 OPCODE(VFCMPHES);
4932 OPCODE(VFTCI);
Ulrich Weigand80b3af72015-05-05 19:27:45 +00004933 OPCODE(VEXTEND);
4934 OPCODE(VROUND);
Ulrich Weigandc1708b22015-05-05 19:31:09 +00004935 OPCODE(VTM);
4936 OPCODE(VFAE_CC);
4937 OPCODE(VFAEZ_CC);
4938 OPCODE(VFEE_CC);
4939 OPCODE(VFEEZ_CC);
4940 OPCODE(VFENE_CC);
4941 OPCODE(VFENEZ_CC);
4942 OPCODE(VISTR_CC);
4943 OPCODE(VSTRC_CC);
4944 OPCODE(VSTRCZ_CC);
Marcin Koscielnicki32e87342016-07-02 02:20:40 +00004945 OPCODE(TDC);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004946 OPCODE(ATOMIC_SWAPW);
4947 OPCODE(ATOMIC_LOADW_ADD);
4948 OPCODE(ATOMIC_LOADW_SUB);
4949 OPCODE(ATOMIC_LOADW_AND);
4950 OPCODE(ATOMIC_LOADW_OR);
4951 OPCODE(ATOMIC_LOADW_XOR);
4952 OPCODE(ATOMIC_LOADW_NAND);
4953 OPCODE(ATOMIC_LOADW_MIN);
4954 OPCODE(ATOMIC_LOADW_MAX);
4955 OPCODE(ATOMIC_LOADW_UMIN);
4956 OPCODE(ATOMIC_LOADW_UMAX);
4957 OPCODE(ATOMIC_CMP_SWAPW);
Ulrich Weigand0f1de042017-09-28 16:22:54 +00004958 OPCODE(ATOMIC_CMP_SWAP);
Ulrich Weiganda11f63a2017-08-04 18:57:58 +00004959 OPCODE(ATOMIC_LOAD_128);
4960 OPCODE(ATOMIC_STORE_128);
4961 OPCODE(ATOMIC_CMP_SWAP_128);
Bryan Chan28b759c2016-05-16 20:32:22 +00004962 OPCODE(LRV);
4963 OPCODE(STRV);
Richard Sandiford03481332013-08-23 11:36:42 +00004964 OPCODE(PREFETCH);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004965 }
Craig Topper062a2ba2014-04-25 05:30:21 +00004966 return nullptr;
Ulrich Weigand5f613df2013-05-06 16:15:19 +00004967#undef OPCODE
4968}
4969
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004970// Return true if VT is a vector whose elements are a whole number of bytes
Jonas Paulssoncad72ef2017-04-07 12:35:11 +00004971// in width. Also check for presence of vector support.
4972bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
4973 if (!Subtarget.hasVector())
4974 return false;
4975
Jonas Paulsson1d33cd32017-03-07 09:49:31 +00004976 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004977}
4978
4979// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
4980// producing a result of type ResVT. Op is a possibly bitcast version
4981// of the input vector and Index is the index (based on type VecVT) that
4982// should be extracted. Return the new extraction if a simplification
4983// was possible or if Force is true.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00004984SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
4985 EVT VecVT, SDValue Op,
4986 unsigned Index,
Ulrich Weigandce4c1092015-05-05 19:25:42 +00004987 DAGCombinerInfo &DCI,
4988 bool Force) const {
4989 SelectionDAG &DAG = DCI.DAG;
4990
4991 // The number of bytes being extracted.
4992 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
4993
4994 for (;;) {
4995 unsigned Opcode = Op.getOpcode();
4996 if (Opcode == ISD::BITCAST)
4997 // Look through bitcasts.
4998 Op = Op.getOperand(0);
4999 else if (Opcode == ISD::VECTOR_SHUFFLE &&
5000 canTreatAsByteVector(Op.getValueType())) {
5001 // Get a VPERM-like permute mask and see whether the bytes covered
5002 // by the extracted element are a contiguous sequence from one
5003 // source operand.
5004 SmallVector<int, SystemZ::VectorBytes> Bytes;
5005 getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
5006 int First;
5007 if (!getShuffleInput(Bytes, Index * BytesPerElement,
5008 BytesPerElement, First))
5009 break;
5010 if (First < 0)
5011 return DAG.getUNDEF(ResVT);
5012 // Make sure the contiguous sequence starts at a multiple of the
5013 // original element size.
5014 unsigned Byte = unsigned(First) % Bytes.size();
5015 if (Byte % BytesPerElement != 0)
5016 break;
5017 // We can get the extracted value directly from an input.
5018 Index = Byte / BytesPerElement;
5019 Op = Op.getOperand(unsigned(First) / Bytes.size());
5020 Force = true;
5021 } else if (Opcode == ISD::BUILD_VECTOR &&
5022 canTreatAsByteVector(Op.getValueType())) {
5023 // We can only optimize this case if the BUILD_VECTOR elements are
5024 // at least as wide as the extracted value.
5025 EVT OpVT = Op.getValueType();
5026 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5027 if (OpBytesPerElement < BytesPerElement)
5028 break;
5029 // Make sure that the least-significant bit of the extracted value
5030 // is the least significant bit of an input.
5031 unsigned End = (Index + 1) * BytesPerElement;
5032 if (End % OpBytesPerElement != 0)
5033 break;
5034 // We're extracting the low part of one operand of the BUILD_VECTOR.
5035 Op = Op.getOperand(End / OpBytesPerElement - 1);
5036 if (!Op.getValueType().isInteger()) {
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00005037 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005038 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5039 DCI.AddToWorklist(Op.getNode());
5040 }
5041 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
5042 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5043 if (VT != ResVT) {
5044 DCI.AddToWorklist(Op.getNode());
5045 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
5046 }
5047 return Op;
5048 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00005049 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
5050 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5051 canTreatAsByteVector(Op.getValueType()) &&
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005052 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
5053 // Make sure that only the unextended bits are significant.
5054 EVT ExtVT = Op.getValueType();
5055 EVT OpVT = Op.getOperand(0).getValueType();
5056 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
5057 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5058 unsigned Byte = Index * BytesPerElement;
5059 unsigned SubByte = Byte % ExtBytesPerElement;
5060 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
5061 if (SubByte < MinSubByte ||
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00005062 SubByte + BytesPerElement > ExtBytesPerElement)
5063 break;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005064 // Get the byte offset of the unextended element
5065 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
5066 // ...then add the byte offset relative to that element.
5067 Byte += SubByte - MinSubByte;
5068 if (Byte % BytesPerElement != 0)
NAKAMURA Takumi0a7d0ad2015-09-22 11:15:07 +00005069 break;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005070 Op = Op.getOperand(0);
5071 Index = Byte / BytesPerElement;
5072 Force = true;
5073 } else
5074 break;
5075 }
5076 if (Force) {
5077 if (Op.getValueType() != VecVT) {
5078 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
5079 DCI.AddToWorklist(Op.getNode());
5080 }
5081 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
5082 DAG.getConstant(Index, DL, MVT::i32));
5083 }
5084 return SDValue();
5085}
5086
5087// Optimize vector operations in scalar value Op on the basis that Op
5088// is truncated to TruncVT.
Benjamin Kramerbdc49562016-06-12 15:39:02 +00005089SDValue SystemZTargetLowering::combineTruncateExtract(
5090 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005091 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
5092 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
5093 // of type TruncVT.
5094 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5095 TruncVT.getSizeInBits() % 8 == 0) {
5096 SDValue Vec = Op.getOperand(0);
5097 EVT VecVT = Vec.getValueType();
5098 if (canTreatAsByteVector(VecVT)) {
5099 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
5100 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5101 unsigned TruncBytes = TruncVT.getStoreSize();
5102 if (BytesPerElement % TruncBytes == 0) {
5103 // Calculate the value of Y' in the above description. We are
5104 // splitting the original elements into Scale equal-sized pieces
5105 // and for truncation purposes want the last (least-significant)
5106 // of these pieces for IndexN. This is easiest to do by calculating
5107 // the start index of the following element and then subtracting 1.
5108 unsigned Scale = BytesPerElement / TruncBytes;
5109 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
5110
5111 // Defer the creation of the bitcast from X to combineExtract,
5112 // which might be able to optimize the extraction.
5113 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
5114 VecVT.getStoreSize() / TruncBytes);
5115 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
5116 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
5117 }
5118 }
5119 }
5120 }
5121 return SDValue();
5122}
5123
Ulrich Weigand849a59f2018-01-19 20:52:04 +00005124SDValue SystemZTargetLowering::combineZERO_EXTEND(
5125 SDNode *N, DAGCombinerInfo &DCI) const {
5126 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
5127 SelectionDAG &DAG = DCI.DAG;
5128 SDValue N0 = N->getOperand(0);
5129 EVT VT = N->getValueType(0);
5130 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
5131 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
5132 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5133 if (TrueOp && FalseOp) {
5134 SDLoc DL(N0);
5135 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
5136 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
5137 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
5138 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
5139 // If N0 has multiple uses, change other uses as well.
5140 if (!N0.hasOneUse()) {
5141 SDValue TruncSelect =
5142 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
5143 DCI.CombineTo(N0.getNode(), TruncSelect);
5144 }
5145 return NewSelect;
5146 }
5147 }
5148 return SDValue();
5149}
5150
5151SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
5152 SDNode *N, DAGCombinerInfo &DCI) const {
5153 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
5154 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
5155 // into (select_cc LHS, RHS, -1, 0, COND)
5156 SelectionDAG &DAG = DCI.DAG;
5157 SDValue N0 = N->getOperand(0);
5158 EVT VT = N->getValueType(0);
5159 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
5160 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
5161 N0 = N0.getOperand(0);
5162 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
5163 SDLoc DL(N0);
5164 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
5165 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
5166 N0.getOperand(2) };
5167 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
5168 }
5169 return SDValue();
5170}
5171
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005172SDValue SystemZTargetLowering::combineSIGN_EXTEND(
5173 SDNode *N, DAGCombinerInfo &DCI) const {
5174 // Convert (sext (ashr (shl X, C1), C2)) to
5175 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
5176 // cheap as narrower ones.
5177 SelectionDAG &DAG = DCI.DAG;
5178 SDValue N0 = N->getOperand(0);
5179 EVT VT = N->getValueType(0);
5180 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
5181 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5182 SDValue Inner = N0.getOperand(0);
5183 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
5184 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00005185 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005186 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
5187 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
5188 EVT ShiftVT = N0.getOperand(1).getValueType();
5189 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
5190 Inner.getOperand(0));
5191 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
5192 DAG.getConstant(NewShlAmt, SDLoc(Inner),
5193 ShiftVT));
5194 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
5195 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
5196 }
5197 }
5198 }
5199 return SDValue();
5200}
5201
SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  // Look through a bitcast on operand 0 to find a zero byte mask.
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
    // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    // The unpack result elements are twice as wide (ElemBytes * 16 bits),
    // so only element sizes up to 4 bytes can be handled this way.
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      // The unpack operates on integer elements; bitcast in if needed,
      // and bitcast the result back to the original type at the end.
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}
5236
5237SDValue SystemZTargetLowering::combineSTORE(
5238 SDNode *N, DAGCombinerInfo &DCI) const {
5239 SelectionDAG &DAG = DCI.DAG;
5240 auto *SN = cast<StoreSDNode>(N);
5241 auto &Op1 = N->getOperand(1);
5242 EVT MemVT = SN->getMemoryVT();
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005243 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
5244 // for the extraction to be done on a vMiN value, so that we can use VSTE.
5245 // If X has wider elements then convert it to:
5246 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005247 if (MemVT.isInteger()) {
5248 if (SDValue Value =
5249 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
5250 DCI.AddToWorklist(Value.getNode());
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005251
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005252 // Rewrite the store with the new form of stored value.
5253 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
5254 SN->getBasePtr(), SN->getMemoryVT(),
5255 SN->getMemOperand());
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005256 }
5257 }
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005258 // Combine STORE (BSWAP) into STRVH/STRV/STRVG
Ulrich Weigand59a01a92017-09-19 20:50:05 +00005259 if (!SN->isTruncatingStore() &&
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005260 Op1.getOpcode() == ISD::BSWAP &&
5261 Op1.getNode()->hasOneUse() &&
5262 (Op1.getValueType() == MVT::i16 ||
5263 Op1.getValueType() == MVT::i32 ||
5264 Op1.getValueType() == MVT::i64)) {
5265
5266 SDValue BSwapOp = Op1.getOperand(0);
5267
5268 if (BSwapOp.getValueType() == MVT::i16)
5269 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
5270
5271 SDValue Ops[] = {
5272 N->getOperand(0), BSwapOp, N->getOperand(2),
5273 DAG.getValueType(Op1.getValueType())
5274 };
5275
5276 return
5277 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
5278 Ops, MemVT, SN->getMemOperand());
5279 }
5280 return SDValue();
5281}
5282
5283SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
5284 SDNode *N, DAGCombinerInfo &DCI) const {
Jonas Paulsson56bb0852017-03-31 13:22:59 +00005285
Jonas Paulsson56bb0852017-03-31 13:22:59 +00005286 if (!Subtarget.hasVector())
5287 return SDValue();
5288
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005289 // Try to simplify a vector extraction.
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005290 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
5291 SDValue Op0 = N->getOperand(0);
5292 EVT VecVT = Op0.getValueType();
5293 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
5294 IndexN->getZExtValue(), DCI, false);
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005295 }
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005296 return SDValue();
5297}
5298
5299SDValue SystemZTargetLowering::combineJOIN_DWORDS(
5300 SDNode *N, DAGCombinerInfo &DCI) const {
5301 SelectionDAG &DAG = DCI.DAG;
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005302 // (join_dwords X, X) == (replicate X)
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005303 if (N->getOperand(0) == N->getOperand(1))
Ulrich Weigandce4c1092015-05-05 19:25:42 +00005304 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
5305 N->getOperand(0));
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005306 return SDValue();
5307}
5308
SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);
  // Match the first pattern: an f32 round of element 0 of a v2f64.
  if (N->getValueType(0) == MVT::f32 &&
      Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
    SDValue Vec = Op0.getOperand(0);
    // Look for a sibling extraction of element 1 whose only use is
    // another f32 round.
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() &&
          U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == ISD::FP_ROUND &&
            OtherRound.getOperand(0) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          // Round both elements at once; the two f32 results are taken
          // from elements 0 and 2 of the v4f32 VROUND result.
          SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                       MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          // Rewrite the round of element 1 in place; the round of element 0
          // is returned as the combined value of N.
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}
Bryan Chan28b759c2016-05-16 20:32:22 +00005355
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005356SDValue SystemZTargetLowering::combineBSWAP(
5357 SDNode *N, DAGCombinerInfo &DCI) const {
5358 SelectionDAG &DAG = DCI.DAG;
Bryan Chan28b759c2016-05-16 20:32:22 +00005359 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005360 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
5361 N->getOperand(0).hasOneUse() &&
5362 (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
Ulrich Weiganddb16bee2018-03-02 20:51:59 +00005363 N->getValueType(0) == MVT::i64)) {
Bryan Chan28b759c2016-05-16 20:32:22 +00005364 SDValue Load = N->getOperand(0);
5365 LoadSDNode *LD = cast<LoadSDNode>(Load);
5366
5367 // Create the byte-swapping load.
5368 SDValue Ops[] = {
5369 LD->getChain(), // Chain
5370 LD->getBasePtr(), // Ptr
5371 DAG.getValueType(N->getValueType(0)) // VT
5372 };
5373 SDValue BSLoad =
5374 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
5375 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
5376 MVT::i64 : MVT::i32, MVT::Other),
5377 Ops, LD->getMemoryVT(), LD->getMemOperand());
5378
5379 // If this is an i16 load, insert the truncate.
5380 SDValue ResVal = BSLoad;
5381 if (N->getValueType(0) == MVT::i16)
5382 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
5383
5384 // First, combine the bswap away. This makes the value produced by the
5385 // load dead.
5386 DCI.CombineTo(N, ResVal);
5387
5388 // Next, combine the load away, we give it a bogus result value but a real
5389 // chain result. The result value is dead because the bswap is dead.
5390 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
5391
5392 // Return N so it doesn't get rechecked!
5393 return SDValue(N, 0);
5394 }
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005395 return SDValue();
5396}
Bryan Chan28b759c2016-05-16 20:32:22 +00005397
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005398SDValue SystemZTargetLowering::combineSHIFTROT(
5399 SDNode *N, DAGCombinerInfo &DCI) const {
5400
5401 SelectionDAG &DAG = DCI.DAG;
5402
5403 // Shift/rotate instructions only use the last 6 bits of the second operand
5404 // register. If the second operand is the result of an AND with an immediate
5405 // value that has its last 6 bits set, we can safely remove the AND operation.
Elliot Colp687691a2016-08-18 18:04:26 +00005406 //
5407 // If the AND operation doesn't have the last 6 bits set, we can't remove it
Elliot Colpa4092102016-08-23 14:03:02 +00005408 // entirely, but we can still truncate it to a 16-bit value. This prevents
5409 // us from ending up with a NILL with a signed operand, which will cause the
5410 // instruction printer to abort.
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005411 SDValue N1 = N->getOperand(1);
5412 if (N1.getOpcode() == ISD::AND) {
Elliot Colp687691a2016-08-18 18:04:26 +00005413 SDValue AndMaskOp = N1->getOperand(1);
5414 auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005415
5416 // The AND mask is constant
5417 if (AndMask) {
Elliot Colpa4092102016-08-23 14:03:02 +00005418 auto AmtVal = AndMask->getZExtValue();
5419
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005420 // Bottom 6 bits are set
5421 if ((AmtVal & 0x3f) == 0x3f) {
Elliot Colpa4092102016-08-23 14:03:02 +00005422 SDValue AndOp = N1->getOperand(0);
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005423
5424 // This is the only use, so remove the node
5425 if (N1.hasOneUse()) {
5426 // Combine the AND away
5427 DCI.CombineTo(N1.getNode(), AndOp);
5428
5429 // Return N so it isn't rechecked
5430 return SDValue(N, 0);
5431
5432 // The node will be reused, so create a new node for this one use
5433 } else {
5434 SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
5435 N->getValueType(0), N->getOperand(0),
5436 AndOp);
5437 DCI.AddToWorklist(Replace.getNode());
5438
5439 return Replace;
5440 }
Elliot Colp687691a2016-08-18 18:04:26 +00005441
Elliot Colpa4092102016-08-23 14:03:02 +00005442 // We can't remove the AND, but we can use NILL here (normally we would
5443 // use NILF). Only keep the last 16 bits of the mask. The actual
5444 // transformation will be handled by .td definitions.
5445 } else if (AmtVal >> 16 != 0) {
5446 SDValue AndOp = N1->getOperand(0);
Elliot Colp687691a2016-08-18 18:04:26 +00005447
Elliot Colpa4092102016-08-23 14:03:02 +00005448 auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
5449 SDLoc(AndMaskOp),
5450 AndMaskOp.getValueType());
Elliot Colp687691a2016-08-18 18:04:26 +00005451
Elliot Colpa4092102016-08-23 14:03:02 +00005452 auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
5453 AndOp, NewMask);
Elliot Colp687691a2016-08-18 18:04:26 +00005454
Elliot Colpa4092102016-08-23 14:03:02 +00005455 SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
5456 N->getValueType(0), N->getOperand(0),
5457 NewAnd);
5458 DCI.AddToWorklist(Replace.getNode());
Elliot Colp687691a2016-08-18 18:04:26 +00005459
Elliot Colpa4092102016-08-23 14:03:02 +00005460 return Replace;
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005461 }
5462 }
5463 }
5464
5465 return SDValue();
5466}
5467
Ulrich Weigand31112892018-01-19 20:54:18 +00005468static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) {
5469 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
5470 // set by the glued instruction using the CCValid / CCMask masks,
5471 // If the glued instruction is itself a (ICMP (SELECT_CCMASK)) testing
5472 // the condition code set by some other instruction, see whether we
5473 // can directly use that condition code.
5474 bool Invert = false;
5475
5476 // Verify that we have an appropriate mask for a EQ or NE comparison.
5477 if (CCValid != SystemZ::CCMASK_ICMP)
5478 return false;
5479 if (CCMask == SystemZ::CCMASK_CMP_NE)
5480 Invert = !Invert;
5481 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
5482 return false;
5483
5484 // Verify that we have an ICMP that is the single user of a SELECT_CCMASK.
5485 SDNode *ICmp = Glue.getNode();
5486 if (ICmp->getOpcode() != SystemZISD::ICMP)
5487 return false;
5488 SDNode *Select = ICmp->getOperand(0).getNode();
5489 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
5490 return false;
5491 if (!Select->hasOneUse())
5492 return false;
5493
5494 // Verify that the ICMP compares against one of select values.
5495 auto *CompareVal = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
5496 if (!CompareVal)
5497 return false;
5498 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
5499 if (!TrueVal)
5500 return false;
5501 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
5502 if (!FalseVal)
5503 return false;
5504 if (CompareVal->getZExtValue() == FalseVal->getZExtValue())
5505 Invert = !Invert;
5506 else if (CompareVal->getZExtValue() != TrueVal->getZExtValue())
5507 return false;
5508
5509 // Compute the effective CC mask for the new branch or select.
5510 auto *NewCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
5511 auto *NewCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
5512 if (!NewCCValid || !NewCCMask)
5513 return false;
5514 CCValid = NewCCValid->getZExtValue();
5515 CCMask = NewCCMask->getZExtValue();
5516 if (Invert)
5517 CCMask ^= CCValid;
5518
5519 // Return the updated Glue link.
5520 Glue = Select->getOperand(4);
5521 return true;
5522}
5523
Ulrich Weigand145d63f2018-01-22 15:41:49 +00005524static bool combineMergeChains(SDValue &Chain, SDValue Glue) {
5525 // We are about to glue an instruction with input chain Chain to the
5526 // instruction Glue. Verify that this would not create an invalid
5527 // topological sort due to intervening chain nodes.
5528
5529 SDNode *Node = Glue.getNode();
5530 for (int ResNo = Node->getNumValues() - 1; ResNo >= 0; --ResNo)
5531 if (Node->getValueType(ResNo) == MVT::Other) {
5532 SDValue OutChain = SDValue(Node, ResNo);
5533 // FIXME: We should be able to at least handle an intervening
5534 // TokenFactor node by swapping chains around a bit ...
5535 return Chain == OutChain;
5536 }
5537
5538 return true;
5539}
5540
Ulrich Weigand31112892018-01-19 20:54:18 +00005541SDValue SystemZTargetLowering::combineBR_CCMASK(
5542 SDNode *N, DAGCombinerInfo &DCI) const {
5543 SelectionDAG &DAG = DCI.DAG;
5544
5545 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
5546 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
5547 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
5548 if (!CCValid || !CCMask)
5549 return SDValue();
5550
5551 int CCValidVal = CCValid->getZExtValue();
5552 int CCMaskVal = CCMask->getZExtValue();
Ulrich Weigand145d63f2018-01-22 15:41:49 +00005553 SDValue Chain = N->getOperand(0);
Ulrich Weigand31112892018-01-19 20:54:18 +00005554 SDValue Glue = N->getOperand(4);
5555
Ulrich Weigand145d63f2018-01-22 15:41:49 +00005556 if (combineCCMask(Glue, CCValidVal, CCMaskVal)
5557 && combineMergeChains(Chain, Glue))
Ulrich Weigand31112892018-01-19 20:54:18 +00005558 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Ulrich Weigand145d63f2018-01-22 15:41:49 +00005559 Chain,
Ulrich Weigand31112892018-01-19 20:54:18 +00005560 DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
5561 DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
5562 N->getOperand(3), Glue);
5563 return SDValue();
5564}
5565
5566SDValue SystemZTargetLowering::combineSELECT_CCMASK(
5567 SDNode *N, DAGCombinerInfo &DCI) const {
5568 SelectionDAG &DAG = DCI.DAG;
5569
5570 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
5571 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
5572 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
5573 if (!CCValid || !CCMask)
5574 return SDValue();
5575
5576 int CCValidVal = CCValid->getZExtValue();
5577 int CCMaskVal = CCMask->getZExtValue();
5578 SDValue Glue = N->getOperand(4);
5579
5580 if (combineCCMask(Glue, CCValidVal, CCMaskVal))
5581 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
5582 N->getOperand(0),
5583 N->getOperand(1),
5584 DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
5585 DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
5586 Glue);
5587 return SDValue();
5588}
5589
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005590SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
5591 DAGCombinerInfo &DCI) const {
5592 switch(N->getOpcode()) {
5593 default: break;
Ulrich Weigand849a59f2018-01-19 20:52:04 +00005594 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005595 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
Ulrich Weigand849a59f2018-01-19 20:52:04 +00005596 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005597 case SystemZISD::MERGE_HIGH:
5598 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
5599 case ISD::STORE: return combineSTORE(N, DCI);
5600 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
5601 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
5602 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
5603 case ISD::BSWAP: return combineBSWAP(N, DCI);
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005604 case ISD::SHL:
5605 case ISD::SRA:
5606 case ISD::SRL:
5607 case ISD::ROTL: return combineSHIFTROT(N, DCI);
Ulrich Weigand31112892018-01-19 20:54:18 +00005608 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
5609 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
Marcin Koscielnicki68747ac2016-06-30 00:08:54 +00005610 }
Elliot Colpbc2cfc22016-07-06 18:13:11 +00005611
Richard Sandiford95bc5f92014-03-07 11:34:35 +00005612 return SDValue();
5613}
5614
Jonas Paulsson13896072018-03-17 08:32:12 +00005615// Return the demanded elements for the OpNo source operand of Op. DemandedElts
5616// are for Op.
5617static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
5618 unsigned OpNo) {
5619 EVT VT = Op.getValueType();
5620 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
5621 APInt SrcDemE;
5622 unsigned Opcode = Op.getOpcode();
5623 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
5624 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5625 switch (Id) {
5626 case Intrinsic::s390_vpksh: // PACKS
5627 case Intrinsic::s390_vpksf:
5628 case Intrinsic::s390_vpksg:
5629 case Intrinsic::s390_vpkshs: // PACKS_CC
5630 case Intrinsic::s390_vpksfs:
5631 case Intrinsic::s390_vpksgs:
5632 case Intrinsic::s390_vpklsh: // PACKLS
5633 case Intrinsic::s390_vpklsf:
5634 case Intrinsic::s390_vpklsg:
5635 case Intrinsic::s390_vpklshs: // PACKLS_CC
5636 case Intrinsic::s390_vpklsfs:
5637 case Intrinsic::s390_vpklsgs:
5638 // VECTOR PACK truncates the elements of two source vectors into one.
5639 SrcDemE = DemandedElts;
5640 if (OpNo == 2)
5641 SrcDemE.lshrInPlace(NumElts / 2);
5642 SrcDemE = SrcDemE.trunc(NumElts / 2);
5643 break;
5644 // VECTOR UNPACK extends half the elements of the source vector.
5645 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
5646 case Intrinsic::s390_vuphh:
5647 case Intrinsic::s390_vuphf:
5648 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
5649 case Intrinsic::s390_vuplhh:
5650 case Intrinsic::s390_vuplhf:
5651 SrcDemE = APInt(NumElts * 2, 0);
5652 SrcDemE.insertBits(DemandedElts, 0);
5653 break;
5654 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
5655 case Intrinsic::s390_vuplhw:
5656 case Intrinsic::s390_vuplf:
5657 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
5658 case Intrinsic::s390_vupllh:
5659 case Intrinsic::s390_vupllf:
5660 SrcDemE = APInt(NumElts * 2, 0);
5661 SrcDemE.insertBits(DemandedElts, NumElts);
5662 break;
5663 case Intrinsic::s390_vpdi: {
5664 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
5665 SrcDemE = APInt(NumElts, 0);
5666 if (!DemandedElts[OpNo - 1])
5667 break;
5668 unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
5669 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
5670 // Demand input element 0 or 1, given by the mask bit value.
5671 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
5672 break;
5673 }
5674 case Intrinsic::s390_vsldb: {
5675 // VECTOR SHIFT LEFT DOUBLE BY BYTE
5676 assert(VT == MVT::v16i8 && "Unexpected type.");
5677 unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
5678 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
5679 unsigned NumSrc0Els = 16 - FirstIdx;
5680 SrcDemE = APInt(NumElts, 0);
5681 if (OpNo == 1) {
5682 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
5683 SrcDemE.insertBits(DemEls, FirstIdx);
5684 } else {
5685 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
5686 SrcDemE.insertBits(DemEls, 0);
5687 }
5688 break;
5689 }
5690 case Intrinsic::s390_vperm:
5691 SrcDemE = APInt(NumElts, 1);
5692 break;
5693 default:
5694 llvm_unreachable("Unhandled intrinsic.");
5695 break;
5696 }
5697 } else {
5698 switch (Opcode) {
5699 case SystemZISD::JOIN_DWORDS:
5700 // Scalar operand.
5701 SrcDemE = APInt(1, 1);
5702 break;
5703 case SystemZISD::SELECT_CCMASK:
5704 SrcDemE = DemandedElts;
5705 break;
5706 default:
5707 llvm_unreachable("Unhandled opcode.");
5708 break;
5709 }
5710 }
5711 return SrcDemE;
5712}
5713
5714static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
5715 const APInt &DemandedElts,
5716 const SelectionDAG &DAG, unsigned Depth,
5717 unsigned OpNo) {
5718 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
5719 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
5720 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
5721 KnownBits LHSKnown(SrcBitWidth), RHSKnown(SrcBitWidth);
5722 DAG.computeKnownBits(Op.getOperand(OpNo), LHSKnown, Src0DemE, Depth + 1);
5723 DAG.computeKnownBits(Op.getOperand(OpNo + 1), RHSKnown, Src1DemE, Depth + 1);
5724 Known.Zero = LHSKnown.Zero & RHSKnown.Zero;
5725 Known.One = LHSKnown.One & RHSKnown.One;
5726}
5727
Ulrich Weigand9eb858c2018-01-19 20:49:05 +00005728void
5729SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
5730 KnownBits &Known,
5731 const APInt &DemandedElts,
5732 const SelectionDAG &DAG,
5733 unsigned Depth) const {
Ulrich Weigand9eb858c2018-01-19 20:49:05 +00005734 Known.resetAll();
Jonas Paulsson13896072018-03-17 08:32:12 +00005735
5736 // Intrinsic CC result is returned in the two low bits.
5737 unsigned tmp0, tmp1; // not used
5738 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
5739 Known.Zero.setBitsFrom(2);
5740 return;
5741 }
5742 EVT VT = Op.getValueType();
5743 if (Op.getResNo() != 0 || VT == MVT::Untyped)
5744 return;
5745 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
5746 "KnownBits does not match VT in bitwidth");
5747 assert ((!VT.isVector() ||
5748 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
5749 "DemandedElts does not match VT number of elements");
5750 unsigned BitWidth = Known.getBitWidth();
5751 unsigned Opcode = Op.getOpcode();
5752 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
5753 bool IsLogical = false;
5754 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5755 switch (Id) {
5756 case Intrinsic::s390_vpksh: // PACKS
5757 case Intrinsic::s390_vpksf:
5758 case Intrinsic::s390_vpksg:
5759 case Intrinsic::s390_vpkshs: // PACKS_CC
5760 case Intrinsic::s390_vpksfs:
5761 case Intrinsic::s390_vpksgs:
5762 case Intrinsic::s390_vpklsh: // PACKLS
5763 case Intrinsic::s390_vpklsf:
5764 case Intrinsic::s390_vpklsg:
5765 case Intrinsic::s390_vpklshs: // PACKLS_CC
5766 case Intrinsic::s390_vpklsfs:
5767 case Intrinsic::s390_vpklsgs:
5768 case Intrinsic::s390_vpdi:
5769 case Intrinsic::s390_vsldb:
5770 case Intrinsic::s390_vperm:
5771 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
5772 break;
5773 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
5774 case Intrinsic::s390_vuplhh:
5775 case Intrinsic::s390_vuplhf:
5776 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
5777 case Intrinsic::s390_vupllh:
5778 case Intrinsic::s390_vupllf:
5779 IsLogical = true;
5780 LLVM_FALLTHROUGH;
5781 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
5782 case Intrinsic::s390_vuphh:
5783 case Intrinsic::s390_vuphf:
5784 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
5785 case Intrinsic::s390_vuplhw:
5786 case Intrinsic::s390_vuplf: {
5787 SDValue SrcOp = Op.getOperand(1);
5788 unsigned SrcBitWidth = SrcOp.getScalarValueSizeInBits();
5789 Known = KnownBits(SrcBitWidth);
5790 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
5791 DAG.computeKnownBits(SrcOp, Known, SrcDemE, Depth + 1);
5792 if (IsLogical) {
5793 Known = Known.zext(BitWidth);
5794 Known.Zero.setBitsFrom(SrcBitWidth);
5795 } else
5796 Known = Known.sext(BitWidth);
5797 break;
5798 }
5799 default:
5800 break;
5801 }
5802 } else {
5803 switch (Opcode) {
5804 case SystemZISD::JOIN_DWORDS:
5805 case SystemZISD::SELECT_CCMASK:
5806 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
5807 break;
5808 case SystemZISD::REPLICATE: {
5809 SDValue SrcOp = Op.getOperand(0);
5810 DAG.computeKnownBits(SrcOp, Known, Depth + 1);
5811 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
5812 Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
5813 break;
5814 }
5815 default:
5816 break;
5817 }
Ulrich Weigand9eb858c2018-01-19 20:49:05 +00005818 }
5819
Jonas Paulsson13896072018-03-17 08:32:12 +00005820 // Known has the width of the source operand(s). Adjust if needed to match
5821 // the passed bitwidth.
5822 if (Known.getBitWidth() != BitWidth)
5823 Known = Known.zextOrTrunc(BitWidth);
5824}
5825
5826static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
5827 const SelectionDAG &DAG, unsigned Depth,
5828 unsigned OpNo) {
5829 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
5830 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
5831 if (LHS == 1) return 1; // Early out.
5832 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
5833 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
5834 if (RHS == 1) return 1; // Early out.
5835 unsigned Common = std::min(LHS, RHS);
5836 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
5837 EVT VT = Op.getValueType();
5838 unsigned VTBits = VT.getScalarSizeInBits();
5839 if (SrcBitWidth > VTBits) { // PACK
5840 unsigned SrcExtraBits = SrcBitWidth - VTBits;
5841 if (Common > SrcExtraBits)
5842 return (Common - SrcExtraBits);
5843 return 1;
Ulrich Weigand9eb858c2018-01-19 20:49:05 +00005844 }
Jonas Paulsson13896072018-03-17 08:32:12 +00005845 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
5846 return Common;
5847}
5848
// Return the minimum number of known sign bits in (the first result of) Op
// for SystemZ-specific nodes and intrinsics.  Anything unhandled reports the
// conservative default of 1, meaning nothing is known beyond the sign bit
// itself.
unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  // Only the primary result value is analyzed.
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    switch (Id) {
    // These intrinsics combine two vector operands; the result cannot have
    // more sign bits than both sources agree on (adjusted for element
    // narrowing inside computeNumSignBitsBinOp).
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      // Operand 0 is the intrinsic ID, so the data operands start at 1.
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      // Sign-extending unpack: every source sign bit survives, plus one
      // extra sign bit for each bit of widening.
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      // The select produces one of its two value operands (operands 0 and 1).
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}
5903
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005904//===----------------------------------------------------------------------===//
5905// Custom insertion
5906//===----------------------------------------------------------------------===//
5907
5908// Create a new basic block after MBB.
5909static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
5910 MachineFunction &MF = *MBB->getParent();
5911 MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00005912 MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005913 return NewMBB;
5914}
5915
Richard Sandifordbe133a82013-08-28 09:01:51 +00005916// Split MBB after MI and return the new block (the one that contains
5917// instructions after MI).
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005918static MachineBasicBlock *splitBlockAfter(MachineBasicBlock::iterator MI,
Richard Sandifordbe133a82013-08-28 09:01:51 +00005919 MachineBasicBlock *MBB) {
5920 MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
5921 NewMBB->splice(NewMBB->begin(), MBB,
Benjamin Kramerb6d0bd42014-03-02 12:27:27 +00005922 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
Richard Sandifordbe133a82013-08-28 09:01:51 +00005923 NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
5924 return NewMBB;
5925}
5926
Richard Sandiford5e318f02013-08-27 09:54:29 +00005927// Split MBB before MI and return the new block (the one that contains MI).
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005928static MachineBasicBlock *splitBlockBefore(MachineBasicBlock::iterator MI,
Richard Sandiford5e318f02013-08-27 09:54:29 +00005929 MachineBasicBlock *MBB) {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005930 MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
Richard Sandiford5e318f02013-08-27 09:54:29 +00005931 NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005932 NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
5933 return NewMBB;
5934}
5935
Richard Sandiford5e318f02013-08-27 09:54:29 +00005936// Force base value Base into a register before MI. Return the register.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005937static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
Richard Sandiford5e318f02013-08-27 09:54:29 +00005938 const SystemZInstrInfo *TII) {
5939 if (Base.isReg())
5940 return Base.getReg();
5941
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005942 MachineBasicBlock *MBB = MI.getParent();
Richard Sandiford5e318f02013-08-27 09:54:29 +00005943 MachineFunction &MF = *MBB->getParent();
5944 MachineRegisterInfo &MRI = MF.getRegInfo();
5945
5946 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005947 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
Diana Picus116bbab2017-01-13 09:58:52 +00005948 .add(Base)
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005949 .addImm(0)
5950 .addReg(0);
Richard Sandiford5e318f02013-08-27 09:54:29 +00005951 return Reg;
5952}
5953
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005954// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
5955MachineBasicBlock *
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005956SystemZTargetLowering::emitSelect(MachineInstr &MI,
Ulrich Weigand524f2762016-11-28 13:34:08 +00005957 MachineBasicBlock *MBB,
5958 unsigned LOCROpcode) const {
Eric Christophera6734172015-01-31 00:06:45 +00005959 const SystemZInstrInfo *TII =
5960 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005961
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00005962 unsigned DestReg = MI.getOperand(0).getReg();
5963 unsigned TrueReg = MI.getOperand(1).getReg();
5964 unsigned FalseReg = MI.getOperand(2).getReg();
5965 unsigned CCValid = MI.getOperand(3).getImm();
5966 unsigned CCMask = MI.getOperand(4).getImm();
5967 DebugLoc DL = MI.getDebugLoc();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005968
Ulrich Weigand524f2762016-11-28 13:34:08 +00005969 // Use LOCROpcode if possible.
5970 if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
5971 BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
5972 .addReg(FalseReg).addReg(TrueReg)
5973 .addImm(CCValid).addImm(CCMask);
5974 MI.eraseFromParent();
5975 return MBB;
5976 }
5977
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005978 MachineBasicBlock *StartMBB = MBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00005979 MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005980 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
5981
5982 // StartMBB:
Richard Sandiford0fb90ab2013-05-28 10:41:11 +00005983 // BRC CCMask, JoinMBB
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005984 // # fallthrough to FalseMBB
5985 MBB = StartMBB;
Richard Sandiford3d768e32013-07-31 12:30:20 +00005986 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
5987 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00005988 MBB->addSuccessor(JoinMBB);
5989 MBB->addSuccessor(FalseMBB);
5990
5991 // FalseMBB:
5992 // # fallthrough to JoinMBB
5993 MBB = FalseMBB;
5994 MBB->addSuccessor(JoinMBB);
5995
5996 // JoinMBB:
5997 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
5998 // ...
5999 MBB = JoinMBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00006000 BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006001 .addReg(TrueReg).addMBB(StartMBB)
6002 .addReg(FalseReg).addMBB(FalseMBB);
6003
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006004 MI.eraseFromParent();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006005 return JoinMBB;
6006}
6007
Richard Sandifordb86a8342013-06-27 09:27:40 +00006008// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
6009// StoreOpcode is the store to use and Invert says whether the store should
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006010// happen when the condition is false rather than true. If a STORE ON
6011// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006012MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
6013 MachineBasicBlock *MBB,
6014 unsigned StoreOpcode,
6015 unsigned STOCOpcode,
6016 bool Invert) const {
Eric Christophera6734172015-01-31 00:06:45 +00006017 const SystemZInstrInfo *TII =
6018 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Richard Sandifordb86a8342013-06-27 09:27:40 +00006019
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006020 unsigned SrcReg = MI.getOperand(0).getReg();
6021 MachineOperand Base = MI.getOperand(1);
6022 int64_t Disp = MI.getOperand(2).getImm();
6023 unsigned IndexReg = MI.getOperand(3).getReg();
6024 unsigned CCValid = MI.getOperand(4).getImm();
6025 unsigned CCMask = MI.getOperand(5).getImm();
6026 DebugLoc DL = MI.getDebugLoc();
Richard Sandifordb86a8342013-06-27 09:27:40 +00006027
6028 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
6029
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006030 // Use STOCOpcode if possible. We could use different store patterns in
6031 // order to avoid matching the index register, but the performance trade-offs
6032 // might be more complicated in that case.
Eric Christopher93bf97c2014-06-27 07:38:01 +00006033 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006034 if (Invert)
Richard Sandiford3d768e32013-07-31 12:30:20 +00006035 CCMask ^= CCValid;
Jonas Paulssonae8d22c2017-06-07 14:08:34 +00006036
6037 // ISel pattern matching also adds a load memory operand of the same
6038 // address, so take special care to find the storing memory operand.
6039 MachineMemOperand *MMO = nullptr;
6040 for (auto *I : MI.memoperands())
6041 if (I->isStore()) {
6042 MMO = I;
6043 break;
6044 }
6045
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006046 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
Jonas Paulssonae8d22c2017-06-07 14:08:34 +00006047 .addReg(SrcReg)
6048 .add(Base)
6049 .addImm(Disp)
6050 .addImm(CCValid)
6051 .addImm(CCMask)
6052 .addMemOperand(MMO);
6053
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006054 MI.eraseFromParent();
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006055 return MBB;
6056 }
6057
Richard Sandifordb86a8342013-06-27 09:27:40 +00006058 // Get the condition needed to branch around the store.
6059 if (!Invert)
Richard Sandiford3d768e32013-07-31 12:30:20 +00006060 CCMask ^= CCValid;
Richard Sandifordb86a8342013-06-27 09:27:40 +00006061
6062 MachineBasicBlock *StartMBB = MBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00006063 MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006064 MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
6065
6066 // StartMBB:
6067 // BRC CCMask, JoinMBB
6068 // # fallthrough to FalseMBB
Richard Sandifordb86a8342013-06-27 09:27:40 +00006069 MBB = StartMBB;
Richard Sandiford3d768e32013-07-31 12:30:20 +00006070 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6071 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006072 MBB->addSuccessor(JoinMBB);
6073 MBB->addSuccessor(FalseMBB);
6074
6075 // FalseMBB:
6076 // store %SrcReg, %Disp(%Index,%Base)
6077 // # fallthrough to JoinMBB
6078 MBB = FalseMBB;
6079 BuildMI(MBB, DL, TII->get(StoreOpcode))
Diana Picus116bbab2017-01-13 09:58:52 +00006080 .addReg(SrcReg)
6081 .add(Base)
6082 .addImm(Disp)
6083 .addReg(IndexReg);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006084 MBB->addSuccessor(JoinMBB);
6085
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006086 MI.eraseFromParent();
Richard Sandifordb86a8342013-06-27 09:27:40 +00006087 return JoinMBB;
6088}
6089
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006090// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
6091// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
6092// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
6093// BitSize is the width of the field in bits, or 0 if this is a partword
6094// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
6095// is one of the operands. Invert says whether the field should be
6096// inverted after performing BinOpcode (e.g. for NAND).
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006097MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
6098 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
6099 unsigned BitSize, bool Invert) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006100 MachineFunction &MF = *MBB->getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00006101 const SystemZInstrInfo *TII =
Eric Christophera6734172015-01-31 00:06:45 +00006102 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006103 MachineRegisterInfo &MRI = MF.getRegInfo();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006104 bool IsSubWord = (BitSize < 32);
6105
6106 // Extract the operands. Base can be a register or a frame index.
6107 // Src2 can be a register or immediate.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006108 unsigned Dest = MI.getOperand(0).getReg();
6109 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
6110 int64_t Disp = MI.getOperand(2).getImm();
6111 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
6112 unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
6113 unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
6114 DebugLoc DL = MI.getDebugLoc();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006115 if (IsSubWord)
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006116 BitSize = MI.getOperand(6).getImm();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006117
6118 // Subword operations use 32-bit registers.
6119 const TargetRegisterClass *RC = (BitSize <= 32 ?
6120 &SystemZ::GR32BitRegClass :
6121 &SystemZ::GR64BitRegClass);
6122 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
6123 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
6124
6125 // Get the right opcodes for the displacement.
6126 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
6127 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
6128 assert(LOpcode && CSOpcode && "Displacement out of range");
6129
6130 // Create virtual registers for temporary results.
6131 unsigned OrigVal = MRI.createVirtualRegister(RC);
6132 unsigned OldVal = MRI.createVirtualRegister(RC);
6133 unsigned NewVal = (BinOpcode || IsSubWord ?
6134 MRI.createVirtualRegister(RC) : Src2.getReg());
6135 unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
6136 unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
6137
6138 // Insert a basic block for the main loop.
6139 MachineBasicBlock *StartMBB = MBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00006140 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006141 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6142
6143 // StartMBB:
6144 // ...
6145 // %OrigVal = L Disp(%Base)
6146 // # fall through to LoopMMB
6147 MBB = StartMBB;
Diana Picus116bbab2017-01-13 09:58:52 +00006148 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006149 MBB->addSuccessor(LoopMBB);
6150
6151 // LoopMBB:
6152 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
6153 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
6154 // %RotatedNewVal = OP %RotatedOldVal, %Src2
6155 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
6156 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
6157 // JNE LoopMBB
6158 // # fall through to DoneMMB
6159 MBB = LoopMBB;
6160 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
6161 .addReg(OrigVal).addMBB(StartMBB)
6162 .addReg(Dest).addMBB(LoopMBB);
6163 if (IsSubWord)
6164 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
6165 .addReg(OldVal).addReg(BitShift).addImm(0);
6166 if (Invert) {
6167 // Perform the operation normally and then invert every bit of the field.
6168 unsigned Tmp = MRI.createVirtualRegister(RC);
Diana Picus116bbab2017-01-13 09:58:52 +00006169 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
Alexey Samsonovfffd56ec2014-08-20 21:56:43 +00006170 if (BitSize <= 32)
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006171 // XILF with the upper BitSize bits set.
Richard Sandiford652784e2013-09-25 11:11:53 +00006172 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
Alexey Samsonovfffd56ec2014-08-20 21:56:43 +00006173 .addReg(Tmp).addImm(-1U << (32 - BitSize));
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006174 else {
6175 // Use LCGR and add -1 to the result, which is more compact than
6176 // an XILF, XILH pair.
6177 unsigned Tmp2 = MRI.createVirtualRegister(RC);
6178 BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
6179 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
6180 .addReg(Tmp2).addImm(-1);
6181 }
6182 } else if (BinOpcode)
6183 // A simply binary operation.
6184 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
Diana Picus116bbab2017-01-13 09:58:52 +00006185 .addReg(RotatedOldVal)
6186 .add(Src2);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006187 else if (IsSubWord)
6188 // Use RISBG to rotate Src2 into position and use it to replace the
6189 // field in RotatedOldVal.
6190 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
6191 .addReg(RotatedOldVal).addReg(Src2.getReg())
6192 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
6193 if (IsSubWord)
6194 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
6195 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
6196 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
Diana Picus116bbab2017-01-13 09:58:52 +00006197 .addReg(OldVal)
6198 .addReg(NewVal)
6199 .add(Base)
6200 .addImm(Disp);
Richard Sandiford3d768e32013-07-31 12:30:20 +00006201 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6202 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006203 MBB->addSuccessor(LoopMBB);
6204 MBB->addSuccessor(DoneMBB);
6205
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006206 MI.eraseFromParent();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006207 return DoneMBB;
6208}
6209
// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value. KeepOldMask is the BRC condition-code mask
// for when the current field should be kept. BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
//
// The lowering is a compare-and-swap (CS/CSG) retry loop with a compare
// and conditional branch choosing between keeping the old field and
// substituting the new one.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask, unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  // Partword (8/16-bit) updates are done on the containing 32-bit word.
  bool IsSubWord = (BitSize < 32);

  // Extract the operands. Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned Src2 = MI.getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI.getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : 0);
  DebugLoc DL = MI.getDebugLoc();
  if (IsSubWord)
    // For partword operations the field width is operand 6 rather than
    // being implied by the opcode.
    BitSize = MI.getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  // StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    // Rotate the field of interest into the low BitSize bits.
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  // UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    // Replace the field in RotatedOldVal with the new value from Src2.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  // UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    // Rotate the updated field back to its memory position.
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  // Retry if the compare-and-swap saw the memory change underneath us.
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
6327
// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
//
// A partword (8- or 16-bit) compare-and-swap is implemented as a full-word
// CS loop on the containing aligned word: the field of interest is rotated
// into the low bits, the remaining bits of the expected and replacement
// values are patched with the bits actually loaded, and the loop retries
// whenever the full-word CS fails because those surrounding bits changed.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {

  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands. Base can be a register or a frame index.
  unsigned Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  unsigned OrigCmpVal = MI.getOperand(3).getReg();
  unsigned OrigSwapVal = MI.getOperand(4).getReg();
  unsigned BitShift = MI.getOperand(5).getReg();
  unsigned NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // The loop always operates on a full 32-bit word.
  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned CmpVal = MRI.createVirtualRegister(RC);
  unsigned SwapVal = MRI.createVirtualRegister(RC);
  unsigned StoreVal = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);

  // StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
      .add(Base)
      .addImm(Disp)
      .addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest = RLL %OldVal, BitSize(%BitShift)
  //                   ^^ The low BitSize bits contain the field
  //                      of interest.
  //   %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                   ^^ Replace the upper 32-BitSize bits of the
  //                      comparison value with those that we loaded,
  //                      so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  // SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                   ^^ Replace the upper 32-BitSize bits of the new
  //                      value with those that we loaded.
  //   %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                   ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
      .addReg(OldVal)
      .addReg(StoreVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
  // to the block after the loop. At this point, CC may have been defined
  // either by the CR in LoopMBB or by the CS in SetMBB.
  if (!MI.registerDefIsDead(SystemZ::CC))
    DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}
6453
Ulrich Weiganda11f63a2017-08-04 18:57:58 +00006454// Emit a move from two GR64s to a GR128.
6455MachineBasicBlock *
6456SystemZTargetLowering::emitPair128(MachineInstr &MI,
6457 MachineBasicBlock *MBB) const {
6458 MachineFunction &MF = *MBB->getParent();
6459 const SystemZInstrInfo *TII =
6460 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
6461 MachineRegisterInfo &MRI = MF.getRegInfo();
6462 DebugLoc DL = MI.getDebugLoc();
6463
6464 unsigned Dest = MI.getOperand(0).getReg();
6465 unsigned Hi = MI.getOperand(1).getReg();
6466 unsigned Lo = MI.getOperand(2).getReg();
6467 unsigned Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6468 unsigned Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6469
6470 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
6471 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
6472 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
6473 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
6474 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
6475
6476 MI.eraseFromParent();
6477 return MBB;
6478}
6479
Ulrich Weigand43579cf2017-07-05 13:17:31 +00006480// Emit an extension from a GR64 to a GR128. ClearEven is true
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006481// if the high register of the GR128 value must be cleared or false if
Ulrich Weigand43579cf2017-07-05 13:17:31 +00006482// it's "don't care".
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006483MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
6484 MachineBasicBlock *MBB,
Ulrich Weigand43579cf2017-07-05 13:17:31 +00006485 bool ClearEven) const {
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006486 MachineFunction &MF = *MBB->getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00006487 const SystemZInstrInfo *TII =
Eric Christophera6734172015-01-31 00:06:45 +00006488 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006489 MachineRegisterInfo &MRI = MF.getRegInfo();
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006490 DebugLoc DL = MI.getDebugLoc();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006491
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006492 unsigned Dest = MI.getOperand(0).getReg();
6493 unsigned Src = MI.getOperand(1).getReg();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006494 unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6495
6496 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
6497 if (ClearEven) {
6498 unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
6499 unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
6500
6501 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
6502 .addImm(0);
6503 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
Richard Sandiford87a44362013-09-30 10:28:35 +00006504 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006505 In128 = NewIn128;
6506 }
6507 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
Ulrich Weigand43579cf2017-07-05 13:17:31 +00006508 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006509
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006510 MI.eraseFromParent();
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006511 return MBB;
6512}
6513
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006514MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
6515 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
Richard Sandiford5e318f02013-08-27 09:54:29 +00006516 MachineFunction &MF = *MBB->getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00006517 const SystemZInstrInfo *TII =
Eric Christophera6734172015-01-31 00:06:45 +00006518 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Richard Sandiford5e318f02013-08-27 09:54:29 +00006519 MachineRegisterInfo &MRI = MF.getRegInfo();
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006520 DebugLoc DL = MI.getDebugLoc();
Richard Sandifordd131ff82013-07-08 09:35:23 +00006521
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006522 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
6523 uint64_t DestDisp = MI.getOperand(1).getImm();
6524 MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
6525 uint64_t SrcDisp = MI.getOperand(3).getImm();
6526 uint64_t Length = MI.getOperand(4).getImm();
Richard Sandifordd131ff82013-07-08 09:35:23 +00006527
Richard Sandifordbe133a82013-08-28 09:01:51 +00006528 // When generating more than one CLC, all but the last will need to
6529 // branch to the end when a difference is found.
6530 MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
Craig Topper062a2ba2014-04-25 05:30:21 +00006531 splitBlockAfter(MI, MBB) : nullptr);
Richard Sandifordbe133a82013-08-28 09:01:51 +00006532
Richard Sandiford5e318f02013-08-27 09:54:29 +00006533 // Check for the loop form, in which operand 5 is the trip count.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006534 if (MI.getNumExplicitOperands() > 5) {
Richard Sandiford5e318f02013-08-27 09:54:29 +00006535 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
6536
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006537 uint64_t StartCountReg = MI.getOperand(5).getReg();
Richard Sandiford5e318f02013-08-27 09:54:29 +00006538 uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
6539 uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
6540 forceReg(MI, DestBase, TII));
6541
6542 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
6543 uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
6544 uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
6545 MRI.createVirtualRegister(RC));
6546 uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
6547 uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
6548 MRI.createVirtualRegister(RC));
6549
6550 RC = &SystemZ::GR64BitRegClass;
6551 uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
6552 uint64_t NextCountReg = MRI.createVirtualRegister(RC);
6553
6554 MachineBasicBlock *StartMBB = MBB;
6555 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
6556 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
Richard Sandifordbe133a82013-08-28 09:01:51 +00006557 MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
Richard Sandiford5e318f02013-08-27 09:54:29 +00006558
6559 // StartMBB:
6560 // # fall through to LoopMMB
6561 MBB->addSuccessor(LoopMBB);
6562
6563 // LoopMBB:
6564 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
Richard Sandifordbe133a82013-08-28 09:01:51 +00006565 // [ %NextDestReg, NextMBB ]
Richard Sandiford5e318f02013-08-27 09:54:29 +00006566 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
Richard Sandifordbe133a82013-08-28 09:01:51 +00006567 // [ %NextSrcReg, NextMBB ]
Richard Sandiford5e318f02013-08-27 09:54:29 +00006568 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
Richard Sandifordbe133a82013-08-28 09:01:51 +00006569 // [ %NextCountReg, NextMBB ]
6570 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
Richard Sandiford5e318f02013-08-27 09:54:29 +00006571 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
Richard Sandifordbe133a82013-08-28 09:01:51 +00006572 // ( JLH EndMBB )
6573 //
6574 // The prefetch is used only for MVC. The JLH is used only for CLC.
6575 MBB = LoopMBB;
6576
6577 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
6578 .addReg(StartDestReg).addMBB(StartMBB)
6579 .addReg(NextDestReg).addMBB(NextMBB);
6580 if (!HaveSingleBase)
6581 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
6582 .addReg(StartSrcReg).addMBB(StartMBB)
6583 .addReg(NextSrcReg).addMBB(NextMBB);
6584 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
6585 .addReg(StartCountReg).addMBB(StartMBB)
6586 .addReg(NextCountReg).addMBB(NextMBB);
6587 if (Opcode == SystemZ::MVC)
6588 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
6589 .addImm(SystemZ::PFD_WRITE)
6590 .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
6591 BuildMI(MBB, DL, TII->get(Opcode))
6592 .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
6593 .addReg(ThisSrcReg).addImm(SrcDisp);
6594 if (EndMBB) {
6595 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6596 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
6597 .addMBB(EndMBB);
6598 MBB->addSuccessor(EndMBB);
6599 MBB->addSuccessor(NextMBB);
6600 }
6601
6602 // NextMBB:
Richard Sandiford5e318f02013-08-27 09:54:29 +00006603 // %NextDestReg = LA 256(%ThisDestReg)
6604 // %NextSrcReg = LA 256(%ThisSrcReg)
6605 // %NextCountReg = AGHI %ThisCountReg, -1
6606 // CGHI %NextCountReg, 0
6607 // JLH LoopMBB
6608 // # fall through to DoneMMB
6609 //
6610 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
Richard Sandifordbe133a82013-08-28 09:01:51 +00006611 MBB = NextMBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00006612
Richard Sandiford5e318f02013-08-27 09:54:29 +00006613 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
6614 .addReg(ThisDestReg).addImm(256).addReg(0);
6615 if (!HaveSingleBase)
6616 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
6617 .addReg(ThisSrcReg).addImm(256).addReg(0);
6618 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
6619 .addReg(ThisCountReg).addImm(-1);
6620 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
6621 .addReg(NextCountReg).addImm(0);
6622 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6623 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
6624 .addMBB(LoopMBB);
6625 MBB->addSuccessor(LoopMBB);
6626 MBB->addSuccessor(DoneMBB);
6627
6628 DestBase = MachineOperand::CreateReg(NextDestReg, false);
6629 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
6630 Length &= 255;
Jonas Paulssona6216ec2018-03-19 13:05:22 +00006631 if (EndMBB && !Length)
6632 // If the loop handled the whole CLC range, DoneMBB will be empty with
6633 // CC live-through into EndMBB, so add it as live-in.
6634 DoneMBB->addLiveIn(SystemZ::CC);
Richard Sandiford5e318f02013-08-27 09:54:29 +00006635 MBB = DoneMBB;
6636 }
6637 // Handle any remaining bytes with straight-line code.
6638 while (Length > 0) {
6639 uint64_t ThisLength = std::min(Length, uint64_t(256));
6640 // The previous iteration might have created out-of-range displacements.
6641 // Apply them using LAY if so.
6642 if (!isUInt<12>(DestDisp)) {
6643 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006644 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
Diana Picus116bbab2017-01-13 09:58:52 +00006645 .add(DestBase)
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006646 .addImm(DestDisp)
6647 .addReg(0);
Richard Sandiford5e318f02013-08-27 09:54:29 +00006648 DestBase = MachineOperand::CreateReg(Reg, false);
6649 DestDisp = 0;
6650 }
6651 if (!isUInt<12>(SrcDisp)) {
6652 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006653 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
Diana Picus116bbab2017-01-13 09:58:52 +00006654 .add(SrcBase)
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006655 .addImm(SrcDisp)
6656 .addReg(0);
Richard Sandiford5e318f02013-08-27 09:54:29 +00006657 SrcBase = MachineOperand::CreateReg(Reg, false);
6658 SrcDisp = 0;
6659 }
6660 BuildMI(*MBB, MI, DL, TII->get(Opcode))
Diana Picus116bbab2017-01-13 09:58:52 +00006661 .add(DestBase)
6662 .addImm(DestDisp)
6663 .addImm(ThisLength)
6664 .add(SrcBase)
Jonas Paulssonae8d22c2017-06-07 14:08:34 +00006665 .addImm(SrcDisp)
6666 ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
Richard Sandiford5e318f02013-08-27 09:54:29 +00006667 DestDisp += ThisLength;
6668 SrcDisp += ThisLength;
6669 Length -= ThisLength;
Richard Sandifordbe133a82013-08-28 09:01:51 +00006670 // If there's another CLC to go, branch to the end if a difference
6671 // was found.
6672 if (EndMBB && Length > 0) {
6673 MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
6674 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6675 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
6676 .addMBB(EndMBB);
6677 MBB->addSuccessor(EndMBB);
6678 MBB->addSuccessor(NextMBB);
6679 MBB = NextMBB;
6680 }
6681 }
6682 if (EndMBB) {
6683 MBB->addSuccessor(EndMBB);
6684 MBB = EndMBB;
6685 MBB->addLiveIn(SystemZ::CC);
Richard Sandiford5e318f02013-08-27 09:54:29 +00006686 }
Richard Sandifordd131ff82013-07-08 09:35:23 +00006687
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006688 MI.eraseFromParent();
Richard Sandifordd131ff82013-07-08 09:35:23 +00006689 return MBB;
6690}
6691
Richard Sandifordca232712013-08-16 11:21:54 +00006692// Decompose string pseudo-instruction MI into a loop that continually performs
6693// Opcode until CC != 3.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006694MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
6695 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
Richard Sandifordca232712013-08-16 11:21:54 +00006696 MachineFunction &MF = *MBB->getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00006697 const SystemZInstrInfo *TII =
Eric Christophera6734172015-01-31 00:06:45 +00006698 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Richard Sandifordca232712013-08-16 11:21:54 +00006699 MachineRegisterInfo &MRI = MF.getRegInfo();
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006700 DebugLoc DL = MI.getDebugLoc();
Richard Sandifordca232712013-08-16 11:21:54 +00006701
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006702 uint64_t End1Reg = MI.getOperand(0).getReg();
6703 uint64_t Start1Reg = MI.getOperand(1).getReg();
6704 uint64_t Start2Reg = MI.getOperand(2).getReg();
6705 uint64_t CharReg = MI.getOperand(3).getReg();
Richard Sandifordca232712013-08-16 11:21:54 +00006706
6707 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
6708 uint64_t This1Reg = MRI.createVirtualRegister(RC);
6709 uint64_t This2Reg = MRI.createVirtualRegister(RC);
6710 uint64_t End2Reg = MRI.createVirtualRegister(RC);
6711
6712 MachineBasicBlock *StartMBB = MBB;
Richard Sandiford5e318f02013-08-27 09:54:29 +00006713 MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
Richard Sandifordca232712013-08-16 11:21:54 +00006714 MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
6715
6716 // StartMBB:
Richard Sandifordca232712013-08-16 11:21:54 +00006717 // # fall through to LoopMMB
Richard Sandifordca232712013-08-16 11:21:54 +00006718 MBB->addSuccessor(LoopMBB);
6719
6720 // LoopMBB:
6721 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
6722 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
Richard Sandiford7789b082013-09-30 08:48:38 +00006723 // R0L = %CharReg
6724 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
Richard Sandifordca232712013-08-16 11:21:54 +00006725 // JO LoopMBB
6726 // # fall through to DoneMMB
Richard Sandiford6f6d5512013-08-20 09:38:48 +00006727 //
Richard Sandiford7789b082013-09-30 08:48:38 +00006728 // The load of R0L can be hoisted by post-RA LICM.
Richard Sandifordca232712013-08-16 11:21:54 +00006729 MBB = LoopMBB;
Richard Sandifordca232712013-08-16 11:21:54 +00006730
6731 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
6732 .addReg(Start1Reg).addMBB(StartMBB)
6733 .addReg(End1Reg).addMBB(LoopMBB);
6734 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
6735 .addReg(Start2Reg).addMBB(StartMBB)
6736 .addReg(End2Reg).addMBB(LoopMBB);
Richard Sandiford7789b082013-09-30 08:48:38 +00006737 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
Richard Sandifordca232712013-08-16 11:21:54 +00006738 BuildMI(MBB, DL, TII->get(Opcode))
6739 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
6740 .addReg(This1Reg).addReg(This2Reg);
6741 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
6742 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
6743 MBB->addSuccessor(LoopMBB);
6744 MBB->addSuccessor(DoneMBB);
6745
6746 DoneMBB->addLiveIn(SystemZ::CC);
6747
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006748 MI.eraseFromParent();
Richard Sandifordca232712013-08-16 11:21:54 +00006749 return DoneMBB;
6750}
6751
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006752// Update TBEGIN instruction with final opcode and register clobbers.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006753MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
6754 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
6755 bool NoFloat) const {
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006756 MachineFunction &MF = *MBB->getParent();
6757 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
6758 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
6759
6760 // Update opcode.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006761 MI.setDesc(TII->get(Opcode));
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006762
6763 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
6764 // Make sure to add the corresponding GRSM bits if they are missing.
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006765 uint64_t Control = MI.getOperand(2).getImm();
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006766 static const unsigned GPRControlBit[16] = {
6767 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
6768 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
6769 };
6770 Control |= GPRControlBit[15];
6771 if (TFI->hasFP(MF))
6772 Control |= GPRControlBit[11];
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006773 MI.getOperand(2).setImm(Control);
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006774
6775 // Add GPR clobbers.
6776 for (int I = 0; I < 16; I++) {
6777 if ((Control & GPRControlBit[I]) == 0) {
6778 unsigned Reg = SystemZMC::GR64Regs[I];
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006779 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006780 }
6781 }
6782
Ulrich Weigandce4c1092015-05-05 19:25:42 +00006783 // Add FPR/VR clobbers.
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006784 if (!NoFloat && (Control & 4) != 0) {
Ulrich Weigandce4c1092015-05-05 19:25:42 +00006785 if (Subtarget.hasVector()) {
6786 for (int I = 0; I < 32; I++) {
6787 unsigned Reg = SystemZMC::VR128Regs[I];
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006788 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
Ulrich Weigandce4c1092015-05-05 19:25:42 +00006789 }
6790 } else {
6791 for (int I = 0; I < 16; I++) {
6792 unsigned Reg = SystemZMC::FP64Regs[I];
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006793 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
Ulrich Weigandce4c1092015-05-05 19:25:42 +00006794 }
Ulrich Weigand57c85f52015-04-01 12:51:43 +00006795 }
6796 }
6797
6798 return MBB;
6799}
6800
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006801MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
6802 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
Jonas Paulsson7c5ce102015-10-08 07:40:16 +00006803 MachineFunction &MF = *MBB->getParent();
6804 MachineRegisterInfo *MRI = &MF.getRegInfo();
6805 const SystemZInstrInfo *TII =
6806 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006807 DebugLoc DL = MI.getDebugLoc();
Jonas Paulsson7c5ce102015-10-08 07:40:16 +00006808
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006809 unsigned SrcReg = MI.getOperand(0).getReg();
Jonas Paulsson7c5ce102015-10-08 07:40:16 +00006810
6811 // Create new virtual register of the same class as source.
6812 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
6813 unsigned DstReg = MRI->createVirtualRegister(RC);
6814
6815 // Replace pseudo with a normal load-and-test that models the def as
6816 // well.
6817 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
6818 .addReg(SrcReg);
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006819 MI.eraseFromParent();
Jonas Paulsson7c5ce102015-10-08 07:40:16 +00006820
6821 return MBB;
6822}
6823
Duncan P. N. Exon Smithe4f5e4f2016-06-30 22:52:52 +00006824MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
6825 MachineInstr &MI, MachineBasicBlock *MBB) const {
6826 switch (MI.getOpcode()) {
Richard Sandiford7c5c0ea2013-10-01 13:10:16 +00006827 case SystemZ::Select32Mux:
Ulrich Weigand524f2762016-11-28 13:34:08 +00006828 return emitSelect(MI, MBB,
6829 Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006830 case SystemZ::Select32:
Ulrich Weigand524f2762016-11-28 13:34:08 +00006831 return emitSelect(MI, MBB, SystemZ::LOCR);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006832 case SystemZ::Select64:
Ulrich Weigand524f2762016-11-28 13:34:08 +00006833 return emitSelect(MI, MBB, SystemZ::LOCGR);
6834 case SystemZ::SelectF32:
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006835 case SystemZ::SelectF64:
6836 case SystemZ::SelectF128:
Ulrich Weigandf2968d52017-07-17 17:44:20 +00006837 case SystemZ::SelectVR128:
Ulrich Weigand524f2762016-11-28 13:34:08 +00006838 return emitSelect(MI, MBB, 0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006839
Richard Sandiford2896d042013-10-01 14:33:55 +00006840 case SystemZ::CondStore8Mux:
6841 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
6842 case SystemZ::CondStore8MuxInv:
6843 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
6844 case SystemZ::CondStore16Mux:
6845 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
6846 case SystemZ::CondStore16MuxInv:
6847 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
Ulrich Weigand524f2762016-11-28 13:34:08 +00006848 case SystemZ::CondStore32Mux:
6849 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
6850 case SystemZ::CondStore32MuxInv:
6851 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006852 case SystemZ::CondStore8:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006853 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006854 case SystemZ::CondStore8Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006855 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006856 case SystemZ::CondStore16:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006857 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006858 case SystemZ::CondStore16Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006859 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006860 case SystemZ::CondStore32:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006861 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006862 case SystemZ::CondStore32Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006863 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006864 case SystemZ::CondStore64:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006865 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006866 case SystemZ::CondStore64Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006867 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006868 case SystemZ::CondStoreF32:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006869 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006870 case SystemZ::CondStoreF32Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006871 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006872 case SystemZ::CondStoreF64:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006873 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006874 case SystemZ::CondStoreF64Inv:
Richard Sandiforda68e6f52013-07-25 08:57:02 +00006875 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
Richard Sandifordb86a8342013-06-27 09:27:40 +00006876
Ulrich Weiganda11f63a2017-08-04 18:57:58 +00006877 case SystemZ::PAIR128:
6878 return emitPair128(MI, MBB);
Ulrich Weigand43579cf2017-07-05 13:17:31 +00006879 case SystemZ::AEXT128:
6880 return emitExt128(MI, MBB, false);
6881 case SystemZ::ZEXT128:
6882 return emitExt128(MI, MBB, true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006883
6884 case SystemZ::ATOMIC_SWAPW:
6885 return emitAtomicLoadBinary(MI, MBB, 0, 0);
6886 case SystemZ::ATOMIC_SWAP_32:
6887 return emitAtomicLoadBinary(MI, MBB, 0, 32);
6888 case SystemZ::ATOMIC_SWAP_64:
6889 return emitAtomicLoadBinary(MI, MBB, 0, 64);
6890
6891 case SystemZ::ATOMIC_LOADW_AR:
6892 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
6893 case SystemZ::ATOMIC_LOADW_AFI:
6894 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
6895 case SystemZ::ATOMIC_LOAD_AR:
6896 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
6897 case SystemZ::ATOMIC_LOAD_AHI:
6898 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
6899 case SystemZ::ATOMIC_LOAD_AFI:
6900 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
6901 case SystemZ::ATOMIC_LOAD_AGR:
6902 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
6903 case SystemZ::ATOMIC_LOAD_AGHI:
6904 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
6905 case SystemZ::ATOMIC_LOAD_AGFI:
6906 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
6907
6908 case SystemZ::ATOMIC_LOADW_SR:
6909 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
6910 case SystemZ::ATOMIC_LOAD_SR:
6911 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
6912 case SystemZ::ATOMIC_LOAD_SGR:
6913 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
6914
6915 case SystemZ::ATOMIC_LOADW_NR:
6916 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
6917 case SystemZ::ATOMIC_LOADW_NILH:
Richard Sandiford652784e2013-09-25 11:11:53 +00006918 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006919 case SystemZ::ATOMIC_LOAD_NR:
6920 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
Richard Sandiford652784e2013-09-25 11:11:53 +00006921 case SystemZ::ATOMIC_LOAD_NILL:
6922 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
6923 case SystemZ::ATOMIC_LOAD_NILH:
6924 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
6925 case SystemZ::ATOMIC_LOAD_NILF:
6926 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006927 case SystemZ::ATOMIC_LOAD_NGR:
6928 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
Richard Sandiford652784e2013-09-25 11:11:53 +00006929 case SystemZ::ATOMIC_LOAD_NILL64:
6930 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
6931 case SystemZ::ATOMIC_LOAD_NILH64:
6932 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
Richard Sandiford70284282013-10-01 14:20:41 +00006933 case SystemZ::ATOMIC_LOAD_NIHL64:
6934 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
6935 case SystemZ::ATOMIC_LOAD_NIHH64:
6936 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
Richard Sandiford652784e2013-09-25 11:11:53 +00006937 case SystemZ::ATOMIC_LOAD_NILF64:
6938 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
Richard Sandiford70284282013-10-01 14:20:41 +00006939 case SystemZ::ATOMIC_LOAD_NIHF64:
6940 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006941
6942 case SystemZ::ATOMIC_LOADW_OR:
6943 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
6944 case SystemZ::ATOMIC_LOADW_OILH:
Richard Sandiford652784e2013-09-25 11:11:53 +00006945 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006946 case SystemZ::ATOMIC_LOAD_OR:
6947 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
Richard Sandiford652784e2013-09-25 11:11:53 +00006948 case SystemZ::ATOMIC_LOAD_OILL:
6949 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
6950 case SystemZ::ATOMIC_LOAD_OILH:
6951 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
6952 case SystemZ::ATOMIC_LOAD_OILF:
6953 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006954 case SystemZ::ATOMIC_LOAD_OGR:
6955 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
Richard Sandiford652784e2013-09-25 11:11:53 +00006956 case SystemZ::ATOMIC_LOAD_OILL64:
6957 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
6958 case SystemZ::ATOMIC_LOAD_OILH64:
6959 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
Richard Sandiford6e96ac62013-10-01 13:22:41 +00006960 case SystemZ::ATOMIC_LOAD_OIHL64:
6961 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
6962 case SystemZ::ATOMIC_LOAD_OIHH64:
6963 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
Richard Sandiford652784e2013-09-25 11:11:53 +00006964 case SystemZ::ATOMIC_LOAD_OILF64:
6965 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
Richard Sandiford6e96ac62013-10-01 13:22:41 +00006966 case SystemZ::ATOMIC_LOAD_OIHF64:
6967 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006968
6969 case SystemZ::ATOMIC_LOADW_XR:
6970 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
6971 case SystemZ::ATOMIC_LOADW_XILF:
Richard Sandiford652784e2013-09-25 11:11:53 +00006972 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006973 case SystemZ::ATOMIC_LOAD_XR:
6974 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
Richard Sandiford652784e2013-09-25 11:11:53 +00006975 case SystemZ::ATOMIC_LOAD_XILF:
6976 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006977 case SystemZ::ATOMIC_LOAD_XGR:
6978 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
Richard Sandiford652784e2013-09-25 11:11:53 +00006979 case SystemZ::ATOMIC_LOAD_XILF64:
6980 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
Richard Sandiford5718dac2013-10-01 14:08:44 +00006981 case SystemZ::ATOMIC_LOAD_XIHF64:
6982 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006983
6984 case SystemZ::ATOMIC_LOADW_NRi:
6985 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
6986 case SystemZ::ATOMIC_LOADW_NILHi:
Richard Sandiford652784e2013-09-25 11:11:53 +00006987 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006988 case SystemZ::ATOMIC_LOAD_NRi:
6989 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
Richard Sandiford652784e2013-09-25 11:11:53 +00006990 case SystemZ::ATOMIC_LOAD_NILLi:
6991 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
6992 case SystemZ::ATOMIC_LOAD_NILHi:
6993 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
6994 case SystemZ::ATOMIC_LOAD_NILFi:
6995 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00006996 case SystemZ::ATOMIC_LOAD_NGRi:
6997 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
Richard Sandiford652784e2013-09-25 11:11:53 +00006998 case SystemZ::ATOMIC_LOAD_NILL64i:
6999 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
7000 case SystemZ::ATOMIC_LOAD_NILH64i:
7001 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
Richard Sandiford70284282013-10-01 14:20:41 +00007002 case SystemZ::ATOMIC_LOAD_NIHL64i:
7003 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
7004 case SystemZ::ATOMIC_LOAD_NIHH64i:
7005 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
Richard Sandiford652784e2013-09-25 11:11:53 +00007006 case SystemZ::ATOMIC_LOAD_NILF64i:
7007 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
Richard Sandiford70284282013-10-01 14:20:41 +00007008 case SystemZ::ATOMIC_LOAD_NIHF64i:
7009 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
Ulrich Weigand5f613df2013-05-06 16:15:19 +00007010
7011 case SystemZ::ATOMIC_LOADW_MIN:
7012 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7013 SystemZ::CCMASK_CMP_LE, 0);
7014 case SystemZ::ATOMIC_LOAD_MIN_32:
7015 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7016 SystemZ::CCMASK_CMP_LE, 32);
7017 case SystemZ::ATOMIC_LOAD_MIN_64:
7018 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7019 SystemZ::CCMASK_CMP_LE, 64);
7020
7021 case SystemZ::ATOMIC_LOADW_MAX:
7022 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7023 SystemZ::CCMASK_CMP_GE, 0);
7024 case SystemZ::ATOMIC_LOAD_MAX_32:
7025 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
7026 SystemZ::CCMASK_CMP_GE, 32);
7027 case SystemZ::ATOMIC_LOAD_MAX_64:
7028 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
7029 SystemZ::CCMASK_CMP_GE, 64);
7030
7031 case SystemZ::ATOMIC_LOADW_UMIN:
7032 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7033 SystemZ::CCMASK_CMP_LE, 0);
7034 case SystemZ::ATOMIC_LOAD_UMIN_32:
7035 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7036 SystemZ::CCMASK_CMP_LE, 32);
7037 case SystemZ::ATOMIC_LOAD_UMIN_64:
7038 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7039 SystemZ::CCMASK_CMP_LE, 64);
7040
7041 case SystemZ::ATOMIC_LOADW_UMAX:
7042 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7043 SystemZ::CCMASK_CMP_GE, 0);
7044 case SystemZ::ATOMIC_LOAD_UMAX_32:
7045 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
7046 SystemZ::CCMASK_CMP_GE, 32);
7047 case SystemZ::ATOMIC_LOAD_UMAX_64:
7048 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
7049 SystemZ::CCMASK_CMP_GE, 64);
7050
7051 case SystemZ::ATOMIC_CMP_SWAPW:
7052 return emitAtomicCmpSwapW(MI, MBB);
Richard Sandiford5e318f02013-08-27 09:54:29 +00007053 case SystemZ::MVCSequence:
7054 case SystemZ::MVCLoop:
Richard Sandiford564681c2013-08-12 10:28:10 +00007055 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
Richard Sandiford178273a2013-09-05 10:36:45 +00007056 case SystemZ::NCSequence:
7057 case SystemZ::NCLoop:
7058 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
7059 case SystemZ::OCSequence:
7060 case SystemZ::OCLoop:
7061 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
7062 case SystemZ::XCSequence:
7063 case SystemZ::XCLoop:
7064 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
Richard Sandiford5e318f02013-08-27 09:54:29 +00007065 case SystemZ::CLCSequence:
7066 case SystemZ::CLCLoop:
Richard Sandiford564681c2013-08-12 10:28:10 +00007067 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
Richard Sandifordca232712013-08-16 11:21:54 +00007068 case SystemZ::CLSTLoop:
7069 return emitStringWrapper(MI, MBB, SystemZ::CLST);
Richard Sandifordbb83a502013-08-16 11:29:37 +00007070 case SystemZ::MVSTLoop:
7071 return emitStringWrapper(MI, MBB, SystemZ::MVST);
Richard Sandiford0dec06a2013-08-16 11:41:43 +00007072 case SystemZ::SRSTLoop:
7073 return emitStringWrapper(MI, MBB, SystemZ::SRST);
Ulrich Weigand57c85f52015-04-01 12:51:43 +00007074 case SystemZ::TBEGIN:
7075 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
7076 case SystemZ::TBEGIN_nofloat:
7077 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
7078 case SystemZ::TBEGINC:
7079 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
Jonas Paulsson7c5ce102015-10-08 07:40:16 +00007080 case SystemZ::LTEBRCompare_VecPseudo:
7081 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
7082 case SystemZ::LTDBRCompare_VecPseudo:
7083 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
7084 case SystemZ::LTXBRCompare_VecPseudo:
7085 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
7086
Ulrich Weigand5eb64112018-03-02 20:39:30 +00007087 case TargetOpcode::STACKMAP:
7088 case TargetOpcode::PATCHPOINT:
7089 return emitPatchPoint(MI, MBB);
7090
Ulrich Weigand5f613df2013-05-06 16:15:19 +00007091 default:
7092 llvm_unreachable("Unexpected instr type to insert");
7093 }
7094}
Jonas Paulsson11d251c2017-05-10 13:03:25 +00007095
7096// This is only used by the isel schedulers, and is needed only to prevent
7097// compiler from crashing when list-ilp is used.
7098const TargetRegisterClass *
7099SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
7100 if (VT == MVT::Untyped)
7101 return &SystemZ::ADDR128BitRegClass;
7102 return TargetLowering::getRepRegClassFor(VT);
7103}