//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "systemz-lower"

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"

using namespace llvm;

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
  : TargetLowering(tm, new TargetLoweringObjectFileELF()),
    Subtarget(*tm.getSubtargetImpl()), TM(tm) {
  MVT PtrVT = getPointerTy();

  // Set up the register classes.
  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties();

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);
  // TODO: It may be better to default to latency-oriented scheduling, but
  // LLVM's current latency-oriented scheduler can't handle physreg
  // definitions such as SystemZ's CC register, so use the register-pressure
  // scheduler, which can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
      setOperationAction(ISD::SETCC, VT, Expand);
      // Expand SELECT(C, A, B) into SELECT_CC(C, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
      // FIXME: probably much too conservative.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
      setOperationAction(ISD::ATOMIC_STORE, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible and a wider multiplication otherwise.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);

      // We have instructions for signed but not unsigned FP conversion.
      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // We have instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
  // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
  // but there is a 64-bit UMUL_LOHI: MLGR.
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Expand these using getExceptionSelectorRegister() and
  // getExceptionPointerRegister().
  setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand);
  setOperationAction(ISD::EHSELECTION, PtrVT, Expand);

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::f32, Custom);

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
}
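
// Example of how the expansions above combine: an i32 (setcc x, y, setlt)
// first becomes SELECT_CC(x, y, 1, 0, SETLT), and lowerSELECT_CC below then
// emits a SystemZISD::CMP of x and y glued to a SystemZISD::SELECT_CCMASK
// that picks 1 or 0 based on CCMASK_CMP_LT.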

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                          bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'r': // General-purpose register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Other;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::Indirect)
    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
                        MachinePointerInfo(), false, false, false, 0);
  else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}
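
// For example, an i32 argument passed sign-extended in a 64-bit GPR reaches
// this point with LocVT i64 and ValVT i32: the code above wraps it in an
// AssertSext node that records the known extension and then truncates it
// back to i32.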

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getLocVT(). The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  const SystemZFrameLowering *TFL =
    static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter. Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy();
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be. The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots. (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          &MemOps[NumFixedFPRs],
                          SystemZ::NumArgFPRs - NumFixedFPRs);
    }
  }

  return Chain;
}
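
// To illustrate the memory case above: an f32 stack argument occupies a
// full 8-byte slot and is right-justified within it, so its load address
// is the slot address plus 4, whereas an i64 or f64 is loaded from the
// slot base.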

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy();

  // SystemZ target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
                               DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot. Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  SDValue Glue;
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT, true),
                             DAG.getConstant(0, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    unsigned Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other,
                     RetOps.data(), RetOps.size());
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones. In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}
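
// For example, CCMaskForCondCode(ISD::SETUGT) returns
// CCMASK_CMP_UO | CCMASK_CMP_GT; for an integer comparison, emitCmp below
// clears the CCMASK_CMP_UO bit again and simply records the comparison
// as unsigned.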

// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
                             SDValue &CmpOp0, SDValue &CmpOp1,
                             unsigned &CCMask) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!CmpOp0.hasOneUse() ||
      CmpOp0.getOpcode() != ISD::LOAD ||
      CmpOp1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  LoadSDNode *Load = cast<LoadSDNode>(CmpOp0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1);
  uint64_t Value = Constant->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    int64_t SignedValue = Constant->getSExtValue();
    if (uint64_t(SignedValue) + (1ULL << (NumBits - 1)) > Mask)
      return;
    // Unsigned comparison between two sign-extended values is equivalent
    // to unsigned comparison between two zero-extended values.
    if (IsUnsigned)
      Value &= Mask;
    else if (CCMask == SystemZ::CCMASK_CMP_EQ ||
             CCMask == SystemZ::CCMASK_CMP_NE)
      // Any choice of IsUnsigned is OK for equality comparisons.
      // We could use either CHHSI or CLHHSI for 16-bit comparisons,
      // but since we use CLHHSI for zero extensions, it seems better
      // to be consistent and do the same here.
      Value &= Mask, IsUnsigned = true;
    else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
      else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT)
        // Test whether the high bit of the byte is clear.
        Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
      else
        // No instruction exists for this combination.
        return;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // Signed comparison between two zero-extended values is equivalent
    // to unsigned comparison.
    IsUnsigned = true;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
  if (CmpOp0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    CmpOp0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
                            Load->getChain(), Load->getBasePtr(),
                            Load->getPointerInfo(), Load->getMemoryVT(),
                            Load->isVolatile(), Load->isNonTemporal(),
                            Load->getAlignment());

  // Make sure that the second operand is an i32 with the right value.
  if (CmpOp1.getValueType() != MVT::i32 ||
      Value != Constant->getZExtValue())
    CmpOp1 = DAG.getConstant(Value, MVT::i32);
}
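
// A worked example of the CLI rewrite above: a signed "i8 load < 0" has
// Value == 0 and CCMask == CCMASK_CMP_LT, and becomes the unsigned test
// "i8 load > 127", which checks exactly the sign bit and fits CLI's
// unsigned 8-bit immediate.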

// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
// is an equality comparison that is better implemented using unsigned
// rather than signed comparison instructions.
static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
                                     SDValue CmpOp1, unsigned CCMask) {
  // The test must be for equality or inequality.
  if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
    return false;

  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();

    // If we're comparing with memory, prefer unsigned comparisons for
    // values that are in the unsigned 16-bit range but not the signed
    // 16-bit range. We want to use CLFHSI and CLGHSI.
    if (CmpOp0.hasOneUse() &&
        ISD::isNormalLoad(CmpOp0.getNode()) &&
        (Value >= 32768 && Value < 65536))
      return true;

    // Use unsigned comparisons for values that are in the CLGFI range
    // but not in the CGFI range.
    if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
      return true;

    return false;
  }

  // Prefer CL for zero-extended loads.
  if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
      ISD::isZEXTLoad(CmpOp1.getNode()))
    return true;

  // ...and for "in-register" zero extensions.
  if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
    SDValue Mask = CmpOp1.getOperand(1);
    if (Mask.getOpcode() == ISD::Constant &&
        cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
      return true;
  }

  return false;
}

// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the
// 4-bit condition-code mask for CC.
static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                       ISD::CondCode CC, unsigned &CCMask) {
  bool IsUnsigned = false;
  CCMask = CCMaskForCondCode(CC);
  if (!CmpOp0.getValueType().isFloatingPoint()) {
    IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
    CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
    if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
      IsUnsigned = true;
  }

  SDLoc DL(CmpOp0);
  return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
                     DL, MVT::Glue, CmpOp0, CmpOp1);
}

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1. Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
                             unsigned Extend, unsigned Opcode,
                             SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
                               SDValue(In128, 0), Op1);
  bool Is32Bit = is32Bit(VT);
  SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
  SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
  SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                    VT, Result, SubReg0);
  SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                    VT, Result, SubReg1);
  Even = SDValue(Reg0, 0);
  Odd = SDValue(Reg1, 0);
}
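
// The even/odd split mirrors the GR128 register pairs that the underlying
// instructions write: as noted in the lowering functions below, DSG(F) and
// DL(G) leave the remainder in the even register and the quotient in the
// odd one, while UMUL_LOHI64 (MLGR) puts the high half of the product in
// the even register and the low half in the odd one.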

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0 = Op.getOperand(2);
  SDValue CmpOp1 = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  unsigned CCMask;
  SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  unsigned CCMask;
  SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);

  SmallVector<SDValue, 4> Ops;
  Ops.push_back(TrueOp);
  Ops.push_back(FalseOp);
  Ops.push_back(DAG.getConstant(CCMask, MVT::i32));
  Ops.push_back(Flags);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
}

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();
  Reloc::Model RM = TM.getRelocationModel();
  CodeModel::Model CM = TM.getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
    // Make sure that the offset is aligned to a halfword. If it isn't,
    // create an "anchor" at the previous 12-bit boundary.
    // FIXME check whether there is a better way of handling this.
    if (Offset & 1) {
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                          Offset & ~uint64_t(0xfff));
      Offset &= 0xfff;
    } else {
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
      Offset = 0;
    }
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  } else {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(), false, false, false, 0);
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, PtrVT));

  return Result;
}

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy();
  TLSModel::Model model = TM.getTLSModel(GV);

  if (model != TLSModel::LocalExec)
    llvm_unreachable("only local-exec TLS mode supported");

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(0, MVT::i32));
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(1, MVT::i32));
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, PtrVT));
  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);

  // Get the offset of GA from the thread pointer.
  SystemZConstantPoolValue *CPV =
    SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

  // Force the offset into the constant pool and load it from there.
  SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
  SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                               CPAddr, MachinePointerInfo::getConstantPool(),
                               false, false, false, 0);

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy();
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy();

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
  SDValue Shift32 = DAG.getConstant(32, MVT::i64);
  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
    SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
    SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                     MVT::f32, Out64, SubReg32);
    return SDValue(Out, 0);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                      MVT::f64, SDValue(U64, 0), In, SubReg32);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
    SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
    return Out;
  }
  llvm_unreachable("Unexpected bitcast combination");
}
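
// The shifts by 32 above reflect the fact that a 32-bit FP value lives in
// the high half of a 64-bit FPR: i32->f32 therefore shifts the integer
// into the high half of an i64, bitcasts that to f64 and extracts the f32
// as a subreg, and f32->i32 reverses those steps.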

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy();

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset),
                             false, false, 0);
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields);
}
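
// The four 8-byte fields stored above follow the SystemZ ABI's va_list
// layout (stated here as an assumption for orientation): the number of
// named GPRs and FPRs already used, the address of the first stack vararg,
// and the address of the register save area.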

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc DL(Op);

  unsigned SPReg = getStackPointerRegisterToSaveRestore();

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments. We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, 2, DL);
}
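
// In other words, the value handed back to the program is not NewSP itself
// but NewSP plus an ADJDYNALLOC placeholder, which later stages replace
// with the size of the 160-byte standard frame area plus any outgoing
// argument bytes once the final frame layout is known.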

SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");

  // UMUL_LOHI64 returns the low result in the odd register and the high
  // result in the even register. UMUL_LOHI is defined to return the
  // low half first, so the results are in reverse order.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.
  if (is32Bit(VT)) {
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1);
  }

  // DSG(F) takes a 64-bit dividend, so the even register in the GR128
  // input is "don't care". The instruction returns the remainder in
  // the even register and the quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64,
                   Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) uses a double-width dividend, so we need to clear the even
  // register in the GR128 input. The instruction returns the remainder
  // in the even register and the quotient in the odd register.
  SDValue Ops[2];
  if (is32Bit(VT))
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, 2, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  APInt KnownZero[2], KnownOne[2];
  DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]);
  DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]);

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero. They are the low and high operands respectively.
  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
                       KnownZero[1].getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits. We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
    uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
    if ((Mask >> 32) == 0xffffffff)
      HighOp = HighOp.getOperand(0);
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg. The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
  SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                      MVT::i64, HighOp, Low32, SubReg32);
  return SDValue(Result, 0);
}
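
// For example, an OR of (x & 0xffffffff00000000) with a value whose upper
// 32 bits are known zero becomes an INSERT_SUBREG of the truncated low
// operand into x, so no OR instruction needs to be emitted at all.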
1363
1364// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
1365// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG,
                                                unsigned Opcode) const {
  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, PtrVT));
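  // (Masking with -4 clears the two low address bits, aligning the address
  // down to the 4-byte word that contains the field.)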

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
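  // (It is safe to shift the full address rather than just the offset
  // within the word, since only the low bits of the amount are significant
  // to a rotate.)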

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));
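  // (E.g. for an 8-bit AND, the shift above leaves Src2 as 0xNN000000 and
  // the OR widens it to 0xNNffffff, so the AND leaves the other three
  // bytes of the containing word unchanged.)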

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             array_lengthof(Ops),
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
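  // (Rotating left by BitShift alone would bring the field to the top of
  // the word; the extra BitSize wraps it round to the least significant
  // bits instead.)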
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, 2, DL);
}

// Op is an 8-, 16- or 32-bit ATOMIC_CMP_SWAP operation.  Lower the first
// two into a fullword ATOMIC_CMP_SWAPW operation.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());

  // We have native support for 32-bit compare and swap.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, array_lengthof(Ops),
                                             NarrowVT, MMO);
  return AtomicOp;
}

SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
                          SystemZ::R15D, Op.getOperand(1));
}

SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch (Opcode) {
    OPCODE(RET_FLAG);
    OPCODE(CALL);
    OPCODE(PCREL_WRAPPER);
    OPCODE(CMP);
    OPCODE(UCMP);
    OPCODE(BR_CCMASK);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(EXTRACT_ACCESS);
    OPCODE(UMUL_LOHI64);
    OPCODE(SDIVREM64);
    OPCODE(UDIVREM32);
    OPCODE(UDIVREM64);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
  }
  return NULL;
#undef OPCODE
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}

// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
                                          MachineBasicBlock *MBB) {
  MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
  NewMBB->splice(NewMBB->begin(), MBB,
                 llvm::next(MachineBasicBlock::iterator(MI)),
                 MBB->end());
  NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
  return NewMBB;
}

bool SystemZTargetLowering::
convertPrevCompareToBranch(MachineBasicBlock *MBB,
                           MachineBasicBlock::iterator MBBI,
                           unsigned CCMask, MachineBasicBlock *Target) const {
  MachineBasicBlock::iterator Compare = MBBI;
  MachineBasicBlock::iterator Begin = MBB->begin();
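  // Walk backwards from MBBI, skipping over any debug values, to find
  // the previous real instruction, which is the candidate compare.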
  do {
    if (Compare == Begin)
      return false;
    --Compare;
  } while (Compare->isDebugValue());

  const SystemZInstrInfo *TII = TM.getInstrInfo();
  unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(),
                                                  Compare);
  if (!FusedOpcode)
    return false;

  DebugLoc DL = Compare->getDebugLoc();
  BuildMI(*MBB, MBBI, DL, TII->get(FusedOpcode))
    .addOperand(Compare->getOperand(0)).addOperand(Compare->getOperand(1))
    .addImm(CCMask).addMBB(Target);
  Compare->removeFromParent();
  return true;
}

// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();

  unsigned DestReg = MI->getOperand(0).getReg();
  unsigned TrueReg = MI->getOperand(1).getReg();
  unsigned FalseReg = MI->getOperand(2).getReg();
  unsigned CCMask = MI->getOperand(3).getImm();
  DebugLoc DL = MI->getDebugLoc();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  //
  // The original DAG glues comparisons to their uses, both to ensure
  // that no CC-clobbering instructions are inserted between them, and
  // to ensure that comparison results are not reused.  This means that
  // this Select is the sole user of any preceding comparison instruction
  // and that we can try to use a fused compare and branch instead.
  MBB = StartMBB;
  if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
    BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  // FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  // JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //   ...
  MBB = JoinMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
    .addReg(TrueReg).addMBB(StartMBB)
    .addReg(FalseReg).addMBB(FalseMBB);

  MI->eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.
MachineBasicBlock *
SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                     MachineBasicBlock *MBB,
                                     unsigned StoreOpcode, bool Invert) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();

  MachineOperand Base = MI->getOperand(0);
  int64_t Disp = MI->getOperand(1).getImm();
  unsigned IndexReg = MI->getOperand(2).getReg();
  unsigned SrcReg = MI->getOperand(3).getReg();
  unsigned CCMask = MI->getOperand(4).getImm();
  DebugLoc DL = MI->getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask = CCMask ^ SystemZ::CCMASK_ANY;
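  // (The branch jumps over the store, so it must be taken exactly when
  // the store should not happen.)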

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  // StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  //
  // The original DAG glues comparisons to their uses, both to ensure
  // that no CC-clobbering instructions are inserted between them, and
  // to ensure that comparison results are not reused.  This means that
  // this CondStore is the sole user of any preceding comparison instruction
  // and that we can try to use a fused compare and branch instead.
  MBB = StartMBB;
  if (!convertPrevCompareToBranch(MBB, MI, CCMask, JoinMBB))
    BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  // FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI->eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned BinOpcode,
                                            unsigned BitSize,
                                            bool Invert) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = (BinOpcode || IsSubWord ?
                     MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  // StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
      .addReg(RotatedOldVal).addOperand(Src2);
    if (BitSize < 32)
      // XILF with the upper BitSize bits set.
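      // (E.g. for BitSize == 8 the immediate is 0xff000000, inverting just
      // the byte at the top of the rotated word.)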
      BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
        .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
    else if (BitSize == 32)
      // XILF with every bit set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
        .addReg(Tmp).addImm(~uint32_t(0));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal).addOperand(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned CompareOpcode,
                                            unsigned KeepOldMask,
                                            unsigned BitSize) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned Src2 = MI->getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  // StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode);
  if (FusedOpcode)
    BuildMI(MBB, DL, TII->get(FusedOpcode))
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(KeepOldMask).addMBB(UpdateMBB);
  else {
    BuildMI(MBB, DL, TII->get(CompareOpcode))
      .addReg(RotatedOldVal).addReg(Src2);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(KeepOldMask).addMBB(UpdateMBB);
  }
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  // UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
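  //                    ^^ Replaces the field (the top BitSize bits of the
  //                       GR32) with the corresponding bits of %Src2,
  //                       leaving the rest of the word intact.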
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  // UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
                                          MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned OrigCmpVal = MI->getOperand(3).getReg();
  unsigned OrigSwapVal = MI->getOperand(4).getReg();
  unsigned BitShift = MI->getOperand(5).getReg();
  unsigned NegBitShift = MI->getOperand(6).getReg();
  int64_t BitSize = MI->getOperand(7).getImm();
  DebugLoc DL = MI->getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned CmpVal = MRI.createVirtualRegister(RC);
  unsigned SwapVal = MRI.createVirtualRegister(RC);
  unsigned StoreVal = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);

  // StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest = RLL %OldVal, BitSize(%BitShift)
  //                    ^^ The low BitSize bits contain the field
  //                       of interest.
  //   %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                    ^^ Replace the upper 32-BitSize bits of the
  //                       comparison value with those that we loaded,
  //                       so that we can use a full word comparison.
  //   CRJNE %Dest, %RetryCmpVal, DoneMBB
  //   # fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CRJ))
    .addReg(Dest).addReg(RetryCmpVal)
    .addImm(MaskNE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  // SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                    ^^ Replace the upper 32-BitSize bits of the new
  //                       value with those that we loaded.
  //   %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                    ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC)).addImm(MaskNE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Emit an extension from a GR32 or GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".  SubReg is subreg_low32 when extending a GR32
// and subreg_low when extending a GR64.
MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
                                  MachineBasicBlock *MBB,
                                  bool ClearEven, unsigned SubReg) const {
  const SystemZInstrInfo *TII = TM.getInstrInfo();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI->getDebugLoc();

  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

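    // LLILL zero-extends its 16-bit immediate, so this materializes a
    // 64-bit zero for the high part.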
    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
    In128 = NewIn128;
  }
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SubReg);

  MI->eraseFromParent();
  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
  switch (MI->getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::SelectF32:
  case SystemZ::Select64:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8_32:
    return emitCondStore(MI, MBB, SystemZ::STC32, false);
  case SystemZ::CondStore8_32Inv:
    return emitCondStore(MI, MBB, SystemZ::STC32, true);
  case SystemZ::CondStore16_32:
    return emitCondStore(MI, MBB, SystemZ::STH32, false);
  case SystemZ::CondStore16_32Inv:
    return emitCondStore(MI, MBB, SystemZ::STH32, true);
  case SystemZ::CondStore32_32:
    return emitCondStore(MI, MBB, SystemZ::ST32, false);
  case SystemZ::CondStore32_32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST32, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, true);

  case SystemZ::AEXT128_64:
    return emitExt128(MI, MBB, false, SystemZ::subreg_low);
  case SystemZ::ZEXT128_32:
    return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
  case SystemZ::ZEXT128_64:
    return emitExt128(MI, MBB, true, SystemZ::subreg_low);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0, 0);
  case SystemZ::ATOMIC_SWAP_32:
    return emitAtomicLoadBinary(MI, MBB, 0, 32);
  case SystemZ::ATOMIC_SWAP_64:
    return emitAtomicLoadBinary(MI, MBB, 0, 64);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
  case SystemZ::ATOMIC_LOAD_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
  case SystemZ::ATOMIC_LOAD_AHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
  case SystemZ::ATOMIC_LOAD_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
  case SystemZ::ATOMIC_LOAD_AGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
  case SystemZ::ATOMIC_LOAD_AGHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
  case SystemZ::ATOMIC_LOAD_AGFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
  case SystemZ::ATOMIC_LOAD_NILH32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
  case SystemZ::ATOMIC_LOAD_NILF32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
  case SystemZ::ATOMIC_LOAD_NIHL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
  case SystemZ::ATOMIC_LOAD_NIHH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
  case SystemZ::ATOMIC_LOAD_NIHF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
  case SystemZ::ATOMIC_LOAD_OILH32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
  case SystemZ::ATOMIC_LOAD_OILF32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
  case SystemZ::ATOMIC_LOAD_OIHL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
  case SystemZ::ATOMIC_LOAD_OIHH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
  case SystemZ::ATOMIC_LOAD_OIHF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF32:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
  case SystemZ::ATOMIC_LOAD_XIHF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILL32i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
  case SystemZ::ATOMIC_LOAD_NILH32i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
  case SystemZ::ATOMIC_LOAD_NILF32i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::BRC:
    // The original DAG glues comparisons to their uses, both to ensure
    // that no CC-clobbering instructions are inserted between them, and
    // to ensure that comparison results are not reused.  This means that
    // a BRC is the sole user of a preceding comparison and that we can
    // try to use a fused compare and branch instead.
    if (convertPrevCompareToBranch(MBB, MI, MI->getOperand(0).getImm(),
                                   MI->getOperand(1).getMBB()))
      MI->eraseFromParent();
    return MBB;
  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}