blob: 0168906bf6d67670da1d15701dbb98690ad509f9 [file] [log] [blame]
Tom Stellardf98f2ce2012-12-11 21:25:42 +00001//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief TargetLowering functions borrowed from AMDIL.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUISelLowering.h"
16#include "AMDGPURegisterInfo.h"
Chandler Carruth58a2cbe2013-01-02 10:22:59 +000017#include "AMDGPUSubtarget.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000018#include "AMDILDevices.h"
19#include "AMDILIntrinsicInfo.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000020#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineRegisterInfo.h"
22#include "llvm/CodeGen/PseudoSourceValue.h"
23#include "llvm/CodeGen/SelectionDAG.h"
24#include "llvm/CodeGen/SelectionDAGNodes.h"
25#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth0b8c9a82013-01-02 11:36:10 +000026#include "llvm/IR/CallingConv.h"
27#include "llvm/IR/DerivedTypes.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Intrinsics.h"
Tom Stellardf98f2ce2012-12-11 21:25:42 +000030#include "llvm/Support/raw_ostream.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35//===----------------------------------------------------------------------===//
Tom Stellardf98f2ce2012-12-11 21:25:42 +000036// TargetLowering Implementation Help Functions End
37//===----------------------------------------------------------------------===//
38
39//===----------------------------------------------------------------------===//
40// TargetLowering Class Implementation Begins
41//===----------------------------------------------------------------------===//
/// Configure the AMDIL-inherited part of AMDGPU lowering: record, per value
/// type, which generic DAG operations are Legal, must be Expanded by the
/// legalizer, or are Custom-lowered by this target.
void AMDGPUTargetLowering::InitAMDILLowering() {
  // All value types configured below (scalars plus the supported vectors).
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // All vector types (integer and floating point).
  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions common to every type in the list (integer, float, and vector).
  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit SDIV has no custom lowering (see LowerSDIV64).
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // Extra restrictions when the device supports 64-bit integer ops.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively unlimited: always inline memcpy/memmove/memset expansions
  // rather than calling a library routine (there is no libc on the GPU).
  MaxStoresPerMemcpy = 4096;
  MaxStoresPerMemmove = 4096;
  MaxStoresPerMemset = 4096;

}
221
/// No AMDGPU intrinsic currently needs to be modeled as a target memory
/// intrinsic, so never fill in Info.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const {
  return false;
}
227
228// The backend supports 32 and 64 bit floating point immediates
229bool
230AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
231 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
232 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
233 return true;
234 } else {
235 return false;
236 }
237}
238
239bool
240AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
241 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
242 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
243 return false;
244 } else {
245 return true;
246 }
247}
248
249
250// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
251// be zero. Op is expected to be a target specific node. Used by DAG
252// combiner.
253
254void
255AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
256 const SDValue Op,
257 APInt &KnownZero,
258 APInt &KnownOne,
259 const SelectionDAG &DAG,
260 unsigned Depth) const {
261 APInt KnownZero2;
262 APInt KnownOne2;
263 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
264 switch (Op.getOpcode()) {
265 default: break;
266 case ISD::SELECT_CC:
267 DAG.ComputeMaskedBits(
268 Op.getOperand(1),
269 KnownZero,
270 KnownOne,
271 Depth + 1
272 );
273 DAG.ComputeMaskedBits(
274 Op.getOperand(0),
275 KnownZero2,
276 KnownOne2
277 );
278 assert((KnownZero & KnownOne) == 0
279 && "Bits known to be one AND zero?");
280 assert((KnownZero2 & KnownOne2) == 0
281 && "Bits known to be one AND zero?");
282 // Only known if known in both the LHS and RHS
283 KnownOne &= KnownOne2;
284 KnownZero &= KnownZero2;
285 break;
286 };
287}
288
289//===----------------------------------------------------------------------===//
290// Other Lowering Hooks
291//===----------------------------------------------------------------------===//
292
293SDValue
294AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
295 EVT OVT = Op.getValueType();
296 SDValue DST;
297 if (OVT.getScalarType() == MVT::i64) {
298 DST = LowerSDIV64(Op, DAG);
299 } else if (OVT.getScalarType() == MVT::i32) {
300 DST = LowerSDIV32(Op, DAG);
301 } else if (OVT.getScalarType() == MVT::i16
302 || OVT.getScalarType() == MVT::i8) {
303 DST = LowerSDIV24(Op, DAG);
304 } else {
305 DST = SDValue(Op.getNode(), 0);
306 }
307 return DST;
308}
309
310SDValue
311AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
312 EVT OVT = Op.getValueType();
313 SDValue DST;
314 if (OVT.getScalarType() == MVT::i64) {
315 DST = LowerSREM64(Op, DAG);
316 } else if (OVT.getScalarType() == MVT::i32) {
317 DST = LowerSREM32(Op, DAG);
318 } else if (OVT.getScalarType() == MVT::i16) {
319 DST = LowerSREM16(Op, DAG);
320 } else if (OVT.getScalarType() == MVT::i8) {
321 DST = LowerSREM8(Op, DAG);
322 } else {
323 DST = SDValue(Op.getNode(), 0);
324 }
325 return DST;
326}
327
/// Lower SIGN_EXTEND_INREG as a shift pair: shifting the value left so the
/// inner type's sign bit becomes the top bit, then arithmetic-shifting back
/// down replicates that sign bit through the upper bits.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  // Operand 1 carries the type being extended *from*.
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  SDLoc DL(Op);
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  // Width of the inner (source) type.
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // Width of the value itself; non-simple types are treated as 1 bit.
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    // Recompute the shift against the widened 32-bit elements.
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
358EVT
359AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
360 int iSize = (size * numEle);
361 int vEle = (iSize >> ((size == 64) ? 6 : 5));
362 if (!vEle) {
363 vEle = 1;
364 }
365 if (size == 64) {
366 if (vEle == 1) {
367 return EVT(MVT::i64);
368 } else {
369 return EVT(MVT::getVectorVT(MVT::i64, vEle));
370 }
371 } else {
372 if (vEle == 1) {
373 return EVT(MVT::i32);
374 } else {
375 return EVT(MVT::getVectorVT(MVT::i32, vEle));
376 }
377 }
378}
379
380SDValue
381AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
382 SDValue Chain = Op.getOperand(0);
383 SDValue Cond = Op.getOperand(1);
384 SDValue Jump = Op.getOperand(2);
385 SDValue Result;
386 Result = DAG.getNode(
387 AMDGPUISD::BRANCH_COND,
Andrew Trickac6d9be2013-05-25 02:42:55 +0000388 SDLoc(Op),
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000389 Op.getValueType(),
390 Chain, Jump, Cond);
391 return Result;
392}
393
394SDValue
395AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000396 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000397 EVT OVT = Op.getValueType();
398 SDValue LHS = Op.getOperand(0);
399 SDValue RHS = Op.getOperand(1);
400 MVT INTTY;
401 MVT FLTTY;
402 if (!OVT.isVector()) {
403 INTTY = MVT::i32;
404 FLTTY = MVT::f32;
405 } else if (OVT.getVectorNumElements() == 2) {
406 INTTY = MVT::v2i32;
407 FLTTY = MVT::v2f32;
408 } else if (OVT.getVectorNumElements() == 4) {
409 INTTY = MVT::v4i32;
410 FLTTY = MVT::v4f32;
411 }
412 unsigned bitsize = OVT.getScalarType().getSizeInBits();
413 // char|short jq = ia ^ ib;
414 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
415
416 // jq = jq >> (bitsize - 2)
417 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
418
419 // jq = jq | 0x1
420 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
421
422 // jq = (int)jq
423 jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
424
425 // int ia = (int)LHS;
426 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
427
428 // int ib, (int)RHS;
429 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
430
431 // float fa = (float)ia;
432 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
433
434 // float fb = (float)ib;
435 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
436
437 // float fq = native_divide(fa, fb);
438 SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
439
440 // fq = trunc(fq);
441 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
442
443 // float fqneg = -fq;
444 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
445
446 // float fr = mad(fqneg, fb, fa);
Vincent Lejeunee3111962013-02-18 14:11:28 +0000447 SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
448 DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000449
450 // int iq = (int)fq;
451 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
452
453 // fr = fabs(fr);
454 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
455
456 // fb = fabs(fb);
457 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
458
459 // int cv = fr >= fb;
460 SDValue cv;
461 if (INTTY == MVT::i32) {
462 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
463 } else {
464 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
465 }
466 // jq = (cv ? jq : 0);
467 jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
468 DAG.getConstant(0, OVT));
469 // dst = iq + jq;
470 iq = DAG.getSExtOrTrunc(iq, DL, OVT);
471 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
472 return iq;
473}
474
475SDValue
476AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000477 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000478 EVT OVT = Op.getValueType();
479 SDValue LHS = Op.getOperand(0);
480 SDValue RHS = Op.getOperand(1);
481 // The LowerSDIV32 function generates equivalent to the following IL.
482 // mov r0, LHS
483 // mov r1, RHS
484 // ilt r10, r0, 0
485 // ilt r11, r1, 0
486 // iadd r0, r0, r10
487 // iadd r1, r1, r11
488 // ixor r0, r0, r10
489 // ixor r1, r1, r11
490 // udiv r0, r0, r1
491 // ixor r10, r10, r11
492 // iadd r0, r0, r10
493 // ixor DST, r0, r10
494
495 // mov r0, LHS
496 SDValue r0 = LHS;
497
498 // mov r1, RHS
499 SDValue r1 = RHS;
500
501 // ilt r10, r0, 0
502 SDValue r10 = DAG.getSelectCC(DL,
503 r0, DAG.getConstant(0, OVT),
504 DAG.getConstant(-1, MVT::i32),
505 DAG.getConstant(0, MVT::i32),
506 ISD::SETLT);
507
508 // ilt r11, r1, 0
509 SDValue r11 = DAG.getSelectCC(DL,
510 r1, DAG.getConstant(0, OVT),
511 DAG.getConstant(-1, MVT::i32),
512 DAG.getConstant(0, MVT::i32),
513 ISD::SETLT);
514
515 // iadd r0, r0, r10
516 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
517
518 // iadd r1, r1, r11
519 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
520
521 // ixor r0, r0, r10
522 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
523
524 // ixor r1, r1, r11
525 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
526
527 // udiv r0, r0, r1
528 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
529
530 // ixor r10, r10, r11
531 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
532
533 // iadd r0, r0, r10
534 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
535
536 // ixor DST, r0, r10
537 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
538 return DST;
539}
540
541SDValue
542AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
543 return SDValue(Op.getNode(), 0);
544}
545
546SDValue
547AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000548 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000549 EVT OVT = Op.getValueType();
550 MVT INTTY = MVT::i32;
551 if (OVT == MVT::v2i8) {
552 INTTY = MVT::v2i32;
553 } else if (OVT == MVT::v4i8) {
554 INTTY = MVT::v4i32;
555 }
556 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
557 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
558 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
559 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
560 return LHS;
561}
562
563SDValue
564AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000565 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000566 EVT OVT = Op.getValueType();
567 MVT INTTY = MVT::i32;
568 if (OVT == MVT::v2i16) {
569 INTTY = MVT::v2i32;
570 } else if (OVT == MVT::v4i16) {
571 INTTY = MVT::v4i32;
572 }
573 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
574 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
575 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
576 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
577 return LHS;
578}
579
580SDValue
581AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickac6d9be2013-05-25 02:42:55 +0000582 SDLoc DL(Op);
Tom Stellardf98f2ce2012-12-11 21:25:42 +0000583 EVT OVT = Op.getValueType();
584 SDValue LHS = Op.getOperand(0);
585 SDValue RHS = Op.getOperand(1);
586 // The LowerSREM32 function generates equivalent to the following IL.
587 // mov r0, LHS
588 // mov r1, RHS
589 // ilt r10, r0, 0
590 // ilt r11, r1, 0
591 // iadd r0, r0, r10
592 // iadd r1, r1, r11
593 // ixor r0, r0, r10
594 // ixor r1, r1, r11
595 // udiv r20, r0, r1
596 // umul r20, r20, r1
597 // sub r0, r0, r20
598 // iadd r0, r0, r10
599 // ixor DST, r0, r10
600
601 // mov r0, LHS
602 SDValue r0 = LHS;
603
604 // mov r1, RHS
605 SDValue r1 = RHS;
606
607 // ilt r10, r0, 0
608 SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
609
610 // ilt r11, r1, 0
611 SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
612
613 // iadd r0, r0, r10
614 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
615
616 // iadd r1, r1, r11
617 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
618
619 // ixor r0, r0, r10
620 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
621
622 // ixor r1, r1, r11
623 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
624
625 // udiv r20, r0, r1
626 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
627
628 // umul r20, r20, r1
629 r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
630
631 // sub r0, r0, r20
632 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
633
634 // iadd r0, r0, r10
635 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
636
637 // ixor DST, r0, r10
638 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
639 return DST;
640}
641
642SDValue
643AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
644 return SDValue(Op.getNode(), 0);
645}