Blame - lib/Target/R600/AMDILISelLowering.cpp - fp2-dev/platform/external/llvm

blob: d669966cce23d4943d9db0ee2ce0ce6c1d4827fc [file] [log] [blame]

Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//==-----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief TargetLowering functions borrowed from AMDIL.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "AMDGPUISelLowering.h"
				16	#include "AMDGPURegisterInfo.h"
Chandler Carruth	58a2cbe	2013-01-02 10:22:59 +0000	[diff] [blame]	17	#include "AMDGPUSubtarget.h"
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	18	#include "AMDILIntrinsicInfo.h"
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	19	#include "llvm/CodeGen/MachineFrameInfo.h"
				20	#include "llvm/CodeGen/MachineRegisterInfo.h"
				21	#include "llvm/CodeGen/PseudoSourceValue.h"
				22	#include "llvm/CodeGen/SelectionDAG.h"
				23	#include "llvm/CodeGen/SelectionDAGNodes.h"
				24	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth	0b8c9a8	2013-01-02 11:36:10 +0000	[diff] [blame]	25	#include "llvm/IR/CallingConv.h"
				26	#include "llvm/IR/DerivedTypes.h"
				27	#include "llvm/IR/Instructions.h"
				28	#include "llvm/IR/Intrinsics.h"
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	29	#include "llvm/Support/raw_ostream.h"
				30	#include "llvm/Target/TargetInstrInfo.h"
				31	#include "llvm/Target/TargetOptions.h"
				32
				33	using namespace llvm;
				34	//===----------------------------------------------------------------------===//
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	35	// TargetLowering Implementation Help Functions End
				36	//===----------------------------------------------------------------------===//
				37
				38	//===----------------------------------------------------------------------===//
				39	// TargetLowering Class Implementation Begins
				40	//===----------------------------------------------------------------------===//
				41	void AMDGPUTargetLowering::InitAMDILLowering() {
				42	int types[] = {
				43	(int)MVT::i8,
				44	(int)MVT::i16,
				45	(int)MVT::i32,
				46	(int)MVT::f32,
				47	(int)MVT::f64,
				48	(int)MVT::i64,
				49	(int)MVT::v2i8,
				50	(int)MVT::v4i8,
				51	(int)MVT::v2i16,
				52	(int)MVT::v4i16,
				53	(int)MVT::v4f32,
				54	(int)MVT::v4i32,
				55	(int)MVT::v2f32,
				56	(int)MVT::v2i32,
				57	(int)MVT::v2f64,
				58	(int)MVT::v2i64
				59	};
				60
				61	int IntTypes[] = {
				62	(int)MVT::i8,
				63	(int)MVT::i16,
				64	(int)MVT::i32,
				65	(int)MVT::i64
				66	};
				67
				68	int FloatTypes[] = {
				69	(int)MVT::f32,
				70	(int)MVT::f64
				71	};
				72
				73	int VectorTypes[] = {
				74	(int)MVT::v2i8,
				75	(int)MVT::v4i8,
				76	(int)MVT::v2i16,
				77	(int)MVT::v4i16,
				78	(int)MVT::v4f32,
				79	(int)MVT::v4i32,
				80	(int)MVT::v2f32,
				81	(int)MVT::v2i32,
				82	(int)MVT::v2f64,
				83	(int)MVT::v2i64
				84	};
				85	size_t NumTypes = sizeof(types) / sizeof(*types);
				86	size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
				87	size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
				88	size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
				89
				90	const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
				91	// These are the current register classes that are
				92	// supported
				93
				94	for (unsigned int x = 0; x < NumTypes; ++x) {
				95	MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
				96
				97	//FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
				98	// We cannot sextinreg, expand to shifts
				99	setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
				100	setOperationAction(ISD::SUBE, VT, Expand);
				101	setOperationAction(ISD::SUBC, VT, Expand);
				102	setOperationAction(ISD::ADDE, VT, Expand);
				103	setOperationAction(ISD::ADDC, VT, Expand);
				104	setOperationAction(ISD::BRCOND, VT, Custom);
				105	setOperationAction(ISD::BR_JT, VT, Expand);
				106	setOperationAction(ISD::BRIND, VT, Expand);
				107	// TODO: Implement custom UREM/SREM routines
				108	setOperationAction(ISD::SREM, VT, Expand);
				109	setOperationAction(ISD::SMUL_LOHI, VT, Expand);
				110	setOperationAction(ISD::UMUL_LOHI, VT, Expand);
				111	if (VT != MVT::i64 && VT != MVT::v2i64) {
				112	setOperationAction(ISD::SDIV, VT, Custom);
				113	}
				114	}
				115	for (unsigned int x = 0; x < NumFloatTypes; ++x) {
				116	MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
				117
				118	// IL does not have these operations for floating point types
				119	setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
				120	setOperationAction(ISD::SETOLT, VT, Expand);
				121	setOperationAction(ISD::SETOGE, VT, Expand);
				122	setOperationAction(ISD::SETOGT, VT, Expand);
				123	setOperationAction(ISD::SETOLE, VT, Expand);
				124	setOperationAction(ISD::SETULT, VT, Expand);
				125	setOperationAction(ISD::SETUGE, VT, Expand);
				126	setOperationAction(ISD::SETUGT, VT, Expand);
				127	setOperationAction(ISD::SETULE, VT, Expand);
				128	}
				129
				130	for (unsigned int x = 0; x < NumIntTypes; ++x) {
				131	MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
				132
				133	// GPU also does not have divrem function for signed or unsigned
				134	setOperationAction(ISD::SDIVREM, VT, Expand);
				135
				136	// GPU does not have [S\|U]MUL_LOHI functions as a single instruction
				137	setOperationAction(ISD::SMUL_LOHI, VT, Expand);
				138	setOperationAction(ISD::UMUL_LOHI, VT, Expand);
				139
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	140	setOperationAction(ISD::BSWAP, VT, Expand);
				141
				142	// GPU doesn't have any counting operators
				143	setOperationAction(ISD::CTPOP, VT, Expand);
				144	setOperationAction(ISD::CTTZ, VT, Expand);
				145	setOperationAction(ISD::CTLZ, VT, Expand);
				146	}
				147
				148	for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
				149	MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
				150
				151	setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
				152	setOperationAction(ISD::SDIVREM, VT, Expand);
				153	setOperationAction(ISD::SMUL_LOHI, VT, Expand);
				154	// setOperationAction(ISD::VSETCC, VT, Expand);
				155	setOperationAction(ISD::SELECT_CC, VT, Expand);
				156
				157	}
Tom Stellard	3ff0abf	2013-06-07 20:37:48 +0000	[diff] [blame]	158	setOperationAction(ISD::MULHU, MVT::i64, Expand);
				159	setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
				160	setOperationAction(ISD::MULHS, MVT::i64, Expand);
				161	setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
				162	setOperationAction(ISD::ADD, MVT::v2i64, Expand);
				163	setOperationAction(ISD::SREM, MVT::v2i64, Expand);
				164	setOperationAction(ISD::Constant , MVT::i64 , Legal);
				165	setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
				166	setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
				167	setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
				168	setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
				169	setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
				170	if (STM.hasHWFP64()) {
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	171	// we support loading/storing v2f64 but not operations on the type
				172	setOperationAction(ISD::FADD, MVT::v2f64, Expand);
				173	setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
				174	setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
				175	setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
				176	setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
				177	setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
				178	// We want to expand vector conversions into their scalar
				179	// counterparts.
				180	setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
				181	setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
				182	setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
				183	setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
				184	setOperationAction(ISD::FABS, MVT::f64, Expand);
				185	setOperationAction(ISD::FABS, MVT::v2f64, Expand);
				186	}
				187	// TODO: Fix the UDIV24 algorithm so it works for these
				188	// types correctly. This needs vector comparisons
				189	// for this to work correctly.
				190	setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
				191	setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
				192	setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
				193	setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
				194	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
				195	setOperationAction(ISD::SUBC, MVT::Other, Expand);
				196	setOperationAction(ISD::ADDE, MVT::Other, Expand);
				197	setOperationAction(ISD::ADDC, MVT::Other, Expand);
				198	setOperationAction(ISD::BRCOND, MVT::Other, Custom);
				199	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
				200	setOperationAction(ISD::BRIND, MVT::Other, Expand);
				201	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
				202
				203
				204	// Use the default implementation.
				205	setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
				206	setOperationAction(ISD::Constant , MVT::i32 , Legal);
				207
				208	setSchedulingPreference(Sched::RegPressure);
				209	setPow2DivIsCheap(false);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	210	setSelectIsExpensive(true);
				211	setJumpIsExpensive(true);
				212
Jim Grosbach	64f3e76	2013-02-20 21:31:28 +0000	[diff] [blame]	213	MaxStoresPerMemcpy = 4096;
				214	MaxStoresPerMemmove = 4096;
				215	MaxStoresPerMemset = 4096;
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	216
				217	}
				218
				219	bool
				220	AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
				221	const CallInst &I, unsigned Intrinsic) const {
				222	return false;
				223	}
				224
				225	// The backend supports 32 and 64 bit floating point immediates
				226	bool
				227	AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
				228	if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
				229	\|\| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
				230	return true;
				231	} else {
				232	return false;
				233	}
				234	}
				235
				236	bool
				237	AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
				238	if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
				239	\|\| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
				240	return false;
				241	} else {
				242	return true;
				243	}
				244	}
				245
				246
				247	// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
				248	// be zero. Op is expected to be a target specific node. Used by DAG
				249	// combiner.
				250
				251	void
				252	AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
				253	const SDValue Op,
				254	APInt &KnownZero,
				255	APInt &KnownOne,
				256	const SelectionDAG &DAG,
				257	unsigned Depth) const {
				258	APInt KnownZero2;
				259	APInt KnownOne2;
				260	KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
				261	switch (Op.getOpcode()) {
				262	default: break;
				263	case ISD::SELECT_CC:
				264	DAG.ComputeMaskedBits(
				265	Op.getOperand(1),
				266	KnownZero,
				267	KnownOne,
				268	Depth + 1
				269	);
				270	DAG.ComputeMaskedBits(
				271	Op.getOperand(0),
				272	KnownZero2,
				273	KnownOne2
				274	);
				275	assert((KnownZero & KnownOne) == 0
				276	&& "Bits known to be one AND zero?");
				277	assert((KnownZero2 & KnownOne2) == 0
				278	&& "Bits known to be one AND zero?");
				279	// Only known if known in both the LHS and RHS
				280	KnownOne &= KnownOne2;
				281	KnownZero &= KnownZero2;
				282	break;
				283	};
				284	}
				285
				286	//===----------------------------------------------------------------------===//
				287	// Other Lowering Hooks
				288	//===----------------------------------------------------------------------===//
				289
				290	SDValue
				291	AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
				292	EVT OVT = Op.getValueType();
				293	SDValue DST;
				294	if (OVT.getScalarType() == MVT::i64) {
				295	DST = LowerSDIV64(Op, DAG);
				296	} else if (OVT.getScalarType() == MVT::i32) {
				297	DST = LowerSDIV32(Op, DAG);
				298	} else if (OVT.getScalarType() == MVT::i16
				299	\|\| OVT.getScalarType() == MVT::i8) {
				300	DST = LowerSDIV24(Op, DAG);
				301	} else {
				302	DST = SDValue(Op.getNode(), 0);
				303	}
				304	return DST;
				305	}
				306
				307	SDValue
				308	AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
				309	EVT OVT = Op.getValueType();
				310	SDValue DST;
				311	if (OVT.getScalarType() == MVT::i64) {
				312	DST = LowerSREM64(Op, DAG);
				313	} else if (OVT.getScalarType() == MVT::i32) {
				314	DST = LowerSREM32(Op, DAG);
				315	} else if (OVT.getScalarType() == MVT::i16) {
				316	DST = LowerSREM16(Op, DAG);
				317	} else if (OVT.getScalarType() == MVT::i8) {
				318	DST = LowerSREM8(Op, DAG);
				319	} else {
				320	DST = SDValue(Op.getNode(), 0);
				321	}
				322	return DST;
				323	}
				324
				325	SDValue
				326	AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
				327	SDValue Data = Op.getOperand(0);
				328	VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	329	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	330	EVT DVT = Data.getValueType();
				331	EVT BVT = BaseType->getVT();
				332	unsigned baseBits = BVT.getScalarType().getSizeInBits();
				333	unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
				334	unsigned shiftBits = srcBits - baseBits;
				335	if (srcBits < 32) {
				336	// If the op is less than 32 bits, then it needs to extend to 32bits
				337	// so it can properly keep the upper bits valid.
				338	EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
				339	Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
				340	shiftBits = 32 - baseBits;
				341	DVT = IVT;
				342	}
				343	SDValue Shift = DAG.getConstant(shiftBits, DVT);
				344	// Shift left by 'Shift' bits.
				345	Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
				346	// Signed shift Right by 'Shift' bits.
				347	Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
				348	if (srcBits < 32) {
				349	// Once the sign extension is done, the op needs to be converted to
				350	// its original type.
				351	Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
				352	}
				353	return Data;
				354	}
				355	EVT
				356	AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
				357	int iSize = (size * numEle);
				358	int vEle = (iSize >> ((size == 64) ? 6 : 5));
				359	if (!vEle) {
				360	vEle = 1;
				361	}
				362	if (size == 64) {
				363	if (vEle == 1) {
				364	return EVT(MVT::i64);
				365	} else {
				366	return EVT(MVT::getVectorVT(MVT::i64, vEle));
				367	}
				368	} else {
				369	if (vEle == 1) {
				370	return EVT(MVT::i32);
				371	} else {
				372	return EVT(MVT::getVectorVT(MVT::i32, vEle));
				373	}
				374	}
				375	}
				376
				377	SDValue
				378	AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
				379	SDValue Chain = Op.getOperand(0);
				380	SDValue Cond = Op.getOperand(1);
				381	SDValue Jump = Op.getOperand(2);
				382	SDValue Result;
				383	Result = DAG.getNode(
				384	AMDGPUISD::BRANCH_COND,
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	385	SDLoc(Op),
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	386	Op.getValueType(),
				387	Chain, Jump, Cond);
				388	return Result;
				389	}
				390
				391	SDValue
				392	AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	393	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	394	EVT OVT = Op.getValueType();
				395	SDValue LHS = Op.getOperand(0);
				396	SDValue RHS = Op.getOperand(1);
				397	MVT INTTY;
				398	MVT FLTTY;
				399	if (!OVT.isVector()) {
				400	INTTY = MVT::i32;
				401	FLTTY = MVT::f32;
				402	} else if (OVT.getVectorNumElements() == 2) {
				403	INTTY = MVT::v2i32;
				404	FLTTY = MVT::v2f32;
				405	} else if (OVT.getVectorNumElements() == 4) {
				406	INTTY = MVT::v4i32;
				407	FLTTY = MVT::v4f32;
				408	}
				409	unsigned bitsize = OVT.getScalarType().getSizeInBits();
				410	// char\|short jq = ia ^ ib;
				411	SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
				412
				413	// jq = jq >> (bitsize - 2)
				414	jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
				415
				416	// jq = jq \| 0x1
				417	jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
				418
				419	// jq = (int)jq
				420	jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
				421
				422	// int ia = (int)LHS;
				423	SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
				424
				425	// int ib, (int)RHS;
				426	SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
				427
				428	// float fa = (float)ia;
				429	SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
				430
				431	// float fb = (float)ib;
				432	SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
				433
				434	// float fq = native_divide(fa, fb);
				435	SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
				436
				437	// fq = trunc(fq);
				438	fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
				439
				440	// float fqneg = -fq;
				441	SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
				442
				443	// float fr = mad(fqneg, fb, fa);
Vincent Lejeune	e311196	2013-02-18 14:11:28 +0000	[diff] [blame]	444	SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
				445	DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	446
				447	// int iq = (int)fq;
				448	SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
				449
				450	// fr = fabs(fr);
				451	fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
				452
				453	// fb = fabs(fb);
				454	fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
				455
				456	// int cv = fr >= fb;
				457	SDValue cv;
				458	if (INTTY == MVT::i32) {
				459	cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
				460	} else {
				461	cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
				462	}
				463	// jq = (cv ? jq : 0);
				464	jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
				465	DAG.getConstant(0, OVT));
				466	// dst = iq + jq;
				467	iq = DAG.getSExtOrTrunc(iq, DL, OVT);
				468	iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
				469	return iq;
				470	}
				471
				472	SDValue
				473	AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	474	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	475	EVT OVT = Op.getValueType();
				476	SDValue LHS = Op.getOperand(0);
				477	SDValue RHS = Op.getOperand(1);
				478	// The LowerSDIV32 function generates equivalent to the following IL.
				479	// mov r0, LHS
				480	// mov r1, RHS
				481	// ilt r10, r0, 0
				482	// ilt r11, r1, 0
				483	// iadd r0, r0, r10
				484	// iadd r1, r1, r11
				485	// ixor r0, r0, r10
				486	// ixor r1, r1, r11
				487	// udiv r0, r0, r1
				488	// ixor r10, r10, r11
				489	// iadd r0, r0, r10
				490	// ixor DST, r0, r10
				491
				492	// mov r0, LHS
				493	SDValue r0 = LHS;
				494
				495	// mov r1, RHS
				496	SDValue r1 = RHS;
				497
				498	// ilt r10, r0, 0
				499	SDValue r10 = DAG.getSelectCC(DL,
				500	r0, DAG.getConstant(0, OVT),
				501	DAG.getConstant(-1, MVT::i32),
				502	DAG.getConstant(0, MVT::i32),
				503	ISD::SETLT);
				504
				505	// ilt r11, r1, 0
				506	SDValue r11 = DAG.getSelectCC(DL,
				507	r1, DAG.getConstant(0, OVT),
				508	DAG.getConstant(-1, MVT::i32),
				509	DAG.getConstant(0, MVT::i32),
				510	ISD::SETLT);
				511
				512	// iadd r0, r0, r10
				513	r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
				514
				515	// iadd r1, r1, r11
				516	r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
				517
				518	// ixor r0, r0, r10
				519	r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
				520
				521	// ixor r1, r1, r11
				522	r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
				523
				524	// udiv r0, r0, r1
				525	r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
				526
				527	// ixor r10, r10, r11
				528	r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
				529
				530	// iadd r0, r0, r10
				531	r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
				532
				533	// ixor DST, r0, r10
				534	SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
				535	return DST;
				536	}
				537
				538	SDValue
				539	AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
				540	return SDValue(Op.getNode(), 0);
				541	}
				542
				543	SDValue
				544	AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	545	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	546	EVT OVT = Op.getValueType();
				547	MVT INTTY = MVT::i32;
				548	if (OVT == MVT::v2i8) {
				549	INTTY = MVT::v2i32;
				550	} else if (OVT == MVT::v4i8) {
				551	INTTY = MVT::v4i32;
				552	}
				553	SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
				554	SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
				555	LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
				556	LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
				557	return LHS;
				558	}
				559
				560	SDValue
				561	AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	562	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	563	EVT OVT = Op.getValueType();
				564	MVT INTTY = MVT::i32;
				565	if (OVT == MVT::v2i16) {
				566	INTTY = MVT::v2i32;
				567	} else if (OVT == MVT::v4i16) {
				568	INTTY = MVT::v4i32;
				569	}
				570	SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
				571	SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
				572	LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
				573	LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
				574	return LHS;
				575	}
				576
				577	SDValue
				578	AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
Andrew Trick	ac6d9be	2013-05-25 02:42:55 +0000	[diff] [blame]	579	SDLoc DL(Op);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	580	EVT OVT = Op.getValueType();
				581	SDValue LHS = Op.getOperand(0);
				582	SDValue RHS = Op.getOperand(1);
				583	// The LowerSREM32 function generates equivalent to the following IL.
				584	// mov r0, LHS
				585	// mov r1, RHS
				586	// ilt r10, r0, 0
				587	// ilt r11, r1, 0
				588	// iadd r0, r0, r10
				589	// iadd r1, r1, r11
				590	// ixor r0, r0, r10
				591	// ixor r1, r1, r11
				592	// udiv r20, r0, r1
				593	// umul r20, r20, r1
				594	// sub r0, r0, r20
				595	// iadd r0, r0, r10
				596	// ixor DST, r0, r10
				597
				598	// mov r0, LHS
				599	SDValue r0 = LHS;
				600
				601	// mov r1, RHS
				602	SDValue r1 = RHS;
				603
				604	// ilt r10, r0, 0
				605	SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
				606
				607	// ilt r11, r1, 0
				608	SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
				609
				610	// iadd r0, r0, r10
				611	r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
				612
				613	// iadd r1, r1, r11
				614	r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
				615
				616	// ixor r0, r0, r10
				617	r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
				618
				619	// ixor r1, r1, r11
				620	r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
				621
				622	// udiv r20, r0, r1
				623	SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
				624
				625	// umul r20, r20, r1
				626	r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
				627
				628	// sub r0, r0, r20
				629	r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
				630
				631	// iadd r0, r0, r10
				632	r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
				633
				634	// ixor DST, r0, r10
				635	SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
				636	return DST;
				637	}
				638
				639	SDValue
				640	AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
				641	return SDValue(Op.getNode(), 0);
				642	}