//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering() {
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the value types that the backend currently supports.

  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating-point types.
    // We cannot sign-extend in a register directly, so expand it to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
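
  // Note: 'Expand' tells the legalizer to rewrite a node in terms of other
  // operations (an expanded SREM, for example, becomes a divide, multiply
  // and subtract), while 'Custom' routes the node to this target's lowering
  // hooks, such as LowerSDIV below.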
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating-point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    // The SETcc predicates are condition codes, not opcodes, so they are
    // expanded through setCondCodeAction rather than setOperationAction.
    setCondCodeAction(ISD::SETOLT, VT, Expand);
    setCondCodeAction(ISD::SETOGE, VT, Expand);
    setCondCodeAction(ISD::SETOGT, VT, Expand);
    setCondCodeAction(ISD::SETOLE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETUGT, VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also has no divrem instruction for signed or unsigned operands.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU has no [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU has no rotate-right or byte-swap instructions.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU has no bit-counting instructions.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loads and stores of v2f64, but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so that it works correctly for these
  // vector types; that requires vector comparisons.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  MaxStoresPerMemcpy = 4096;
  MaxStoresPerMemmove = 4096;
  MaxStoresPerMemset = 4096;
}

bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                         const CallInst &I,
                                         unsigned Intrinsic) const {
  return false;
}

// The backend supports 32-bit and 64-bit floating-point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  // Since both f32 and f64 immediates are legal, there is no benefit to
  // shrinking a wider floating-point constant.
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}


// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
// be zero or one. 'Op' is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const {
  APInt KnownZero2;
  APInt KnownOne2;
  // Start with nothing known.
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::SELECT_CC:
    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    // A bit is known only if it is known in both operands.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
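
// Illustrative example (not part of the original source): if the two
// operands inspected above have known bits
//   KnownZero  = 0xFFFF0000, KnownOne  = 0x000000FF
//   KnownZero2 = 0xFF000000, KnownOne2 = 0x0000F0FF
// then after the intersection the result has
//   KnownZero  = 0xFF000000, KnownOne  = 0x000000FF
// i.e. only facts that hold for both operands survive.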

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the operation is narrower than 32 bits, extend it to 32 bits so the
    // upper bits stay valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Arithmetic shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, convert the result back to its
    // original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
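
// Illustrative example (not part of the original source): sign-extending an
// i8 value held in an i32 register, e.g. Data = 0x000000FF (-1 as i8), uses
// shiftBits = 32 - 8 = 24:
//   shl 24 -> 0xFF000000
//   sra 24 -> 0xFFFFFFFF (-1 as i32)
// which is exactly the shift pair built above.
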
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}
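
// Illustrative results (not part of the original source):
//   genIntType(8, 4)  -> i32    (four 8-bit lanes fit in one 32-bit value)
//   genIntType(32, 2) -> v2i32
//   genIntType(64, 2) -> v2i64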

SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  return DAG.getNode(AMDGPUISD::BRANCH_COND, Op.getDebugLoc(),
                     Op.getValueType(), Chain, Jump, Cond);
}

SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
      DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
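
// Illustrative trace (not part of the original source), for i8 operands
// LHS = -7, RHS = 2:
//   jq = ((-7 ^ 2) >> 6) | 1             = -1   (sign of the quotient)
//   fq = trunc(native_divide(-7.0, 2.0)) = -3.0
//   fr = fabs(-fq * fb + fa)             = 1.0, fabs(fb) = 2.0
//   fr >= fb is false, so jq is dropped and the result is iq = -3, matching
//   C's truncating division. The +/-1 'jq' correction only fires when the
//   approximate reciprocal makes trunc(fq) one unit too small in magnitude
//   (e.g. 6/2 evaluating to 2.9999: fr = 2.0 >= 2.0, so the result is 2 + 1).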

SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
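
// Illustrative note (not part of the original source): with the sign mask
// s = (x < 0 ? -1 : 0), the pair (x + s) ^ s computes |x| in two's
// complement, e.g. for x = -5:
//   (-5 + -1) ^ -1 = -6 ^ -1 = 5
// and the same pair applied with s = r10 ^ r11 at the end restores the sign
// of the quotient, so e.g. -7 / 2 evaluates to 7 / 2 = 3 re-signed to -3.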

SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
  // 64-bit signed division is not lowered specially; return the node as-is.
  return SDValue(Op.getNode(), 0);
}

SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}
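
// Both helpers above follow the same pattern (a sketch of the intent, not
// part of the original source): sign-extend the narrow operands to 32-bit
// lanes, let the i32 SREM lowering do the work, then truncate back, e.g.
// v4i8 % v4i8 becomes v4i32 % v4i32 followed by a truncate to v4i8.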

SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
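
// Illustrative trace (not part of the original source), for -7 % 2:
//   r10 = -1, r11 = 0 (sign masks of LHS and RHS)
//   |r0| = 7, |r1| = 2
//   udiv 7, 2 -> 3;  umul 3, 2 -> 6;  sub 7 - 6 -> 1
//   re-sign with r10: (1 + -1) ^ -1 = -1
// so -7 % 2 = -1, matching C's rule that the remainder takes the sign of
// the dividend.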

SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
  // 64-bit signed remainder is not lowered specially; return the node as-is.
  return SDValue(Op.getNode(), 0);
}