Blame - lib/Target/R600/AMDGPUISelLowering.cpp - fp2-dev/platform/external/llvm

blob: 0a33264686d20b2342c0155ee839f58834e3e45e [file] [log] [blame]

Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief This is the parent TargetLowering class for hardware code gen
				12	/// targets.
				13	//
				14	//===----------------------------------------------------------------------===//
				15
				16	#include "AMDGPUISelLowering.h"
				17	#include "AMDILIntrinsicInfo.h"
				18	#include "llvm/CodeGen/MachineFunction.h"
				19	#include "llvm/CodeGen/MachineRegisterInfo.h"
				20	#include "llvm/CodeGen/SelectionDAG.h"
				21	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
				22
				23	using namespace llvm;
				24
				25	AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
				26	TargetLowering(TM, new TargetLoweringObjectFileELF()) {
				27
				28	// Initialize target lowering borrowed from AMDIL
				29	InitAMDILLowering();
				30
				31	// We need to custom lower some of the intrinsics
				32	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				33
				34	// Library functions. These default to Expand, but we have instructions
				35	// for them.
				36	setOperationAction(ISD::FCEIL, MVT::f32, Legal);
				37	setOperationAction(ISD::FEXP2, MVT::f32, Legal);
				38	setOperationAction(ISD::FPOW, MVT::f32, Legal);
				39	setOperationAction(ISD::FLOG2, MVT::f32, Legal);
				40	setOperationAction(ISD::FABS, MVT::f32, Legal);
				41	setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
				42	setOperationAction(ISD::FRINT, MVT::f32, Legal);
				43
				44	// Lower floating point store/load to integer store/load to reduce the number
				45	// of patterns in tablegen.
				46	setOperationAction(ISD::STORE, MVT::f32, Promote);
				47	AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
				48
				49	setOperationAction(ISD::STORE, MVT::v4f32, Promote);
				50	AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
				51
				52	setOperationAction(ISD::LOAD, MVT::f32, Promote);
				53	AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
				54
				55	setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
				56	AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
				57
				58	setOperationAction(ISD::UDIV, MVT::i32, Expand);
				59	setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
				60	setOperationAction(ISD::UREM, MVT::i32, Expand);
				61	}
				62
				63	//===---------------------------------------------------------------------===//
				64	// TargetLowering Callbacks
				65	//===---------------------------------------------------------------------===//
				66
				67	SDValue AMDGPUTargetLowering::LowerFormalArguments(
				68	SDValue Chain,
				69	CallingConv::ID CallConv,
				70	bool isVarArg,
				71	const SmallVectorImpl<ISD::InputArg> &Ins,
				72	DebugLoc DL, SelectionDAG &DAG,
				73	SmallVectorImpl<SDValue> &InVals) const {
				74	for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
				75	InVals.push_back(SDValue());
				76	}
				77	return Chain;
				78	}
				79
				80	SDValue AMDGPUTargetLowering::LowerReturn(
				81	SDValue Chain,
				82	CallingConv::ID CallConv,
				83	bool isVarArg,
				84	const SmallVectorImpl<ISD::OutputArg> &Outs,
				85	const SmallVectorImpl<SDValue> &OutVals,
				86	DebugLoc DL, SelectionDAG &DAG) const {
				87	return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
				88	}
				89
				90	//===---------------------------------------------------------------------===//
				91	// Target specific lowering
				92	//===---------------------------------------------------------------------===//
				93
				94	SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
				95	const {
				96	switch (Op.getOpcode()) {
				97	default:
				98	Op.getNode()->dump();
				99	assert(0 && "Custom lowering code for this"
				100	"instruction is not implemented yet!");
				101	break;
				102	// AMDIL DAG lowering
				103	case ISD::SDIV: return LowerSDIV(Op, DAG);
				104	case ISD::SREM: return LowerSREM(Op, DAG);
				105	case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
				106	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
				107	// AMDGPU DAG lowering
				108	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
				109	case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
				110	}
				111	return Op;
				112	}
				113
				114	SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
				115	SelectionDAG &DAG) const {
				116	unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
				117	DebugLoc DL = Op.getDebugLoc();
				118	EVT VT = Op.getValueType();
				119
				120	switch (IntrinsicID) {
				121	default: return Op;
				122	case AMDGPUIntrinsic::AMDIL_abs:
				123	return LowerIntrinsicIABS(Op, DAG);
				124	case AMDGPUIntrinsic::AMDIL_exp:
				125	return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
				126	case AMDGPUIntrinsic::AMDGPU_lrp:
				127	return LowerIntrinsicLRP(Op, DAG);
				128	case AMDGPUIntrinsic::AMDIL_fraction:
				129	return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	130	case AMDGPUIntrinsic::AMDIL_max:
				131	return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
				132	Op.getOperand(2));
				133	case AMDGPUIntrinsic::AMDGPU_imax:
				134	return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
				135	Op.getOperand(2));
				136	case AMDGPUIntrinsic::AMDGPU_umax:
				137	return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
				138	Op.getOperand(2));
				139	case AMDGPUIntrinsic::AMDIL_min:
				140	return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
				141	Op.getOperand(2));
				142	case AMDGPUIntrinsic::AMDGPU_imin:
				143	return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
				144	Op.getOperand(2));
				145	case AMDGPUIntrinsic::AMDGPU_umin:
				146	return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
				147	Op.getOperand(2));
				148	case AMDGPUIntrinsic::AMDIL_round_nearest:
				149	return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
				150	}
				151	}
				152
				153	///IABS(a) = SMAX(sub(0, a), a)
				154	SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
				155	SelectionDAG &DAG) const {
				156
				157	DebugLoc DL = Op.getDebugLoc();
				158	EVT VT = Op.getValueType();
				159	SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
				160	Op.getOperand(1));
				161
				162	return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
				163	}
				164
				165	/// Linear Interpolation
				166	/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
				167	SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
				168	SelectionDAG &DAG) const {
				169	DebugLoc DL = Op.getDebugLoc();
				170	EVT VT = Op.getValueType();
				171	SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
				172	DAG.getConstantFP(1.0f, MVT::f32),
				173	Op.getOperand(1));
				174	SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
				175	Op.getOperand(3));
Vincent Lejeune	e311196	2013-02-18 14:11:28 +0000	[diff] [blame^]	176	return DAG.getNode(ISD::FADD, DL, VT,
				177	DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
				178	OneSubAC);
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	179	}
				180
				181	/// \brief Generate Min/Max node
				182	SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
				183	SelectionDAG &DAG) const {
				184	DebugLoc DL = Op.getDebugLoc();
				185	EVT VT = Op.getValueType();
				186
				187	SDValue LHS = Op.getOperand(0);
				188	SDValue RHS = Op.getOperand(1);
				189	SDValue True = Op.getOperand(2);
				190	SDValue False = Op.getOperand(3);
				191	SDValue CC = Op.getOperand(4);
				192
				193	if (VT != MVT::f32 \|\|
				194	!((LHS == True && RHS == False) \|\| (LHS == False && RHS == True))) {
				195	return SDValue();
				196	}
				197
				198	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				199	switch (CCOpcode) {
				200	case ISD::SETOEQ:
				201	case ISD::SETONE:
				202	case ISD::SETUNE:
				203	case ISD::SETNE:
				204	case ISD::SETUEQ:
				205	case ISD::SETEQ:
				206	case ISD::SETFALSE:
				207	case ISD::SETFALSE2:
				208	case ISD::SETTRUE:
				209	case ISD::SETTRUE2:
				210	case ISD::SETUO:
				211	case ISD::SETO:
				212	assert(0 && "Operation should already be optimised !");
				213	case ISD::SETULE:
				214	case ISD::SETULT:
				215	case ISD::SETOLE:
				216	case ISD::SETOLT:
				217	case ISD::SETLE:
				218	case ISD::SETLT: {
				219	if (LHS == True)
				220	return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
				221	else
				222	return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
				223	}
				224	case ISD::SETGT:
				225	case ISD::SETGE:
				226	case ISD::SETUGE:
				227	case ISD::SETOGE:
				228	case ISD::SETUGT:
				229	case ISD::SETOGT: {
				230	if (LHS == True)
				231	return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
				232	else
				233	return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
				234	}
				235	case ISD::SETCC_INVALID:
				236	assert(0 && "Invalid setcc condcode !");
				237	}
				238	return Op;
				239	}
				240
				241
				242
				243	SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
				244	SelectionDAG &DAG) const {
				245	DebugLoc DL = Op.getDebugLoc();
				246	EVT VT = Op.getValueType();
				247
				248	SDValue Num = Op.getOperand(0);
				249	SDValue Den = Op.getOperand(1);
				250
				251	SmallVector<SDValue, 8> Results;
				252
				253	// RCP = URECIP(Den) = 2^32 / Den + e
				254	// e is rounding error.
				255	SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
				256
				257	// RCP_LO = umulo(RCP, Den) */
				258	SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
				259
				260	// RCP_HI = mulhu (RCP, Den) */
				261	SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
				262
				263	// NEG_RCP_LO = -RCP_LO
				264	SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
				265	RCP_LO);
				266
				267	// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
				268	SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
				269	NEG_RCP_LO, RCP_LO,
				270	ISD::SETEQ);
				271	// Calculate the rounding error from the URECIP instruction
				272	// E = mulhu(ABS_RCP_LO, RCP)
				273	SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
				274
				275	// RCP_A_E = RCP + E
				276	SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
				277
				278	// RCP_S_E = RCP - E
				279	SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
				280
				281	// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
				282	SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
				283	RCP_A_E, RCP_S_E,
				284	ISD::SETEQ);
				285	// Quotient = mulhu(Tmp0, Num)
				286	SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
				287
				288	// Num_S_Remainder = Quotient * Den
				289	SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
				290
				291	// Remainder = Num - Num_S_Remainder
				292	SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
				293
				294	// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
				295	SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
				296	DAG.getConstant(-1, VT),
				297	DAG.getConstant(0, VT),
				298	ISD::SETGE);
				299	// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
				300	SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
				301	DAG.getConstant(0, VT),
				302	DAG.getConstant(-1, VT),
				303	DAG.getConstant(0, VT),
				304	ISD::SETGE);
				305	// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
				306	SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
				307	Remainder_GE_Zero);
				308
				309	// Calculate Division result:
				310
				311	// Quotient_A_One = Quotient + 1
				312	SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
				313	DAG.getConstant(1, VT));
				314
				315	// Quotient_S_One = Quotient - 1
				316	SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
				317	DAG.getConstant(1, VT));
				318
				319	// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
				320	SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
				321	Quotient, Quotient_A_One, ISD::SETEQ);
				322
				323	// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
				324	Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
				325	Quotient_S_One, Div, ISD::SETEQ);
				326
				327	// Calculate Rem result:
				328
				329	// Remainder_S_Den = Remainder - Den
				330	SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
				331
				332	// Remainder_A_Den = Remainder + Den
				333	SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
				334
				335	// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
				336	SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
				337	Remainder, Remainder_S_Den, ISD::SETEQ);
				338
				339	// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
				340	Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
				341	Remainder_A_Den, Rem, ISD::SETEQ);
				342	SDValue Ops[2];
				343	Ops[0] = Div;
				344	Ops[1] = Rem;
				345	return DAG.getMergeValues(Ops, 2, DL);
				346	}
				347
				348	//===----------------------------------------------------------------------===//
				349	// Helper functions
				350	//===----------------------------------------------------------------------===//
				351
				352	bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
				353	if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
				354	return CFP->isExactlyValue(1.0);
				355	}
				356	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				357	return C->isAllOnesValue();
				358	}
				359	return false;
				360	}
				361
				362	bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
				363	if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
				364	return CFP->getValueAPF().isZero();
				365	}
				366	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
				367	return C->isNullValue();
				368	}
				369	return false;
				370	}
				371
				372	SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
				373	const TargetRegisterClass *RC,
				374	unsigned Reg, EVT VT) const {
				375	MachineFunction &MF = DAG.getMachineFunction();
				376	MachineRegisterInfo &MRI = MF.getRegInfo();
				377	unsigned VirtualRegister;
				378	if (!MRI.isLiveIn(Reg)) {
				379	VirtualRegister = MRI.createVirtualRegister(RC);
				380	MRI.addLiveIn(Reg, VirtualRegister);
				381	} else {
				382	VirtualRegister = MRI.getLiveInVirtReg(Reg);
				383	}
				384	return DAG.getRegister(VirtualRegister, VT);
				385	}
				386
				387	#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
				388
				389	const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
				390	switch (Opcode) {
				391	default: return 0;
				392	// AMDIL DAG nodes
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	393	NODE_NAME_CASE(CALL);
				394	NODE_NAME_CASE(UMUL);
				395	NODE_NAME_CASE(DIV_INF);
				396	NODE_NAME_CASE(RET_FLAG);
				397	NODE_NAME_CASE(BRANCH_COND);
				398
				399	// AMDGPU DAG nodes
				400	NODE_NAME_CASE(DWORDADDR)
				401	NODE_NAME_CASE(FRACT)
				402	NODE_NAME_CASE(FMAX)
				403	NODE_NAME_CASE(SMAX)
				404	NODE_NAME_CASE(UMAX)
				405	NODE_NAME_CASE(FMIN)
				406	NODE_NAME_CASE(SMIN)
				407	NODE_NAME_CASE(UMIN)
				408	NODE_NAME_CASE(URECIP)
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	409	NODE_NAME_CASE(EXPORT)
Tom Stellard	c7e1888	2013-01-23 02:09:03 +0000	[diff] [blame]	410	NODE_NAME_CASE(CONST_ADDRESS)
Tom Stellard	c0b0c67	2013-02-06 17:32:29 +0000	[diff] [blame]	411	NODE_NAME_CASE(REGISTER_LOAD)
				412	NODE_NAME_CASE(REGISTER_STORE)
Tom Stellard	f98f2ce	2012-12-11 21:25:42 +0000	[diff] [blame]	413	}
				414	}