Blame - llvm/lib/Target/R600/R600ISelLowering.cpp - toolchain/llvm-project

blob: e5f6b516d1148e62585b2213dad6df3004e6199c [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief Custom DAG lowering for R600
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "R600ISelLowering.h"
				16	#include "R600Defines.h"
				17	#include "R600InstrInfo.h"
				18	#include "R600MachineFunctionInfo.h"
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	19	#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	20	#include "llvm/CodeGen/MachineInstrBuilder.h"
				21	#include "llvm/CodeGen/MachineRegisterInfo.h"
				22	#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth	9fb823b	2013-01-02 11:36:10 +0000	[diff] [blame]	23	#include "llvm/IR/Argument.h"
				24	#include "llvm/IR/Function.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	25
				26	using namespace llvm;
				27
				28	R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
				29	AMDGPUTargetLowering(TM),
				30	TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
				31	setOperationAction(ISD::MUL, MVT::i64, Expand);
				32	addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
				33	addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
				34	addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
				35	addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
				36	computeRegisterProperties();
				37
				38	setOperationAction(ISD::FADD, MVT::v4f32, Expand);
				39	setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
				40	setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
				41	setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
				42
				43	setOperationAction(ISD::ADD, MVT::v4i32, Expand);
				44	setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellard	a8b0351	2012-12-21 16:33:24 +0000	[diff] [blame]	45	setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
				46	setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
				47	setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
				48	setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	49	setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
				50	setOperationAction(ISD::UREM, MVT::v4i32, Expand);
				51	setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
				52
				53	setOperationAction(ISD::BR_CC, MVT::i32, Custom);
				54	setOperationAction(ISD::BR_CC, MVT::f32, Custom);
				55
				56	setOperationAction(ISD::FSUB, MVT::f32, Expand);
				57
				58	setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
				59	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				60	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
				61	setOperationAction(ISD::FPOW, MVT::f32, Custom);
				62
				63	setOperationAction(ISD::ROTL, MVT::i32, Custom);
				64
				65	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				66	setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
				67
				68	setOperationAction(ISD::SETCC, MVT::i32, Custom);
				69	setOperationAction(ISD::SETCC, MVT::f32, Custom);
				70	setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
				71
				72	setOperationAction(ISD::SELECT, MVT::i32, Custom);
				73	setOperationAction(ISD::SELECT, MVT::f32, Custom);
				74
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	75	// Legalize loads and stores to the private address space.
				76	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				77	setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
				78	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
				79	setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
				80	setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
				81	setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
				82	setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
				83	setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	84	setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	85	setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	86	setOperationAction(ISD::STORE, MVT::v4i32, Custom);
				87
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	88	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				89	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	90	setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
				91
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	92	setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	93	setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	94	setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	95	setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	96
Tom Stellard	b852af5	2013-03-08 15:37:03 +0000	[diff] [blame^]	97	setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	98	setSchedulingPreference(Sched::VLIW);
				99	}
				100
				101	MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
				102	MachineInstr * MI, MachineBasicBlock * BB) const {
				103	MachineFunction * MF = BB->getParent();
				104	MachineRegisterInfo &MRI = MF->getRegInfo();
				105	MachineBasicBlock::iterator I = *MI;
				106
				107	switch (MI->getOpcode()) {
				108	default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	109	case AMDGPU::CLAMP_R600: {
				110	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				111	AMDGPU::MOV,
				112	MI->getOperand(0).getReg(),
				113	MI->getOperand(1).getReg());
				114	TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
				115	break;
				116	}
				117
				118	case AMDGPU::FABS_R600: {
				119	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				120	AMDGPU::MOV,
				121	MI->getOperand(0).getReg(),
				122	MI->getOperand(1).getReg());
				123	TII->addFlag(NewMI, 0, MO_FLAG_ABS);
				124	break;
				125	}
				126
				127	case AMDGPU::FNEG_R600: {
				128	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				129	AMDGPU::MOV,
				130	MI->getOperand(0).getReg(),
				131	MI->getOperand(1).getReg());
				132	TII->addFlag(NewMI, 0, MO_FLAG_NEG);
				133	break;
				134	}
				135
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	136	case AMDGPU::MASK_WRITE: {
				137	unsigned maskedRegister = MI->getOperand(0).getReg();
				138	assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
				139	MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
				140	TII->addFlag(defInstr, 0, MO_FLAG_MASK);
				141	break;
				142	}
				143
				144	case AMDGPU::MOV_IMM_F32:
				145	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				146	MI->getOperand(1).getFPImm()->getValueAPF()
				147	.bitcastToAPInt().getZExtValue());
				148	break;
				149	case AMDGPU::MOV_IMM_I32:
				150	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				151	MI->getOperand(1).getImm());
				152	break;
Vincent Lejeune	0b72f10	2013-03-05 15:04:55 +0000	[diff] [blame]	153	case AMDGPU::CONST_COPY: {
				154	MachineInstr NewMI = TII->buildDefaultInstruction(BB, MI, AMDGPU::MOV,
				155	MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
				156	TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
				157	MI->getOperand(1).getImm());
				158	break;
				159	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	160
				161	case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
				162	case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
				163	unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
				164
				165	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				166	.addOperand(MI->getOperand(0))
				167	.addOperand(MI->getOperand(1))
				168	.addImm(EOP); // Set End of program bit
				169	break;
				170	}
				171
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	172	case AMDGPU::TXD: {
				173	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				174	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				175
				176	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				177	.addOperand(MI->getOperand(3))
				178	.addOperand(MI->getOperand(4))
				179	.addOperand(MI->getOperand(5))
				180	.addOperand(MI->getOperand(6));
				181	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				182	.addOperand(MI->getOperand(2))
				183	.addOperand(MI->getOperand(4))
				184	.addOperand(MI->getOperand(5))
				185	.addOperand(MI->getOperand(6));
				186	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
				187	.addOperand(MI->getOperand(0))
				188	.addOperand(MI->getOperand(1))
				189	.addOperand(MI->getOperand(4))
				190	.addOperand(MI->getOperand(5))
				191	.addOperand(MI->getOperand(6))
				192	.addReg(T0, RegState::Implicit)
				193	.addReg(T1, RegState::Implicit);
				194	break;
				195	}
				196
				197	case AMDGPU::TXD_SHADOW: {
				198	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				199	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				200
				201	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				202	.addOperand(MI->getOperand(3))
				203	.addOperand(MI->getOperand(4))
				204	.addOperand(MI->getOperand(5))
				205	.addOperand(MI->getOperand(6));
				206	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				207	.addOperand(MI->getOperand(2))
				208	.addOperand(MI->getOperand(4))
				209	.addOperand(MI->getOperand(5))
				210	.addOperand(MI->getOperand(6));
				211	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
				212	.addOperand(MI->getOperand(0))
				213	.addOperand(MI->getOperand(1))
				214	.addOperand(MI->getOperand(4))
				215	.addOperand(MI->getOperand(5))
				216	.addOperand(MI->getOperand(6))
				217	.addReg(T0, RegState::Implicit)
				218	.addReg(T1, RegState::Implicit);
				219	break;
				220	}
				221
				222	case AMDGPU::BRANCH:
				223	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				224	.addOperand(MI->getOperand(0))
				225	.addReg(0);
				226	break;
				227
				228	case AMDGPU::BRANCH_COND_f32: {
				229	MachineInstr *NewMI =
				230	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				231	AMDGPU::PREDICATE_BIT)
				232	.addOperand(MI->getOperand(1))
				233	.addImm(OPCODE_IS_NOT_ZERO)
				234	.addImm(0); // Flags
				235	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				236	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				237	.addOperand(MI->getOperand(0))
				238	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				239	break;
				240	}
				241
				242	case AMDGPU::BRANCH_COND_i32: {
				243	MachineInstr *NewMI =
				244	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				245	AMDGPU::PREDICATE_BIT)
				246	.addOperand(MI->getOperand(1))
				247	.addImm(OPCODE_IS_NOT_ZERO_INT)
				248	.addImm(0); // Flags
				249	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				250	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				251	.addOperand(MI->getOperand(0))
				252	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				253	break;
				254	}
				255
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	256	case AMDGPU::EG_ExportSwz:
				257	case AMDGPU::R600_ExportSwz: {
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	258	// Instruction is left unmodified if its not the last one of its type
				259	bool isLastInstructionOfItsType = true;
				260	unsigned InstExportType = MI->getOperand(1).getImm();
				261	for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
				262	EndBlock = BB->end(); NextExportInst != EndBlock;
				263	NextExportInst = llvm::next(NextExportInst)) {
				264	if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz \|\|
				265	NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
				266	unsigned CurrentInstExportType = NextExportInst->getOperand(1)
				267	.getImm();
				268	if (CurrentInstExportType == InstExportType) {
				269	isLastInstructionOfItsType = false;
				270	break;
				271	}
				272	}
				273	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	274	bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	275	if (!EOP && !isLastInstructionOfItsType)
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	276	return BB;
				277	unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
				278	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				279	.addOperand(MI->getOperand(0))
				280	.addOperand(MI->getOperand(1))
				281	.addOperand(MI->getOperand(2))
				282	.addOperand(MI->getOperand(3))
				283	.addOperand(MI->getOperand(4))
				284	.addOperand(MI->getOperand(5))
				285	.addOperand(MI->getOperand(6))
				286	.addImm(CfInst)
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	287	.addImm(EOP);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	288	break;
				289	}
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	290	case AMDGPU::RETURN: {
				291	// RETURN instructions must have the live-out registers as implicit uses,
				292	// otherwise they appear dead.
				293	R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
				294	MachineInstrBuilder MIB(*MF, MI);
				295	for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
				296	MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
				297	return BB;
				298	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	299	}
				300
				301	MI->eraseFromParent();
				302	return BB;
				303	}
				304
				305	//===----------------------------------------------------------------------===//
				306	// Custom DAG Lowering Operations
				307	//===----------------------------------------------------------------------===//
				308
				309	using namespace llvm::Intrinsic;
				310	using namespace llvm::AMDGPUIntrinsic;
				311
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	312	SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
				313	switch (Op.getOpcode()) {
				314	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				315	case ISD::BR_CC: return LowerBR_CC(Op, DAG);
				316	case ISD::ROTL: return LowerROTL(Op, DAG);
				317	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
				318	case ISD::SELECT: return LowerSELECT(Op, DAG);
				319	case ISD::SETCC: return LowerSETCC(Op, DAG);
				320	case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	321	case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	322	case ISD::FPOW: return LowerFPOW(Op, DAG);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	323	case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	324	case ISD::INTRINSIC_VOID: {
				325	SDValue Chain = Op.getOperand(0);
				326	unsigned IntrinsicID =
				327	cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				328	switch (IntrinsicID) {
				329	case AMDGPUIntrinsic::AMDGPU_store_output: {
				330	MachineFunction &MF = DAG.getMachineFunction();
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	331	R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	332	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
				333	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	334	MFI->LiveOuts.push_back(Reg);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	335	return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
				336	}
Vincent Lejeune	d80bc15	2013-02-14 16:55:06 +0000	[diff] [blame]	337	case AMDGPUIntrinsic::R600_store_swizzle: {
				338	const SDValue Args[8] = {
				339	Chain,
				340	Op.getOperand(2), // Export Value
				341	Op.getOperand(3), // ArrayBase
				342	Op.getOperand(4), // Type
				343	DAG.getConstant(0, MVT::i32), // SWZ_X
				344	DAG.getConstant(1, MVT::i32), // SWZ_Y
				345	DAG.getConstant(2, MVT::i32), // SWZ_Z
				346	DAG.getConstant(3, MVT::i32) // SWZ_W
				347	};
				348	return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
				349	Args, 8);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	350	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	351
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	352	// default for switch(IntrinsicID)
				353	default: break;
				354	}
				355	// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
				356	break;
				357	}
				358	case ISD::INTRINSIC_WO_CHAIN: {
				359	unsigned IntrinsicID =
				360	cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
				361	EVT VT = Op.getValueType();
				362	DebugLoc DL = Op.getDebugLoc();
				363	switch(IntrinsicID) {
				364	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				365	case AMDGPUIntrinsic::R600_load_input: {
				366	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				367	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
				368	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
				369	}
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	370
				371	case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	372	int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	373	int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
				374	MachineSDNode *interp;
				375	if (ijb < 0) {
				376	interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
				377	MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
				378	return DAG.getTargetExtractSubreg(
				379	TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
				380	DL, MVT::f32, SDValue(interp, 0));
				381	}
				382
				383	if (slot % 4 < 2)
				384	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
				385	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				386	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				387	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				388	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				389	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				390	else
				391	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
				392	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				393	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				394	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				395	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				396	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				397
				398	return SDValue(interp, slot % 2);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	399	}
				400
				401	case r600_read_ngroups_x:
				402	return LowerImplicitParameter(DAG, VT, DL, 0);
				403	case r600_read_ngroups_y:
				404	return LowerImplicitParameter(DAG, VT, DL, 1);
				405	case r600_read_ngroups_z:
				406	return LowerImplicitParameter(DAG, VT, DL, 2);
				407	case r600_read_global_size_x:
				408	return LowerImplicitParameter(DAG, VT, DL, 3);
				409	case r600_read_global_size_y:
				410	return LowerImplicitParameter(DAG, VT, DL, 4);
				411	case r600_read_global_size_z:
				412	return LowerImplicitParameter(DAG, VT, DL, 5);
				413	case r600_read_local_size_x:
				414	return LowerImplicitParameter(DAG, VT, DL, 6);
				415	case r600_read_local_size_y:
				416	return LowerImplicitParameter(DAG, VT, DL, 7);
				417	case r600_read_local_size_z:
				418	return LowerImplicitParameter(DAG, VT, DL, 8);
				419
				420	case r600_read_tgid_x:
				421	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				422	AMDGPU::T1_X, VT);
				423	case r600_read_tgid_y:
				424	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				425	AMDGPU::T1_Y, VT);
				426	case r600_read_tgid_z:
				427	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				428	AMDGPU::T1_Z, VT);
				429	case r600_read_tidig_x:
				430	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				431	AMDGPU::T0_X, VT);
				432	case r600_read_tidig_y:
				433	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				434	AMDGPU::T0_Y, VT);
				435	case r600_read_tidig_z:
				436	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				437	AMDGPU::T0_Z, VT);
				438	}
				439	// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
				440	break;
				441	}
				442	} // end switch(Op.getOpcode())
				443	return SDValue();
				444	}
				445
				446	void R600TargetLowering::ReplaceNodeResults(SDNode *N,
				447	SmallVectorImpl<SDValue> &Results,
				448	SelectionDAG &DAG) const {
				449	switch (N->getOpcode()) {
				450	default: return;
				451	case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	452	return;
				453	case ISD::LOAD: {
				454	SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
				455	Results.push_back(SDValue(Node, 0));
				456	Results.push_back(SDValue(Node, 1));
				457	// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
				458	// function
				459	DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
				460	return;
				461	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	462	case ISD::STORE:
				463	SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
				464	Results.push_back(SDValue(Node, 0));
				465	return;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	466	}
				467	}
				468
				469	SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
				470	return DAG.getNode(
				471	ISD::SETCC,
				472	Op.getDebugLoc(),
				473	MVT::i1,
				474	Op, DAG.getConstantFP(0.0f, MVT::f32),
				475	DAG.getCondCode(ISD::SETNE)
				476	);
				477	}
				478
				479	SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
				480	SDValue Chain = Op.getOperand(0);
				481	SDValue CC = Op.getOperand(1);
				482	SDValue LHS = Op.getOperand(2);
				483	SDValue RHS = Op.getOperand(3);
				484	SDValue JumpT = Op.getOperand(4);
				485	SDValue CmpValue;
				486	SDValue Result;
				487
				488	if (LHS.getValueType() == MVT::i32) {
				489	CmpValue = DAG.getNode(
				490	ISD::SELECT_CC,
				491	Op.getDebugLoc(),
				492	MVT::i32,
				493	LHS, RHS,
				494	DAG.getConstant(-1, MVT::i32),
				495	DAG.getConstant(0, MVT::i32),
				496	CC);
				497	} else if (LHS.getValueType() == MVT::f32) {
				498	CmpValue = DAG.getNode(
				499	ISD::SELECT_CC,
				500	Op.getDebugLoc(),
				501	MVT::f32,
				502	LHS, RHS,
				503	DAG.getConstantFP(1.0f, MVT::f32),
				504	DAG.getConstantFP(0.0f, MVT::f32),
				505	CC);
				506	} else {
				507	assert(0 && "Not valid type for br_cc");
				508	}
				509	Result = DAG.getNode(
				510	AMDGPUISD::BRANCH_COND,
				511	CmpValue.getDebugLoc(),
				512	MVT::Other, Chain,
				513	JumpT, CmpValue);
				514	return Result;
				515	}
				516
				517	SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
				518	DebugLoc DL,
				519	unsigned DwordOffset) const {
				520	unsigned ByteOffset = DwordOffset * 4;
				521	PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
				522	AMDGPUAS::PARAM_I_ADDRESS);
				523
				524	// We shouldn't be using an offset wider than 16-bits for implicit parameters.
				525	assert(isInt<16>(ByteOffset));
				526
				527	return DAG.getLoad(VT, DL, DAG.getEntryNode(),
				528	DAG.getConstant(ByteOffset, MVT::i32), // PTR
				529	MachinePointerInfo(ConstantPointerNull::get(PtrType)),
				530	false, false, false, 0);
				531	}
				532
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	533	SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
				534
				535	MachineFunction &MF = DAG.getMachineFunction();
				536	const AMDGPUFrameLowering *TFL =
				537	static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
				538
				539	FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
				540	assert(FIN);
				541
				542	unsigned FrameIndex = FIN->getIndex();
				543	unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
				544	return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
				545	}
				546
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	547	SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
				548	DebugLoc DL = Op.getDebugLoc();
				549	EVT VT = Op.getValueType();
				550
				551	return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
				552	Op.getOperand(0),
				553	Op.getOperand(0),
				554	DAG.getNode(ISD::SUB, DL, VT,
				555	DAG.getConstant(32, MVT::i32),
				556	Op.getOperand(1)));
				557	}
				558
				559	bool R600TargetLowering::isZero(SDValue Op) const {
				560	if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
				561	return Cst->isNullValue();
				562	} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
				563	return CstFP->isZero();
				564	} else {
				565	return false;
				566	}
				567	}
				568
				569	SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
				570	DebugLoc DL = Op.getDebugLoc();
				571	EVT VT = Op.getValueType();
				572
				573	SDValue LHS = Op.getOperand(0);
				574	SDValue RHS = Op.getOperand(1);
				575	SDValue True = Op.getOperand(2);
				576	SDValue False = Op.getOperand(3);
				577	SDValue CC = Op.getOperand(4);
				578	SDValue Temp;
				579
				580	// LHS and RHS are guaranteed to be the same value type
				581	EVT CompareVT = LHS.getValueType();
				582
				583	// Check if we can lower this to a native operation.
				584
				585	// Try to lower to a CND* instruction:
				586	// CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
				587	// can be lowered to CND* instructions can also be lowered to SET*
				588	// instructions. CND* instructions are cheaper, because they dont't
				589	// require additional instructions to convert their result to the correct
				590	// value type, so this check should be first.
				591	if (isZero(LHS) \|\| isZero(RHS)) {
				592	SDValue Cond = (isZero(LHS) ? RHS : LHS);
				593	SDValue Zero = (isZero(LHS) ? LHS : RHS);
				594	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				595	if (CompareVT != VT) {
				596	// Bitcast True / False to the correct types. This will end up being
				597	// a nop, but it allows us to define only a single pattern in the
				598	// .TD files for each CND* instruction rather than having to have
				599	// one pattern for integer True/False and one for fp True/False
				600	True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
				601	False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
				602	}
				603	if (isZero(LHS)) {
				604	CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
				605	}
				606
				607	switch (CCOpcode) {
				608	case ISD::SETONE:
				609	case ISD::SETUNE:
				610	case ISD::SETNE:
				611	case ISD::SETULE:
				612	case ISD::SETULT:
				613	case ISD::SETOLE:
				614	case ISD::SETOLT:
				615	case ISD::SETLE:
				616	case ISD::SETLT:
				617	CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
				618	Temp = True;
				619	True = False;
				620	False = Temp;
				621	break;
				622	default:
				623	break;
				624	}
				625	SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				626	Cond, Zero,
				627	True, False,
				628	DAG.getCondCode(CCOpcode));
				629	return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
				630	}
				631
				632	// Try to lower to a SET* instruction:
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	633	//
				634	// CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
				635	// but for the other case where CompareVT != VT, all operands of
				636	// SELECT_CC need to have the same value type, so we need to change True and
				637	// False to be the same type as LHS and RHS, and then convert the result of
				638	// the select_cc back to the correct type.
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	639
				640	// Move hardware True/False values to the correct operand.
				641	if (isHWTrueValue(False) && isHWFalseValue(True)) {
				642	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				643	std::swap(False, True);
				644	CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
				645	}
				646
				647	if (isHWTrueValue(True) && isHWFalseValue(False)) {
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	648	if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
				649	SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				650	LHS, RHS,
				651	DAG.getConstant(-1, MVT::i32),
				652	DAG.getConstant(0, MVT::i32),
				653	CC);
				654	// Convert integer values of true (-1) and false (0) to fp values of
				655	// true (1.0f) and false (0.0f).
				656	SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
				657	DAG.getConstant(1, MVT::i32));
				658	return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	659	} else {
				660	// This SELECT_CC is already legal.
				661	return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
				662	}
				663	}
				664
				665	// Possible Min/Max pattern
				666	SDValue MinMax = LowerMinMax(Op, DAG);
				667	if (MinMax.getNode()) {
				668	return MinMax;
				669	}
				670
				671	// If we make it this for it means we have no native instructions to handle
				672	// this SELECT_CC, so we must lower it.
				673	SDValue HWTrue, HWFalse;
				674
				675	if (CompareVT == MVT::f32) {
				676	HWTrue = DAG.getConstantFP(1.0f, CompareVT);
				677	HWFalse = DAG.getConstantFP(0.0f, CompareVT);
				678	} else if (CompareVT == MVT::i32) {
				679	HWTrue = DAG.getConstant(-1, CompareVT);
				680	HWFalse = DAG.getConstant(0, CompareVT);
				681	}
				682	else {
				683	assert(!"Unhandled value type in LowerSELECT_CC");
				684	}
				685
				686	// Lower this unsupported SELECT_CC into a combination of two supported
				687	// SELECT_CC operations.
				688	SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
				689
				690	return DAG.getNode(ISD::SELECT_CC, DL, VT,
				691	Cond, HWFalse,
				692	True, False,
				693	DAG.getCondCode(ISD::SETNE));
				694	}
				695
				696	SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
				697	return DAG.getNode(ISD::SELECT_CC,
				698	Op.getDebugLoc(),
				699	Op.getValueType(),
				700	Op.getOperand(0),
				701	DAG.getConstant(0, MVT::i32),
				702	Op.getOperand(1),
				703	Op.getOperand(2),
				704	DAG.getCondCode(ISD::SETNE));
				705	}
				706
				707	SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
				708	SDValue Cond;
				709	SDValue LHS = Op.getOperand(0);
				710	SDValue RHS = Op.getOperand(1);
				711	SDValue CC = Op.getOperand(2);
				712	DebugLoc DL = Op.getDebugLoc();
				713	assert(Op.getValueType() == MVT::i32);
				714	if (LHS.getValueType() == MVT::i32) {
				715	Cond = DAG.getNode(
				716	ISD::SELECT_CC,
				717	Op.getDebugLoc(),
				718	MVT::i32,
				719	LHS, RHS,
				720	DAG.getConstant(-1, MVT::i32),
				721	DAG.getConstant(0, MVT::i32),
				722	CC);
				723	} else if (LHS.getValueType() == MVT::f32) {
				724	Cond = DAG.getNode(
				725	ISD::SELECT_CC,
				726	Op.getDebugLoc(),
				727	MVT::f32,
				728	LHS, RHS,
				729	DAG.getConstantFP(1.0f, MVT::f32),
				730	DAG.getConstantFP(0.0f, MVT::f32),
				731	CC);
				732	Cond = DAG.getNode(
				733	ISD::FP_TO_SINT,
				734	DL,
				735	MVT::i32,
				736	Cond);
				737	} else {
				738	assert(0 && "Not valid type for set_cc");
				739	}
				740	Cond = DAG.getNode(
				741	ISD::AND,
				742	DL,
				743	MVT::i32,
				744	DAG.getConstant(1, MVT::i32),
				745	Cond);
				746	return Cond;
				747	}
				748
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	749	/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
				750	/// convert these pointers to a register index. Each register holds
				751	/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
				752	/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
				753	/// for indirect addressing.
				754	SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
				755	unsigned StackWidth,
				756	SelectionDAG &DAG) const {
				757	unsigned SRLPad;
				758	switch(StackWidth) {
				759	case 1:
				760	SRLPad = 2;
				761	break;
				762	case 2:
				763	SRLPad = 3;
				764	break;
				765	case 4:
				766	SRLPad = 4;
				767	break;
				768	default: llvm_unreachable("Invalid stack width");
				769	}
				770
				771	return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
				772	DAG.getConstant(SRLPad, MVT::i32));
				773	}
				774
				775	void R600TargetLowering::getStackAddress(unsigned StackWidth,
				776	unsigned ElemIdx,
				777	unsigned &Channel,
				778	unsigned &PtrIncr) const {
				779	switch (StackWidth) {
				780	default:
				781	case 1:
				782	Channel = 0;
				783	if (ElemIdx > 0) {
				784	PtrIncr = 1;
				785	} else {
				786	PtrIncr = 0;
				787	}
				788	break;
				789	case 2:
				790	Channel = ElemIdx % 2;
				791	if (ElemIdx == 2) {
				792	PtrIncr = 1;
				793	} else {
				794	PtrIncr = 0;
				795	}
				796	break;
				797	case 4:
				798	Channel = ElemIdx;
				799	PtrIncr = 0;
				800	break;
				801	}
				802	}
				803
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	804	SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
				805	DebugLoc DL = Op.getDebugLoc();
				806	StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
				807	SDValue Chain = Op.getOperand(0);
				808	SDValue Value = Op.getOperand(1);
				809	SDValue Ptr = Op.getOperand(2);
				810
				811	if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
				812	Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
				813	// Convert pointer from byte address to dword address.
				814	Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
				815	DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
				816	Ptr, DAG.getConstant(2, MVT::i32)));
				817
				818	if (StoreNode->isTruncatingStore() \|\| StoreNode->isIndexed()) {
				819	assert(!"Truncated and indexed stores not supported yet");
				820	} else {
				821	Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
				822	}
				823	return Chain;
				824	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	825
				826	EVT ValueVT = Value.getValueType();
				827
				828	if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				829	return SDValue();
				830	}
				831
				832	// Lowering for indirect addressing
				833
				834	const MachineFunction &MF = DAG.getMachineFunction();
				835	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				836	getTargetMachine().getFrameLowering());
				837	unsigned StackWidth = TFL->getStackWidth(MF);
				838
				839	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				840
				841	if (ValueVT.isVector()) {
				842	unsigned NumElemVT = ValueVT.getVectorNumElements();
				843	EVT ElemVT = ValueVT.getVectorElementType();
				844	SDValue Stores[4];
				845
				846	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				847	"vector width in load");
				848
				849	for (unsigned i = 0; i < NumElemVT; ++i) {
				850	unsigned Channel, PtrIncr;
				851	getStackAddress(StackWidth, i, Channel, PtrIncr);
				852	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				853	DAG.getConstant(PtrIncr, MVT::i32));
				854	SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
				855	Value, DAG.getConstant(i, MVT::i32));
				856
				857	Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
				858	Chain, Elem, Ptr,
				859	DAG.getTargetConstant(Channel, MVT::i32));
				860	}
				861	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
				862	} else {
				863	if (ValueVT == MVT::i8) {
				864	Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
				865	}
				866	Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
				867	DAG.getTargetConstant(0, MVT::i32)); // Channel
				868	}
				869
				870	return Chain;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	871	}
				872
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	873	// return (512 + (kc_bank << 12)
				874	static int
				875	ConstantAddressBlock(unsigned AddressSpace) {
				876	switch (AddressSpace) {
				877	case AMDGPUAS::CONSTANT_BUFFER_0:
				878	return 512;
				879	case AMDGPUAS::CONSTANT_BUFFER_1:
				880	return 512 + 4096;
				881	case AMDGPUAS::CONSTANT_BUFFER_2:
				882	return 512 + 4096 * 2;
				883	case AMDGPUAS::CONSTANT_BUFFER_3:
				884	return 512 + 4096 * 3;
				885	case AMDGPUAS::CONSTANT_BUFFER_4:
				886	return 512 + 4096 * 4;
				887	case AMDGPUAS::CONSTANT_BUFFER_5:
				888	return 512 + 4096 * 5;
				889	case AMDGPUAS::CONSTANT_BUFFER_6:
				890	return 512 + 4096 * 6;
				891	case AMDGPUAS::CONSTANT_BUFFER_7:
				892	return 512 + 4096 * 7;
				893	case AMDGPUAS::CONSTANT_BUFFER_8:
				894	return 512 + 4096 * 8;
				895	case AMDGPUAS::CONSTANT_BUFFER_9:
				896	return 512 + 4096 * 9;
				897	case AMDGPUAS::CONSTANT_BUFFER_10:
				898	return 512 + 4096 * 10;
				899	case AMDGPUAS::CONSTANT_BUFFER_11:
				900	return 512 + 4096 * 11;
				901	case AMDGPUAS::CONSTANT_BUFFER_12:
				902	return 512 + 4096 * 12;
				903	case AMDGPUAS::CONSTANT_BUFFER_13:
				904	return 512 + 4096 * 13;
				905	case AMDGPUAS::CONSTANT_BUFFER_14:
				906	return 512 + 4096 * 14;
				907	case AMDGPUAS::CONSTANT_BUFFER_15:
				908	return 512 + 4096 * 15;
				909	default:
				910	return -1;
				911	}
				912	}
				913
				914	SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
				915	{
				916	EVT VT = Op.getValueType();
				917	DebugLoc DL = Op.getDebugLoc();
				918	LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
				919	SDValue Chain = Op.getOperand(0);
				920	SDValue Ptr = Op.getOperand(1);
				921	SDValue LoweredLoad;
				922
				923	int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
				924	if (ConstantBlock > -1) {
				925	SDValue Result;
				926	if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) \|\|
Vincent Lejeune	743dca0	2013-03-05 15:04:29 +0000	[diff] [blame]	927	dyn_cast<Constant>(LoadNode->getSrcValue()) \|\|
				928	dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	929	SDValue Slots[4];
				930	for (unsigned i = 0; i < 4; i++) {
				931	// We want Const position encoded with the following formula :
				932	// (((512 + (kc_bank << 12) + const_index) << 2) + chan)
				933	// const_index is Ptr computed by llvm using an alignment of 16.
				934	// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
				935	// then div by 4 at the ISel step
				936	SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
				937	DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
				938	Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
				939	}
				940	Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
				941	} else {
				942	// non constant ptr cant be folded, keeps it as a v4f32 load
				943	Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune	743dca0	2013-03-05 15:04:29 +0000	[diff] [blame]	944	DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig	189357c	2013-03-07 09:03:59 +0000	[diff] [blame]	945	DAG.getConstant(LoadNode->getAddressSpace() -
				946	AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	947	);
				948	}
				949
				950	if (!VT.isVector()) {
				951	Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
				952	DAG.getConstant(0, MVT::i32));
				953	}
				954
				955	SDValue MergedValues[2] = {
				956	Result,
				957	Chain
				958	};
				959	return DAG.getMergeValues(MergedValues, 2, DL);
				960	}
				961
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	962	if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				963	return SDValue();
				964	}
				965
				966	// Lowering for indirect addressing
				967	const MachineFunction &MF = DAG.getMachineFunction();
				968	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				969	getTargetMachine().getFrameLowering());
				970	unsigned StackWidth = TFL->getStackWidth(MF);
				971
				972	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				973
				974	if (VT.isVector()) {
				975	unsigned NumElemVT = VT.getVectorNumElements();
				976	EVT ElemVT = VT.getVectorElementType();
				977	SDValue Loads[4];
				978
				979	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				980	"vector width in load");
				981
				982	for (unsigned i = 0; i < NumElemVT; ++i) {
				983	unsigned Channel, PtrIncr;
				984	getStackAddress(StackWidth, i, Channel, PtrIncr);
				985	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				986	DAG.getConstant(PtrIncr, MVT::i32));
				987	Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
				988	Chain, Ptr,
				989	DAG.getTargetConstant(Channel, MVT::i32),
				990	Op.getOperand(2));
				991	}
				992	for (unsigned i = NumElemVT; i < 4; ++i) {
				993	Loads[i] = DAG.getUNDEF(ElemVT);
				994	}
				995	EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
				996	LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
				997	} else {
				998	LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
				999	Chain, Ptr,
				1000	DAG.getTargetConstant(0, MVT::i32), // Channel
				1001	Op.getOperand(2));
				1002	}
				1003
				1004	SDValue Ops[2];
				1005	Ops[0] = LoweredLoad;
				1006	Ops[1] = Chain;
				1007
				1008	return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1009	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1010
				1011	SDValue R600TargetLowering::LowerFPOW(SDValue Op,
				1012	SelectionDAG &DAG) const {
				1013	DebugLoc DL = Op.getDebugLoc();
				1014	EVT VT = Op.getValueType();
				1015	SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
				1016	SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
				1017	return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
				1018	}
				1019
				1020	/// XXX Only kernel functions are supported, so we can assume for now that
				1021	/// every function is a kernel function, but in the future we should use
				1022	/// separate calling conventions for kernel and non-kernel functions.
				1023	SDValue R600TargetLowering::LowerFormalArguments(
				1024	SDValue Chain,
				1025	CallingConv::ID CallConv,
				1026	bool isVarArg,
				1027	const SmallVectorImpl<ISD::InputArg> &Ins,
				1028	DebugLoc DL, SelectionDAG &DAG,
				1029	SmallVectorImpl<SDValue> &InVals) const {
				1030	unsigned ParamOffsetBytes = 36;
				1031	Function::const_arg_iterator FuncArg =
				1032	DAG.getMachineFunction().getFunction()->arg_begin();
				1033	for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
				1034	EVT VT = Ins[i].VT;
				1035	Type *ArgType = FuncArg->getType();
				1036	unsigned ArgSizeInBits = ArgType->isPointerTy() ?
				1037	32 : ArgType->getPrimitiveSizeInBits();
				1038	unsigned ArgBytes = ArgSizeInBits >> 3;
				1039	EVT ArgVT;
				1040	if (ArgSizeInBits < VT.getSizeInBits()) {
				1041	assert(!ArgType->isFloatTy() &&
				1042	"Extending floating point arguments not supported yet");
				1043	ArgVT = MVT::getIntegerVT(ArgSizeInBits);
				1044	} else {
				1045	ArgVT = VT;
				1046	}
				1047	PointerType PtrTy = PointerType::get(VT.getTypeForEVT(DAG.getContext()),
				1048	AMDGPUAS::PARAM_I_ADDRESS);
				1049	SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
				1050	DAG.getConstant(ParamOffsetBytes, MVT::i32),
Tom Stellard	8d469ed	2013-02-19 15:22:44 +0000	[diff] [blame]	1051	MachinePointerInfo(UndefValue::get(PtrTy)),
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1052	ArgVT, false, false, ArgBytes);
				1053	InVals.push_back(Arg);
				1054	ParamOffsetBytes += ArgBytes;
				1055	}
				1056	return Chain;
				1057	}
				1058
				1059	EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
				1060	if (!VT.isVector()) return MVT::i32;
				1061	return VT.changeVectorElementTypeToInteger();
				1062	}
				1063
				1064	//===----------------------------------------------------------------------===//
				1065	// Custom DAG Optimizations
				1066	//===----------------------------------------------------------------------===//
				1067
				1068	SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
				1069	DAGCombinerInfo &DCI) const {
				1070	SelectionDAG &DAG = DCI.DAG;
				1071
				1072	switch (N->getOpcode()) {
				1073	// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
				1074	case ISD::FP_ROUND: {
				1075	SDValue Arg = N->getOperand(0);
				1076	if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
				1077	return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
				1078	Arg.getOperand(0));
				1079	}
				1080	break;
				1081	}
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1082
				1083	// (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
				1084	// (i32 select_cc f32, f32, -1, 0 cc)
				1085	//
				1086	// Mesa's GLSL frontend generates the above pattern a lot and we can lower
				1087	// this to one of the SET*_DX10 instructions.
				1088	case ISD::FP_TO_SINT: {
				1089	SDValue FNeg = N->getOperand(0);
				1090	if (FNeg.getOpcode() != ISD::FNEG) {
				1091	return SDValue();
				1092	}
				1093	SDValue SelectCC = FNeg.getOperand(0);
				1094	if (SelectCC.getOpcode() != ISD::SELECT_CC \|\|
				1095	SelectCC.getOperand(0).getValueType() != MVT::f32 \|\| // LHS
				1096	SelectCC.getOperand(2).getValueType() != MVT::f32 \|\| // True
				1097	!isHWTrueValue(SelectCC.getOperand(2)) \|\|
				1098	!isHWFalseValue(SelectCC.getOperand(3))) {
				1099	return SDValue();
				1100	}
				1101
				1102	return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
				1103	SelectCC.getOperand(0), // LHS
				1104	SelectCC.getOperand(1), // RHS
				1105	DAG.getConstant(-1, MVT::i32), // True
				1106	DAG.getConstant(0, MVT::i32), // Flase
				1107	SelectCC.getOperand(4)); // CC
				1108
				1109	break;
				1110	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1111	// Extract_vec (Build_vector) generated by custom lowering
				1112	// also needs to be customly combined
				1113	case ISD::EXTRACT_VECTOR_ELT: {
				1114	SDValue Arg = N->getOperand(0);
				1115	if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
				1116	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1117	unsigned Element = Const->getZExtValue();
				1118	return Arg->getOperand(Element);
				1119	}
				1120	}
Tom Stellard	dd04c83	2013-01-31 22:11:53 +0000	[diff] [blame]	1121	if (Arg.getOpcode() == ISD::BITCAST &&
				1122	Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
				1123	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1124	unsigned Element = Const->getZExtValue();
				1125	return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
				1126	Arg->getOperand(0).getOperand(Element));
				1127	}
				1128	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1129	}
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1130
				1131	case ISD::SELECT_CC: {
				1132	// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
				1133	// selectcc x, y, a, b, inv(cc)
				1134	SDValue LHS = N->getOperand(0);
				1135	if (LHS.getOpcode() != ISD::SELECT_CC) {
				1136	return SDValue();
				1137	}
				1138
				1139	SDValue RHS = N->getOperand(1);
				1140	SDValue True = N->getOperand(2);
				1141	SDValue False = N->getOperand(3);
				1142
				1143	if (LHS.getOperand(2).getNode() != True.getNode() \|\|
				1144	LHS.getOperand(3).getNode() != False.getNode() \|\|
				1145	RHS.getNode() != False.getNode() \|\|
				1146	cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
				1147	return SDValue();
				1148	}
				1149
				1150	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
				1151	CCOpcode = ISD::getSetCCInverse(
				1152	CCOpcode, LHS.getOperand(0).getValueType().isInteger());
				1153	return DAG.getSelectCC(N->getDebugLoc(),
				1154	LHS.getOperand(0),
				1155	LHS.getOperand(1),
				1156	LHS.getOperand(2),
				1157	LHS.getOperand(3),
				1158	CCOpcode);
Vincent Lejeune	d80bc15	2013-02-14 16:55:06 +0000	[diff] [blame]	1159	}
				1160	case AMDGPUISD::EXPORT: {
				1161	SDValue Arg = N->getOperand(1);
				1162	if (Arg.getOpcode() != ISD::BUILD_VECTOR)
				1163	break;
				1164	SDValue NewBldVec[4] = {
				1165	DAG.getUNDEF(MVT::f32),
				1166	DAG.getUNDEF(MVT::f32),
				1167	DAG.getUNDEF(MVT::f32),
				1168	DAG.getUNDEF(MVT::f32)
				1169	};
				1170	SDValue NewArgs[8] = {
				1171	N->getOperand(0), // Chain
				1172	SDValue(),
				1173	N->getOperand(2), // ArrayBase
				1174	N->getOperand(3), // Type
				1175	N->getOperand(4), // SWZ_X
				1176	N->getOperand(5), // SWZ_Y
				1177	N->getOperand(6), // SWZ_Z
				1178	N->getOperand(7) // SWZ_W
				1179	};
				1180	for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
				1181	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
				1182	if (C->isZero()) {
				1183	NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
				1184	} else if (C->isExactlyValue(1.0)) {
				1185	NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
				1186	} else {
				1187	NewBldVec[i] = Arg.getOperand(i);
				1188	}
				1189	} else {
				1190	NewBldVec[i] = Arg.getOperand(i);
				1191	}
				1192	}
				1193	DebugLoc DL = N->getDebugLoc();
				1194	NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
				1195	return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1196	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1197	}
				1198	return SDValue();
				1199	}