Blame - llvm/lib/Target/R600/R600ISelLowering.cpp - toolchain/llvm-project

blob: 85187f8fc516597a55f7b76385ca6fb1e28284c4 [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief Custom DAG lowering for R600
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "R600ISelLowering.h"
				16	#include "R600Defines.h"
				17	#include "R600InstrInfo.h"
				18	#include "R600MachineFunctionInfo.h"
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	19	#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	20	#include "llvm/CodeGen/MachineInstrBuilder.h"
				21	#include "llvm/CodeGen/MachineRegisterInfo.h"
				22	#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth	9fb823b	2013-01-02 11:36:10 +0000	[diff] [blame]	23	#include "llvm/IR/Argument.h"
				24	#include "llvm/IR/Function.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	25
				26	using namespace llvm;
				27
				28	R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
				29	AMDGPUTargetLowering(TM),
				30	TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
				31	setOperationAction(ISD::MUL, MVT::i64, Expand);
				32	addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
				33	addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
				34	addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
				35	addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
				36	computeRegisterProperties();
				37
				38	setOperationAction(ISD::FADD, MVT::v4f32, Expand);
				39	setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
				40	setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
				41	setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
				42
				43	setOperationAction(ISD::ADD, MVT::v4i32, Expand);
				44	setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellard	a8b0351	2012-12-21 16:33:24 +0000	[diff] [blame]	45	setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
				46	setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
				47	setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
				48	setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	49	setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
				50	setOperationAction(ISD::UREM, MVT::v4i32, Expand);
				51	setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
				52
				53	setOperationAction(ISD::BR_CC, MVT::i32, Custom);
				54	setOperationAction(ISD::BR_CC, MVT::f32, Custom);
				55
				56	setOperationAction(ISD::FSUB, MVT::f32, Expand);
				57
				58	setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
				59	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				60	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
				61	setOperationAction(ISD::FPOW, MVT::f32, Custom);
				62
				63	setOperationAction(ISD::ROTL, MVT::i32, Custom);
				64
				65	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				66	setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
				67
				68	setOperationAction(ISD::SETCC, MVT::i32, Custom);
				69	setOperationAction(ISD::SETCC, MVT::f32, Custom);
				70	setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
				71
				72	setOperationAction(ISD::SELECT, MVT::i32, Custom);
				73	setOperationAction(ISD::SELECT, MVT::f32, Custom);
				74
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	75	// Legalize loads and stores to the private address space.
				76	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				77	setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
				78	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
				79	setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
				80	setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
				81	setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
				82	setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
				83	setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	84	setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	85	setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	86	setOperationAction(ISD::STORE, MVT::v4i32, Custom);
				87
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	88	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				89	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	90	setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
				91
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	92	setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	93	setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	94
				95	setSchedulingPreference(Sched::VLIW);
				96	}
				97
				98	MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
				99	MachineInstr * MI, MachineBasicBlock * BB) const {
				100	MachineFunction * MF = BB->getParent();
				101	MachineRegisterInfo &MRI = MF->getRegInfo();
				102	MachineBasicBlock::iterator I = *MI;
				103
				104	switch (MI->getOpcode()) {
				105	default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
				106	case AMDGPU::SHADER_TYPE: break;
				107	case AMDGPU::CLAMP_R600: {
				108	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				109	AMDGPU::MOV,
				110	MI->getOperand(0).getReg(),
				111	MI->getOperand(1).getReg());
				112	TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
				113	break;
				114	}
				115
				116	case AMDGPU::FABS_R600: {
				117	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				118	AMDGPU::MOV,
				119	MI->getOperand(0).getReg(),
				120	MI->getOperand(1).getReg());
				121	TII->addFlag(NewMI, 0, MO_FLAG_ABS);
				122	break;
				123	}
				124
				125	case AMDGPU::FNEG_R600: {
				126	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				127	AMDGPU::MOV,
				128	MI->getOperand(0).getReg(),
				129	MI->getOperand(1).getReg());
				130	TII->addFlag(NewMI, 0, MO_FLAG_NEG);
				131	break;
				132	}
				133
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	134	case AMDGPU::MASK_WRITE: {
				135	unsigned maskedRegister = MI->getOperand(0).getReg();
				136	assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
				137	MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
				138	TII->addFlag(defInstr, 0, MO_FLAG_MASK);
				139	break;
				140	}
				141
				142	case AMDGPU::MOV_IMM_F32:
				143	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				144	MI->getOperand(1).getFPImm()->getValueAPF()
				145	.bitcastToAPInt().getZExtValue());
				146	break;
				147	case AMDGPU::MOV_IMM_I32:
				148	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				149	MI->getOperand(1).getImm());
				150	break;
				151
				152
				153	case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
				154	case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
				155	unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
				156
				157	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				158	.addOperand(MI->getOperand(0))
				159	.addOperand(MI->getOperand(1))
				160	.addImm(EOP); // Set End of program bit
				161	break;
				162	}
				163
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	164	case AMDGPU::TXD: {
				165	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				166	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				167
				168	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				169	.addOperand(MI->getOperand(3))
				170	.addOperand(MI->getOperand(4))
				171	.addOperand(MI->getOperand(5))
				172	.addOperand(MI->getOperand(6));
				173	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				174	.addOperand(MI->getOperand(2))
				175	.addOperand(MI->getOperand(4))
				176	.addOperand(MI->getOperand(5))
				177	.addOperand(MI->getOperand(6));
				178	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
				179	.addOperand(MI->getOperand(0))
				180	.addOperand(MI->getOperand(1))
				181	.addOperand(MI->getOperand(4))
				182	.addOperand(MI->getOperand(5))
				183	.addOperand(MI->getOperand(6))
				184	.addReg(T0, RegState::Implicit)
				185	.addReg(T1, RegState::Implicit);
				186	break;
				187	}
				188
				189	case AMDGPU::TXD_SHADOW: {
				190	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				191	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				192
				193	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				194	.addOperand(MI->getOperand(3))
				195	.addOperand(MI->getOperand(4))
				196	.addOperand(MI->getOperand(5))
				197	.addOperand(MI->getOperand(6));
				198	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				199	.addOperand(MI->getOperand(2))
				200	.addOperand(MI->getOperand(4))
				201	.addOperand(MI->getOperand(5))
				202	.addOperand(MI->getOperand(6));
				203	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
				204	.addOperand(MI->getOperand(0))
				205	.addOperand(MI->getOperand(1))
				206	.addOperand(MI->getOperand(4))
				207	.addOperand(MI->getOperand(5))
				208	.addOperand(MI->getOperand(6))
				209	.addReg(T0, RegState::Implicit)
				210	.addReg(T1, RegState::Implicit);
				211	break;
				212	}
				213
				214	case AMDGPU::BRANCH:
				215	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				216	.addOperand(MI->getOperand(0))
				217	.addReg(0);
				218	break;
				219
				220	case AMDGPU::BRANCH_COND_f32: {
				221	MachineInstr *NewMI =
				222	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				223	AMDGPU::PREDICATE_BIT)
				224	.addOperand(MI->getOperand(1))
				225	.addImm(OPCODE_IS_NOT_ZERO)
				226	.addImm(0); // Flags
				227	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				228	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				229	.addOperand(MI->getOperand(0))
				230	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				231	break;
				232	}
				233
				234	case AMDGPU::BRANCH_COND_i32: {
				235	MachineInstr *NewMI =
				236	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				237	AMDGPU::PREDICATE_BIT)
				238	.addOperand(MI->getOperand(1))
				239	.addImm(OPCODE_IS_NOT_ZERO_INT)
				240	.addImm(0); // Flags
				241	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				242	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				243	.addOperand(MI->getOperand(0))
				244	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				245	break;
				246	}
				247
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	248	case AMDGPU::EG_ExportSwz:
				249	case AMDGPU::R600_ExportSwz: {
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	250	// Instruction is left unmodified if its not the last one of its type
				251	bool isLastInstructionOfItsType = true;
				252	unsigned InstExportType = MI->getOperand(1).getImm();
				253	for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
				254	EndBlock = BB->end(); NextExportInst != EndBlock;
				255	NextExportInst = llvm::next(NextExportInst)) {
				256	if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz \|\|
				257	NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
				258	unsigned CurrentInstExportType = NextExportInst->getOperand(1)
				259	.getImm();
				260	if (CurrentInstExportType == InstExportType) {
				261	isLastInstructionOfItsType = false;
				262	break;
				263	}
				264	}
				265	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	266	bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	267	if (!EOP && !isLastInstructionOfItsType)
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	268	return BB;
				269	unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
				270	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				271	.addOperand(MI->getOperand(0))
				272	.addOperand(MI->getOperand(1))
				273	.addOperand(MI->getOperand(2))
				274	.addOperand(MI->getOperand(3))
				275	.addOperand(MI->getOperand(4))
				276	.addOperand(MI->getOperand(5))
				277	.addOperand(MI->getOperand(6))
				278	.addImm(CfInst)
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	279	.addImm(EOP);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	280	break;
				281	}
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	282	case AMDGPU::RETURN: {
				283	// RETURN instructions must have the live-out registers as implicit uses,
				284	// otherwise they appear dead.
				285	R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
				286	MachineInstrBuilder MIB(*MF, MI);
				287	for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
				288	MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
				289	return BB;
				290	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	291	}
				292
				293	MI->eraseFromParent();
				294	return BB;
				295	}
				296
				297	//===----------------------------------------------------------------------===//
				298	// Custom DAG Lowering Operations
				299	//===----------------------------------------------------------------------===//
				300
				301	using namespace llvm::Intrinsic;
				302	using namespace llvm::AMDGPUIntrinsic;
				303
				304	static SDValue
				305	InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
				306	unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
				307	SDValue Scalar, SDValue Chain) {
				308	if (!ExportMap[Slot]) {
				309	SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
				310	DL, MVT::v4f32,
				311	DAG.getUNDEF(MVT::v4f32),
				312	Scalar,
				313	DAG.getConstant(Channel, MVT::i32));
				314
				315	unsigned Mask = 1 << Channel;
				316
				317	const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
				318	DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
				319	DAG.getConstant(Mask, MVT::i32)};
				320
				321	SDValue Res = DAG.getNode(
				322	AMDGPUISD::EXPORT,
				323	DL,
				324	MVT::Other,
				325	Ops, 6);
				326	ExportMap[Slot] = Res.getNode();
				327	return Res;
				328	}
				329
				330	SDNode ExportInstruction = (SDNode ) ExportMap[Slot] ;
				331	SDValue PreviousVector = ExportInstruction->getOperand(1);
				332	SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
				333	DL, MVT::v4f32,
				334	PreviousVector,
				335	Scalar,
				336	DAG.getConstant(Channel, MVT::i32));
				337
				338	unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5))
				339	->getZExtValue();
				340	Mask \|= (1 << Channel);
				341
				342	const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
				343	DAG.getConstant(Inst, MVT::i32),
				344	DAG.getConstant(Type, MVT::i32),
				345	DAG.getConstant(Slot, MVT::i32),
				346	DAG.getConstant(Mask, MVT::i32)};
				347
				348	DAG.UpdateNodeOperands(ExportInstruction,
				349	Ops, 6);
				350
				351	return Chain;
				352
				353	}
				354
				355	SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
				356	switch (Op.getOpcode()) {
				357	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				358	case ISD::BR_CC: return LowerBR_CC(Op, DAG);
				359	case ISD::ROTL: return LowerROTL(Op, DAG);
				360	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
				361	case ISD::SELECT: return LowerSELECT(Op, DAG);
				362	case ISD::SETCC: return LowerSETCC(Op, DAG);
				363	case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	364	case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	365	case ISD::FPOW: return LowerFPOW(Op, DAG);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	366	case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	367	case ISD::INTRINSIC_VOID: {
				368	SDValue Chain = Op.getOperand(0);
				369	unsigned IntrinsicID =
				370	cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				371	switch (IntrinsicID) {
				372	case AMDGPUIntrinsic::AMDGPU_store_output: {
				373	MachineFunction &MF = DAG.getMachineFunction();
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	374	R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	375	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
				376	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	377	MFI->LiveOuts.push_back(Reg);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	378	return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
				379	}
				380	case AMDGPUIntrinsic::R600_store_pixel_color: {
				381	MachineFunction &MF = DAG.getMachineFunction();
				382	R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
				383	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
				384
				385	SDNode **OutputsMap = MFI->Outputs;
				386	return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
				387	RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
				388	Chain);
				389
				390	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	391
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	392	// default for switch(IntrinsicID)
				393	default: break;
				394	}
				395	// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
				396	break;
				397	}
				398	case ISD::INTRINSIC_WO_CHAIN: {
				399	unsigned IntrinsicID =
				400	cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
				401	EVT VT = Op.getValueType();
				402	DebugLoc DL = Op.getDebugLoc();
				403	switch(IntrinsicID) {
				404	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				405	case AMDGPUIntrinsic::R600_load_input: {
				406	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				407	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
				408	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
				409	}
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	410
				411	case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	412	int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	413	int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
				414	MachineSDNode *interp;
				415	if (ijb < 0) {
				416	interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
				417	MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
				418	return DAG.getTargetExtractSubreg(
				419	TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
				420	DL, MVT::f32, SDValue(interp, 0));
				421	}
				422
				423	if (slot % 4 < 2)
				424	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
				425	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				426	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				427	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				428	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				429	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				430	else
				431	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
				432	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				433	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				434	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				435	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				436	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				437
				438	return SDValue(interp, slot % 2);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	439	}
				440
				441	case r600_read_ngroups_x:
				442	return LowerImplicitParameter(DAG, VT, DL, 0);
				443	case r600_read_ngroups_y:
				444	return LowerImplicitParameter(DAG, VT, DL, 1);
				445	case r600_read_ngroups_z:
				446	return LowerImplicitParameter(DAG, VT, DL, 2);
				447	case r600_read_global_size_x:
				448	return LowerImplicitParameter(DAG, VT, DL, 3);
				449	case r600_read_global_size_y:
				450	return LowerImplicitParameter(DAG, VT, DL, 4);
				451	case r600_read_global_size_z:
				452	return LowerImplicitParameter(DAG, VT, DL, 5);
				453	case r600_read_local_size_x:
				454	return LowerImplicitParameter(DAG, VT, DL, 6);
				455	case r600_read_local_size_y:
				456	return LowerImplicitParameter(DAG, VT, DL, 7);
				457	case r600_read_local_size_z:
				458	return LowerImplicitParameter(DAG, VT, DL, 8);
				459
				460	case r600_read_tgid_x:
				461	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				462	AMDGPU::T1_X, VT);
				463	case r600_read_tgid_y:
				464	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				465	AMDGPU::T1_Y, VT);
				466	case r600_read_tgid_z:
				467	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				468	AMDGPU::T1_Z, VT);
				469	case r600_read_tidig_x:
				470	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				471	AMDGPU::T0_X, VT);
				472	case r600_read_tidig_y:
				473	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				474	AMDGPU::T0_Y, VT);
				475	case r600_read_tidig_z:
				476	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				477	AMDGPU::T0_Z, VT);
				478	}
				479	// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
				480	break;
				481	}
				482	} // end switch(Op.getOpcode())
				483	return SDValue();
				484	}
				485
				486	void R600TargetLowering::ReplaceNodeResults(SDNode *N,
				487	SmallVectorImpl<SDValue> &Results,
				488	SelectionDAG &DAG) const {
				489	switch (N->getOpcode()) {
				490	default: return;
				491	case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	492	return;
				493	case ISD::LOAD: {
				494	SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
				495	Results.push_back(SDValue(Node, 0));
				496	Results.push_back(SDValue(Node, 1));
				497	// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
				498	// function
				499	DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
				500	return;
				501	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	502	case ISD::STORE:
				503	SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
				504	Results.push_back(SDValue(Node, 0));
				505	return;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	506	}
				507	}
				508
				509	SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
				510	return DAG.getNode(
				511	ISD::SETCC,
				512	Op.getDebugLoc(),
				513	MVT::i1,
				514	Op, DAG.getConstantFP(0.0f, MVT::f32),
				515	DAG.getCondCode(ISD::SETNE)
				516	);
				517	}
				518
				519	SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
				520	SDValue Chain = Op.getOperand(0);
				521	SDValue CC = Op.getOperand(1);
				522	SDValue LHS = Op.getOperand(2);
				523	SDValue RHS = Op.getOperand(3);
				524	SDValue JumpT = Op.getOperand(4);
				525	SDValue CmpValue;
				526	SDValue Result;
				527
				528	if (LHS.getValueType() == MVT::i32) {
				529	CmpValue = DAG.getNode(
				530	ISD::SELECT_CC,
				531	Op.getDebugLoc(),
				532	MVT::i32,
				533	LHS, RHS,
				534	DAG.getConstant(-1, MVT::i32),
				535	DAG.getConstant(0, MVT::i32),
				536	CC);
				537	} else if (LHS.getValueType() == MVT::f32) {
				538	CmpValue = DAG.getNode(
				539	ISD::SELECT_CC,
				540	Op.getDebugLoc(),
				541	MVT::f32,
				542	LHS, RHS,
				543	DAG.getConstantFP(1.0f, MVT::f32),
				544	DAG.getConstantFP(0.0f, MVT::f32),
				545	CC);
				546	} else {
				547	assert(0 && "Not valid type for br_cc");
				548	}
				549	Result = DAG.getNode(
				550	AMDGPUISD::BRANCH_COND,
				551	CmpValue.getDebugLoc(),
				552	MVT::Other, Chain,
				553	JumpT, CmpValue);
				554	return Result;
				555	}
				556
				557	SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
				558	DebugLoc DL,
				559	unsigned DwordOffset) const {
				560	unsigned ByteOffset = DwordOffset * 4;
				561	PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
				562	AMDGPUAS::PARAM_I_ADDRESS);
				563
				564	// We shouldn't be using an offset wider than 16-bits for implicit parameters.
				565	assert(isInt<16>(ByteOffset));
				566
				567	return DAG.getLoad(VT, DL, DAG.getEntryNode(),
				568	DAG.getConstant(ByteOffset, MVT::i32), // PTR
				569	MachinePointerInfo(ConstantPointerNull::get(PtrType)),
				570	false, false, false, 0);
				571	}
				572
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	573	SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
				574
				575	MachineFunction &MF = DAG.getMachineFunction();
				576	const AMDGPUFrameLowering *TFL =
				577	static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
				578
				579	FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
				580	assert(FIN);
				581
				582	unsigned FrameIndex = FIN->getIndex();
				583	unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
				584	return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
				585	}
				586
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	587	SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
				588	DebugLoc DL = Op.getDebugLoc();
				589	EVT VT = Op.getValueType();
				590
				591	return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
				592	Op.getOperand(0),
				593	Op.getOperand(0),
				594	DAG.getNode(ISD::SUB, DL, VT,
				595	DAG.getConstant(32, MVT::i32),
				596	Op.getOperand(1)));
				597	}
				598
				599	bool R600TargetLowering::isZero(SDValue Op) const {
				600	if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
				601	return Cst->isNullValue();
				602	} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
				603	return CstFP->isZero();
				604	} else {
				605	return false;
				606	}
				607	}
				608
				609	SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
				610	DebugLoc DL = Op.getDebugLoc();
				611	EVT VT = Op.getValueType();
				612
				613	SDValue LHS = Op.getOperand(0);
				614	SDValue RHS = Op.getOperand(1);
				615	SDValue True = Op.getOperand(2);
				616	SDValue False = Op.getOperand(3);
				617	SDValue CC = Op.getOperand(4);
				618	SDValue Temp;
				619
				620	// LHS and RHS are guaranteed to be the same value type
				621	EVT CompareVT = LHS.getValueType();
				622
				623	// Check if we can lower this to a native operation.
				624
				625	// Try to lower to a CND* instruction:
				626	// CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
				627	// can be lowered to CND* instructions can also be lowered to SET*
				628	// instructions. CND* instructions are cheaper, because they dont't
				629	// require additional instructions to convert their result to the correct
				630	// value type, so this check should be first.
				631	if (isZero(LHS) \|\| isZero(RHS)) {
				632	SDValue Cond = (isZero(LHS) ? RHS : LHS);
				633	SDValue Zero = (isZero(LHS) ? LHS : RHS);
				634	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				635	if (CompareVT != VT) {
				636	// Bitcast True / False to the correct types. This will end up being
				637	// a nop, but it allows us to define only a single pattern in the
				638	// .TD files for each CND* instruction rather than having to have
				639	// one pattern for integer True/False and one for fp True/False
				640	True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
				641	False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
				642	}
				643	if (isZero(LHS)) {
				644	CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
				645	}
				646
				647	switch (CCOpcode) {
				648	case ISD::SETONE:
				649	case ISD::SETUNE:
				650	case ISD::SETNE:
				651	case ISD::SETULE:
				652	case ISD::SETULT:
				653	case ISD::SETOLE:
				654	case ISD::SETOLT:
				655	case ISD::SETLE:
				656	case ISD::SETLT:
				657	CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
				658	Temp = True;
				659	True = False;
				660	False = Temp;
				661	break;
				662	default:
				663	break;
				664	}
				665	SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				666	Cond, Zero,
				667	True, False,
				668	DAG.getCondCode(CCOpcode));
				669	return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
				670	}
				671
				672	// Try to lower to a SET* instruction:
				673	// We need all the operands of SELECT_CC to have the same value type, so if
				674	// necessary we need to change True and False to be the same type as LHS and
				675	// RHS, and then convert the result of the select_cc back to the correct type.
				676
				677	// Move hardware True/False values to the correct operand.
				678	if (isHWTrueValue(False) && isHWFalseValue(True)) {
				679	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				680	std::swap(False, True);
				681	CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
				682	}
				683
				684	if (isHWTrueValue(True) && isHWFalseValue(False)) {
				685	if (CompareVT != VT) {
				686	if (VT == MVT::f32 && CompareVT == MVT::i32) {
				687	SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				688	LHS, RHS,
				689	DAG.getConstant(-1, MVT::i32),
				690	DAG.getConstant(0, MVT::i32),
				691	CC);
				692	// Convert integer values of true (-1) and false (0) to fp values of
				693	// true (1.0f) and false (0.0f).
				694	SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
				695	DAG.getConstant(1, MVT::i32));
				696	return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
				697	} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
				698	SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				699	LHS, RHS,
				700	DAG.getConstantFP(1.0f, MVT::f32),
				701	DAG.getConstantFP(0.0f, MVT::f32),
				702	CC);
				703	// Convert fp values of true (1.0f) and false (0.0f) to integer values
				704	// of true (-1) and false (0).
				705	SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
				706	return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
				707	} else {
				708	// I don't think there will be any other type pairings.
				709	assert(!"Unhandled operand type parings in SELECT_CC");
				710	}
				711	} else {
				712	// This SELECT_CC is already legal.
				713	return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
				714	}
				715	}
				716
				717	// Possible Min/Max pattern
				718	SDValue MinMax = LowerMinMax(Op, DAG);
				719	if (MinMax.getNode()) {
				720	return MinMax;
				721	}
				722
				723	// If we make it this for it means we have no native instructions to handle
				724	// this SELECT_CC, so we must lower it.
				725	SDValue HWTrue, HWFalse;
				726
				727	if (CompareVT == MVT::f32) {
				728	HWTrue = DAG.getConstantFP(1.0f, CompareVT);
				729	HWFalse = DAG.getConstantFP(0.0f, CompareVT);
				730	} else if (CompareVT == MVT::i32) {
				731	HWTrue = DAG.getConstant(-1, CompareVT);
				732	HWFalse = DAG.getConstant(0, CompareVT);
				733	}
				734	else {
				735	assert(!"Unhandled value type in LowerSELECT_CC");
				736	}
				737
				738	// Lower this unsupported SELECT_CC into a combination of two supported
				739	// SELECT_CC operations.
				740	SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
				741
				742	return DAG.getNode(ISD::SELECT_CC, DL, VT,
				743	Cond, HWFalse,
				744	True, False,
				745	DAG.getCondCode(ISD::SETNE));
				746	}
				747
				748	SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
				749	return DAG.getNode(ISD::SELECT_CC,
				750	Op.getDebugLoc(),
				751	Op.getValueType(),
				752	Op.getOperand(0),
				753	DAG.getConstant(0, MVT::i32),
				754	Op.getOperand(1),
				755	Op.getOperand(2),
				756	DAG.getCondCode(ISD::SETNE));
				757	}
				758
				759	SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
				760	SDValue Cond;
				761	SDValue LHS = Op.getOperand(0);
				762	SDValue RHS = Op.getOperand(1);
				763	SDValue CC = Op.getOperand(2);
				764	DebugLoc DL = Op.getDebugLoc();
				765	assert(Op.getValueType() == MVT::i32);
				766	if (LHS.getValueType() == MVT::i32) {
				767	Cond = DAG.getNode(
				768	ISD::SELECT_CC,
				769	Op.getDebugLoc(),
				770	MVT::i32,
				771	LHS, RHS,
				772	DAG.getConstant(-1, MVT::i32),
				773	DAG.getConstant(0, MVT::i32),
				774	CC);
				775	} else if (LHS.getValueType() == MVT::f32) {
				776	Cond = DAG.getNode(
				777	ISD::SELECT_CC,
				778	Op.getDebugLoc(),
				779	MVT::f32,
				780	LHS, RHS,
				781	DAG.getConstantFP(1.0f, MVT::f32),
				782	DAG.getConstantFP(0.0f, MVT::f32),
				783	CC);
				784	Cond = DAG.getNode(
				785	ISD::FP_TO_SINT,
				786	DL,
				787	MVT::i32,
				788	Cond);
				789	} else {
				790	assert(0 && "Not valid type for set_cc");
				791	}
				792	Cond = DAG.getNode(
				793	ISD::AND,
				794	DL,
				795	MVT::i32,
				796	DAG.getConstant(1, MVT::i32),
				797	Cond);
				798	return Cond;
				799	}
				800
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	801	/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
				802	/// convert these pointers to a register index. Each register holds
				803	/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
				804	/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
				805	/// for indirect addressing.
				806	SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
				807	unsigned StackWidth,
				808	SelectionDAG &DAG) const {
				809	unsigned SRLPad;
				810	switch(StackWidth) {
				811	case 1:
				812	SRLPad = 2;
				813	break;
				814	case 2:
				815	SRLPad = 3;
				816	break;
				817	case 4:
				818	SRLPad = 4;
				819	break;
				820	default: llvm_unreachable("Invalid stack width");
				821	}
				822
				823	return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
				824	DAG.getConstant(SRLPad, MVT::i32));
				825	}
				826
				827	void R600TargetLowering::getStackAddress(unsigned StackWidth,
				828	unsigned ElemIdx,
				829	unsigned &Channel,
				830	unsigned &PtrIncr) const {
				831	switch (StackWidth) {
				832	default:
				833	case 1:
				834	Channel = 0;
				835	if (ElemIdx > 0) {
				836	PtrIncr = 1;
				837	} else {
				838	PtrIncr = 0;
				839	}
				840	break;
				841	case 2:
				842	Channel = ElemIdx % 2;
				843	if (ElemIdx == 2) {
				844	PtrIncr = 1;
				845	} else {
				846	PtrIncr = 0;
				847	}
				848	break;
				849	case 4:
				850	Channel = ElemIdx;
				851	PtrIncr = 0;
				852	break;
				853	}
				854	}
				855
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	856	SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
				857	DebugLoc DL = Op.getDebugLoc();
				858	StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
				859	SDValue Chain = Op.getOperand(0);
				860	SDValue Value = Op.getOperand(1);
				861	SDValue Ptr = Op.getOperand(2);
				862
				863	if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
				864	Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
				865	// Convert pointer from byte address to dword address.
				866	Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
				867	DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
				868	Ptr, DAG.getConstant(2, MVT::i32)));
				869
				870	if (StoreNode->isTruncatingStore() \|\| StoreNode->isIndexed()) {
				871	assert(!"Truncated and indexed stores not supported yet");
				872	} else {
				873	Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
				874	}
				875	return Chain;
				876	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	877
				878	EVT ValueVT = Value.getValueType();
				879
				880	if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				881	return SDValue();
				882	}
				883
				884	// Lowering for indirect addressing
				885
				886	const MachineFunction &MF = DAG.getMachineFunction();
				887	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				888	getTargetMachine().getFrameLowering());
				889	unsigned StackWidth = TFL->getStackWidth(MF);
				890
				891	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				892
				893	if (ValueVT.isVector()) {
				894	unsigned NumElemVT = ValueVT.getVectorNumElements();
				895	EVT ElemVT = ValueVT.getVectorElementType();
				896	SDValue Stores[4];
				897
				898	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				899	"vector width in load");
				900
				901	for (unsigned i = 0; i < NumElemVT; ++i) {
				902	unsigned Channel, PtrIncr;
				903	getStackAddress(StackWidth, i, Channel, PtrIncr);
				904	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				905	DAG.getConstant(PtrIncr, MVT::i32));
				906	SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
				907	Value, DAG.getConstant(i, MVT::i32));
				908
				909	Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
				910	Chain, Elem, Ptr,
				911	DAG.getTargetConstant(Channel, MVT::i32));
				912	}
				913	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
				914	} else {
				915	if (ValueVT == MVT::i8) {
				916	Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
				917	}
				918	Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
				919	DAG.getTargetConstant(0, MVT::i32)); // Channel
				920	}
				921
				922	return Chain;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	923	}
				924
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	925	// return (512 + (kc_bank << 12)
				926	static int
				927	ConstantAddressBlock(unsigned AddressSpace) {
				928	switch (AddressSpace) {
				929	case AMDGPUAS::CONSTANT_BUFFER_0:
				930	return 512;
				931	case AMDGPUAS::CONSTANT_BUFFER_1:
				932	return 512 + 4096;
				933	case AMDGPUAS::CONSTANT_BUFFER_2:
				934	return 512 + 4096 * 2;
				935	case AMDGPUAS::CONSTANT_BUFFER_3:
				936	return 512 + 4096 * 3;
				937	case AMDGPUAS::CONSTANT_BUFFER_4:
				938	return 512 + 4096 * 4;
				939	case AMDGPUAS::CONSTANT_BUFFER_5:
				940	return 512 + 4096 * 5;
				941	case AMDGPUAS::CONSTANT_BUFFER_6:
				942	return 512 + 4096 * 6;
				943	case AMDGPUAS::CONSTANT_BUFFER_7:
				944	return 512 + 4096 * 7;
				945	case AMDGPUAS::CONSTANT_BUFFER_8:
				946	return 512 + 4096 * 8;
				947	case AMDGPUAS::CONSTANT_BUFFER_9:
				948	return 512 + 4096 * 9;
				949	case AMDGPUAS::CONSTANT_BUFFER_10:
				950	return 512 + 4096 * 10;
				951	case AMDGPUAS::CONSTANT_BUFFER_11:
				952	return 512 + 4096 * 11;
				953	case AMDGPUAS::CONSTANT_BUFFER_12:
				954	return 512 + 4096 * 12;
				955	case AMDGPUAS::CONSTANT_BUFFER_13:
				956	return 512 + 4096 * 13;
				957	case AMDGPUAS::CONSTANT_BUFFER_14:
				958	return 512 + 4096 * 14;
				959	case AMDGPUAS::CONSTANT_BUFFER_15:
				960	return 512 + 4096 * 15;
				961	default:
				962	return -1;
				963	}
				964	}
				965
				966	SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
				967	{
				968	EVT VT = Op.getValueType();
				969	DebugLoc DL = Op.getDebugLoc();
				970	LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
				971	SDValue Chain = Op.getOperand(0);
				972	SDValue Ptr = Op.getOperand(1);
				973	SDValue LoweredLoad;
				974
				975	int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
				976	if (ConstantBlock > -1) {
				977	SDValue Result;
				978	if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) \|\|
				979	dyn_cast<Constant>(LoadNode->getSrcValue())) {
				980	SDValue Slots[4];
				981	for (unsigned i = 0; i < 4; i++) {
				982	// We want Const position encoded with the following formula :
				983	// (((512 + (kc_bank << 12) + const_index) << 2) + chan)
				984	// const_index is Ptr computed by llvm using an alignment of 16.
				985	// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
				986	// then div by 4 at the ISel step
				987	SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
				988	DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
				989	Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
				990	}
				991	Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
				992	} else {
				993	// non constant ptr cant be folded, keeps it as a v4f32 load
				994	Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
				995	DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
				996	);
				997	}
				998
				999	if (!VT.isVector()) {
				1000	Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
				1001	DAG.getConstant(0, MVT::i32));
				1002	}
				1003
				1004	SDValue MergedValues[2] = {
				1005	Result,
				1006	Chain
				1007	};
				1008	return DAG.getMergeValues(MergedValues, 2, DL);
				1009	}
				1010
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame^]	1011	if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				1012	return SDValue();
				1013	}
				1014
				1015	// Lowering for indirect addressing
				1016	const MachineFunction &MF = DAG.getMachineFunction();
				1017	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				1018	getTargetMachine().getFrameLowering());
				1019	unsigned StackWidth = TFL->getStackWidth(MF);
				1020
				1021	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				1022
				1023	if (VT.isVector()) {
				1024	unsigned NumElemVT = VT.getVectorNumElements();
				1025	EVT ElemVT = VT.getVectorElementType();
				1026	SDValue Loads[4];
				1027
				1028	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				1029	"vector width in load");
				1030
				1031	for (unsigned i = 0; i < NumElemVT; ++i) {
				1032	unsigned Channel, PtrIncr;
				1033	getStackAddress(StackWidth, i, Channel, PtrIncr);
				1034	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				1035	DAG.getConstant(PtrIncr, MVT::i32));
				1036	Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
				1037	Chain, Ptr,
				1038	DAG.getTargetConstant(Channel, MVT::i32),
				1039	Op.getOperand(2));
				1040	}
				1041	for (unsigned i = NumElemVT; i < 4; ++i) {
				1042	Loads[i] = DAG.getUNDEF(ElemVT);
				1043	}
				1044	EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
				1045	LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
				1046	} else {
				1047	LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
				1048	Chain, Ptr,
				1049	DAG.getTargetConstant(0, MVT::i32), // Channel
				1050	Op.getOperand(2));
				1051	}
				1052
				1053	SDValue Ops[2];
				1054	Ops[0] = LoweredLoad;
				1055	Ops[1] = Chain;
				1056
				1057	return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1058	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1059
				1060	SDValue R600TargetLowering::LowerFPOW(SDValue Op,
				1061	SelectionDAG &DAG) const {
				1062	DebugLoc DL = Op.getDebugLoc();
				1063	EVT VT = Op.getValueType();
				1064	SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
				1065	SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
				1066	return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
				1067	}
				1068
				1069	/// XXX Only kernel functions are supported, so we can assume for now that
				1070	/// every function is a kernel function, but in the future we should use
				1071	/// separate calling conventions for kernel and non-kernel functions.
				1072	SDValue R600TargetLowering::LowerFormalArguments(
				1073	SDValue Chain,
				1074	CallingConv::ID CallConv,
				1075	bool isVarArg,
				1076	const SmallVectorImpl<ISD::InputArg> &Ins,
				1077	DebugLoc DL, SelectionDAG &DAG,
				1078	SmallVectorImpl<SDValue> &InVals) const {
				1079	unsigned ParamOffsetBytes = 36;
				1080	Function::const_arg_iterator FuncArg =
				1081	DAG.getMachineFunction().getFunction()->arg_begin();
				1082	for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
				1083	EVT VT = Ins[i].VT;
				1084	Type *ArgType = FuncArg->getType();
				1085	unsigned ArgSizeInBits = ArgType->isPointerTy() ?
				1086	32 : ArgType->getPrimitiveSizeInBits();
				1087	unsigned ArgBytes = ArgSizeInBits >> 3;
				1088	EVT ArgVT;
				1089	if (ArgSizeInBits < VT.getSizeInBits()) {
				1090	assert(!ArgType->isFloatTy() &&
				1091	"Extending floating point arguments not supported yet");
				1092	ArgVT = MVT::getIntegerVT(ArgSizeInBits);
				1093	} else {
				1094	ArgVT = VT;
				1095	}
				1096	PointerType PtrTy = PointerType::get(VT.getTypeForEVT(DAG.getContext()),
				1097	AMDGPUAS::PARAM_I_ADDRESS);
				1098	SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
				1099	DAG.getConstant(ParamOffsetBytes, MVT::i32),
				1100	MachinePointerInfo(new Argument(PtrTy)),
				1101	ArgVT, false, false, ArgBytes);
				1102	InVals.push_back(Arg);
				1103	ParamOffsetBytes += ArgBytes;
				1104	}
				1105	return Chain;
				1106	}
				1107
				1108	EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
				1109	if (!VT.isVector()) return MVT::i32;
				1110	return VT.changeVectorElementTypeToInteger();
				1111	}
				1112
				1113	//===----------------------------------------------------------------------===//
				1114	// Custom DAG Optimizations
				1115	//===----------------------------------------------------------------------===//
				1116
				1117	SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
				1118	DAGCombinerInfo &DCI) const {
				1119	SelectionDAG &DAG = DCI.DAG;
				1120
				1121	switch (N->getOpcode()) {
				1122	// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
				1123	case ISD::FP_ROUND: {
				1124	SDValue Arg = N->getOperand(0);
				1125	if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
				1126	return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
				1127	Arg.getOperand(0));
				1128	}
				1129	break;
				1130	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1131	// Extract_vec (Build_vector) generated by custom lowering
				1132	// also needs to be customly combined
				1133	case ISD::EXTRACT_VECTOR_ELT: {
				1134	SDValue Arg = N->getOperand(0);
				1135	if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
				1136	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1137	unsigned Element = Const->getZExtValue();
				1138	return Arg->getOperand(Element);
				1139	}
				1140	}
Tom Stellard	dd04c83	2013-01-31 22:11:53 +0000	[diff] [blame]	1141	if (Arg.getOpcode() == ISD::BITCAST &&
				1142	Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
				1143	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1144	unsigned Element = Const->getZExtValue();
				1145	return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
				1146	Arg->getOperand(0).getOperand(Element));
				1147	}
				1148	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1149	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1150	}
				1151	return SDValue();
				1152	}