Blame - llvm/lib/Target/R600/R600ISelLowering.cpp - toolchain/llvm-project

blob: a20aade25d8e0ef791578c24bd4ad97ee05b5080 [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief Custom DAG lowering for R600
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "R600ISelLowering.h"
				16	#include "R600Defines.h"
				17	#include "R600InstrInfo.h"
				18	#include "R600MachineFunctionInfo.h"
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	19	#include "llvm/CodeGen/MachineFrameInfo.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	20	#include "llvm/CodeGen/MachineInstrBuilder.h"
				21	#include "llvm/CodeGen/MachineRegisterInfo.h"
				22	#include "llvm/CodeGen/SelectionDAG.h"
Chandler Carruth	9fb823b	2013-01-02 11:36:10 +0000	[diff] [blame]	23	#include "llvm/IR/Argument.h"
				24	#include "llvm/IR/Function.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	25
				26	using namespace llvm;
				27
				28	R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
				29	AMDGPUTargetLowering(TM),
				30	TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
				31	setOperationAction(ISD::MUL, MVT::i64, Expand);
				32	addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
				33	addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
				34	addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
				35	addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
				36	computeRegisterProperties();
				37
				38	setOperationAction(ISD::FADD, MVT::v4f32, Expand);
				39	setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
				40	setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
				41	setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
				42
				43	setOperationAction(ISD::ADD, MVT::v4i32, Expand);
				44	setOperationAction(ISD::AND, MVT::v4i32, Expand);
Tom Stellard	a8b0351	2012-12-21 16:33:24 +0000	[diff] [blame]	45	setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
				46	setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
				47	setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
				48	setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	49	setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
				50	setOperationAction(ISD::UREM, MVT::v4i32, Expand);
				51	setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
				52
				53	setOperationAction(ISD::BR_CC, MVT::i32, Custom);
				54	setOperationAction(ISD::BR_CC, MVT::f32, Custom);
				55
				56	setOperationAction(ISD::FSUB, MVT::f32, Expand);
				57
				58	setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
				59	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
				60	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
				61	setOperationAction(ISD::FPOW, MVT::f32, Custom);
				62
				63	setOperationAction(ISD::ROTL, MVT::i32, Custom);
				64
				65	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				66	setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
				67
Tom Stellard	e8f9f28	2013-03-08 15:37:05 +0000	[diff] [blame^]	68	setOperationAction(ISD::SETCC, MVT::i32, Expand);
				69	setOperationAction(ISD::SETCC, MVT::f32, Expand);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	70	setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
				71
				72	setOperationAction(ISD::SELECT, MVT::i32, Custom);
				73	setOperationAction(ISD::SELECT, MVT::f32, Custom);
				74
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	75	// Legalize loads and stores to the private address space.
				76	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				77	setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
				78	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
				79	setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Custom);
				80	setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
				81	setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
				82	setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Custom);
				83	setOperationAction(ISD::STORE, MVT::i8, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	84	setOperationAction(ISD::STORE, MVT::i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	85	setOperationAction(ISD::STORE, MVT::v2i32, Custom);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	86	setOperationAction(ISD::STORE, MVT::v4i32, Custom);
				87
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	88	setOperationAction(ISD::LOAD, MVT::i32, Custom);
				89	setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	90	setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
				91
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	92	setTargetDAGCombine(ISD::FP_ROUND);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	93	setTargetDAGCombine(ISD::FP_TO_SINT);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	94	setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	95	setTargetDAGCombine(ISD::SELECT_CC);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	96
Tom Stellard	b852af5	2013-03-08 15:37:03 +0000	[diff] [blame]	97	setBooleanContents(ZeroOrNegativeOneBooleanContent);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	98	setSchedulingPreference(Sched::VLIW);
				99	}
				100
				101	MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
				102	MachineInstr * MI, MachineBasicBlock * BB) const {
				103	MachineFunction * MF = BB->getParent();
				104	MachineRegisterInfo &MRI = MF->getRegInfo();
				105	MachineBasicBlock::iterator I = *MI;
				106
				107	switch (MI->getOpcode()) {
				108	default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	109	case AMDGPU::CLAMP_R600: {
				110	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				111	AMDGPU::MOV,
				112	MI->getOperand(0).getReg(),
				113	MI->getOperand(1).getReg());
				114	TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
				115	break;
				116	}
				117
				118	case AMDGPU::FABS_R600: {
				119	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				120	AMDGPU::MOV,
				121	MI->getOperand(0).getReg(),
				122	MI->getOperand(1).getReg());
				123	TII->addFlag(NewMI, 0, MO_FLAG_ABS);
				124	break;
				125	}
				126
				127	case AMDGPU::FNEG_R600: {
				128	MachineInstr NewMI = TII->buildDefaultInstruction(BB, I,
				129	AMDGPU::MOV,
				130	MI->getOperand(0).getReg(),
				131	MI->getOperand(1).getReg());
				132	TII->addFlag(NewMI, 0, MO_FLAG_NEG);
				133	break;
				134	}
				135
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	136	case AMDGPU::MASK_WRITE: {
				137	unsigned maskedRegister = MI->getOperand(0).getReg();
				138	assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
				139	MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
				140	TII->addFlag(defInstr, 0, MO_FLAG_MASK);
				141	break;
				142	}
				143
				144	case AMDGPU::MOV_IMM_F32:
				145	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				146	MI->getOperand(1).getFPImm()->getValueAPF()
				147	.bitcastToAPInt().getZExtValue());
				148	break;
				149	case AMDGPU::MOV_IMM_I32:
				150	TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
				151	MI->getOperand(1).getImm());
				152	break;
Vincent Lejeune	0b72f10	2013-03-05 15:04:55 +0000	[diff] [blame]	153	case AMDGPU::CONST_COPY: {
				154	MachineInstr NewMI = TII->buildDefaultInstruction(BB, MI, AMDGPU::MOV,
				155	MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
				156	TII->setImmOperand(NewMI, R600Operands::SRC0_SEL,
				157	MI->getOperand(1).getImm());
				158	break;
				159	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	160
				161	case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
				162	case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
				163	unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
				164
				165	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				166	.addOperand(MI->getOperand(0))
				167	.addOperand(MI->getOperand(1))
				168	.addImm(EOP); // Set End of program bit
				169	break;
				170	}
				171
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	172	case AMDGPU::TXD: {
				173	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				174	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				175
				176	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				177	.addOperand(MI->getOperand(3))
				178	.addOperand(MI->getOperand(4))
				179	.addOperand(MI->getOperand(5))
				180	.addOperand(MI->getOperand(6));
				181	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				182	.addOperand(MI->getOperand(2))
				183	.addOperand(MI->getOperand(4))
				184	.addOperand(MI->getOperand(5))
				185	.addOperand(MI->getOperand(6));
				186	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
				187	.addOperand(MI->getOperand(0))
				188	.addOperand(MI->getOperand(1))
				189	.addOperand(MI->getOperand(4))
				190	.addOperand(MI->getOperand(5))
				191	.addOperand(MI->getOperand(6))
				192	.addReg(T0, RegState::Implicit)
				193	.addReg(T1, RegState::Implicit);
				194	break;
				195	}
				196
				197	case AMDGPU::TXD_SHADOW: {
				198	unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				199	unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
				200
				201	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
				202	.addOperand(MI->getOperand(3))
				203	.addOperand(MI->getOperand(4))
				204	.addOperand(MI->getOperand(5))
				205	.addOperand(MI->getOperand(6));
				206	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
				207	.addOperand(MI->getOperand(2))
				208	.addOperand(MI->getOperand(4))
				209	.addOperand(MI->getOperand(5))
				210	.addOperand(MI->getOperand(6));
				211	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
				212	.addOperand(MI->getOperand(0))
				213	.addOperand(MI->getOperand(1))
				214	.addOperand(MI->getOperand(4))
				215	.addOperand(MI->getOperand(5))
				216	.addOperand(MI->getOperand(6))
				217	.addReg(T0, RegState::Implicit)
				218	.addReg(T1, RegState::Implicit);
				219	break;
				220	}
				221
				222	case AMDGPU::BRANCH:
				223	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				224	.addOperand(MI->getOperand(0))
				225	.addReg(0);
				226	break;
				227
				228	case AMDGPU::BRANCH_COND_f32: {
				229	MachineInstr *NewMI =
				230	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				231	AMDGPU::PREDICATE_BIT)
				232	.addOperand(MI->getOperand(1))
				233	.addImm(OPCODE_IS_NOT_ZERO)
				234	.addImm(0); // Flags
				235	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				236	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				237	.addOperand(MI->getOperand(0))
				238	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				239	break;
				240	}
				241
				242	case AMDGPU::BRANCH_COND_i32: {
				243	MachineInstr *NewMI =
				244	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
				245	AMDGPU::PREDICATE_BIT)
				246	.addOperand(MI->getOperand(1))
				247	.addImm(OPCODE_IS_NOT_ZERO_INT)
				248	.addImm(0); // Flags
				249	TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
				250	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
				251	.addOperand(MI->getOperand(0))
				252	.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
				253	break;
				254	}
				255
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	256	case AMDGPU::EG_ExportSwz:
				257	case AMDGPU::R600_ExportSwz: {
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	258	// Instruction is left unmodified if its not the last one of its type
				259	bool isLastInstructionOfItsType = true;
				260	unsigned InstExportType = MI->getOperand(1).getImm();
				261	for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
				262	EndBlock = BB->end(); NextExportInst != EndBlock;
				263	NextExportInst = llvm::next(NextExportInst)) {
				264	if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz \|\|
				265	NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
				266	unsigned CurrentInstExportType = NextExportInst->getOperand(1)
				267	.getImm();
				268	if (CurrentInstExportType == InstExportType) {
				269	isLastInstructionOfItsType = false;
				270	break;
				271	}
				272	}
				273	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	274	bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	275	if (!EOP && !isLastInstructionOfItsType)
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	276	return BB;
				277	unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
				278	BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
				279	.addOperand(MI->getOperand(0))
				280	.addOperand(MI->getOperand(1))
				281	.addOperand(MI->getOperand(2))
				282	.addOperand(MI->getOperand(3))
				283	.addOperand(MI->getOperand(4))
				284	.addOperand(MI->getOperand(5))
				285	.addOperand(MI->getOperand(6))
				286	.addImm(CfInst)
Tom Stellard	6f1b865	2013-01-23 21:39:49 +0000	[diff] [blame]	287	.addImm(EOP);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	288	break;
				289	}
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	290	case AMDGPU::RETURN: {
				291	// RETURN instructions must have the live-out registers as implicit uses,
				292	// otherwise they appear dead.
				293	R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
				294	MachineInstrBuilder MIB(*MF, MI);
				295	for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
				296	MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
				297	return BB;
				298	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	299	}
				300
				301	MI->eraseFromParent();
				302	return BB;
				303	}
				304
				305	//===----------------------------------------------------------------------===//
				306	// Custom DAG Lowering Operations
				307	//===----------------------------------------------------------------------===//
				308
				309	using namespace llvm::Intrinsic;
				310	using namespace llvm::AMDGPUIntrinsic;
				311
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	312	SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
				313	switch (Op.getOpcode()) {
				314	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				315	case ISD::BR_CC: return LowerBR_CC(Op, DAG);
				316	case ISD::ROTL: return LowerROTL(Op, DAG);
				317	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
				318	case ISD::SELECT: return LowerSELECT(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	319	case ISD::STORE: return LowerSTORE(Op, DAG);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	320	case ISD::LOAD: return LowerLOAD(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	321	case ISD::FPOW: return LowerFPOW(Op, DAG);
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	322	case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	323	case ISD::INTRINSIC_VOID: {
				324	SDValue Chain = Op.getOperand(0);
				325	unsigned IntrinsicID =
				326	cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				327	switch (IntrinsicID) {
				328	case AMDGPUIntrinsic::AMDGPU_store_output: {
				329	MachineFunction &MF = DAG.getMachineFunction();
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	330	R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	331	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
				332	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
Jakob Stoklund Olesen	fdc3767	2013-02-05 17:53:52 +0000	[diff] [blame]	333	MFI->LiveOuts.push_back(Reg);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	334	return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
				335	}
Vincent Lejeune	d80bc15	2013-02-14 16:55:06 +0000	[diff] [blame]	336	case AMDGPUIntrinsic::R600_store_swizzle: {
				337	const SDValue Args[8] = {
				338	Chain,
				339	Op.getOperand(2), // Export Value
				340	Op.getOperand(3), // ArrayBase
				341	Op.getOperand(4), // Type
				342	DAG.getConstant(0, MVT::i32), // SWZ_X
				343	DAG.getConstant(1, MVT::i32), // SWZ_Y
				344	DAG.getConstant(2, MVT::i32), // SWZ_Z
				345	DAG.getConstant(3, MVT::i32) // SWZ_W
				346	};
				347	return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(),
				348	Args, 8);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	349	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	350
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	351	// default for switch(IntrinsicID)
				352	default: break;
				353	}
				354	// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
				355	break;
				356	}
				357	case ISD::INTRINSIC_WO_CHAIN: {
				358	unsigned IntrinsicID =
				359	cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
				360	EVT VT = Op.getValueType();
				361	DebugLoc DL = Op.getDebugLoc();
				362	switch(IntrinsicID) {
				363	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
				364	case AMDGPUIntrinsic::R600_load_input: {
				365	int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
				366	unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
				367	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
				368	}
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	369
				370	case AMDGPUIntrinsic::R600_interp_input: {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	371	int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Tom Stellard	41afe6a	2013-02-05 17:09:14 +0000	[diff] [blame]	372	int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
				373	MachineSDNode *interp;
				374	if (ijb < 0) {
				375	interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
				376	MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
				377	return DAG.getTargetExtractSubreg(
				378	TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
				379	DL, MVT::f32, SDValue(interp, 0));
				380	}
				381
				382	if (slot % 4 < 2)
				383	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
				384	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				385	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				386	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				387	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				388	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				389	else
				390	interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
				391	MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
				392	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				393	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
				394	CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				395	AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
				396
				397	return SDValue(interp, slot % 2);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	398	}
				399
				400	case r600_read_ngroups_x:
				401	return LowerImplicitParameter(DAG, VT, DL, 0);
				402	case r600_read_ngroups_y:
				403	return LowerImplicitParameter(DAG, VT, DL, 1);
				404	case r600_read_ngroups_z:
				405	return LowerImplicitParameter(DAG, VT, DL, 2);
				406	case r600_read_global_size_x:
				407	return LowerImplicitParameter(DAG, VT, DL, 3);
				408	case r600_read_global_size_y:
				409	return LowerImplicitParameter(DAG, VT, DL, 4);
				410	case r600_read_global_size_z:
				411	return LowerImplicitParameter(DAG, VT, DL, 5);
				412	case r600_read_local_size_x:
				413	return LowerImplicitParameter(DAG, VT, DL, 6);
				414	case r600_read_local_size_y:
				415	return LowerImplicitParameter(DAG, VT, DL, 7);
				416	case r600_read_local_size_z:
				417	return LowerImplicitParameter(DAG, VT, DL, 8);
				418
				419	case r600_read_tgid_x:
				420	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				421	AMDGPU::T1_X, VT);
				422	case r600_read_tgid_y:
				423	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				424	AMDGPU::T1_Y, VT);
				425	case r600_read_tgid_z:
				426	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				427	AMDGPU::T1_Z, VT);
				428	case r600_read_tidig_x:
				429	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				430	AMDGPU::T0_X, VT);
				431	case r600_read_tidig_y:
				432	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				433	AMDGPU::T0_Y, VT);
				434	case r600_read_tidig_z:
				435	return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
				436	AMDGPU::T0_Z, VT);
				437	}
				438	// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
				439	break;
				440	}
				441	} // end switch(Op.getOpcode())
				442	return SDValue();
				443	}
				444
				445	void R600TargetLowering::ReplaceNodeResults(SDNode *N,
				446	SmallVectorImpl<SDValue> &Results,
				447	SelectionDAG &DAG) const {
				448	switch (N->getOpcode()) {
				449	default: return;
				450	case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	451	return;
				452	case ISD::LOAD: {
				453	SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
				454	Results.push_back(SDValue(Node, 0));
				455	Results.push_back(SDValue(Node, 1));
				456	// XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
				457	// function
				458	DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
				459	return;
				460	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	461	case ISD::STORE:
				462	SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
				463	Results.push_back(SDValue(Node, 0));
				464	return;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	465	}
				466	}
				467
				468	SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
				469	return DAG.getNode(
				470	ISD::SETCC,
				471	Op.getDebugLoc(),
				472	MVT::i1,
				473	Op, DAG.getConstantFP(0.0f, MVT::f32),
				474	DAG.getCondCode(ISD::SETNE)
				475	);
				476	}
				477
				478	SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
				479	SDValue Chain = Op.getOperand(0);
				480	SDValue CC = Op.getOperand(1);
				481	SDValue LHS = Op.getOperand(2);
				482	SDValue RHS = Op.getOperand(3);
				483	SDValue JumpT = Op.getOperand(4);
				484	SDValue CmpValue;
				485	SDValue Result;
				486
				487	if (LHS.getValueType() == MVT::i32) {
				488	CmpValue = DAG.getNode(
				489	ISD::SELECT_CC,
				490	Op.getDebugLoc(),
				491	MVT::i32,
				492	LHS, RHS,
				493	DAG.getConstant(-1, MVT::i32),
				494	DAG.getConstant(0, MVT::i32),
				495	CC);
				496	} else if (LHS.getValueType() == MVT::f32) {
				497	CmpValue = DAG.getNode(
				498	ISD::SELECT_CC,
				499	Op.getDebugLoc(),
				500	MVT::f32,
				501	LHS, RHS,
				502	DAG.getConstantFP(1.0f, MVT::f32),
				503	DAG.getConstantFP(0.0f, MVT::f32),
				504	CC);
				505	} else {
				506	assert(0 && "Not valid type for br_cc");
				507	}
				508	Result = DAG.getNode(
				509	AMDGPUISD::BRANCH_COND,
				510	CmpValue.getDebugLoc(),
				511	MVT::Other, Chain,
				512	JumpT, CmpValue);
				513	return Result;
				514	}
				515
				516	SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
				517	DebugLoc DL,
				518	unsigned DwordOffset) const {
				519	unsigned ByteOffset = DwordOffset * 4;
				520	PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
				521	AMDGPUAS::PARAM_I_ADDRESS);
				522
				523	// We shouldn't be using an offset wider than 16-bits for implicit parameters.
				524	assert(isInt<16>(ByteOffset));
				525
				526	return DAG.getLoad(VT, DL, DAG.getEntryNode(),
				527	DAG.getConstant(ByteOffset, MVT::i32), // PTR
				528	MachinePointerInfo(ConstantPointerNull::get(PtrType)),
				529	false, false, false, 0);
				530	}
				531
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	532	SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
				533
				534	MachineFunction &MF = DAG.getMachineFunction();
				535	const AMDGPUFrameLowering *TFL =
				536	static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
				537
				538	FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
				539	assert(FIN);
				540
				541	unsigned FrameIndex = FIN->getIndex();
				542	unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
				543	return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
				544	}
				545
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	546	SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
				547	DebugLoc DL = Op.getDebugLoc();
				548	EVT VT = Op.getValueType();
				549
				550	return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
				551	Op.getOperand(0),
				552	Op.getOperand(0),
				553	DAG.getNode(ISD::SUB, DL, VT,
				554	DAG.getConstant(32, MVT::i32),
				555	Op.getOperand(1)));
				556	}
				557
				558	bool R600TargetLowering::isZero(SDValue Op) const {
				559	if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
				560	return Cst->isNullValue();
				561	} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
				562	return CstFP->isZero();
				563	} else {
				564	return false;
				565	}
				566	}
				567
				568	SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
				569	DebugLoc DL = Op.getDebugLoc();
				570	EVT VT = Op.getValueType();
				571
				572	SDValue LHS = Op.getOperand(0);
				573	SDValue RHS = Op.getOperand(1);
				574	SDValue True = Op.getOperand(2);
				575	SDValue False = Op.getOperand(3);
				576	SDValue CC = Op.getOperand(4);
				577	SDValue Temp;
				578
				579	// LHS and RHS are guaranteed to be the same value type
				580	EVT CompareVT = LHS.getValueType();
				581
				582	// Check if we can lower this to a native operation.
				583
				584	// Try to lower to a CND* instruction:
				585	// CND* instructions requires RHS to be zero. Some SELECT_CC nodes that
				586	// can be lowered to CND* instructions can also be lowered to SET*
				587	// instructions. CND* instructions are cheaper, because they dont't
				588	// require additional instructions to convert their result to the correct
				589	// value type, so this check should be first.
				590	if (isZero(LHS) \|\| isZero(RHS)) {
				591	SDValue Cond = (isZero(LHS) ? RHS : LHS);
				592	SDValue Zero = (isZero(LHS) ? LHS : RHS);
				593	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				594	if (CompareVT != VT) {
				595	// Bitcast True / False to the correct types. This will end up being
				596	// a nop, but it allows us to define only a single pattern in the
				597	// .TD files for each CND* instruction rather than having to have
				598	// one pattern for integer True/False and one for fp True/False
				599	True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
				600	False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
				601	}
				602	if (isZero(LHS)) {
				603	CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
				604	}
				605
				606	switch (CCOpcode) {
				607	case ISD::SETONE:
				608	case ISD::SETUNE:
				609	case ISD::SETNE:
				610	case ISD::SETULE:
				611	case ISD::SETULT:
				612	case ISD::SETOLE:
				613	case ISD::SETOLT:
				614	case ISD::SETLE:
				615	case ISD::SETLT:
				616	CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
				617	Temp = True;
				618	True = False;
				619	False = Temp;
				620	break;
				621	default:
				622	break;
				623	}
				624	SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				625	Cond, Zero,
				626	True, False,
				627	DAG.getCondCode(CCOpcode));
				628	return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
				629	}
				630
				631	// Try to lower to a SET* instruction:
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	632	//
				633	// CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware,
				634	// but for the other case where CompareVT != VT, all operands of
				635	// SELECT_CC need to have the same value type, so we need to change True and
				636	// False to be the same type as LHS and RHS, and then convert the result of
				637	// the select_cc back to the correct type.
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	638
				639	// Move hardware True/False values to the correct operand.
				640	if (isHWTrueValue(False) && isHWFalseValue(True)) {
				641	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
				642	std::swap(False, True);
				643	CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
				644	}
				645
				646	if (isHWTrueValue(True) && isHWFalseValue(False)) {
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	647	if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) {
				648	SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
				649	LHS, RHS,
				650	DAG.getConstant(-1, MVT::i32),
				651	DAG.getConstant(0, MVT::i32),
				652	CC);
				653	// Convert integer values of true (-1) and false (0) to fp values of
				654	// true (1.0f) and false (0.0f).
				655	SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
				656	DAG.getConstant(1, MVT::i32));
				657	return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	658	} else {
				659	// This SELECT_CC is already legal.
				660	return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
				661	}
				662	}
				663
				664	// Possible Min/Max pattern
				665	SDValue MinMax = LowerMinMax(Op, DAG);
				666	if (MinMax.getNode()) {
				667	return MinMax;
				668	}
				669
				670	// If we make it this for it means we have no native instructions to handle
				671	// this SELECT_CC, so we must lower it.
				672	SDValue HWTrue, HWFalse;
				673
				674	if (CompareVT == MVT::f32) {
				675	HWTrue = DAG.getConstantFP(1.0f, CompareVT);
				676	HWFalse = DAG.getConstantFP(0.0f, CompareVT);
				677	} else if (CompareVT == MVT::i32) {
				678	HWTrue = DAG.getConstant(-1, CompareVT);
				679	HWFalse = DAG.getConstant(0, CompareVT);
				680	}
				681	else {
				682	assert(!"Unhandled value type in LowerSELECT_CC");
				683	}
				684
				685	// Lower this unsupported SELECT_CC into a combination of two supported
				686	// SELECT_CC operations.
				687	SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
				688
				689	return DAG.getNode(ISD::SELECT_CC, DL, VT,
				690	Cond, HWFalse,
				691	True, False,
				692	DAG.getCondCode(ISD::SETNE));
				693	}
				694
				695	SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
				696	return DAG.getNode(ISD::SELECT_CC,
				697	Op.getDebugLoc(),
				698	Op.getValueType(),
				699	Op.getOperand(0),
				700	DAG.getConstant(0, MVT::i32),
				701	Op.getOperand(1),
				702	Op.getOperand(2),
				703	DAG.getCondCode(ISD::SETNE));
				704	}
				705
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	706	/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
				707	/// convert these pointers to a register index. Each register holds
				708	/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
				709	/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
				710	/// for indirect addressing.
				711	SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
				712	unsigned StackWidth,
				713	SelectionDAG &DAG) const {
				714	unsigned SRLPad;
				715	switch(StackWidth) {
				716	case 1:
				717	SRLPad = 2;
				718	break;
				719	case 2:
				720	SRLPad = 3;
				721	break;
				722	case 4:
				723	SRLPad = 4;
				724	break;
				725	default: llvm_unreachable("Invalid stack width");
				726	}
				727
				728	return DAG.getNode(ISD::SRL, Ptr.getDebugLoc(), Ptr.getValueType(), Ptr,
				729	DAG.getConstant(SRLPad, MVT::i32));
				730	}
				731
				732	void R600TargetLowering::getStackAddress(unsigned StackWidth,
				733	unsigned ElemIdx,
				734	unsigned &Channel,
				735	unsigned &PtrIncr) const {
				736	switch (StackWidth) {
				737	default:
				738	case 1:
				739	Channel = 0;
				740	if (ElemIdx > 0) {
				741	PtrIncr = 1;
				742	} else {
				743	PtrIncr = 0;
				744	}
				745	break;
				746	case 2:
				747	Channel = ElemIdx % 2;
				748	if (ElemIdx == 2) {
				749	PtrIncr = 1;
				750	} else {
				751	PtrIncr = 0;
				752	}
				753	break;
				754	case 4:
				755	Channel = ElemIdx;
				756	PtrIncr = 0;
				757	break;
				758	}
				759	}
				760
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	761	SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
				762	DebugLoc DL = Op.getDebugLoc();
				763	StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
				764	SDValue Chain = Op.getOperand(0);
				765	SDValue Value = Op.getOperand(1);
				766	SDValue Ptr = Op.getOperand(2);
				767
				768	if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
				769	Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
				770	// Convert pointer from byte address to dword address.
				771	Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
				772	DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
				773	Ptr, DAG.getConstant(2, MVT::i32)));
				774
				775	if (StoreNode->isTruncatingStore() \|\| StoreNode->isIndexed()) {
				776	assert(!"Truncated and indexed stores not supported yet");
				777	} else {
				778	Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
				779	}
				780	return Chain;
				781	}
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	782
				783	EVT ValueVT = Value.getValueType();
				784
				785	if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				786	return SDValue();
				787	}
				788
				789	// Lowering for indirect addressing
				790
				791	const MachineFunction &MF = DAG.getMachineFunction();
				792	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				793	getTargetMachine().getFrameLowering());
				794	unsigned StackWidth = TFL->getStackWidth(MF);
				795
				796	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				797
				798	if (ValueVT.isVector()) {
				799	unsigned NumElemVT = ValueVT.getVectorNumElements();
				800	EVT ElemVT = ValueVT.getVectorElementType();
				801	SDValue Stores[4];
				802
				803	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				804	"vector width in load");
				805
				806	for (unsigned i = 0; i < NumElemVT; ++i) {
				807	unsigned Channel, PtrIncr;
				808	getStackAddress(StackWidth, i, Channel, PtrIncr);
				809	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				810	DAG.getConstant(PtrIncr, MVT::i32));
				811	SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
				812	Value, DAG.getConstant(i, MVT::i32));
				813
				814	Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
				815	Chain, Elem, Ptr,
				816	DAG.getTargetConstant(Channel, MVT::i32));
				817	}
				818	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
				819	} else {
				820	if (ValueVT == MVT::i8) {
				821	Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
				822	}
				823	Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
				824	DAG.getTargetConstant(0, MVT::i32)); // Channel
				825	}
				826
				827	return Chain;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	828	}
				829
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	830	// return (512 + (kc_bank << 12)
				831	static int
				832	ConstantAddressBlock(unsigned AddressSpace) {
				833	switch (AddressSpace) {
				834	case AMDGPUAS::CONSTANT_BUFFER_0:
				835	return 512;
				836	case AMDGPUAS::CONSTANT_BUFFER_1:
				837	return 512 + 4096;
				838	case AMDGPUAS::CONSTANT_BUFFER_2:
				839	return 512 + 4096 * 2;
				840	case AMDGPUAS::CONSTANT_BUFFER_3:
				841	return 512 + 4096 * 3;
				842	case AMDGPUAS::CONSTANT_BUFFER_4:
				843	return 512 + 4096 * 4;
				844	case AMDGPUAS::CONSTANT_BUFFER_5:
				845	return 512 + 4096 * 5;
				846	case AMDGPUAS::CONSTANT_BUFFER_6:
				847	return 512 + 4096 * 6;
				848	case AMDGPUAS::CONSTANT_BUFFER_7:
				849	return 512 + 4096 * 7;
				850	case AMDGPUAS::CONSTANT_BUFFER_8:
				851	return 512 + 4096 * 8;
				852	case AMDGPUAS::CONSTANT_BUFFER_9:
				853	return 512 + 4096 * 9;
				854	case AMDGPUAS::CONSTANT_BUFFER_10:
				855	return 512 + 4096 * 10;
				856	case AMDGPUAS::CONSTANT_BUFFER_11:
				857	return 512 + 4096 * 11;
				858	case AMDGPUAS::CONSTANT_BUFFER_12:
				859	return 512 + 4096 * 12;
				860	case AMDGPUAS::CONSTANT_BUFFER_13:
				861	return 512 + 4096 * 13;
				862	case AMDGPUAS::CONSTANT_BUFFER_14:
				863	return 512 + 4096 * 14;
				864	case AMDGPUAS::CONSTANT_BUFFER_15:
				865	return 512 + 4096 * 15;
				866	default:
				867	return -1;
				868	}
				869	}
				870
				871	SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
				872	{
				873	EVT VT = Op.getValueType();
				874	DebugLoc DL = Op.getDebugLoc();
				875	LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
				876	SDValue Chain = Op.getOperand(0);
				877	SDValue Ptr = Op.getOperand(1);
				878	SDValue LoweredLoad;
				879
				880	int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
				881	if (ConstantBlock > -1) {
				882	SDValue Result;
				883	if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) \|\|
Vincent Lejeune	743dca0	2013-03-05 15:04:29 +0000	[diff] [blame]	884	dyn_cast<Constant>(LoadNode->getSrcValue()) \|\|
				885	dyn_cast<ConstantSDNode>(Ptr)) {
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	886	SDValue Slots[4];
				887	for (unsigned i = 0; i < 4; i++) {
				888	// We want Const position encoded with the following formula :
				889	// (((512 + (kc_bank << 12) + const_index) << 2) + chan)
				890	// const_index is Ptr computed by llvm using an alignment of 16.
				891	// Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
				892	// then div by 4 at the ISel step
				893	SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
				894	DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
				895	Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
				896	}
				897	Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
				898	} else {
				899	// non constant ptr cant be folded, keeps it as a v4f32 load
				900	Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
Vincent Lejeune	743dca0	2013-03-05 15:04:29 +0000	[diff] [blame]	901	DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
Christian Konig	189357c	2013-03-07 09:03:59 +0000	[diff] [blame]	902	DAG.getConstant(LoadNode->getAddressSpace() -
				903	AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32)
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	904	);
				905	}
				906
				907	if (!VT.isVector()) {
				908	Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
				909	DAG.getConstant(0, MVT::i32));
				910	}
				911
				912	SDValue MergedValues[2] = {
				913	Result,
				914	Chain
				915	};
				916	return DAG.getMergeValues(MergedValues, 2, DL);
				917	}
				918
Tom Stellard	f3b2a1e	2013-02-06 17:32:29 +0000	[diff] [blame]	919	if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
				920	return SDValue();
				921	}
				922
				923	// Lowering for indirect addressing
				924	const MachineFunction &MF = DAG.getMachineFunction();
				925	const AMDGPUFrameLowering TFL = static_cast<const AMDGPUFrameLowering>(
				926	getTargetMachine().getFrameLowering());
				927	unsigned StackWidth = TFL->getStackWidth(MF);
				928
				929	Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);
				930
				931	if (VT.isVector()) {
				932	unsigned NumElemVT = VT.getVectorNumElements();
				933	EVT ElemVT = VT.getVectorElementType();
				934	SDValue Loads[4];
				935
				936	assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
				937	"vector width in load");
				938
				939	for (unsigned i = 0; i < NumElemVT; ++i) {
				940	unsigned Channel, PtrIncr;
				941	getStackAddress(StackWidth, i, Channel, PtrIncr);
				942	Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
				943	DAG.getConstant(PtrIncr, MVT::i32));
				944	Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
				945	Chain, Ptr,
				946	DAG.getTargetConstant(Channel, MVT::i32),
				947	Op.getOperand(2));
				948	}
				949	for (unsigned i = NumElemVT; i < 4; ++i) {
				950	Loads[i] = DAG.getUNDEF(ElemVT);
				951	}
				952	EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
				953	LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
				954	} else {
				955	LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
				956	Chain, Ptr,
				957	DAG.getTargetConstant(0, MVT::i32), // Channel
				958	Op.getOperand(2));
				959	}
				960
				961	SDValue Ops[2];
				962	Ops[0] = LoweredLoad;
				963	Ops[1] = Chain;
				964
				965	return DAG.getMergeValues(Ops, 2, DL);
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	966	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	967
				968	SDValue R600TargetLowering::LowerFPOW(SDValue Op,
				969	SelectionDAG &DAG) const {
				970	DebugLoc DL = Op.getDebugLoc();
				971	EVT VT = Op.getValueType();
				972	SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
				973	SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
				974	return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
				975	}
				976
				977	/// XXX Only kernel functions are supported, so we can assume for now that
				978	/// every function is a kernel function, but in the future we should use
				979	/// separate calling conventions for kernel and non-kernel functions.
				980	SDValue R600TargetLowering::LowerFormalArguments(
				981	SDValue Chain,
				982	CallingConv::ID CallConv,
				983	bool isVarArg,
				984	const SmallVectorImpl<ISD::InputArg> &Ins,
				985	DebugLoc DL, SelectionDAG &DAG,
				986	SmallVectorImpl<SDValue> &InVals) const {
				987	unsigned ParamOffsetBytes = 36;
				988	Function::const_arg_iterator FuncArg =
				989	DAG.getMachineFunction().getFunction()->arg_begin();
				990	for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
				991	EVT VT = Ins[i].VT;
				992	Type *ArgType = FuncArg->getType();
				993	unsigned ArgSizeInBits = ArgType->isPointerTy() ?
				994	32 : ArgType->getPrimitiveSizeInBits();
				995	unsigned ArgBytes = ArgSizeInBits >> 3;
				996	EVT ArgVT;
				997	if (ArgSizeInBits < VT.getSizeInBits()) {
				998	assert(!ArgType->isFloatTy() &&
				999	"Extending floating point arguments not supported yet");
				1000	ArgVT = MVT::getIntegerVT(ArgSizeInBits);
				1001	} else {
				1002	ArgVT = VT;
				1003	}
				1004	PointerType PtrTy = PointerType::get(VT.getTypeForEVT(DAG.getContext()),
				1005	AMDGPUAS::PARAM_I_ADDRESS);
				1006	SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
				1007	DAG.getConstant(ParamOffsetBytes, MVT::i32),
Tom Stellard	8d469ed	2013-02-19 15:22:44 +0000	[diff] [blame]	1008	MachinePointerInfo(UndefValue::get(PtrTy)),
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1009	ArgVT, false, false, ArgBytes);
				1010	InVals.push_back(Arg);
				1011	ParamOffsetBytes += ArgBytes;
				1012	}
				1013	return Chain;
				1014	}
				1015
				1016	EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
				1017	if (!VT.isVector()) return MVT::i32;
				1018	return VT.changeVectorElementTypeToInteger();
				1019	}
				1020
				1021	//===----------------------------------------------------------------------===//
				1022	// Custom DAG Optimizations
				1023	//===----------------------------------------------------------------------===//
				1024
				1025	SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
				1026	DAGCombinerInfo &DCI) const {
				1027	SelectionDAG &DAG = DCI.DAG;
				1028
				1029	switch (N->getOpcode()) {
				1030	// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
				1031	case ISD::FP_ROUND: {
				1032	SDValue Arg = N->getOperand(0);
				1033	if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
				1034	return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
				1035	Arg.getOperand(0));
				1036	}
				1037	break;
				1038	}
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1039
				1040	// (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
				1041	// (i32 select_cc f32, f32, -1, 0 cc)
				1042	//
				1043	// Mesa's GLSL frontend generates the above pattern a lot and we can lower
				1044	// this to one of the SET*_DX10 instructions.
				1045	case ISD::FP_TO_SINT: {
				1046	SDValue FNeg = N->getOperand(0);
				1047	if (FNeg.getOpcode() != ISD::FNEG) {
				1048	return SDValue();
				1049	}
				1050	SDValue SelectCC = FNeg.getOperand(0);
				1051	if (SelectCC.getOpcode() != ISD::SELECT_CC \|\|
				1052	SelectCC.getOperand(0).getValueType() != MVT::f32 \|\| // LHS
				1053	SelectCC.getOperand(2).getValueType() != MVT::f32 \|\| // True
				1054	!isHWTrueValue(SelectCC.getOperand(2)) \|\|
				1055	!isHWFalseValue(SelectCC.getOperand(3))) {
				1056	return SDValue();
				1057	}
				1058
				1059	return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N->getValueType(0),
				1060	SelectCC.getOperand(0), // LHS
				1061	SelectCC.getOperand(1), // RHS
				1062	DAG.getConstant(-1, MVT::i32), // True
				1063	DAG.getConstant(0, MVT::i32), // Flase
				1064	SelectCC.getOperand(4)); // CC
				1065
				1066	break;
				1067	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1068	// Extract_vec (Build_vector) generated by custom lowering
				1069	// also needs to be customly combined
				1070	case ISD::EXTRACT_VECTOR_ELT: {
				1071	SDValue Arg = N->getOperand(0);
				1072	if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
				1073	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1074	unsigned Element = Const->getZExtValue();
				1075	return Arg->getOperand(Element);
				1076	}
				1077	}
Tom Stellard	dd04c83	2013-01-31 22:11:53 +0000	[diff] [blame]	1078	if (Arg.getOpcode() == ISD::BITCAST &&
				1079	Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
				1080	if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
				1081	unsigned Element = Const->getZExtValue();
				1082	return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
				1083	Arg->getOperand(0).getOperand(Element));
				1084	}
				1085	}
Tom Stellard	365366f	2013-01-23 02:09:06 +0000	[diff] [blame]	1086	}
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1087
				1088	case ISD::SELECT_CC: {
				1089	// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
				1090	// selectcc x, y, a, b, inv(cc)
				1091	SDValue LHS = N->getOperand(0);
				1092	if (LHS.getOpcode() != ISD::SELECT_CC) {
				1093	return SDValue();
				1094	}
				1095
				1096	SDValue RHS = N->getOperand(1);
				1097	SDValue True = N->getOperand(2);
				1098	SDValue False = N->getOperand(3);
				1099
				1100	if (LHS.getOperand(2).getNode() != True.getNode() \|\|
				1101	LHS.getOperand(3).getNode() != False.getNode() \|\|
				1102	RHS.getNode() != False.getNode() \|\|
				1103	cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) {
				1104	return SDValue();
				1105	}
				1106
				1107	ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get();
				1108	CCOpcode = ISD::getSetCCInverse(
				1109	CCOpcode, LHS.getOperand(0).getValueType().isInteger());
				1110	return DAG.getSelectCC(N->getDebugLoc(),
				1111	LHS.getOperand(0),
				1112	LHS.getOperand(1),
				1113	LHS.getOperand(2),
				1114	LHS.getOperand(3),
				1115	CCOpcode);
Vincent Lejeune	d80bc15	2013-02-14 16:55:06 +0000	[diff] [blame]	1116	}
				1117	case AMDGPUISD::EXPORT: {
				1118	SDValue Arg = N->getOperand(1);
				1119	if (Arg.getOpcode() != ISD::BUILD_VECTOR)
				1120	break;
				1121	SDValue NewBldVec[4] = {
				1122	DAG.getUNDEF(MVT::f32),
				1123	DAG.getUNDEF(MVT::f32),
				1124	DAG.getUNDEF(MVT::f32),
				1125	DAG.getUNDEF(MVT::f32)
				1126	};
				1127	SDValue NewArgs[8] = {
				1128	N->getOperand(0), // Chain
				1129	SDValue(),
				1130	N->getOperand(2), // ArrayBase
				1131	N->getOperand(3), // Type
				1132	N->getOperand(4), // SWZ_X
				1133	N->getOperand(5), // SWZ_Y
				1134	N->getOperand(6), // SWZ_Z
				1135	N->getOperand(7) // SWZ_W
				1136	};
				1137	for (unsigned i = 0; i < Arg.getNumOperands(); i++) {
				1138	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) {
				1139	if (C->isZero()) {
				1140	NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0
				1141	} else if (C->isExactlyValue(1.0)) {
				1142	NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0
				1143	} else {
				1144	NewBldVec[i] = Arg.getOperand(i);
				1145	}
				1146	} else {
				1147	NewBldVec[i] = Arg.getOperand(i);
				1148	}
				1149	}
				1150	DebugLoc DL = N->getDebugLoc();
				1151	NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4);
				1152	return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
Tom Stellard	e06163a	2013-02-07 14:02:35 +0000	[diff] [blame]	1153	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1154	}
				1155	return SDValue();
				1156	}