//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  setOperationAction(ISD::FPOW, MVT::f32, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);

  setSchedulingPreference(Sched::VLIW);
}

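// Custom inserter: expands R600 pseudo-instructions (CLAMP/FABS/FNEG move
// wrappers, immediate moves, texture-gradient sampling, branches, and
// exports) into real machine instructions once machine basic blocks exist.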
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                   AMDGPU::MOV,
                                                   MI->getOperand(0).getReg(),
                                                   MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::RESERVE_REG: {
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    unsigned SuperReg =
        AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
    MFI->ReservedRegs.push_back(SuperReg);
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }
  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::input_perspective: {
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

    // XXX Be more fine-grained about register reservation
    for (unsigned i = 0; i < 4; i++) {
      unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
      MFI->ReservedRegs.push_back(ReservedReg);
    }

    switch (MI->getOperand(1).getImm()) {
    case 0: // Perspective
      MFI->HasPerspectiveInterpolation = true;
      break;
    case 1: // Linear
      MFI->HasLinearInterpolation = true;
      break;
    default:
      assert(0 && "Unknown ij index");
    }

    return BB;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it is not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(EOP);
    break;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

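// Builds or updates the EXPORT node for a given output slot: the first scalar
// written to a slot creates a new export with a single-channel write mask;
// later scalars are inserted into the same vector and the mask is widened.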
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
    unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
    SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
        DL, MVT::v4f32,
        DAG.getUNDEF(MVT::v4f32),
        Scalar,
        DAG.getConstant(Channel, MVT::i32));

    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(
        AMDGPUISD::EXPORT,
        DL,
        MVT::Other,
        Ops, 6);
    ExportMap[Slot] = Res.getNode();
    return Res;
  }

  SDNode *ExportInstruction = ExportMap[Slot];
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DL, MVT::v4f32,
      PreviousVector,
      Scalar,
      DAG.getConstant(Channel, MVT::i32));

  unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
      ->getZExtValue();
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  DAG.UpdateNodeOperands(ExportInstruction,
      Ops, 6);

  return Chain;
}

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
          Chain);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace the Chain value inside
    // CustomWidenLowerNode.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  }
}

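// fp_to_uint with an i1 result only has to distinguish 0.0 from everything
// else, so it is lowered to a floating-point compare against zero.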
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

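// br_cc is lowered as a select_cc that materializes the comparison result
// (-1/0 for i32, 1.0f/0.0f for f32), followed by a BRANCH_COND on that value.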
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

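// Implicit kernel parameters (ngroups, global and local sizes) are read from
// the implicit parameter address space (PARAM_I_ADDRESS) at fixed dword
// offsets.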
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

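// rotl(x, y) is implemented with the BITALIGN instruction:
// rotl(x, y) == bitalign(x, x, 32 - y).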
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions require RHS to be zero.  Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions.  CND* instructions are cheaper, because they don't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
                                  DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      // This SELECT_CC is already legal.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

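// select is lowered to select_cc with an integer zero as RHS and a SETNE
// condition, i.e. select(c, t, f) -> select_cc(c, 0, t, f, ne).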
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

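// Only global-address-space stores are custom lowered here; the pointer is
// converted from a byte address to a dword address for the RAT instructions.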
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }
  return SDValue();
}

// Returns the base dword address of the given constant buffer,
// 512 + (kc_bank << 12), or -1 if AddressSpace is not a constant buffer.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

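// Loads from constant buffers are lowered to CONST_ADDRESS nodes.  When the
// pointer is a compile-time constant, the four channels are fetched
// individually and rebuilt with BUILD_VECTOR; otherwise the byte pointer is
// converted to a 16-byte slot index and fed to CONST_ADDRESS directly.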
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
        isa<Constant>(LoadNode->getSrcValue())) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the Const position encoded with the following formula:
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan) * 4 here and
        // then divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, MVT::i32)));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
          DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  return SDValue();
}

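// pow is expanded with the identity pow(x, y) == exp2(y * log2(x)), using the
// FLOG2 and FEXP2 nodes.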
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
                                      SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  // extract_vector_elt (build_vector) nodes generated by custom lowering
  // also need to be custom combined.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  }
  return SDValue();
}