//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
  setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  setOperationAction(ISD::FPOW, MVT::f32, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);

  setSchedulingPreference(Sched::VLIW);
}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
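  // CLAMP, FABS, and FNEG are folded into a plain MOV by setting the
  // corresponding R600 instruction modifier flag on it.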
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr *defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

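  // Cacheless global stores through the RAT (random access target). A store
  // immediately followed by RETURN also carries the end-of-program bit.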
  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(0);
    break;

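  // Conditional branches become a PRED_X that compares the condition against
  // zero and defines PREDICATE_BIT, followed by a JUMP predicated on that bit.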
  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // The instruction is left unmodified if it's not the last export of its
    // type in the block.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = llvm::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

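// Build or grow the EXPORT node for a given output slot. The first scalar
// written to a slot creates a fresh EXPORT of an undef vector with a
// one-channel write mask; later scalars for the same slot are merged into
// that node by inserting into its vector operand and widening its mask.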
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
    unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type,
    SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
        DL, MVT::v4f32,
        DAG.getUNDEF(MVT::v4f32),
        Scalar,
        DAG.getConstant(Channel, MVT::i32));

    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(
        AMDGPUISD::EXPORT,
        DL,
        MVT::Other,
        Ops, 6);
    ExportMap[Slot] = Res.getNode();
    return Res;
  }

  SDNode *ExportInstruction = ExportMap[Slot];
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
      DL, MVT::v4f32,
      PreviousVector,
      Scalar,
      DAG.getConstant(Channel, MVT::i32));

  unsigned Mask = cast<ConstantSDNode>(ExportInstruction->getOperand(5))
                      ->getZExtValue();
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  DAG.UpdateNodeOperands(ExportInstruction,
      Ops, 6);

  return Chain;
}

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
          Chain);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, MVT::i32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32),
            CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32));

      return SDValue(interp, slot % 2);
    }

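    // The implicit parameters below are laid out as nine consecutive dwords
    // at the start of the parameter buffer (see LowerImplicitParameter).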
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT:
    Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    return;
  case ISD::LOAD: {
    SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
    Results.push_back(SDValue(Node, 0));
    Results.push_back(SDValue(Node, 1));
    // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
    // function
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(Node, 1));
    return;
  }
  }
}

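// An FP_TO_UINT that produces i1 only needs to distinguish zero from
// non-zero, so it reduces to a single setne compare against 0.0f.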
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

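// Express ROTL through BITALIGN: rotating x left by n bits is the same as
// bitaligning x with itself by (32 - n).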
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions require RHS to be zero.  Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions.  CND* instructions are cheaper, because they don't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
                                  DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      // This SELECT_CC is already legal.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

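// SELECT is SELECT_CC with an implicit "condition != 0" comparison.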
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

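// SETCC must produce 0 or 1 in an i32. Build the compare as a SELECT_CC
// yielding -1/0 (integer) or 1.0f/0.0f (fp, then converted to integer), and
// AND the result with 1 to normalize it.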
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }
  return SDValue();
}

// Returns the constant-register base address for the given constant-buffer
// address space: 512 + (kc_bank << 12), or -1 if it is not one.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1) {
    SDValue Result;
    if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
        dyn_cast<Constant>(LoadNode->getSrcValue())) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want the constant position encoded with the following formula:
        //   (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // where const_index is Ptr, computed by LLVM using an alignment of 16.
        // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and then
        // divide by 4 at the ISel step.
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
    } else {
      // A non-constant pointer can't be folded; keep it as a v4i32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, MVT::i32)));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, 2, DL);
  }

  return SDValue();
}

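// Lower pow through the identity pow(x, y) == exp2(y * log2(x)).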
SDValue R600TargetLowering::LowerFPOW(SDValue Op,
                                      SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {
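  // Kernel arguments start 36 bytes into the parameter buffer, just past the
  // nine implicit dword parameters read by LowerImplicitParameter.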
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
        32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  // Extract_vec (Build_vector) nodes generated by custom lowering also need
  // to be combined here.
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
  }
  }
  return SDValue();
}