//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Argument.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

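// Note on the operation actions below: ops marked Expand are broken up by the
// legalizer (the 4-element vector ops become per-channel scalar ops), while
// ops marked Custom are routed to LowerOperation / ReplaceNodeResults later
// in this file.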
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) {
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
  setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
  setOperationAction(ISD::FSUB, MVT::v4f32, Expand);

  setOperationAction(ISD::ADD, MVT::v4i32, Expand);
  setOperationAction(ISD::AND, MVT::v4i32, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
  setOperationAction(ISD::UREM, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
  setOperationAction(ISD::FPOW, MVT::f32, Custom);

  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);

  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTargetDAGCombine(ISD::FP_ROUND);

  setSchedulingPreference(Sched::VLIW);
}

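// Expand pseudo instructions that were marked with usesCustomInserter.  Most
// of the pseudos handled here become a default MOV carrying an R600
// instruction-modifier flag (clamp, abs, neg, write-mask); texture, branch,
// and export pseudos are rebuilt as their final machine instruction
// sequences.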
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::SHADER_TYPE: break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                  AMDGPU::MOV,
                                  MI->getOperand(0).getReg(),
                                  MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                  AMDGPU::MOV,
                                  MI->getOperand(0).getReg(),
                                  MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                  AMDGPU::MOV,
                                  MI->getOperand(0).getReg(),
                                  MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::R600_LOAD_CONST: {
    int64_t RegIndex = MI->getOperand(1).getImm();
    unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
            .addOperand(MI->getOperand(0))
            .addReg(ConstantReg);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned MaskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(MaskedRegister));
    MachineInstr * DefInstr = MRI.getVRegDef(MaskedRegister);
    TII->addFlag(DefInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::RESERVE_REG: {
    R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
    int64_t ReservedIndex = MI->getOperand(0).getImm();
    unsigned ReservedReg =
        AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
    MFI->ReservedRegs.push_back(ReservedReg);
    unsigned SuperReg =
        AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4);
    MFI->ReservedRegs.push_back(SuperReg);
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6));
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addReg(T0, RegState::Implicit)
            .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::input_perspective: {
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

    // XXX Be more fine-grained about register reservation.
    for (unsigned i = 0; i < 4; i++) {
      unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
      MFI->ReservedRegs.push_back(ReservedReg);
    }

    switch (MI->getOperand(1).getImm()) {
    case 0: // Perspective
      MFI->HasPerspectiveInterpolation = true;
      break;
    case 1: // Linear
      MFI->HasLinearInterpolation = true;
      break;
    default:
      assert(0 && "Unknown ij index");
    }

    return BB;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN);
    if (!EOP)
      return BB;
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(1))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(4))
            .addOperand(MI->getOperand(5))
            .addOperand(MI->getOperand(6))
            .addImm(CfInst)
            .addImm(1);
    break;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;

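// Fold a scalar output channel into the EXPORT node recorded for its slot.
// The first channel written to a slot creates a fresh AMDGPUISD::EXPORT of a
// v4f32 vector; subsequent channels are inserted into that same vector and
// OR-ed into its write mask, so each slot ends up with at most one export.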
static SDValue
InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap,
                             unsigned Slot, unsigned Channel, unsigned Inst,
                             unsigned Type, SDValue Scalar, SDValue Chain) {
  if (!ExportMap[Slot]) {
    SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                                 DL, MVT::v4f32,
                                 DAG.getUNDEF(MVT::v4f32),
                                 Scalar,
                                 DAG.getConstant(Channel, MVT::i32));

    unsigned Mask = 1 << Channel;

    const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32),
        DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32),
        DAG.getConstant(Mask, MVT::i32)};

    SDValue Res = DAG.getNode(AMDGPUISD::EXPORT, DL, MVT::Other, Ops, 6);
    ExportMap[Slot] = Res.getNode();
    return Res;
  }

  SDNode *ExportInstruction = ExportMap[Slot];
  SDValue PreviousVector = ExportInstruction->getOperand(1);
  SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                               DL, MVT::v4f32,
                               PreviousVector,
                               Scalar,
                               DAG.getConstant(Channel, MVT::i32));

  unsigned Mask =
      cast<ConstantSDNode>(ExportInstruction->getOperand(5))->getZExtValue();
  Mask |= (1 << Channel);

  const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector,
      DAG.getConstant(Inst, MVT::i32),
      DAG.getConstant(Type, MVT::i32),
      DAG.getConstant(Slot, MVT::i32),
      DAG.getConstant(Mask, MVT::i32)};

  DAG.UpdateNodeOperands(ExportInstruction, Ops, 6);

  return Chain;
}

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::FPOW: return LowerFPOW(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_pixel_color: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();

      SDNode **OutputsMap = MFI->Outputs;
      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2),
          Chain);
    }
    case AMDGPUIntrinsic::R600_store_stream_output: {
      MachineFunction &MF = DAG.getMachineFunction();
      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

      SDNode **OutputsMap = MFI->StreamOutputs[BufIndex];
      unsigned Inst;
      switch (BufIndex) {
      case 3: // STREAM3
        Inst = 4;
        break;
      case 2: // STREAM2
        Inst = 3;
        break;
      case 1: // STREAM1
        Inst = 2;
        break;
      case 0: // STREAM0
        Inst = 1;
        break;
      default:
        assert(0 && "Wrong buffer id for stream outputs!");
      }

      return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap,
          RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2),
          Chain);
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch (IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      if (slot < 0)
        return DAG.getUNDEF(MVT::f32);
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4, MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }

    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
  }
}

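// FP_TO_UINT with an i1 result is just a comparison against zero:
//   (fp_to_uint x) : i1  ==>  (setcc x, 0.0f, setne)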
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(
      ISD::SETCC,
      Op.getDebugLoc(),
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, MVT::f32),
      DAG.getCondCode(ISD::SETNE));
}

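// BR_CC has no direct R600 equivalent.  The comparison is first materialized
// with a SELECT_CC producing the hardware true/false values (-1/0 for i32,
// 1.0f/0.0f for f32), and that value then drives an AMDGPUISD::BRANCH_COND.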
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    assert(0 && "Invalid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

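// Implicit kernel parameters (ngroups, global and local sizes) live at fixed
// dword offsets at the start of the PARAM_I_ADDRESS space, so they are read
// with a plain load through a null pointer in that address space.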
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   DebugLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16 bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

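// ROTL is mapped onto the BITALIGN funnel shift.  Assuming BITALIGN's usual
// alignbit semantics (select 32 bits from the concatenation of the first two
// operands, starting at the given bit offset), passing the value twice turns
// the shift into a rotate:
//   rotl(x, s)  ==>  bitalign(x, x, 32 - s)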
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
                     Op.getOperand(0),
                     Op.getOperand(0),
                     DAG.getNode(ISD::SUB, DL, VT,
                                 DAG.getConstant(32, MVT::i32),
                                 Op.getOperand(1)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type.
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a CND* instruction:
  // CND* instructions require RHS to be zero.  Some SELECT_CC nodes that
  // can be lowered to CND* instructions can also be lowered to SET*
  // instructions.  CND* instructions are cheaper, because they don't
  // require additional instructions to convert their result to the correct
  // value type, so this check should be first.
  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // Try to lower to a SET* instruction:
  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to change True and False to be the same type as LHS and
  // RHS, and then convert the result of the select_cc back to the correct type.

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    std::swap(False, True);
    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
  }

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        // Convert integer values of true (-1) and false (0) to fp values of
        // true (1.0f) and false (0.0f).
        SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean,
                                  DAG.getConstant(1, MVT::i32));
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        // Convert fp values of true (1.0f) and false (0.0f) to integer values
        // of true (-1) and false (0).
        SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt);
        return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type pairings in SELECT_CC");
      }
    } else {
      // This SELECT_CC is already legal.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // Possible Min/Max pattern.
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  } else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS,
                             HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

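// SELECT is a SELECT_CC that compares the condition against zero:
//   (select c, t, f)  ==>  (select_cc c, 0, t, f, setne)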
SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  return DAG.getNode(ISD::SELECT_CC,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      DAG.getConstant(0, MVT::i32),
      Op.getOperand(1),
      Op.getOperand(2),
      DAG.getCondCode(ISD::SETNE));
}

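// SETCC must yield 0 or 1 in an i32, but the hardware compare idiom used
// here yields all-ones (i32) or 1.0f (f32).  Build the comparison with
// SELECT_CC, convert an f32 result back to integer, then AND with 1 to
// normalize the value to 0/1.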
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        DL,
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        DL,
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Invalid type for set_cc");
  }
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}

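// Global stores are dword-addressed: a byte-addressed pointer is rewritten
// as (DWORDADDR (srl ptr, 2)) before the store is re-emitted.  Truncating
// and indexed stores are not handled yet.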
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
      Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
    // Convert pointer from byte address to dword address.
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                      DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                  Ptr, DAG.getConstant(2, MVT::i32)));

    if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
      assert(!"Truncated and indexed stores not supported yet");
    } else {
      Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
    }
    return Chain;
  }
  return SDValue();
}

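// FPOW is expanded with the logarithm identity, which R600 can implement
// with its native log2/exp2 operations:
//   pow(x, y)  ==>  exp2(y * log2(x))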
SDValue R600TargetLowering::LowerFPOW(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0));
  SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase);
  return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
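/// Arguments are fetched as zero-extended loads from the PARAM_I_ADDRESS
/// space starting at byte offset 36; the first nine dwords (9 * 4 = 36
/// bytes) are taken by the implicit parameters read by
/// LowerImplicitParameter above.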
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  unsigned ParamOffsetBytes = 36;
  Function::const_arg_iterator FuncArg =
      DAG.getMachineFunction().getFunction()->arg_begin();
  for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
    EVT VT = Ins[i].VT;
    Type *ArgType = FuncArg->getType();
    unsigned ArgSizeInBits = ArgType->isPointerTy() ?
                             32 : ArgType->getPrimitiveSizeInBits();
    unsigned ArgBytes = ArgSizeInBits >> 3;
    EVT ArgVT;
    if (ArgSizeInBits < VT.getSizeInBits()) {
      assert(!ArgType->isFloatTy() &&
             "Extending floating point arguments not supported yet");
      ArgVT = MVT::getIntegerVT(ArgSizeInBits);
    } else {
      ArgVT = VT;
    }
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(),
                                 DAG.getConstant(ParamOffsetBytes, MVT::i32),
                                 MachinePointerInfo(new Argument(PtrTy)),
                                 ArgVT, false, false, ArgBytes);
    InVals.push_back(Arg);
    ParamOffsetBytes += ArgBytes;
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector()) return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }
  }
  return SDValue();
}