Blame - llvm/lib/Target/R600/SIISelLowering.cpp - toolchain/llvm-project

blob: 65d5479adcbbbc6d302ff081a7fd19d9eec88f22 [file] [log] [blame]

Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	1	//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
				11	/// \brief Custom DAG lowering for SI
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "SIISelLowering.h"
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	16	#include "AMDGPU.h"
Benjamin Kramer	d78bb46	2013-05-23 17:10:37 +0000	[diff] [blame]	17	#include "AMDIL.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	18	#include "AMDILIntrinsicInfo.h"
				19	#include "SIInstrInfo.h"
				20	#include "SIMachineFunctionInfo.h"
				21	#include "SIRegisterInfo.h"
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	22	#include "llvm/CodeGen/CallingConvLower.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	23	#include "llvm/CodeGen/MachineInstrBuilder.h"
				24	#include "llvm/CodeGen/MachineRegisterInfo.h"
				25	#include "llvm/CodeGen/SelectionDAG.h"
Benjamin Kramer	d78bb46	2013-05-23 17:10:37 +0000	[diff] [blame]	26	#include "llvm/IR/Function.h"
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	27
/// Constant bits for the upper part of a 128-bit resource descriptor. Only the
/// bits above bit 31 are consumed in this file: the SI_ADDR64_RSRC expansion
/// moves (RSRC_DATA_FORMAT >> 32) into the highest 32-bit sub-register.
/// NOTE(review): presumably encodes the buffer data format -- confirm against
/// the hardware documentation.
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
				29
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	30	using namespace llvm;
				31
				32	SITargetLowering::SITargetLowering(TargetMachine &TM) :
				33	AMDGPUTargetLowering(TM),
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	34	TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
				35	TRI(TM.getRegisterInfo()) {
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	36
Christian Konig	a881179	2013-02-16 11:28:30 +0000	[diff] [blame]	37	addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	38	addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
				39
				40	addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
				41	addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
				42	addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
				43
				44	addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
				45	addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	46
Tom Stellard	538ceeb	2013-02-07 17:02:09 +0000	[diff] [blame]	47	addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	48
Tom Stellard	538ceeb	2013-02-07 17:02:09 +0000	[diff] [blame]	49	addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	50	addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
				51
Tom Stellard	538ceeb	2013-02-07 17:02:09 +0000	[diff] [blame]	52	addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	53	addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
Tom Stellard	754f80f	2013-04-05 23:31:51 +0000	[diff] [blame]	54	addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	55
Tom Stellard	538ceeb	2013-02-07 17:02:09 +0000	[diff] [blame]	56	addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	57	addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
				58
Tom Stellard	538ceeb	2013-02-07 17:02:09 +0000	[diff] [blame]	59	addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
Christian Konig	2214f14	2013-03-07 09:03:38 +0000	[diff] [blame]	60	addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	61
				62	computeRegisterProperties();
				63
Christian Konig	2989ffc	2013-03-18 11:34:16 +0000	[diff] [blame]	64	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
				65	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
				66	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
				67	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
				68
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	69	setOperationAction(ISD::ADD, MVT::i64, Legal);
				70	setOperationAction(ISD::ADD, MVT::i32, Legal);
				71
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	72	setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
				73	setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
				74
				75	setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
Tom Stellard	754f80f	2013-04-05 23:31:51 +0000	[diff] [blame]	76
Tom Stellard	046039e	2013-06-03 17:40:03 +0000	[diff] [blame]	77	setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
				78
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	79	setTargetDAGCombine(ISD::SELECT_CC);
				80
				81	setTargetDAGCombine(ISD::SETCC);
Michel Danzer	f52a672	2013-03-08 10:58:01 +0000	[diff] [blame]	82
Christian Konig	eecebd0	2013-03-26 14:04:02 +0000	[diff] [blame]	83	setSchedulingPreference(Sched::RegPressure);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	84	}
				85
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	86	SDValue SITargetLowering::LowerFormalArguments(
				87	SDValue Chain,
				88	CallingConv::ID CallConv,
				89	bool isVarArg,
				90	const SmallVectorImpl<ISD::InputArg> &Ins,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	91	SDLoc DL, SelectionDAG &DAG,
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	92	SmallVectorImpl<SDValue> &InVals) const {
				93
				94	const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
				95
				96	MachineFunction &MF = DAG.getMachineFunction();
				97	FunctionType *FType = MF.getFunction()->getFunctionType();
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	98	SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	99
				100	assert(CallConv == CallingConv::C);
				101
				102	SmallVector<ISD::InputArg, 16> Splits;
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	103	uint32_t Skipped = 0;
				104
				105	for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	106	const ISD::InputArg &Arg = Ins[i];
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	107
				108	// First check if it's a PS input addr
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	109	if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
				110
				111	assert((PSInputNum <= 15) && "Too many PS inputs!");
				112
				113	if (!Arg.Used) {
				114	// We can savely skip PS inputs
				115	Skipped \|= 1 << i;
				116	++PSInputNum;
				117	continue;
				118	}
				119
				120	Info->PSInputAddr \|= 1 << PSInputNum++;
				121	}
				122
				123	// Second split vertices into their elements
Tom Stellard	ed882c2	2013-06-03 17:40:11 +0000	[diff] [blame^]	124	if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) {
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	125	ISD::InputArg NewArg = Arg;
				126	NewArg.Flags.setSplit();
				127	NewArg.VT = Arg.VT.getVectorElementType();
				128
				129	// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
				130	// three or five element vertex only needs three or five registers,
				131	// NOT four or eigth.
				132	Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
				133	unsigned NumElements = ParamType->getVectorNumElements();
				134
				135	for (unsigned j = 0; j != NumElements; ++j) {
				136	Splits.push_back(NewArg);
				137	NewArg.PartOffset += NewArg.VT.getStoreSize();
				138	}
				139
				140	} else {
				141	Splits.push_back(Arg);
				142	}
				143	}
				144
				145	SmallVector<CCValAssign, 16> ArgLocs;
				146	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
				147	getTargetMachine(), ArgLocs, *DAG.getContext());
				148
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	149	// At least one interpolation mode must be enabled or else the GPU will hang.
				150	if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
				151	Info->PSInputAddr \|= 1;
				152	CCInfo.AllocateReg(AMDGPU::VGPR0);
				153	CCInfo.AllocateReg(AMDGPU::VGPR1);
				154	}
				155
Tom Stellard	ed882c2	2013-06-03 17:40:11 +0000	[diff] [blame^]	156	unsigned ArgReg = 0;
				157	// The pointer to the list of arguments is stored in SGPR0, SGPR1
				158	if (Info->ShaderType == ShaderType::COMPUTE) {
				159	CCInfo.AllocateReg(AMDGPU::SGPR0);
				160	CCInfo.AllocateReg(AMDGPU::SGPR1);
				161	ArgReg = MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
				162	}
				163
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	164	AnalyzeFormalArguments(CCInfo, Splits);
				165
				166	for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
				167
Christian Konig	b7be72d	2013-05-17 09:46:48 +0000	[diff] [blame]	168	const ISD::InputArg &Arg = Ins[i];
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	169	if (Skipped & (1 << i)) {
Christian Konig	b7be72d	2013-05-17 09:46:48 +0000	[diff] [blame]	170	InVals.push_back(DAG.getUNDEF(Arg.VT));
Christian Konig	99ee0f4	2013-03-07 09:04:14 +0000	[diff] [blame]	171	continue;
				172	}
				173
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	174	CCValAssign &VA = ArgLocs[ArgIdx++];
Tom Stellard	ed882c2	2013-06-03 17:40:11 +0000	[diff] [blame^]	175	EVT VT = VA.getLocVT();
				176
				177	if (VA.isMemLoc()) {
				178	assert(ArgReg);
				179	PointerType PtrTy = PointerType::get(VT.getTypeForEVT(DAG.getContext()),
				180	AMDGPUAS::CONSTANT_ADDRESS);
				181	EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
				182	SDValue BasePtr = DAG.getCopyFromReg(DAG.getRoot(), DL,
				183	ArgReg, MVT::i64);
				184	SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
				185	DAG.getConstant(VA.getLocMemOffset(), MVT::i64));
				186	SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), Ptr,
				187	MachinePointerInfo(UndefValue::get(PtrTy)),
				188	VA.getValVT(), false, false, ArgVT.getSizeInBits() >> 3);
				189	InVals.push_back(Arg);
				190	continue;
				191	}
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	192	assert(VA.isRegLoc() && "Parameter must be in a register!");
				193
				194	unsigned Reg = VA.getLocReg();
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	195
				196	if (VT == MVT::i64) {
				197	// For now assume it is a pointer
				198	Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
				199	&AMDGPU::SReg_64RegClass);
				200	Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
				201	InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
				202	continue;
				203	}
				204
				205	const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
				206
				207	Reg = MF.addLiveIn(Reg, RC);
				208	SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
				209
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	210	if (Arg.VT.isVector()) {
				211
				212	// Build a vector from the registers
				213	Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
				214	unsigned NumElements = ParamType->getVectorNumElements();
				215
				216	SmallVector<SDValue, 4> Regs;
				217	Regs.push_back(Val);
				218	for (unsigned j = 1; j != NumElements; ++j) {
				219	Reg = ArgLocs[ArgIdx++].getLocReg();
				220	Reg = MF.addLiveIn(Reg, RC);
				221	Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
				222	}
				223
				224	// Fill up the missing vector elements
				225	NumElements = Arg.VT.getVectorNumElements() - NumElements;
				226	for (unsigned j = 0; j != NumElements; ++j)
				227	Regs.push_back(DAG.getUNDEF(VT));
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	228
Christian Konig	2c8f6d5	2013-03-07 09:03:52 +0000	[diff] [blame]	229	InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
				230	Regs.data(), Regs.size()));
				231	continue;
				232	}
				233
				234	InVals.push_back(Val);
				235	}
				236	return Chain;
				237	}
				238
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	239	MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
				240	MachineInstr * MI, MachineBasicBlock * BB) const {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	241
Tom Stellard	556d9aa	2013-06-03 17:39:37 +0000	[diff] [blame]	242	MachineBasicBlock::iterator I = *MI;
				243
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	244	switch (MI->getOpcode()) {
				245	default:
				246	return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
				247	case AMDGPU::BRANCH: return BB;
Tom Stellard	556d9aa	2013-06-03 17:39:37 +0000	[diff] [blame]	248	case AMDGPU::SI_ADDR64_RSRC: {
				249	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
				250	unsigned SuperReg = MI->getOperand(0).getReg();
				251	unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				252	unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
				253	unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
				254	unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
				255	BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
				256	.addOperand(MI->getOperand(1));
				257	BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
				258	.addImm(0);
				259	BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
				260	.addImm(RSRC_DATA_FORMAT >> 32);
				261	BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
				262	.addReg(SubRegHiLo)
				263	.addImm(AMDGPU::sub0)
				264	.addReg(SubRegHiHi)
				265	.addImm(AMDGPU::sub1);
				266	BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
				267	.addReg(SubRegLo)
				268	.addImm(AMDGPU::sub0_sub1)
				269	.addReg(SubRegHi)
				270	.addImm(AMDGPU::sub2_sub3);
				271	MI->eraseFromParent();
				272	break;
				273	}
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	274	}
				275	return BB;
				276	}
				277
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	278	EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	279	return MVT::i1;
				280	}
				281
Christian Konig	082a14a	2013-03-18 11:34:05 +0000	[diff] [blame]	282	MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
				283	return MVT::i32;
				284	}
				285
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	286	//===----------------------------------------------------------------------===//
				287	// Custom DAG Lowering Operations
				288	//===----------------------------------------------------------------------===//
				289
				290	SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
				291	switch (Op.getOpcode()) {
				292	default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
Tom Stellard	f879435	2012-12-19 22:10:31 +0000	[diff] [blame]	293	case ISD::BRCOND: return LowerBRCOND(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	294	case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
Tom Stellard	046039e	2013-06-03 17:40:03 +0000	[diff] [blame]	295	case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	296	}
				297	return SDValue();
				298	}
				299
Tom Stellard	f879435	2012-12-19 22:10:31 +0000	[diff] [blame]	300	/// \brief Helper function for LowerBRCOND
				301	static SDNode *findUser(SDValue Value, unsigned Opcode) {
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	302
Tom Stellard	f879435	2012-12-19 22:10:31 +0000	[diff] [blame]	303	SDNode *Parent = Value.getNode();
				304	for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
				305	I != E; ++I) {
				306
				307	if (I.getUse().get() != Value)
				308	continue;
				309
				310	if (I->getOpcode() == Opcode)
				311	return *I;
				312	}
				313	return 0;
				314	}
				315
				316	/// This transforms the control flow intrinsics to get the branch destination as
				317	/// last parameter, also switches branch target with BR if the need arise
				318	SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
				319	SelectionDAG &DAG) const {
				320
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	321	SDLoc DL(BRCOND);
Tom Stellard	f879435	2012-12-19 22:10:31 +0000	[diff] [blame]	322
				323	SDNode *Intr = BRCOND.getOperand(1).getNode();
				324	SDValue Target = BRCOND.getOperand(2);
				325	SDNode *BR = 0;
				326
				327	if (Intr->getOpcode() == ISD::SETCC) {
				328	// As long as we negate the condition everything is fine
				329	SDNode *SetCC = Intr;
				330	assert(SetCC->getConstantOperandVal(1) == 1);
NAKAMURA Takumi	458a827	2013-01-07 11:14:44 +0000	[diff] [blame]	331	assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
				332	ISD::SETNE);
Tom Stellard	f879435	2012-12-19 22:10:31 +0000	[diff] [blame]	333	Intr = SetCC->getOperand(0).getNode();
				334
				335	} else {
				336	// Get the target from BR if we don't negate the condition
				337	BR = findUser(BRCOND, ISD::BR);
				338	Target = BR->getOperand(1);
				339	}
				340
				341	assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
				342
				343	// Build the result and
				344	SmallVector<EVT, 4> Res;
				345	for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
				346	Res.push_back(Intr->getValueType(i));
				347
				348	// operands of the new intrinsic call
				349	SmallVector<SDValue, 4> Ops;
				350	Ops.push_back(BRCOND.getOperand(0));
				351	for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
				352	Ops.push_back(Intr->getOperand(i));
				353	Ops.push_back(Target);
				354
				355	// build the new intrinsic call
				356	SDNode *Result = DAG.getNode(
				357	Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
				358	DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
				359
				360	if (BR) {
				361	// Give the branch instruction our target
				362	SDValue Ops[] = {
				363	BR->getOperand(0),
				364	BRCOND.getOperand(2)
				365	};
				366	DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
				367	}
				368
				369	SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
				370
				371	// Copy the intrinsic results to registers
				372	for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
				373	SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
				374	if (!CopyToReg)
				375	continue;
				376
				377	Chain = DAG.getCopyToReg(
				378	Chain, DL,
				379	CopyToReg->getOperand(1),
				380	SDValue(Result, i - 1),
				381	SDValue());
				382
				383	DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
				384	}
				385
				386	// Remove the old intrinsic from the chain
				387	DAG.ReplaceAllUsesOfValueWith(
				388	SDValue(Intr, Intr->getNumValues() - 1),
				389	Intr->getOperand(0));
				390
				391	return Chain;
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	392	}
				393
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	394	SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
				395	SDValue LHS = Op.getOperand(0);
				396	SDValue RHS = Op.getOperand(1);
				397	SDValue True = Op.getOperand(2);
				398	SDValue False = Op.getOperand(3);
				399	SDValue CC = Op.getOperand(4);
				400	EVT VT = Op.getValueType();
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	401	SDLoc DL(Op);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	402
				403	// Possible Min/Max pattern
				404	SDValue MinMax = LowerMinMax(Op, DAG);
				405	if (MinMax.getNode()) {
				406	return MinMax;
				407	}
				408
				409	SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
				410	return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
				411	}
				412
Tom Stellard	046039e	2013-06-03 17:40:03 +0000	[diff] [blame]	413	SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
				414	SelectionDAG &DAG) const {
				415	EVT VT = Op.getValueType();
				416	SDLoc DL(Op);
				417
				418	if (VT != MVT::i64) {
				419	return SDValue();
				420	}
				421
				422	SDValue Hi = DAG.getNode(ISD::SRA, DL, MVT::i32, Op.getOperand(0),
				423	DAG.getConstant(31, MVT::i32));
				424
				425	return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
				426	}
				427
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	428	//===----------------------------------------------------------------------===//
				429	// Custom DAG optimizations
				430	//===----------------------------------------------------------------------===//
				431
				432	SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
				433	DAGCombinerInfo &DCI) const {
				434	SelectionDAG &DAG = DCI.DAG;
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	435	SDLoc DL(N);
Tom Stellard	75aadc2	2012-12-11 21:25:42 +0000	[diff] [blame]	436	EVT VT = N->getValueType(0);
				437
				438	switch (N->getOpcode()) {
				439	default: break;
				440	case ISD::SELECT_CC: {
				441	N->dump();
				442	ConstantSDNode True, False;
				443	// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
				444	if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
				445	&& (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
				446	&& True->isAllOnesValue()
				447	&& False->isNullValue()
				448	&& VT == MVT::i1) {
				449	return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
				450	N->getOperand(1), N->getOperand(4));
				451
				452	}
				453	break;
				454	}
				455	case ISD::SETCC: {
				456	SDValue Arg0 = N->getOperand(0);
				457	SDValue Arg1 = N->getOperand(1);
				458	SDValue CC = N->getOperand(2);
				459	ConstantSDNode * C = NULL;
				460	ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
				461
				462	// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
				463	if (VT == MVT::i1
				464	&& Arg0.getOpcode() == ISD::SIGN_EXTEND
				465	&& Arg0.getOperand(0).getValueType() == MVT::i1
				466	&& (C = dyn_cast<ConstantSDNode>(Arg1))
				467	&& C->isNullValue()
				468	&& CCOp == ISD::SETNE) {
				469	return SimplifySetCC(VT, Arg0.getOperand(0),
				470	DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
				471	}
				472	break;
				473	}
				474	}
				475	return SDValue();
				476	}
Christian Konig	d910b7d	2013-02-26 17:52:16 +0000	[diff] [blame]	477
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	478	/// \brief Test if RegClass is one of the VSrc classes
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	479	static bool isVSrc(unsigned RegClass) {
				480	return AMDGPU::VSrc_32RegClassID == RegClass \|\|
				481	AMDGPU::VSrc_64RegClassID == RegClass;
				482	}
				483
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	484	/// \brief Test if RegClass is one of the SSrc classes
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	485	static bool isSSrc(unsigned RegClass) {
				486	return AMDGPU::SSrc_32RegClassID == RegClass \|\|
				487	AMDGPU::SSrc_64RegClassID == RegClass;
				488	}
				489
				490	/// \brief Analyze the possible immediate value Op
				491	///
				492	/// Returns -1 if it isn't an immediate, 0 if it's and inline immediate
				493	/// and the immediate value if it's a literal immediate
				494	int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
				495
				496	union {
				497	int32_t I;
				498	float F;
				499	} Imm;
				500
Tom Stellard	edbf1eb	2013-04-05 23:31:20 +0000	[diff] [blame]	501	if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
				502	if (Node->getZExtValue() >> 32) {
				503	return -1;
				504	}
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	505	Imm.I = Node->getSExtValue();
Tom Stellard	edbf1eb	2013-04-05 23:31:20 +0000	[diff] [blame]	506	} else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	507	Imm.F = Node->getValueAPF().convertToFloat();
				508	else
				509	return -1; // It isn't an immediate
				510
				511	if ((Imm.I >= -16 && Imm.I <= 64) \|\|
				512	Imm.F == 0.5f \|\| Imm.F == -0.5f \|\|
				513	Imm.F == 1.0f \|\| Imm.F == -1.0f \|\|
				514	Imm.F == 2.0f \|\| Imm.F == -2.0f \|\|
				515	Imm.F == 4.0f \|\| Imm.F == -4.0f)
				516	return 0; // It's an inline immediate
				517
				518	return Imm.I; // It's a literal immediate
				519	}
				520
				521	/// \brief Try to fold an immediate directly into an instruction
				522	bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
				523	bool &ScalarSlotUsed) const {
				524
				525	MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
				526	if (Mov == 0 \|\| !TII->isMov(Mov->getMachineOpcode()))
				527	return false;
				528
				529	const SDValue &Op = Mov->getOperand(0);
				530	int32_t Value = analyzeImmediate(Op.getNode());
				531	if (Value == -1) {
				532	// Not an immediate at all
				533	return false;
				534
				535	} else if (Value == 0) {
				536	// Inline immediates can always be fold
				537	Operand = Op;
				538	return true;
				539
				540	} else if (Value == Immediate) {
				541	// Already fold literal immediate
				542	Operand = Op;
				543	return true;
				544
				545	} else if (!ScalarSlotUsed && !Immediate) {
				546	// Fold this literal immediate
				547	ScalarSlotUsed = true;
				548	Immediate = Value;
				549	Operand = Op;
				550	return true;
				551
				552	}
				553
				554	return false;
				555	}
				556
				557	/// \brief Does "Op" fit into register class "RegClass" ?
Tom Stellard	b35efba	2013-05-20 15:02:01 +0000	[diff] [blame]	558	bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	559	unsigned RegClass) const {
				560
Matt Arsenault	75865923	2013-05-18 00:21:46 +0000	[diff] [blame]	561	MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	562	SDNode *Node = Op.getNode();
				563
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	564	const TargetRegisterClass *OpClass;
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	565	if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
				566	const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	567	int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
Tom Stellard	bad1f59	2013-06-03 17:39:54 +0000	[diff] [blame]	568	if (OpClassID == -1) {
				569	switch (MN->getMachineOpcode()) {
				570	case AMDGPU::REG_SEQUENCE:
				571	// Operand 0 is the register class id for REG_SEQUENCE instructions.
				572	OpClass = TRI->getRegClass(
				573	cast<ConstantSDNode>(MN->getOperand(0))->getZExtValue());
				574	break;
				575	default:
				576	OpClass = getRegClassFor(Op.getSimpleValueType());
				577	break;
				578	}
				579	} else {
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	580	OpClass = TRI->getRegClass(OpClassID);
Tom Stellard	bad1f59	2013-06-03 17:39:54 +0000	[diff] [blame]	581	}
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	582
				583	} else if (Node->getOpcode() == ISD::CopyFromReg) {
				584	RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	585	OpClass = MRI.getRegClass(Reg->getReg());
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	586
				587	} else
				588	return false;
				589
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	590	return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass);
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	591	}
				592
				593	/// \brief Make sure that we don't exeed the number of allowed scalars
				594	void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
				595	unsigned RegClass,
				596	bool &ScalarSlotUsed) const {
				597
				598	// First map the operands register class to a destination class
				599	if (RegClass == AMDGPU::VSrc_32RegClassID)
				600	RegClass = AMDGPU::VReg_32RegClassID;
				601	else if (RegClass == AMDGPU::VSrc_64RegClassID)
				602	RegClass = AMDGPU::VReg_64RegClassID;
				603	else
				604	return;
				605
				606	// Nothing todo if they fit naturaly
				607	if (fitsRegClass(DAG, Operand, RegClass))
				608	return;
				609
				610	// If the scalar slot isn't used yet use it now
				611	if (!ScalarSlotUsed) {
				612	ScalarSlotUsed = true;
				613	return;
				614	}
				615
				616	// This is a conservative aproach, it is possible that we can't determine
				617	// the correct register class and copy too often, but better save than sorry.
				618	SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	619	SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	620	Operand.getValueType(), Operand, RC);
				621	Operand = SDValue(Node, 0);
				622	}
				623
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	624	/// \brief Try to fold the Nodes operands into the Node
				625	SDNode SITargetLowering::foldOperands(MachineSDNode Node,
				626	SelectionDAG &DAG) const {
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	627
				628	// Original encoding (either e32 or e64)
				629	int Opcode = Node->getMachineOpcode();
				630	const MCInstrDesc *Desc = &TII->get(Opcode);
				631
				632	unsigned NumDefs = Desc->getNumDefs();
				633	unsigned NumOps = Desc->getNumOperands();
				634
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	635	// Commuted opcode if available
				636	int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
				637	const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
				638
				639	assert(!DescRev \|\| DescRev->getNumDefs() == NumDefs);
				640	assert(!DescRev \|\| DescRev->getNumOperands() == NumOps);
				641
Christian Konig	e500e44	2013-02-26 17:52:47 +0000	[diff] [blame]	642	// e64 version if available, -1 otherwise
				643	int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
				644	const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
				645
				646	assert(!DescE64 \|\| DescE64->getNumDefs() == NumDefs);
				647	assert(!DescE64 \|\| DescE64->getNumOperands() == (NumOps + 4));
				648
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	649	int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
				650	bool HaveVSrc = false, HaveSSrc = false;
				651
				652	// First figure out what we alread have in this instruction
				653	for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
				654	i != e && Op < NumOps; ++i, ++Op) {
				655
				656	unsigned RegClass = Desc->OpInfo[Op].RegClass;
				657	if (isVSrc(RegClass))
				658	HaveVSrc = true;
				659	else if (isSSrc(RegClass))
				660	HaveSSrc = true;
				661	else
				662	continue;
				663
				664	int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
				665	if (Imm != -1 && Imm != 0) {
				666	// Literal immediate
				667	Immediate = Imm;
				668	}
				669	}
				670
				671	// If we neither have VSrc nor SSrc it makes no sense to continue
				672	if (!HaveVSrc && !HaveSSrc)
				673	return Node;
				674
				675	// No scalar allowed when we have both VSrc and SSrc
				676	bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
				677
				678	// Second go over the operands and try to fold them
				679	std::vector<SDValue> Ops;
Christian Konig	e500e44	2013-02-26 17:52:47 +0000	[diff] [blame]	680	bool Promote2e64 = false;
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	681	for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
				682	i != e && Op < NumOps; ++i, ++Op) {
				683
				684	const SDValue &Operand = Node->getOperand(i);
				685	Ops.push_back(Operand);
				686
				687	// Already folded immediate ?
				688	if (isa<ConstantSDNode>(Operand.getNode()) \|\|
				689	isa<ConstantFPSDNode>(Operand.getNode()))
				690	continue;
				691
				692	// Is this a VSrc or SSrc operand ?
				693	unsigned RegClass = Desc->OpInfo[Op].RegClass;
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	694	if (isVSrc(RegClass) \|\| isSSrc(RegClass)) {
				695	// Try to fold the immediates
				696	if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
				697	// Folding didn't worked, make sure we don't hit the SReg limit
				698	ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
				699	}
				700	continue;
				701	}
Christian Konig	6612ac3	2013-02-26 17:52:36 +0000	[diff] [blame]	702
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	703	if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
Christian Konig	6612ac3	2013-02-26 17:52:36 +0000	[diff] [blame]	704
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	705	unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
				706	assert(isVSrc(OtherRegClass) \|\| isSSrc(OtherRegClass));
				707
				708	// Test if it makes sense to swap operands
				709	if (foldImm(Ops[1], Immediate, ScalarSlotUsed) \|\|
				710	(!fitsRegClass(DAG, Ops[1], RegClass) &&
				711	fitsRegClass(DAG, Ops[1], OtherRegClass))) {
Christian Konig	6612ac3	2013-02-26 17:52:36 +0000	[diff] [blame]	712
				713	// Swap commutable operands
				714	SDValue Tmp = Ops[1];
				715	Ops[1] = Ops[0];
				716	Ops[0] = Tmp;
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	717
				718	Desc = DescRev;
				719	DescRev = 0;
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	720	continue;
Christian Konig	6612ac3	2013-02-26 17:52:36 +0000	[diff] [blame]	721	}
Christian Konig	6612ac3	2013-02-26 17:52:36 +0000	[diff] [blame]	722	}
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	723
Christian Konig	8370dbb	2013-03-26 14:04:17 +0000	[diff] [blame]	724	if (DescE64 && !Immediate) {
				725
				726	// Test if it makes sense to switch to e64 encoding
				727	unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
				728	if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
				729	continue;
				730
				731	int32_t TmpImm = -1;
				732	if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) \|\|
				733	(!fitsRegClass(DAG, Ops[i], RegClass) &&
				734	fitsRegClass(DAG, Ops[1], OtherRegClass))) {
				735
				736	// Switch to e64 encoding
				737	Immediate = -1;
				738	Promote2e64 = true;
				739	Desc = DescE64;
				740	DescE64 = 0;
				741	}
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	742	}
				743	}
				744
Christian Konig	e500e44	2013-02-26 17:52:47 +0000	[diff] [blame]	745	if (Promote2e64) {
				746	// Add the modifier flags while promoting
				747	for (unsigned i = 0; i < 4; ++i)
				748	Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
				749	}
				750
Christian Konig	f82901a	2013-02-26 17:52:23 +0000	[diff] [blame]	751	// Add optional chain and glue
				752	for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
				753	Ops.push_back(Node->getOperand(i));
				754
Tom Stellard	b5a9700	2013-06-03 17:39:50 +0000	[diff] [blame]	755	// Nodes that have a glue result are not CSE'd by getMachineNode(), so in
				756	// this case a brand new node is always be created, even if the operands
				757	// are the same as before. So, manually check if anything has been changed.
				758	if (Desc->Opcode == Opcode) {
				759	bool Changed = false;
				760	for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
				761	if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
				762	Changed = true;
				763	break;
				764	}
				765	}
				766	if (!Changed) {
				767	return Node;
				768	}
				769	}
				770
Christian Konig	3c14580	2013-03-27 09:12:59 +0000	[diff] [blame]	771	// Create a complete new instruction
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	772	return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
Christian Konig	d910b7d	2013-02-26 17:52:16 +0000	[diff] [blame]	773	}
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	774
				775	/// \brief Helper function for adjustWritemask
Benjamin Kramer	635e368	2013-05-23 15:43:05 +0000	[diff] [blame]	776	static unsigned SubIdx2Lane(unsigned Idx) {
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	777	switch (Idx) {
				778	default: return 0;
				779	case AMDGPU::sub0: return 0;
				780	case AMDGPU::sub1: return 1;
				781	case AMDGPU::sub2: return 2;
				782	case AMDGPU::sub3: return 3;
				783	}
				784	}
				785
				786	/// \brief Adjust the writemask of MIMG instructions
				787	void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
				788	SelectionDAG &DAG) const {
				789	SDNode *Users[4] = { };
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	790	unsigned Writemask = 0, Lane = 0;
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	791
				792	// Try to figure out the used register components
				793	for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
				794	I != E; ++I) {
				795
				796	// Abort if we can't understand the usage
				797	if (!I->isMachineOpcode() \|\|
				798	I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
				799	return;
				800
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	801	Lane = SubIdx2Lane(I->getConstantOperandVal(1));
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	802
				803	// Abort if we have more than one user per component
				804	if (Users[Lane])
				805	return;
				806
				807	Users[Lane] = *I;
				808	Writemask \|= 1 << Lane;
				809	}
				810
				811	// Abort if all components are used
				812	if (Writemask == 0xf)
				813	return;
				814
				815	// Adjust the writemask in the node
				816	std::vector<SDValue> Ops;
				817	Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
				818	for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
				819	Ops.push_back(Node->getOperand(i));
				820	Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
				821
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	822	// If we only got one lane, replace it with a copy
				823	if (Writemask == (1U << Lane)) {
				824	SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
				825	SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
Andrew Trick	ef9de2a	2013-05-25 02:42:55 +0000	[diff] [blame]	826	SDLoc(), Users[Lane]->getValueType(0),
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	827	SDValue(Node, 0), RC);
				828	DAG.ReplaceAllUsesWith(Users[Lane], Copy);
				829	return;
				830	}
				831
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	832	// Update the users of the node with the new indices
				833	for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
				834
				835	SDNode *User = Users[i];
				836	if (!User)
				837	continue;
				838
				839	SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
				840	DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
				841
				842	switch (Idx) {
				843	default: break;
				844	case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
				845	case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
				846	case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
				847	}
				848	}
				849	}
				850
				851	/// \brief Fold the instructions after slecting them
				852	SDNode SITargetLowering::PostISelFolding(MachineSDNode Node,
				853	SelectionDAG &DAG) const {
Tom Stellard	0518ff8	2013-06-03 17:39:58 +0000	[diff] [blame]	854	Node = AdjustRegClass(Node, DAG);
Christian Konig	8e06e2a	2013-04-10 08:39:08 +0000	[diff] [blame]	855
				856	if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
				857	adjustWritemask(Node, DAG);
				858
				859	return foldOperands(Node, DAG);
				860	}
Christian Konig	8b1ed28	2013-04-10 08:39:16 +0000	[diff] [blame]	861
				862	/// \brief Assign the register class depending on the number of
				863	/// bits set in the writemask
				864	void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
				865	SDNode *Node) const {
				866	if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
				867	return;
				868
				869	unsigned VReg = MI->getOperand(0).getReg();
				870	unsigned Writemask = MI->getOperand(1).getImm();
				871	unsigned BitsSet = 0;
				872	for (unsigned i = 0; i < 4; ++i)
				873	BitsSet += Writemask & (1 << i) ? 1 : 0;
				874
				875	const TargetRegisterClass *RC;
				876	switch (BitsSet) {
				877	default: return;
				878	case 1: RC = &AMDGPU::VReg_32RegClass; break;
				879	case 2: RC = &AMDGPU::VReg_64RegClass; break;
				880	case 3: RC = &AMDGPU::VReg_96RegClass; break;
				881	}
				882
				883	MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
				884	MRI.setRegClass(VReg, RC);
				885	}
Tom Stellard	0518ff8	2013-06-03 17:39:58 +0000	[diff] [blame]	886
				887	MachineSDNode SITargetLowering::AdjustRegClass(MachineSDNode N,
				888	SelectionDAG &DAG) const {
				889
				890	SDLoc DL(N);
				891	unsigned NewOpcode = N->getMachineOpcode();
				892
				893	switch (N->getMachineOpcode()) {
				894	default: return N;
				895	case AMDGPU::REG_SEQUENCE: {
				896	// MVT::i128 only use SGPRs, so i128 REG_SEQUENCEs don't need to be
				897	// rewritten.
				898	if (N->getValueType(0) == MVT::i128) {
				899	return N;
				900	}
				901	const SDValue Ops[] = {
				902	DAG.getTargetConstant(AMDGPU::VReg_64RegClassID, MVT::i32),
				903	N->getOperand(1) , N->getOperand(2),
				904	N->getOperand(3), N->getOperand(4)
				905	};
				906	return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::i64, Ops);
				907	}
				908
				909	case AMDGPU::S_LOAD_DWORD_IMM:
				910	NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
				911	// Fall-through
				912	case AMDGPU::S_LOAD_DWORDX2_SGPR:
				913	if (NewOpcode == N->getMachineOpcode()) {
				914	NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
				915	}
				916	// Fall-through
				917	case AMDGPU::S_LOAD_DWORDX4_IMM:
				918	case AMDGPU::S_LOAD_DWORDX4_SGPR: {
				919	if (NewOpcode == N->getMachineOpcode()) {
				920	NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
				921	}
				922	if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) {
				923	return N;
				924	}
				925	ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
				926	SDValue Ops[] = {
				927	SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128,
				928	DAG.getConstant(0, MVT::i64)), 0),
				929	N->getOperand(0),
				930	DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)
				931	};
				932	return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
				933	}
				934	}
				935	}