//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
/// group to work correctly. This pass expands these individual instructions
/// into several instructions that will completely fill the instruction group.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

namespace {

class R600ExpandSpecialInstrsPass : public MachineFunctionPass {

private:
  static char ID;
  const R600InstrInfo *TII;

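  // Helpers that lower the interpolation input pseudo instructions into
  // complete instruction groups.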
  bool ExpandInputPerspective(MachineInstr &MI);
  bool ExpandInputConstant(MachineInstr &MI);

public:
  R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
    TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }

  virtual bool runOnMachineFunction(MachineFunction &MF);

  const char *getPassName() const {
    return "R600 Expand special instructions pass";
  }
};

} // End anonymous namespace

char R600ExpandSpecialInstrsPass::ID = 0;

FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
  return new R600ExpandSpecialInstrsPass(TM);
}

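// Lower the input_perspective pseudo instruction into the eight INTERP
// instructions that make up two complete instruction groups (see the
// sequence documented in the function body).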
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) {
  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  if (MI.getOpcode() != AMDGPU::input_perspective)
    return false;

  MachineBasicBlock::iterator I = &MI;
  unsigned DstReg = MI.getOperand(0).getReg();
  R600MachineFunctionInfo *MFI =
      MI.getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  unsigned IJIndexBase;

  // Per the Evergreen ISA doc, section 8.3.2:
  // XY and ZW must be interpolated in two different instruction groups,
  // and an INTERP_* must occupy all 4 slots of an instruction group.
  // The output of INTERP_XY is written to the X,Y slots, and the output
  // of INTERP_ZW to the Z,W slots.
  //
  // Thus interpolation requires the following sequence:
  //
  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
  // DstGPR.z = INTERP_ZW;
  // DstGPR.w = INTERP_ZW; (End of first IG)
  // DstGPR.x = INTERP_XY;
  // DstGPR.y = INTERP_XY;
  // AnyGPR.z = INTERP_XY; (Write Masked Out)
  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
  //
  switch (MI.getOperand(1).getImm()) {
  case 0:
    IJIndexBase = MFI->GetIJPerspectiveIndex();
    break;
  case 1:
    IJIndexBase = MFI->GetIJLinearIndex();
    break;
  default:
    assert(0 && "Unknown ij index");
  }

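  // Emit eight INTERP instructions: i = 0-3 form the INTERP_ZW group and
  // i = 4-7 the INTERP_XY group. Each slot alternately reads one of the two
  // I/J registers at 2 * IJIndexBase and 2 * IJIndexBase + 1.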
  for (unsigned i = 0; i < 8; i++) {
    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
        2 * IJIndexBase + ((i + 1) % 2));
    unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
        MI.getOperand(2).getImm());

    unsigned Sel = AMDGPU::sel_x;
    switch (i % 4) {
    case 0: Sel = AMDGPU::sel_x; break;
    case 1: Sel = AMDGPU::sel_y; break;
    case 2: Sel = AMDGPU::sel_z; break;
    case 3: Sel = AMDGPU::sel_w; break;
    default: break;
    }

    unsigned Res = TRI.getSubReg(DstReg, Sel);

    unsigned Opcode = (i < 4) ? AMDGPU::INTERP_ZW : AMDGPU::INTERP_XY;

    MachineBasicBlock &MBB = *(MI.getParent());
    MachineInstr *NewMI =
        TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg);

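    // Slots 2-5 hold the live results (Z,W of INTERP_ZW and X,Y of
    // INTERP_XY); mask the write on every other slot.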
    if (!(i > 1 && i < 6)) {
      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
    }

    if (i % 4 != 3)
      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
  }

  MI.eraseFromParent();

  return true;
}

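// Lower the input_constant pseudo instruction into a complete instruction
// group of INTERP_LOAD_P0 instructions, one per channel.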
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) {
  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  if (MI.getOpcode() != AMDGPU::input_constant)
    return false;

  MachineBasicBlock::iterator I = &MI;
  unsigned DstReg = MI.getOperand(0).getReg();

  for (unsigned i = 0; i < 4; i++) {
    unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
        MI.getOperand(1).getImm());

    unsigned Sel = AMDGPU::sel_x;
    switch (i % 4) {
    case 0: Sel = AMDGPU::sel_x; break;
    case 1: Sel = AMDGPU::sel_y; break;
    case 2: Sel = AMDGPU::sel_z; break;
    case 3: Sel = AMDGPU::sel_w; break;
    default: break;
    }

    unsigned Res = TRI.getSubReg(DstReg, Sel);

    MachineBasicBlock &MBB = *(MI.getParent());
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg);

    if (i % 4 != 3)
      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
  }

  MI.eraseFromParent();

  return true;
}

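// Walk every instruction in the function and expand the special pseudo
// instructions (PRED_X, BREAK, the interpolation inputs, and the
// reduction/vector/cube operations) in place.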
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {

  const R600RegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    MachineBasicBlock::iterator I = MBB.begin();
    while (I != MBB.end()) {
      MachineInstr &MI = *I;
      I = llvm::next(I);

      switch (MI.getOpcode()) {
      default: break;
      // Expand PRED_X to one of the PRED_SET instructions.
      case AMDGPU::PRED_X: {
        uint64_t Flags = MI.getOperand(3).getImm();
        // The native opcode used by PRED_X is stored as an immediate in the
        // third operand.
        MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
            MI.getOperand(2).getImm(), // opcode
            MI.getOperand(0).getReg(), // dst
            MI.getOperand(1).getReg(), // src0
            AMDGPU::ZERO);             // src1
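        // Mask the destination write; the PRED_SET is only needed for its
        // predicate result.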
        TII->addFlag(PredSet, 0, MO_FLAG_MASK);
        if (Flags & MO_FLAG_PUSH) {
          TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);
        } else {
          TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1);
        }
        MI.eraseFromParent();
        continue;
      }
      case AMDGPU::BREAK:
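        // Expand BREAK into a PRED_SETE_INT that compares ZERO with ZERO
        // (always true), followed by a break predicated on that result.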
        MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
            AMDGPU::PRED_SETE_INT,
            AMDGPU::PREDICATE_BIT,
            AMDGPU::ZERO,
            AMDGPU::ZERO);
        TII->addFlag(PredSet, 0, MO_FLAG_MASK);
        TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1);

        BuildMI(MBB, I, MBB.findDebugLoc(I),
                TII->get(AMDGPU::PREDICATED_BREAK))
            .addReg(AMDGPU::PREDICATE_BIT);
        MI.eraseFromParent();
        continue;
      }

      if (ExpandInputPerspective(MI))
        continue;
      if (ExpandInputConstant(MI))
        continue;

      bool IsReduction = TII->isReductionOp(MI.getOpcode());
      bool IsVector = TII->isVector(MI);
      bool IsCube = TII->isCubeOp(MI.getOpcode());
      if (!IsReduction && !IsVector && !IsCube) {
        continue;
      }

      // Expand the instruction
      //
      // Reduction instructions:
      //   T0_X = DP4 T1_XYZW, T2_XYZW
      // becomes:
      //   T0_X = DP4 T1_X, T2_X
      //   T0_Y (write masked) = DP4 T1_Y, T2_Y
      //   T0_Z (write masked) = DP4 T1_Z, T2_Z
      //   T0_W (write masked) = DP4 T1_W, T2_W
      //
      // Vector instructions:
      //   T0_X = MULLO_INT T1_X, T2_X
      // becomes:
      //   T0_X = MULLO_INT T1_X, T2_X
      //   T0_Y (write masked) = MULLO_INT T1_X, T2_X
      //   T0_Z (write masked) = MULLO_INT T1_X, T2_X
      //   T0_W (write masked) = MULLO_INT T1_X, T2_X
      //
      // Cube instructions:
      //   T0_XYZW = CUBE T1_XYZW
      // becomes:
      //   T0_X = CUBE T1_Z, T1_Y
      //   T0_Y = CUBE T1_Z, T1_X
      //   T0_Z = CUBE T1_X, T1_Z
      //   T0_W = CUBE T1_Y, T1_Z
      for (unsigned Chan = 0; Chan < 4; Chan++) {
        unsigned DstReg = MI.getOperand(
            TII->getOperandIdx(MI, R600Operands::DST)).getReg();
        unsigned Src0 = MI.getOperand(
            TII->getOperandIdx(MI, R600Operands::SRC0)).getReg();
        unsigned Src1 = 0;

        // Determine the correct source registers
        if (!IsCube) {
          int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1);
          if (Src1Idx != -1) {
            Src1 = MI.getOperand(Src1Idx).getReg();
          }
        }
        if (IsReduction) {
          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
          Src0 = TRI.getSubReg(Src0, SubRegIndex);
          Src1 = TRI.getSubReg(Src1, SubRegIndex);
        } else if (IsCube) {
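          // Per-channel source swizzle implementing the CUBE expansion shown
          // above: Src0 reads channel CubeSrcSwz[Chan] and Src1 reads channel
          // CubeSrcSwz[3 - Chan] of the original source.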
          static const int CubeSrcSwz[] = {2, 2, 0, 1};
          unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
          unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
          Src1 = TRI.getSubReg(Src0, SubRegIndex1);
          Src0 = TRI.getSubReg(Src0, SubRegIndex0);
        }

        // Determine the correct destination register and write mask.
        bool Mask = false;
        bool NotLast = true;
        if (IsCube) {
          unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
          DstReg = TRI.getSubReg(DstReg, SubRegIndex);
        } else {
          // Mask the write if the original instruction does not write to
          // the current channel.
          Mask = (Chan != TRI.getHWRegChan(DstReg));
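          // Route the write to the current channel of the same hardware
          // register.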
          unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
          DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
        }

        // Mark only the last channel as ending the instruction group.
        NotLast = (Chan != 3);

        // Map the pseudo opcode to its real counterpart and emit the new
        // instruction.
        unsigned Opcode = MI.getOpcode();
        switch (Opcode) {
        case AMDGPU::CUBE_r600_pseudo:
          Opcode = AMDGPU::CUBE_r600_real;
          break;
        case AMDGPU::CUBE_eg_pseudo:
          Opcode = AMDGPU::CUBE_eg_real;
          break;
        case AMDGPU::DOT4_r600_pseudo:
          Opcode = AMDGPU::DOT4_r600_real;
          break;
        case AMDGPU::DOT4_eg_pseudo:
          Opcode = AMDGPU::DOT4_eg_real;
          break;
        default:
          break;
        }

        MachineInstr *NewMI =
            TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);

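        // The four instructions emitted for this pseudo form one instruction
        // group; every instruction after the first is marked as inside the
        // bundle.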
        NewMI->setIsInsideBundle(Chan != 0);
        if (Mask) {
          TII->addFlag(NewMI, 0, MO_FLAG_MASK);
        }
        if (NotLast) {
          TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
        }
      }
      MI.eraseFromParent();
    }
  }
  return false;
}