//===-- SILoadStoreOptimizer.cpp ------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass tries to fuse DS instructions with nearby immediate offsets.
// It will fuse operations such as
//  ds_read_b32 v0, v2 offset:16
//  ds_read_b32 v1, v2 offset:32
// ==>
//  ds_read2_b32 v[0:1], v2, offset0:4 offset1:8
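//
// (The offset0/offset1 fields are expressed in units of the element size, so
// the byte offsets 16 and 32 above become 16/4 = 4 and 32/4 = 8. Pairs of
// ds_write_b32 / ds_write_b64 stores are merged into the ds_write2 forms in
// the same way.)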
//
//
// Future improvements:
//
// - This currently relies on the scheduler to place loads and stores next to
//   each other, and then only merges adjacent pairs of instructions. It would
//   be good to be more flexible with interleaved instructions, and possibly to
//   run before scheduling. It currently misses stores of constants because
//   loading the constant into the data register is placed between the stores,
//   although this is arguably a scheduling problem.
//
// - Live interval recomputing seems inefficient. This currently only matches
//   one pair, recomputes live intervals, and moves on to the next pair. It
//   would be better to compute a list of all merges that need to occur.
//
// - With a list of instructions to process, we can also merge more. If a
//   cluster of loads have offsets that are too large to fit in the 8-bit
//   offset fields, but are close enough together that their differences do
//   fit, we can add to the base pointer and use the new, reduced offsets.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-load-store-opt"

namespace {

class SILoadStoreOptimizer : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  static bool offsetsCanBeCombined(unsigned Offset0,
                                   unsigned Offset1,
                                   unsigned EltSize);

  MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
                                                 unsigned EltSize);

  void updateRegDefsUses(unsigned SrcReg,
                         unsigned DstReg,
                         unsigned SubIdx);

  MachineBasicBlock::iterator mergeRead2Pair(
    MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator Paired,
    unsigned EltSize);

  MachineBasicBlock::iterator mergeWrite2Pair(
    MachineBasicBlock::iterator I,
    MachineBasicBlock::iterator Paired,
    unsigned EltSize);

public:
  static char ID;

  SILoadStoreOptimizer()
    : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
      LIS(nullptr) {}

  SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
    initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
  }

  bool optimizeBlock(MachineBasicBlock &MBB);

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Load / Store Optimizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreserved<LiveVariables>();
    AU.addRequired<LiveIntervals>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
                      "SI Load / Store Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
                    "SI Load / Store Optimizer", false, false)

char SILoadStoreOptimizer::ID = 0;

char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID;

FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
  return new SILoadStoreOptimizer(TM);
}

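// Returns true if the two byte offsets can be encoded in the offset0/offset1
// fields of a read2/write2 instruction: both offsets must be multiples of the
// element size, and the element-scaled offsets (or, for the st64 forms, those
// offsets further divided by 64) must fit in 8 bits.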
bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
                                                unsigned Offset1,
                                                unsigned Size) {
  // XXX - Would the same offset be OK? Is there any reason this would happen or
  // be useful?
  if (Offset0 == Offset1)
    return false;

  // This won't be valid if the offset isn't aligned.
  if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
    return false;

  unsigned EltOffset0 = Offset0 / Size;
  unsigned EltOffset1 = Offset1 / Size;

  // Check if the new offsets fit in the reduced 8-bit range.
  if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
    return true;

  // If the offset in elements doesn't fit in 8-bits, we might be able to use
  // the stride 64 versions.
  if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64 != 0))
    return false;

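  // The st64 variants scale offset0/offset1 by 64 elements, so offsets that
  // are multiples of 64 elements may still be encodable after the division
  // below.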
  return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
}

MachineBasicBlock::iterator
SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
                                         unsigned EltSize) {
  MachineBasicBlock::iterator E = I->getParent()->end();
  MachineBasicBlock::iterator MBBI = I;
  ++MBBI;

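  // Only the instruction immediately following I is considered; we rely on
  // the scheduler to have placed mergeable DS operations next to each other
  // (see the notes at the top of the file).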
  if (MBBI->getOpcode() != I->getOpcode())
    return E;

  // Don't merge volatiles.
  if (MBBI->hasOrderedMemoryRef())
    return E;

  int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
  const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
  const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);

  // Check same base pointer. Be careful of subregisters, which can occur with
  // vectors of pointers.
  if (AddrReg0.getReg() == AddrReg1.getReg() &&
      AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
    int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
                                               AMDGPU::OpName::offset);
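    // DS instructions carry a 16-bit unsigned byte offset; mask the immediate
    // down to that field.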
    unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
    unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;

    // Check both offsets fit in the reduced range.
    if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
      return MBBI;
  }

  return E;
}

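// Rewrite every operand that refers to SrcReg so that it refers to
// DstReg:SubIdx instead.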
void SILoadStoreOptimizer::updateRegDefsUses(unsigned SrcReg,
                                             unsigned DstReg,
                                             unsigned SubIdx) {
  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg),
       E = MRI->reg_end(); I != E; ) {
    MachineOperand &O = *I;
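    // Advance the iterator before rewriting the operand, since substVirtReg
    // below moves the operand to a different register's use list.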
    ++I;
    O.substVirtReg(DstReg, SubIdx, *TRI);
  }
}

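// Merge the DS read at I with the matching read at Paired into a single
// read2/read2st64 that defines a wide register, then rewrite the original
// destination registers as subregisters of it. Returns an iterator to the
// new instruction.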
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
  MachineBasicBlock::iterator I,
  MachineBasicBlock::iterator Paired,
  unsigned EltSize) {
  MachineBasicBlock *MBB = I->getParent();

  // Be careful, since the addresses could be subregisters themselves in weird
  // cases, like vectors of pointers.
  const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);

  unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
  unsigned DestReg1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg();

  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;

  // Prefer the st64 form if we can use it, even if we can fit the offset in the
  // non st64 version. I'm not sure if there's any real reason to do this.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  const MCInstrDesc &Read2Desc = TII->get(Opc);

  const TargetRegisterClass *SuperRC
    = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
  unsigned DestReg = MRI->createVirtualRegister(SuperRC);

  DebugLoc DL = I->getDebugLoc();
  MachineInstrBuilder Read2
    = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
      .addImm(0)            // gds
      .addOperand(*AddrReg) // addr
      .addImm(NewOffset0)   // offset0
      .addImm(NewOffset1)   // offset1
      .addOperand(*M0Reg)   // m0
      .addMemOperand(*I->memoperands_begin())
      .addMemOperand(*Paired->memoperands_begin());

  LIS->InsertMachineInstrInMaps(Read2);

  unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
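  // Fold the two original destination registers into subregisters of the new
  // wide register so that all of their uses read the merged load's result.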
  updateRegDefsUses(DestReg0, DestReg, SubRegIdx0);
  updateRegDefsUses(DestReg1, DestReg, SubRegIdx1);

  LIS->RemoveMachineInstrFromMaps(I);
  LIS->RemoveMachineInstrFromMaps(Paired);
  I->eraseFromParent();
  Paired->eraseFromParent();

  LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
  LIS->shrinkToUses(&AddrRegLI);

  LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
  LIS->shrinkToUses(&M0RegLI);

  // Currently m0 is treated as a register class with one member instead of an
  // implicit physical register. We are using the virtual register for the first
  // one, but we still need to update the live range of the now unused second m0
  // virtual register to avoid verifier errors.
  const MachineOperand *PairedM0Reg
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::m0);
  LiveInterval &PairedM0RegLI = LIS->getInterval(PairedM0Reg->getReg());
  LIS->shrinkToUses(&PairedM0RegLI);

  LIS->getInterval(DestReg); // Create new LI

  DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
  return Read2.getInstr();
}

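// Merge the DS write at I with the matching write at Paired into a single
// write2/write2st64 instruction. Returns an iterator to the new instruction.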
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
  MachineBasicBlock::iterator I,
  MachineBasicBlock::iterator Paired,
  unsigned EltSize) {
  MachineBasicBlock *MBB = I->getParent();

  // Be sure to use .addOperand(), and not .addReg() with these. We want to be
  // sure we preserve the subregister index and any register flags set on them.
  const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
  const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
  const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
  const MachineOperand *Data1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);

  unsigned Offset0
    = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
  unsigned Offset1
    = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;

  unsigned NewOffset0 = Offset0 / EltSize;
  unsigned NewOffset1 = Offset1 / EltSize;
  unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;

  // Prefer the st64 form if we can use it, even if we can fit the offset in the
  // non st64 version. I'm not sure if there's any real reason to do this.
  bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
  if (UseST64) {
    NewOffset0 /= 64;
    NewOffset1 /= 64;
    Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
  }

  assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
         (NewOffset0 != NewOffset1) &&
         "Computed offset doesn't fit");

  const MCInstrDesc &Write2Desc = TII->get(Opc);
  DebugLoc DL = I->getDebugLoc();

  MachineInstrBuilder Write2
    = BuildMI(*MBB, I, DL, Write2Desc)
      .addImm(0)          // gds
      .addOperand(*Addr)  // addr
      .addOperand(*Data0) // data0
      .addOperand(*Data1) // data1
      .addImm(NewOffset0) // offset0
      .addImm(NewOffset1) // offset1
      .addOperand(*M0Reg) // m0
      .addMemOperand(*I->memoperands_begin())
      .addMemOperand(*Paired->memoperands_begin());

  // XXX - How do we express subregisters here?
  unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
                          M0Reg->getReg() };
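  // These are the registers whose live ranges repairIntervalsInRange will
  // recompute around the newly inserted write2.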

  LIS->RemoveMachineInstrFromMaps(I);
  LIS->RemoveMachineInstrFromMaps(Paired);
  I->eraseFromParent();
  Paired->eraseFromParent();

  LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);

  DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
  return Write2.getInstr();
}

// Scan through looking for adjacent LDS operations with constant offsets from
// the same base register. We rely on the scheduler to do the hard work of
// clustering nearby loads, and assume these are all adjacent.
bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
  bool Modified = false;

  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I;

    // Don't combine if volatile.
    if (MI.hasOrderedMemoryRef()) {
      ++I;
      continue;
    }

    unsigned Opc = MI.getOpcode();
    if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
      unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
      MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
      if (Match != E) {
        Modified = true;
        I = mergeRead2Pair(I, Match, Size);
      } else {
        ++I;
      }

      continue;
    } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
      unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
      MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
      if (Match != E) {
        Modified = true;
        I = mergeWrite2Pair(I, Match, Size);
      } else {
        ++I;
      }

      continue;
    }

    ++I;
  }

  return Modified;
}

bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
  const TargetSubtargetInfo &STM = MF.getSubtarget();
  TRI = static_cast<const SIRegisterInfo *>(STM.getRegisterInfo());
  TII = static_cast<const SIInstrInfo *>(STM.getInstrInfo());
  MRI = &MF.getRegInfo();

  LIS = &getAnalysis<LiveIntervals>();

  DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");

  assert(!MRI->isSSA());

  bool Modified = false;

  for (MachineBasicBlock &MBB : MF)
    Modified |= optimizeBlock(MBB);

  return Modified;
}