//===- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

static cl::opt<unsigned> UnrollThresholdPrivate(
  "amdgpu-unroll-threshold-private",
  cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
  cl::init(2500), cl::Hidden);

static cl::opt<unsigned> UnrollThresholdLocal(
  "amdgpu-unroll-threshold-local",
  cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
  cl::init(1000), cl::Hidden);

static cl::opt<unsigned> UnrollThresholdIf(
  "amdgpu-unroll-threshold-if",
  cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
  cl::init(150), cl::Hidden);

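// The thresholds above are cl::opts, so they can be tuned without a rebuild
// by any tool that links the AMDGPU backend, e.g. (illustrative invocation):
//   opt -loop-unroll -amdgpu-unroll-threshold-private=3000 in.ll -o out.ll

// Return true if \p Cond, followed transitively through its operands up to
// a small depth, depends on a PHI defined in \p L itself rather than in one
// of its subloops. Such a condition is a good unrolling candidate: unrolling
// may fold the PHI, and the branch on it, away.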
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
                              unsigned Depth = 0) {
  const Instruction *I = dyn_cast<Instruction>(Cond);
  if (!I)
    return false;

  for (const Value *V : I->operand_values()) {
    if (!L->contains(I))
      continue;
    if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
      if (llvm::none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
             return SubLoop->contains(PHI); }))
        return true;
    } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
      return true;
  }
  return false;
}

void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                            TTI::UnrollingPreferences &UP) {
  UP.Threshold = 300; // Twice the default.
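  // Do not cap the unroll count itself; the threshold logic below is what
  // actually limits unrolling.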
  UP.MaxCount = std::numeric_limits<unsigned>::max();
  UP.Partial = true;

  // TODO: Do we want runtime unrolling?

  // Maximum alloca size that can fit in registers: 256 32-bit registers per
  // wave, minus 16 reserved, times 4 bytes each.
  const unsigned MaxAlloca = (256 - 16) * 4;
  unsigned ThresholdPrivate = UnrollThresholdPrivate;
  unsigned ThresholdLocal = UnrollThresholdLocal;
  unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
  for (const BasicBlock *BB : L->getBlocks()) {
    const DataLayout &DL = BB->getModule()->getDataLayout();
    unsigned LocalGEPsSeen = 0;

    if (llvm::any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
           return SubLoop->contains(BB); }))
      continue; // Block belongs to an inner loop.

    for (const Instruction &I : *BB) {
      // Unroll a loop which contains an "if" statement whose condition is
      // defined by a PHI belonging to the loop. This may help to eliminate
      // the if region and potentially even the PHI itself, saving on both
      // divergence and the registers used for the PHI.
      // Add a small bonus for each such "if" statement.
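      //
      // For example (illustrative IR sketch):
      //   loop:
      //     %phi = phi i32 [ 0, %entry ], [ %inc, %loop ]
      //     %cc = icmp eq i32 %phi, 7
      //     br i1 %cc, label %then, label %endif
      // Unrolling lets %cc fold to a constant in each unrolled iteration,
      // removing the branch.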
119 if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
120 if (UP.Threshold < MaxBoost && Br->isConditional()) {
121 if (L->isLoopExiting(Br->getSuccessor(0)) ||
122 L->isLoopExiting(Br->getSuccessor(1)))
123 continue;
124 if (dependsOnLocalPhi(L, Br->getCondition())) {
125 UP.Threshold += UnrollThresholdIf;
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000126 LLVM_DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
127 << " for loop:\n"
128 << *L << " due to " << *Br << '\n');
Stanislav Mekhanoshin478b8192017-04-07 16:26:28 +0000129 if (UP.Threshold >= MaxBoost)
130 return;
131 }
132 }
133 continue;
134 }
135
Matt Arsenaultac6e39c2014-07-17 06:19:06 +0000136 const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
Stanislav Mekhanoshinbaf31ac2017-03-28 22:13:51 +0000137 if (!GEP)
Tom Stellard8cce9bd2014-01-23 18:49:28 +0000138 continue;
Matt Arsenaultac6e39c2014-07-17 06:19:06 +0000139
Stanislav Mekhanoshinbaf31ac2017-03-28 22:13:51 +0000140 unsigned AS = GEP->getAddressSpace();
141 unsigned Threshold = 0;
Matt Arsenault0da63502018-08-31 05:49:54 +0000142 if (AS == AMDGPUAS::PRIVATE_ADDRESS)
Stanislav Mekhanoshinbaf31ac2017-03-28 22:13:51 +0000143 Threshold = ThresholdPrivate;
Matt Arsenault0da63502018-08-31 05:49:54 +0000144 else if (AS == AMDGPUAS::LOCAL_ADDRESS)
Stanislav Mekhanoshinbaf31ac2017-03-28 22:13:51 +0000145 Threshold = ThresholdLocal;
146 else
147 continue;
148
149 if (UP.Threshold >= Threshold)
150 continue;
151
      if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
        const Value *Ptr = GEP->getPointerOperand();
        const AllocaInst *Alloca =
            dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
        if (!Alloca || !Alloca->isStaticAlloca())
          continue;
        Type *Ty = Alloca->getAllocatedType();
        unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
        if (AllocaSize > MaxAlloca)
          continue;
      } else if (AS == AMDGPUAS::LOCAL_ADDRESS) {
        LocalGEPsSeen++;
        // Inhibit unrolling for local memory if we have seen addressing that
        // is not based on a variable (a global or an argument); most likely
        // we will be unable to combine the resulting accesses.
        // Do not unroll too-deep inner loops for local memory, to give an
        // outer loop a chance to be unrolled for a more important reason.
        if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
            (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
             !isa<Argument>(GEP->getPointerOperand())))
          continue;
      }

      // Check if GEP depends on a value defined by this loop itself.
      bool HasLoopDef = false;
      for (const Value *Op : GEP->operands()) {
        const Instruction *Inst = dyn_cast<Instruction>(Op);
        if (!Inst || L->isLoopInvariant(Op))
          continue;

        if (llvm::any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
             return SubLoop->contains(Inst); }))
          continue;
        HasLoopDef = true;
        break;
      }
      if (!HasLoopDef)
        continue;

      // We want to do whatever we can to limit the number of alloca
      // instructions that make it through to the code generator. Allocas
      // require us to use indirect addressing, which is slow and prone to
      // compiler bugs. If this loop does an address calculation on an
      // alloca ptr, then we want to use a higher than normal loop unroll
      // threshold. This will give SROA a better chance to eliminate these
      // allocas.
      //
      // We also want to have more unrolling for local memory to let ds
      // instructions with different offsets combine.
      //
      // Don't use the maximum allowed value here as it will make some
      // programs way too big.
      UP.Threshold = Threshold;
      LLVM_DEBUG(dbgs() << "Set unroll threshold " << Threshold
                        << " for loop:\n"
                        << *L << " due to " << *GEP << '\n');
      if (UP.Threshold >= MaxBoost)
        return;
    }
  }
}

unsigned GCNTTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  // The concept of vector registers doesn't really exist. Some packed vector
  // operations operate on the normal 32-bit registers.
  return 256;
}

unsigned GCNTTIImpl::getNumberOfRegisters(bool Vec) const {
  // This is really the number of registers to fill when vectorizing /
  // interleaving loops, so we lie to avoid trying to use all registers.
  return getHardwareNumberOfRegisters(Vec) >> 3;
}

unsigned GCNTTIImpl::getRegisterBitWidth(bool Vector) const {
  return 32;
}

unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
  return 32;
}

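// Cap a vectorized load of sub-32-bit elements at 128 bits; wider element
// types keep the requested vectorization factor.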
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                         unsigned ChainSizeInBytes,
                                         VectorType *VecTy) const {
  unsigned VecRegBitWidth = VF * LoadSize;
  if (VecRegBitWidth > 128 && VecTy->getScalarSizeInBits() < 32)
    // TODO: Support element-size less than 32bit?
    return 128 / LoadSize;

  return VF;
}

unsigned GCNTTIImpl::getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                          unsigned ChainSizeInBytes,
                                          VectorType *VecTy) const {
  unsigned VecRegBitWidth = VF * StoreSize;
  if (VecRegBitWidth > 128)
    return 128 / StoreSize;

  return VF;
}

unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
    return 512;
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
      AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
      AddrSpace == AMDGPUAS::REGION_ADDRESS)
    return 128;

  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return 8 * ST->getMaxPrivateElementSize();

  llvm_unreachable("unhandled address space");
}

bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const {
  // We allow vectorization of flat stores, even though we may need to decompose
  // them later if they may access private memory. We don't have enough context
  // here, and legalization can handle it.
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
    return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
      ChainSizeInBytes <= ST->getMaxPrivateElementSize();
  }
  return true;
}

bool GCNTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                             unsigned Alignment,
                                             unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                              unsigned Alignment,
                                              unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  // Disable unrolling if the loop is not vectorized.
  // TODO: Enable this again.
  if (VF == 1)
    return 1;

  return 8;
}

bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                    MemIntrinsicInfo &Info) const {
  switch (Inst->getIntrinsicID()) {
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
  case Intrinsic::amdgcn_ds_fadd:
  case Intrinsic::amdgcn_ds_fmin:
  case Intrinsic::amdgcn_ds_fmax: {
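    // These DS atomics take (ptr, value, ordering, scope, isVolatile)
    // operands; extract the ordering and volatile bits so the returned info
    // reflects them.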
    auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
    auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
    if (!Ordering || !Volatile)
      return false; // Invalid.

    unsigned OrderingVal = Ordering->getZExtValue();
    if (OrderingVal > static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent))
      return false;

    Info.PtrVal = Inst->getArgOperand(0);
    Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
    Info.ReadMem = true;
    Info.WriteMem = true;
    Info.IsVolatile = !Volatile->isNullValue();
    return true;
  }
  default:
    return false;
  }
}

int GCNTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
    TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
  EVT OrigTy = TLI->getValueType(DL, Ty);
  if (!OrigTy.isSimple()) {
    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                         Opd1PropInfo, Opd2PropInfo);
  }

  // Legalize the type.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  // Because we don't have any legal vector operations, only legal vector
  // types, we need to account for split vectors.
  unsigned NElts = LT.second.isVector() ?
    LT.second.getVectorNumElements() : 1;

  MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;

  switch (ISD) {
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    if (SLT == MVT::i64)
      return get64BitInstrCost() * LT.first * NElts;

    // i32
    return getFullRateInstrCost() * LT.first * NElts;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (SLT == MVT::i64) {
      // and, or and xor are typically split into 2 VALU instructions.
      return 2 * getFullRateInstrCost() * LT.first * NElts;
    }

    return LT.first * NElts * getFullRateInstrCost();
  case ISD::MUL: {
    const int QuarterRateCost = getQuarterRateInstrCost();
    if (SLT == MVT::i64) {
      const int FullRateCost = getFullRateInstrCost();
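      // An i64 multiply is expanded into several 32-bit pieces: roughly four
      // quarter-rate multiply parts plus full-rate adds to combine the
      // partial products; this approximates that expansion's cost.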
      return (4 * QuarterRateCost + (2 * 2) * FullRateCost) * LT.first * NElts;
    }

    // i32
    return QuarterRateCost * NElts * LT.first;
  }
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
    if (SLT == MVT::f64)
      return LT.first * NElts * get64BitInstrCost();

    if (SLT == MVT::f32 || SLT == MVT::f16)
      return LT.first * NElts * getFullRateInstrCost();
    break;
  case ISD::FDIV:
  case ISD::FREM:
    // FIXME: frem should be handled separately. The fdiv in it is most of it,
    // but the current lowering is also not entirely correct.
    if (SLT == MVT::f64) {
      int Cost = 4 * get64BitInstrCost() + 7 * getQuarterRateInstrCost();
      // Add cost of workaround.
      if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        Cost += 3 * getFullRateInstrCost();

      return LT.first * Cost * NElts;
    }

    if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) {
      // TODO: This is more complicated, unsafe flags etc.
      if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) ||
          (SLT == MVT::f16 && ST->has16BitInsts())) {
        return LT.first * getQuarterRateInstrCost() * NElts;
      }
    }

    if (SLT == MVT::f16 && ST->has16BitInsts()) {
      // 2 x v_cvt_f32_f16
      // f32 rcp
      // f32 fmul
      // v_cvt_f16_f32
      // f16 div_fixup
      int Cost = 4 * getFullRateInstrCost() + 2 * getQuarterRateInstrCost();
      return LT.first * Cost * NElts;
    }

    if (SLT == MVT::f32 || SLT == MVT::f16) {
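      // The f32 path is modeled on the reciprocal-based lowering (div_scale,
      // rcp, a few fmas, div_fixup): roughly seven full-rate instructions
      // plus one quarter-rate rcp.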
      int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost();

      if (!ST->hasFP32Denormals()) {
        // FP mode switches.
        Cost += 2 * getFullRateInstrCost();
      }

      return LT.first * NElts * Cost;
    }
    break;
  default:
    break;
  }

  return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo);
}

unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) {
  // XXX - For some reason this isn't called for switch.
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return 10;
  default:
    return BaseT::getCFInstrCost(Opcode);
  }
}

int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                           bool IsPairwise) {
  EVT OrigTy = TLI->getValueType(DL, Ty);

  // Computes cost on targets that have packed math instructions (which
  // support 16-bit types only).
  if (IsPairwise ||
      !ST->hasVOP3PInsts() ||
      OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  return LT.first * getFullRateInstrCost();
}

int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                       bool IsPairwise,
                                       bool IsUnsigned) {
  EVT OrigTy = TLI->getValueType(DL, Ty);

  // Computes cost on targets that have packed math instructions (which
  // support 16-bit types only).
  if (IsPairwise ||
      !ST->hasVOP3PInsts() ||
      OrigTy.getScalarSizeInBits() != 16)
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned);

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  return LT.first * getHalfRateInstrCost();
}

int GCNTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {
    unsigned EltSize
      = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
    if (EltSize < 32) {
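      // A single 16-bit element at index 0 lives in the low half of a 32-bit
      // register, so it can be accessed with no extra instruction.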
      if (EltSize == 16 && Index == 0 && ST->has16BitInsts())
        return 0;
      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
    }

    // Extracts are just reads of a subregister, so are free. Inserts are
    // considered free because we don't want to have any cost for scalarizing
    // operations, and we don't have to copy into a different register class.

    // Dynamic indexing isn't free and is best avoided.
    return Index == ~0u ? 2 : 0;
  }
  default:
    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
  }
}
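
// \returns true if argument \p A is known to be passed in an SGPR, and is
// therefore uniform across the wavefront.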
static bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

/// \returns true if the result of the value could potentially be
/// different across workitems in a wavefront.
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
  if (const Argument *A = dyn_cast<Argument>(V))
    return !isArgPassedInSGPR(A);

  // Loads from the private and flat address spaces are divergent, because
  // threads can execute the load instruction with the same inputs and get
  // different results.
  //
  // All other loads are not divergent, because if threads issue loads with the
  // same arguments, they will always get the same result.
  if (const LoadInst *Load = dyn_cast<LoadInst>(V))
    return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
           Load->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS;

  // Atomics are divergent because they are executed sequentially: when an
  // atomic operation refers to the same address in each thread, then each
  // thread after the first sees the value written by the previous thread as
  // the original value.
  if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
    return true;

  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
    return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());

  // Assume all function calls are a source of divergence.
  if (isa<CallInst>(V) || isa<InvokeInst>(V))
    return true;

  return false;
}

bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
    switch (Intrinsic->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::amdgcn_readfirstlane:
    case Intrinsic::amdgcn_readlane:
      return true;
    }
  }
  return false;
}

unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                    Type *SubTp) {
  if (ST->hasVOP3PInsts()) {
    VectorType *VT = cast<VectorType>(Tp);
    if (VT->getNumElements() == 2 &&
        DL.getTypeSizeInBits(VT->getElementType()) == 16) {
      // With op_sel, VOP3P instructions can freely access either the low or
      // the high half of a register, so any swizzle of two 16-bit elements
      // is free.

      switch (Kind) {
      case TTI::SK_Broadcast:
      case TTI::SK_Reverse:
      case TTI::SK_PermuteSingleSrc:
        return 0;
      default:
        break;
      }
    }
  }

  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
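  // The callee is inline-compatible when it needs no subtarget features
  // beyond the caller's, after masking off features on the ignore list.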
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
    TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
    TM.getSubtargetImpl(*Callee)->getFeatureBits();

  FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  return ((RealCallerBits & RealCalleeBits) == RealCalleeBits);
}

void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  CommonTTI.getUnrollingPreferences(L, SE, UP);
}

unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}

unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
  return getHardwareNumberOfRegisters(Vec);
}

unsigned R600TTIImpl::getRegisterBitWidth(bool Vector) const {
  return 32;
}

unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
  return 32;
}

unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
    return 128;
  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
      AddrSpace == AMDGPUAS::REGION_ADDRESS)
    return 64;
  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
    return 32;

  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
    return 128;
  llvm_unreachable("unhandled address space");
}

bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                             unsigned Alignment,
                                             unsigned AddrSpace) const {
  // We allow vectorization of flat stores, even though we may need to decompose
  // them later if they may access private memory. We don't have enough context
  // here, and legalization can handle it.
  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
}

bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                              unsigned Alignment,
                                              unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                               unsigned Alignment,
                                               unsigned AddrSpace) const {
  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}

unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
  // Disable unrolling if the loop is not vectorized.
  // TODO: Enable this again.
  if (VF == 1)
    return 1;

  return 8;
}

unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) {
  // XXX - For some reason this isn't called for switch.
  switch (Opcode) {
  case Instruction::Br:
  case Instruction::Ret:
    return 10;
  default:
    return BaseT::getCFInstrCost(Opcode);
  }
}

int R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                    unsigned Index) {
  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::InsertElement: {
    unsigned EltSize
      = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
    if (EltSize < 32) {
      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
    }

    // Extracts are just reads of a subregister, so are free. Inserts are
    // considered free because we don't want to have any cost for scalarizing
    // operations, and we don't have to copy into a different register class.

    // Dynamic indexing isn't free and is best avoided.
    return Index == ~0u ? 2 : 0;
  }
  default:
    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
  }
}

void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
  CommonTTI.getUnrollingPreferences(L, SE, UP);
}