Reapply "AMDGPU: Add 32-bit constant address space"
This reverts r324494 and reapplies r324487.
llvm-svn: 324747
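Every call site below extends the existing CONSTANT_ADDRESS check so that it
also accepts the new 32-bit constant address space. As a minimal sketch, the
recurring predicate amounts to a helper like this (hypothetical name, not
part of this patch):

  // Illustration only: the patch repeats this two-way address-space
  // test at each call site in SIISelLowering.cpp.
  static bool isConstantAddressSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
    return AS == AMDGPUASI.CONSTANT_ADDRESS ||
           AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
  }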
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ab88376..0bda70d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -900,7 +900,8 @@
if (AS == AMDGPUASI.GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);

- if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -1023,7 +1024,8 @@
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
+ *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS ||
+ AddrSpace == AMDGPUASI.CONSTANT_ADDRESS_32BIT) ?
(Align % 4 == 0) : true;
}
@@ -1066,7 +1068,8 @@
static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
return AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.FLAT_ADDRESS ||
- AS == AMDGPUASI.CONSTANT_ADDRESS;
+ AS == AMDGPUASI.CONSTANT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
}

bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -4008,13 +4011,15 @@
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
+ return (GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}

bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -4391,7 +4396,8 @@
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
+ GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
@@ -4444,6 +4450,7 @@
const GlobalValue *GV = GSD->getGlobal();

if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS_32BIT &&
GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS &&
// FIXME: It isn't correct to rely on the type of the pointer. This should
// be removed when address space 0 is 64-bit.
@@ -5378,7 +5385,8 @@
AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();

- if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
@@ -5386,7 +5394,9 @@
// loads.
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
@@ -5395,7 +5405,9 @@
// loads.
//
}
- if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS ||
AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);