AMDGPU: Match load d16 hi instructions
Also start selecting global load instructions for constant address
space loads in some cases. Some of these still end up selecting to
MUBUF, which requires investigation.
We still get sub-optimal register allocation and extra waitcnts
inserted because the liveness of the separate register halves is
not really tracked.
llvm-svn: 313716
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 22619142..52f803a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -252,6 +252,11 @@
return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
+class GlobalLoadAddress : CodePatPred<[{
+ auto AS = cast<MemSDNode>(N)->getAddressSpace();
+ return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+}]>;
+
class FlatLoadAddress : CodePatPred<[{
const auto AS = cast<MemSDNode>(N)->getAddressSpace();
return AS == AMDGPUASI.FLAT_ADDRESS ||
@@ -292,7 +297,7 @@
class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalAddress;
+class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;