AMDGPU: Match load d16 hi instructions

Also starts selecting global loads for constant addresses
in some cases. Some still end up selecting to MUBUF, which
requires investigation.

We still get suboptimal regalloc and extra waitcnts inserted
because the liveness of the separate 16-bit register halves
is not tracked.

llvm-svn: 313716
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 22619142..52f803a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -252,6 +252,11 @@
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
 }]>;
 
+class GlobalLoadAddress : CodePatPred<[{
+  auto AS = cast<MemSDNode>(N)->getAddressSpace();
+  return AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.CONSTANT_ADDRESS;
+}]>;
+
 class FlatLoadAddress : CodePatPred<[{
   const auto AS = cast<MemSDNode>(N)->getAddressSpace();
   return AS == AMDGPUASI.FLAT_ADDRESS ||
@@ -292,7 +297,7 @@
 class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
 class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
 
-class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalAddress;
+class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
 class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
 
 class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;