AMDGPU: Fix tests using old number for constant address space
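
The constant address space moved from addrspace(2) to addrspace(4) when the
target switched to the new address space mapping; these tests were still
written against the old numbering. A rough sketch of the convention the
updated tests assume (the @lut and @read_lut names below are illustrative
only, not taken from the tests):

  @lut = external unnamed_addr addrspace(4) constant [4 x i32], align 4

  define amdgpu_kernel void @read_lut(i32 addrspace(1)* %out) {
    ; addrspace(4) is now the constant address space; addrspace(1) remains global
    %p = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @lut, i64 0, i64 0
    %v = load i32, i32 addrspace(4)* %p, align 4
    store i32 %v, i32 addrspace(1)* %out, align 4
    ret void
  }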

llvm-svn: 341770
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index b097484..e68b179 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -186,22 +186,22 @@
 
 ; HSA: define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #8 {
 define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
-  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
-  store volatile i32 0, i32 addrspace(2)* %stof
+  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  store volatile i32 0, i32 addrspace(4)* %stof
   ret void
 }
 
 ; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 {
 define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
-  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
-  store volatile i32 0, i32 addrspace(2)* %stof
+  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  store volatile i32 0, i32 addrspace(4)* %stof
   ret void
 }
 
 ; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #13 {
 define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
-  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(2)*
-  store volatile i32 0, i32 addrspace(2)* %stof
+  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
+  store volatile i32 0, i32 addrspace(4)* %stof
   call void @func_indirect_use_queue_ptr()
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
index b98b48d..4e3b326 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -75,8 +75,8 @@
 ;  %x.1 = call i32 @llvm.amdgcn.workgroup.id.y()
 ;  %x.2 = call i32 @llvm.amdgcn.workgroup.id.z()
 ;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
-;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-;  %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
+;  %x.4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+;  %x.5 = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
 ;  store volatile i32 0, i32* undef
 ;  br label %stores
 ;
@@ -85,8 +85,8 @@
 ;  store volatile i32 %x.0, i32 addrspace(1)* undef
 ;  store volatile i32 %x.0, i32 addrspace(1)* undef
 ;  store volatile i64 %x.3, i64 addrspace(1)* undef
-;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
-;  store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef
+;  store volatile i8 addrspace(4)* %x.4, i8 addrspace(4)* addrspace(1)* undef
+;  store volatile i8 addrspace(4)* %x.5, i8 addrspace(4)* addrspace(1)* undef
 ;
 ;  store i32 %one, i32 addrspace(1)* %out1
 ;  store i32 %two, i32 addrspace(1)* %out2
@@ -121,8 +121,8 @@
 ;  store volatile i32 %x.0, i32 addrspace(1)* undef
 ;  %x.3 = call i64 @llvm.amdgcn.dispatch.id()
 ;  store volatile i64 %x.3, i64 addrspace(1)* undef
-;  %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-;  store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef
+;  %x.4 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+;  store volatile i8 addrspace(4)* %x.4, i8 addrspace(4)* addrspace(1)* undef
 ;
 ;  store i32 %one, i32 addrspace(1)* %out1
 ;  store i32 %two, i32 addrspace(1)* %out2
@@ -135,8 +135,8 @@
 declare i32 @llvm.amdgcn.workgroup.id.y() #1
 declare i32 @llvm.amdgcn.workgroup.id.z() #1
 declare i64 @llvm.amdgcn.dispatch.id() #1
-declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
-declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #1
+declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
+declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #1
 
 attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
index cf6b105..243358f 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
+++ b/llvm/test/CodeGen/AMDGPU/clamp-omod-special-case.mir
@@ -43,8 +43,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -105,8 +105,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -168,8 +168,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -233,8 +233,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -310,8 +310,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
@@ -375,8 +375,8 @@
 
     %3 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     %25 = REG_SEQUENCE %3, 1, %24, 2
     %10 = S_MOV_B32 61440
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
index 82a15b3..84069ec 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir
@@ -316,7 +316,7 @@
 
     %2 = COPY $vgpr0
     %0 = COPY $sgpr0_sgpr1
-    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec
     %16 = REG_SEQUENCE %2, 1, %15, 2
     %17 = V_LSHLREV_B64 2, killed %16, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index ad9111a..6867afe 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -97,7 +97,7 @@
 
 ; GCN-LABEL: {{^}}kill_vcc_implicit_def:
 ; GCN: IeeeMode: 0
-define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
+define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* byval, [17 x <16 x i8>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [34 x <8 x i32>] addrspace(4)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
 entry:
   %tmp0 = fcmp olt float %13, 0.0
   call void @llvm.AMDGPU.kill(float %14)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
index c629798..617427a 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
@@ -4,15 +4,15 @@
 # GCN:      FLAT_LOAD_DWORD
 # GCN-NEXT: FLAT_LOAD_DWORD
 --- |
-  define amdgpu_kernel void @flat_load_clustering(i32 addrspace(1)* nocapture %arg, i32 addrspace(2)* nocapture readonly %arg1) {
+  define amdgpu_kernel void @flat_load_clustering(i32 addrspace(1)* nocapture %arg, i32 addrspace(4)* nocapture readonly %arg1) {
   bb:
     %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
     %idxprom = sext i32 %tid to i64
-    %gep1 = getelementptr inbounds i32, i32 addrspace(2)* %arg1, i64 %idxprom
-    %load1 = load i32, i32 addrspace(2)* %gep1, align 4
+    %gep1 = getelementptr inbounds i32, i32 addrspace(4)* %arg1, i64 %idxprom
+    %load1 = load i32, i32 addrspace(4)* %gep1, align 4
     %gep2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idxprom
-    %gep34 = getelementptr inbounds i32, i32 addrspace(2)* %gep1, i64 4
-    %load2 = load i32, i32 addrspace(2)* %gep34, align 4
+    %gep34 = getelementptr inbounds i32, i32 addrspace(4)* %gep1, i64 4
+    %load2 = load i32, i32 addrspace(4)* %gep34, align 4
     %gep4 = getelementptr inbounds i32, i32 addrspace(1)* %gep2, i64 4
     store i32 %load1, i32 addrspace(1)* %gep2, align 4
     store i32 %load2, i32 addrspace(1)* %gep4, align 4
@@ -54,8 +54,8 @@
 
     %1 = COPY $sgpr4_sgpr5
     %0 = COPY $vgpr0
-    %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %7 = V_LSHLREV_B32_e32 2, %0, implicit $exec
     %2 = V_MOV_B32_e32 0, implicit $exec
     undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll
index 2ec57a40..09c4b5f 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll
@@ -6,8 +6,8 @@
 @common_global_program = common addrspace(1) global i32 0
 @external_global_program = addrspace(1) global i32 0
 
-@internal_readonly = internal unnamed_addr addrspace(2) constant i32 0
-@external_readonly = unnamed_addr addrspace(2) constant i32 0
+@internal_readonly = internal unnamed_addr addrspace(4) constant i32 0
+@external_readonly = unnamed_addr addrspace(4) constant i32 0
 
 define amdgpu_kernel void @test() {
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index 631a847..d846186 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -549,7 +549,7 @@
     $flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
     $flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
     DBG_VALUE $noreg, 2, !5, !11, debug-location !12
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
     $sgpr8 = S_MOV_B32 $sgpr5
     $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
index 3429726..b88b50a 100644
--- a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
+++ b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir
@@ -55,7 +55,7 @@
   bb.0.entry:
     liveins: $sgpr0_sgpr1
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
diff --git a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
index b762a91..4b6d1d6 100644
--- a/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-constant-initializer.ll
@@ -2,10 +2,10 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga < %s
 ; CHECK: s_endpgm
 
-@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
+@gv = external unnamed_addr addrspace(4) constant [239 x i32], align 4
 
 define amdgpu_kernel void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
-  %val = load i32, i32 addrspace(2)* getelementptr ([239 x i32], [239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+  %val = load i32, i32 addrspace(4)* getelementptr ([239 x i32], [239 x i32] addrspace(4)* @gv, i64 0, i64 239), align 4
   %mul12 = mul nsw i32 %val, 7
   br i1 undef, label %exit, label %bb
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
index 595f632..6374c9e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
@@ -20,7 +20,7 @@
 ; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
 ; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
 ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
-define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
+define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
 entry:
   %tmp0 = fcmp olt float %arg13, 0.000000e+00
   call void @llvm.AMDGPU.kill(float %arg14)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
index 8f9eede..16a463d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -10,7 +10,7 @@
 ; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
 ; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
 ; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p0, attr0.x{{$}}
-define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
+define amdgpu_ps void @v_interp(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
 main_body:
   %i = extractelement <2 x float> %arg4, i32 0
   %j = extractelement <2 x float> %arg4, i32 1
@@ -185,7 +185,7 @@
 
 ; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
 ; 16BANK-NOT: v_interp_p1_f32{{(_e32)*}} [[DST:v[0-9]+]], [[DST]]
-define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg13, [17 x <4 x i32>] addrspace(2)* byval %arg14, [34 x <8 x i32>] addrspace(2)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
+define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg13, [17 x <4 x i32>] addrspace(4)* byval %arg14, [34 x <8 x i32>] addrspace(4)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
 main_body:
   %i.i = extractelement <2 x i32> %arg19, i32 0
   %j.i = extractelement <2 x i32> %arg19, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
index 1877cc9..0440547 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -5,7 +5,7 @@
 ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0
 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
 ; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]]
-define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) {
+define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3) {
 main_body:
   %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
   %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 89a4780..aaf5358 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -85,8 +85,8 @@
   bb.0 (%ir-block.0):
     successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000)
     liveins: $vgpr0, $sgpr0_sgpr1
- 
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec
     $vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec
     $sgpr7 = S_MOV_B32 61440
@@ -98,12 +98,12 @@
     $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
     $sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
     SI_MASK_BRANCH %bb.2.exit, implicit $exec
- 
+
   bb.1.atomic:
     successors: %bb.2.exit(0x80000000)
     liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003
- 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec
     dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec
     $sgpr4_sgpr5 = S_MOV_B64 0
@@ -111,7 +111,7 @@
     $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec
     S_WAITCNT 3952
     BUFFER_ATOMIC_SMAX_ADDR64 killed $vgpr0, killed $vgpr1_vgpr2, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 400, 0, implicit $exec :: (volatile load seq_cst 4 from %ir.gep)
- 
+
   bb.2.exit:
     liveins: $sgpr2_sgpr3
 
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
index 94b0970..59d6828 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir
@@ -16,9 +16,9 @@
     successors: %bb.1(0x30000000), %bb.2(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
@@ -36,7 +36,7 @@
     successors: %bb.3(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3
@@ -45,7 +45,7 @@
     successors: %bb.3(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
index ef30887..b3be1c6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
@@ -114,9 +114,9 @@
     successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
@@ -134,7 +134,7 @@
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3.done
@@ -143,7 +143,7 @@
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
index 92b6074..24c6cf8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir
@@ -114,9 +114,9 @@
     successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000)
     liveins: $sgpr0_sgpr1, $sgpr3
 
-    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
-    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
     $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
@@ -134,7 +134,7 @@
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 32772, implicit $exec
     S_BRANCH %bb.3.done
@@ -143,7 +143,7 @@
     successors: %bb.3.done(0x80000000)
     liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11
 
-    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     S_WAITCNT 3855
     $vgpr0 = V_MOV_B32_e32 4, implicit $exec
 
diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll
index b68f343..790e31c 100644
--- a/llvm/test/CodeGen/AMDGPU/nop-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll
@@ -81,7 +81,7 @@
 ; CHECK-NEXT: {{^$}}
 ; CHECK-NEXT: kernel1:
 ; CHECK-NEXT: s_endpgm
-define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(2)* %ptr.out) align 256 {
+define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 {
 entry:
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
index daa2dbe..693b7d8 100644
--- a/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
@@ -56,8 +56,8 @@
 
     %3 = COPY killed $vgpr0
     %0 = COPY killed $sgpr0_sgpr1
-    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
-    %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
+    %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     %18 = V_ASHRREV_I32_e32 31, %3, implicit $exec
     undef %19.sub0 = COPY killed %3
     %19.sub1 = COPY killed %18
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index f6fea1b..894c9c9 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -22,7 +22,7 @@
 ; GCN-NEXT: [[RET_BB]]:
 ; GCN-NEXT: ; return
 ; GCN-NEXT: .Lfunc_end0
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <8 x i32>] addrspace(4)* byval %arg2, i32 addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
 entry:
   %i.i = extractelement <2 x i32> %arg7, i32 0
   %j.i = extractelement <2 x i32> %arg7, i32 1
@@ -73,7 +73,7 @@
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: ; return
 ; GCN-NEXT: .Lfunc_end
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <8 x i32>] addrspace(4)* byval %arg2, i32 addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
 main_body:
   %i.i = extractelement <2 x i32> %arg7, i32 0
   %j.i = extractelement <2 x i32> %arg7, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
index 348b8f5..537880a 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure.mir
@@ -47,7 +47,7 @@
     liveins: $sgpr4_sgpr5
 
     %1 = COPY $sgpr4_sgpr5
-    %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`)
     $m0 = S_MOV_B32 -1
     %7 = COPY %5
     %6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
index 9b490bb..ff3d0fc 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
@@ -36,63 +36,63 @@
   %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
   %15 = extractelement <4 x float> %reg1, i32 1
   %16 = extractelement <4 x float> %reg1, i32 3
-  %17 = load <4 x float>, <4 x float> addrspace(2)* null
+  %17 = load <4 x float>, <4 x float> addrspace(4)* null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float %18, %0
-  %20 = load <4 x float>, <4 x float> addrspace(2)* null
+  %20 = load <4 x float>, <4 x float> addrspace(4)* null
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %0
-  %23 = load <4 x float>, <4 x float> addrspace(2)* null
+  %23 = load <4 x float>, <4 x float> addrspace(4)* null
   %24 = extractelement <4 x float> %23, i32 2
   %25 = fmul float %24, %0
-  %26 = load <4 x float>, <4 x float> addrspace(2)* null
+  %26 = load <4 x float>, <4 x float> addrspace(4)* null
   %27 = extractelement <4 x float> %26, i32 3
   %28 = fmul float %27, %0
-  %29 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 1)
+  %29 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 1)
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float %30, %15
   %32 = fadd float %31, %19
-  %33 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 1)
+  %33 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 1)
   %34 = extractelement <4 x float> %33, i32 1
   %35 = fmul float %34, %15
   %36 = fadd float %35, %22
-  %37 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 1)
+  %37 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 1)
   %38 = extractelement <4 x float> %37, i32 2
   %39 = fmul float %38, %15
   %40 = fadd float %39, %25
-  %41 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 1)
+  %41 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 1)
   %42 = extractelement <4 x float> %41, i32 3
   %43 = fmul float %42, %15
   %44 = fadd float %43, %28
-  %45 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 2)
+  %45 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 2)
   %46 = extractelement <4 x float> %45, i32 0
   %47 = fmul float %46, %1
   %48 = fadd float %47, %32
-  %49 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 2)
+  %49 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 2)
   %50 = extractelement <4 x float> %49, i32 1
   %51 = fmul float %50, %1
   %52 = fadd float %51, %36
-  %53 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 2)
+  %53 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 2)
   %54 = extractelement <4 x float> %53, i32 2
   %55 = fmul float %54, %1
   %56 = fadd float %55, %40
-  %57 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 2)
+  %57 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 2)
   %58 = extractelement <4 x float> %57, i32 3
   %59 = fmul float %58, %1
   %60 = fadd float %59, %44
-  %61 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 3)
+  %61 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 3)
   %62 = extractelement <4 x float> %61, i32 0
   %63 = fmul float %62, %16
   %64 = fadd float %63, %48
-  %65 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 3)
+  %65 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 3)
   %66 = extractelement <4 x float> %65, i32 1
   %67 = fmul float %66, %16
   %68 = fadd float %67, %52
-  %69 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 3)
+  %69 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 3)
   %70 = extractelement <4 x float> %69, i32 2
   %71 = fmul float %70, %16
   %72 = fadd float %71, %56
-  %73 = load <4 x float>, <4 x float> addrspace(2)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(2)* null, i64 0, i32 3)
+  %73 = load <4 x float>, <4 x float> addrspace(4)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(4)* null, i64 0, i32 3)
   %74 = extractelement <4 x float> %73, i32 3
   %75 = fmul float %74, %16
   %76 = fadd float %75, %60
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
index 58d721c..e257db25 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir
@@ -203,7 +203,7 @@
     liveins: $sgpr4_sgpr5
 
     %4 = COPY $sgpr4_sgpr5
-    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %8 = S_MOV_B64 0
     %7 = COPY %9
     %30 = V_MOV_B32_e32 1, implicit $exec
@@ -365,7 +365,7 @@
     liveins: $sgpr4_sgpr5
 
     %4 = COPY $sgpr4_sgpr5
-    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     %8 = S_MOV_B64 0
     %7 = COPY %9
     %30 = V_MOV_B32_e32 1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 0103f0a..d3c1ad6 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -44,7 +44,7 @@
     liveins: $vgpr0, $sgpr4_sgpr5
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
-    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
     $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
     $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
     $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
@@ -108,7 +108,7 @@
     liveins: $vgpr0, $sgpr4_sgpr5
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
-    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(2)* undef`)
+    $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
     $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
     $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
     $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
index 603b5d4..56fc086 100644
--- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
@@ -9,7 +9,7 @@
 ;
 ; Check for a valid output.
 ; CHECK: image_sample_c
-define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 {
+define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 {
 main_body:
   %i.i = extractelement <2 x i32> %arg8, i32 0
   %j.i = extractelement <2 x i32> %arg8, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
index 6f8d0ce..3dfc2a3 100644
--- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
+++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir
@@ -78,7 +78,7 @@
   bb.0.entry:
     liveins: $sgpr0_sgpr1, $vcc
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec
@@ -140,7 +140,7 @@
   bb.0.entry:
     liveins: $sgpr0_sgpr1
 
-    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
     $sgpr7 = S_MOV_B32 61440
     $sgpr6 = S_MOV_B32 -1
     S_CBRANCH_VCCZ %bb.1, implicit undef $vcc