[opaque pointer type] Add textual IR support for explicit type parameter to load instruction

Essentially the same as the GEP change in r230786.

A similar migration script can be used to update test cases, though a few more
test case improvements/changes were required this time around (r229269-r229278):

import fileinput
import sys
import re

# Recognizes an old-style `load <ty>* <ptr>` (optionally `atomic` and/or
# `volatile`, optionally with an `addrspace(N)` qualifier on the pointer).
#   group 1: everything from the leading `=`, `:` or line start through the
#            pointee type
#   group 2: the pointee type text alone (nested inside group 1)
#   group 3: the optional ` addrspace(N)` qualifier (may be empty)
#   group 4: the remainder of the line after the `*` -- either nothing, or a
#            pointer operand starting with %, @, null, undef, a constant
#            expression keyword, or a FileCheck `[[`/`{{` pattern
pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

# Stream stdin to stdout, inserting ", <ty>" after the loaded type so that
# e.g. `load float addrspace(1)* %p` becomes
# `load float, float addrspace(1)* %p`. Lines that do not match are copied
# through unchanged.
sys.stdout.writelines(pat.sub(r"\1, \2\3*\4", text) for text in sys.stdin)

Reviewers: rafael, dexonsmith, grosser

Differential Revision: http://reviews.llvm.org/D7649

llvm-svn: 230794
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 7be97c2..de830de 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -16,8 +16,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -36,8 +36,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -56,8 +56,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -76,8 +76,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -95,7 +95,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -113,7 +113,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -131,7 +131,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -149,7 +149,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -167,7 +167,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -185,7 +185,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -203,7 +203,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -221,7 +221,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -295,7 +295,7 @@
 define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -311,7 +311,7 @@
 define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -330,8 +330,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -352,8 +352,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone