Tim Renouf | f1c7b92 | 2018-08-02 22:53:57 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SICI %s |
| 2 | |
| 3 | ; Check that an addrspace(1) (const) load with various combinations of |
| 4 | ; uniform, nonuniform and constant address components all load with an |
| 5 | ; addr64 mubuf with no readfirstlane. |
| 6 | |
; Internal constant lookup table in addrspace(1): six <3 x float> rows, in
; order (1,0,0), (0,1,0), (0,0,1), (0,1,1), (1,0,1), (1,1,0).
; It is the base address for the loads in @nonuniform_uniform and
; @nonuniform_uniform_const.
| 7 | @indexable = internal unnamed_addr addrspace(1) constant [6 x <3 x float>] [<3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00>, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 0.000000e+00>] |
| 8 | |
| 9 | ; GCN-LABEL: {{^}}nonuniform_uniform: |
| 10 | ; GCN-NOT: readfirstlane |
| 11 | ; SICI: buffer_load_dwordx4 {{.*}} addr64 |
| 12 | |
; Returns element 0 of the <3 x float> row @indexable[%arg18].
; %arg18 is sign-extended to i64 and used as the row index into the global
; table; the whole row is loaded as a vector (align 16) and lane 0 is then
; extracted.
; NOTE(review): going by the test name, %arg18 (not marked inreg) is the
; nonuniform address component and the @indexable base is the uniform one -
; confirm.
| 13 | define amdgpu_ps float @nonuniform_uniform(i32 %arg18) { |
| 14 | .entry: |
| 15 | %tmp31 = sext i32 %arg18 to i64 |
| 16 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31 |
| 17 | %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 |
| 18 | %tmp34 = extractelement <3 x float> %tmp33, i32 0 |
| 19 | ret float %tmp34 |
| 20 | } |
| 21 | |
| 22 | ; GCN-LABEL: {{^}}uniform_nonuniform: |
| 23 | ; GCN-NOT: readfirstlane |
| 24 | ; SICI: buffer_load_dwordx4 {{.*}} addr64 |
| 25 | |
; Returns element 0 of row %offset of a table whose base address is derived
; from %arg18.
; %arg18 is zero-extended to i64 and turned into an addrspace(1) pointer with
; inttoptr, so the upper 32 bits of the base are always zero. %offset (marked
; inreg) is the row index; the row is loaded as a whole vector (align 16)
; before lane 0 is extracted.
; NOTE(review): going by the test name, the inreg %offset is the uniform
; component and the %arg18-derived base is the nonuniform one - confirm.
| 26 | define amdgpu_ps float @uniform_nonuniform(i32 inreg %offset, i32 %arg18) { |
| 27 | .entry: |
| 28 | %tmp1 = zext i32 %arg18 to i64 |
| 29 | %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* |
| 30 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset |
| 31 | %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 |
| 32 | %tmp34 = extractelement <3 x float> %tmp33, i32 0 |
| 33 | ret float %tmp34 |
| 34 | } |
| 35 | |
| 36 | ; GCN-LABEL: {{^}}const_nonuniform: |
| 37 | ; GCN-NOT: readfirstlane |
| 38 | ; SICI: buffer_load_dwordx4 {{.*}} addr64 |
| 39 | |
; Returns element 0 of row 1 of a table whose base address is derived from
; %arg18.
; %arg18 is zero-extended to i64 and turned into an addrspace(1) pointer with
; inttoptr. The row index is the literal 1, so the only variable part of the
; address is the %arg18-derived base.
; NOTE(review): going by the test name, that base is the nonuniform component
; and the literal index is the constant one - confirm.
| 40 | define amdgpu_ps float @const_nonuniform(i32 %arg18) { |
| 41 | .entry: |
| 42 | %tmp1 = zext i32 %arg18 to i64 |
| 43 | %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* |
| 44 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 1 |
| 45 | %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 |
| 46 | %tmp34 = extractelement <3 x float> %tmp33, i32 0 |
| 47 | ret float %tmp34 |
| 48 | } |
| 49 | |
| 50 | ; GCN-LABEL: {{^}}nonuniform_nonuniform: |
| 51 | ; GCN-NOT: readfirstlane |
| 52 | ; SICI: buffer_load_dwordx4 {{.*}} addr64 |
| 53 | |
; Returns element 0 of row %offset of a table whose base address is derived
; from %arg18.
; %arg18 is zero-extended to i64 and turned into an addrspace(1) pointer with
; inttoptr; %offset is the row index. Neither argument is marked inreg.
; NOTE(review): going by the test name, both the row index and the base are
; meant to be nonuniform address components - confirm.
| 54 | define amdgpu_ps float @nonuniform_nonuniform(i32 %offset, i32 %arg18) { |
| 55 | .entry: |
| 56 | %tmp1 = zext i32 %arg18 to i64 |
| 57 | %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* |
| 58 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset |
| 59 | %tmp33 = load <3 x float>, <3 x float> addrspace(1)* %tmp32, align 16 |
| 60 | %tmp34 = extractelement <3 x float> %tmp33, i32 0 |
| 61 | ret float %tmp34 |
| 62 | } |
| 63 | |
| 64 | ; GCN-LABEL: {{^}}nonuniform_uniform_const: |
| 65 | ; GCN-NOT: readfirstlane |
| 66 | ; SICI: buffer_load_dword {{.*}} addr64 |
| 67 | |
; Returns the float at @indexable[%arg18][1].
; %arg18 is sign-extended to i64 and used as the row index into the global
; table; the GEP's trailing constant index 1 selects the second float of that
; row, and a single float is loaded (align 4) rather than the whole vector.
; NOTE(review): going by the test name, %arg18 (not marked inreg) is the
; nonuniform component, the @indexable base the uniform one and the trailing
; index the constant one - confirm.
| 68 | define amdgpu_ps float @nonuniform_uniform_const(i32 %arg18) { |
| 69 | .entry: |
| 70 | %tmp31 = sext i32 %arg18 to i64 |
| 71 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* @indexable, i64 0, i64 %tmp31, i64 1 |
| 72 | %tmp33 = load float, float addrspace(1)* %tmp32, align 4 |
| 73 | ret float %tmp33 |
| 74 | } |
| 75 | |
| 76 | ; GCN-LABEL: {{^}}uniform_nonuniform_const: |
| 77 | ; GCN-NOT: readfirstlane |
| 78 | ; SICI: buffer_load_dword {{.*}} addr64 |
| 79 | |
; Returns the second float of row %offset of a table whose base address is
; derived from %arg18.
; %arg18 is zero-extended to i64 and turned into an addrspace(1) pointer with
; inttoptr. %offset (marked inreg) is the row index and the GEP's trailing
; constant index 1 selects the element; a single float is loaded (align 4).
; NOTE(review): going by the test name, the inreg %offset is the uniform
; component, the %arg18-derived base the nonuniform one and the trailing index
; the constant one - confirm.
| 80 | define amdgpu_ps float @uniform_nonuniform_const(i32 inreg %offset, i32 %arg18) { |
| 81 | .entry: |
| 82 | %tmp1 = zext i32 %arg18 to i64 |
| 83 | %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* |
| 84 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1 |
| 85 | %tmp33 = load float, float addrspace(1)* %tmp32, align 4 |
| 86 | ret float %tmp33 |
| 87 | } |
| 88 | |
| 89 | ; GCN-LABEL: {{^}}nonuniform_nonuniform_const: |
| 90 | ; GCN-NOT: readfirstlane |
| 91 | ; SICI: buffer_load_dword {{.*}} addr64 |
| 92 | |
; Returns the second float of row %offset of a table whose base address is
; derived from %arg18.
; %arg18 is zero-extended to i64 and turned into an addrspace(1) pointer with
; inttoptr. %offset is the row index and the GEP's trailing constant index 1
; selects the element; a single float is loaded (align 4). Neither argument is
; marked inreg.
; NOTE(review): going by the test name, both the row index and the base are
; meant to be nonuniform components, with the trailing index as the constant
; one - confirm.
| 93 | define amdgpu_ps float @nonuniform_nonuniform_const(i32 %offset, i32 %arg18) { |
| 94 | .entry: |
| 95 | %tmp1 = zext i32 %arg18 to i64 |
| 96 | %tmp2 = inttoptr i64 %tmp1 to [6 x <3 x float>] addrspace(1)* |
| 97 | %tmp32 = getelementptr [6 x <3 x float>], [6 x <3 x float>] addrspace(1)* %tmp2, i32 0, i32 %offset, i32 1 |
| 98 | %tmp33 = load float, float addrspace(1)* %tmp32, align 4 |
| 99 | ret float %tmp33 |
| 100 | } |
| 101 | |
| 102 | |
| 103 | |
| 104 | |