; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Codegen test for AMDGPU shader functions (amdgpu_vs / amdgpu_ps) that return
; values. The checks pin the register moves that place the returned values
; and guard against an s_endpgm being emitted for such functions.

; Export intrinsic called by several vertex-shader tests below; their checks
; expect each call to show up as an `exp` instruction.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; Vertex shader that exports its float argument (%3) and then returns
; { %3 + 1.0, %3 }; the checks expect the results in v0 and v1.
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %plus_one = fadd float %3, 1.0
  %ret.0 = insertvalue {float, float} undef, float %plus_one, 0
  %ret.1 = insertvalue {float, float} %ret.0, float %3, 1
  ret {float, float} %ret.1
}

; Vertex shader that exports its float argument (%3) and returns a constant
; struct of four floats. The checks expect the input to be copied from v0 to
; v4 for the export, and v0..v3 to be loaded with the four constants.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
}


; Pixel shader using attribute group #0. It returns, as floats: both elements
; of %4, element 0 of %7, element 0 of %8, and the float argument %12.
; Unnamed arguments are numbered from %0: %3..%5 and %7..%9 are <2 x i32>,
; %6 is <3 x i32>, and %10..%18 are the nine floats.
; The .long checks come in (key, value) pairs; 165580 = 0x286CC and
; 165584 = 0x286D0 - presumably the PS input enable / input address config
; registers (TODO confirm against the AMDGPU backend).
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ; Pull the integer lanes of interest out of the vector arguments.
  %bits0 = extractelement <2 x i32> %4, i32 0
  %bits1 = extractelement <2 x i32> %4, i32 1
  %bits2 = extractelement <2 x i32> %7, i32 0
  %bits3 = extractelement <2 x i32> %8, i32 0
  ; Reinterpret them as floats without changing the bit pattern.
  %val0 = bitcast i32 %bits0 to float
  %val1 = bitcast i32 %bits1 to float
  %val2 = bitcast i32 %bits2 to float
  %val3 = bitcast i32 %bits3 to float
  ; Assemble the five-float result.
  %agg0 = insertvalue {float, float, float, float, float} undef, float %val0, 0
  %agg1 = insertvalue {float, float, float, float, float} %agg0, float %val1, 1
  %agg2 = insertvalue {float, float, float, float, float} %agg1, float %val2, 2
  %agg3 = insertvalue {float, float, float, float, float} %agg2, float %val3, 3
  %agg4 = insertvalue {float, float, float, float, float} %agg3, float %12, 4
  ret {float, float, float, float, float} %agg4
}


; Pixel shader (attribute group #0) that reads none of its arguments and
; just returns the constant 1.0. The checks expect both .long values
; (keys 165580 and 165584) to be 1 in that case.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ret float 1.0
}


; Pixel shader (attribute group #0) returning { %14, %8 as <2 x float> }.
; %8 is one of the <2 x i32> arguments and %14 is the fifth float argument
; (unnamed arguments are numbered from %0).
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
  ; Reinterpret the integer pair as a float pair (bit pattern unchanged).
  %pair = bitcast <2 x i32> %8 to <2 x float>
  %agg0 = insertvalue {float, <2 x float>} undef, float %14, 0
  %agg1 = insertvalue {float, <2 x float>} %agg0, <2 x float> %pair, 1
  ret {float, <2 x float>} %agg1
}


; Pixel shader with "InitialPSInputAddr"="1" (attribute group #1). It returns,
; as floats: both elements of %4, element 0 of %7, element 0 of %8, and the
; float argument %12 (unnamed arguments are numbered from %0).
; The value expected after key 165584 is 563, i.e. 562 | 1, while the value
; after key 165580 stays 562.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #1 = { "InitialPSInputAddr"="1" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
  ; Pull the integer lanes of interest out of the vector arguments.
  %bits0 = extractelement <2 x i32> %4, i32 0
  %bits1 = extractelement <2 x i32> %4, i32 1
  %bits2 = extractelement <2 x i32> %7, i32 0
  %bits3 = extractelement <2 x i32> %8, i32 0
  ; Reinterpret them as floats without changing the bit pattern.
  %val0 = bitcast i32 %bits0 to float
  %val1 = bitcast i32 %bits1 to float
  %val2 = bitcast i32 %bits2 to float
  %val3 = bitcast i32 %bits3 to float
  ; Assemble the five-float result.
  %agg0 = insertvalue {float, float, float, float, float} undef, float %val0, 0
  %agg1 = insertvalue {float, float, float, float, float} %agg0, float %val1, 1
  %agg2 = insertvalue {float, float, float, float, float} %agg1, float %val2, 2
  %agg3 = insertvalue {float, float, float, float, float} %agg2, float %val3, 3
  %agg4 = insertvalue {float, float, float, float, float} %agg3, float %12, 4
  ret {float, float, float, float, float} %agg4
}


; Pixel shader with "InitialPSInputAddr"="119" (attribute group #2). It
; returns, as floats: both elements of %4, element 0 of %7, element 0 of %8,
; and the float argument %12 (unnamed arguments are numbered from %0).
; The value expected after key 165584 is 631, i.e. 562 | 119, while the value
; after key 165580 stays 562.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
attributes #2 = { "InitialPSInputAddr"="119" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
  ; Pull the integer lanes of interest out of the vector arguments.
  %bits0 = extractelement <2 x i32> %4, i32 0
  %bits1 = extractelement <2 x i32> %4, i32 1
  %bits2 = extractelement <2 x i32> %7, i32 0
  %bits3 = extractelement <2 x i32> %8, i32 0
  ; Reinterpret them as floats without changing the bit pattern.
  %val0 = bitcast i32 %bits0 to float
  %val1 = bitcast i32 %bits1 to float
  %val2 = bitcast i32 %bits2 to float
  %val3 = bitcast i32 %bits3 to float
  ; Assemble the five-float result.
  %agg0 = insertvalue {float, float, float, float, float} undef, float %val0, 0
  %agg1 = insertvalue {float, float, float, float, float} %agg0, float %val1, 1
  %agg2 = insertvalue {float, float, float, float, float} %agg1, float %val2, 2
  %agg3 = insertvalue {float, float, float, float, float} %agg2, float %val3, 3
  %agg4 = insertvalue {float, float, float, float, float} %agg3, float %12, 4
  ret {float, float, float, float, float} %agg4
}


; Pixel shader with "InitialPSInputAddr"="418" (attribute group #3). It
; returns, as floats: both elements of %4, element 0 of %7, element 0 of %8,
; and the float argument %12 (unnamed arguments are numbered from %0).
; The value expected after key 165584 is 946, i.e. 562 | 418, while the value
; after key 165580 stays 562.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
attributes #3 = { "InitialPSInputAddr"="418" }
define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
  ; Pull the integer lanes of interest out of the vector arguments.
  %bits0 = extractelement <2 x i32> %4, i32 0
  %bits1 = extractelement <2 x i32> %4, i32 1
  %bits2 = extractelement <2 x i32> %7, i32 0
  %bits3 = extractelement <2 x i32> %8, i32 0
  ; Reinterpret them as floats without changing the bit pattern.
  %val0 = bitcast i32 %bits0 to float
  %val1 = bitcast i32 %bits1 to float
  %val2 = bitcast i32 %bits2 to float
  %val3 = bitcast i32 %bits3 to float
  ; Assemble the five-float result.
  %agg0 = insertvalue {float, float, float, float, float} undef, float %val0, 0
  %agg1 = insertvalue {float, float, float, float, float} %agg0, float %val1, 1
  %agg2 = insertvalue {float, float, float, float, float} %agg1, float %val2, 2
  %agg3 = insertvalue {float, float, float, float, float} %agg2, float %val3, 3
  %agg4 = insertvalue {float, float, float, float, float} %agg3, float %12, 4
  ret {float, float, float, float, float} %agg4
}


; Vertex shader returning three i32 values { %2 + 2, %1, %2 }, where %1 and
; %2 are the two `inreg` i32 arguments. The checks expect the results in
; s0, s1 and s2.
;
; Element 1 is now part of the returned struct: the final insertvalue chains
; from %b. Previously it chained from %a, which left %b dead and element 1
; undef. The scalar checks mirror the ones used by the `both` test for the
; same i32 computation.
; NOTE(review): the added s1 check was not run through llc - confirm.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  %x = add i32 %2, 2
  %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
  %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
  %c = insertvalue {i32, i32, i32} %b, i32 %2, 2
  ret {i32, i32, i32} %c
}


; Vertex shader returning a constant struct of four i32 values. The checks
; expect s0..s3 to be loaded with 5, 6, 7 and 8, with no redundant
; self-move of s0.
; The function body is only the return; an unused `add` of %2 that did not
; contribute to the result has been dropped.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
}


; Vertex shader mixing float and i32 results. After exporting its float
; argument (%3) it returns { %3 + 1.0, %2 + 2, %3, %1, %2 }, where %1 and %2
; are the two `inreg` i32 arguments. The checks expect the float results in
; v0/v1 and the integer results in s0..s2.
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done compr vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  %fsum = fadd float %3, 1.0
  %isum = add i32 %2, 2
  %agg0 = insertvalue {float, i32, float, i32, i32} undef, float %fsum, 0
  %agg1 = insertvalue {float, i32, float, i32, i32} %agg0, i32 %isum, 1
  %agg2 = insertvalue {float, i32, float, i32, i32} %agg1, float %3, 2
  %agg3 = insertvalue {float, i32, float, i32, i32} %agg2, i32 %1, 3
  %agg4 = insertvalue {float, i32, float, i32, i32} %agg3, i32 %2, 4
  ret {float, i32, float, i32, i32} %agg4
}


; Vertex shader that exports its float argument (%3) and returns a constant
; nested struct {{float, i32}, {i32, <2 x float>}}. The checks expect the
; float leaves in v0..v2 and the i32 leaves in s0/s1, with the exported input
; first copied from v0 to v3.
; Like the other value-returning shaders in this file, the function must not
; end in s_endpgm, so that negative check is included here as well.
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done compr vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
  ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
}

; Attribute group shared by the pixel-shader tests that are declared with #0:
; nounwind plus an "InitialPSInputAddr" of 0.
attributes #0 = { nounwind "InitialPSInputAddr"="0" }