Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s |
Tom Stellard | 6aa0d55 | 2013-06-14 22:12:24 +0000 | [diff] [blame] | 2 | ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s |
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 3 | ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s |
| 4 | |
Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame] | 5 | ;===------------------------------------------------------------------------===; |
| 6 | ; Global Address Space |
| 7 | ;===------------------------------------------------------------------------===; |
| 8 | |
| 9 | ; i8 store |
| 10 | ; EG-CHECK: @store_i8 |
| 11 | ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X |
| 12 | ; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]] |
| 13 | ; IG 0: Get the byte index |
| 14 | ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x |
| 15 | ; EG-CHECK-NEXT: 3 |
| 16 | ; IG 1: Truncate the value and calculate the shift amount for the mask |
| 17 | ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x |
| 18 | ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y |
| 19 | ; EG-CHECK: 255(3.573311e-43), 3 |
| 20 | ; IG 2: Shift the value and the mask |
| 21 | ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] |
| 22 | ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] |
| 23 | ; EG-CHECK-NEXT: 255 |
| 24 | ; IG 3: Initialize the Y and Z channels to zero |
| 25 | ; XXX: An optimal scheduler should merge this into one of the previous IGs. |
| 26 | ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 |
| 27 | ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 |
| 28 | |
| 29 | ; SI-CHECK: @store_i8 |
| 30 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 31 | |
| 32 | define void @store_i8(i8 addrspace(1)* %out, i8 %in) { ; Test function: writes the i8 argument %in through the addrspace(1) pointer %out. |
| 33 | entry: |
| 34 | store i8 %in, i8 addrspace(1)* %out ; the only memory operation in this function |
| 35 | ret void |
| 36 | } |
| 37 | |
| 38 | ; i16 store |
| 39 | ; EG-CHECK: @store_i16 |
| 40 | ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X |
| 41 | ; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]] |
| 42 | ; IG 0: Get the byte index |
| 43 | ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x |
| 44 | ; EG-CHECK-NEXT: 3 |
| 45 | ; IG 1: Truncate the value and calculate the shift amount for the mask |
| 46 | ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x |
| 47 | ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y |
| 48 | ; EG-CHECK: 65535(9.183409e-41), 3 |
| 49 | ; IG 2: Shift the value and the mask |
| 50 | ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] |
| 51 | ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] |
| 52 | ; EG-CHECK-NEXT: 65535 |
| 53 | ; IG 3: Initialize the Y and Z channels to zero |
| 54 | ; XXX: An optimal scheduler should merge this into one of the previous IGs. |
| 55 | ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 |
| 56 | ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 |
| 57 | |
| 58 | ; SI-CHECK: @store_i16 |
| 59 | ; SI-CHECK: BUFFER_STORE_SHORT |
| 60 | define void @store_i16(i16 addrspace(1)* %out, i16 %in) { ; Test function: writes the i16 argument %in through the addrspace(1) pointer %out. |
| 61 | entry: |
| 62 | store i16 %in, i16 addrspace(1)* %out ; the only memory operation in this function |
| 63 | ret void |
| 64 | } |
| 65 | |
Tom Stellard | fbab827 | 2013-08-16 01:12:11 +0000 | [diff] [blame] | 66 | ; EG-CHECK: @store_v2i8 |
| 67 | ; EG-CHECK: MEM_RAT MSKOR |
| 68 | ; EG-CHECK-NOT: MEM_RAT MSKOR |
| 69 | ; SI-CHECK: @store_v2i8 |
| 70 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 71 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 72 | define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) { ; Test function: truncates <2 x i32> %in to <2 x i8> and stores the result to %out. |
| 73 | entry: |
| 74 | %0 = trunc <2 x i32> %in to <2 x i8> ; narrow each i32 element to i8 |
| 75 | store <2 x i8> %0, <2 x i8> addrspace(1)* %out ; single vector store of the truncated value |
| 76 | ret void |
| 77 | } |
| 78 | |
| 79 | |
| 80 | ; EG-CHECK: @store_v2i16 |
| 81 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
| 82 | ; CM-CHECK: @store_v2i16 |
| 83 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
| 84 | ; SI-CHECK: @store_v2i16 |
| 85 | ; SI-CHECK: BUFFER_STORE_DWORD |
| 86 | define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { ; Test function: truncates <2 x i32> %in to <2 x i16> and stores the result to %out. |
| 87 | entry: |
| 88 | %0 = trunc <2 x i32> %in to <2 x i16> ; narrow each i32 element to i16 |
| 89 | store <2 x i16> %0, <2 x i16> addrspace(1)* %out ; single vector store of the truncated value |
| 90 | ret void |
| 91 | } |
| 92 | |
| 93 | ; EG-CHECK: @store_v4i8 |
| 94 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
| 95 | ; CM-CHECK: @store_v4i8 |
| 96 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
| 97 | ; SI-CHECK: @store_v4i8 |
| 98 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 99 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 100 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 101 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 102 | define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) { ; Test function: truncates <4 x i32> %in to <4 x i8> and stores the result to %out. |
| 103 | entry: |
| 104 | %0 = trunc <4 x i32> %in to <4 x i8> ; narrow each i32 element to i8 |
| 105 | store <4 x i8> %0, <4 x i8> addrspace(1)* %out ; single vector store of the truncated value |
| 106 | ret void |
| 107 | } |
| 108 | |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 109 | ; floating-point store |
| 110 | ; EG-CHECK: @store_f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 111 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1 |
Tom Stellard | 6aa0d55 | 2013-06-14 22:12:24 +0000 | [diff] [blame] | 112 | ; CM-CHECK: @store_f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 113 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 114 | ; SI-CHECK: @store_f32 |
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 115 | ; SI-CHECK: BUFFER_STORE_DWORD |
| 116 | |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 117 | define void @store_f32(float addrspace(1)* %out, float %in) { ; Test function: writes the float argument %in through the addrspace(1) pointer %out. |
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 118 | store float %in, float addrspace(1)* %out ; the only memory operation in this function |
| 119 | ret void |
| 120 | } |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 121 | |
Tom Stellard | fbab827 | 2013-08-16 01:12:11 +0000 | [diff] [blame] | 122 | ; EG-CHECK: @store_v4i16 |
| 123 | ; EG-CHECK: MEM_RAT MSKOR |
| 124 | ; EG-CHECK: MEM_RAT MSKOR |
| 125 | ; EG-CHECK: MEM_RAT MSKOR |
| 126 | ; EG-CHECK: MEM_RAT MSKOR |
| 127 | ; EG-CHECK-NOT: MEM_RAT MSKOR |
| 128 | ; SI-CHECK: @store_v4i16 |
| 129 | ; SI-CHECK: BUFFER_STORE_SHORT |
| 130 | ; SI-CHECK: BUFFER_STORE_SHORT |
| 131 | ; SI-CHECK: BUFFER_STORE_SHORT |
| 132 | ; SI-CHECK: BUFFER_STORE_SHORT |
| 133 | ; SI-CHECK-NOT: BUFFER_STORE_BYTE |
| 134 | define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) { ; Test function: truncates <4 x i32> %in to <4 x i16> and stores the result to %out. |
| 135 | entry: |
| 136 | %0 = trunc <4 x i32> %in to <4 x i16> ; narrow each i32 element to i16 |
| 137 | store <4 x i16> %0, <4 x i16> addrspace(1)* %out ; single vector store of the truncated value |
| 138 | ret void |
| 139 | } |
| 140 | |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 141 | ; vec2 floating-point stores |
| 142 | ; EG-CHECK: @store_v2f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 143 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 144 | ; CM-CHECK: @store_v2f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 145 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 146 | ; SI-CHECK: @store_v2f32 |
| 147 | ; SI-CHECK: BUFFER_STORE_DWORDX2 |
| 148 | |
| 149 | define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { ; Test function: builds the vector <%a, %b> and stores it to %out. |
| 150 | entry: |
| 151 | %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0 ; place %a in element 0 of a zero vector |
Tom Stellard | 8e5da41 | 2013-08-14 23:24:32 +0000 | [diff] [blame] | 152 | %1 = insertelement <2 x float> %0, float %b, i32 1 ; place %b in element 1 |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 153 | store <2 x float> %1, <2 x float> addrspace(1)* %out ; single vector store of both elements |
| 154 | ret void |
| 155 | } |
| 156 | |
Tom Stellard | 6d1379e | 2013-08-16 01:12:00 +0000 | [diff] [blame] | 157 | ; EG-CHECK: @store_v4i32 |
| 158 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
| 159 | ; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW |
| 160 | ; CM-CHECK: @store_v4i32 |
| 161 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
| 162 | ; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD |
| 163 | ; SI-CHECK: @store_v4i32 |
| 164 | ; SI-CHECK: BUFFER_STORE_DWORDX4 |
| 165 | define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { ; Test function: writes the <4 x i32> argument %in through the addrspace(1) pointer %out. |
| 166 | entry: |
| 167 | store <4 x i32> %in, <4 x i32> addrspace(1)* %out ; the only memory operation in this function |
| 168 | ret void |
| 169 | } |
| 170 | |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 171 | ; The stores in this function are combined by the optimizer to create a |
| 172 | ; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer |
| 173 | ; should not try to split the 64-bit store back into 2 32-bit stores. |
| 174 | ; |
| 175 | ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should |
| 176 | ; be two 32-bit stores. |
| 177 | |
| 178 | ; EG-CHECK: @vecload2 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 179 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 180 | ; CM-CHECK: @vecload2 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 181 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 182 | ; SI-CHECK: @vecload2 |
| 183 | ; SI-CHECK: BUFFER_STORE_DWORDX2 |
| 184 | define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { ; Test function: copies two consecutive i32 values from %mem (addrspace(2)) to %out (addrspace(1)). |
| 185 | entry: |
| 186 | %0 = load i32 addrspace(2)* %mem, align 4, !tbaa !5 ; load element 0 of %mem |
| 187 | %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1 ; address of element 1 of %mem |
| 188 | %1 = load i32 addrspace(2)* %arrayidx1.i, align 4, !tbaa !5 ; load element 1 of %mem |
| 189 | store i32 %0, i32 addrspace(1)* %out, align 4, !tbaa !5 ; store to element 0 of %out |
| 190 | %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1 ; address of element 1 of %out |
| 191 | store i32 %1, i32 addrspace(1)* %arrayidx1, align 4, !tbaa !5 ; store to element 1 of %out, adjacent to the previous store |
| 192 | ret void |
| 193 | } |
| 194 | |
Bill Wendling | 187d3dd | 2013-08-22 21:28:54 +0000 | [diff] [blame^] | 195 | attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ; attribute group #0, referenced by @vecload2 |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 196 | |
| 197 | !5 = metadata !{metadata !"int", metadata !6} ; TBAA node attached via !tbaa !5 to the loads and stores in @vecload2 |
| 198 | !6 = metadata !{metadata !"omnipotent char", metadata !7} ; referenced by !5 |
| 199 | !7 = metadata !{metadata !"Simple C/C++ TBAA"} ; referenced by !6 |