Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 1 | ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s |
Tom Stellard | 6aa0d55 | 2013-06-14 22:12:24 +0000 | [diff] [blame] | 2 | ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s |
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 3 | ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s |
| 4 | |
Tom Stellard | d3ee8c1 | 2013-08-16 01:12:06 +0000 | [diff] [blame^] | 5 | ;===------------------------------------------------------------------------===; |
| 6 | ; Global Address Space |
| 7 | ;===------------------------------------------------------------------------===; |
| 8 | |
| 9 | ; i8 store |
| 10 | ; EG-CHECK: @store_i8 |
| 11 | ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X |
| 12 | ; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]] |
| 13 | ; IG 0: Get the byte index |
| 14 | ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x |
| 15 | ; EG-CHECK-NEXT: 3 |
| 16 | ; IG 1: Truncate the value and calculate the shift amount for the mask |
| 17 | ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x |
| 18 | ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y |
| 19 | ; EG-CHECK: 255(3.573311e-43), 3 |
| 20 | ; IG 2: Shift the value and the mask |
| 21 | ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] |
| 22 | ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] |
| 23 | ; EG-CHECK-NEXT: 255 |
| 24 | ; IG 3: Initialize the Y and Z channels to zero |
| 25 | ; XXX: An optimal scheduler should merge this into one of the previous IGs. |
| 26 | ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 |
| 27 | ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 |
| 28 | |
| 29 | ; SI-CHECK: @store_i8 |
| 30 | ; SI-CHECK: BUFFER_STORE_BYTE |
| 31 | |
; Test input for the i8 case: writes the i8 argument %in through the
; addrspace(1) pointer %out. The FileCheck directives preceding this function
; describe the code expected for this one store.
| 32 | define void @store_i8(i8 addrspace(1)* %out, i8 %in) { |
| 33 | entry: |
; The only memory operation in the function: a single byte-sized store.
| 34 | store i8 %in, i8 addrspace(1)* %out |
| 35 | ret void |
| 36 | } |
| 37 | |
| 38 | ; i16 store |
| 39 | ; EG-CHECK: @store_i16 |
| 40 | ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X |
| 41 | ; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]] |
| 42 | ; IG 0: Get the byte index |
| 43 | ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x |
| 44 | ; EG-CHECK-NEXT: 3 |
| 45 | ; IG 1: Truncate the value and calculate the shift amount for the mask |
| 46 | ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x |
| 47 | ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y |
| 48 | ; EG-CHECK: 65535(9.183409e-41), 3 |
| 49 | ; IG 2: Shift the value and the mask |
| 50 | ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]] |
| 51 | ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]] |
| 52 | ; EG-CHECK-NEXT: 65535 |
| 53 | ; IG 3: Initialize the Y and Z channels to zero |
| 54 | ; XXX: An optimal scheduler should merge this into one of the previous IGs. |
| 55 | ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0 |
| 56 | ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0 |
| 57 | |
| 58 | ; SI-CHECK: @store_i16 |
| 59 | ; SI-CHECK: BUFFER_STORE_SHORT |
; Test input for the i16 case: writes the i16 argument %in through the
; addrspace(1) pointer %out. The FileCheck directives preceding this function
; describe the code expected for this one store.
| 60 | define void @store_i16(i16 addrspace(1)* %out, i16 %in) { |
| 61 | entry: |
; The only memory operation in the function: a single 16-bit store.
| 62 | store i16 %in, i16 addrspace(1)* %out |
| 63 | ret void |
| 64 | } |
| 65 | |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 66 | ; floating-point store |
| 67 | ; EG-CHECK: @store_f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 68 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1 |
Tom Stellard | 6aa0d55 | 2013-06-14 22:12:24 +0000 | [diff] [blame] | 69 | ; CM-CHECK: @store_f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 70 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} |
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 71 | ; SI-CHECK: @store_f32 |
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 72 | ; SI-CHECK: BUFFER_STORE_DWORD |
| 73 | |
; Test input for the scalar floating-point case: writes the float argument %in
; through the addrspace(1) pointer %out. This function has directives for all
; three llc invocations at the top of the file (redwood, cayman and verde).
Tom Stellard | 5a6b0d8 | 2013-04-19 02:10:53 +0000 | [diff] [blame] | 74 | define void @store_f32(float addrspace(1)* %out, float %in) { |
; The only memory operation in the function: a single 32-bit float store.
Tom Stellard | 754f80f | 2013-04-05 23:31:51 +0000 | [diff] [blame] | 75 | store float %in, float addrspace(1)* %out |
| 76 | ret void |
| 77 | } |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 78 | |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 79 | ; vec2 floating-point stores |
| 80 | ; EG-CHECK: @store_v2f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 81 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 82 | ; CM-CHECK: @store_v2f32 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 83 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 84 | ; SI-CHECK: @store_v2f32 |
| 85 | ; SI-CHECK: BUFFER_STORE_DWORDX2 |
| 86 | |
; Test input for the two-element float vector case: assembles a <2 x float>
; from the scalar arguments %a and %b and writes it through the addrspace(1)
; pointer %out with one vector store.
| 87 | define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { |
| 88 | entry: |
; Start from an all-zero vector and place %a in element 0.
| 89 | %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0 |
; Place %b in element 1, so both elements now come from the arguments.
Tom Stellard | 8e5da41 | 2013-08-14 23:24:32 +0000 | [diff] [blame] | 90 | %1 = insertelement <2 x float> %0, float %b, i32 1 |
; Single store of the whole two-element vector.
Tom Stellard | ed2f614 | 2013-07-18 21:43:42 +0000 | [diff] [blame] | 91 | store <2 x float> %1, <2 x float> addrspace(1)* %out |
| 92 | ret void |
| 93 | } |
| 94 | |
Tom Stellard | 6d1379e | 2013-08-16 01:12:00 +0000 | [diff] [blame] | 95 | ; EG-CHECK: @store_v4i32 |
| 96 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
| 97 | ; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW |
| 98 | ; CM-CHECK: @store_v4i32 |
| 99 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
| 100 | ; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD |
| 101 | ; SI-CHECK: @store_v4i32 |
| 102 | ; SI-CHECK: BUFFER_STORE_DWORDX4 |
; Test input for the four-element integer vector case: writes the <4 x i32>
; argument %in, unmodified, through the addrspace(1) pointer %out. The
; directives preceding this function allow one store instruction per target
; and, for the redwood and cayman runs, reject a second one.
| 103 | define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { |
| 104 | entry: |
; Single store of the whole four-element vector.
| 105 | store <4 x i32> %in, <4 x i32> addrspace(1)* %out |
| 106 | ret void |
| 107 | } |
| 108 | |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 109 | ; The stores in this function are combined by the optimizer to create a |
| 110 | ; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer |
| 111 | ; should not try to split the 64-bit store back into 2 32-bit stores. |
| 112 | ; |
| 113 | ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should |
| 114 | ; be two 32-bit stores. |
| 115 | |
| 116 | ; EG-CHECK: @vecload2 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 117 | ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 118 | ; CM-CHECK: @vecload2 |
Tom Stellard | ac00f9d | 2013-08-16 01:11:46 +0000 | [diff] [blame] | 119 | ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD |
Tom Stellard | 0125f2a | 2013-06-25 02:39:35 +0000 | [diff] [blame] | 120 | ; SI-CHECK: @vecload2 |
| 121 | ; SI-CHECK: BUFFER_STORE_DWORDX2 |
; Test input for adjacent scalar stores: reads the i32 elements at index 0 and
; 1 of the addrspace(2) pointer %mem and writes them to index 0 and 1 of the
; addrspace(1) pointer %out. Every load and store is declared with 4-byte
; alignment and tagged with TBAA node !5; the function uses attribute group #0.
| 122 | define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { |
| 123 | entry: |
; Load %mem[0], then compute the address of %mem[1] and load it.
| 124 | %0 = load i32 addrspace(2)* %mem, align 4, !tbaa !5 |
| 125 | %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1 |
| 126 | %1 = load i32 addrspace(2)* %arrayidx1.i, align 4, !tbaa !5 |
; Store the two loaded values to %out[0] and %out[1]. These are the adjacent
; 32-bit stores that the comment preceding this test says are combined.
| 127 | store i32 %0, i32 addrspace(1)* %out, align 4, !tbaa !5 |
| 128 | %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1 |
| 129 | store i32 %1, i32 addrspace(1)* %arrayidx1, align 4, !tbaa !5 |
| 130 | ret void |
| 131 | } |
| 132 | |
; Attribute group #0, referenced by @vecload2: nounwind plus a set of string
; attributes that are all set to "false".
| 133 | attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 134 | |
; TBAA metadata used by the loads and stores in @vecload2. Node !5 ("int")
; refers to !6 ("omnipotent char"), which refers to !7 ("Simple C/C++ TBAA").
| 135 | !5 = metadata !{metadata !"int", metadata !6} |
| 136 | !6 = metadata !{metadata !"omnipotent char", metadata !7} |
| 137 | !7 = metadata !{metadata !"Simple C/C++ TBAA"} |