; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; ===================================================================================
; V_XOR3_B32
; ===================================================================================

; Baseline case: (a ^ b) ^ c with all three operands arriving in VGPRs.
; The gfx900 run expects a chain of two v_xor_b32; the gfx1010 run expects the
; pair to be selected as a single v_xor3_b32.
define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
; GFX9-LABEL: xor3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, v2
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = xor i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Only %b is a VGPR operand; %a and %c are inreg and show up as s2 and s3 in
; the expected output. The gfx1010 run expects both scalar registers to be
; used directly as operands of one v_xor3_b32.
define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
; GFX9-LABEL: xor3_vgpr_b:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
; GFX9-NEXT:    v_xor_b32_e32 v0, s3, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_vgpr_b:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, s2, v0, s3
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = xor i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Same all-VGPR operands as the baseline, but associated the other way round:
; a ^ (b ^ c), so the inner xor is the right-hand operand of the outer one.
; The gfx1010 run still expects a single v_xor3_b32.
define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; GFX9-LABEL: xor3_vgpr_all2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v2
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_vgpr_all2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, v1, v2, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %b, %c
  %result = xor i32 %a, %x
  %bc = bitcast i32 %result to float
  ret float %bc
}

; %a is inreg (s2 in the expected output) while %b and %c are VGPR operands.
; The gfx1010 run expects one v_xor3_b32 taking the scalar register as its
; first source.
define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; GFX9-LABEL: xor3_vgpr_bc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_vgpr_bc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, s2, v0, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = xor i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; The outer xor uses the literal 16 instead of a third register operand.
; The gfx1010 run expects the constant to appear directly as the last source
; of v_xor3_b32.
define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) {
; GFX9-LABEL: xor3_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_xor_b32_e32 v0, 16, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_vgpr_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = xor i32 %x, 16
  %bc = bitcast i32 %result to float
  ret float %bc
}

; The outer xor has two uses: it is returned in element 0 and also feeds the
; multiply returned in element 1. The inner xor has a single use, so the
; gfx1010 run still expects the pair to become one v_xor3_b32.
define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x) {
; GFX9-LABEL: xor3_multiuse_outer:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_multiuse_outer:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, v2
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v3
; GFX10-NEXT:    ; return to shader part epilog
  %inner = xor i32 %a, %b
  %outer = xor i32 %inner, %c
  %x1 = mul i32 %outer, %x
  %r1 = insertelement <2 x i32> undef, i32 %outer, i32 0
  %r0 = insertelement <2 x i32> %r1, i32 %x1, i32 1
  %bc = bitcast <2 x i32> %r0 to <2 x float>
  ret <2 x float> %bc
}

; The inner xor has two uses: it is returned in element 0 and also feeds the
; outer xor returned in element 1. Because the intermediate value is needed on
; its own, both runs expect two separate v_xor_b32 and no v_xor3_b32.
define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
; GFX9-LABEL: xor3_multiuse_inner:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_xor_b32_e32 v1, v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_multiuse_inner:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_xor_b32_e32 v1, v0, v2
; GFX10-NEXT:    ; return to shader part epilog
  %inner = xor i32 %a, %b
  %outer = xor i32 %inner, %c
  %r1 = insertelement <2 x i32> undef, i32 %inner, i32 0
  %r0 = insertelement <2 x i32> %r1, i32 %outer, i32 1
  %bc = bitcast <2 x i32> %r0 to <2 x float>
  ret <2 x float> %bc
}

; A case where uniform values end up in VGPRs -- we could use v_xor3_b32 here,
; but we don't.
; All three inputs are inreg floats; each goes through an fadd (whose result
; the expected output places in a VGPR) before being bitcast to i32 and xor'ed.
; Both runs currently expect two separate v_xor_b32 instructions.
define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float inreg %c) {
; GFX9-LABEL: xor3_uniform_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40400000
; GFX9-NEXT:    v_add_f32_e64 v0, s2, 1.0
; GFX9-NEXT:    v_add_f32_e64 v1, s3, 2.0
; GFX9-NEXT:    v_add_f32_e32 v2, s4, v2
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor3_uniform_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_f32_e64 v1, s3, 2.0
; GFX10-NEXT:    v_add_f32_e64 v2, s2, 1.0
; GFX10-NEXT:    v_add_f32_e64 v0, 0x40400000, s4
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_xor_b32_e32 v1, v2, v1
; GFX10-NEXT:    v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT:    ; return to shader part epilog
  %a1 = fadd float %a, 1.0
  %b2 = fadd float %b, 2.0
  %c3 = fadd float %c, 3.0
  %bc.a = bitcast float %a1 to i32
  %bc.b = bitcast float %b2 to i32
  %bc.c = bitcast float %c3 to i32
  %x = xor i32 %bc.a, %bc.b
  %result = xor i32 %x, %bc.c
  %bc = bitcast i32 %result to float
  ret float %bc
}