;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN

; Pixel shader that calls llvm.amdgcn.init.exec with the constant -1.
; The checks expect exec to be written by an s_mov_b64 of -1, and expect that
; write to appear before the v_add_f32 even though the IR places the intrinsic
; call after the fadd.
; GCN-LABEL: {{^}}full_mask:
; GCN: s_mov_b64 exec, -1
; GCN: v_add_f32_e32 v0,
define amdgpu_ps float @full_mask(float %a, float %b) {
main_body:
  ; The fadd is deliberately placed ahead of the exec initialisation.
  %s = fadd float %a, %b
  call void @llvm.amdgcn.init.exec(i64 -1)
  ret float %s
}

; Same shape as the all-ones case, but with an arbitrary constant mask.
; 123456 decimal is 0x1e240, which is the immediate the checks expect in the
; s_mov_b64 to exec. The exec write is again expected before the v_add_f32.
; GCN-LABEL: {{^}}partial_mask:
; GCN: s_mov_b64 exec, 0x1e240
; GCN: v_add_f32_e32 v0,
define amdgpu_ps float @partial_mask(float %a, float %b) {
main_body:
  ; The fadd precedes the intrinsic call in the IR on purpose.
  %s = fadd float %a, %b
  call void @llvm.amdgcn.init.exec(i64 123456)
  ret float %s
}

; init.exec.from.input with the count taken from the fourth inreg argument
; (expected in s3) at bit offset 8.
; Expected sequence:
;   - s_bfe_u32 extracts the count from s3; the immediate 0x70008 carries the
;     offset 8 in its low bits (the 7 in the upper half is presumably the field
;     width -- confirm against the ISA manual),
;   - s_bfm_b64 builds the exec mask from that count starting at bit 0,
;   - a count equal to 64 is special-cased via s_cmp_eq_u32 / s_cmov_b64 to an
;     all-ones exec,
;   - only then the v_add_f32 for the fadd.
; GCN-LABEL: {{^}}input_s3off8:
; GCN: s_bfe_u32 s0, s3, 0x70008
; GCN: s_bfm_b64 exec, s0, 0
; GCN: s_cmp_eq_u32 s0, 64
; GCN: s_cmov_b64 exec, -1
; GCN: v_add_f32_e32 v0,
define amdgpu_ps float @input_s3off8(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
main_body:
  ; The three unnamed inreg arguments only push %count to the fourth slot.
  %s = fadd float %a, %b
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
  ret float %s
}

; init.exec.from.input with the count taken from the first inreg argument
; (expected in s0) at bit offset 19.
; The expected sequence matches the s3/offset-8 variant, except that the
; s_bfe_u32 reads s0 and its immediate is 0x70013 (0x13 = 19). The extracted
; count overwrites s0 here, since %count has no later use.
; GCN-LABEL: {{^}}input_s0off19:
; GCN: s_bfe_u32 s0, s0, 0x70013
; GCN: s_bfm_b64 exec, s0, 0
; GCN: s_cmp_eq_u32 s0, 64
; GCN: s_cmov_b64 exec, -1
; GCN: v_add_f32_e32 v0,
define amdgpu_ps float @input_s0off19(i32 inreg %count, float %a, float %b) {
main_body:
  ; The fadd comes first in the IR; the checks expect it after the exec setup.
  %s = fadd float %a, %b
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
  ret float %s
}

; The input register is used again after the exec initialisation.
; %count (expected in s0) feeds both init.exec.from.input and a later integer
; add. The checks therefore expect the extracted count to go to a different
; register (s1), so that s0 still holds the original value when the
; v_add_i32 reads it.
; GCN-LABEL: {{^}}reuse_input:
; GCN: s_bfe_u32 s1, s0, 0x70013
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
; GCN: v_add_i32_e32 v0, vcc, s0, v0
define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
main_body:
  ; The intrinsic call precedes the use of %count in the add.
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
  %s = add i32 %a, %count
  %f = sitofp i32 %s to float
  ret float %f
}

; Same values as reuse_input, but the IR places the add and sitofp before
; the init.exec.from.input call. The expected output is identical to
; reuse_input: the exec setup (through s1) is still checked ahead of the
; v_add_i32, and s0 must still hold %count for that add.
; GCN-LABEL: {{^}}reuse_input2:
; GCN: s_bfe_u32 s1, s0, 0x70013
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
; GCN: v_add_i32_e32 v0, vcc, s0, v0
define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
main_body:
  ; The arithmetic on %count precedes the intrinsic call in this variant.
  %s = add i32 %a, %count
  %f = sitofp i32 %s to float
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
  ret float %f
}

; Intrinsics exercised by the functions in this file. Both carry attribute
; group #1.
;   init.exec(i64)                  - called with a constant mask (-1, 123456).
;   init.exec.from.input(i32, i32)  - called with an inreg value and a constant
;                                     bit offset (8 or 19).
declare void @llvm.amdgcn.init.exec(i64) #1
declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1

; Attribute group shared by the two declarations above.
attributes #1 = { convergent }