blob: 4db73750ae0cac1a1107f24cd7d38a467d8e622f [file] [log] [blame]
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
3
; Shuffle-based fadd reduction of a <4 x half> to a single half: lanes 2,3 are
; added onto lanes 0,1, then lane 1 is added onto lane 0 and lane 0 is returned.
;
; The GFX9 checks expect one packed add followed by one SDWA add whose second
; source selects WORD_1 of the packed result. The VI checks expect three
; scalar adds.
;
; ADD is defined once and then referenced with [[ADD]] on the same line, so the
; destination and first source of the packed add must be the same register.
; GCN-LABEL: {{^}}reduction_half4:
; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[ADD]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_f16_sdwa [[ADD]], [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
define half @reduction_half4(<4 x half> %vec4) {
entry:
  ; Step 1: fold the upper two lanes onto the lower two.
  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = fadd fast <4 x half> %vec4, %rdx.shuf
  ; Step 2: fold lane 1 onto lane 0.
  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <4 x half> %bin.rdx, %rdx.shuf1
  ; Lane 0 now holds the full sum.
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  ret half %res
}
20
; Shuffle-based integer add reduction of a <4 x i16> to a single i16: lanes 2,3
; are added onto lanes 0,1, then lane 1 is added onto lane 0 and lane 0 is
; returned.
;
; The GFX9 checks expect one packed add followed by one SDWA add whose second
; source selects WORD_1 of the packed result. The VI checks expect three
; scalar adds.
;
; ADD is defined once and then referenced with [[ADD]] on the same line, so the
; destination and first source of the packed add must be the same register.
; GCN-LABEL: {{^}}reduction_v4i16:
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[ADD]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_add_u16_sdwa [[ADD]], [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
define i16 @reduction_v4i16(<4 x i16> %vec4) {
entry:
  ; Step 1: fold the upper two lanes onto the lower two.
  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx = add <4 x i16> %vec4, %rdx.shuf
  ; Step 2: fold lane 1 onto lane 0.
  %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1
  ; Lane 0 now holds the full sum.
  %res = extractelement <4 x i16> %bin.rdx2, i32 0
  ret i16 %res
}
37
; Shuffle-based fadd reduction of an <8 x half> to a single half in three
; halving steps (lanes 4-7 onto 0-3, lanes 2,3 onto 0,1, lane 1 onto lane 0).
;
; The GFX9 checks expect three packed adds followed by one SDWA add whose
; second source selects WORD_1. The VI checks expect seven scalar adds.
;
; ADD1 and ADD are each defined exactly once, in this function. The third
; packed add must accumulate ADD1 into ADD, and the SDWA add must consume ADD.
; GCN-LABEL: {{^}}reduction_half8:
; GFX9: v_pk_add_f16 [[ADD1:v[0-9]+]], [[ADD1]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD:v[0-9]+]], [[ADD]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD]], [[ADD]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa [[ADD]], [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

define half @reduction_half8(<8 x half> %vec8) {
entry:
  ; Step 1: fold lanes 4-7 onto lanes 0-3.
  %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = fadd fast <8 x half> %vec8, %rdx.shuf
  ; Step 2: fold lanes 2,3 onto lanes 0,1.
  %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <8 x half> %bin.rdx, %rdx.shuf1
  ; Step 3: fold lane 1 onto lane 0.
  %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = fadd fast <8 x half> %bin.rdx2, %rdx.shuf3
  ; Lane 0 now holds the full sum.
  %res = extractelement <8 x half> %bin.rdx4, i32 0
  ret half %res
}
63
; Shuffle-based integer add reduction of an <8 x i16> to a single i16 in three
; halving steps (lanes 4-7 onto 0-3, lanes 2,3 onto 0,1, lane 1 onto lane 0).
;
; The GFX9 checks expect three packed adds followed by one SDWA add whose
; second source selects WORD_1. The VI checks expect seven scalar adds.
;
; ADD1 and ADD are defined here, on the first two packed adds, so these checks
; do not depend on values captured while matching another function.
; GCN-LABEL: {{^}}reduction_v8i16:
; GFX9: v_pk_add_u16 [[ADD1:v[0-9]+]], [[ADD1]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD:v[0-9]+]], [[ADD]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_u16 [[ADD]], [[ADD]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_u16_sdwa [[ADD]], [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32
; VI-NEXT: v_add_u16_e32

define i16 @reduction_v8i16(<8 x i16> %vec8) {
entry:
  ; Step 1: fold lanes 4-7 onto lanes 0-3.
  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i16> %vec8, %rdx.shuf
  ; Step 2: fold lanes 2,3 onto lanes 0,1.
  %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
  ; Step 3: fold lane 1 onto lane 0.
  %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
  ; Lane 0 now holds the full sum.
  %res = extractelement <8 x i16> %bin.rdx4, i32 0
  ret i16 %res
}
89
; Shuffle-based fadd reduction of a <16 x half> to a single half in four
; halving steps (lanes 8-15 onto 0-7, 4-7 onto 0-3, 2,3 onto 0,1, 1 onto 0).
;
; The GFX9 checks expect seven packed adds followed by one SDWA add whose
; second source selects WORD_1. The VI checks expect fifteen scalar adds.
;
; The first four packed adds are matched without capturing registers. ADD1 and
; ADD are defined on the fifth and sixth packed adds, so the final checks do
; not depend on values captured while matching another function.
; GCN-LABEL: {{^}}reduction_half16:
; GFX9: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD1:v[0-9]+]], [[ADD1]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD:v[0-9]+]], [[ADD]], v{{[0-9]+}}{{$}}
; GFX9-NEXT: v_pk_add_f16 [[ADD]], [[ADD]], [[ADD1]]{{$}}
; GFX9-NEXT: v_add_f16_sdwa [[ADD]], [[ADD]], [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1

; VI: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32
; VI-NEXT: v_add_f16_e32

define half @reduction_half16(<16 x half> %vec16) {
entry:
  ; Step 1: fold lanes 8-15 onto lanes 0-7.
  %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = fadd fast <16 x half> %vec16, %rdx.shuf
  ; Step 2: fold lanes 4-7 onto lanes 0-3.
  %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx2 = fadd fast <16 x half> %bin.rdx, %rdx.shuf1
  ; Step 3: fold lanes 2,3 onto lanes 0,1.
  %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx4 = fadd fast <16 x half> %bin.rdx2, %rdx.shuf3
  ; Step 4: fold lane 1 onto lane 0.
  %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx6 = fadd fast <16 x half> %bin.rdx4, %rdx.shuf5
  ; Lane 0 now holds the full sum.
  %res = extractelement <16 x half> %bin.rdx6, i32 0
  ret half %res
}