; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

; Test multiplies of various narrow types.
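; With -x86-experimental-vector-widening-legalization the illegal narrow
; vectors are widened to the next legal vector type instead of having their
; elements promoted to wider integers. x86 has no vector i8 multiply, so the
; i8 cases below extend to i16, multiply with pmullw, and pack or shuffle the
; low bytes back, as the CHECK lines show.
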
define <2 x i8> @mul_v2i8(<2 x i8> %x, <2 x i8> %y) {
; SSE2-LABEL: mul_v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <2 x i8> %x, %y
  ret <2 x i8> %res
}

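; The <4 x i8> and <8 x i8> cases lower through the same extend/pmullw/pack
; sequence; only the final pack or shuffle mask keeps more lanes.
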
define <4 x i8> @mul_v4i8(<4 x i8> %x, <4 x i8> %y) {
; SSE2-LABEL: mul_v4i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v4i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <4 x i8> %x, %y
  ret <4 x i8> %res
}

define <8 x i8> @mul_v8i8(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: mul_v8i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pmullw %xmm1, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v8i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pmullw %xmm1, %xmm0
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    retq
  %res = mul <8 x i8> %x, %y
  ret <8 x i8> %res
}

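; i16 elements need no extension: the vectors are widened to <8 x i16>, for
; which pmullw is legal, so a single multiply suffices on every target.
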
define <2 x i16> @mul_v2i16(<2 x i16> %x, <2 x i16> %y) {
; SSE-LABEL: mul_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i16> %x, %y
  ret <2 x i16> %res
}

define <4 x i16> @mul_v4i16(<4 x i16> %x, <4 x i16> %y) {
; SSE-LABEL: mul_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pmullw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <4 x i16> %x, %y
  ret <4 x i16> %res
}

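; SSE2 has no vector i32 multiply, so mul_v2i32 is emulated with two pmuludq
; ops on the even and odd elements plus shuffles to recombine the low 32-bit
; halves; SSE4.1 and AVX targets can use pmulld directly.
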
define <2 x i32> @mul_v2i32(<2 x i32> %x, <2 x i32> %y) {
; SSE2-LABEL: mul_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: mul_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmulld %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: mul_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %res = mul <2 x i32> %x, %y
  ret <2 x i32> %res
}