blob: af69a5ac2283975bcd50631758b8b14031b08696 [file] [log] [blame]
Simon Pilgrimdecfaca2017-07-03 15:01:07 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
Simon Pilgrimb5c68a62017-07-03 15:55:54 +00002; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
3; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
Simon Pilgrimdecfaca2017-07-03 15:01:07 +00004; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6;
Simon Pilgrimb5c68a62017-07-03 15:55:54 +00007; Combine tests involving SSE4A target shuffles (EXTRQI,INSERTQI)
Simon Pilgrimdecfaca2017-07-03 15:01:07 +00008
; SSSE3 PSHUFB intrinsic (128-bit). In the tests below the first operand is the
; vector being shuffled and the second is the constant per-byte control vector;
; a control byte of 255 is used wherever a zero result byte is wanted.
9declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
10
; EXTRQI-style shuffle followed by PSHUFB, on byte elements.
; The shufflevector moves bytes 1-2 of %a0 into lanes 0-1 and fills lanes 2-7
; from the zero vector (index 16); the upper eight lanes are undef.
; The PSHUFB control keeps lanes 0-4 in place and uses 255 for lanes 5-7, which
; are already zero, so the PSHUFB contributes nothing new.
; The checks expect the pair to collapse into a single extrq for every tested
; feature set.
11define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
Simon Pilgrimcc0f7852017-07-06 12:22:58 +000012; ALL-LABEL: combine_extrqi_pshufb_16i8:
13; ALL: # BB#0:
14; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
15; ALL-NEXT: retq
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000016 %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
17 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
18 ret <16 x i8> %2
19}
20
; EXTRQI-style shuffle followed by PSHUFB, on 16-bit elements.
; The shufflevector moves words 1-2 of %a0 into lanes 0-1 and fills lanes 2-3
; from the zero vector (index 8); the upper four lanes are undef.
; After the bitcast to bytes, the PSHUFB control keeps bytes 0-1 (word 1 of
; %a0) and zeroes bytes 2-7 with 255, so word 2 is discarded.
; The checks expect a single extrq of bytes 2-3 of xmm0 for every tested
; feature set.
21define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
Simon Pilgrimcc0f7852017-07-06 12:22:58 +000022; ALL-LABEL: combine_extrqi_pshufb_8i16:
23; ALL: # BB#0:
24; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
25; ALL-NEXT: retq
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000026 %1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
27 %2 = bitcast <8 x i16> %1 to <16 x i8>
28 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
29 %4 = bitcast <16 x i8> %3 to <8 x i16>
30 ret <8 x i16> %4
31}
32
; INSERTQI-style shuffle followed by PSHUFB, on byte elements.
; The shufflevector places bytes 0-2 of %a1 (indices 16-18) in lanes 0-2 and
; keeps bytes 3-7 of %a0; the upper eight lanes are undef.
; The PSHUFB control keeps only bytes 0-1 and zeroes bytes 2-7 with 255, so no
; byte of %a0 reaches the result.
; The checks expect an extrq on xmm1 plus a register move for the SSSE3 run,
; and a single (v)pmovzxwq of xmm1 for the SSE4.2 and AVX runs.
33define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000034; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
35; SSSE3: # BB#0:
Simon Pilgrimcc0f7852017-07-06 12:22:58 +000036; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
37; SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000038; SSSE3-NEXT: retq
39;
40; SSE42-LABEL: combine_insertqi_pshufb_16i8:
41; SSE42: # BB#0:
Simon Pilgrimfa6e6752017-07-03 20:58:16 +000042; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000043; SSE42-NEXT: retq
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000044;
45; AVX-LABEL: combine_insertqi_pshufb_16i8:
46; AVX: # BB#0:
Simon Pilgrimfa6e6752017-07-03 20:58:16 +000047; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000048; AVX-NEXT: retq
49 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
50 %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
51 ret <16 x i8> %2
52}
53
; INSERTQI-style shuffle followed by PSHUFB, on 16-bit elements.
; The shufflevector places word 0 of %a1 (index 8) in lane 0 and keeps words
; 1-3 of %a0; the upper four lanes are undef.
; After the bitcast to bytes, the PSHUFB control keeps only bytes 0-1 and
; zeroes bytes 2-7 with 255, so no part of %a0 reaches the result.
; The checks expect an extrq on xmm1 plus a register move for the SSSE3 run,
; and a single (v)pmovzxwq of xmm1 for the SSE4.2 and AVX runs.
54define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000055; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
56; SSSE3: # BB#0:
Simon Pilgrimcc0f7852017-07-06 12:22:58 +000057; SSSE3-NEXT: extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
58; SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000059; SSSE3-NEXT: retq
60;
61; SSE42-LABEL: combine_insertqi_pshufb_8i16:
62; SSE42: # BB#0:
Simon Pilgrimac3e7f32017-07-04 18:11:02 +000063; SSE42-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
Simon Pilgrimb5c68a62017-07-03 15:55:54 +000064; SSE42-NEXT: retq
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000065;
66; AVX-LABEL: combine_insertqi_pshufb_8i16:
67; AVX: # BB#0:
Simon Pilgrimac3e7f32017-07-04 18:11:02 +000068; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
Simon Pilgrimdecfaca2017-07-03 15:01:07 +000069; AVX-NEXT: retq
70 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
71 %2 = bitcast <8 x i16> %1 to <16 x i8>
72 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
73 %4 = bitcast <16 x i8> %3 to <8 x i16>
74 ret <8 x i16> %4
75}
Simon Pilgrim03641df2017-07-06 14:52:24 +000076
; PSHUFB -> INSERTQI-style shuffle -> PSHUFB chain, on byte elements.
; The first PSHUFB reverses the low eight bytes of %a0 (control 7..0).
; The shufflevector then overwrites lanes 1-2 with bytes 0-1 of %a1
; (indices 16-17) and leaves the other low lanes alone.
; The second PSHUFB (control 7,1,2,4,3,u,u,0) permutes that result, so the low
; eight bytes end up as
;   a0[0], a1[0], a1[1], a0[3], a0[4], undef, undef, a0[7].
; The checks expect the whole chain to fold into one insertq for every tested
; feature set.
77define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
Simon Pilgrim71360072017-07-06 15:34:17 +000078; ALL-LABEL: combine_pshufb_insertqi_pshufb:
79; ALL: # BB#0:
80; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
81; ALL-NEXT: retq
Simon Pilgrim03641df2017-07-06 14:52:24 +000082 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
83 %2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
84 %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
85 ret <16 x i8> %3
86}