| Ahmed Bougacha | 671795a | 2016-03-03 16:53:50 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s |
| 3 | |
| 4 | declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) |
| 5 | declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 6 | declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 7 | declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) |
| Ahmed Bougacha | 671795a | 2016-03-03 16:53:50 +0000 | [diff] [blame] | 8 | |
| | ; Test: an AVX2 pshuf.b call whose result feeds a shufflevector against zeroinitializer. The CHECK lines expect the pair to fold to a single vxorps that zeroes ymm0. |
| Simon Pilgrim | 8dd73e3 | 2016-06-11 13:18:21 +0000 | [diff] [blame] | 9 | define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) { |
| 10 | ; CHECK-LABEL: combine_pshufb_pslldq: |
| 11 | ; CHECK: # BB#0: |
| Simon Pilgrim | 6800a45 | 2016-06-11 13:38:28 +0000 | [diff] [blame] | 12 | ; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0 |
| Simon Pilgrim | 8dd73e3 | 2016-06-11 13:18:21 +0000 | [diff] [blame] | 13 | ; CHECK-NEXT: retq |
| 14 | %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>) |
| 15 | %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> |
| 16 | ret <32 x i8> %2 |
| 17 | } |
| 18 | |
| | ; Test: mirror of combine_pshufb_pslldq with the byte selection moved to the other half of each 16-byte group. The CHECK lines again expect the pshuf.b + shufflevector pair to fold to a single zeroing vxorps. |
| 19 | define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) { |
| 20 | ; CHECK-LABEL: combine_pshufb_psrldq: |
| 21 | ; CHECK: # BB#0: |
| Simon Pilgrim | 6800a45 | 2016-06-11 13:38:28 +0000 | [diff] [blame] | 22 | ; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0 |
| Simon Pilgrim | 8dd73e3 | 2016-06-11 13:18:21 +0000 | [diff] [blame] | 23 | ; CHECK-NEXT: retq |
| 24 | %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) |
| 25 | %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> |
| 26 | ret <32 x i8> %2 |
| 27 | } |
| 28 | |
| | ; Test: permd with a constant index vector, bitcast to <32 x i8>, then a byte-level shufflevector. The CHECK lines expect one vpshufb carrying the composed byte mask (last four bytes 16,17,18,18). |
| Ahmed Bougacha | 671795a | 2016-03-03 16:53:50 +0000 | [diff] [blame] | 29 | define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) { |
| 30 | ; CHECK-LABEL: combine_pshufb_vpermd: |
| 31 | ; CHECK: # BB#0: |
| 32 | ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18] |
| 33 | ; CHECK-NEXT: retq |
| 34 | %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>) |
| 35 | %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8> |
| 36 | %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30> |
| 37 | ret <32 x i8> %tmp2 |
| 38 | } |
| 39 | |
| | ; Test: floating-point variant of combine_pshufb_vpermd, using permps on <8 x float> instead of permd. The CHECK lines expect the same single vpshufb with the composed byte mask. |
| 40 | define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) { |
| 41 | ; CHECK-LABEL: combine_pshufb_vpermps: |
| 42 | ; CHECK: # BB#0: |
| 43 | ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18] |
| 44 | ; CHECK-NEXT: retq |
| 45 | %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>) |
| 46 | %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8> |
| 47 | %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30> |
| 48 | ret <32 x i8> %tmp2 |
| 49 | } |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 50 | |
| | ; Test: a <4 x i64> element-reversing shufflevector feeds pshuf.b, and the result is added to a constant vector. The CHECK lines expect one vperm2i128 (ymm0[2,3] with the upper half zero) followed by a vpaddq from a RIP-relative operand. |
| Simon Pilgrim | 6fa71da | 2016-07-12 20:27:32 +0000 | [diff] [blame] | 51 | define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) { |
| 52 | ; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128: |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 53 | ; CHECK: # BB#0: |
| Simon Pilgrim | 6fa71da | 2016-07-12 20:27:32 +0000 | [diff] [blame] | 54 | ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero |
| 55 | ; CHECK-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0 |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 56 | ; CHECK-NEXT: retq |
| 57 | %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| 58 | %2 = bitcast <4 x i64> %1 to <32 x i8> |
| Simon Pilgrim | 6fa71da | 2016-07-12 20:27:32 +0000 | [diff] [blame] | 59 | %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>) |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 60 | %4 = bitcast <32 x i8> %3 to <4 x i64> |
| Simon Pilgrim | 6fa71da | 2016-07-12 20:27:32 +0000 | [diff] [blame] | 61 | %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3> |
| 62 | ret <4 x i64> %5 |
| Simon Pilgrim | 21b2c56 | 2016-05-02 19:46:58 +0000 | [diff] [blame] | 63 | } |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 64 | |
| | ; Test: a <4 x i64> shufflevector <1,0,3,2> feeds pshuf.b with the same mask as the vperm2i128 test. The CHECK lines expect a vpxor-zeroed ymm1 blended in by vpblendd, keeping ymm0[0,1,2,3] and taking ymm1[4,5,6,7]. |
| Simon Pilgrim | 700e4a1 | 2016-07-14 12:21:40 +0000 | [diff] [blame] | 65 | define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) { |
| 66 | ; CHECK-LABEL: combine_permq_pshufb_as_vpblendd: |
| 67 | ; CHECK: # BB#0: |
| Simon Pilgrim | 053d329 | 2016-07-14 12:58:04 +0000 | [diff] [blame] | 68 | ; CHECK-NEXT: vpxor %ymm1, %ymm1, %ymm1 |
| 69 | ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] |
| Simon Pilgrim | 700e4a1 | 2016-07-14 12:21:40 +0000 | [diff] [blame] | 70 | ; CHECK-NEXT: retq |
| 71 | %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> |
| 72 | %2 = bitcast <4 x i64> %1 to <32 x i8> |
| 73 | %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>) |
| 74 | ret <32 x i8> %3 |
| 75 | } |
| 76 | |
| | ; Test: 128-bit pshuf.b with an all-zero (zeroinitializer) mask. The CHECK lines expect a single xmm-to-xmm vpbroadcastb. |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 77 | define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) { |
| 78 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128: |
| 79 | ; CHECK: # BB#0: |
| 80 | ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 |
| 81 | ; CHECK-NEXT: retq |
| 82 | %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer) |
| 83 | ret <16 x i8> %1 |
| 84 | } |
| 85 | |
| | ; Test: a <2 x i64> argument is widened to 256 bits, then passed through a zero-mask pshuf.b and a zero-index permd. The CHECK lines expect one xmm-to-ymm vpbroadcastb, preceded by the register-widening "kill" annotation. |
| 86 | define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) { |
| 87 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256: |
| 88 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 89 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 90 | ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 |
| 91 | ; CHECK-NEXT: retq |
| 92 | %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> |
| 93 | %2 = bitcast <4 x i64> %1 to <32 x i8> |
| 94 | %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer) |
| 95 | %4 = bitcast <32 x i8> %3 to <8 x i32> |
| 96 | %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer) |
| 97 | %6 = bitcast <8 x i32> %5 to <32 x i8> |
| 98 | ret <32 x i8> %6 |
| 99 | } |
| 100 | |
| | ; Test: 128-bit pshuf.b whose mask repeats byte indices 0,1 in every 16-bit slot. The CHECK lines expect a single xmm-to-xmm vpbroadcastw. |
| 101 | define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) { |
| 102 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128: |
| 103 | ; CHECK: # BB#0: |
| 104 | ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 |
| 105 | ; CHECK-NEXT: retq |
| 106 | %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>) |
| 107 | ret <16 x i8> %1 |
| 108 | } |
| 109 | |
| | ; Test: 256-bit counterpart of the vpbroadcastw128 test. A widened <2 x i64> goes through pshuf.b with a repeating 0,1 byte mask, then a zero-index permd. The CHECK lines expect one xmm-to-ymm vpbroadcastw after the "kill" annotation. |
| 110 | define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) { |
| 111 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256: |
| 112 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 113 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 114 | ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 |
| 115 | ; CHECK-NEXT: retq |
| 116 | %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> |
| 117 | %2 = bitcast <4 x i64> %1 to <32 x i8> |
| 118 | %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>) |
| 119 | %4 = bitcast <32 x i8> %3 to <8 x i32> |
| 120 | %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer) |
| 121 | %6 = bitcast <8 x i32> %5 to <32 x i8> |
| 122 | ret <32 x i8> %6 |
| 123 | } |
| 124 | |
| | ; Test: 128-bit pshuf.b whose mask repeats byte indices 0..3, followed by an add of a constant vector. The CHECK lines expect vpbroadcastd followed by vpaddb from a RIP-relative operand. |
| 125 | define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) { |
| 126 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128: |
| 127 | ; CHECK: # BB#0: |
| 128 | ; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 |
| 129 | ; CHECK-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 |
| 130 | ; CHECK-NEXT: retq |
| 131 | %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>) |
| 132 | %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3> |
| 133 | ret <16 x i8> %2 |
| 134 | } |
| 135 | |
| | ; Test: a <4 x i32> argument widened to <8 x i32>, a zero-index permd, then an add of a constant vector. The CHECK lines expect an xmm-to-ymm vpbroadcastd followed by vpaddd from a RIP-relative operand. |
| 136 | define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) { |
| 137 | ; CHECK-LABEL: combine_permd_as_vpbroadcastd256: |
| 138 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 139 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 140 | ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 |
| 141 | ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 |
| 142 | ; CHECK-NEXT: retq |
| 143 | %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| 144 | %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer) |
| 145 | %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
| 146 | ret <8 x i32> %3 |
| 147 | } |
| 148 | |
| | ; Test: 128-bit pshuf.b whose mask selects byte indices 0..7 for both 8-byte halves. The CHECK lines expect a single xmm-to-xmm vpbroadcastq. |
| 149 | define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) { |
| 150 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128: |
| 151 | ; CHECK: # BB#0: |
| 152 | ; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 |
| 153 | ; CHECK-NEXT: retq |
| 154 | %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>) |
| 155 | ret <16 x i8> %1 |
| 156 | } |
| 157 | |
| | ; Test: a widened <4 x i32> goes through permd with the repeating index pair 0,1, then an add of a constant vector. The CHECK lines expect an xmm-to-ymm vpbroadcastq followed by vpaddd from a RIP-relative operand. |
| 158 | define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) { |
| 159 | ; CHECK-LABEL: combine_permd_as_vpbroadcastq256: |
| 160 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 161 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 162 | ; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 |
| 163 | ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 |
| 164 | ; CHECK-NEXT: retq |
| 165 | %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| 166 | %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>) |
| 167 | %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
| 168 | ret <8 x i32> %3 |
| 169 | } |
| 170 | |
| | ; Test: a <4 x float> is bitcast to bytes, shuffled by pshuf.b with a repeating 0..3 byte mask, and bitcast back. The CHECK lines expect a single xmm-to-xmm vbroadcastss. |
| 171 | define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) { |
| 172 | ; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128: |
| 173 | ; CHECK: # BB#0: |
| 174 | ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 |
| 175 | ; CHECK-NEXT: retq |
| 176 | %1 = bitcast <4 x float> %a to <16 x i8> |
| 177 | %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>) |
| 178 | %3 = bitcast <16 x i8> %2 to <4 x float> |
| 179 | ret <4 x float> %3 |
| 180 | } |
| 181 | |
| | ; Test: a <4 x float> argument widened to <8 x float>, then permps with all-zero indices. The CHECK lines expect a single xmm-to-ymm vbroadcastss after the "kill" annotation. |
| 182 | define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) { |
| 183 | ; CHECK-LABEL: combine_permd_as_vpbroadcastss256: |
| 184 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 185 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 186 | ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 |
| 187 | ; CHECK-NEXT: retq |
| 188 | %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> |
| 189 | %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer) |
| 190 | ret <8 x float> %2 |
| 191 | } |
| 192 | |
| | ; Test: a widened <2 x double> is bitcast to <8 x float> and shuffled by permps with the repeating index pair 0,1. The CHECK lines expect a single xmm-to-ymm vbroadcastsd. The function name says "permd", but the intrinsic called here is permps. |
| 193 | define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) { |
| 194 | ; CHECK-LABEL: combine_permd_as_vpbroadcastsd256: |
| 195 | ; CHECK: # BB#0: |
| Matthias Braun | 152e7c8 | 2016-07-09 00:19:07 +0000 | [diff] [blame] | 196 | ; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> |
| Simon Pilgrim | bec6543 | 2016-07-05 20:11:29 +0000 | [diff] [blame] | 197 | ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 |
| 198 | ; CHECK-NEXT: retq |
| 199 | %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> |
| 200 | %2 = bitcast <4 x double> %1 to <8 x float> |
| 201 | %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>) |
| 202 | %4 = bitcast <8 x float> %3 to <4 x double> |
| 203 | ret <4 x double> %4 |
| 204 | } |
| Simon Pilgrim | 118da63 | 2016-07-06 15:09:48 +0000 | [diff] [blame] | 205 | |
| | ; Test: a splat shufflevector (zeroinitializer mask) followed by a zero-mask 128-bit pshuf.b. The CHECK lines expect the two to collapse into one xmm-to-xmm vpbroadcastb. |
| Simon Pilgrim | 4ac7420 | 2016-07-18 16:17:34 +0000 | [diff] [blame] | 206 | define <16 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb128(<16 x i8> %a) { |
| 207 | ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb128: |
| 208 | ; CHECK: # BB#0: |
| 209 | ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 |
| Simon Pilgrim | 4ac7420 | 2016-07-18 16:17:34 +0000 | [diff] [blame] | 210 | ; CHECK-NEXT: retq |
| 211 | %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer |
| 212 | %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> zeroinitializer) |
| 213 | ret <16 x i8> %2 |
| 214 | } |
| 215 | |
| | ; Test: 256-bit version of the splat + zero-mask pshuf.b case. The CHECK lines expect one xmm-to-ymm vpbroadcastb. |
| 216 | define <32 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb256(<32 x i8> %a) { |
| 217 | ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb256: |
| 218 | ; CHECK: # BB#0: |
| 219 | ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 |
| Simon Pilgrim | 4ac7420 | 2016-07-18 16:17:34 +0000 | [diff] [blame] | 220 | ; CHECK-NEXT: retq |
| 221 | %1 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer |
| 222 | %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> zeroinitializer) |
| 223 | ret <32 x i8> %2 |
| 224 | } |
| 225 | |
| | ; Test: a <4 x float> splat shufflevector, bitcast to bytes, then pshuf.b with a repeating 0..3 byte mask. The CHECK lines expect the sequence to collapse into one xmm-to-xmm vbroadcastss. |
| 226 | define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> %a) { |
| 227 | ; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastss128: |
| 228 | ; CHECK: # BB#0: |
| 229 | ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 |
| Simon Pilgrim | 4ac7420 | 2016-07-18 16:17:34 +0000 | [diff] [blame] | 230 | ; CHECK-NEXT: retq |
| 231 | %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer |
| 232 | %2 = bitcast <4 x float> %1 to <16 x i8> |
| 233 | %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>) |
| 234 | %4 = bitcast <16 x i8> %3 to <4 x float> |
| 235 | ret <4 x float> %4 |
| 236 | } |
| 237 | |
| | ; Test: an <8 x float> splat shufflevector of a <4 x float> argument, followed by permps with all-zero indices. NOTE(review): the recorded output contains two identical vbroadcastss instructions; presumably the second broadcast is not folded away yet. The CHECK lines are autogenerated, so regenerate rather than hand-edit if codegen improves. |
| 238 | define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) { |
| 239 | ; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256: |
| 240 | ; CHECK: # BB#0: |
| 241 | ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 |
| 242 | ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 |
| 243 | ; CHECK-NEXT: retq |
| 244 | %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer |
| 245 | %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer) |
| 246 | ret <8 x float> %2 |
| 247 | } |
| 248 | |
| | ; Test: a <4 x double> splat shufflevector of a <2 x double> argument, bitcast to <8 x float>, then permps with the repeating index pair 0,1. NOTE(review): the recorded output contains two identical vbroadcastsd instructions; presumably the second broadcast is not folded away yet. The CHECK lines are autogenerated, so regenerate rather than hand-edit if codegen improves. |
| 249 | define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) { |
| 250 | ; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256: |
| 251 | ; CHECK: # BB#0: |
| 252 | ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 |
| 253 | ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 |
| 254 | ; CHECK-NEXT: retq |
| 255 | %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer |
| 256 | %2 = bitcast <4 x double> %1 to <8 x float> |
| 257 | %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>) |
| 258 | %4 = bitcast <8 x float> %3 to <4 x double> |
| 259 | ret <4 x double> %4 |
| 260 | } |
| 261 | |
| | ; Test: permd whose constant indices move 32-bit elements in adjacent pairs (0,1 / 4,5 / 4,5 / 2,3). The CHECK lines expect the equivalent 64-bit permute, vpermq ymm0[0,2,2,1]. |
| Simon Pilgrim | 950419f | 2016-07-08 19:23:29 +0000 | [diff] [blame] | 262 | define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) { |
| 263 | ; CHECK-LABEL: combine_permd_as_permq: |
| 264 | ; CHECK: # BB#0: |
| 265 | ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1] |
| 266 | ; CHECK-NEXT: retq |
| 267 | %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>) |
| 268 | ret <8 x i32> %1 |
| 269 | } |
| 270 | |
| | ; Test: permps whose constant indices move 32-bit elements in adjacent pairs (6,7 / 4,5 / 0,1 / 2,3). The CHECK lines expect the equivalent 64-bit permute, vpermpd ymm0[3,2,0,1]. |
| 271 | define <8 x float> @combine_permps_as_permpd(<8 x float> %a) { |
| 272 | ; CHECK-LABEL: combine_permps_as_permpd: |
| 273 | ; CHECK: # BB#0: |
| 274 | ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1] |
| 275 | ; CHECK-NEXT: retq |
| 276 | %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>) |
| 277 | ret <8 x float> %1 |
| 278 | } |
| 279 | |
| | ; Test: a <4 x double> is bitcast to bytes and passed to pshuf.b with mask bytes 0..7 followed by -1 everywhere else. The CHECK lines expect a vxorpd-zeroed ymm1 blended by vblendpd, keeping ymm0[0] and taking ymm1[1,2,3]. |
| Simon Pilgrim | d7a3782 | 2016-08-19 17:02:00 +0000 | [diff] [blame^] | 280 | define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) { |
| 281 | ; CHECK-LABEL: combine_pshufb_as_vzmovl_64: |
| 282 | ; CHECK: # BB#0: |
| 283 | ; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1 |
| 284 | ; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3] |
| 285 | ; CHECK-NEXT: retq |
| 286 | %1 = bitcast <4 x double> %a0 to <32 x i8> |
| 287 | %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) |
| 288 | %3 = bitcast <32 x i8> %2 to <4 x double> |
| 289 | ret <4 x double> %3 |
| 290 | } |
| 291 | |
| | ; Test: 32-bit-element counterpart of combine_pshufb_as_vzmovl_64. The pshuf.b mask has bytes 0..3 followed by -1 everywhere else. The CHECK lines expect a vxorps-zeroed ymm1 blended by vblendps, keeping ymm0[0] and taking ymm1[1..7]. |
| 292 | define <8 x float> @combine_pshufb_as_vzmovl_32(<8 x float> %a0) { |
| 293 | ; CHECK-LABEL: combine_pshufb_as_vzmovl_32: |
| 294 | ; CHECK: # BB#0: |
| 295 | ; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1 |
| 296 | ; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] |
| 297 | ; CHECK-NEXT: retq |
| 298 | %1 = bitcast <8 x float> %a0 to <32 x i8> |
| 299 | %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) |
| 300 | %3 = bitcast <32 x i8> %2 to <8 x float> |
| 301 | ret <8 x float> %3 |
| 302 | } |
| 303 | |
| | ; Test: pshuf.b whose mask, in each 16-byte half, has ten 128 entries followed by byte indices 0..5. The CHECK lines expect a vpslldq producing ten zero bytes then ymm0[0..5] (and ymm0[16..21] in the upper half). |
| Simon Pilgrim | 118da63 | 2016-07-06 15:09:48 +0000 | [diff] [blame] | 304 | define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) { |
| 305 | ; CHECK-LABEL: combine_pshufb_as_pslldq: |
| 306 | ; CHECK: # BB#0: |
| Simon Pilgrim | 687d71e | 2016-08-12 11:24:34 +0000 | [diff] [blame] | 307 | ; CHECK-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21] |
| Simon Pilgrim | 118da63 | 2016-07-06 15:09:48 +0000 | [diff] [blame] | 308 | ; CHECK-NEXT: retq |
| 309 | %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>) |
| 310 | ret <32 x i8> %res0 |
| 311 | } |
| 312 | |
| | ; Test: pshuf.b whose mask, in each 16-byte half, selects byte index 15 first and has 128 in the remaining fifteen slots. The CHECK lines expect a vpsrldq producing ymm0[15] (ymm0[31] in the upper half) followed by fifteen zero bytes. |
| 313 | define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) { |
| 314 | ; CHECK-LABEL: combine_pshufb_as_psrldq: |
| 315 | ; CHECK: # BB#0: |
| Simon Pilgrim | 687d71e | 2016-08-12 11:24:34 +0000 | [diff] [blame] | 316 | ; CHECK-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero |
| Simon Pilgrim | 118da63 | 2016-07-06 15:09:48 +0000 | [diff] [blame] | 317 | ; CHECK-NEXT: retq |
| 318 | %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) |
| 319 | ret <32 x i8> %res0 |
| 320 | } |
| Simon Pilgrim | 51c786b | 2016-07-10 20:19:56 +0000 | [diff] [blame] | 321 | |
| | ; Test: pshuf.b whose mask swaps adjacent byte pairs within bytes 0..7 of each 16-byte half and leaves bytes 8..15 in place. The CHECK lines expect a single vpshuflw with word order [1,0,3,2,4,5,6,7] per half. |
| 322 | define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) { |
| 323 | ; CHECK-LABEL: combine_pshufb_as_pshuflw: |
| 324 | ; CHECK: # BB#0: |
| Simon Pilgrim | 2191faa | 2016-07-10 21:02:47 +0000 | [diff] [blame] | 325 | ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] |
| Simon Pilgrim | 51c786b | 2016-07-10 20:19:56 +0000 | [diff] [blame] | 326 | ; CHECK-NEXT: retq |
| 327 | %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>) |
| 328 | ret <32 x i8> %res0 |
| 329 | } |
| 330 | |
| | ; Test: pshuf.b whose mask leaves bytes 0..7 of each 16-byte half in place and swaps adjacent byte pairs within bytes 8..15. The CHECK lines expect a single vpshufhw with word order [0,1,2,3,5,4,7,6] per half. |
| 331 | define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) { |
| 332 | ; CHECK-LABEL: combine_pshufb_as_pshufhw: |
| 333 | ; CHECK: # BB#0: |
| Simon Pilgrim | 2191faa | 2016-07-10 21:02:47 +0000 | [diff] [blame] | 334 | ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] |
| Simon Pilgrim | 51c786b | 2016-07-10 20:19:56 +0000 | [diff] [blame] | 335 | ; CHECK-NEXT: retq |
| 336 | %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>) |
| 337 | ret <32 x i8> %res0 |
| 338 | } |
| 339 | |
| | ; Test: two chained pshuf.b calls, using the low-half mask from the pshuflw test and then the high-half mask from the pshufhw test. The CHECK lines expect them to merge into one vpshufb with the combined byte mask, not a word-shuffle instruction. |
| Simon Pilgrim | 2191faa | 2016-07-10 21:02:47 +0000 | [diff] [blame] | 340 | define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) { |
| 341 | ; CHECK-LABEL: combine_pshufb_not_as_pshufw: |
| Simon Pilgrim | 51c786b | 2016-07-10 20:19:56 +0000 | [diff] [blame] | 342 | ; CHECK: # BB#0: |
| 343 | ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29] |
| 344 | ; CHECK-NEXT: retq |
| 345 | %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>) |
| 346 | %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>) |
| 347 | ret <32 x i8> %res1 |
| 348 | } |