Simon Pilgrim | 9b7aaaf | 2016-03-14 00:18:26 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s |
| 3 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s |
| 4 | |
; Declarations of the XOP shuffle intrinsics.
; vpermil2pd / vpermil2ps (128- and 256-bit forms) each take three vector
; operands plus an i8 immediate; vpperm takes three <16 x i8> operands, the
; third being the per-byte selector passed by the tests.
; NOTE(review): only @llvm.x86.xop.vpperm is called by the functions visible
; in this part of the file - the vpermil2 declarations are presumably used
; elsewhere; confirm before removing.
| 5 | declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone |
| 6 | declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone |
| 7 | |
| 8 | declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone |
| 9 | declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone |
| 10 | |
| 11 | declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone |
| 12 | |
; Two chained vpperm calls that cancel out.
; The first call uses selector bytes 31..16 on (%a0, %a1); the second uses
; selector bytes 15..0 on the first result (second source undef), i.e. the
; byte order is reversed twice.
; The assertions expect the pair to collapse into a single register move of
; xmm1 into xmm0, with no vpperm left in the output.
| 13 | define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { |
| 14 | ; CHECK-LABEL: combine_vpperm_identity: |
| 15 | ; CHECK: # BB#0: |
Simon Pilgrim | fd4b9b0 | 2016-04-16 17:52:07 +0000 | [diff] [blame] | 16 | ; CHECK-NEXT: vmovaps %xmm1, %xmm0 |
Simon Pilgrim | 9b7aaaf | 2016-03-14 00:18:26 +0000 | [diff] [blame] | 17 | ; CHECK-NEXT: retq |
| 18 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>) |
| 19 | %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>) |
| 20 | ret <16 x i8> %res1 |
| 21 | } |
| 22 | |
; Three chained vpperm calls whose selectors mix index 0/1 with the value 128.
; The decoded shuffle comments in the assertions print `zero` for each
; selector byte equal to 128 and xmm0[N] for small indices.
; The assertions record the current output: the first vpperm keeps its
; selector in a register (built with movl $128 / vmovd), followed by two more
; vpperm instructions - nothing is folded.
; NOTE(review): following the decoded masks, every byte of %res2 appears to
; end up zero, which matches the function name; the checks presumably capture
; a missed combine rather than the desired end state - confirm and regenerate
; with utils/update_llc_test_checks.py if codegen improves.
Simon Pilgrim | 7eedee9 | 2016-04-24 13:45:30 +0000 | [diff] [blame^] | 23 | define <16 x i8> @combine_vpperm_zero(<16 x i8> %a0, <16 x i8> %a1) { |
| 24 | ; CHECK-LABEL: combine_vpperm_zero: |
| 25 | ; CHECK: # BB#0: |
| 26 | ; CHECK-NEXT: movl $128, %eax |
| 27 | ; CHECK-NEXT: vmovd %eax, %xmm2 |
| 28 | ; CHECK-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 |
| 29 | ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0] |
| 30 | ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero |
| 31 | ; CHECK-NEXT: retq |
| 32 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) |
| 33 | %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 0, i8 128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) |
| 34 | %res2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res1, <16 x i8> undef, <16 x i8> <i8 0, i8 1, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>) |
| 35 | ret <16 x i8> %res2 |
| 36 | } |
| 37 | |
; Same idea as an identity pair, but the selector is produced by bitcasting a
; <2 x i64> constant (0x0F0E0D0C0B0A0908, 0x0706050403020100) to <16 x i8>,
; and the same selector is applied twice in a row.
; On the little-endian x86_64 target from the RUN lines those constants
; presumably read as bytes 8..15 followed by 0..7, i.e. a swap of the two
; 64-bit halves that undoes itself when repeated - TODO confirm.
; The trailing <2 x i64> add of the same constants keeps the function from
; reducing to a bare return; the assertions expect only that add (vpaddq with
; a rip-relative constant operand) and no vpperm.
Simon Pilgrim | fd4b9b0 | 2016-04-16 17:52:07 +0000 | [diff] [blame] | 38 | define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1) { |
| 39 | ; CHECK-LABEL: combine_vpperm_identity_bitcast: |
| 40 | ; CHECK: # BB#0: |
| 41 | ; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 |
| 42 | ; CHECK-NEXT: retq |
| 43 | %mask = bitcast <2 x i64> <i64 1084818905618843912, i64 506097522914230528> to <16 x i8> |
| 44 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %mask) |
| 45 | %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> %mask) |
| 46 | %res2 = bitcast <16 x i8> %res1 to <2 x i64> |
| 47 | %res3 = add <2 x i64> %res2, <i64 1084818905618843912, i64 506097522914230528> |
| 48 | %res4 = bitcast <2 x i64> %res3 to <16 x i8> |
| 49 | ret <16 x i8> %res4 |
| 50 | } |
| 51 | |
; A single vpperm whose selector keeps bytes 0,1 and 4..7 of %a0 in place and
; uses values 128..137 for every other byte.
; The decoded shuffle comment in the assertion prints `zero` for all of the
; 128..137 selector bytes, so the result is %a0 with those lanes cleared.
; The assertions expect this to remain one vpperm instruction.
; NOTE(review): the name suggests a blend-with-zero lowering is the eventual
; goal; the current check does not show such a lowering - confirm intent.
Simon Pilgrim | 9448b11 | 2016-04-23 11:14:18 +0000 | [diff] [blame] | 52 | define <16 x i8> @combine_vpperm_as_blend_with_zero(<16 x i8> %a0, <16 x i8> %a1) { |
| 53 | ; CHECK-LABEL: combine_vpperm_as_blend_with_zero: |
| 54 | ; CHECK: # BB#0: |
| 55 | ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero |
| 56 | ; CHECK-NEXT: retq |
| 57 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 0, i8 1, i8 128, i8 129, i8 4, i8 5, i8 6, i8 7, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137>) |
| 58 | ret <16 x i8> %res0 |
| 59 | } |
| 60 | |
; A vpperm with %a0 passed as BOTH sources and a selector that interleaves
; indices 8..15 with 24..31.
; Because the two sources are the same value, the assertions expect the
; shuffle to be emitted as a unary vpunpckhbw of xmm0 with itself
; (xmm0[8,8,9,9,...,15,15]) instead of a vpperm.
; NOTE(review): the function name says "unpckhwd" while the checked
; instruction is the byte form vpunpckhbw; the name is left as-is because the
; CHECK-LABEL depends on it.
Simon Pilgrim | 572ca71 | 2016-03-24 11:52:43 +0000 | [diff] [blame] | 61 | define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { |
| 62 | ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd: |
Simon Pilgrim | 9b7aaaf | 2016-03-14 00:18:26 +0000 | [diff] [blame] | 63 | ; CHECK: # BB#0: |
Simon Pilgrim | fd4b9b0 | 2016-04-16 17:52:07 +0000 | [diff] [blame] | 64 | ; CHECK-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] |
Simon Pilgrim | 9b7aaaf | 2016-03-14 00:18:26 +0000 | [diff] [blame] | 65 | ; CHECK-NEXT: retq |
| 66 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>) |
| 67 | ret <16 x i8> %res0 |
| 68 | } |
Simon Pilgrim | 572ca71 | 2016-03-24 11:52:43 +0000 | [diff] [blame] | 69 | |
; Two-source counterpart of the unary unpack test: the same interleaving
; selector (8,24,9,25,...,15,31) is applied to distinct sources %a0 and %a1.
; The decoded shuffle comment shows indices 8..15 reading xmm0 and 24..31
; reading xmm1[8..15], i.e. an interleave of the high halves of both inputs.
; The assertions expect this to remain a vpperm instruction.
; NOTE(review): the name suggests lowering to an unpack instruction is the
; eventual goal; the current check does not show that - confirm intent.
| 70 | define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { |
| 71 | ; CHECK-LABEL: combine_vpperm_as_unpckhwd: |
| 72 | ; CHECK: # BB#0: |
Simon Pilgrim | 1cc5712 | 2016-04-09 14:51:26 +0000 | [diff] [blame] | 73 | ; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] |
Simon Pilgrim | 572ca71 | 2016-03-24 11:52:43 +0000 | [diff] [blame] | 74 | ; CHECK-NEXT: retq |
| 75 | %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>) |
| 76 | ret <16 x i8> %res0 |
| 77 | } |