; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
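; Check that even/odd-lane shufflevector pairs are matched to the NEON VTRN
; instruction.  The add of the two shuffle results uses both halves of the
; transpose, so both vtrn outputs are needed.
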
define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vtrni8:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
	%tmp5 = add <8 x i8> %tmp3, %tmp4
	ret <8 x i8> %tmp5
}

define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vtrni16:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	%tmp5 = add <4 x i16> %tmp3, %tmp4
	ret <4 x i16> %tmp5
}

define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vtrni32:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.i32
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
	%tmp5 = add <2 x i32> %tmp3, %tmp4
	ret <2 x i32> %tmp5
}

define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vtrnf:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.f32
	%tmp1 = load <2 x float>* %A
	%tmp2 = load <2 x float>* %B
	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
	%tmp5 = fadd <2 x float> %tmp3, %tmp4
	ret <2 x float> %tmp5
}

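; The same patterns on 128-bit (quad-register) vectors:
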
define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vtrnQi8:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
	%tmp5 = add <16 x i8> %tmp3, %tmp4
	ret <16 x i8> %tmp5
}

define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vtrnQi16:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
	%tmp5 = add <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}

define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vtrnQi32:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.i32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	%tmp5 = add <4 x i32> %tmp3, %tmp4
	ret <4 x i32> %tmp5
}

define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vtrnQf:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.f32
	%tmp1 = load <4 x float>* %A
	%tmp2 = load <4 x float>* %B
	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
	%tmp5 = fadd <4 x float> %tmp3, %tmp4
	ret <4 x float> %tmp5
}

; Undef shuffle indices should not prevent matching to VTRN:

define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vtrni8_undef:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
	%tmp5 = add <8 x i8> %tmp3, %tmp4
	ret <8 x i8> %tmp5
}

define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vtrnQi16_undef:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
	%tmp5 = add <8 x i16> %tmp3, %tmp4
	ret <8 x i16> %tmp5
}