blob: 201c15b9c735957af13c932f01ea03278e222dd0 [file] [log] [blame]
Adhemerval Zanellac4182d12012-11-05 17:15:56 +00001; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
2
3; Check vector extend load expansion with altivec enabled.
4
5target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
6target triple = "powerpc64-unknown-linux-gnu"
7
8; Altivec does not provide a sext instruction, so this expands to
9; a set of vector stores (stvx), byte load/sign-extend/store
10; sequences (lbz/stb), and a final vector load (lvx) that loads the
11; extended result vector.
12define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { ; sign-extend-in-register, i4 within i8 lanes
13 %narrow = trunc <16 x i8> %a to <16 x i4> ; keep only the low 4 bits of each element
14 %wide = sext <16 x i4> %narrow to <16 x i8> ; replicate bit 3 into the upper 4 bits
15 ret <16 x i8> %wide
16}
17; CHECK: v16si8_sext_in_reg:
18; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
19; CHECK: lbz
20; CHECK: stb
21; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
22; CHECK: lbz
23; CHECK: stb
24; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
25; CHECK: lbz
26; CHECK: stb
27; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
28; CHECK: lbz
29; CHECK: stb
30; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
31; CHECK: lbz
32; CHECK: stb
33; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
34; CHECK: lbz
35; CHECK: stb
36; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
37; CHECK: lbz
38; CHECK: stb
39; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
40; CHECK: lbz
41; CHECK: stb
42; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
43; CHECK: lbz
44; CHECK: stb
45; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
46; CHECK: lbz
47; CHECK: stb
48; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
49; CHECK: lbz
50; CHECK: stb
51; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
52; CHECK: lbz
53; CHECK: stb
54; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
55; CHECK: lbz
56; CHECK: stb
57; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
58; CHECK: lbz
59; CHECK: stb
60; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
61; CHECK: lbz
62; CHECK: stb
63; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
64; CHECK: lbz
65; CHECK: stb
66; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
67
68; The zero extend uses cleverer logic: a vector splat to build the
69; mask and a logical AND to clear the higher bits.
70define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) { ; zero-extend-in-register, i4 within i8 lanes
71 %narrow = trunc <16 x i8> %a to <16 x i4> ; keep only the low 4 bits of each element
72 %wide = zext <16 x i4> %narrow to <16 x i8> ; upper 4 bits of each element become 0
73 ret <16 x i8> %wide
74}
75; CHECK: v16si8_zext_in_reg:
76; CHECK: vspltisb [[VMASK:[0-9]+]], 15
77; CHECK-NEXT: vand 2, 2, [[VMASK]]
78
79; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth).
80define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { ; sign-extend-in-register, i8 within i16 lanes
81 %narrow = trunc <8 x i16> %a to <8 x i8> ; keep only the low 8 bits of each element
82 %wide = sext <8 x i8> %narrow to <8 x i16> ; replicate bit 7 into the upper 8 bits
83 ret <8 x i16> %wide
84}
85; CHECK: v8si16_sext_in_reg:
86; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
87; CHECK: lhz
88; CHECK: sth
89; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
90; CHECK: lhz
91; CHECK: sth
92; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
93; CHECK: lhz
94; CHECK: sth
95; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
96; CHECK: lhz
97; CHECK: sth
98; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
99; CHECK: lhz
100; CHECK: sth
101; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
102; CHECK: lhz
103; CHECK: sth
104; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
105; CHECK: lhz
106; CHECK: sth
107; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
108; CHECK: lhz
109; CHECK: sth
110; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
111
112; Same as v16si8_zext_in_reg, but instead of creating the mask
113; with a splat, loads it from memory.
114define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) { ; zero-extend-in-register, i8 within i16 lanes
115 %narrow = trunc <8 x i16> %a to <8 x i8> ; keep only the low 8 bits of each element
116 %wide = zext <8 x i8> %narrow to <8 x i16> ; upper 8 bits of each element become 0
117 ret <8 x i16> %wide
118}
119; CHECK: v8si16_zext_in_reg:
120; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
121; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
122; CHECK-NEXT: vand 2, 2, [[VMASK]]
123
124; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and
125; word stores (stw).
126define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { ; sign-extend-in-register, i16 within i32 lanes
127 %narrow = trunc <4 x i32> %a to <4 x i16> ; keep only the low 16 bits of each element
128 %wide = sext <4 x i16> %narrow to <4 x i32> ; replicate bit 15 into the upper 16 bits
129 ret <4 x i32> %wide
130}
131; CHECK: v4si32_sext_in_reg:
132; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
133; CHECK: lha
134; CHECK: stw
135; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
136; CHECK: lha
137; CHECK: stw
138; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
139; CHECK: lha
140; CHECK: stw
141; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
142; CHECK: lha
143; CHECK: stw
144; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
145
146; Same as v16si8_zext_in_reg, but the mask is built with a splat followed by a shift (vspltisw/vsrw).
147define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { ; zero-extend-in-register, i16 within i32 lanes
148 %narrow = trunc <4 x i32> %a to <4 x i16> ; keep only the low 16 bits of each element
149 %wide = zext <4 x i16> %narrow to <4 x i32> ; upper 16 bits of each element become 0
150 ret <4 x i32> %wide
151}
152; CHECK: v4si32_zext_in_reg:
153; CHECK: vspltisw [[VMASK:[0-9]+]], -16
154; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
155; CHECK-NEXT: vand 2, 2, [[VMASK]]