; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
; RUN: grep unpcklps %t | count 1
; RUN: grep unpckhps %t | count 3

; Transpose example using the more generic vector shuffle. Returns float8
; (<8 x float>) instead of float16.
; ModuleID = 'transpose2_opt.bc'
; Target description: little-endian ("e") with 32-bit pointers ("p:32:32:32").
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-apple-cl.1.0"
; Four zero-initialized, 16-byte-aligned <4 x float> globals with common
; linkage. The trailing "[#uses=1]" notes are carried over verbatim from the
; original disassembly.
@r0 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1]
@r1 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1]
@r2 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1]
@r3 = common global <4 x float> zeroinitializer, align 16 ; <<4 x float>*> [#uses=1]
14
; @__transpose2 - builds two-operand interleaving shuffles over four
; <4 x float> inputs and returns one <8 x float> half of the result.
;
; Shuffle masks used below (indices 0-3 select from the first operand,
; 4-7 from the second):
;   <0, 4, 1, 5>  interleaves elements 0 and 1 of both operands (low halves)
;   <2, 6, 3, 7>  interleaves elements 2 and 3 of both operands (high halves)
;   <0 .. 7>      concatenates two <4 x float> values into an <8 x float>
;
; Only %r2 is returned. %r1 and the values that feed only it (%unpcklps,
; %unpcklps8, %unpcklps14, %unpckhps17) do not reach the return value. The
; chain that does reach %r2 contains three high-half interleaves and one
; low-half interleave, consistent with the grep counts in the RUN lines.
define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
entry:
  ; First stage: interleave %p0 with %p2, and %p1 with %p3.
  %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
  %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
  %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
  %unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
  ; Second stage on the low-half results; concatenated into %r1 (not returned).
  %unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
  %unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
  %r1 = shufflevector <4 x float> %unpcklps14, <4 x float> %unpckhps17, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
  ; Second stage on the high-half results; concatenated into %r2 (returned).
  %unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
  %unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
  %r2 = shufflevector <4 x float> %unpcklps20, <4 x float> %unpckhps23, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
  ; Disabled: the 16-element concatenation of %r1 and %r2.
;  %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
  ret <8 x float> %r2
}