; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; CHECK: vmovaps
; CHECK: vmovaps
; CHECK: vmovapd
; CHECK: vmovapd
; CHECK: vmovaps
; CHECK: vmovaps
; 32-byte-aligned loads and stores of 256-bit vectors (<4 x double>,
; <8 x float>, <4 x i64>) must each be selected as a single aligned AVX
; move (vmovaps/vmovapd) — six in total, matching the CHECK lines above.
; The call to @dummy keeps the three loaded values live so the loads and
; stores cannot be folded away.
define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
entry:
  %0 = bitcast double* %d to <4 x double>*
  %tmp1.i = load <4 x double>* %0, align 32
  %1 = bitcast float* %f to <8 x float>*
  %tmp1.i17 = load <8 x float>* %1, align 32
  %tmp1.i16 = load <4 x i64>* %i, align 32
  tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
  store <4 x double> %tmp1.i, <4 x double>* %0, align 32
  store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
  store <4 x i64> %tmp1.i16, <4 x i64>* %i, align 32
  ret void
}

; External sink that forces the loaded vectors to be materialized.
declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)

;;
;; The two tests below check that load + scalar_to_vector + ins_subvec
;; + zext must be folded into only a single vmovss or vmovsd.

; CHECK: vmovss (%
; Loading one float into lane 0 of a zeroed <8 x float> must collapse to a
; single memory-operand vmovss (checked by the CHECK line above).
define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
  %scalar = load float* %ptr
  %vec = insertelement <8 x float> zeroinitializer, float %scalar, i32 0
  ret <8 x float> %vec
}

; CHECK: vmovsd (%
; Loading one double into lane 0 of a zeroed <4 x double> must collapse to a
; single memory-operand vmovsd (checked by the CHECK line above).
define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
  %scalar = load double* %ptr
  %vec = insertelement <4 x double> zeroinitializer, double %scalar, i32 0
  ret <4 x double> %vec
}

; CHECK: vmovaps %ymm
; A 32-byte-aligned store of <16 x i16> must use a single aligned 256-bit
; move (vmovaps %ymm), per the CHECK line above.
define void @storev16i16(<16 x i16> %a) nounwind {
  store <16 x i16> %a, <16 x i16>* undef, align 32
  unreachable
}

; CHECK: vmovups %ymm
; The same store with only 4-byte alignment must fall back to the unaligned
; 256-bit move (vmovups %ymm), per the CHECK line above.
define void @storev16i16_01(<16 x i16> %a) nounwind {
  store <16 x i16> %a, <16 x i16>* undef, align 4
  unreachable
}

; CHECK: vmovaps %ymm
; A 32-byte-aligned store of <32 x i8> must use a single aligned 256-bit
; move (vmovaps %ymm), per the CHECK line above.
define void @storev32i8(<32 x i8> %a) nounwind {
  store <32 x i8> %a, <32 x i8>* undef, align 32
  unreachable
}

; CHECK: vmovups %ymm
; The same store with only 4-byte alignment must fall back to the unaligned
; 256-bit move (vmovups %ymm), per the CHECK line above.
define void @storev32i8_01(<32 x i8> %a) nounwind {
  store <32 x i8> %a, <32 x i8>* undef, align 4
  unreachable
}

; It is faster to do two 128-bit stores if the data is already in XMM
; registers, for example after an integer operation.
; CHECK: _double_save
; CHECK-NOT: vinsertf128 $1
; CHECK-NOT: vinsertf128 $0
; CHECK: vmovaps %xmm
; CHECK: vmovaps %xmm
; An <8 x i32> formed by concatenating two XMM-sized halves, stored with only
; 16-byte alignment, must be emitted as two 128-bit vmovaps stores rather than
; a vinsertf128 merge followed by one 256-bit store (see CHECK/CHECK-NOT above).
define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
entry:
  %combined = shufflevector <4 x i32> %A, <4 x i32> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i32> %combined, <8 x i32>* %P, align 16
  ret void
}
| 80 | |