Bill Schmidt | 451499f | 2012-11-14 23:23:27 +0000 | [diff] [blame] | 1 | ; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 2 | |
| 3 | ; Check vector sext/zext in register (trunc + extend) with altivec enabled. |
| 4 | |
| 5 | target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" |
| 6 | target triple = "powerpc64-unknown-linux-gnu" |
| 7 | |
| 8 | ; Altivec does not provide a vector sext instruction, so the |
| 9 | ; in-register sign extension from i4 is expected to be lowered |
| 10 | ; to a shift left (vslb) followed by an arithmetic shift right |
| 11 | ; (vsrab), as the CHECK lines after the function verify. |
| 12 | define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { |
| 13 | %b = trunc <16 x i8> %a to <16 x i4> |
| 14 | %c = sext <16 x i4> %b to <16 x i8> |
| 15 | ret <16 x i8> %c |
| 16 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 17 | ; CHECK-LABEL: v16si8_sext_in_reg: |
Benjamin Kramer | 5ea0349 | 2013-01-12 19:06:44 +0000 | [diff] [blame] | 18 | ; CHECK: vslb |
Nadav Rotem | dbe5c72 | 2013-01-11 22:57:48 +0000 | [diff] [blame] | 19 | ; CHECK: vsrab |
| 20 | ; CHECK: blr |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 21 | |
| 22 | ; The zero extend is simpler: splat the mask 15 into every byte |
| 23 | ; (vspltisb) and AND it in (vand) to clear the upper 4 bits. |
| 24 | define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) { |
| 25 | %b = trunc <16 x i8> %a to <16 x i4> |
| 26 | %c = zext <16 x i4> %b to <16 x i8> |
| 27 | ret <16 x i8> %c |
| 28 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 29 | ; CHECK-LABEL: v16si8_zext_in_reg: |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 30 | ; CHECK: vspltisb [[VMASK:[0-9]+]], 15 |
| 31 | ; CHECK-NEXT: vand 2, 2, [[VMASK]] |
| 32 | |
| 33 | ; Same as v16si8_sext_in_reg, using the halfword shifts (vslh/vsrah). |
| 34 | define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { |
| 35 | %b = trunc <8 x i16> %a to <8 x i8> |
| 36 | %c = sext <8 x i8> %b to <8 x i16> |
| 37 | ret <8 x i16> %c |
| 38 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 39 | ; CHECK-LABEL: v8si16_sext_in_reg: |
Benjamin Kramer | 5ea0349 | 2013-01-12 19:06:44 +0000 | [diff] [blame] | 40 | ; CHECK: vslh |
Nadav Rotem | dbe5c72 | 2013-01-11 22:57:48 +0000 | [diff] [blame] | 41 | ; CHECK: vsrah |
| 42 | ; CHECK: blr |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 43 | |
| 44 | ; Same as v16si8_zext_in_reg, but instead of creating the mask |
| 45 | ; with a splat, loads it from memory (ld from the TOC, then lvx). |
| 46 | define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) { |
| 47 | %b = trunc <8 x i16> %a to <8 x i8> |
| 48 | %c = zext <8 x i8> %b to <8 x i16> |
| 49 | ret <8 x i16> %c |
| 50 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 51 | ; CHECK-LABEL: v8si16_zext_in_reg: |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 52 | ; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2) |
| 53 | ; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]] |
| 54 | ; CHECK-NEXT: vand 2, 2, [[VMASK]] |
| 55 | |
| 56 | ; Same as v16si8_sext_in_reg, using the word shifts: a shift |
| 57 | ; left (vslw) followed by an arithmetic shift right (vsraw). |
| 58 | define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { |
| 59 | %b = trunc <4 x i32> %a to <4 x i16> |
| 60 | %c = sext <4 x i16> %b to <4 x i32> |
| 61 | ret <4 x i32> %c |
| 62 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 63 | ; CHECK-LABEL: v4si32_sext_in_reg: |
Benjamin Kramer | 5ea0349 | 2013-01-12 19:06:44 +0000 | [diff] [blame] | 64 | ; CHECK: vslw |
Nadav Rotem | dbe5c72 | 2013-01-11 22:57:48 +0000 | [diff] [blame] | 65 | ; CHECK: vsraw |
| 66 | ; CHECK: blr |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 67 | |
| 68 | ; Same as v16si8_zext_in_reg, but the mask is built by vspltisw -16 and vsrw. |
| 69 | define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { |
| 70 | %b = trunc <4 x i32> %a to <4 x i16> |
| 71 | %c = zext <4 x i16> %b to <4 x i32> |
| 72 | ret <4 x i32> %c |
| 73 | } |
Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame^] | 74 | ; CHECK-LABEL: v4si32_zext_in_reg: |
Adhemerval Zanella | c4182d1 | 2012-11-05 17:15:56 +0000 | [diff] [blame] | 75 | ; CHECK: vspltisw [[VMASK:[0-9]+]], -16 |
| 76 | ; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]] |
| 77 | ; CHECK-NEXT: vand 2, 2, [[VMASK]] |