; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -o - %s | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -o - %s | FileCheck %s

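; Passing <4 x i32> by value through an inline-asm vadd.i32: the AAPCS-VFP
; arguments and the 't' constraint should both land in MVE q registers, so the
; add operates directly on q0/q1 with no extra moves.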
define arm_aapcs_vfpcc <4 x i32> @vector_add_by_value(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: vector_add_by_value:
; CHECK: @ %bb.0:
; CHECK-NEXT: @APP
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: bx lr
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}

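; The same inline-asm add with operands in memory: the inputs should be loaded
; with vldrw.u32, fed to the asm block in q registers, and the result stored
; back with vstrw.32.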
define void @vector_add_by_reference(<4 x i32>* %resultp, <4 x i32>* %lhsp, <4 x i32>* %rhsp) {
; CHECK-LABEL: vector_add_by_reference:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vldrw.u32 q1, [r2]
; CHECK-NEXT: @APP
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: @NO_APP
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
  %lhs = load <4 x i32>, <4 x i32>* %lhsp, align 16
  %rhs = load <4 x i32>, <4 x i32>* %rhsp, align 16
  %result = tail call <4 x i32> asm "vadd.i32 $0,$1,$2", "=t,t,t"(<4 x i32> %lhs, <4 x i32> %rhs)
  store <4 x i32> %result, <4 x i32>* %resultp, align 16
  ret void
}

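; A plain <2 x double> load/store copy: the value should pass straight through
; a q register using the integer vldrw.u32/vstrw.32 forms.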
define void @vector_f64_copy(<2 x double>* %from, <2 x double>* %to) {
; CHECK-LABEL: vector_f64_copy:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
  %v = load <2 x double>, <2 x double>* %from, align 16
  store <2 x double> %v, <2 x double>* %to, align 16
  ret void
}

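; At optnone the <16 x i8> argument is spilled to an alloca and reloaded; the
; prologue copies sp into r4 and clears its low bits with bfc to realign the
; stack before the vstrw.32/vldrw.u32 pair.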
define arm_aapcs_vfpcc <16 x i8> @stack_slot_handling(<16 x i8> %a) #0 {
; CHECK-LABEL: stack_slot_handling:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: push {r4, r6, r7, lr}
; CHECK-NEXT: add r7, sp, #8
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: mov r4, sp
; CHECK-NEXT: bfc r4, #0, #4
; CHECK-NEXT: mov sp, r4
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: sub.w r4, r7, #8
; CHECK-NEXT: mov sp, r4
; CHECK-NEXT: pop {r4, r6, r7, pc}
entry:
  %a.addr = alloca <16 x i8>, align 8
  store <16 x i8> %a, <16 x i8>* %a.addr, align 8
  %0 = load <16 x i8>, <16 x i8>* %a.addr, align 8
  ret <16 x i8> %0
}

attributes #0 = { noinline optnone }