; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT

; bar - vector add of %a and %b, then a scalar i64 add and sub that both use
; lane 0 of the vector sum and lane 0 of %b. The scalar sum goes into lane 0
; of the result and the scalar difference into lane 1.
;
; The expectations require the scalar add/sub to be emitted on d registers.
; The runs with and without advanced copy optimization differ only in the
; fmov copies that appear around them.
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy,
; used to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
; CHECK: ins.d v0[1], [[COPY_REG2]]
; CHECK-NEXT: ret
;
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: ins v0.d[1], [[COPY_REG2]]
; GENERIC-NEXT: ret
  %add = add <2 x i64> %a, %b
  %vgetq_lane = extractelement <2 x i64> %add, i32 0
  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
  %add3 = add i64 %vgetq_lane, %vgetq_lane2
  %sub = sub i64 %vgetq_lane, %vgetq_lane2
  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
  ret <2 x i64> %vecinit8
}

; subdd_su64 - subtracts lane 0 of %a from lane 0 of %b as a scalar i64 and
; returns the bits as a double. Both syntax variants expect a d-register sub
; immediately followed by ret.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %sub.i = sub nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}

; vaddd_su64 - adds lane 0 of %b and lane 0 of %a as a scalar i64 and returns
; the bits as a double. Both syntax variants expect a d-register add
; immediately followed by ret.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %add.i to double
  ret double %retval
}

; add_sub_su64 - adds lane 0 of %b and lane 0 of %a as a scalar i64, negates
; the sum by subtracting it from zero, and returns the bits as a double.
; Both syntax variants expect a d-register add and then a d-register sub
; immediately followed by ret.
;
; sub MI doesn't access dsub register.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add i64 %vecext1, %vecext
  %sub.i = sub i64 0, %add.i
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}

; and_su64 - bitwise AND of lane 0 of %b and lane 0 of %a as a scalar i64,
; returned as the bits of a double. Both syntax variants expect an 8b vector
; and immediately followed by ret.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}

; orr_su64 - bitwise OR of lane 0 of %b and lane 0 of %a as a scalar i64,
; returned as the bits of a double. Both syntax variants expect an 8b vector
; orr immediately followed by ret.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %or.i = or i64 %vecext1, %vecext
  %retval = bitcast i64 %or.i to double
  ret double %retval
}

; xorr_su64 - bitwise XOR of lane 0 of %b and lane 0 of %a as a scalar i64,
; returned as the bits of a double. Both syntax variants expect an 8b vector
; eor immediately followed by ret.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %xor.i = xor i64 %vecext1, %vecext
  %retval = bitcast i64 %xor.i to double
  ret double %retval
}