Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | ; RUN: opt < %s -loop-reduce -mcpu=btver2 -S | FileCheck %s --check-prefix=JAG |
| 3 | ; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW |
| 4 | |
| 5 | ; RUN: llc < %s | FileCheck %s --check-prefix=BASE |
| 6 | ; RUN: llc < %s -mattr=macrofusion | FileCheck %s --check-prefix=FUSE |
| 7 | |
; Generic x86-64 triple with no CPU baked into the module; the opt/llc command
; lines at the top of this file select the CPU (-mcpu) or feature (-mattr) that
; each set of assertions is generated for.
| 8 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 9 | target triple = "x86_64-unknown-unknown" |
| 10 | |
| 11 | ; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681 |
| 12 | ; If a CPU can macro-fuse a compare and branch, then LSR discounts the cost of |
| 13 | ; the compare and avoids generating large offsets in each memory access. |
| 14 | ; This reduces code size and may improve decode throughput. |
| 15 | |
| 16 | define void @maxArray(double* noalias nocapture %x, double* noalias nocapture readonly %y) {
| 17 | ; JAG-LABEL: @maxArray( |
| 18 | ; JAG-NEXT: entry: |
Mikael Holmen | 6d06976 | 2018-02-01 06:38:34 +0000 | [diff] [blame] | 19 | ; JAG-NEXT: [[Y1:%.*]] = bitcast double* [[Y:%.*]] to i8* |
| 20 | ; JAG-NEXT: [[X3:%.*]] = bitcast double* [[X:%.*]] to i8* |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 21 | ; JAG-NEXT: br label [[VECTOR_BODY:%.*]] |
| 22 | ; JAG: vector.body: |
| 23 | ; JAG-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ -524288, [[ENTRY:%.*]] ] |
Mikael Holmen | 6d06976 | 2018-02-01 06:38:34 +0000 | [diff] [blame] | 24 | ; JAG-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]] |
| 25 | ; JAG-NEXT: [[UGLYGEP78:%.*]] = bitcast i8* [[UGLYGEP7]] to <2 x double>* |
| 26 | ; JAG-NEXT: [[SCEVGEP9:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP78]], i64 32768 |
| 27 | ; JAG-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Y1]], i64 [[LSR_IV]] |
| 28 | ; JAG-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to <2 x double>* |
| 29 | ; JAG-NEXT: [[SCEVGEP:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP2]], i64 32768 |
| 30 | ; JAG-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP9]], align 8 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 31 | ; JAG-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP]], align 8 |
| 32 | ; JAG-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]] |
| 33 | ; JAG-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]] |
Mikael Holmen | 6d06976 | 2018-02-01 06:38:34 +0000 | [diff] [blame] | 34 | ; JAG-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[X3]], i64 [[LSR_IV]] |
| 35 | ; JAG-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to <2 x double>* |
| 36 | ; JAG-NEXT: [[SCEVGEP6:%.*]] = getelementptr <2 x double>, <2 x double>* [[UGLYGEP45]], i64 32768 |
| 37 | ; JAG-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP6]], align 8 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 38 | ; JAG-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 16 |
| 39 | ; JAG-NEXT: [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 |
| 40 | ; JAG-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]] |
| 41 | ; JAG: exit: |
| 42 | ; JAG-NEXT: ret void |
| 43 | ; |
| 44 | ; HSW-LABEL: @maxArray( |
| 45 | ; HSW-NEXT: entry: |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 46 | ; HSW-NEXT: br label [[VECTOR_BODY:%.*]] |
| 47 | ; HSW: vector.body: |
Sanjay Patel | d7c702b | 2018-02-05 23:43:05 +0000 | [diff] [blame] | 48 | ; HSW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] |
| 49 | ; HSW-NEXT: [[SCEVGEP4:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[INDEX]] |
| 50 | ; HSW-NEXT: [[SCEVGEP45:%.*]] = bitcast double* [[SCEVGEP4]] to <2 x double>* |
| 51 | ; HSW-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[INDEX]] |
| 52 | ; HSW-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to <2 x double>* |
| 53 | ; HSW-NEXT: [[XVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP45]], align 8 |
| 54 | ; HSW-NEXT: [[YVAL:%.*]] = load <2 x double>, <2 x double>* [[SCEVGEP1]], align 8 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 55 | ; HSW-NEXT: [[CMP:%.*]] = fcmp ogt <2 x double> [[YVAL]], [[XVAL]] |
| 56 | ; HSW-NEXT: [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x double> [[YVAL]], <2 x double> [[XVAL]] |
Sanjay Patel | d7c702b | 2018-02-05 23:43:05 +0000 | [diff] [blame] | 57 | ; HSW-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[X]], i64 [[INDEX]] |
| 58 | ; HSW-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to <2 x double>* |
| 59 | ; HSW-NEXT: store <2 x double> [[MAX]], <2 x double>* [[SCEVGEP23]], align 8 |
| 60 | ; HSW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 |
| 61 | ; HSW-NEXT: [[DONE:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 62 | ; HSW-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]] |
| 63 | ; HSW: exit: |
| 64 | ; HSW-NEXT: ret void |
| 65 | ; |
| 66 | ; BASE-LABEL: maxArray: |
| 67 | ; BASE: # %bb.0: # %entry |
| 68 | ; BASE-NEXT: movq $-524288, %rax # imm = 0xFFF80000 |
| 69 | ; BASE-NEXT: .p2align 4, 0x90 |
| 70 | ; BASE-NEXT: .LBB0_1: # %vector.body |
| 71 | ; BASE-NEXT: # =>This Inner Loop Header: Depth=1 |
| 72 | ; BASE-NEXT: movupd 524288(%rdi,%rax), %xmm0 |
| 73 | ; BASE-NEXT: movupd 524288(%rsi,%rax), %xmm1 |
| 74 | ; BASE-NEXT: maxpd %xmm0, %xmm1 |
| 75 | ; BASE-NEXT: movupd %xmm1, 524288(%rdi,%rax) |
| 76 | ; BASE-NEXT: addq $16, %rax |
| 77 | ; BASE-NEXT: jne .LBB0_1 |
| 78 | ; BASE-NEXT: # %bb.2: # %exit |
| 79 | ; BASE-NEXT: retq |
| 80 | ; |
| 81 | ; FUSE-LABEL: maxArray: |
| 82 | ; FUSE: # %bb.0: # %entry |
Sanjay Patel | d7c702b | 2018-02-05 23:43:05 +0000 | [diff] [blame] | 83 | ; FUSE-NEXT: xorl %eax, %eax |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 84 | ; FUSE-NEXT: .p2align 4, 0x90 |
| 85 | ; FUSE-NEXT: .LBB0_1: # %vector.body |
| 86 | ; FUSE-NEXT: # =>This Inner Loop Header: Depth=1 |
Sanjay Patel | d7c702b | 2018-02-05 23:43:05 +0000 | [diff] [blame] | 87 | ; FUSE-NEXT: movupd (%rdi,%rax,8), %xmm0 |
| 88 | ; FUSE-NEXT: movupd (%rsi,%rax,8), %xmm1 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 89 | ; FUSE-NEXT: maxpd %xmm0, %xmm1 |
Sanjay Patel | d7c702b | 2018-02-05 23:43:05 +0000 | [diff] [blame] | 90 | ; FUSE-NEXT: movupd %xmm1, (%rdi,%rax,8) |
| 91 | ; FUSE-NEXT: addq $2, %rax |
| 92 | ; FUSE-NEXT: cmpq $65536, %rax # imm = 0x10000 |
Sanjay Patel | ffb37a2 | 2018-01-30 19:17:38 +0000 | [diff] [blame] | 93 | ; FUSE-NEXT: jne .LBB0_1 |
| 94 | ; FUSE-NEXT: # %bb.2: # %exit |
| 95 | ; FUSE-NEXT: retq |
; Input IR for all four configurations above: a single-block loop that reads two
; doubles at a time from %x and %y and writes the lanewise maximum back to %x.
; The expected output differs in how the induction variable and addresses are
; formed: either a negative byte counter that runs up to zero with a 524288-byte
; displacement on every access, or the original element index with an explicit
; compare against the trip count.
| 96 | entry: |
| 97 | br label %vector.body |
| 98 | |
| 99 | vector.body: |
; Element index into both arrays; starts at 0 and is advanced at the bottom of
; the loop.
| 100 | %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] |
| 101 | %gepx = getelementptr inbounds double, double* %x, i64 %index |
| 102 | %gepy = getelementptr inbounds double, double* %y, i64 %index |
; Reinterpret the double pointers as <2 x double> pointers; the loads keep the
; 8-byte alignment of a scalar double.
| 103 | %xptr = bitcast double* %gepx to <2 x double>* |
| 104 | %yptr = bitcast double* %gepy to <2 x double>* |
| 105 | %xval = load <2 x double>, <2 x double>* %xptr, align 8 |
| 106 | %yval = load <2 x double>, <2 x double>* %yptr, align 8 |
; Per lane, pick %yval when it is ordered-greater-than %xval, otherwise %xval.
| 107 | %cmp = fcmp ogt <2 x double> %yval, %xval |
| 108 | %max = select <2 x i1> %cmp, <2 x double> %yval, <2 x double> %xval |
; A second bitcast of the same %gepx address, used as the store destination.
| 109 | %xptr_again = bitcast double* %gepx to <2 x double>* |
| 110 | store <2 x double> %max, <2 x double>* %xptr_again, align 8 |
; Step by two elements (one vector) and leave the loop once the index reaches
; 65536.
| 111 | %index.next = add i64 %index, 2 |
| 112 | %done = icmp eq i64 %index.next, 65536 |
| 113 | br i1 %done, label %exit, label %vector.body |
| 114 | |
| 115 | exit: |
| 116 | ret void |
| 117 | } |
| 118 | |