| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 1 | ; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST |
| Saleem Abdulrasool | 7258735 | 2014-04-03 16:01:44 +0000 | [diff] [blame^] | 2 | ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s |
| Jim Grosbach | 30af442 | 2012-10-12 22:59:21 +0000 | [diff] [blame] | 3 | ; Make sure that the ARM backend with NEON handles vselect. |
| 4 | |
; vmax_v4i32: lane-wise signed maximum of two <4 x i32> vectors.
; Compares %a and %b with icmp sgt, selects %a where the comparison holds and
; %b otherwise, and stores the result through %m.
; The llc expectations below require a vcgt.s32 whose destination register is
; then the first operand of a vbsl that reuses the same two source registers.
| 5 | define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { |
| 6 | ; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]] |
| 7 | ; CHECK: vbsl [[QR]], [[Q1]], [[Q2]] |
| 8 | %cmpres = icmp sgt <4 x i32> %a, %b |
| 9 | %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b |
| 10 | store <4 x i32> %maxres, <4 x i32>* %m |
| 11 | ret void |
| 12 | } |
| 13 | |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 14 | ; We adjusted the cost model of the following selects. When we improve code |
| 15 | ; lowering, we also need to adjust the cost. |
; func_blend10: lane-wise signed minimum of two <16 x i16> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least two vbsl instructions
; (presumably one per 128-bit half of the 256-bit vector -- TODO confirm), and
; the opt cost-model run must report a cost of 40 for the select.
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 16 | %T0_10 = type <16 x i16> |
| 17 | %T1_10 = type <16 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 18 | ; CHECK-LABEL: func_blend10: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 19 | define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, |
| 20 | %T1_10* %blend, %T0_10* %storeaddr) { |
| 21 | %v0 = load %T0_10* %loadaddr |
| 22 | %v1 = load %T0_10* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 23 | %c = icmp slt %T0_10 %v0, %v1 |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 24 | ; CHECK: vbsl |
| 25 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 26 | ; COST: func_blend10 |
| 27 | ; COST: cost of 40 {{.*}} select |
| 28 | %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1 |
| 29 | store %T0_10 %r, %T0_10* %storeaddr |
| 30 | ret void |
| 31 | } |
; func_blend14: lane-wise signed minimum of two <8 x i32> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least two vbsl instructions,
; and the opt cost-model run must report a cost of 41 for the select.
| 32 | %T0_14 = type <8 x i32> |
| 33 | %T1_14 = type <8 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 34 | ; CHECK-LABEL: func_blend14: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 35 | define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, |
| 36 | %T1_14* %blend, %T0_14* %storeaddr) { |
| 37 | %v0 = load %T0_14* %loadaddr |
| 38 | %v1 = load %T0_14* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 39 | %c = icmp slt %T0_14 %v0, %v1 |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 40 | ; CHECK: vbsl |
| 41 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 42 | ; COST: func_blend14 |
| 43 | ; COST: cost of 41 {{.*}} select |
| 44 | %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1 |
| 45 | store %T0_14 %r, %T0_14* %storeaddr |
| 46 | ret void |
| 47 | } |
; func_blend15: lane-wise signed minimum of two <16 x i32> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least two vbsl instructions
; (only two are required here even though the vector is 512 bits wide), and
; the opt cost-model run must report a cost of 82 for the select.
| 48 | %T0_15 = type <16 x i32> |
| 49 | %T1_15 = type <16 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 50 | ; CHECK-LABEL: func_blend15: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 51 | define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, |
| 52 | %T1_15* %blend, %T0_15* %storeaddr) { |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 53 | ; CHECK: vbsl |
| 54 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 55 | %v0 = load %T0_15* %loadaddr |
| 56 | %v1 = load %T0_15* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 57 | %c = icmp slt %T0_15 %v0, %v1 |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 58 | ; COST: func_blend15 |
| 59 | ; COST: cost of 82 {{.*}} select |
| 60 | %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1 |
| 61 | store %T0_15 %r, %T0_15* %storeaddr |
| 62 | ret void |
| 63 | } |
; func_blend18: lane-wise signed minimum of two <4 x i64> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least two vbsl instructions,
; and the opt cost-model run must report a cost of 19 for the select.
| 64 | %T0_18 = type <4 x i64> |
| 65 | %T1_18 = type <4 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 66 | ; CHECK-LABEL: func_blend18: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 67 | define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, |
| 68 | %T1_18* %blend, %T0_18* %storeaddr) { |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 69 | ; CHECK: vbsl |
| 70 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 71 | %v0 = load %T0_18* %loadaddr |
| 72 | %v1 = load %T0_18* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 73 | %c = icmp slt %T0_18 %v0, %v1 |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 74 | ; COST: func_blend18 |
| 75 | ; COST: cost of 19 {{.*}} select |
| 76 | %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1 |
| 77 | store %T0_18 %r, %T0_18* %storeaddr |
| 78 | ret void |
| 79 | } |
; func_blend19: lane-wise signed minimum of two <8 x i64> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least four vbsl instructions,
; and the opt cost-model run must report a cost of 50 for the select.
| 80 | %T0_19 = type <8 x i64> |
| 81 | %T1_19 = type <8 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 82 | ; CHECK-LABEL: func_blend19: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 83 | define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, |
| 84 | %T1_19* %blend, %T0_19* %storeaddr) { |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 85 | ; CHECK: vbsl |
| 86 | ; CHECK: vbsl |
| 87 | ; CHECK: vbsl |
| 88 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 89 | %v0 = load %T0_19* %loadaddr |
| 90 | %v1 = load %T0_19* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 91 | %c = icmp slt %T0_19 %v0, %v1 |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 92 | ; COST: func_blend19 |
| 93 | ; COST: cost of 50 {{.*}} select |
| 94 | %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1 |
| 95 | store %T0_19 %r, %T0_19* %storeaddr |
| 96 | ret void |
| 97 | } |
; func_blend20: lane-wise signed minimum of two <16 x i64> vectors.
; Loads %v0 and %v1, compares them with icmp slt, selects %v0 where the
; comparison holds and %v1 otherwise, and stores the result to %storeaddr.
; The %blend argument is not referenced in the body.
; Expectations: the llc output must contain at least eight vbsl instructions,
; and the opt cost-model run must report a cost of 100 for the select.
| 98 | %T0_20 = type <16 x i64> |
| 99 | %T1_20 = type <16 x i1> |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 100 | ; CHECK-LABEL: func_blend20: |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 101 | define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, |
| 102 | %T1_20* %blend, %T0_20* %storeaddr) { |
| Jim Grosbach | 24e102a | 2013-07-08 18:18:52 +0000 | [diff] [blame] | 103 | ; CHECK: vbsl |
| 104 | ; CHECK: vbsl |
| 105 | ; CHECK: vbsl |
| 106 | ; CHECK: vbsl |
| 107 | ; CHECK: vbsl |
| 108 | ; CHECK: vbsl |
| 109 | ; CHECK: vbsl |
| 110 | ; CHECK: vbsl |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 111 | %v0 = load %T0_20* %loadaddr |
| 112 | %v1 = load %T0_20* %loadaddr2 |
| Arnold Schwaighofer | 9d7a382 | 2013-03-15 18:31:01 +0000 | [diff] [blame] | 113 | %c = icmp slt %T0_20 %v0, %v1 |
| Arnold Schwaighofer | 8070b38 | 2013-03-14 19:17:02 +0000 | [diff] [blame] | 114 | ; COST: func_blend20 |
| 115 | ; COST: cost of 100 {{.*}} select |
| 116 | %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1 |
| 117 | store %T0_20 %r, %T0_20* %storeaddr |
| 118 | ret void |
| 119 | } |