; When optimising for minimum size, we don't want to expand a div into a mul
; and shift sequence. As a result, a urem instruction, for example, is not
; expanded into a sequence of umull, lsrs, muls and sub instructions, but is
; lowered to just a call to __aeabi_uidivmod.
;
; When the processor features hardware division, UDIV + UREM can be turned
; into UDIV + MLS. This prevents the library function __aeabi_uidivmod from
; being pulled into the binary. The test uses ARMv7-M.
;
; Two llc invocations follow. The first targets armv7a-eabi with NEON disabled
; and is verified with FileCheck's default prefix; the second targets
; thumbv7m-eabi and is verified with the V7M prefix. The -mtriple option on
; each command line takes precedence over the module's target triple below.
; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-arm-none-eabi"

; Signed division by a constant in a minsize function (attribute group #0).
; The armv7a run expects a call to __aeabi_idiv and no smmul after it, i.e.
; the division is not expanded into a multiply-and-shift sequence.
; Only the default-prefix run has checks for this function.
define i32 @foo1() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo1:
; CHECK: __aeabi_idiv
; CHECK-NOT: smmul
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  ret i32 %div
}

; Unsigned counterpart of foo1: udiv by a constant in a minsize function.
; The armv7a run expects a call to __aeabi_uidiv and no umull after it.
; Only the default-prefix run has checks for this function.
define i32 @foo2() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo2:
; CHECK: __aeabi_uidiv
; CHECK-NOT: umull
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = udiv i32 %call, 1000000
  ret i32 %div
}

; Test for unsigned remainder.
; armv7a run: expects a call to __aeabi_uidivmod and no umull after it.
; thumbv7m run: expects a udiv followed by an mls that reuses the quotient,
; divisor and dividend registers of that udiv, with no call to
; __aeabi_uidivmod afterwards.
define i32 @foo3() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo3:
; CHECK: __aeabi_uidivmod
; CHECK-NOT: umull
; V7M-LABEL: foo3:
; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_uidivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %rem = urem i32 %call, 1000000
  %cmp = icmp eq i32 %rem, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Test for signed remainder.
; armv7a run: expects a call to __aeabi_idivmod.
; thumbv7m run: expects an sdiv followed by an mls that reuses the quotient,
; divisor and dividend registers of that sdiv, with no call to
; __aeabi_idivmod afterwards.
define i32 @foo4() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo4:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo4:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %rem = srem i32 %call, 1000000
  ret i32 %rem
}

; Check that doing an sdiv+srem has the same effect as only the srem,
; as the division needs to be computed anyway in order to calculate
; the remainder (i.e. make sure we don't end up with two divisions).
; On the thumbv7m run, no second sdiv may appear between the first sdiv
; and the mls that consumes its result.
define i32 @foo5() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo5:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo5:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M-NOT: sdiv
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  %rem = srem i32 %call, 1000000
  %add = add i32 %div, %rem
  ret i32 %add
}

; An early version of this patch caused isel to hang. The reason
; was that it shouldn't do the rewrite for i64 because that's not
; supported by hardware. Isel was stuck in a loop with type
; legalization and this optimisation.
; The armv7a run expects the i64 udiv to become a call to __aeabi_uldivmod;
; the thumbv7m run has no checks here and only needs llc to terminate.
; Function Attrs: norecurse nounwind
define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang:
; CHECK: __aeabi_uldivmod
  %temp.0 = sext i32 %bar to i64
  %mul83 = shl i64 %temp.0, 1
  %add84 = add i64 %temp.0, 2
  %div85 = udiv i64 %mul83, %add84
  ret i64 %div85
}

; i16 types are promoted to i32, and we expect a normal udiv here:
; the armv7a run must see a udiv instruction and no __aeabi_ library call
; after it. Attribute group #4 enables hardware divide through its
; "target-features" string. The thumbv7m run has no checks for this function.
define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang_2:
; CHECK: udiv
; CHECK-NOT: __aeabi_
  %mul83 = shl i16 %bar, 1
  %add84 = add i16 %bar, 2
  %div85 = udiv i16 %mul83, %add84
  ret i16 %div85
}

; External varargs-declared function; foo1 through foo5 call it through a
; bitcast to i32 () to obtain the value that is divided.
declare i32 @GetValue(...) local_unnamed_addr

; #0: minsize/optsize group used by foo1 through foo5.
attributes #0 = { minsize nounwind optsize }
; #4: group used by the two isel_dont_hang tests. It carries no minsize or
; optsize, and its "target-features" string includes +hwdiv and +hwdiv-arm.
attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" }
