; When optimising for minimum size, we don't want to expand a div to a mul
; and a shift sequence. As a result, a urem instruction, for example, will not
; be expanded to a sequence of umull, lsrs, muls and sub instructions, but
; just to a call to __aeabi_uidivmod.
;
; When the processor features hardware division, UDIV + UREM can be turned
; into UDIV + MLS. This prevents the library function __aeabi_uidivmod from
; being pulled into the binary. The test uses ARMv7-M.
;
; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-arm-none-eabi"

; Test for signed division: with minsize, an sdiv by a constant is expected to
; stay a call to __aeabi_idiv instead of being expanded into a multiply-based
; sequence (smmul).
define i32 @foo1() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo1:
; CHECK: __aeabi_idiv
; CHECK-NOT: smmul
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  ret i32 %div
}

; Test for unsigned division: with minsize, a udiv by a constant is expected
; to stay a call to __aeabi_uidiv instead of being expanded into a umull-based
; sequence.
define i32 @foo2() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo2:
; CHECK: __aeabi_uidiv
; CHECK-NOT: umull
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = udiv i32 %call, 1000000
  ret i32 %div
}

; Test for unsigned remainder.
; In the armv7a run the urem is expected to become a call to __aeabi_uidivmod
; with no umull expansion. In the thumbv7m run it is expected to become a udiv
; followed by an mls that reuses the udiv operands, with no library call.
define i32 @foo3() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo3:
; CHECK: __aeabi_uidivmod
; CHECK-NOT: umull
; V7M-LABEL: foo3:
; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_uidivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %rem = urem i32 %call, 1000000
  %cmp = icmp eq i32 %rem, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Test for signed remainder.
; In the armv7a run the srem is expected to become a call to __aeabi_idivmod.
; In the thumbv7m run it is expected to become an sdiv followed by an mls that
; reuses the sdiv operands, with no library call.
define i32 @foo4() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo4:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo4:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %rem = srem i32 %call, 1000000
  ret i32 %rem
}

; Check that doing an sdiv+srem has the same effect as only the srem,
; as the division needs to be computed anyway in order to calculate
; the remainder (i.e. make sure we don't end up with two divisions).
define i32 @foo5() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo5:
; CHECK: __aeabi_idivmod
; V7M-LABEL: foo5:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M-NOT: sdiv
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
  %call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
  %div = sdiv i32 %call, 1000000
  %rem = srem i32 %call, 1000000
  %add = add i32 %div, %rem
  ret i32 %add
}

; An early version of this patch caused isel to hang. The reason
; was that it shouldn't do the rewrite for i64 because that's not
; supported by hardware. Isel was stuck in a loop between type
; legalization and this optimisation.
; The i64 udiv is therefore expected to remain a call to __aeabi_uldivmod.
; Function Attrs: norecurse nounwind
define i64 @isel_dont_hang(i32 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang:
; CHECK: __aeabi_uldivmod
  %temp.0 = sext i32 %bar to i64
  %mul83 = shl i64 %temp.0, 1
  %add84 = add i64 %temp.0, 2
  %div85 = udiv i64 %mul83, %add84
  ret i64 %div85
}

; i16 types are promoted to i32, and we expect a normal udiv here
; (no __aeabi_ library call):
define i16 @isel_dont_hang_2(i16 %bar) local_unnamed_addr #4 {
entry:
; CHECK-LABEL: isel_dont_hang_2:
; CHECK: udiv
; CHECK-NOT: __aeabi_
  %mul83 = shl i16 %bar, 1
  %add84 = add i16 %bar, 2
  %div85 = udiv i16 %mul83, %add84
  ret i16 %div85
}

; External function that foo1-foo5 call to obtain the dividend.
declare i32 @GetValue(...) local_unnamed_addr

; #0: size-optimised functions (foo1-foo5).
attributes #0 = { minsize nounwind optsize }
; #4: used by the isel_dont_hang tests; targets cortex-a15 with hardware
; divide enabled (+hwdiv,+hwdiv-arm).
attributes #4 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-jump-tables"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a15" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4" "use-soft-float"="false" }