blob: 334dac9fcfccc9b7e995ad1c230dc9794948b158 [file] [log] [blame]
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s
; Target description: the leading "e" in the data layout selects little-endian
; byte order, and the triple names 64-bit little-endian PowerPC Linux.
3target datalayout = "e-m:e-i64:64-n32:64"
4target triple = "powerpc64le-unknown-linux-gnu"
5
; Constant i32 arrays passed (via bitcast to i8*) as memcmp operands by the
; tests that take no pointer arguments.
; The buffer names do not line up with the functions that use them:
;   - zeroEqualityTest04 reads the zeroEqualityTest02 buffers,
;   - zeroEqualityTest05 reads the zeroEqualityTest03 buffers,
;   - equalityFoldTwoConstants / equalityFoldOneConstant read the
;     zeroEqualityTest04 buffers.
; The zeroEqualityTest01 buffers are not referenced by any function visible in
; this file.
; Where the pairs differ: Test01 in the last element (4 vs 3), Test02 in the
; first element (4 vs 3), Test03 in the last element (3 vs 4), Test04 only in
; the last of 15 elements (14 vs 13), so the first 56 bytes of that pair match.
6@zeroEqualityTest01.buffer1 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 4], align 4
7@zeroEqualityTest01.buffer2 = private unnamed_addr constant [3 x i32] [i32 1, i32 2, i32 3], align 4
8@zeroEqualityTest02.buffer1 = private unnamed_addr constant [4 x i32] [i32 4, i32 0, i32 0, i32 0], align 4
9@zeroEqualityTest02.buffer2 = private unnamed_addr constant [4 x i32] [i32 3, i32 0, i32 0, i32 0], align 4
10@zeroEqualityTest03.buffer1 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 3], align 4
11@zeroEqualityTest03.buffer2 = private unnamed_addr constant [4 x i32] [i32 0, i32 0, i32 0, i32 4], align 4
12@zeroEqualityTest04.buffer1 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14], align 4
13@zeroEqualityTest04.buffer2 = private unnamed_addr constant [15 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 13], align 4
14
; External memcmp taking two nocapture byte pointers and an i64 length; every
; test in this file calls it with a constant length.
; The declaration refers to attribute group #1, but no definition of that group
; is visible in this file (TODO confirm whether one is expected).
15declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1
16
17; Check 4 bytes - requires 1 load for each param.
; Returns 1 when memcmp(x, y, 4) is non-zero and 0 otherwise. The expected
; assembly contains no call to memcmp: one lwz per pointer, an xor of the two
; words, and a cntlzw/srwi/xori sequence that turns "xor != 0" into 0 or 1.
18define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
19; CHECK-LABEL: zeroEqualityTest02:
20; CHECK: # %bb.0:
21; CHECK-NEXT: lwz 3, 0(3)
22; CHECK-NEXT: lwz 4, 0(4)
23; CHECK-NEXT: xor 3, 3, 4
24; CHECK-NEXT: cntlzw 3, 3
25; CHECK-NEXT: srwi 3, 3, 5
26; CHECK-NEXT: xori 3, 3, 1
27; CHECK-NEXT: blr
28 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 4)
29 %not.cmp = icmp ne i32 %call, 0
30 %. = zext i1 %not.cmp to i32
31 ret i32 %.
32}
33
34; Check 16 bytes - requires 2 loads for each param (or use vectors?).
; Returns 1 when memcmp(x, y, 16) is non-zero and 0 otherwise. The expected
; assembly compares the buffers as two 8-byte halves (ld at offsets 0 and 8).
; A mismatch in either half reaches res_block, which sets the result to 1;
; otherwise the result stays 0.
35define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
36; CHECK-LABEL: zeroEqualityTest01:
37; CHECK: # %bb.0:
38; CHECK-NEXT: ld 5, 0(3)
39; CHECK-NEXT: ld 6, 0(4)
40; CHECK-NEXT: cmpld 5, 6
41; CHECK-NEXT: bne 0, .LBB1_2
42; CHECK-NEXT: # %bb.1: # %loadbb1
43; CHECK-NEXT: ld 3, 8(3)
44; CHECK-NEXT: ld 4, 8(4)
45; CHECK-NEXT: cmpld 3, 4
46; CHECK-NEXT: li 3, 0
47; CHECK-NEXT: beq 0, .LBB1_3
48; CHECK-NEXT: .LBB1_2: # %res_block
49; CHECK-NEXT: li 3, 1
50; CHECK-NEXT: .LBB1_3: # %endblock
51; CHECK-NEXT: clrldi 3, 3, 32
52; CHECK-NEXT: blr
53 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16)
54 %not.tobool = icmp ne i32 %call, 0
55 %. = zext i1 %not.tobool to i32
56 ret i32 %.
57}
58
59; Check 7 bytes - requires 3 loads for each param.
; Returns 1 when memcmp(x, y, 7) is non-zero and 0 otherwise. The 7 bytes are
; covered as 4 + 2 + 1: a word load at offset 0 (lwz), a halfword load at
; offset 4 (lhz) and a byte load at offset 6 (lbz). Any mismatch branches to
; res_block, which sets the result to 1.
60define signext i32 @zeroEqualityTest03(i8* %x, i8* %y) {
61; CHECK-LABEL: zeroEqualityTest03:
62; CHECK: # %bb.0:
63; CHECK-NEXT: lwz 5, 0(3)
64; CHECK-NEXT: lwz 6, 0(4)
65; CHECK-NEXT: cmplw 5, 6
66; CHECK-NEXT: bne 0, .LBB2_3
67; CHECK-NEXT: # %bb.1: # %loadbb1
68; CHECK-NEXT: lhz 5, 4(3)
69; CHECK-NEXT: lhz 6, 4(4)
70; CHECK-NEXT: cmplw 5, 6
71; CHECK-NEXT: bne 0, .LBB2_3
72; CHECK-NEXT: # %bb.2: # %loadbb2
73; CHECK-NEXT: lbz 3, 6(3)
74; CHECK-NEXT: lbz 4, 6(4)
75; CHECK-NEXT: cmplw 3, 4
76; CHECK-NEXT: li 3, 0
77; CHECK-NEXT: beq 0, .LBB2_4
78; CHECK-NEXT: .LBB2_3: # %res_block
79; CHECK-NEXT: li 3, 1
80; CHECK-NEXT: .LBB2_4: # %endblock
81; CHECK-NEXT: clrldi 3, 3, 32
82; CHECK-NEXT: blr
83 %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7)
84 %not.lnot = icmp ne i32 %call, 0
85 %cond = zext i1 %not.lnot to i32
86 ret i32 %cond
87}
88
89; Validate with > 0
; The IR returns 1 when the memcmp result is less than 1 (i.e. <= 0), which is
; the negation of a "> 0" test, so this is an ordering check rather than a pure
; zero-equality check. Both operands are the 16-byte zeroEqualityTest02
; constant buffers, despite this function's name.
; The expected assembly still loads both buffers (no constant folding) using
; byte-reversed loads (ldbrx), and res_block picks -1 or 1 with isel so the
; sign of the result is available to the final compare.
90define signext i32 @zeroEqualityTest04() {
91; CHECK-LABEL: zeroEqualityTest04:
92; CHECK: # %bb.0:
93; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha
94; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
95; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
96; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
97; CHECK-NEXT: ldbrx 3, 0, 6
98; CHECK-NEXT: ldbrx 4, 0, 5
99; CHECK-NEXT: cmpld 3, 4
100; CHECK-NEXT: bne 0, .LBB3_2
101; CHECK-NEXT: # %bb.1: # %loadbb1
102; CHECK-NEXT: li 4, 8
103; CHECK-NEXT: ldbrx 3, 6, 4
104; CHECK-NEXT: ldbrx 4, 5, 4
105; CHECK-NEXT: li 5, 0
106; CHECK-NEXT: cmpld 3, 4
107; CHECK-NEXT: beq 0, .LBB3_3
108; CHECK-NEXT: .LBB3_2: # %res_block
109; CHECK-NEXT: cmpld 3, 4
110; CHECK-NEXT: li 3, 1
111; CHECK-NEXT: li 4, -1
112; CHECK-NEXT: isel 5, 4, 3, 0
113; CHECK-NEXT: .LBB3_3: # %endblock
114; CHECK-NEXT: extsw 3, 5
115; CHECK-NEXT: neg 3, 3
116; CHECK-NEXT: rldicl 3, 3, 1, 63
117; CHECK-NEXT: xori 3, 3, 1
118; CHECK-NEXT: blr
119 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
120 %not.cmp = icmp slt i32 %call, 1
121 %. = zext i1 %not.cmp to i32
122 ret i32 %.
123}
124
125; Validate with < 0
; The IR returns the inverted sign bit of the memcmp result: lshr by 31
; isolates the sign bit and the xor with 1 flips it, giving 1 when the result
; is non-negative and 0 when it is negative. Both operands are the 16-byte
; zeroEqualityTest03 constant buffers, despite this function's name.
; As in zeroEqualityTest04, the expected assembly uses byte-reversed loads
; (ldbrx) and an isel between -1 and 1 in res_block; the tail (nor + rlwinm)
; extracts the inverted sign bit.
126define signext i32 @zeroEqualityTest05() {
127; CHECK-LABEL: zeroEqualityTest05:
128; CHECK: # %bb.0:
129; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha
130; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha
131; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
132; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
133; CHECK-NEXT: ldbrx 3, 0, 6
134; CHECK-NEXT: ldbrx 4, 0, 5
135; CHECK-NEXT: cmpld 3, 4
136; CHECK-NEXT: bne 0, .LBB4_2
137; CHECK-NEXT: # %bb.1: # %loadbb1
138; CHECK-NEXT: li 4, 8
139; CHECK-NEXT: ldbrx 3, 6, 4
140; CHECK-NEXT: ldbrx 4, 5, 4
141; CHECK-NEXT: li 5, 0
142; CHECK-NEXT: cmpld 3, 4
143; CHECK-NEXT: beq 0, .LBB4_3
144; CHECK-NEXT: .LBB4_2: # %res_block
145; CHECK-NEXT: cmpld 3, 4
146; CHECK-NEXT: li 3, 1
147; CHECK-NEXT: li 4, -1
148; CHECK-NEXT: isel 5, 4, 3, 0
149; CHECK-NEXT: .LBB4_3: # %endblock
150; CHECK-NEXT: nor 3, 5, 5
151; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31
152; CHECK-NEXT: blr
153 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
154 %call.lobit = lshr i32 %call, 31
155 %call.lobit.not = xor i32 %call.lobit, 1
156 ret i32 %call.lobit.not
157}
158
159; Validate with memcmp()?:
; Both memcmp operands are constants (the zeroEqualityTest04 buffers) and only
; the first 16 bytes are compared. Those bytes (i32 0, 1, 2, 3) are identical
; in the two buffers, which differ only in their last element, so the
; "result == 0" test is true and the expected assembly is just "li 3, 1".
160define signext i32 @equalityFoldTwoConstants() {
161; CHECK-LABEL: equalityFoldTwoConstants:
162; CHECK: # %bb.0: # %loadbb
163; CHECK-NEXT: li 3, 1
164; CHECK-NEXT: blr
165 %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16)
166 %not.tobool = icmp eq i32 %call, 0
167 %cond = zext i1 %not.tobool to i32
168 ret i32 %cond
169}
170
; Returns 1 when the first 16 bytes of zeroEqualityTest04.buffer1 equal the 16
; bytes at %X, and 0 otherwise. Only %X is loaded in the expected assembly; the
; constant side appears as immediates built in registers:
;   - bytes 0..7  (i32 0, i32 1) -> li 1 ; sldi 32           = 1 << 32
;   - bytes 8..15 (i32 2, i32 3) -> li 3 ; sldi 32 ; ori 2   = (3 << 32) | 2
; res_block sets the intermediate value to 1 on a mismatch, and the final
; cntlzw/srwi pair converts "intermediate == 0" into the returned 0 or 1.
171define signext i32 @equalityFoldOneConstant(i8* %X) {
172; CHECK-LABEL: equalityFoldOneConstant:
173; CHECK: # %bb.0:
174; CHECK-NEXT: ld 4, 0(3)
175; CHECK-NEXT: li 5, 1
176; CHECK-NEXT: sldi 5, 5, 32
177; CHECK-NEXT: cmpld 4, 5
178; CHECK-NEXT: bne 0, .LBB6_2
179; CHECK-NEXT: # %bb.1: # %loadbb1
180; CHECK-NEXT: li 4, 3
181; CHECK-NEXT: ld 3, 8(3)
182; CHECK-NEXT: sldi 4, 4, 32
183; CHECK-NEXT: ori 4, 4, 2
184; CHECK-NEXT: cmpld 3, 4
185; CHECK-NEXT: li 3, 0
186; CHECK-NEXT: beq 0, .LBB6_3
187; CHECK-NEXT: .LBB6_2: # %res_block
188; CHECK-NEXT: li 3, 1
189; CHECK-NEXT: .LBB6_3: # %endblock
190; CHECK-NEXT: cntlzw 3, 3
191; CHECK-NEXT: srwi 3, 3, 5
192; CHECK-NEXT: blr
193 %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16)
194 %not.tobool = icmp eq i32 %call, 0
195 %cond = zext i1 %not.tobool to i32
196 ret i32 %cond
197}
198
; Returns true when memcmp(X, Y, 2) is zero. The call site carries the
; nobuiltin attribute, and the expected assembly keeps a real call
; ("bl memcmp" with the length 2 placed in r5, plus the link-register
; save/restore around it) instead of the inline loads seen in the other tests.
199define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
200; CHECK-LABEL: length2_eq_nobuiltin_attr:
201; CHECK: # %bb.0:
202; CHECK-NEXT: mflr 0
203; CHECK-NEXT: std 0, 16(1)
204; CHECK-NEXT: stdu 1, -32(1)
205; CHECK-NEXT: li 5, 2
206; CHECK-NEXT: bl memcmp
207; CHECK-NEXT: nop
208; CHECK-NEXT: cntlzw 3, 3
209; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31
210; CHECK-NEXT: addi 1, 1, 32
211; CHECK-NEXT: ld 0, 16(1)
212; CHECK-NEXT: mtlr 0
213; CHECK-NEXT: blr
214 %m = tail call signext i32 @memcmp(i8* %X, i8* %Y, i64 2) nobuiltin
215 %c = icmp eq i32 %m, 0
216 ret i1 %c
217}
218