Benjamin Kramer | c8160d6 | 2013-11-20 19:08:44 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=x86-64 < %s -block-placement-exit-block-bias=20 | FileCheck %s |
; Module-level target settings: the data layout string and the target triple
; (x86_64, macOS 10.9) that this module is built for.
| 2 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" |
| 3 | target triple = "x86_64-apple-macosx10.9.0" |
| 4 | |
| 5 | ; This is longest_match, the hot function from zlib's deflate implementation. |
| 6 | |
; Type definitions used by @longest_match. The function addresses only fields of
; %struct.internal_state directly, through the GEPs in its entry block:
;   index 11 (%w_size), 13 (%w_mask), 14 (%window), 16 (%prev7),
;   27 (%strstart), 28 (%match_start), 29 (%lookahead), 30 (%prev_length),
;   31 (%max_chain_length), 35 (%good_match), 36 (%nice_match1).
; The other types are only reachable through that struct's field list. The
; field order must not change, since the GEP indices above depend on it.
| 7 | %struct.internal_state = type { %struct.z_stream_s*, i32, i8*, i64, i8*, i32, i32, %struct.gz_header_s*, i32, i8, i32, i32, i32, i32, i8*, i64, i16*, i16*, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i64, i64, i32, i32, i16, i32, i64 } |
| 8 | %struct.z_stream_s = type { i8*, i32, i64, i8*, i32, i64, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i64, i64 } |
| 9 | %struct.gz_header_s = type { i32, i64, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 } |
| 10 | %struct.ct_data_s = type { %union.anon, %union.anon.0 } |
| 11 | %union.anon = type { i16 } |
| 12 | %union.anon.0 = type { i16 } |
| 13 | %struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc_s* } |
| 14 | %struct.static_tree_desc_s = type { i32 } |
| 15 | |
| 16 | ; CHECK-LABEL: longest_match: |
| 17 | |
| 18 | ; Verify that there are no spills or reloads in the loop exit block. This loop |
| 19 | ; is mostly cold; only %do.cond125 and %land.rhs131 are hot. |
| 20 | ; CHECK: %do.cond125 |
| 21 | ; CHECK-NOT: {{Spill|Reload}} |
| 22 | ; CHECK: jbe |
| 23 | |
| 24 | ; Verify that block placement doesn't destroy source order. It's important that |
| 25 | ; the two hot blocks are laid out close to each other. |
| 26 | ; CHECK-NEXT: %land.rhs131 |
| 27 | ; CHECK: jne |
| 28 | ; CHECK: jmp |
; @longest_match(%s, %cur_match)
;
; Starting at candidate position %cur_match, follows a chain of candidate
; positions read from the i16 table %6 (field 16 of %s) and tracks the longest
; run of bytes in the window buffer %1 (field 14) that matches the bytes at
; window + strstart.
;
; Returns: the best length found, clamped (unsigned) to %11, the value loaded
;          from field 29 (%lookahead).
; Side effect: stores the current candidate position into field 28
;          (%match_start) each time a strictly longer match is found.
;
; NOTE: the block names %do.cond125 and %land.rhs131 are matched by the CHECK
; lines that precede this function; do not rename or reorder blocks.
| 29 | define i32 @longest_match(%struct.internal_state* nocapture %s, i32 %cur_match) nounwind { |
; entry: runs once. Loads the state fields and precomputes every value that
; does not change inside the loop:
;   %add.ptr         = window + strstart (start of the bytes being matched)
;   %add.ptr12       = window + strstart + 258 (upper bound for the scan pointer)
;   %sub6.           = strstart > w_size - 262 ? strstart - (w_size - 262) : 0
;                      (candidates must be unsigned-greater than this to continue)
;   %chain_length.0  = prev_length < good_match ? max_chain_length
;                                               : max_chain_length >> 2
;   %.               = unsigned min(nice_match, lookahead)
;   %8 / %9          = bytes at window[strstart + prev_length - 1] and
;                      window[strstart + prev_length]
| 30 | entry: |
| 31 | %max_chain_length = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 31 |
| 32 | %0 = load i32* %max_chain_length, align 4 |
| 33 | %window = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 14 |
| 34 | %1 = load i8** %window, align 8 |
| 35 | %strstart = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 27 |
| 36 | %2 = load i32* %strstart, align 4 |
| 37 | %idx.ext = zext i32 %2 to i64 |
| 38 | %add.ptr = getelementptr inbounds i8* %1, i64 %idx.ext |
| 39 | %prev_length = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 30 |
| 40 | %3 = load i32* %prev_length, align 4 |
| 41 | %nice_match1 = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 36 |
| 42 | %4 = load i32* %nice_match1, align 4 |
| 43 | %w_size = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 11 |
| 44 | %5 = load i32* %w_size, align 4 |
| 45 | %sub = add i32 %5, -262 |
| 46 | %cmp = icmp ugt i32 %2, %sub |
| 47 | %sub6 = sub i32 %2, %sub |
| 48 | %sub6. = select i1 %cmp, i32 %sub6, i32 0 |
| 49 | %prev7 = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 16 |
| 50 | %6 = load i16** %prev7, align 8 |
| 51 | %w_mask = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 13 |
| 52 | %7 = load i32* %w_mask, align 4 |
| 53 | %add.ptr11.sum = add i64 %idx.ext, 258 |
| 54 | %add.ptr12 = getelementptr inbounds i8* %1, i64 %add.ptr11.sum |
| 55 | %sub13 = add nsw i32 %3, -1 |
| 56 | %idxprom = sext i32 %sub13 to i64 |
| 57 | %add.ptr.sum = add i64 %idxprom, %idx.ext |
| 58 | %arrayidx = getelementptr inbounds i8* %1, i64 %add.ptr.sum |
| 59 | %8 = load i8* %arrayidx, align 1 |
| 60 | %idxprom14 = sext i32 %3 to i64 |
| 61 | %add.ptr.sum213 = add i64 %idxprom14, %idx.ext |
| 62 | %arrayidx15 = getelementptr inbounds i8* %1, i64 %add.ptr.sum213 |
| 63 | %9 = load i8* %arrayidx15, align 1 |
| 64 | %good_match = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 35 |
| 65 | %10 = load i32* %good_match, align 4 |
| 66 | %cmp17 = icmp ult i32 %3, %10 |
| 67 | %shr = lshr i32 %0, 2 |
| 68 | %chain_length.0 = select i1 %cmp17, i32 %0, i32 %shr |
| 69 | %lookahead = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 29 |
| 70 | %11 = load i32* %lookahead, align 4 |
| 71 | %cmp18 = icmp ugt i32 %4, %11 |
| 72 | %. = select i1 %cmp18, i32 %11, i32 %4 |
| 73 | %match_start = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 28 |
| 74 | %add.ptr.sum217 = add i64 %idx.ext, 1 |
| 75 | %arrayidx44 = getelementptr inbounds i8* %1, i64 %add.ptr.sum217 |
| 76 | %add.ptr.sum218 = add i64 %idx.ext, 2 |
| 77 | %add.ptr50 = getelementptr inbounds i8* %1, i64 %add.ptr.sum218 |
| 78 | %sub.ptr.lhs.cast = ptrtoint i8* %add.ptr12 to i64 |
| 79 | br label %do.body |
| 80 | |
; do.body: header of the outer (chain-walking) loop. The phis carry the best
; length so far, the remaining chain budget, the current candidate position and
; the two reference bytes. First quick-reject test: the candidate's byte at
; offset best_len must equal %scan_end.0, otherwise skip to %do.cond125.
| 81 | do.body: ; preds = %land.rhs131, %entry |
| 82 | %best_len.0 = phi i32 [ %best_len.1, %land.rhs131 ], [ %3, %entry ] |
| 83 | %chain_length.1 = phi i32 [ %dec, %land.rhs131 ], [ %chain_length.0, %entry ] |
| 84 | %cur_match.addr.0 = phi i32 [ %conv128, %land.rhs131 ], [ %cur_match, %entry ] |
| 85 | %scan_end1.0 = phi i8 [ %scan_end1.1, %land.rhs131 ], [ %8, %entry ] |
| 86 | %scan_end.0 = phi i8 [ %scan_end.1, %land.rhs131 ], [ %9, %entry ] |
| 87 | %idx.ext23 = zext i32 %cur_match.addr.0 to i64 |
| 88 | %add.ptr24 = getelementptr inbounds i8* %1, i64 %idx.ext23 |
| 89 | %idxprom25 = sext i32 %best_len.0 to i64 |
| 90 | %add.ptr24.sum = add i64 %idx.ext23, %idxprom25 |
| 91 | %arrayidx26 = getelementptr inbounds i8* %1, i64 %add.ptr24.sum |
| 92 | %12 = load i8* %arrayidx26, align 1 |
| 93 | %cmp28 = icmp eq i8 %12, %scan_end.0 |
| 94 | br i1 %cmp28, label %lor.lhs.false, label %do.cond125 |
| 95 | |
; Second quick-reject test: the candidate's byte at offset best_len - 1 must
; equal %scan_end1.0.
| 96 | lor.lhs.false: ; preds = %do.body |
| 97 | %sub30 = add nsw i32 %best_len.0, -1 |
| 98 | %idxprom31 = sext i32 %sub30 to i64 |
| 99 | %add.ptr24.sum214 = add i64 %idx.ext23, %idxprom31 |
| 100 | %arrayidx32 = getelementptr inbounds i8* %1, i64 %add.ptr24.sum214 |
| 101 | %13 = load i8* %arrayidx32, align 1 |
| 102 | %cmp35 = icmp eq i8 %13, %scan_end1.0 |
| 103 | br i1 %cmp35, label %lor.lhs.false37, label %do.cond125 |
| 104 | |
; Third quick-reject test: the first byte of the candidate must equal the first
; byte at window + strstart.
| 105 | lor.lhs.false37: ; preds = %lor.lhs.false |
| 106 | %14 = load i8* %add.ptr24, align 1 |
| 107 | %15 = load i8* %add.ptr, align 1 |
| 108 | %cmp40 = icmp eq i8 %14, %15 |
| 109 | br i1 %cmp40, label %lor.lhs.false42, label %do.cond125 |
| 110 | |
; Fourth quick-reject test: the second bytes (offset 1) must also be equal.
| 111 | lor.lhs.false42: ; preds = %lor.lhs.false37 |
| 112 | %add.ptr24.sum215 = add i64 %idx.ext23, 1 |
| 113 | %incdec.ptr = getelementptr inbounds i8* %1, i64 %add.ptr24.sum215 |
| 114 | %16 = load i8* %incdec.ptr, align 1 |
| 115 | %17 = load i8* %arrayidx44, align 1 |
| 116 | %cmp46 = icmp eq i8 %16, %17 |
| 117 | br i1 %cmp46, label %if.end49, label %do.cond125 |
| 118 | |
; All quick-reject tests passed. Set up the candidate pointer at offset 2; the
; matching scan-side pointer at offset 2 is the precomputed %add.ptr50.
| 119 | if.end49: ; preds = %lor.lhs.false42 |
| 120 | %incdec.ptr.sum = add i64 %idx.ext23, 2 |
| 121 | %incdec.ptr51 = getelementptr inbounds i8* %1, i64 %incdec.ptr.sum |
| 122 | br label %do.cond |
| 123 | |
; do.cond .. land.lhs.true100: inner byte-comparison loop, laid out as eight
; consecutive single-byte comparisons (offsets 1..8 from %scan.1 / %match.0) per
; iteration. Any mismatch leaves to %do.end, carrying the scan pointer at which
; the mismatch was seen.
| 124 | do.cond: ; preds = %land.lhs.true100, %if.end49 |
| 125 | %match.0 = phi i8* [ %incdec.ptr51, %if.end49 ], [ %incdec.ptr103, %land.lhs.true100 ] |
| 126 | %scan.1 = phi i8* [ %add.ptr50, %if.end49 ], [ %incdec.ptr101, %land.lhs.true100 ] |
| 127 | %incdec.ptr53 = getelementptr inbounds i8* %scan.1, i64 1 |
| 128 | %18 = load i8* %incdec.ptr53, align 1 |
| 129 | %incdec.ptr55 = getelementptr inbounds i8* %match.0, i64 1 |
| 130 | %19 = load i8* %incdec.ptr55, align 1 |
| 131 | %cmp57 = icmp eq i8 %18, %19 |
| 132 | br i1 %cmp57, label %land.lhs.true, label %do.end |
| 133 | |
| 134 | land.lhs.true: ; preds = %do.cond |
| 135 | %incdec.ptr59 = getelementptr inbounds i8* %scan.1, i64 2 |
| 136 | %20 = load i8* %incdec.ptr59, align 1 |
| 137 | %incdec.ptr61 = getelementptr inbounds i8* %match.0, i64 2 |
| 138 | %21 = load i8* %incdec.ptr61, align 1 |
| 139 | %cmp63 = icmp eq i8 %20, %21 |
| 140 | br i1 %cmp63, label %land.lhs.true65, label %do.end |
| 141 | |
| 142 | land.lhs.true65: ; preds = %land.lhs.true |
| 143 | %incdec.ptr66 = getelementptr inbounds i8* %scan.1, i64 3 |
| 144 | %22 = load i8* %incdec.ptr66, align 1 |
| 145 | %incdec.ptr68 = getelementptr inbounds i8* %match.0, i64 3 |
| 146 | %23 = load i8* %incdec.ptr68, align 1 |
| 147 | %cmp70 = icmp eq i8 %22, %23 |
| 148 | br i1 %cmp70, label %land.lhs.true72, label %do.end |
| 149 | |
| 150 | land.lhs.true72: ; preds = %land.lhs.true65 |
| 151 | %incdec.ptr73 = getelementptr inbounds i8* %scan.1, i64 4 |
| 152 | %24 = load i8* %incdec.ptr73, align 1 |
| 153 | %incdec.ptr75 = getelementptr inbounds i8* %match.0, i64 4 |
| 154 | %25 = load i8* %incdec.ptr75, align 1 |
| 155 | %cmp77 = icmp eq i8 %24, %25 |
| 156 | br i1 %cmp77, label %land.lhs.true79, label %do.end |
| 157 | |
| 158 | land.lhs.true79: ; preds = %land.lhs.true72 |
| 159 | %incdec.ptr80 = getelementptr inbounds i8* %scan.1, i64 5 |
| 160 | %26 = load i8* %incdec.ptr80, align 1 |
| 161 | %incdec.ptr82 = getelementptr inbounds i8* %match.0, i64 5 |
| 162 | %27 = load i8* %incdec.ptr82, align 1 |
| 163 | %cmp84 = icmp eq i8 %26, %27 |
| 164 | br i1 %cmp84, label %land.lhs.true86, label %do.end |
| 165 | |
| 166 | land.lhs.true86: ; preds = %land.lhs.true79 |
| 167 | %incdec.ptr87 = getelementptr inbounds i8* %scan.1, i64 6 |
| 168 | %28 = load i8* %incdec.ptr87, align 1 |
| 169 | %incdec.ptr89 = getelementptr inbounds i8* %match.0, i64 6 |
| 170 | %29 = load i8* %incdec.ptr89, align 1 |
| 171 | %cmp91 = icmp eq i8 %28, %29 |
| 172 | br i1 %cmp91, label %land.lhs.true93, label %do.end |
| 173 | |
| 174 | land.lhs.true93: ; preds = %land.lhs.true86 |
| 175 | %incdec.ptr94 = getelementptr inbounds i8* %scan.1, i64 7 |
| 176 | %30 = load i8* %incdec.ptr94, align 1 |
| 177 | %incdec.ptr96 = getelementptr inbounds i8* %match.0, i64 7 |
| 178 | %31 = load i8* %incdec.ptr96, align 1 |
| 179 | %cmp98 = icmp eq i8 %30, %31 |
| 180 | br i1 %cmp98, label %land.lhs.true100, label %do.end |
| 181 | |
; Eighth comparison and inner-loop latch: keep iterating only while the bytes
; are equal AND the advanced scan pointer is still below %add.ptr12
; (window + strstart + 258).
| 182 | land.lhs.true100: ; preds = %land.lhs.true93 |
| 183 | %incdec.ptr101 = getelementptr inbounds i8* %scan.1, i64 8 |
| 184 | %32 = load i8* %incdec.ptr101, align 1 |
| 185 | %incdec.ptr103 = getelementptr inbounds i8* %match.0, i64 8 |
| 186 | %33 = load i8* %incdec.ptr103, align 1 |
| 187 | %cmp105 = icmp eq i8 %32, %33 |
| 188 | %cmp107 = icmp ult i8* %incdec.ptr101, %add.ptr12 |
| 189 | %or.cond = and i1 %cmp105, %cmp107 |
| 190 | br i1 %or.cond, label %do.cond, label %do.end |
| 191 | |
; do.end: inner loop finished. Compute the match length as
;   %sub110 = 258 - trunc(%add.ptr12 - %scan.2)
; and take the update path only if it is (signed) greater than %best_len.0.
| 192 | do.end: ; preds = %land.lhs.true100, %land.lhs.true93, %land.lhs.true86, %land.lhs.true79, %land.lhs.true72, %land.lhs.true65, %land.lhs.true, %do.cond |
| 193 | %scan.2 = phi i8* [ %incdec.ptr101, %land.lhs.true100 ], [ %incdec.ptr94, %land.lhs.true93 ], [ %incdec.ptr87, %land.lhs.true86 ], [ %incdec.ptr80, %land.lhs.true79 ], [ %incdec.ptr73, %land.lhs.true72 ], [ %incdec.ptr66, %land.lhs.true65 ], [ %incdec.ptr59, %land.lhs.true ], [ %incdec.ptr53, %do.cond ] |
| 194 | %sub.ptr.rhs.cast = ptrtoint i8* %scan.2 to i64 |
| 195 | %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast |
| 196 | %conv109 = trunc i64 %sub.ptr.sub to i32 |
| 197 | %sub110 = sub nsw i32 258, %conv109 |
| 198 | %cmp112 = icmp sgt i32 %sub110, %best_len.0 |
| 199 | br i1 %cmp112, label %if.then114, label %do.cond125 |
| 200 | |
; New best match: record the candidate position in field 28 (%match_start).
; If the new length is not (signed) less than %. the search stops immediately
; via %do.end135.
| 201 | if.then114: ; preds = %do.end |
| 202 | store i32 %cur_match.addr.0, i32* %match_start, align 4 |
| 203 | %cmp115 = icmp slt i32 %sub110, %. |
| 204 | br i1 %cmp115, label %if.end118, label %do.end135 |
| 205 | |
; Refresh the two reference bytes for the new best length:
;   %34 = window[strstart + %sub110 - 1], %35 = window[strstart + %sub110].
; They feed %scan_end1.1 / %scan_end.1 in %do.cond125.
| 206 | if.end118: ; preds = %if.then114 |
| 207 | %sub119 = add nsw i32 %sub110, -1 |
| 208 | %idxprom120 = sext i32 %sub119 to i64 |
| 209 | %add.ptr111.sum = add i64 %idxprom120, %idx.ext |
| 210 | %arrayidx121 = getelementptr inbounds i8* %1, i64 %add.ptr111.sum |
| 211 | %34 = load i8* %arrayidx121, align 1 |
| 212 | %idxprom122 = sext i32 %sub110 to i64 |
| 213 | %add.ptr111.sum216 = add i64 %idxprom122, %idx.ext |
| 214 | %arrayidx123 = getelementptr inbounds i8* %1, i64 %add.ptr111.sum216 |
| 215 | %35 = load i8* %arrayidx123, align 1 |
| 216 | br label %do.cond125 |
| 217 | |
; do.cond125: common continue point of the outer loop. Merges the (possibly
; updated) best length and reference bytes, then fetches the next candidate as
; zext(%6[%cur_match.addr.0 & %7]). The loop continues only while that candidate
; is unsigned-greater than %sub6.; otherwise it exits to %do.end135.
; This block name is matched by a CHECK line before the function.
| 218 | do.cond125: ; preds = %if.end118, %do.end, %lor.lhs.false42, %lor.lhs.false37, %lor.lhs.false, %do.body |
| 219 | %best_len.1 = phi i32 [ %best_len.0, %do.body ], [ %best_len.0, %lor.lhs.false ], [ %best_len.0, %lor.lhs.false37 ], [ %best_len.0, %lor.lhs.false42 ], [ %sub110, %if.end118 ], [ %best_len.0, %do.end ] |
| 220 | %scan_end1.1 = phi i8 [ %scan_end1.0, %do.body ], [ %scan_end1.0, %lor.lhs.false ], [ %scan_end1.0, %lor.lhs.false37 ], [ %scan_end1.0, %lor.lhs.false42 ], [ %34, %if.end118 ], [ %scan_end1.0, %do.end ] |
| 221 | %scan_end.1 = phi i8 [ %scan_end.0, %do.body ], [ %scan_end.0, %lor.lhs.false ], [ %scan_end.0, %lor.lhs.false37 ], [ %scan_end.0, %lor.lhs.false42 ], [ %35, %if.end118 ], [ %scan_end.0, %do.end ] |
| 222 | %and = and i32 %cur_match.addr.0, %7 |
| 223 | %idxprom126 = zext i32 %and to i64 |
| 224 | %arrayidx127 = getelementptr inbounds i16* %6, i64 %idxprom126 |
| 225 | %36 = load i16* %arrayidx127, align 2 |
| 226 | %conv128 = zext i16 %36 to i32 |
| 227 | %cmp129 = icmp ugt i32 %conv128, %sub6. |
| 228 | br i1 %cmp129, label %land.rhs131, label %do.end135 |
| 229 | |
; land.rhs131: outer-loop latch. Decrement the chain budget and loop back to
; %do.body unless it reached zero. This block name is also matched by a CHECK
; line before the function.
| 230 | land.rhs131: ; preds = %do.cond125 |
| 231 | %dec = add i32 %chain_length.1, -1 |
| 232 | %cmp132 = icmp eq i32 %dec, 0 |
| 233 | br i1 %cmp132, label %do.end135, label %do.body |
| 234 | |
; do.end135: single exit. Returns the unsigned minimum of the best length and
; %11 (the value loaded from field 29, %lookahead).
| 235 | do.end135: ; preds = %land.rhs131, %do.cond125, %if.then114 |
| 236 | %best_len.2 = phi i32 [ %best_len.1, %land.rhs131 ], [ %best_len.1, %do.cond125 ], [ %sub110, %if.then114 ] |
| 237 | %cmp137 = icmp ugt i32 %best_len.2, %11 |
| 238 | %.best_len.2 = select i1 %cmp137, i32 %11, i32 %best_len.2 |
| 239 | ret i32 %.best_len.2 |
| 240 | } |