Wei Mi | a62f058 | 2016-02-05 18:14:24 +0000 | [diff] [blame] | 1 | ; PR26374: Check no stack slots are allocated for vregs which have no real reference. |
| 2 | ; RUN: llc < %s | FileCheck %s |
; Module header: target description, the two struct types, and the globals
; referenced by @fn.
; The datalayout string starts with "e" (little-endian) and "m:e" (ELF name
; mangling); the triple selects x86-64 Linux.
| 3 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 4 | target triple = "x86_64-unknown-linux-gnu" |
| 5 | |
; ImageParameters: @fn reads field 0 as %opix_x, field 1 as %opix_y, and indexes
; field 2 (a zero-length array of [16 x i16] rows) by row and column.
; InputParameters: @fn reads field 0 as %rdopt and field 1 as %Transform8x8Mode.
| 6 | %struct.ImageParameters = type { i32, i32, [0 x [16 x i16]] } |
| 7 | %struct.InputParameters = type { i32, i32 } |
| 8 | |
; @c, @a and @b are pointers that @fn loads; @d and @e are i32 arrays that @fn
; writes through constant-expression addresses and then passes to @distortion4x4.
; NOTE(review): several of those constant addresses lie beyond the declared
; array sizes ([6 x i32] / [4 x i32]). The RUN line only compiles this file
; with llc, so this looks like an artifact of test reduction -- confirm before
; "fixing" it, since changing the globals may change the generated code.
| 9 | @c = common global %struct.ImageParameters* null, align 8 |
| 10 | @a = common global i16** null, align 8 |
| 11 | @d = common global [6 x i32] zeroinitializer, align 16 |
| 12 | @b = common global %struct.InputParameters* null, align 8 |
| 13 | @e = common global [4 x i32] zeroinitializer, align 16 |
| 14 | |
| 15 | ; It is not easy to check that there are no unused holes in the stack allocated for spills, |
| 16 | ; so simply check that the size of the allocated stack stays below 350. |
| 17 | ; (408 is used before the fix for PR26374. 344 is used after the fix). |
| 18 | ; |
| 19 | ; CHECK-LABEL: @fn |
| 20 | ; CHECK: subq {{\$3[0-4][0-9]}}, %rsp |
| 21 | |
| 22 | ; Function Attrs: nounwind uwtable |
; @fn: compile-only input for the PR26374 stack-size check.
; It fills an 8x8 i32 stack array %n with differences of <4 x i16> values loaded
; through @a and @c, adds the results of @distortion4x4 calls to a running i32
; sum, and returns that sum.
; NOTE(review): the instruction order and value live ranges are presumably what
; produce the spill pattern the CHECK line measures -- do not reorder, rename
; away, or simplify instructions without re-validating the expected stack size.
| 23 | define i32 @fn() #0 { |
| 24 | entry: |
; %n is the 8x8 i32 scratch array; its 256-byte lifetime starts here and ends
; in for.end75.
| 25 | %n = alloca [8 x [8 x i32]], align 16 |
| 26 | %tmp = bitcast [8 x [8 x i32]]* %n to i8* |
| 27 | call void @llvm.lifetime.start(i64 256, i8* %tmp) #3 |
; %tmp1..%tmp8 are i8* pointers to rows 0..7 of %n. They are computed here, in
; the entry block, but their only uses are the memcpy calls in
; for.body61.preheader.
| 28 | %tmp1 = bitcast [8 x [8 x i32]]* %n to i8* |
| 29 | %arraydecay.1 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 1, i64 0 |
| 30 | %tmp2 = bitcast i32* %arraydecay.1 to i8* |
| 31 | %arraydecay.2 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 2, i64 0 |
| 32 | %tmp3 = bitcast i32* %arraydecay.2 to i8* |
| 33 | %arraydecay.3 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 3, i64 0 |
| 34 | %tmp4 = bitcast i32* %arraydecay.3 to i8* |
| 35 | %arraydecay.4 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 4, i64 0 |
| 36 | %tmp5 = bitcast i32* %arraydecay.4 to i8* |
| 37 | %arraydecay.5 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 5, i64 0 |
| 38 | %tmp6 = bitcast i32* %arraydecay.5 to i8* |
| 39 | %arraydecay.6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 6, i64 0 |
| 40 | %tmp7 = bitcast i32* %arraydecay.6 to i8* |
| 41 | %arraydecay.7 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 7, i64 0 |
| 42 | %tmp8 = bitcast i32* %arraydecay.7 to i8* |
| 43 | br label %for.body |
| 44 | |
; Outer loop: %q.0131 counts 0..3 (for.inc73 exits when %inc74 == 4) and
; %m.0130 carries the running sum.
; %shl = (q / 2) * 8 and %shl1 = (q % 2) * 8 are the start values of the two
; inner induction variables; OR-ing each with 4 gives the matching loop bound.
| 45 | for.body: ; preds = %for.inc73, %entry |
| 46 | %q.0131 = phi i32 [ 0, %entry ], [ %inc74, %for.inc73 ] |
| 47 | %m.0130 = phi i32 [ 0, %entry ], [ %m.4, %for.inc73 ] |
| 48 | %div = sdiv i32 %q.0131, 2 |
| 49 | %shl = shl i32 %div, 3 |
| 50 | %rem = srem i32 %q.0131, 2 |
| 51 | %shl1 = shl nsw i32 %rem, 3 |
| 52 | %tmp9 = sext i32 %shl1 to i64 |
| 53 | %tmp10 = sext i32 %shl to i64 |
| 54 | %tmp11 = or i32 %shl1, 4 |
| 55 | %tmp12 = sext i32 %tmp11 to i64 |
| 56 | %tmp13 = or i32 %shl, 4 |
| 57 | %tmp14 = sext i32 %tmp13 to i64 |
| 58 | br label %for.body4 |
| 59 | |
; Middle loop: %indvars.iv148 starts at %tmp10 and advances by 4 in for.inc48,
; repeating while its pre-increment value is < %tmp14.
; %tmp18 is the offset from the loop start (row index into %n). %tmp19, %tmp20,
; %tmp23 and %tmp26 are field 1 of *@c plus the induction variable, +0..+3,
; and index the pointer table loaded from @a. The "or ..., 1/2/3" values are
; the matching row indices for field 2 of *@c and for %n.
| 60 | for.body4: ; preds = %for.inc48, %for.body |
| 61 | %indvars.iv148 = phi i64 [ %tmp10, %for.body ], [ %indvars.iv.next149, %for.inc48 ] |
| 62 | %m.1126 = phi i32 [ %m.0130, %for.body ], [ %m.3.lcssa, %for.inc48 ] |
| 63 | %tmp15 = load %struct.ImageParameters*, %struct.ImageParameters** @c, align 8 |
| 64 | %opix_y = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp15, i64 0, i32 1 |
| 65 | %tmp16 = load i32, i32* %opix_y, align 4 |
| 66 | %tmp17 = trunc i64 %indvars.iv148 to i32 |
| 67 | %add5 = add nsw i32 %tmp16, %tmp17 |
| 68 | %tmp18 = sub nuw nsw i64 %indvars.iv148, %tmp10 |
| 69 | %tmp19 = sext i32 %add5 to i64 |
| 70 | %tmp20 = add nsw i64 %tmp19, 1 |
| 71 | %tmp21 = or i64 %indvars.iv148, 1 |
| 72 | %tmp22 = or i64 %tmp18, 1 |
| 73 | %tmp23 = add nsw i64 %tmp19, 2 |
| 74 | %tmp24 = or i64 %indvars.iv148, 2 |
| 75 | %tmp25 = or i64 %tmp18, 2 |
| 76 | %tmp26 = add nsw i64 %tmp19, 3 |
| 77 | %tmp27 = or i64 %indvars.iv148, 3 |
| 78 | %tmp28 = or i64 %tmp18, 3 |
| 79 | br label %for.body9 |
| 80 | |
; Inner loop: %indvars.iv145 starts at %tmp9 and advances by 4 in the crit-edge
; block, repeating while its pre-increment value is < %tmp12.
; %tmp29 is the pointer loaded from @c (reloaded as %.pre on each back edge).
; @a and @c are loaded again after the @LumaPrediction4x4 call.
| 81 | for.body9: ; preds = %for.inc45.for.body9_crit_edge, %for.body4 |
| 82 | %tmp29 = phi %struct.ImageParameters* [ %tmp15, %for.body4 ], [ %.pre, %for.inc45.for.body9_crit_edge ] |
| 83 | %indvars.iv145 = phi i64 [ %tmp9, %for.body4 ], [ %indvars.iv.next146, %for.inc45.for.body9_crit_edge ] |
| 84 | %m.2124 = phi i32 [ %m.1126, %for.body4 ], [ %m.3, %for.inc45.for.body9_crit_edge ] |
| 85 | %opix_x = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp29, i64 0, i32 0 |
| 86 | %tmp30 = load i32, i32* %opix_x, align 4 |
| 87 | %tmp31 = trunc i64 %indvars.iv145 to i32 |
| 88 | %add10 = add nsw i32 %tmp30, %tmp31 |
| 89 | tail call void @LumaPrediction4x4(i32 %tmp31, i32 %tmp17, i32 0, i32 0, i32 0, i16 signext 0, i16 signext 0) #3 |
| 90 | %tmp32 = load i16**, i16*** @a, align 8 |
| 91 | %tmp33 = load %struct.ImageParameters*, %struct.ImageParameters** @c, align 8 |
| 92 | %tmp34 = sub nuw nsw i64 %indvars.iv145, %tmp9 |
| 93 | %tmp35 = sext i32 %add10 to i64 |
| 94 | br label %for.cond14.preheader |
| 95 | |
; Four straight-line steps (value suffixes: none, .1, .2, .3). Each step loads
; <4 x i16> through the @a pointer table and <4 x i16> from field 2 of the @c
; struct, zero-extends both to <4 x i32>, subtracts, and stores the result
; twice: into %n and to a constant address based on @d.
; NOTE(review): the @d addresses used by steps .1-.3 reach past the declared
; [6 x i32]; presumably a test-reduction artifact (the file is only compiled).
| 96 | for.cond14.preheader: ; preds = %for.body9 |
| 97 | %arrayidx = getelementptr inbounds i16*, i16** %tmp32, i64 %tmp19 |
| 98 | %tmp36 = load i16*, i16** %arrayidx, align 8 |
| 99 | %arrayidx20 = getelementptr inbounds i16, i16* %tmp36, i64 %tmp35 |
| 100 | %arrayidx26 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp33, i64 0, i32 2, i64 %indvars.iv148, i64 %indvars.iv145 |
| 101 | %arrayidx35 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 %tmp18, i64 %tmp34 |
| 102 | %tmp37 = bitcast i16* %arrayidx20 to <4 x i16>* |
| 103 | %tmp38 = load <4 x i16>, <4 x i16>* %tmp37, align 2 |
| 104 | %tmp39 = zext <4 x i16> %tmp38 to <4 x i32> |
| 105 | %tmp40 = bitcast i16* %arrayidx26 to <4 x i16>* |
| 106 | %tmp41 = load <4 x i16>, <4 x i16>* %tmp40, align 2 |
| 107 | %tmp42 = zext <4 x i16> %tmp41 to <4 x i32> |
| 108 | %tmp43 = sub nsw <4 x i32> %tmp39, %tmp42 |
| 109 | %tmp44 = bitcast i32* %arrayidx35 to <4 x i32>* |
| 110 | store <4 x i32> %tmp43, <4 x i32>* %tmp44, align 16 |
| 111 | store <4 x i32> %tmp43, <4 x i32>* bitcast ([6 x i32]* @d to <4 x i32>*), align 16 |
| 112 | %arrayidx.1 = getelementptr inbounds i16*, i16** %tmp32, i64 %tmp20 |
| 113 | %tmp45 = load i16*, i16** %arrayidx.1, align 8 |
| 114 | %arrayidx20.1 = getelementptr inbounds i16, i16* %tmp45, i64 %tmp35 |
| 115 | %arrayidx26.1 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp33, i64 0, i32 2, i64 %tmp21, i64 %indvars.iv145 |
| 116 | %arrayidx35.1 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 %tmp22, i64 %tmp34 |
| 117 | %tmp46 = bitcast i16* %arrayidx20.1 to <4 x i16>* |
| 118 | %tmp47 = load <4 x i16>, <4 x i16>* %tmp46, align 2 |
| 119 | %tmp48 = zext <4 x i16> %tmp47 to <4 x i32> |
| 120 | %tmp49 = bitcast i16* %arrayidx26.1 to <4 x i16>* |
| 121 | %tmp50 = load <4 x i16>, <4 x i16>* %tmp49, align 2 |
| 122 | %tmp51 = zext <4 x i16> %tmp50 to <4 x i32> |
| 123 | %tmp52 = sub nsw <4 x i32> %tmp48, %tmp51 |
| 124 | %tmp53 = bitcast i32* %arrayidx35.1 to <4 x i32>* |
| 125 | store <4 x i32> %tmp52, <4 x i32>* %tmp53, align 16 |
| 126 | store <4 x i32> %tmp52, <4 x i32>* bitcast (i32* getelementptr inbounds ([6 x i32], [6 x i32]* @d, i64 0, i64 4) to <4 x i32>*), align 16 |
| 127 | %arrayidx.2 = getelementptr inbounds i16*, i16** %tmp32, i64 %tmp23 |
| 128 | %tmp54 = load i16*, i16** %arrayidx.2, align 8 |
| 129 | %arrayidx20.2 = getelementptr inbounds i16, i16* %tmp54, i64 %tmp35 |
| 130 | %arrayidx26.2 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp33, i64 0, i32 2, i64 %tmp24, i64 %indvars.iv145 |
| 131 | %arrayidx35.2 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 %tmp25, i64 %tmp34 |
| 132 | %tmp55 = bitcast i16* %arrayidx20.2 to <4 x i16>* |
| 133 | %tmp56 = load <4 x i16>, <4 x i16>* %tmp55, align 2 |
| 134 | %tmp57 = zext <4 x i16> %tmp56 to <4 x i32> |
| 135 | %tmp58 = bitcast i16* %arrayidx26.2 to <4 x i16>* |
| 136 | %tmp59 = load <4 x i16>, <4 x i16>* %tmp58, align 2 |
| 137 | %tmp60 = zext <4 x i16> %tmp59 to <4 x i32> |
| 138 | %tmp61 = sub nsw <4 x i32> %tmp57, %tmp60 |
| 139 | %tmp62 = bitcast i32* %arrayidx35.2 to <4 x i32>* |
| 140 | store <4 x i32> %tmp61, <4 x i32>* %tmp62, align 16 |
| 141 | store <4 x i32> %tmp61, <4 x i32>* bitcast (i32* getelementptr ([6 x i32], [6 x i32]* @d, i64 1, i64 2) to <4 x i32>*), align 16 |
| 142 | %arrayidx.3 = getelementptr inbounds i16*, i16** %tmp32, i64 %tmp26 |
| 143 | %tmp63 = load i16*, i16** %arrayidx.3, align 8 |
| 144 | %arrayidx20.3 = getelementptr inbounds i16, i16* %tmp63, i64 %tmp35 |
| 145 | %arrayidx26.3 = getelementptr inbounds %struct.ImageParameters, %struct.ImageParameters* %tmp33, i64 0, i32 2, i64 %tmp27, i64 %indvars.iv145 |
| 146 | %arrayidx35.3 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %n, i64 0, i64 %tmp28, i64 %tmp34 |
| 147 | %tmp64 = bitcast i16* %arrayidx20.3 to <4 x i16>* |
| 148 | %tmp65 = load <4 x i16>, <4 x i16>* %tmp64, align 2 |
| 149 | %tmp66 = zext <4 x i16> %tmp65 to <4 x i32> |
| 150 | %tmp67 = bitcast i16* %arrayidx26.3 to <4 x i16>* |
| 151 | %tmp68 = load <4 x i16>, <4 x i16>* %tmp67, align 2 |
| 152 | %tmp69 = zext <4 x i16> %tmp68 to <4 x i32> |
| 153 | %tmp70 = sub nsw <4 x i32> %tmp66, %tmp69 |
| 154 | %tmp71 = bitcast i32* %arrayidx35.3 to <4 x i32>* |
| 155 | store <4 x i32> %tmp70, <4 x i32>* %tmp71, align 16 |
| 156 | store <4 x i32> %tmp70, <4 x i32>* bitcast (i32* getelementptr ([6 x i32], [6 x i32]* @d, i64 2, i64 0) to <4 x i32>*), align 16 |
; Branch on *@b: @distortion4x4(@d) is called (if.then) unless field 0 is zero
; and field 1 is non-zero, in which case control goes straight to for.inc45.
| 157 | %tmp72 = load %struct.InputParameters*, %struct.InputParameters** @b, align 8 |
| 158 | %rdopt = getelementptr inbounds %struct.InputParameters, %struct.InputParameters* %tmp72, i64 0, i32 0 |
| 159 | %tmp73 = load i32, i32* %rdopt, align 4 |
| 160 | %cmp42 = icmp eq i32 %tmp73, 0 |
| 161 | br i1 %cmp42, label %land.lhs.true, label %if.then |
| 162 | |
| 163 | land.lhs.true: ; preds = %for.cond14.preheader |
| 164 | %Transform8x8Mode = getelementptr inbounds %struct.InputParameters, %struct.InputParameters* %tmp72, i64 0, i32 1 |
| 165 | %tmp74 = load i32, i32* %Transform8x8Mode, align 4 |
| 166 | %tobool = icmp eq i32 %tmp74, 0 |
| 167 | br i1 %tobool, label %if.then, label %for.inc45 |
| 168 | |
; Add the @distortion4x4 result for @d to the running sum.
| 169 | if.then: ; preds = %land.lhs.true, %for.cond14.preheader |
| 170 | %call = tail call i32 @distortion4x4(i32* nonnull getelementptr inbounds ([6 x i32], [6 x i32]* @d, i64 0, i64 0)) #3 |
| 171 | %add44 = add nsw i32 %call, %m.2124 |
| 172 | br label %for.inc45 |
| 173 | |
; Inner-loop latch: %m.3 merges the sum from both paths; the exit test compares
; the current (not yet incremented) %indvars.iv145 against %tmp12.
| 174 | for.inc45: ; preds = %if.then, %land.lhs.true |
| 175 | %m.3 = phi i32 [ %m.2124, %land.lhs.true ], [ %add44, %if.then ] |
| 176 | %cmp8 = icmp slt i64 %indvars.iv145, %tmp12 |
| 177 | br i1 %cmp8, label %for.inc45.for.body9_crit_edge, label %for.inc48 |
| 178 | |
; Inner-loop back edge: step the induction variable by 4 and reload @c.
| 179 | for.inc45.for.body9_crit_edge: ; preds = %for.inc45 |
| 180 | %indvars.iv.next146 = add nsw i64 %indvars.iv145, 4 |
| 181 | %.pre = load %struct.ImageParameters*, %struct.ImageParameters** @c, align 8 |
| 182 | br label %for.body9 |
| 183 | |
; Middle-loop latch: step %indvars.iv148 by 4; the exit test uses the
; pre-increment value against %tmp14.
| 184 | for.inc48: ; preds = %for.inc45 |
| 185 | %m.3.lcssa = phi i32 [ %m.3, %for.inc45 ] |
| 186 | %indvars.iv.next149 = add nsw i64 %indvars.iv148, 4 |
| 187 | %cmp3 = icmp slt i64 %indvars.iv148, %tmp14 |
| 188 | br i1 %cmp3, label %for.body4, label %for.end50 |
| 189 | |
; After the middle loop: re-read *@b. Only when field 0 is zero and field 1 is
; non-zero does control reach for.body61.preheader; otherwise go to for.inc73.
| 190 | for.end50: ; preds = %for.inc48 |
| 191 | %m.3.lcssa.lcssa = phi i32 [ %m.3.lcssa, %for.inc48 ] |
| 192 | %tmp75 = load %struct.InputParameters*, %struct.InputParameters** @b, align 8 |
| 193 | %rdopt51 = getelementptr inbounds %struct.InputParameters, %struct.InputParameters* %tmp75, i64 0, i32 0 |
| 194 | %tmp76 = load i32, i32* %rdopt51, align 4 |
| 195 | %cmp52 = icmp eq i32 %tmp76, 0 |
| 196 | br i1 %cmp52, label %land.lhs.true54, label %for.inc73 |
| 197 | |
| 198 | land.lhs.true54: ; preds = %for.end50 |
| 199 | %Transform8x8Mode55 = getelementptr inbounds %struct.InputParameters, %struct.InputParameters* %tmp75, i64 0, i32 1 |
| 200 | %tmp77 = load i32, i32* %Transform8x8Mode55, align 4 |
| 201 | %tobool56 = icmp eq i32 %tmp77, 0 |
| 202 | br i1 %tobool56, label %for.inc73, label %for.body61.preheader |
| 203 | |
; Eight 32-byte memcpys, one per row of %n (sources %tmp1..%tmp8 from the entry
; block), to constant addresses based on @e; then add @distortion4x4(@e) to the
; running sum.
; NOTE(review): the @e destination addresses reach past the declared
; [4 x i32]; presumably a test-reduction artifact (the file is only compiled).
| 204 | for.body61.preheader: ; preds = %land.lhs.true54 |
| 205 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 4, i64 0) to i8*), i8* %tmp1, i64 32, i32 16, i1 false) |
| 206 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 6, i64 0) to i8*), i8* %tmp2, i64 32, i32 16, i1 false) |
| 207 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 8, i64 0) to i8*), i8* %tmp3, i64 32, i32 16, i1 false) |
| 208 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 10, i64 0) to i8*), i8* %tmp4, i64 32, i32 16, i1 false) |
| 209 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 12, i64 0) to i8*), i8* %tmp5, i64 32, i32 16, i1 false) |
| 210 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 14, i64 0) to i8*), i8* %tmp6, i64 32, i32 16, i1 false) |
| 211 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 16, i64 0) to i8*), i8* %tmp7, i64 32, i32 16, i1 false) |
| 212 | call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull bitcast (i32* getelementptr ([4 x i32], [4 x i32]* @e, i64 18, i64 0) to i8*), i8* %tmp8, i64 32, i32 16, i1 false) |
| 213 | %call70 = tail call i32 @distortion4x4(i32* nonnull getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 0)) #3 |
| 214 | %add71 = add nsw i32 %call70, %m.3.lcssa.lcssa |
| 215 | br label %for.inc73 |
| 216 | |
; Outer-loop latch: %m.4 merges the sum from the three incoming paths; the loop
; exits once the incremented counter equals 4.
| 217 | for.inc73: ; preds = %for.body61.preheader, %land.lhs.true54, %for.end50 |
| 218 | %m.4 = phi i32 [ %add71, %for.body61.preheader ], [ %m.3.lcssa.lcssa, %land.lhs.true54 ], [ %m.3.lcssa.lcssa, %for.end50 ] |
| 219 | %inc74 = add nuw nsw i32 %q.0131, 1 |
| 220 | %exitcond156 = icmp eq i32 %inc74, 4 |
| 221 | br i1 %exitcond156, label %for.end75, label %for.body |
| 222 | |
; Exit: end the lifetime of %n and return the accumulated sum.
| 223 | for.end75: ; preds = %for.inc73 |
| 224 | %m.4.lcssa = phi i32 [ %m.4, %for.inc73 ] |
| 225 | call void @llvm.lifetime.end(i64 256, i8* %tmp) #3 |
| 226 | ret i32 %m.4.lcssa |
| 227 | } |
| 228 | |
; Declarations of the intrinsics and external functions called by @fn, followed
; by the attribute groups referenced as #0..#3.
; The lifetime intrinsics are declared without a pointer-type suffix, and
; memcpy is declared with five parameters; every memcpy call in @fn passes
; i32 16 as the fourth argument.
; NOTE(review): these look like the older intrinsic signatures -- confirm that
; the llc under test still accepts them before modernizing anything.
| 229 | ; Function Attrs: argmemonly nounwind |
| 230 | declare void @llvm.lifetime.start(i64, i8* nocapture) #1 |
| 231 | |
; External callees: only declared here, never defined in this file.
| 232 | declare void @LumaPrediction4x4(i32, i32, i32, i32, i32, i16 signext, i16 signext) #2 |
| 233 | |
| 234 | declare i32 @distortion4x4(i32*) #2 |
| 235 | |
| 236 | ; Function Attrs: argmemonly nounwind |
| 237 | declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 |
| 238 | |
| 239 | ; Function Attrs: argmemonly nounwind |
| 240 | declare void @llvm.lifetime.end(i64, i8* nocapture) #1 |
| 241 | |
; Attribute groups:
;   #0 - the definition of @fn
;   #1 - the intrinsic declarations (lifetime.start/end, memcpy)
;   #2 - the external declarations (@LumaPrediction4x4, @distortion4x4)
;   #3 - call sites inside @fn (lifetime markers and the two external callees)
| 242 | attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 243 | attributes #1 = { argmemonly nounwind } |
| 244 | attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 245 | attributes #3 = { nounwind } |
| 246 | |