LowerBitSets: Align referenced globals.
This change aligns each referenced global to the next power of 2 (in bytes),
up to a maximum of 128 bytes. This makes it more likely that we will be able
to compress bit sets by using a greater alignment. In many more cases, we can
now take advantage of a new optimization, also introduced in this patch, that
removes the bit set check when the bit set is all ones.
The 128-byte maximum was found to provide the best tradeoff between instruction
overhead and data overhead in a recent build of Chromium. It allows us to
remove ~2.4 MB of instructions at the cost of ~250 KB of data.
Differential Revision: http://reviews.llvm.org/D7873
llvm-svn: 230540
diff --git a/llvm/test/Transforms/LowerBitSets/layout.ll b/llvm/test/Transforms/LowerBitSets/layout.ll
index 2966284..a0c6e77 100644
--- a/llvm/test/Transforms/LowerBitSets/layout.ll
+++ b/llvm/test/Transforms/LowerBitSets/layout.ll
@@ -6,7 +6,7 @@
; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
; The chosen layout in this case is a, e, b, d, c.
-; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 }
+; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
@a = constant i32 1
@b = constant i32 2
@c = constant i32 3
diff --git a/llvm/test/Transforms/LowerBitSets/simple.ll b/llvm/test/Transforms/LowerBitSets/simple.ll
index aaf89c0..0928524 100644
--- a/llvm/test/Transforms/LowerBitSets/simple.ll
+++ b/llvm/test/Transforms/LowerBitSets/simple.ll
@@ -3,14 +3,14 @@
target datalayout = "e-p:32:32"
-; CHECK: [[G:@[^ ]*]] = private constant { i32, [63 x i32], i32, [2 x i32] } { i32 1, [63 x i32] zeroinitializer, i32 3, [2 x i32] [i32 4, i32 5] }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1
@b = constant [63 x i32] zeroinitializer
@c = constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]
; Offset 0, 4 byte alignment
-; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\04"
+; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\08"
!0 = !{!"bitset1", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
!1 = !{!"bitset1", [63 x i32]* @b, i32 0}
@@ -18,15 +18,15 @@
!2 = !{!"bitset1", [2 x i32]* @d, i32 4}
; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}
-; Offset 4, 4 byte alignment
-; CHECK: @bitset2.bits = private constant [8 x i8] c"\01\00\00\00\00\00\00\80"
+; Offset 4, 256 byte alignment
+; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
!3 = !{!"bitset2", [63 x i32]* @b, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}
!4 = !{!"bitset2", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
-; Offset 0, 256 byte alignment
-; CHECK: @bitset3.bits = private constant [1 x i8] c"\03"
+; Offset 0, 4 byte alignment
+; CHECK: @bitset3.bits = private constant [9 x i8] c"\01\00\00\00\00\00\00\00\02"
!5 = !{!"bitset3", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
!6 = !{!"bitset3", i32* @c, i32 0}
@@ -38,10 +38,10 @@
!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
-; CHECK: @a = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 0)
-; CHECK: @b = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 1)
-; CHECK: @c = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 2)
-; CHECK: @d = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 3)
+; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
@@ -52,11 +52,11 @@
; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
- ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
+ ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
- ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 67
+ ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68
; CHECK: br i1 [[R6]]
; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R5]], 5
@@ -82,22 +82,14 @@
; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
- ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32), i32 4)
- ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2
- ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30
+ ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32), i32 4)
+ ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 8
+ ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24
; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
- ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 64
- ; CHECK: br i1 [[S6]]
-
- ; CHECK: [[S8:%[^ ]*]] = zext i32 [[S5]] to i64
- ; CHECK: [[S9:%[^ ]*]] = and i64 [[S8]], 63
- ; CHECK: [[S10:%[^ ]*]] = shl i64 1, [[S9]]
- ; CHECK: [[S11:%[^ ]*]] = and i64 -9223372036854775807, [[S10]]
- ; CHECK: [[S12:%[^ ]*]] = icmp ne i64 [[S11]], 0
-
- ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S12]], {{%[^ ]*}} ]
+ ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
- ; CHECK: ret i1 [[S16]]
+
+ ; CHECK: ret i1 [[S6]]
ret i1 %x
}
@@ -106,19 +98,22 @@
; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
- ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
- ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 8
- ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 24
+ ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
+ ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2
+ ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30
; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
- ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2
+ ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66
; CHECK: br i1 [[T6]]
- ; CHECK: [[T8:%[^ ]*]] = and i32 [[T5]], 31
- ; CHECK: [[T9:%[^ ]*]] = shl i32 1, [[T8]]
- ; CHECK: [[T10:%[^ ]*]] = and i32 3, [[T9]]
- ; CHECK: [[T11:%[^ ]*]] = icmp ne i32 [[T10]], 0
+ ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T5]], 5
+ ; CHECK: [[T9:%[^ ]*]] = getelementptr i32* bitcast ([9 x i8]* @bitset3.bits to i32*), i32 [[T8]]
+ ; CHECK: [[T10:%[^ ]*]] = load i32* [[T9]]
+ ; CHECK: [[T11:%[^ ]*]] = and i32 [[T5]], 31
+ ; CHECK: [[T12:%[^ ]*]] = shl i32 1, [[T11]]
+ ; CHECK: [[T13:%[^ ]*]] = and i32 [[T10]], [[T12]]
+ ; CHECK: [[T14:%[^ ]*]] = icmp ne i32 [[T13]], 0
- ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
+ ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
; CHECK: ret i1 [[T16]]
ret i1 %x
diff --git a/llvm/test/Transforms/LowerBitSets/single-offset.ll b/llvm/test/Transforms/LowerBitSets/single-offset.ll
index ebe57bf..57194f4 100644
--- a/llvm/test/Transforms/LowerBitSets/single-offset.ll
+++ b/llvm/test/Transforms/LowerBitSets/single-offset.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:32:32"
-; CHECK: [[G:@[^ ]*]] = private constant { i32, i32 }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], i32 }
@a = constant i32 1
@b = constant i32 2
@@ -18,7 +18,7 @@
; CHECK: @foo(i8* [[A0:%[^ ]*]])
define i1 @foo(i8* %p) {
; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32
- ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, i32 }* [[G]] to i32)
+ ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset2")
; CHECK: ret i1 [[R1]]
ret i1 %x
@@ -27,7 +27,7 @@
; CHECK: @bar(i8* [[B0:%[^ ]*]])
define i1 @bar(i8* %p) {
; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32
- ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, i32 }* [[G]] to i32), i32 4)
+ ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32), i32 4)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset3")
; CHECK: ret i1 [[S1]]
ret i1 %x