[AArch64] Static (de)allocation of SVE stack objects.
Adds support to AArch64FrameLowering to allocate fixed-stack SVE objects.
The focus of this patch is purely to allow the stack frame to
allocate/deallocate space for scalable SVE objects. More dynamic
allocation (i.e. determining the placement of SVE objects on the stack
at compile time) and resolving frame-index references that include
scalable-sized offsets are left for subsequent patches.
SVE objects are allocated in the stack frame as a separate region below
the callee-save area, and above the alignment gap. This is done so that
the SVE objects can be accessed directly from the FP at (runtime)
VL-based offsets to benefit from using the VL-scaled addressing modes.
The layout looks as follows:
+-------------+
| stack arg   |
+-------------+
| Callee Saves|
|   X29, X30  |       (if available)
|-------------| <- FP (if available)
|     :       |
|  SVE area   |
|     :       |
+-------------+
|/////////////| alignment gap.
|     :       |
| Stack objs  |
|     :       |
+-------------+ <- SP after call and frame-setup
SVE and non-SVE stack objects are distinguished using different
StackIDs. The offsets for objects with TargetStackID::SVEVector should be
interpreted as purely scalable offsets within their respective SVE region.
Reviewers: thegameg, rovka, t.p.northover, efriedma, rengolin, greened
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D61437
llvm-svn: 373585
diff --git a/llvm/unittests/Target/AArch64/TestStackOffset.cpp b/llvm/unittests/Target/AArch64/TestStackOffset.cpp
index 240cec9..c85135e 100644
--- a/llvm/unittests/Target/AArch64/TestStackOffset.cpp
+++ b/llvm/unittests/Target/AArch64/TestStackOffset.cpp
@@ -20,6 +20,15 @@
StackOffset C(2, MVT::v4i64);
EXPECT_EQ(64, C.getBytes());
+
+ StackOffset D(2, MVT::nxv4i64);
+ EXPECT_EQ(64, D.getScalableBytes());
+
+ StackOffset E(2, MVT::v4i64);
+ EXPECT_EQ(0, E.getScalableBytes());
+
+ StackOffset F(2, MVT::nxv4i64);
+ EXPECT_EQ(0, F.getBytes());
}
TEST(StackOffset, Add) {
@@ -31,6 +40,11 @@
StackOffset D(1, MVT::i32);
D += A;
EXPECT_EQ(12, D.getBytes());
+
+ StackOffset E(1, MVT::nxv1i32);
+ StackOffset F = C + E;
+ EXPECT_EQ(12, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, Sub) {
@@ -42,6 +56,12 @@
StackOffset D(1, MVT::i64);
D -= A;
EXPECT_EQ(0, D.getBytes());
+
+ C += StackOffset(2, MVT::nxv1i32);
+ StackOffset E = StackOffset(1, MVT::nxv1i32);
+ StackOffset F = C - E;
+ EXPECT_EQ(4, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, isZero) {
@@ -49,12 +69,63 @@
StackOffset B(0, MVT::i32);
EXPECT_TRUE(!A);
EXPECT_TRUE(!(A + B));
+
+ StackOffset C(0, MVT::nxv1i32);
+ EXPECT_TRUE(!(A + C));
+
+ StackOffset D(1, MVT::nxv1i32);
+ EXPECT_FALSE(!(A + D));
+}
+
+TEST(StackOffset, isValid) {
+ EXPECT_FALSE(StackOffset(1, MVT::nxv8i1).isValid());
+ EXPECT_TRUE(StackOffset(2, MVT::nxv8i1).isValid());
+
+#ifndef NDEBUG
+#ifdef GTEST_HAS_DEATH_TEST
+ EXPECT_DEATH(StackOffset(1, MVT::i1),
+ "Offset type is not a multiple of bytes");
+ EXPECT_DEATH(StackOffset(1, MVT::nxv1i1),
+ "Offset type is not a multiple of bytes");
+#endif // defined GTEST_HAS_DEATH_TEST
+#endif // not defined NDEBUG
}
TEST(StackOffset, getForFrameOffset) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
- int64_t ByteSized;
- (A + B).getForFrameOffset(ByteSized);
+ StackOffset C(1, MVT::nxv4i32);
+
+ // If all offsets can be materialized with only ADDVL,
+ // make sure PLSized is 0.
+ int64_t ByteSized, VLSized, PLSized;
+ (A + B + C).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(12, ByteSized);
+ EXPECT_EQ(1, VLSized);
+ EXPECT_EQ(0, PLSized);
+
+ // If we need an ADDPL to materialize the offset, and the number of scalable
+ // bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized.
+ StackOffset D(1, MVT::nxv16i1);
+ (C + D).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(9, PLSized);
+
+ StackOffset E(4, MVT::nxv4i32);
+ StackOffset F(1, MVT::nxv16i1);
+ (E + F).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(33, PLSized);
+
+ // If the offset requires an ADDPL instruction to materialize, and would
+ // require more than two instructions, decompose it into both
+ // ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions.
+ StackOffset G(8, MVT::nxv4i32);
+ StackOffset H(1, MVT::nxv16i1);
+ (G + H).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(8, VLSized);
+ EXPECT_EQ(1, PLSized);
}