ART: Add entrypoint and intrinsic for Math.pow().
MathBenchmarks.java#timePow results on taimen's little cores
fixed at frequency 1401600 with forced JIT compilation:
- before:
- X32: 356.33 (@FastNative), 315.39 (@CriticalNative)
- X64: 357.31 (@FastNative), 315.37 (@CriticalNative)
- after (LICM defeats the benchmark):
- X32: 2.88
- X64: 2.87
- after but with kAllSideEffects to prevent LICM:
- X32: 275.42
- X64: 275.67
Test: Rely on TreeHugger.
Bug: 70727450
Change-Id: Iaa31f70acabbd57c163cfeafe02eed67c1348861
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca1b451..2f8e33f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2011,6 +2011,14 @@
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 99b8b5d..830d040 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2811,6 +2811,14 @@
GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 113c9de..cafa522 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2835,6 +2835,15 @@
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double y, double x)
+void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 521bad2..89f1818 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2416,6 +2416,15 @@
GenFPFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+// static double java.lang.Math.pow(double y, double x)
+void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) {
+ GenFPFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
// static double java.lang.Math.cbrt(double a)
void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index baa410b..46b7f3f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1105,6 +1105,14 @@
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6dd8b8e..6483b7c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -897,6 +897,14 @@
GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}
+void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
+ GenFPToFPCall(invoke, codegen_, kQuickPow);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
CreateFPFPToFPCallLocations(allocator_, invoke);
}
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 0a09435..674dc9a 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@
" 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n",
" 220: 4770 bx lr\n",
" 222: 4660 mov r0, ip\n",
- " 224: f8d9 c2c0 ldr.w ip, [r9, #704] ; 0x2c0\n",
+ " 224: f8d9 c2c4 ldr.w ip, [r9, #708] ; 0x2c4\n",
" 228: 47e0 blx ip\n",
nullptr
};
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index c45166b..d5a8783 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -485,7 +485,7 @@
EXPECT_EQ(76U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+ EXPECT_EQ(162 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
sizeof(QuickEntryPoints));
}
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 78b9e46..80080e9 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -144,6 +144,7 @@
qpoints->pAsin = asin;
qpoints->pAtan = atan;
qpoints->pAtan2 = atan2;
+ qpoints->pPow = pow;
qpoints->pCbrt = cbrt;
qpoints->pCosh = cosh;
qpoints->pExp = exp;
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 80bf3ab..4c43b7e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -168,6 +168,7 @@
qpoints->pAsin = asin;
qpoints->pAtan = atan;
qpoints->pAtan2 = atan2;
+ qpoints->pPow = pow;
qpoints->pCbrt = cbrt;
qpoints->pCosh = cosh;
qpoints->pExp = exp;
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 209f367..badee59 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -348,6 +348,8 @@
static_assert(IsDirectEntrypoint(kQuickAtan), "Direct C stub marked non-direct.");
qpoints->pAtan2 = atan2;
static_assert(IsDirectEntrypoint(kQuickAtan2), "Direct C stub marked non-direct.");
+ qpoints->pPow = pow;
+ static_assert(IsDirectEntrypoint(kQuickPow), "Direct C stub marked non-direct.");
qpoints->pCbrt = cbrt;
static_assert(IsDirectEntrypoint(kQuickCbrt), "Direct C stub marked non-direct.");
qpoints->pCosh = cosh;
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 35cbd1d..bdfb942 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -165,6 +165,7 @@
qpoints->pAsin = asin;
qpoints->pAtan = atan;
qpoints->pAtan2 = atan2;
+ qpoints->pPow = pow;
qpoints->pCbrt = cbrt;
qpoints->pCosh = cosh;
qpoints->pExp = exp;
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 24bf9cc..ffb0c94 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -68,6 +68,7 @@
qpoints->pAsin = asin;
qpoints->pAtan = atan;
qpoints->pAtan2 = atan2;
+ qpoints->pPow = pow;
qpoints->pCbrt = cbrt;
qpoints->pCosh = cosh;
qpoints->pExp = exp;
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 3656f83..6bae69c 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -91,6 +91,7 @@
qpoints->pAsin = asin;
qpoints->pAtan = atan;
qpoints->pAtan2 = atan2;
+ qpoints->pPow = pow;
qpoints->pCbrt = cbrt;
qpoints->pCosh = cosh;
qpoints->pExp = exp;
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 3cf2b93..2f7d6ab 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -73,7 +73,7 @@
// Offset of field Thread::tlsPtr_.mterp_current_ibase.
#define THREAD_CURRENT_IBASE_OFFSET \
- (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 161) * __SIZEOF_POINTER__)
+ (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 162) * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET,
art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value())
// Offset of field Thread::tlsPtr_.mterp_default_ibase.
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 74e7c18..48a56f2 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -91,6 +91,7 @@
V(Asin, double, double) \
V(Atan, double, double) \
V(Atan2, double, double, double) \
+ V(Pow, double, double, double) \
V(Cbrt, double, double) \
V(Cosh, double, double) \
V(Exp, double, double) \
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 7c912d0..1fdf439 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -238,7 +238,8 @@
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAcos, pAsin, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAsin, pAtan, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan, pAtan2, sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pCbrt, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAtan2, pPow, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pPow, pCbrt, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCbrt, pCosh, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCosh, pExp, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pExp, pExpm1, sizeof(void*));
diff --git a/runtime/image.cc b/runtime/image.cc
index dd0c148..8e3615f 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
namespace art {
const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '3', '\0' }; // ClassStatus in high bits.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '4', '\0' }; // Math.pow() intrinsic.
ImageHeader::ImageHeader(uint32_t image_begin,
uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 99a4f76..681a582 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -478,6 +478,7 @@
UNIMPLEMENTED_CASE(MathLog /* (D)D */)
UNIMPLEMENTED_CASE(MathLog10 /* (D)D */)
UNIMPLEMENTED_CASE(MathNextAfter /* (DD)D */)
+ UNIMPLEMENTED_CASE(MathPow /* (DD)D */)
UNIMPLEMENTED_CASE(MathSinh /* (D)D */)
INTRINSIC_CASE(MathTan)
UNIMPLEMENTED_CASE(MathTanh /* (D)D */)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index d007728..da08793 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -136,6 +136,7 @@
V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "asin", "(D)D") \
V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan", "(D)D") \
V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "atan2", "(DD)D") \
+ V(MathPow, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "pow", "(DD)D") \
V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cbrt", "(D)D") \
V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "cosh", "(D)D") \
V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Math;", "exp", "(D)D") \
diff --git a/runtime/oat.h b/runtime/oat.h
index 36099b9..8f81010 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- // Last oat version changed reason: ClassStatus in high bits.
- static constexpr uint8_t kOatVersion[] = { '1', '3', '7', '\0' };
+ // Last oat version changed reason: Math.pow() intrinsic.
+ static constexpr uint8_t kOatVersion[] = { '1', '3', '8', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";