blob: 420f5583ecfd96be6a2924e0058f8d0821178bb6 [file] [log] [blame]
Alexandre Rames22aa54b2016-10-18 09:32:29 +01001/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
19
Vladimir Markoe2727152019-10-10 10:46:42 +010020#include "base/macros.h"
Alexandre Rames22aa54b2016-10-18 09:32:29 +010021#include "scheduler.h"
22
Vladimir Markoe2727152019-10-10 10:46:42 +010023namespace art HIDDEN {
Alexandre Rames22aa54b2016-10-18 09:32:29 +010024namespace arm64 {
25
// Latency estimates (in abstract cycles) used by the ARM64 scheduler's cost
// model. Per the note below, a single list is assumed to fit all arm64 CPUs,
// so these are heuristics rather than exact core-specific timings.

// Scalar memory access.
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

// Calls: internal latency models work done inside the callee sequence,
// kArm64CallLatency the call instruction itself.
static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

// Latencies for specific scalar operation classes.
static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
// A branch is costed the same as a plain integer operation.
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

// SIMD (vector) operations; modeled as costlier than their scalar
// counterparts above.
static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
Alexandre Rames22aa54b2016-10-18 09:32:29 +010058
59class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
60 public:
61 // Default visitor for instructions not handled specifically below.
Yi Kong39402542019-03-24 02:47:16 -070062 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
Alexandre Rames22aa54b2016-10-18 09:32:29 +010063 last_visited_latency_ = kArm64IntegerOpLatency;
64 }
65
66// We add a second unused parameter to be able to use this macro like the others
67// defined in `nodes.h`.
Artem Serovf0fc4c62017-05-03 15:07:15 +010068#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
69 M(ArrayGet , unused) \
70 M(ArrayLength , unused) \
71 M(ArraySet , unused) \
Artem Serovf0fc4c62017-05-03 15:07:15 +010072 M(BoundsCheck , unused) \
73 M(Div , unused) \
74 M(InstanceFieldGet , unused) \
75 M(InstanceOf , unused) \
Artem Serovf0fc4c62017-05-03 15:07:15 +010076 M(LoadString , unused) \
77 M(Mul , unused) \
78 M(NewArray , unused) \
79 M(NewInstance , unused) \
80 M(Rem , unused) \
81 M(StaticFieldGet , unused) \
82 M(SuspendCheck , unused) \
83 M(TypeConversion , unused) \
84 M(VecReplicateScalar , unused) \
Aart Bik0148de42017-09-05 09:25:01 -070085 M(VecExtractScalar , unused) \
86 M(VecReduce , unused) \
Artem Serovf0fc4c62017-05-03 15:07:15 +010087 M(VecCnv , unused) \
88 M(VecNeg , unused) \
89 M(VecAbs , unused) \
90 M(VecNot , unused) \
91 M(VecAdd , unused) \
92 M(VecHalvingAdd , unused) \
93 M(VecSub , unused) \
94 M(VecMul , unused) \
95 M(VecDiv , unused) \
96 M(VecMin , unused) \
97 M(VecMax , unused) \
98 M(VecAnd , unused) \
99 M(VecAndNot , unused) \
100 M(VecOr , unused) \
101 M(VecXor , unused) \
102 M(VecShl , unused) \
103 M(VecShr , unused) \
104 M(VecUShr , unused) \
Aart Bik0148de42017-09-05 09:25:01 -0700105 M(VecSetScalars , unused) \
Artem Serovf0fc4c62017-05-03 15:07:15 +0100106 M(VecMultiplyAccumulate, unused) \
107 M(VecLoad , unused) \
108 M(VecStore , unused)
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100109
Vladimir Markoe3946222018-05-04 14:18:47 +0100110#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
111 M(BinaryOperation , unused) \
112 M(Invoke , unused)
113
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100114#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
115 M(BitwiseNegatedRight, unused) \
116 M(MultiplyAccumulate, unused) \
Anton Kirilov74234da2017-01-13 14:42:47 +0000117 M(IntermediateAddress, unused) \
Artem Serovf0fc4c62017-05-03 15:07:15 +0100118 M(IntermediateAddressIndex, unused) \
Anton Kirilov74234da2017-01-13 14:42:47 +0000119 M(DataProcWithShifterOp, unused)
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100120
121#define DECLARE_VISIT_INSTRUCTION(type, unused) \
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100122 void Visit##type(H##type* instruction) override;
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100123
124 FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
Vladimir Markoe3946222018-05-04 14:18:47 +0100125 FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100126 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
127 FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
128
129#undef DECLARE_VISIT_INSTRUCTION
Artem Serovf0fc4c62017-05-03 15:07:15 +0100130
131 private:
132 void HandleSimpleArithmeticSIMD(HVecOperation *instr);
133 void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100134};
135
// ARM64 concrete scheduler: wires the generic HScheduler to the ARM64
// latency visitor and widens the set of schedulable instructions to the
// ARM64-specific, shared, and common instruction lists above.
class HSchedulerARM64 : public HScheduler {
 public:
  // The latency visitor is owned by this scheduler (member below); only the
  // node selection strategy is injected.
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

  // Returns true for every instruction kind enumerated in the scheduled
  // instruction macros; anything else defers to the base implementation.
  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
  // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler;
  // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of
  // SIMD&FP registers are callee saved) so don't reorder such vector instructions.
  //
  // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};
176
177} // namespace arm64
178} // namespace art
179
180#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_